셀레니움
- 브라우저를 컨트롤 할 수 있도록 지원하는 라이브러리
!pip install selenium
!pip install chromedriver_autoinstaller
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
# Start a Chrome session controlled through Selenium.
driver = webdriver.Chrome()
# Load the Google home page in that browser.
driver.get('https://www.google.com')
# Find the element whose name attribute is 'q' (presumably Google's search box).
search = driver.find_element('name','q')
# Type the query text into the element, then send the Return key.
search.send_keys('날씨')
search.send_keys(Keys.RETURN)
# Summary: the same open -> locate -> type -> Return sequence with another query.
# A new Chrome session is started; `driver` is rebound to it.
driver = webdriver.Chrome()
driver.get('https://www.google.com')
# Find the element whose name attribute is 'q' (presumably Google's search box).
search = driver.find_element('name','q')
# Type the query text, then send the Return key.
search.send_keys('미세먼지')
search.send_keys(Keys.RETURN)
❤ 네이버 웹툰
# Start a new Chrome session and load one Naver Webtoon episode page
# (titleId, episode number and weekday are fixed in the query string).
driver = webdriver.Chrome()
driver.get('https://comic.naver.com/webtoon/detail?titleId=783053&no=134&week=tue')
!pip install bs4
from bs4 import BeautifulSoup
# driver.page_source holds the HTML of the page as currently rendered in the
# browser. The parser is named explicitly so the result does not depend on
# which optional parsers happen to be installed (and bs4 does not warn).
soup = BeautifulSoup(driver.page_source, 'html.parser')

# Every comment body is a <span> carrying the class 'u_cbox_contents'.
comment_area = soup.find_all('span', {'class': 'u_cbox_contents'})
print(comment_area)

print('*********** 베스트 댓글 ************')
for comment_span in comment_area:
    # Strip the surrounding whitespace left over from the markup.
    comment = comment_span.text.strip()
    print(comment)
    print('-' * 30)
XPath
: XML/HTML 문서에서 특정 요소(노드)의 위치를 가리키는 경로 언어로, 컴퓨터 파일 시스템의 경로 표현식과 비슷한 형태로 작성한다
- 예) /html/body/div[1]/div[5]/div/div/div[5]/div[1]/div[3]/div/div/div[6]/ul/li[1]/div[1]/div/div[2]/span[1]
# Click the element at this absolute XPath (per the printed banner below,
# this switches the comment list to "all comments").
# NOTE(review): page_source is read immediately after the click; if the list
# is refreshed asynchronously a short wait may be needed here - confirm.
driver.find_element('xpath','/html/body/div[1]/div[5]/div/div/div[5]/div[1]/div[3]/div/div/div[6]/ul/li[1]/div[1]/div/div[2]/span[1]').click()

# Re-parse the page after the click. The parser is named explicitly so the
# result does not depend on which optional parsers are installed.
soup = BeautifulSoup(driver.page_source, 'html.parser')

# Every comment body is a <span> carrying the class 'u_cbox_contents'.
comment_area = soup.find_all('span', {'class': 'u_cbox_contents'})
print(comment_area)

print('*********** 전체 댓글 ************')
for comment_span in comment_area:
    # Strip the surrounding whitespace left over from the markup.
    comment = comment_span.text.strip()
    print(comment)
    print('-' * 30)
❤ 인스타그램
import chromedriver_autoinstaller
from selenium import webdriver
# Start a new Chrome session and load the Instagram home page.
driver = webdriver.Chrome()
url = 'https://www.instagram.com/'
driver.get(url)
# Placeholder credentials; replace with a real account before running.
# NOTE(review): the name `id` shadows the Python builtin of the same name.
id = '아이디'
pw = '비밀번호'
# Locate the two <input> elements of the login form by absolute XPath
# (first form row and second form row respectively).
input_id = driver.find_element('xpath','/html/body/div[2]/div/div/div[2]/div/div/div[1]/section/main/article/div[2]/div[1]/div[2]/form/div/div[1]/div/label/input')
input_pw = driver.find_element('xpath','/html/body/div[2]/div/div/div[2]/div/div/div[1]/section/main/article/div[2]/div[1]/div[2]/form/div/div[2]/div/label/input')
# Enter the ID
input_id.send_keys(id)
# Enter the password
input_pw.send_keys(pw)
# Click the third row of the form (presumably the row holding the log-in button).
driver.find_element('xpath', '/html/body/div[2]/div/div/div[2]/div/div/div[1]/section/main/article/div[2]/div[1]/div[2]/form/div/div[3]').click()
✔ 해시태그 검색
# Build the explore-by-tag URL for the chosen hashtag and navigate to it
# in the existing browser session.
hashtag = '맛점'
url = f'https://www.instagram.com/explore/tags/{hashtag}/'
driver.get(url)
✔ 스크롤 내리기
import time
# Scroll the window to the bottom of the document. The range controls how
# many times the scroll is repeated (currently once).
for _ in range(1):
    driver.execute_script('window.scrollTo(0, document.body.scrollHeight)')
    # Optional pause between scrolls, left disabled as in the original:
    # time.sleep(3)
✔ 원하는 사진 클릭하기
# Absolute XPath of one <a> element on the tag page (presumably the link
# wrapping the post thumbnail to open); click it.
xpath = '/html/body/div[2]/div/div/div[2]/div/div/div[1]/div[1]/div[2]/section/main/article/div/div[2]/div/div[2]/div[2]/a'
driver.find_element('xpath', xpath).click()
✔ 좋아요 및 댓글 달기
# Absolute XPath of the like control inside the opened post.
like_xpath = '/html/body/div[8]/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[3]/div/div/section[1]/span[1]/div/div'
# The element is looked up and clicked twice.
# NOTE(review): if this control is a toggle, the second click would undo the
# first - confirm that two clicks are intended.
driver.find_element('xpath', like_xpath).click()
driver.find_element('xpath', like_xpath).click()
# Absolute XPath of the comment <textarea>.
reply_xpath = '/html/body/div[2]/div/div/div[2]/div/div/div[1]/div[1]/div[2]/section/main/div/div[1]/div/div[2]/div/div[4]/section/div/form/div/textarea'
# Click the textarea, then look it up again and type the comment text.
# No Return key or submit click is sent here, so the text is only typed.
driver.find_element('xpath', reply_xpath).click()
driver.find_element('xpath', reply_xpath).send_keys('👍')
✔ 함수로 리팩토링
# Log in
def login(id, pw):
    """Fill in the Instagram login form and press its button.

    Works on the module-level ``driver``, which must already be showing the
    login page. Elements are located by absolute XPath, so the lookups break
    whenever the page layout changes.

    Args:
        id: Account identifier, typed into the first field of the form.
        pw: Password, typed into the second field of the form.
    """
    # All three elements live under the same <form>; share that prefix.
    form_xpath = "/html/body/div[2]/div/div/div[2]/div/div/div[1]/section/main/article/div[2]/div[1]/div[2]/form"
    input_id = driver.find_element("xpath", f"{form_xpath}/div/div[1]/div/label/input")
    input_pw = driver.find_element("xpath", f"{form_xpath}/div/div[2]/div/label/input")

    # Each credential goes into its own field (they were previously swapped).
    input_id.send_keys(id)
    input_pw.send_keys(pw)

    driver.find_element("xpath", f"{form_xpath}/div/div[3]/button").click()
# Hashtag search
def search(hashtag):
    """Navigate the module-level ``driver`` to the explore page of a hashtag.

    Args:
        hashtag: Tag text. A leading '#' is dropped, because inside a URL it
            would start the fragment and the tag would never reach the path.
    """
    tag = hashtag.lstrip("#")
    url = f"https://instagram.com/explore/tags/{tag}/"
    driver.get(url)
# Like and comment
def like_and_comment(comment):
    """Open a post from the current tag page, click like, and type a comment.

    Works on the module-level ``driver``, which must already be showing a
    hashtag results page. Elements are located by absolute XPath, so the
    lookups break whenever the page layout changes.

    Args:
        comment: Text typed into the comment box. An empty value skips the
            comment step. The text is typed only; no submit key is sent.
    """
    # Open the post behind this link of the results grid.
    xpath = '/html/body/div[2]/div/div/div[2]/div/div/div[1]/div[1]/div[2]/section/main/article/div/div[2]/div/div[1]/div[2]/a'
    driver.find_element("xpath", xpath).click()

    # The like control is looked up and clicked twice, as before; it is
    # re-located for the second click rather than reusing the first handle.
    like_xpath = "/html/body/div[2]/div/div/div[2]/div/div/div[1]/div[1]/div[2]/section/main/div/article/div[3]/div/div[1]/section[1]/div[1]/span[1]/div/div"
    driver.find_element("xpath", like_xpath).click()
    driver.find_element("xpath", like_xpath).click()

    # Previously `comment` was accepted but never used. Focus the comment
    # textarea and type the text into it.
    # NOTE(review): this absolute XPath is assumed to match the post view
    # reached above - confirm against the live page.
    if comment:
        reply_xpath = '/html/body/div[2]/div/div/div[2]/div/div/div[1]/div[1]/div[2]/section/main/div/div[1]/div/div[2]/div/div[4]/section/div/form/div/textarea'
        driver.find_element("xpath", reply_xpath).click()
        driver.find_element("xpath", reply_xpath).send_keys(comment)
# Run
# Start a new Chrome session and load the Instagram home page.
driver = webdriver.Chrome()
url = 'https://www.instagram.com/'
driver.get(url)
# Let element lookups retry for up to 3 seconds before failing.
driver.implicitly_wait(3)
# Placeholder credentials; replace with a real account before running.
# NOTE(review): the name `id` shadows the Python builtin of the same name.
id = '아이디'
pw = '비번'
login(id, pw)
# Fixed pause after submitting the login form (presumably to let the next page load).
time.sleep(4)
hashtag = '사과'
search(hashtag)
# Fixed pause after navigating to the tag page (presumably to let results load).
time.sleep(4)
like_and_comment('좋아요!!')
❤ 픽사베이
✔ 이미지 수집하기
import chromedriver_autoinstaller
import time
from selenium import webdriver
from urllib.request import Request, urlopen
# Start a new Chrome session and load the Pixabay search results for '강아지'.
driver = webdriver.Chrome()
url = 'https://pixabay.com/ko/images/search/강아지/'
driver.get(url)

# Read the `src` attribute of the <img> element at this absolute XPath.
image_xpath = '/html/body/div[1]/div[1]/div/div/div/div[1]/div[2]/div/img'
image_url = driver.find_element('xpath', image_xpath).get_attribute('src')
print('image_url: ', image_url)

# Despite its name this is the Request object, not the image bytes. It carries
# a browser-like User-Agent (presumably because the default urllib one is
# rejected by the server).
image_byte = Request(image_url, headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'})

# Download first, so a failed request does not leave an empty dog.jpg behind;
# both the HTTP response and the file are closed by their context managers.
with urlopen(image_byte) as response:
    image_data = response.read()
with open('dog.jpg', 'wb') as f:
    f.write(image_data)
'코딩 > AI' 카테고리의 다른 글
데이터 분석, 판다스 (0) | 2024.05.23 |
---|---|
데이터 분석, 넘파이 (0) | 2024.05.23 |
Web Crawling, 3일차 (0) | 2024.05.23 |
Web Crawling, 과제1 (0) | 2024.05.21 |
Web Crawling , 1일차 (0) | 2024.05.21 |