Yesterday I only managed to grab the hrefs, so today let's save the images locally and loop over the remaining members to pull all of their photos.
And if there's time, let's try resizing too.
Today's menu
- Save images
Started at 11 on 09/14 and it's already 1 on 09/15.
Hmm.. the page currently has 10 photos posted, but scraping the img tags in order also pulls in the small profile thumbnails of the people on the right side.
I can't face dealing with that right now, so let's just grab the first and second photos from each post page.
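A possible way to handle it later (just a sketch; the 'profile' alt-text check is an assumption about Instagram's markup, not something verified here) would be to filter the img tags before collecting the src values:

from bs4 import BeautifulSoup

def get_post_image_urls(html, limit=2):
    # Hypothetical filter: skip <img> tags whose alt text looks like a
    # profile thumbnail, keep the rest up to `limit` images.
    soup = BeautifulSoup(html, 'lxml')
    urls = []
    for img in soup.select('img'):
        alt = img.get('alt', '')
        src = img.get('src', '')
        if 'profile' in alt.lower():
            continue
        if src:
            urls.append(src)
        if len(urls) == limit:
            break
    return urls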
Now that local saving works, let's clean up the whole code
(took about 30 minutes)
and save 2 photos each from the top 3 posts of all 4 members.
Wow, it was working and I was about to wrap up when it suddenly broke.
I spent ages hunting for the cause: an int, not a str, had ended up in the file path.
It was tucked away in a variable, so it took forever to find. Ugh lol
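For the record, the crash was just string concatenation with an int in it. A minimal illustration with sample values (the real code below casts with str()):

save_path = "images/"  # placeholder folder, not the real path
membername, postnum, imgnum = "jennierubyjane", 0, 1

# TypeError: can only concatenate str (not "int") to str
# filename = save_path + membername + "_" + postnum + "_" + imgnum + ".jpg"

# Cast explicitly...
filename = save_path + membername + "_" + str(postnum) + "_" + str(imgnum) + ".jpg"
# ...or let an f-string do the conversion
filename = f"{save_path}{membername}_{postnum}_{imgnum}.jpg"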
main.py
import instargram_crawler as Icr
import time

user_id = "asdf"
user_passwd = "asdf"
driver_path = "D:\\DEV\\fansieve-background_maker\\web-fansieve-backgroundmaker\\chromedriver.exe"

instagram_id_name = "username"
instagram_pw_name = "password"
instagram_login_btn = ".sqdOP.L3NKy.y3zKF "
facebook_login_page_css = ".sqdOP.L3NKy.y3zKF "
facebook_login_page_css2 = ".sqdOP.yWX7d.y3zKF "
facebook_id_form_name = "email"
facebook_pw_form_name = "pass"
facebook_login_btn_name = "login"

JENNIE = 'jennierubyjane'
LISA = 'lalalalisa_m'
JISOO = 'sooyaaa__'
ROSIE = 'roses_are_rosie'
blackpink = [JENNIE, LISA, JISOO, ROSIE]

login_option = "facebook"  # facebook or instagram
login_url = "https://www.instagram.com/accounts/login/"
alarm_popup_xpath = '/html/body/div[1]/div/div/div/div[2]/div/div/div[1]/div/div[2]/div/div/div/div/div[2]/div/div/div[3]/button[2]'
nextbtn_xpath = '/html/body/div[1]/div/div/div/div[1]/div/div/div/div[1]/section/main/div[1]/div[1]/article/div/div[1]/div/div[1]/div[2]/div/button/div'
save_path = "D:/DEV/fansieve-background_maker/web-fansieve-backgroundmaker/images/"
div_class_name = '_ac7v _aang'

# __main__
print(f"login start - option {login_option}")

# log in via Facebook
Icr.get_start(login_url)
Icr.pagelogin(user_id, user_passwd, login_option, facebook_login_page_css, facebook_login_page_css2,
              facebook_id_form_name, facebook_pw_form_name, facebook_login_btn_name)

# close the notification popup
Icr.close_popup(alarm_popup_xpath)

for member_id in blackpink:                        # iterate over the 4 members' IDs
    Icr.searchUserId(member_id)                    # go to the member's page
    href_arr = Icr.get_href(div_class_name)        # get the 3 most recent post hrefs
    for i in range(3):                             # 3 most recent posts
        Icr.searchInstaUrl(href_arr[i])            # open the post
        time.sleep(3)
        Icr.get_imageUrl(save_path, member_id, i)  # save the photos

Icr.driver_close()
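One more thing I'd hedge on: urllib.request.urlretrieve doesn't create the images/ folder, so if it's missing every save just falls into the except branch in get_imageUrl. A small guard before the member loop (a sketch, not part of the code above) would be:

import os

save_path = "D:/DEV/fansieve-background_maker/web-fansieve-backgroundmaker/images/"

# Make sure the target folder exists; exist_ok=True is a no-op if it already does.
os.makedirs(save_path, exist_ok=True)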
instargram_crawler.py
from selenium import webdriver as wd
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import requests
from urllib import request
import urllib
import time
import re

driver = wd.Chrome(service=Service(ChromeDriverManager().install()))


def get_start(login_url):
    driver.get(login_url)
    time.sleep(10)


# login
def pagelogin(user_id, user_passwd, login_option, facebook_login_page_css, facebook_login_page_css2,
              facebook_id_form_name, facebook_pw_form_name, facebook_login_btn_name):
    is_login_success = False
    if login_option == "facebook":
        is_facebook_btn_click = False
        try:
            print("try click facebook login button 1")
            facebook_login_btn = driver.find_element(By.CSS_SELECTOR, facebook_login_page_css)
            time.sleep(5)
            facebook_login_btn.click()
            is_facebook_btn_click = True
            is_login_success = True
        except:
            print("click facebook login button 1 fail")
            is_facebook_btn_click = False
            is_login_success = False
        time.sleep(10)

        if not is_facebook_btn_click:
            try:
                print("try click facebook login button 2")
                facebook_login_btn = driver.find_element(By.CSS_SELECTOR, facebook_login_page_css2)
                time.sleep(5)
                facebook_login_btn.click()
                is_facebook_btn_click = True
                print("facebook login btn clicked")
                is_login_success = True
            except:
                print("click facebook login button 2 fail")
                is_login_success = False
            time.sleep(10)

        if is_facebook_btn_click:
            id_input_form = driver.find_element(By.NAME, facebook_id_form_name)
            time.sleep(3)
            id_input_form.send_keys(user_id)
            time.sleep(3)
            pw_input_form = driver.find_element(By.NAME, facebook_pw_form_name)
            time.sleep(3)
            pw_input_form.send_keys(user_passwd)
            time.sleep(3)
            login_btn = driver.find_element(By.NAME, facebook_login_btn_name)
            time.sleep(5)
            login_btn.click()

    if is_login_success:
        print("login success")
    time.sleep(10)


def close_popup(alarm_popup_xpath):
    # close the "save your login info" popup
    # popup = driver.find_element_by_xpath('//*[@id="react-root"]/section/main/div/div/div/div/button')
    # popup.send_keys(Keys.ENTER)
    # time.sleep(2)

    # close the "turn on notifications" popup
    popup = driver.find_element(By.XPATH, alarm_popup_xpath)
    popup.send_keys(Keys.ENTER)
    print("popup closed")
    time.sleep(2)


def searchUserId(strUserId):
    url = "https://www.instagram.com/{0}/".format(strUserId)
    driver.get(url)
    print("access to account")
    time.sleep(3)


def get_href(classname):
    href_list = list()
    pageString = driver.page_source
    bsObj = BeautifulSoup(pageString, 'lxml')
    for link1 in bsObj.find_all(name='div', attrs={'class': classname}):
        SelData = link1.select('a')
        for i in range(len(SelData)):
            title = SelData[i]
            real = title.attrs['href']
            href_list.append(real)
            if len(href_list) == 3:
                break
    print("access to first image")
    time.sleep(2)
    return href_list


def searchInstaUrl(strUrl):
    url = "https://www.instagram.com/{0}".format(strUrl)
    driver.get(url)
    print("success : search instargram url")
    time.sleep(3)


def get_imageUrl(save_path, membername, postnum):
    imageUrl = list()
    html = driver.page_source
    soup = BeautifulSoup(html, 'lxml')
    '''
    nextbtn = driver.find_element(By.XPATH, nextbtn_xpath)
    nextcnt = 0
    while nextbtn and nextcnt < 2:
        nextbtn.click()
        nextcnt += 1
    '''
    try:
        for i in range(0, 2):
            imgs = soup.select('img')[i].attrs['src']
            imageUrl.append(str(imgs))
    except:
        print("error" + str(driver.current_url))
    print(imageUrl)

    try:
        print("try : save image")
        imgnum = 0
        for url in imageUrl:
            urllib.request.urlretrieve(url, save_path + membername + "_" + str(postnum) + "_" + str(imgnum) + ".jpg")
            imgnum += 1
            time.sleep(1)
    except:
        print("except : no image or error")


def driver_close():
    driver.close()
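The module already imports WebDriverWait and expected_conditions but never uses them; everything runs on fixed time.sleep calls. As a sketch (not what the code above does), an explicit wait could replace the blind sleeps around the form lookups:

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def wait_for_element(driver, name, timeout=10):
    # Block until an element with this name attribute is in the DOM,
    # or raise TimeoutException after `timeout` seconds.
    return WebDriverWait(driver, timeout).until(
        EC.presence_of_element_located((By.NAME, name))
    )

# e.g. inside pagelogin(), instead of time.sleep(3) + find_element:
# id_input_form = wait_for_element(driver, facebook_id_form_name)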
Execution video
Reference
https://kjk92.tistory.com/66?category=689641