Python + Selenium 爬取好友 QQ 空间相册图片
如果能用抓包工具分析出请求并直接爬取,就不要用浏览器驱动这种方法:它模拟人工操作浏览器,速度很慢。下面是代码(前提是好友的 QQ 相册没有加密,并且你能访问对方的 QQ 空间;实际页面可能有一些小差异)。这是我第一次写。
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
from time import sleep
import urllib
# Running count of images saved so far. getImg() uses it as the next output
# file name (``<index>.jpg``) and increments it after each successful write.
index = 0
def form():
    """Start a maximized Chrome session and build an ActionChains helper for it.

    Returns:
        tuple: ``(driver, action)`` where ``driver`` is the Chrome WebDriver
        and ``action`` is an ``ActionChains`` instance bound to that driver.
    """
    # Raw string: the Windows path is full of backslashes, and sequences such
    # as ``\P`` or ``\G`` are invalid escapes in a normal string literal.
    driver = webdriver.Chrome(
        executable_path=r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'
    )
    # Alternative driver left disabled by the original author:
    # driver = webdriver.PhantomJS(executable_path="C:\\Users\\Administrator\\Downloads\\phantomjs-2.5.0-beta-windows\\bin\\phantomjs.exe")
    driver.implicitly_wait(10)  # implicit wait applied to element lookups
    driver.maximize_window()
    action = ActionChains(driver)
    return driver, action
def join_login():
    """Open the Qzone login page and submit the account/password form.

    A browser is started through form(), ``https://qzone.qq.com/`` is loaded,
    and the form inside the ``login_frame`` iframe is filled in and submitted.

    Returns:
        tuple: the ``(driver, action)`` pair obtained from form().
    """
    driver, action = form()
    driver.get('https://qzone.qq.com/')
    driver.switch_to.frame("login_frame")
    # Presumably switches the widget from QR-code login to the
    # account/password form -- verify against the live page.
    driver.find_element_by_id('switcher_plogin').click()

    # NOTE: placeholder credentials; replace them with your own and avoid
    # committing real ones to version control.
    driver.find_element_by_id('u').clear()
    driver.find_element_by_id('u').send_keys('987654321')  # your own QQ number
    driver.find_element_by_id('p').clear()
    driver.find_element_by_id('p').send_keys('123456')  # your own QQ password
    driver.find_element_by_id('login_button').click()
    return driver, action
def search():
    """Log in, open a friend's space through the search box and scrape the albums.

    The friend is looked up by the name typed into ``friend_search_input``;
    the first search result is clicked, the second browser window is selected,
    and joinPhoto() is called once for every album cover found inside the
    ``app_canvas_frame`` iframe.

    The browser is always closed on exit. Any exception raised along the way
    propagates to the caller unchanged.
    """
    album_xpath = '//a[@class="album-cover js-album-cover"]'
    search_icon_xpath = "//i[@class='ui-icon sp-top-search']"

    # Bound before the try block so the cleanup below cannot fail with
    # UnboundLocalError (and hide the real error) when login itself raises.
    driver = None
    try:
        driver, _action = join_login()
        sleep(3)

        # Force the friends drop-down to be displayed via JS.
        driver.execute_script(
            "document.getElementById('friends-drop-down').style.display='block'"
        )
        driver.find_element_by_id('friend_search_input').click()
        driver.find_element_by_id('friend_search_input').send_keys('网友名字')
        driver.find_element_by_xpath(search_icon_xpath).click()
        driver.find_element_by_id('friend_search_input').click()
        driver.find_element_by_xpath(search_icon_xpath).click()

        # Force the search-result top bar to be displayed via JS.
        driver.execute_script(
            "document.getElementsByClassName('friends-results-topbar')[0].style.display='block'"
        )
        driver.find_element_by_xpath("//ul[@id='search_friend_result']/li/a").click()

        # Continue in the second window handle.
        driver.switch_to.window(driver.window_handles[1])
        # Presumably the "albums" entry of the space menu -- verify on the page.
        driver.find_elements_by_class_name("menu_item_4")[1].click()
        driver.switch_to.frame("app_canvas_frame")

        album_count = len(driver.find_elements_by_xpath(album_xpath))
        for album_index in range(album_count):
            # The covers are looked up again on every pass, presumably because
            # handles obtained before visiting an album no longer work.
            album = driver.find_elements_by_xpath(album_xpath)[album_index]
            joinPhoto(driver, album)
    finally:
        if driver is not None:
            driver.quit()
def joinPhoto(driver, li):
    """Open one album, save the images of every page, then leave the album.

    Args:
        driver: WebDriver currently inside the ``app_canvas_frame`` iframe.
        li: the album-cover element to click.

    Returns:
        ``'0'`` when an element with class ``qz_dialog_layer`` is found after
        the click (the dialog is closed and the album skipped); otherwise
        ``None`` once the album has been processed.
    """
    pager_links_xpath = "//div[@class='mod-photo-pagenav j-pl-photolist-pager']/div/a"

    li.click()
    driver.switch_to.default_content()

    # Guard clause: a dialog layer showed up instead of the album content.
    if isExit(driver, 'qz_dialog_layer') == 1:
        print(0)
        driver.find_element_by_class_name('qz_dialog_btn_close').click()
        driver.switch_to.frame("app_canvas_frame")
        return '0'

    # First page of the album.
    page(driver)
    driver.switch_to.frame("app_canvas_frame")
    getImg(driver)

    # Remaining pages, only when a pager is present.
    if isExit(driver, 'j-pl-photolist-pager') == 1:
        link_total = len(driver.find_elements_by_xpath(pager_links_xpath))
        for position in range(link_total - 1):
            # Look the links up again on each pass before clicking.
            driver.find_elements_by_xpath(pager_links_xpath)[position].click()
            page(driver)
            getImg(driver)

    # Scroll the outer document back to the top, then click the
    # "item-wrap bor-tx" span inside the frame (presumably the link back to
    # the album list -- verify on the page).
    driver.switch_to.default_content()
    driver.execute_script("var q=document.documentElement.scrollTop=0")
    driver.switch_to.frame("app_canvas_frame")
    driver.find_element_by_xpath('//span[@class="item-wrap bor-tx"]').click()
def getImg(driver, path="C:\\Users\\Administrator\\Desktop\\img\\zq\\"):
    """Download the ``src`` of every ``j-pl-photoitem-img`` element to disk.

    Files are written as ``<path><index>.jpg`` where ``index`` is the
    module-level counter; it is incremented only after a file has been
    written successfully, so failed downloads leave no gap in the numbering.

    Args:
        driver: WebDriver positioned in the frame that holds the photo list.
        path: output directory prefix. It is concatenated directly with the
            file name, so it must end with a path separator. Defaults to the
            directory the script originally hard-coded.

    Downloading is best-effort: an image that cannot be fetched or written is
    reported on stdout and skipped.
    """
    # ``import urllib`` alone does not load the ``request`` submodule, so it
    # is imported explicitly here.
    import urllib.request

    global index

    elements = driver.find_elements_by_class_name('j-pl-photoitem-img')
    # Collect all URLs first, before any download starts.
    urls = [element.get_attribute('src') for element in elements]

    for url in urls:
        if not url:
            # No usable src attribute on this element.
            continue
        try:
            # Fetch before opening the output file so that a failed download
            # does not leave an empty file behind.
            with urllib.request.urlopen(url) as response:
                payload = response.read()
            # Binary mode is required: the payload is raw image bytes.
            with open(path + str(index) + '.jpg', 'wb') as out_file:
                out_file.write(payload)
        except Exception as exc:
            print('skipped image', url, exc)
            continue
        index += 1
def page(driver):
    """Scroll the current document far down twice, waiting 3 s after each scroll.

    ``scrollTop`` is set to 100000 on ``document.documentElement`` of whatever
    document the driver is currently focused on; presumably this triggers
    lazy loading of further photos -- verify on the page.
    """
    scroll_down = "var q=document.documentElement.scrollTop=100000"
    for _ in range(2):
        driver.execute_script(scroll_down)
        sleep(3)
def isExit(driver, name):
    """Report whether an element with CSS class *name* can be located.

    Args:
        driver: WebDriver (or any object offering ``find_element_by_class_name``).
        name: class name to look for.

    Returns:
        ``1`` when the lookup and the follow-up ``is_displayed()`` call both
        complete without raising, ``0`` when either raises. The visibility
        result itself is ignored, so a hidden element still counts as found.
    """
    try:
        driver.find_element_by_class_name(name).is_displayed()
    except Exception:
        # Any lookup failure means "not there". KeyboardInterrupt and
        # SystemExit are deliberately not caught so the script can be stopped.
        return 0
    return 1
# Run the scraper only when this file is executed directly as a script.
if __name__ == '__main__':
    search()