Python + Selenium 爬取好友 QQ 空间相册图片
如果能用抓包工具分析出接口并直接爬取,就不要用浏览器驱动这种方法:这种方法是模拟人工操作浏览器,速度慢。下面是代码(前提是对方的 QQ 相册没有加密,并且你能访问对方的 QQ 空间;实际页面可能与代码有一些小差异)。这是我第一次写。
# Download the photos of a friend's Qzone albums by driving Chrome with Selenium.
#
# Flow: log in with account/password -> search the friend by name -> open the
# friend's space in the new window -> open every album -> save each thumbnail
# URL found on every album page to IMAGE_DIR as <n>.jpg.
#
# NOTE(review): the script uses the legacy Selenium locator API
# (`find_element_by_*`, `executable_path=`); it is kept as-is so the required
# Selenium version does not change. Newer Selenium 4 releases dropped these
# helpers -- confirm the installed version before upgrading.
import os
import urllib.request
from time import sleep

from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.action_chains import ActionChains

# Raw string: the original non-raw literal relied on invalid escape sequences
# (`\P`, `\G`, ...) being kept verbatim; the resulting value is the same.
CHROMEDRIVER_PATH = r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'
# Directory the images are written to.
IMAGE_DIR = "C:\\Users\\Administrator\\Desktop\\img\\zq\\"
QQ_ACCOUNT = '987654321'    # placeholder: your own QQ number
QQ_PASSWORD = '123456'      # placeholder: your own QQ password
FRIEND_NAME = '网友名字'     # placeholder: text typed into the friend search box

ALBUM_COVER_XPATH = '//a[@class="album-cover js-album-cover"]'
PAGER_LINK_XPATH = "//div[@class='mod-photo-pagenav j-pl-photolist-pager']/div/a"

# Running counter used to name the saved files (0.jpg, 1.jpg, ...).
index = 0


def form():
    """Start a maximized Chrome session.

    Returns:
        (driver, action): the WebDriver and an ActionChains bound to it.
    """
    driver = webdriver.Chrome(executable_path=CHROMEDRIVER_PATH)
    # driver = webdriver.PhantomJS(executable_path="C:\\Users\\Administrator\\Downloads\\phantomjs-2.5.0-beta-windows\\bin\\phantomjs.exe")
    # Every element lookup waits up to 10 s before failing.
    driver.implicitly_wait(10)
    driver.maximize_window()
    action = ActionChains(driver)
    return driver, action


def join_login(account=QQ_ACCOUNT, password=QQ_PASSWORD):
    """Open qzone.qq.com and submit the account/password login form.

    Args:
        account: QQ number typed into the `u` field.
        password: password typed into the `p` field.

    Returns:
        (driver, action) as produced by form().

    Raises:
        Whatever Selenium raises while loading the page or locating the form;
        the browser is closed before the exception propagates.
    """
    driver, action = form()
    try:
        driver.get('https://qzone.qq.com/')
        driver.switch_to.frame("login_frame")
        # Presumably switches from QR-code login to the password form -- confirm.
        driver.find_element_by_id('switcher_plogin').click()

        account_box = driver.find_element_by_id('u')
        account_box.clear()
        account_box.send_keys(account)

        password_box = driver.find_element_by_id('p')
        password_box.clear()
        password_box.send_keys(password)

        driver.find_element_by_id('login_button').click()
    except Exception:
        # Do not leave an orphaned browser behind when login fails.
        driver.quit()
        raise
    return driver, action


def search(friend_name=FRIEND_NAME):
    """Log in, find the friend, and download every album of that friend.

    Args:
        friend_name: text typed into the friend search box.

    The browser is always closed at the end, including on errors.
    """
    driver = None
    try:
        driver, _ = join_login()
        sleep(3)

        # Force the friend drop-down to be visible so its input can be used.
        driver.execute_script(
            "document.getElementById('friends-drop-down').style.display='block'"
        )
        driver.find_element_by_id('friend_search_input').click()
        driver.find_element_by_id('friend_search_input').send_keys(friend_name)
        driver.find_element_by_xpath("//i[@class='ui-icon sp-top-search']").click()
        # The click/search pair is repeated exactly as in the original script;
        # presumably the first attempt does not always show results -- confirm.
        driver.find_element_by_id('friend_search_input').click()
        driver.find_element_by_xpath("//i[@class='ui-icon sp-top-search']").click()

        # Force the result bar to be visible, then open the first hit.
        driver.execute_script(
            "document.getElementsByClassName('friends-results-topbar')[0].style.display='block'"
        )
        driver.find_element_by_xpath("//ul[@id='search_friend_result']/li/a").click()

        # Assumes the friend's space opened in a second window/tab.
        driver.switch_to.window(driver.window_handles[1])
        # Second element with class `menu_item_4`; presumably the album tab -- confirm.
        driver.find_elements_by_class_name("menu_item_4")[1].click()
        driver.switch_to.frame("app_canvas_frame")

        album_total = len(driver.find_elements_by_xpath(ALBUM_COVER_XPATH))
        for position in range(album_total):
            # Look the covers up again on every pass instead of reusing the
            # first list: joinPhoto navigates into the album and back, so
            # earlier element references may no longer be valid.
            cover = driver.find_elements_by_xpath(ALBUM_COVER_XPATH)[position]
            joinPhoto(driver, cover)
    finally:
        # `driver` stays None when join_login() itself failed (it cleans up
        # its own browser), so only quit a session we actually own.
        if driver is not None:
            driver.quit()


def joinPhoto(driver, li):
    """Open one album, save the images of all its pages, and go back.

    Args:
        driver: WebDriver currently inside the `app_canvas_frame` frame.
        li: the album cover element to click.

    Returns:
        '0' when a `qz_dialog_layer` dialog appears after the click (the
        dialog is closed and the album skipped), otherwise None.
    """
    li.click()
    driver.switch_to.default_content()

    if isExit(driver, 'qz_dialog_layer') == 1:
        # A dialog popped up instead of the album: close it and skip.
        print(0)
        driver.find_element_by_class_name('qz_dialog_btn_close').click()
        driver.switch_to.frame("app_canvas_frame")
        return '0'

    page(driver)
    driver.switch_to.frame("app_canvas_frame")
    getImg(driver)

    if isExit(driver, 'j-pl-photolist-pager') == 1:
        # The album is paginated: visit every pager link except the last one.
        link_total = len(driver.find_elements_by_xpath(PAGER_LINK_XPATH))
        for link_no in range(link_total - 1):
            driver.find_elements_by_xpath(PAGER_LINK_XPATH)[link_no].click()
            page(driver)
            getImg(driver)

    # Scroll the outer document back to the top, then click the element that
    # leads back to the album list.
    driver.switch_to.default_content()
    driver.execute_script("var q=document.documentElement.scrollTop=0")
    driver.switch_to.frame("app_canvas_frame")
    driver.find_element_by_xpath('//span[@class="item-wrap bor-tx"]').click()


def getImg(driver):
    """Download every `j-pl-photoitem-img` image on the current page.

    Files are written to IMAGE_DIR as `<index>.jpg`; the module-level `index`
    counter only advances for images that were saved successfully. A failed
    download is reported and skipped so one bad image does not stop the run.

    Raises:
        OSError: if IMAGE_DIR cannot be created.
    """
    global index

    sources = [
        element.get_attribute('src')
        for element in driver.find_elements_by_class_name('j-pl-photoitem-img')
    ]

    os.makedirs(IMAGE_DIR, exist_ok=True)

    for url in sources:
        if not url:
            continue
        target = os.path.join(IMAGE_DIR, str(index) + '.jpg')
        try:
            # Download first so a failed request does not leave an empty file.
            with urllib.request.urlopen(url, timeout=30) as response:
                payload = response.read()
            # Binary mode is required: the payload is raw image bytes.
            with open(target, 'wb') as out_file:
                out_file.write(payload)
        except Exception as exc:  # best-effort per image: report and move on
            print('failed to save %s: %s' % (url, exc))
            continue
        index += 1


def page(driver):
    """Scroll to the bottom twice, pausing 3 s after each scroll."""
    for _ in range(2):
        driver.execute_script("var q=document.documentElement.scrollTop=100000")
        sleep(3)


def isExit(driver, name):
    """Return 1 if an element with class `name` can be located, else 0.

    The visibility result itself is ignored (as in the original); only a
    Selenium error during lookup or the visibility query yields 0. With the
    implicit wait set in form(), a missing element takes up to 10 s to report.
    """
    try:
        driver.find_element_by_class_name(name).is_displayed()
    except WebDriverException:
        return 0
    return 1


if __name__ == '__main__':
    search()