php是最好的语言

利用 PhantomJS 无头浏览器爬取豆瓣电影搜索信息

from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
import selenium.webdriver.support.ui as ui
from time import sleep
import pymysql
import urllib

class one():
    """Scrape Douban movie search results with a PhantomJS-driven browser.

    Every call to :meth:`search` starts its own browser session, reads the
    result cards from the search page and shuts the browser down again.
    """

    # (CSS class name, fallback value) pairs read from every result card, in
    # output order. The rating falls back to the integer 0, every other field
    # to the empty string.
    _FIELDS = (
        ('title', ""),
        ('rating_nums', 0),
        ('pl', ""),
        ('abstract_2', ""),
    )

    def form(self):
        """Start a PhantomJS session.

        Returns:
            A ``(driver, action)`` tuple: the PhantomJS web driver and an
            ``ActionChains`` object bound to it. The caller owns the driver
            and is responsible for calling ``driver.quit()``.
        """
        # NOTE(review): the executable path is machine-specific; adjust it (or
        # make it configurable) before running this anywhere else.
        driver = webdriver.PhantomJS(
            executable_path="/Users/dqsj/Desktop/phan/bin/phantomjs",
            service_args=['--ignore-ssl-errors=true', '--ssl-protocol=any'],
        )
        action = ActionChains(driver)
        return driver, action

    def join_login(self, name):
        """Open the Douban movie search page for *name*.

        Args:
            name: Search text; it is percent-encoded before being placed in
                the query string.

        Returns:
            The ``(driver, action)`` tuple from :meth:`form`, with the driver
            already navigated to the search URL.

        Raises:
            Whatever ``driver.get`` raises. The browser is shut down before
            the exception propagates so the process is not leaked.
        """
        # Import the submodule explicitly: a bare ``import urllib`` does not
        # guarantee that ``urllib.parse`` is loaded.
        from urllib.parse import quote

        driver, action = self.form()
        url = 'https://search.douban.com/movie/subject_search?search_text={}&cat=1002'.format(quote(name))

        try:
            driver.get(url)
        except BaseException:
            # Clean up and re-raise; nobody else holds a reference to the
            # driver at this point.
            driver.quit()
            raise

        return driver, action

    def search(self, name, wait_seconds=2):
        """Search Douban for *name* and return the visible result rows.

        Args:
            name: Search text.
            wait_seconds: How long to pause after loading the page before
                reading it, giving the page's scripts time to render the
                results. Pass ``0`` to skip the pause.

        Returns:
            A list with one entry per element of class ``detail``. Each entry
            is a four-item list holding the text of the first descendant with
            class ``title``, ``rating_nums``, ``pl`` and ``abstract_2`` (in
            that order). A missing title/pl/abstract_2 becomes ``""`` and a
            missing rating becomes ``0``.
        """
        driver, _action = self.join_login(name)
        try:
            if wait_seconds:
                sleep(wait_seconds)
            contents = driver.find_elements_by_class_name('detail')
            # Element text must be read while the session is still alive, so
            # the rows are built before the ``finally`` clause runs.
            return [
                [self._first_text(content, class_name, default)
                 for class_name, default in self._FIELDS]
                for content in contents
            ]
        finally:
            # Always terminate the PhantomJS process, even when scraping fails.
            driver.quit()

    @staticmethod
    def _first_text(element, class_name, default):
        """Return the text of the first *class_name* descendant, or *default*."""
        try:
            return element.find_elements_by_class_name(class_name)[0].text
        except Exception:
            # Best effort: a missing field (IndexError) or a driver-side error
            # for a single field must not abort the whole scrape.
            return default

    def isExit(self, driver, name):
        """Report whether an element with class *name* can be found.

        Args:
            driver: Web driver (or element) to search in.
            name: CSS class name to look up.

        Returns:
            ``1`` when the lookup and the ``is_displayed()`` call both succeed
            without raising, ``0`` otherwise. The visibility result itself is
            not consulted.
        """
        try:
            driver.find_element_by_class_name(name).is_displayed()
        except Exception:
            return 0
        return 1

    def douban(self):
        """Fetch data from Douban for the placeholder movie name.

        Returns:
            The rows produced by :meth:`search`.
        """
        return self.search("电影名字")  # fetch data from Douban
         


if __name__ == '__main__':
    # Keep the instance under its own name: assigning it to ``one`` would
    # rebind the class name and make the class unreachable afterwards.
    scraper = one()

    scraper.douban()


作者:xTao 分类:LNMP 浏览:2366 评论:0