php是最好的语言

Python + PhantomJS 简单爬取本网站

和上一篇差不多，区别只在于浏览器是否有界面（PhantomJS 是无界面浏览器）。
from selenium.webdriver.common.action_chains import ActionChains
from selenium import webdriver
import selenium.webdriver.support.ui as ui
from time import sleep
def form():
    """Launch a PhantomJS browser and build an ActionChains bound to it.

    The PhantomJS executable location and its service arguments are
    hard-coded below.

    Returns:
        tuple: ``(driver, action)`` -- the PhantomJS WebDriver and an
        ``ActionChains`` instance created from it.
    """
    phantomjs_exe = "C:\\Users\\Administrator\\Downloads\\phantomjs-2.5.0-beta-windows\\bin\\phantomjs.exe"
    # Command-line switches handed to the PhantomJS service (SSL-related,
    # judging by their names).
    phantomjs_flags = ['--ignore-ssl-errors=true', '--ssl-protocol=any']

    browser = webdriver.PhantomJS(
        executable_path=phantomjs_exe,
        service_args=phantomjs_flags,
    )
    chain = ActionChains(browser)
    return browser, chain

def join_login():
    """Start a browser session and load the site's home page.

    Returns:
        tuple: ``(driver, action)`` as produced by ``form()``, with the
        driver already navigated to ``http://phpindex.win/``.

    Raises:
        Whatever ``driver.get()`` raises. The browser is shut down before
        the error is re-raised.
    """
    driver, action = form()
    try:
        driver.get('http://phpindex.win/')
    except BaseException:
        # The caller never receives the driver when navigation fails, so
        # this is the only place that can stop the PhantomJS process.
        # The error is always re-raised, nothing is swallowed here.
        driver.quit()
        raise
    return driver, action
def search():
    """Visit every matching article on the home page and print their texts.

    Opens the home page via ``join_login()``, waits up to 20 seconds for
    the element with id ``extra`` to be displayed, then clicks through each
    article link matched by the XPath below, collecting what ``getData()``
    returns for it. The collected list is printed once all links have been
    visited. Entries are ``0`` for articles where ``getData()`` failed.

    The browser is always shut down on exit, whether or not an error
    occurred. Errors are propagated to the caller.
    """
    link_xpath = '//article[@class="top cate1 auth1"]/header/h2/a'

    # Create the session BEFORE entering the try block: if this call fails
    # there is no driver to quit, and keeping it inside the try would make
    # the finally clause raise UnboundLocalError and hide the real error.
    driver, _ = join_login()
    try:
        wait = ui.WebDriverWait(driver, 20)
        wait.until(lambda dr: dr.find_element_by_id('extra').is_displayed())

        link_count = len(driver.find_elements_by_xpath(link_xpath))
        data = []
        for i in range(link_count):
            # Look the links up again on every pass: the page is left and
            # re-entered each iteration, so element references obtained
            # earlier are not reused.
            aa = driver.find_elements_by_xpath(link_xpath)[i]
            text = getData(driver, aa)
            # NOTE(review): back() is issued even when getData() returned 0;
            # confirm that is the desired behavior when the click itself failed.
            driver.back()
            data.append(text)
        print(data)
    finally:
        driver.quit()

def getData(driver, aa):
    """Click a link and return the text of the first ``<pre>`` element.

    Args:
        driver: WebDriver used for waiting and for the element lookup.
        aa: Element to click (the caller passes an article link).

    Returns:
        The ``text`` of the first ``pre`` element found after the click,
        or ``0`` if any step fails (the click, the 10 second wait for the
        element to be displayed, or the lookup itself).
    """
    try:
        aa.click()
        wait = ui.WebDriverWait(driver, 10)
        wait.until(lambda dr: dr.find_element_by_tag_name('pre').is_displayed())
        # Fixed extra pause after the element is displayed
        # (presumably to let the page finish rendering -- TODO confirm).
        sleep(2)
        return driver.find_element_by_tag_name('pre').text
    except Exception:
        # Best-effort: a failure on one article is reported as 0 so the
        # caller can continue. Unlike a bare ``except:``, this lets
        # KeyboardInterrupt and SystemExit propagate.
        return 0

def isExit(driver, name):
    """Report whether an element with the given class name can be found.

    Args:
        driver: WebDriver to search with.
        name: CSS class name to look up.

    Returns:
        ``1`` if the lookup and the ``is_displayed()`` call both complete
        without raising, ``0`` otherwise. The value returned by
        ``is_displayed()`` itself is not used, so a found-but-hidden
        element still yields ``1``.
    """
    try:
        driver.find_element_by_class_name(name).is_displayed()
        return 1
    except Exception:
        # Any lookup failure means "not there". Unlike a bare ``except:``,
        # this lets KeyboardInterrupt and SystemExit propagate.
        return 0

# Script entry point: run the crawl only when executed directly.
if __name__ == "__main__":
    search()


作者:xTao 分类:LNMP 浏览:2484 评论:0