php是最好的语言

Python 分析并爬取有道词典

因为有道词典的翻译接口要求请求携带由 JS 生成的签名(sign)和盐值(salt),所以借助 Charles 抓包工具分析了请求参数及签名的生成方式,爬取代码如下:
import requests
import pymysql
import time
import hashlib
import datetime
import sys
import string
import random
class Spider():
    """Minimal client for the Youdao web translation endpoint (``translate_o``).

    Every request carries a ``salt`` and a ``sign``; the sign is the MD5 hex
    digest of client id + query text + salt + a fixed secret string.  The
    original author marked both fields as required by the server.
    """

    # Endpoint and signing constants used by every request.
    _URL = "http://fanyi.youdao.com/translate_o?smartresult=dict&smartresult=rule"
    _CLIENT = 'fanyideskweb'
    _SECRET = 'aNPG!!u6sesA>hBAW1@(-'

    @classmethod
    def _make_sign(cls, word, salt):
        """Return the request signature for *word* and *salt*.

        Args:
            word: The text being translated (``str``).
            salt: The salt value sent with the request, already a ``str``.

        Returns:
            The lowercase hex MD5 digest of client id + word + salt + secret,
            computed over the UTF-8 encoding of that string.
        """
        raw = cls._CLIENT + word + salt + cls._SECRET
        return hashlib.md5(raw.encode("utf-8")).hexdigest()

    def getData(self, word='美女', timeout=10):
        """Translate *word*, print the decoded JSON reply and return it.

        Args:
            word: Text to translate.  Defaults to the sample query the
                script originally hard-coded.
            timeout: Seconds passed to ``requests.post`` as its timeout so a
                stalled connection cannot block forever.  Pass ``None`` to
                wait indefinitely (the previous behaviour).

        Returns:
            The response body decoded from JSON.

        Raises:
            requests.exceptions.RequestException: On connection failure or
                timeout.
            ValueError: If the response body is not valid JSON.
        """
        header = {
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cookie': 'OUTFOX_SEARCH_USER_ID_NCOO=1522750920.8871377; _ntes_nnid=863ef4c9c4f58e81df7cf3d914d6eed9,1512555909922; OUTFOX_SEARCH_USER_ID=2095748874@183.14.28.164; fanyi-ad-id=39535; fanyi-ad-closed=1; JSESSIONID=aaar-nMwL8C627zPvj7cw; ___rl__test__cookies=1515120006518',
            'Host': 'fanyi.youdao.com',
            'Referer': 'http://fanyi.youdao.com/',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
        }

        # Salt: current time in milliseconds plus a small random offset (1-10).
        salt = str(round(time.time() * 1000) + random.randint(1, 10))

        # The same word and salt must feed both the form and the signature,
        # so they are computed once and inserted here rather than patched in
        # after the fact.
        data = {
            'i': word,
            'from': 'AUTO',
            'to': 'AUTO',
            'smartresult': 'dict',
            'client': self._CLIENT,
            'salt': salt,
            'sign': self._make_sign(word, salt),
            'doctype': 'json',
            'version': '2.1',
            'keyfrom': 'fanyi.web',
            'action': 'FY_BY_CLICKBUTTION',
            'typoResult': 'false',
        }

        dd = requests.post(self._URL, data=data, headers=header, timeout=timeout).json()
        print(dd)
        return dd
if __name__ =='__main__':
    # Bind the instance to its own lowercase name; reusing the name "Spider"
    # would replace the class with an instance at module level and make
    # further Spider() calls fail.
    spider = Spider()
    spider.getData()


作者:xTao 分类:LNMP 浏览:2543 评论:1