php是最好的语言

python爬取微信公众号

参考 

import time
from urllib.parse import parse_qs, urlsplit

import pymysql
import requests

# 跑列表
class gongzhonghao():
    """Crawl the article list of one WeChat official account into MySQL.

    ``main`` pages through the ``cgi-bin/appmsg`` endpoint and stores one row
    per article (account name, title, link, update time) in the
    ``gongzhonghao_article`` table.  ``getMoreInfo`` can additionally query
    read / like / comment counts for a single article link.

    NOTE(review): the database settings, cookies and tokens are hard-coded in
    ``__init__``; they are kept exactly as found, but they are credentials and
    presumably belong in configuration rather than in source.
    """

    def __init__(self):
        # MySQL connection settings consumed by get_conn().
        self.host = "10.0.2.8"
        self.port = 3306
        self.user = "XXX"
        self.pwd = "XXX"
        self.db = "XXXX"
        # NOTE(review): there is no constructor argument named ``type``, so
        # this stores the *builtin* ``type``.  Nothing in this class reads the
        # attribute; it is kept only so existing attribute access still works.
        self.type = type

        # The six parameters below are required.  For how to obtain them see
        # https://blog.csdn.net/qq_41686130/article/details/88296981
        # (original author's remark: "no need to obtain").
        # cookie, token and fakeid come from the desktop (PC) web client.
        self.cookie = "appmsglist_action_3877428835=card; pgv_pvid=4619892386; RK=27KB1GMa+Z; ptcz=a65cebe84a17c8b900fa411405bf3b357f08283771e19c42a13dee8373d04179; iip=0; o_cookie=3451590500; ua_id=XJDO1DvhTJ2mOLPQAAAAAMo4Ju9fzUuhgx74g4PKxoM=; wxuin=18302783316758; openid2ticket_omkRZvzC5jur9SmKJMZxjts7i5Yk=; mm_lang=zh_CN; pgv_info=ssid=s6040833152; openid2ticket_oZOCH5GLtjJJM8Uc-r8EEUC2QMM0=; cert=5WZkNlr0kf1xedNGYqmw36v5kAGeELX9; uin=o3451590500; skey=@e58afHKfI; master_key=xI3mKrM/uPTJPWdX6NjJ9x0eS77oacSfHRoXFjJnrjM=; sig=h01618e165f4d49d4e228041a135d0e4e9d72b5a4cb1162660d336375453ddb35a883bc98808ba977b1; rewardsn=; wxtokenkey=777; openid2ticket_oY94c5x5r371UeYUKa0iWEq0YNig=jZaHETcRHu5MX4N3Cj4LCSEi/6RPq52Lip1tRsEK850=; pac_uid=1_3451590500; uuid=affea5975882b75bfdb42ea4a1606094; rand_info=CAESIEl/gDAmnZ0wMA7poNSOLNvi8Yo7PhEC2MKRTlGLVnRZ; slave_bizuin=3877428835; data_bizuin=3877428835; bizuin=3877428835; data_ticket=S1uYRDKNUROM5HpMwPPpT8MVcBvXhICKqdrQjnG23I9bDVTTWRRVx54rkYsMGRnL; slave_sid=OHBpNHFObko0QktzOTJUWno5blBTN2UyWmNWRGo1WHNLTjI3NEZhbmZhU3JGZmlDWVBqdFVPZlI3N29mZjE4UFJkNWE2ZU82UFpvWTNtNk9kSkJ2Tm41ZXViUDJaWE9ad29wNWdUWlJsempoVVN2OW5UbHhoZ05yTnhYdURKWEZTS0Q3aFhFQ043ZGU5RG9z; slave_user=gh_3392d0843cd5; xid=a2e6379fe1ac71ad4c0800e595c9e075"
        self.token = "1141601607"
        self.fakeid = "MjM5MzA1MTA4Mg=="
        self.gongzhonghao_name = "桃桃淘电影"

        # key, appmsg_token and pass_ticket are captured with a packet-capture
        # tool.  *This group expires quickly; once expired, the read / like /
        # comment counts come back as 0.
        self.key = "78be2463e59dfd16fb3cf0e285769bdc097005b7d76a0ee7c68f4cabd1098f24594dadd009fb911ba7473653015086bf51e5ff285e098ffa4503b2924f4959552fc5b48989c503128a04433b47d99d769fc79e4e4906d41d2b0cb9acc7dcfbb6fc77dd4fbaea99d6a51478ce5cd27f51d409e08a8f1732cf92aebceb0dfdb1de"
        self.appmsg_token = "1109_TTOS%2Bo6HT1B6nC458K7UAvYOYM2NzPPamjrFEQQLlKJkGqZIDLubp7mkKcU9-z4EkmQUv08vtiUaa-DR"
        self.pass_ticket = "JGV2TdZ4e/WZR+psdtXGodXT4ifF3v8N1g9rwX9ZWdRg+1jBMN8h/vTHcBafpUB7"

    def get_conn(self):
        """Open and return a new MySQL connection built from the instance settings."""
        conn = pymysql.connect(host=self.host, port=self.port, user=self.user, passwd=self.pwd, db=self.db)
        return conn

    def insertMany(self, sql, data):
        """Run ``sql`` once per row of ``data`` in a single transaction.

        Args:
            sql: Parameterised statement passed to ``cursor.executemany``.
            data: Sequence of parameter rows.

        Raises:
            Exception: whatever the driver raised; the transaction is rolled
                back before the exception is re-raised.
        """
        conn = self.get_conn()
        try:
            with conn.cursor() as cursor:
                cursor.executemany(sql, data)
            conn.commit()
        except Exception:
            # Roll back first, then propagate.  The original re-raised before
            # the rollback, which made the rollback unreachable.
            conn.rollback()
            raise
        finally:
            conn.close()

    def main(self, start_page=333, end_page=566):
        """Fetch article-list pages and store each page as soon as it arrives.

        Pages ``start_page`` (inclusive) to ``end_page`` (exclusive) are
        requested with five articles per request.  The crawl stops at the first
        page that yields no articles.

        Args:
            start_page: First page index to request (default keeps the
                original hard-coded value).
            end_page: Page index at which to stop, exclusive (default keeps
                the original hard-coded value).
        """
        # Target URL.
        url = "https://mp.weixin.qq.com/cgi-bin/appmsg"
        # Sending the logged-in Cookie skips the login step.
        headers = {
            "Cookie": self.cookie,
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.114 Safari/537.36",
        }

        page_size = 5
        # On the web page "type" is 10, but then the article link cannot be
        # retrieved; 9 works.
        msg_type = '9'
        query = {
            "token": self.token,
            "lang": "zh_CN",
            "f": "json",
            "ajax": "1",
            "action": "list_ex",
            "begin": "0",
            "count": str(page_size),
            "query": "",
            "fakeid": self.fakeid,
            "type": msg_type,
        }
        sql = "insert into gongzhonghao_article(gongzhonghao_name,title,url,update_at) values(%s,%s,%s,%s)"

        for page in range(start_page, end_page):
            print("第%s页" % page)
            # "begin" is presumably the zero-based article offset of the page.
            query["begin"] = page * page_size

            # NOTE(review): verify=False disables TLS certificate checking
            # while the session cookie is being sent; kept from the original
            # (presumably needed behind a capture proxy) -- confirm.
            content_json = requests.get(url, headers=headers, params=query, verify=False).json()

            # Long pause between list requests, kept from the original.
            time.sleep(30)

            # Fetch one page, store one page.
            articles = content_json.get("app_msg_list") or []
            rows = [
                [self.gongzhonghao_name, item['title'], item['link'], self.getDate(item['update_time'])]
                for item in articles
            ]

            if not rows:
                # Stop by returning instead of calling the site-injected
                # exit(), and show the response so the reason is visible.
                print("no articles returned, stopping:", content_json)
                return

            print(rows)
            self.insertMany(sql, rows)

    @staticmethod
    def _parse_article_link(link):
        """Return ``(__biz, mid, idx, sn)`` extracted from an article URL.

        Parameters are looked up by name, so their order in the query string
        does not matter, and a trailing ``#rd`` fragment is ignored.

        Raises:
            ValueError: if one of the four parameters is missing.
        """
        # Tolerate links whose ampersands arrived HTML-escaped.
        cleaned = link.replace("&amp;", "&")
        query = parse_qs(urlsplit(cleaned).query, keep_blank_values=True)
        try:
            return tuple(query[name][0] for name in ("__biz", "mid", "idx", "sn"))
        except KeyError as err:
            raise ValueError("article link is missing query parameter %s: %r" % (err, link)) from None

    @staticmethod
    def _dig(payload, *keys, default=0):
        """Follow ``keys`` into nested containers, returning ``default`` on any miss."""
        current = payload
        for key in keys:
            try:
                current = current[key]
            except (KeyError, TypeError, IndexError):
                return default
        return current

    def getMoreInfo(self, link):
        """Query read, like and comment counts for one article.

        Args:
            link: Article URL carrying ``__biz``, ``mid``, ``idx`` and ``sn``
                query parameters.

        Returns:
            Tuple ``(readNum, likeNum, comment_count)``; a value missing from
            the response is reported as 0.
        """
        _biz, mid, idx, sn = self._parse_article_link(link)

        cookies = {
            'appmsg_token': self.appmsg_token,
            'devicetype': 'iMacMacmini71OSXOSX10.13.6build(17G8030)',
            'lang': 'zh_CN',
            'pass_ticket': self.pass_ticket,
            'rewardsn': '',
            'version': '12040090',
            'wap_sid2': 'CPemucICEooBeV9IRUxiVWNFQWU0RTdzSUVtcjc1ZFVZSGNDdXdIaXlibFAxV1cyVThVTEQ1bDZtWmxhSzh4aHQ2clotTjRqNTl1SmstTTZMZzRNT0NWU0Y1dThMZExIejBXYjZYNXNxZDc5QnloaVBSY0ZhMFY4WW9USmpWQVRQT2htWG5VQ0lvTUNqSVNBQUF+MMbD2oMGOA1AAQ==',
            'wxtokenkey': '777',
            'wxuin': '676221815',
            'pgv_pvid': '5127348511',
            'pac_uid': '3_136E0CA40E4E2536B7E31D220FFC4179',
            'qq_access_token': '871751B84053CFCF77C78A6A4251B141',
            'qq_client_id': '101487368',
            'qq_openid': '136E0CA40E4E2536B7E31D220FFC4179',
            'xw_main_login': 'qq',
            'ptcz': 'ede1fc286733d680d1f94ebf109ab13d804900f3c677048467d7b932f1e3d6b4',
            'pgv_pvi': '1267529728',
            'RK': 'f6KB1GMyvb',
            'tvfe_boss_uuid': 'cc2e45ce3c203bd3',
        }

        headers = {
            'Host': 'mp.weixin.qq.com',
            'content-type': 'application/x-www-form-urlencoded; charset=UTF-8',
            'accept': '*/*',
            'x-requested-with': 'XMLHttpRequest',
            'accept-language': 'zh-cn',
            'origin': 'https://mp.weixin.qq.com',
            'referer': 'https://mp.weixin.qq.com/s?__biz=MzU4NDE2NjU1Nw==&mid=2247486374&idx=1&sn=cdb4880d09f8ed4639038226e5e9b307&chksm=fd9cb654caeb3f42442b0bb1bf60b239ba909c1c7f83885bb508afbcfc6580617466667745de&scene=0&xtrack=1&key=07b5d4a00ecd0726a581cf7f93847b6c8b4fde5fb9e2844200c5fcd5c1639cc07fc4282d6eb5de0b6a4e2e90c4bcc1a3b9af7e075e8b5486452f3d5749656be8f1573d559c07e3b7962b5c7b5c86dce28799a37b0c2c1c198cf5bb065de74ce80f141288c9a26d754ba07b3a8019d9d01130705f98f71f57a0d46daa28984996&ascene=0&uin=Njc2MjIxODE1&devicetype=iMac+Macmini7%2C1+OSX+OSX+10.13.6+build(17G8030)&version=12040090&nettype=WIFI&lang=zh_CN&fontScale=100&exportkey=A90xPSZsSqZekMz5MhfRwKM%3D&pass_ticket=JGV2TdZ4e%2FWZR%2BpsdtXGodXT4ifF3v8N1g9rwX9ZWdRg%2B1jBMN8h%2FvTHcBafpUB7&wx_header=0&winzoom=1.000000',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/605.1.15 (KHTML, like Gecko) MicroMessenger/6.8.0(0x16080000) MacWechat/2.4(0x12040090) Chrome/39.0.2171.95 Safari/537.36 NetType/WIFI WindowsWechat MicroMessenger/6.8.0(0x16080000) MacWechat/2.4(0x12040090) Chrome/39.0.2171.95 Safari/537.36 NetType/WIFI WindowsWechat MicroMessenger/6.8.0(0x16080000) MacWechat/2.4(0x12040090) Chrome/39.0.2171.95 Safari/537.36 NetType/WIFI WindowsWechat MicroMessenger/6.8.0(0x16080000) MacWechat/2.4(0x12040090) Chrome/39.0.2171.95 Safari/537.36 NetType/WIFI WindowsWechat MicroMessenger/6.8.0(0x16080000) MacWechat/2.4(0x12040090) Chrome/39.0.2171.95 Safari/537.36 NetType/WIFI WindowsWechat MicroMessenger/6.8.0(0x16080000) MacWechat/2.4(0x12040090) Chrome/39.0.2171.95 Safari/537.36 NetType/WIFI WindowsWechat MicroMessenger/6.8.0(0x16080000) MacWechat/2.4(0x12040090) Chrome/39.0.2171.95 Safari/537.36 NetType/WIFI WindowsWechat MicroMessenger/6.8.0(0x16080000) MacWechat/2.4(0x12040090) Chrome/39.0.2171.95 Safari/537.36 NetType/WIFI WindowsWechat MicroMessenger/6.8.0(0x16080000) MacWechat/2.4(0x12040090) Chrome/39.0.2171.95 Safari/537.36 NetType/WIFI WindowsWechat MicroMessenger/6.8.0(0x16080000) MacWechat/2.4(0x12040090) Chrome/39.0.2171.95 Safari/537.36 NetType/WIFI WindowsWechat MicroMessenger/6.8.0(0x16080000) MacWechat/2.4(0x12040090) Chrome/39.0.2171.95 Safari/537.36 NetType/WIFI WindowsWechat MicroMessenger/6.8.0(0x16080000) MacWechat/2.4(0x12040090) Chrome/39.0.2171.95 Safari/537.36 NetType/WIFI WindowsWechat',
        }

        params = (
            ('f', ['json', 'json']),
            ('mock', ''),
            ('uin', 'Njc2MjIxODE1'),
            ('key', self.key),
            ('pass_ticket', self.pass_ticket),
            ('wxtoken', '777'),
            ('devicetype', 'iMac Macmini7,1 OSX OSX 10.13.6 build(17G8030)'),
            ('clientversion', '12040090'),
            ('__biz', _biz),
            ('mid', mid),
            ('sn', sn),
            ('idx', idx),
            ('appmsg_token', self.appmsg_token),
            ('x5', '0'),
        )
        data = {
            "is_only_read": "1",
            "is_temp_url": "0",
            "appmsg_type": "9",
            'reward_uin_count': '0',
        }

        content = requests.post('https://mp.weixin.qq.com/mp/getappmsgext', headers=headers, params=params, data=data, cookies=cookies).json()

        # Each count falls back to 0 when the response does not carry it
        # (which is what happens once key / appmsg_token / pass_ticket expire,
        # per the note in __init__).
        readNum = self._dig(content, "appmsgstat", "read_num")
        likeNum = self._dig(content, "appmsgstat", "old_like_num")
        comment_count = self._dig(content, 'comment_count')

        # Rest 3 s to avoid getting blocked.
        time.sleep(3)
        return readNum, likeNum, comment_count

    def getDate(self, times):
        """Format a Unix timestamp as a local-time ``YYYY-MM-DD HH:MM:SS`` string.

        Args:
            times: Seconds since the epoch (``time.localtime`` takes seconds,
                not milliseconds as the original comment stated).
        """
        timearr = time.localtime(times)
        date = time.strftime("%Y-%m-%d %H:%M:%S", timearr)
        return date


if __name__ == '__main__':
    # Script entry point: build the crawler and run the list crawl.
    # The instance gets its own name so the class `gongzhonghao` is not
    # rebound to an instance at module level.
    crawler = gongzhonghao()
    crawler.main()
作者:xTao 分类:LNMP 浏览:2493 评论:0