python爬取微信公众号历史消息.md 2.0 KB

准备:

  • 公众号历史消息的请求地址
  • cookie

2种方法获取地址和cookie

  1. 用浏览器打开公众号历史消息页面,使用浏览器的开发者工具获取请求地址和cookie
  2. 用抓包工具获取请求地址和cookie,比如 Fiddler、Charles

源码如下:

# -*- coding: utf-8 -*-  
import csv
import json

import jsonpath
import requests
   
# Request headers sent with every history-page request.
_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36"

# Cookie copied from a logged-in session (browser developer tools or a
# packet-capture proxy); set this to your own value before running.
_SESSION_COOKIE = "RK=c+zMAuktP8; ptcz=005f33a36542502454b119382853de0d9ea6aa693367c6ae312a1c34c0dcfebe; pgv_pvi=4737076224; ptui_loginuin=1254428526; pgv_pvid=3106019708; wxuin=1411706915; devicetype=Windows7; version=62070152; lang=zh_CN; pass_ticket=FyE/xFBG3nyqQokgb6OoN9VFXaZVJPK53op9NWOsmqB2HZm8CUhy5Hz9+fgVo+PA; wap_sid2=CKPgk6EFElxuY2VlWndUVWJjT1d2YnVzcXMxTk4xcldfQ3hVQUYzUnB1LTVZTDlyQkVCb2ZPZHQ2S3hXbUdsMEJ0VkNVdDBZUmJXUC0wb0ZUb1N6U0JVSlYybHdGZ29FQUFBfjCJsKjuBTgNQJVO"

headers = {
    "User-Agent": _USER_AGENT,
    "Host": "mp.weixin.qq.com",
    "Referer": "https://mp.weixin.qq.com",
    "Cookie": _SESSION_COOKIE,
}
   
# Fetch ten pages of the account's message history (count=10 per request,
# offset advancing by 10) and append every article's title and link to
# info.csv.
#
# Relies on the module-level `headers` dict and on the `csv`, `json`,
# `jsonpath` and `requests` imports at the top of the script.

# Request address; the `{}` placeholder receives the paging offset.
history_url = "https://mp.weixin.qq.com/mp/profile_ext?action=getmsg&__biz=MjM5Mjg3MTIzMQ==&f=json&offset={}&count=10&is_ok=1&scene=124&uin=777&key=777&pass_ticket=&wxtoken=&appmsg_token=1034_N22Qb3TiIEjqcdGLa-1KO9dkAZgO1e2zBcGB5w~~&x5=0&f=json"

# Open the output once instead of once per row. errors="replace" keeps a single
# character that GBK cannot encode (e.g. an emoji in a title) from aborting
# the whole run with UnicodeEncodeError.
with open("info.csv", "a+", encoding="gbk", errors="replace", newline='') as f:
    # csv.writer quotes titles that contain commas, quotes or newlines, which
    # the previous "%s,%s,\n" formatting silently corrupted.
    writer = csv.writer(f, lineterminator="\n")

    for i in range(10):
        # A timeout stops a stalled connection from hanging the script forever.
        response = requests.get(history_url.format(i * 10), headers=headers, timeout=10)
        response.raise_for_status()
        res = response.json()

        if 'general_msg_list' not in res:
            # Presumably the cookie or appmsg_token is no longer valid; show
            # the payload instead of failing with a bare KeyError.
            raise RuntimeError("response has no 'general_msg_list': %r" % (res,))

        # 'general_msg_list' is itself a JSON-encoded string; adjust the
        # JSONPath expressions below if the payload structure differs.
        jsonRes = json.loads(res['general_msg_list'])

        # jsonpath.jsonpath returns False (not an empty list) when nothing
        # matches, so normalise to a list before iterating.
        titleList = jsonpath.jsonpath(jsonRes, "$..title") or []
        urlList = jsonpath.jsonpath(jsonRes, "$..content_url") or []

        for title, link in zip(titleList, urlList):
            # The empty third field reproduces the trailing comma of rows
            # written by earlier runs, keeping the column count consistent.
            writer.writerow([title, link, ""])