CSDN 论坛没法像课程里面的方式操作了
来源:14-7 分析和获取所有的版块 - 1

qq_呵呵_47
2021-08-14
按照Bobby老师的教程,先分析整个网站的数据的结构,最终发现与教程出入很大。
原因是它现在的 API 接口增加了 x-ca-nonce 和 x-ca-signature 签名校验。
写回答
1回答
-
qq_呵呵_47
提问者
2021-08-15
哎!!!找到答案了
从这里开始不能完全根据课程操作了:下面是 x-ca-nonce 和 x-ca-signature 的签名计算
import base64
import hmac
import json
import random
import re
from collections import defaultdict
from hashlib import sha256
from urllib.parse import urlparse, parse_qs, parse_qsl

# Hexadecimal digit characters 0-9, A-F (index == digit value).
base_16 = [str(x) for x in range(10)] + [chr(x) for x in range(ord('A'), ord('A') + 6)]
# HMAC-SHA256 key used to compute the x-ca-signature header.
app_secret = 'bK9jk5dBEtjauy6gXL7vZCPJ1fOy076H'


def csdn_request(url, method='GET', params=None):
    """Send a signed request to ``url`` and return the ``data`` field of the reply.

    The request carries ``x-ca-key``, a fresh ``x-ca-nonce`` and the matching
    ``x-ca-signature``.  The HTTP verb actually sent is ``method``, the same
    verb that is fed into the signature.

    Returns the ``data`` member of the JSON body when its ``code`` is 200.
    Otherwise prints the body's ``message`` and returns ``None``.  ``None`` is
    also returned when the body is not valid JSON.
    """
    # Imported here so the pure signing helpers below can be imported and
    # exercised without the HTTP client being installed.
    import requests

    headers = {
        'accept': 'application/json, text/plain, */*',
        'x-ca-key': '203899271',
        'x-ca-nonce': x_ca_nonce(),
    }
    # The signature is computed before 'x-ca-signature-headers' is added, so
    # only x-ca-key and x-ca-nonce take part in the signed header section.
    headers['x-ca-signature'] = x_ca_signature(url, method=method, params=params, headers=headers)
    headers['x-ca-signature-headers'] = 'x-ca-key,x-ca-nonce'
    res = requests.request(method, url, params=params, headers=headers)
    try:
        payload = res.json()
    except json.decoder.JSONDecodeError:
        return None
    if payload.get('code') == 200:
        return payload.get('data')
    print(payload.get('message'))
    return None


def x_ca_nonce(ca=None):
    """Generate an x-ca-nonce value from the template ``ca``.

    Every ``x`` in the template becomes a random hex digit and every ``y``
    becomes one of ``8``, ``9``, ``A``, ``B``; all other characters are kept.
    The default template is ``xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx``.
    """
    if ca is None:
        ca = "xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx"
    return re.sub('(?P<value>[xy])', x_ca_nonce_replace, ca)


def x_ca_nonce_replace(matched):
    """Return the replacement hex digit for one ``x``/``y`` template match."""
    num = random.randrange(16)
    if matched.group('value') == 'x':
        return dec2hex(num)
    # 'y' placeholder: keep the two low bits and force the top bit -> 8..B.
    return dec2hex(3 & num | 8)


def dec2hex(num):
    """Convert a non-negative integer to an upper-case hexadecimal string.

    ``0`` yields ``'0'`` so that a placeholder is never replaced by an empty
    string.  Raises ``ValueError`` for negative input.
    """
    if num < 0:
        raise ValueError('dec2hex() requires a non-negative integer')
    if num == 0:
        return '0'
    digits = []
    while num:
        num, rem = divmod(num, 16)
        digits.append(base_16[rem])
    return ''.join(reversed(digits))


def x_ca_signature(url, method='GET', params=None, headers=None):
    """Compute the x-ca-signature value for a request.

    The string to sign consists of the upper-cased method, the ``accept``
    header (default ``*/*``), an empty line, the ``content-type`` and ``date``
    headers (default empty), one ``key:value`` line per ``x-ca-*`` header, and
    finally the host-stripped URL with its query string.  It is signed with
    HMAC-SHA256 keyed by ``app_secret`` and returned base64-encoded.

    When ``params`` is empty and ``url`` carries a query string, the query is
    parsed out of the URL and used as the parameters.
    """
    headers = headers or {}
    method = method.upper()
    accept = headers.get('accept', '*/*')
    date = headers.get('date', '')
    content_type = headers.get('content-type', '')

    if params or '?' not in url:
        params = params or {}
    else:
        # Query lives in the URL: lift it into params and sign the bare path.
        params = parse_url(url)
        url = url.split('?')[0]

    # The third slot is always empty here.
    # NOTE(review): presumably the Content-MD5 slot of the gateway's
    # string-to-sign layout - confirm against the gateway documentation.
    lines = [method, accept, '', content_type, date]
    for key, value in parse_headers(headers).items():
        lines.append(key + ':' + value)
    origin_ca = '\n'.join(lines) + '\n' + rewrite_url(url, params=params)

    digest = hmac.new(app_secret.encode('utf8'), origin_ca.encode('utf8'), digestmod=sha256).digest()
    return base64.b64encode(digest).decode('utf8')


def parse_url(url):
    """Parse the query string of ``url`` into a dict.

    A key that occurs once maps to its string value; a key that occurs
    several times maps to the list of its values in order of appearance.
    """
    result = {}
    for key, value in parse_qsl(urlparse(url).query):
        if key not in result:
            result[key] = value
        elif isinstance(result[key], list):
            result[key].append(value)
        else:
            result[key] = [result[key], value]
    return result


def parse_headers(headers):
    """Return the ``x-ca-*`` headers of ``headers`` with lower-cased keys."""
    headers = headers or {}
    p_headers = {}
    for key, value in headers.items():
        key = key.lower()
        if key.startswith('x-ca-'):
            p_headers[key] = value
    return p_headers


def rewrite_url(url, params=None):
    """Strip the scheme and ``*.csdn.net`` host from ``url`` and append ``params``."""
    url = re.sub(
        r'^(?=^.{3,255}$)(http(s)?:\/\/)?(www\.)?[a-zA-Z0-9][-a-zA-Z0-9]{0,62}(\.csdn\.net)',
        '',
        url,
    )
    return url + parse_obj_to_str(params=params)


def parse_obj_to_str(params=None):
    """Serialize ``params`` as ``?k1=v1&k2=v2`` with keys in sorted order.

    List values are joined with commas and booleans are rendered as
    ``true``/``false``.  Returns an empty string when there are no parameters.
    """
    params = params or {}
    pairs = []
    for key in sorted(params):
        value = params[key]
        if isinstance(value, list):
            value = ",".join(str(item) for item in value)
        elif value is True:
            value = 'true'
        elif value is False:
            value = 'false'
        pairs.append(key + '=' + str(value))
    if pairs:
        return '?' + '&'.join(pairs)
    return ''
bobby老师看到的话
麻烦看看还有什么地方需要完善
012021-08-16
相似问题