#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:woshinidaye

# Fetch the hot comments of a NetEase Cloud Music song. To keep things simple,
# no login is performed.
# 1. Find the unencrypted request parameters.
# 2. Encrypt them with the same logic the NetEase web client uses, producing
#    the two form fields `params` and `encSecKey`.
# 3. Send the request and read the data.
# Encryption call as it appears in the site's JavaScript:
# var
# bUM2x = window.asrsea(JSON.stringify(i6c), bsG7z(["流泪", "强"]), bsG7z(WW3x.md), bsG7z(["爱心", "女孩", "惊恐", "大笑"]));
# e6c.data = j6d.cs6m({
#     params: bUM2x.encText,
#     encSecKey: bUM2x.encSecKey
# })

import base64
import json
import re

import requests
from lxml import html
from Crypto.Cipher import AES  # pip install pycryptodome

etree = html.etree

url = "https://music.163.com/weapi/comment/resource/comments/get?csrf_token="
# Request method: the values below feed the encryption routine.
# `e` and `f` are not used by this script.
# NOTE(review): presumably they are the 2nd and 3rd arguments of
# window.asrsea (RSA public exponent and modulus) - confirm.
e = '010001'
f = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
# AES key for the first encryption pass (see get_params).
g = "0CoJUm6Qyw8W8jud"
# AES key for the second encryption pass (see get_params); fixed here rather
# than generated per request.
i = 'hjbiwTejTo17235R'


def get_encSecKey():
    """Return the constant ``encSecKey`` form value sent with the request.

    NOTE(review): presumably this is the site's RSA encryption of ``i``
    captured once from the browser, so it would have to be regenerated
    if ``i`` changes - confirm.
    """
    return '6c11f64c829ec94df8ce7d711932c15c4c6e46daf00674f0f22dc1170ba68e809047ee5a7e12c3e07d8c1c3f66b76e4518201b1d4679bd1659a747856f16ac17c32286fba6a82034fa2597004dcca90ca9bfce49bd1a85d09fac162d7b40b390fe8d4c4be15bcc65788d0002fdbd91fb529a71d4d42aa702170fd8e92f1ed87e'


def to_16(data):
    """Pad *data* so that its UTF-8 encoding fills whole 16-byte blocks.

    ``pad`` copies of ``chr(pad)`` are appended, with ``pad`` between 1 and
    16, so a full extra block is added when the input is already aligned.

    The pad length is derived from the encoded byte length, not the
    character count: the caller encrypts the UTF-8 bytes, and non-ASCII
    characters occupy more than one byte each. ``chr(pad)`` is always ASCII,
    so every padding character contributes exactly one byte.

    :param data: text to pad.
    :return: the padded text.
    """
    pad = 16 - len(data.encode('utf-8')) % 16
    return data + chr(pad) * pad


def enc_params(data, key):
    """AES-CBC encrypt *data* with *key* and return the result as Base64 text.

    :param data: plaintext string; it is padded with :func:`to_16` first.
    :param key: AES key as a string (encoded to UTF-8 bytes for the cipher).
    :return: Base64-encoded ciphertext as ``str``.
    """
    iv = '0102030405060708'
    padded = to_16(data).encode('utf-8')
    aes = AES.new(key=key.encode('utf-8'), IV=iv.encode('utf-8'), mode=AES.MODE_CBC)
    encrypted = aes.encrypt(padded)
    return base64.b64encode(encrypted).decode('utf-8')  # becomes `params`


def get_params(data):  # data is expected to be a string
    """Build the ``params`` form value by encrypting *data* twice.

    The first pass uses key ``g``; its Base64 output is encrypted again
    with key ``i``.

    :param data: the request payload serialized as a string.
    :return: the doubly encrypted, Base64-encoded payload.
    """
    first = enc_params(data, g)
    return enc_params(first, i)


# Unused hex sample kept from the original notes:
# "c6aaef7d7fe54edc416de03808f94c8de2590f943d4f334d8bc485e53f00b95acdfbe704330a01d81bfe666c00b5d681321ab4b04147d0ba1683877e4350b1310e3ad67465ffa1dc9ea57b9d682f1efffbe14ad734a9454faf8e28464491542226109de2fdce6751b63426bd3b18543108c5076ef2b8eab03358ea7a88ce90e9"
# Unencrypted request payload. `rid` and `threadId` carry the song ID.
data = {
    'csrf_token': "",
    'cursor': '-1',
    'offset': '0',
    'orderType': '1',
    'pageNo': '1',
    'pageSize': '20',
    'rid': "R_SO_4_1881521546",
    'threadId': "R_SO_4_1881521546"
}

# Encryption scheme (JavaScript from the web client, kept for reference;
# the trailing `#` notes are annotations, not part of the JavaScript):
#
#     function a(a) {    # generates a random 16-character string
#         var d, e, b = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789", c = "";
#         for (d = 0; a > d; d += 1)    # loops 16 times
#             e = Math.random() * b.length,    # random number
#             e = Math.floor(e),    # floor to an integer
#             c += b.charAt(e);    # take the character at that index
#         return c
#     }
#     function b(a, b) {    # a is the data to encrypt
#         var c = CryptoJS.enc.Utf8.parse(b)
#           , d = CryptoJS.enc.Utf8.parse("0102030405060708")
#           , e = CryptoJS.enc.Utf8.parse(a)
#           , f = CryptoJS.AES.encrypt(e, c, {
#             iv: d,    # initialization vector (offset)
#             mode: CryptoJS.mode.CBC    # CBC mode
#         });
#         return f.toString()
#     }
#     function c(a, b, c) {
#         var d, e;
#         return setMaxDigits(131),
#         d = new RSAKeyPair(b,"",c),
#         e = encryptedString(d, a)
#     }
#     function d(d, e, f, g) {    # d:data e:010001 f:bsG7z(WW3x.md) g:bsG7z(["爱心", "女孩", "惊恐", "大笑"])
#         var h = {}
#           , i = a(16);    # i is the random 16-character string
#         return h.encText = b(d, g),
#         h.encText = b(h.encText, i),    # yields params; encrypted twice, the first pass uses data + g
#         h.encSecKey = c(i, e, f),    # yields encSecKey
#         h
#     }
#
# var bUM2x = window.asrsea(JSON.stringify(i6c), bsG7z(["流泪", "强"]), bsG7z(WW3x.md), bsG7z(["爱心", "女孩", "惊恐", "大笑"]));
#
#     u6o.be6Y = function(Y6S, e6c) {
#         var i6c = {}
#           , e6c = NEJ.X({}, e6c)
#           , mo0x = Y6S.indexOf("?");
#         if (window.GEnc && /(^|\.com)\/api/.test(Y6S) && !(e6c.headers && e6c.headers[eu7n.AI4M] == eu7n.FD6x) && !e6c.noEnc) {
#             if (mo0x != -1) {
#                 i6c = j6d.gW8O(Y6S.substring(mo0x + 1));
#                 Y6S = Y6S.substring(0, mo0x)
#             }
#             if (e6c.query) {
#                 i6c = NEJ.X(i6c, j6d.fT8L(e6c.query) ? j6d.gW8O(e6c.query) : e6c.query)
#             }
#             if (e6c.data) {
#                 i6c = NEJ.X(i6c, j6d.fT8L(e6c.data) ? j6d.gW8O(e6c.data) : e6c.data)
#             }
#             i6c["csrf_token"] = u6o.gQ8I("__csrf");
#             Y6S = Y6S.replace("api", "weapi");
#             e6c.method = "post";
#             delete e6c.query;
#             var bUM2x = window.asrsea(JSON.stringify(i6c), bsG7z(["流泪", "强"]), bsG7z(WW3x.md), bsG7z(["爱心", "女孩", "惊恐", "大笑"]));
#             e6c.data = j6d.cs6m({
#                 params: bUM2x.encText,
#                 encSecKey: bUM2x.encSecKey
#             })
#         }
#         var cdnHost = "y.music.163.com";
#         var apiHost = "interface.music.163.com";
#         if (location.host === cdnHost) {
#             Y6S = Y6S.replace(cdnHost, apiHost);
#             if (Y6S.match(/^\/(we)?api/)) {
#                 Y6S = "//" + apiHost + Y6S
#             }
#             e6c.cookie = true
#         }
#         cxg5l(Y6S, e6c)

# POST the encrypted payload and print the raw response body.
resp = requests.post(
    url,
    data={
        'params': get_params(json.dumps(data)),
        "encSecKey": get_encSecKey(),
    },
    timeout=10,  # without a timeout the call can block forever on a stalled connection
)
print(resp.text)


# The code above fetches the comments of a single song; the variables are
# mainly in `data`. To switch songs, change the song ID, which can be found
# by inspecting the page. Notes on collecting song IDs from a playlist page:
#
# url = 'https://music.163.com/playlist?id=6920064959'
# resp = requests.get(url=url,headers=headers)
# resp.encoding = 'utf-8'
# # print(resp.text)
# # Using regular expressions
# # obj = re.compile(r'<li><a href="/(?P<song_id>.*?)">(?P<song_title>.*?)</a></li>',re.S)
# # songs = obj.finditer(resp.text)
# # for my_list in songs:
# #     aa = my_list.group('song_id').split('=')[-1]
# #     print(aa,'\t',my_list.group('song_title'))
#
# # Using XPath
# # etree = html.etree
# # # print(resp.text)
# # html = etree.HTML(resp.text)
# # test = html.xpath('//html/body/div[3]/div[1]/div/div/div[2]/div[2]//a/@href')
# # # Tried this for a long time: with the full path written out nothing is
# # # matched, which seems related to the page being nested.
# # print(test)
#
# # Using bs4
# # from bs4 import BeautifulSoup
# # html = BeautifulSoup(resp.text,'html.parser')
# # test = html.find('ul',class_='f-hide').find_all('a')
# # print(test)