"""Scan song IDs on music.163.com for very popular hot comments.

For every song ID in a fixed range the script requests the comment
resource for that song, looks at the first entry of ``hotComments`` and,
when its ``likedCount`` exceeds ``LIKED_THRESHOLD``, prints a summary line
and appends it to ``OUTPUT_PATH``.
"""
import json
import sys
import threading

import my_fake_useragent
import requests

# URL prefix of the comment resource; the song ID is appended to it.
COMMENTS_URL = "http://music.163.com/api/v1/resource/comments/R_SO_4_"
# File that matching comments are appended to.
OUTPUT_PATH = "./网抑云.txt"
# Only comments with strictly more likes than this are recorded.
LIKED_THRESHOLD = 300000
# Inclusive range of song IDs to scan. The original author's note says the
# newest song IDs are around 1,882,041,535 (about 1.8 billion).
FIRST_MUSIC_ID = 254574
LAST_MUSIC_ID = 2000000000
# Number of worker threads started by thread().
WORKER_COUNT = 6
# Seconds to wait for the server before giving up on a single request.
REQUEST_TIMEOUT = 10

# Serializes appends to the output file across worker threads.
_save_lock = threading.Lock()


def getHTMLText(url):
    """Fetch *url* with a random User-Agent and return the response body.

    Returns:
        The response text, or ``None`` when the request fails, times out
        or the server answers with an HTTP error status.
    """
    headers = {"user-agent": my_fake_useragent.UserAgent().random()}
    try:
        r = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        r.raise_for_status()
    except requests.RequestException:
        # Best effort: a failed ID is skipped, the scan goes on.
        return None
    # r.encoding is deliberately left alone: per the original author's note,
    # switching to r.apparent_encoding produced garbled text.
    return r.text


def fillList(music_id, url, commentlist):
    """Record the top hot comment of one song if it is popular enough.

    Args:
        music_id: Song ID; appended to *url* to form the request URL.
        url: URL prefix of the comment resource.
        commentlist: Unused; kept so existing callers keep working.

    The first entry of ``hotComments`` is inspected. When its
    ``likedCount`` is greater than ``LIKED_THRESHOLD`` a summary line is
    printed and appended to ``OUTPUT_PATH``. Songs whose response is
    missing, is not valid JSON or lacks the expected fields are skipped.
    """
    html = getHTMLText("{0}{1}".format(url, music_id))
    if not html:
        return

    try:
        top_comment = json.loads(html)["hotComments"][0]
        liked_count = top_comment["likedCount"]
        if not liked_count > LIKED_THRESHOLD:
            return
        m = "点赞数===>{0} 歌曲ID===>{1} 用户===>{2} 评论===>{3}".format(
            liked_count,
            music_id,
            top_comment["user"]["nickname"],
            top_comment["content"],
        )
    except (ValueError, LookupError, TypeError):
        # Invalid JSON, no hot comments, or an unexpected payload shape.
        return

    print(m)
    try:
        save(m, OUTPUT_PATH)
    except OSError as exc:
        # Keep scanning, but do not hide the fact that the result was lost.
        print("could not write {0}: {1}".format(OUTPUT_PATH, exc),
              file=sys.stderr)


def save(m, path):
    """Append *m* followed by a newline to the UTF-8 text file at *path*."""
    # The lock keeps lines from different worker threads from interleaving.
    with _save_lock:
        with open(path, 'a', encoding='utf-8') as f:
            f.write(m + "\n")


def main(start=FIRST_MUSIC_ID, stop=LAST_MUSIC_ID, step=1):
    """Check every *step*-th song ID from *start* to *stop* (inclusive).

    Called without arguments it walks the whole range one ID at a time,
    as before. thread() passes an offset and a stride so that the workers
    split the range between them instead of repeating each other's work.
    """
    for i in range(start, stop + 1, step):
        fillList(str(i), COMMENTS_URL, [])


def thread():
    """Run the scan on ``WORKER_COUNT`` threads and wait for all of them.

    Worker *k* handles the IDs ``FIRST_MUSIC_ID + k``,
    ``FIRST_MUSIC_ID + k + WORKER_COUNT``, ... so every ID in the range is
    visited by exactly one worker.
    """
    workers = []
    for offset in range(WORKER_COUNT):
        # Pass the callable itself: target=main() would run the whole scan
        # right here in the calling thread before any Thread is created.
        t = threading.Thread(
            target=main,
            args=(FIRST_MUSIC_ID + offset, LAST_MUSIC_ID, WORKER_COUNT),
        )
        t.start()
        workers.append(t)

    # Join only the threads started above, not every live thread.
    for t in workers:
        t.join()


if __name__ == "__main__":
    thread()
# Reference: https://blog.csdn.net/weixin_43881394/article/details/109240813