Baidu:
import requests

Search_term = input('Enter the keyword to search for: ')
page = input('Enter how many pages to crawl (30 images per page): ')
page = int(page) + 1
header = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
}
n = 0   # running index used to name the saved files
pn = 1  # index of the first image to fetch; Baidu Images loads 30 images per scroll by default
for m in range(1, page):
    url = 'https://image.baidu.com/search/acjson?'
    param = {
        'tn': 'resultjson_com',
        'logid': '8846269338939606587',
        'ipn': 'rj',
        'ct': '201326592',
        'is': '',
        'fp': 'result',
        'queryWord': Search_term,
        'cl': '2',
        'lm': '-1',
        'ie': 'utf-8',
        'oe': 'utf-8',
        'adpicid': '',
        'st': '-1',
        'z': '',
        'ic': '',
        'hd': '',
        'latest': '',
        'copyright': '',
        'word': Search_term,
        's': '',
        'se': '',
        'tab': '',
        'width': '',
        'height': '',
        'face': '0',
        'istype': '2',
        'qc': '',
        'nc': '1',
        'fr': '',
        'expermode': '',
        'force': '',
        'cg': 'girl',
        'pn': pn,    # index of the first image in this batch
        'rn': '30',  # number of images per batch
        'gsm': '1e',
    }
    page_text = requests.get(url=url, headers=header, params=param)
    page_text.encoding = 'utf-8'
    page_text = page_text.json()
    info_list = page_text['data']
    del info_list[-1]  # the last entry is an empty placeholder with no thumbURL
    img_path_list = []
    for i in info_list:
        img_path_list.append(i['thumbURL'])
    for img_path in img_path_list:
        img_data = requests.get(url=img_path, headers=header).content
        img_path = './' + str(n) + '.jpg'
        with open(img_path, 'wb') as fp:
            fp.write(img_data)
        n = n + 1
    pn += 30  # advance to the next batch of 30 images
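The loop above assumes that every entry in the returned data list has a thumbURL and that every thumbnail request succeeds; in practice Baidu sometimes returns entries without that field, and individual downloads can fail. A slightly hardened variant of the download step, written as a standalone helper (the function name, timeout, and save directory are my own choices, not part of the original script), might look like this:

import requests

def download_thumbnails(info_list, header, start_index=0, save_dir='.'):
    """Download every thumbURL in info_list, skipping broken entries."""
    n = start_index
    for info in info_list:
        img_url = info.get('thumbURL')  # some entries have no thumbURL; skip them
        if not img_url:
            continue
        try:
            img_data = requests.get(img_url, headers=header, timeout=10).content
        except requests.RequestException:
            continue  # skip images that fail to download instead of crashing
        with open(f'{save_dir}/{n}.jpg', 'wb') as fp:
            fp.write(img_data)
        n += 1
    return n  # next free index, so file numbering continues across pages

In the main loop, the inner download for-loop could then be replaced with n = download_thumbnails(info_list, header, start_index=n).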
Bing:

import requests
from lxml import etree

header = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
}
param = {
    'q': '狗',
    'first': '36',  # starting offset
    'count': '35',
    'cw': '1177',
    'ch': '500',
    'relp': '35',
    'tsc': 'ImageHoverTitle',
    'datsrc': 'I',
    'layout': 'RowBased_Landscape',
    'mmasync': '1',
    'dgState': 'x * 288_y * 960_h * 187_c * 1_i * 106_r * 20',
    # 'IG': 'A26F88CB79FA45C2994C9ED8A20099E4',
    'SFX': '4',
    'iid': 'images.5559',
    'form': 'HDRSC2',
}
# Use the bare search endpoint and let requests build the query string from param,
# so the keyword in param is not overridden by one hard-coded into the URL.
url = 'https://cn.bing.com/images/search'
html = requests.get(url=url, headers=header, params=param)
html.encoding = 'utf-8'
page_text = html.text
# print(page_text)
page = etree.HTML(page_text)
page_texts = page.xpath('//*[@id="mmComponent_images_5559_4_1"]/ul/li/div/div[1]/a/div/img/@src')
# print(len(page_texts))
for img_url in page_texts:
    print(img_url)

360:

Sogou:
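The 360 and Sogou sections above are left empty. As a starting point, 360's image search (image.so.com) can be queried in the same JSON style as the Baidu example; the endpoint, parameter names, and response field names in the sketch below are assumptions and should be verified in the browser's network tab before relying on them. Sogou can be approached the same way once its JSON endpoint is identified there.

import requests

header = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
}

# Assumed JSON endpoint for 360 image search; confirm the real one in devtools.
url = 'https://image.so.com/j'
param = {
    'q': '猫',  # search keyword
    'sn': 0,    # assumed: index of the first result to return
    'pn': 30,   # assumed: number of results per request
}
resp = requests.get(url=url, headers=header, params=param, timeout=10)
data = resp.json()
# 'list' and 'thumb' are assumed field names; adjust them to whatever the JSON actually contains.
for item in data.get('list', []):
    print(item.get('thumb'))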