If you crawl a website frequently from the same IP address, its server will eventually block that IP. In that case you need a proxy server, so that the crawler fetches the site through a different IP and hides its real one.
from urllib import request   # or: import urllib.request as request

def use_proxy(proxy_addr, url):
    # Route HTTP requests through the given proxy address
    proxy = request.ProxyHandler({'http': proxy_addr})
    opener = request.build_opener(proxy, request.HTTPHandler)
    request.install_opener(opener)
    try:
        response = request.urlopen(url, timeout=5)
    except Exception as e:
        print('Connection error, exiting:', e)
        exit()
    data = response.read().decode('utf-8')
    return data

data = use_proxy("114.115.182.59:128", "http://www.baidu.com")
print(len(data))
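If a single proxy also gets blocked after heavy use, a common next step is to rotate through a pool of proxy addresses. The following is only a minimal sketch of that idea: the addresses in proxy_pool are placeholders, not working proxies, and fetch_with_rotation is a hypothetical helper, not part of the code above.

import random
from urllib import request

# Hypothetical pool of proxy addresses; replace with real, working proxies
proxy_pool = [
    "114.115.182.59:128",
    "111.111.111.111:8080",
]

def fetch_with_rotation(url, retries=3):
    # Try up to `retries` randomly chosen proxies before giving up
    for _ in range(retries):
        proxy_addr = random.choice(proxy_pool)
        opener = request.build_opener(request.ProxyHandler({'http': proxy_addr}))
        try:
            with opener.open(url, timeout=5) as response:
                return response.read().decode('utf-8')
        except Exception:
            continue   # this proxy failed, try the next one
    raise RuntimeError('All proxies in the pool failed for ' + url)

print(len(fetch_with_rotation("http://www.baidu.com")))

Note that this sketch builds a fresh opener per request instead of calling install_opener, so each attempt can use a different proxy without changing the global opener.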