# -*- coding: utf-8 -*-
"""Simple web page scraping with urllib.

Fetches one page while presenting a desktop-browser User-Agent and prints
the decoded body.  Earlier experiments from the original notes (plain GET,
POST, timeout handling, response headers) are kept as disabled reference
snippets at the bottom of the file.
"""

import sys
import urllib.error
import urllib.parse  # only needed by the POST reference snippets below
import urllib.request

# Page fetched when the file is run as a script.
DEFAULT_URL = "http://81.68.109.40:30004/"

# Present a regular desktop browser User-Agent instead of urllib's default.
DEFAULT_HEADERS = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/94.0.4606.61 Safari/537.36"
    ),
}

# Seconds to wait on the connection before giving up.
DEFAULT_TIMEOUT = 10


def fetch_page(url, headers=None, timeout=DEFAULT_TIMEOUT, encoding="utf-8"):
    """Return the body of *url* decoded as text.

    Args:
        url: Address to request; any scheme urllib can open.
        headers: Optional mapping of request headers to send.
        timeout: Seconds passed to ``urlopen`` as the blocking timeout.
        encoding: Codec used to decode the response bytes.

    Returns:
        The response body as ``str``.

    Raises:
        urllib.error.URLError: The request could not be completed
            (``HTTPError`` for HTTP error statuses is a subclass).
        OSError: A lower-level socket failure, such as a timeout while
            reading the body.
        UnicodeDecodeError: The body is not valid in *encoding*.
    """
    request = urllib.request.Request(url=url, headers=dict(headers or {}))
    # The context manager closes the response even if read/decode fails.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read().decode(encoding)


def main():
    """Fetch DEFAULT_URL and print it; return a process exit status."""
    try:
        print(fetch_page(DEFAULT_URL, DEFAULT_HEADERS))
    except OSError as exc:
        # URLError/HTTPError derive from OSError, as do socket timeouts
        # raised while the body is being read.
        print("request failed:", exc, file=sys.stderr)
        return 1
    return 0


# ---------------------------------------------------------------------------
# Reference snippets kept from the original notes (intentionally disabled).
# ---------------------------------------------------------------------------
#
# GET request:
#     response = urllib.request.urlopen("http://www.baidu.com")
#     print(response.read().decode('utf-8'))
#
# POST request (form-encoded body):
#     data = bytes(urllib.parse.urlencode({"User": "Password"}),
#                  encoding='utf-8')
#     response = urllib.request.urlopen("http://httpbin.org/post", data=data)
#     print(response.read().decode("utf-8"))
#
# Timeout handling:
#     try:
#         response = urllib.request.urlopen("http://httpbin.org/get",
#                                           timeout=3)
#         print(response.read().decode("utf-8"))
#     except urllib.error.URLError as e:
#         print("timeout")
#
# Response headers (status codes are e.g. 200, 404):
#     response = urllib.request.urlopen("http://www.baidu.com", timeout=3)
#     print(response.getheader("Server"))
#
# Pretending to be a browser, POST variant:
#     # url = "http://www.douban.com"
#     url = "http://httpbin.org/post"
#     headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
#                "AppleWebKit/537.36 (KHTML, like Gecko) "
#                "Chrome/94.0.4606.61 Safari/537.36"}
#     data = bytes(urllib.parse.urlencode({"name": "eric"}),
#                  encoding='utf-8')
#     req = urllib.request.Request(url=url, data=data, headers=headers,
#                                  method="POST")
#     response = urllib.request.urlopen(req)
#     print(response.read().decode('utf-8'))


if __name__ == "__main__":
    sys.exit(main())