这是我们 Python 课程要求制作的一个项目:用 Python 爬取数据并做数据展示。
我们使用 requests 库对房价信息进行了爬取,以下就是我们所爬取的网页。
我们主要爬取的内容包括房价的走势、上月的价格、本月的价格、历史最高的价格和涨幅等信息,并用 matplotlib 画出折线图并将其保存下来。
"""Scrape housing-price data from fangjia.com and plot it with matplotlib."""

import datetime
import os
import re

import matplotlib.pyplot as plt
import pinyin
import requests

# City used when the user does not supply one.
DEFAULT_CITY = '天津'
# Seconds to wait for the remote site before giving up on a request.
REQUEST_TIMEOUT = 10


def _draw_line_chart(x, y, *, title, legend, figsize, filename):
    """Draw one labelled line chart, save it to *filename* and show it."""
    # Configure fonts before any text is created. SimHei renders the Chinese
    # labels; turning off unicode_minus is the usual workaround so negative
    # tick labels stay readable with CJK fonts.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    fig = plt.figure(figsize=figsize)
    plt.title(title)
    plt.xlabel('时间')
    plt.ylabel('价格 (元/㎡)')
    plt.plot(x, y, marker='o', markersize=5)
    # Print every value just above its data point.
    for a, b in zip(x, y):
        plt.text(a, b, str(b), ha='center', va='bottom', fontsize=10)
    plt.legend([legend])
    plt.savefig(filename)
    plt.show()
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)


def oneyear_m():
    """Plot the price trend stored in ``zoushi.txt``.

    Each non-blank line of the file is expected to look like
    ``YYYY-MM-DD:price``. The first five characters of the date (the year
    and the dash) are dropped for the x-axis label and the price is parsed
    as an integer. The chart is saved as ``一年变化图.jpg`` and shown.
    """
    x = []
    y = []
    with open("zoushi.txt", 'r', encoding='utf-8') as data1:
        for line in data1:
            if not line.strip():
                # Tolerate blank or trailing empty lines.
                continue
            fields = line.split(":")
            x.append(fields[0][5:])
            y.append(int(fields[1]))
    _draw_line_chart(x, y, title='一年变化图', legend='走势',
                     figsize=(28, 10), filename='一年变化图.jpg')


def paint(x, y, flag):
    """Plot *y* against *x* as a line chart titled *flag*.

    The chart is saved as ``<flag>.jpg`` in the current directory and shown.
    """
    _draw_line_chart(x, y, title=flag, legend='方案',
                     figsize=(10, 5), filename=flag + '.jpg')


def getdata_txt(txt):
    """Parse a community price table from an open text file.

    Each non-blank line holds space-separated fields: name, last month's
    price, this month's price and the historical maximum (further fields
    are ignored).

    Args:
        txt: A readable text file object.

    Returns:
        A tuple ``(name, lastmon, nowmon, history_max, change)`` of parallel
        lists. ``change`` is the relative change from last month to this
        month rounded to four decimals; it is ``0.0`` when last month's
        price is zero, so a zero price does not abort the whole run.
    """
    name = []
    lastmon = []
    nowmon = []
    history_max = []
    change = []
    for line in txt.read().split("\n"):
        if not line.strip():
            continue
        fields = line.split(" ")
        front = int(fields[1])
        end = int(fields[2])
        name.append(fields[0])
        lastmon.append(front)
        nowmon.append(end)
        history_max.append(int(fields[3]))
        change.append(round((end - front) / front, 4) if front else 0.0)
    return name, lastmon, nowmon, history_max, change


def main():
    """Plot the four community charts from ``data_up.txt`` and ``data_down.txt``.

    Rows from both files are concatenated (up list first) so that neither
    file's data is discarded, then one chart each is drawn for last month's
    price, this month's price, the historical maximum and the change rate.
    """
    name, lastmon, nowmon, history_max, change = [], [], [], [], []
    merged = (name, lastmon, nowmon, history_max, change)
    for path in ("data_up.txt", "data_down.txt"):
        with open(path, "r", encoding='utf-8') as txt:
            columns = getdata_txt(txt)
        for target, column in zip(merged, columns):
            target.extend(column)
    paint(name, lastmon, "上月房价图")
    paint(name, nowmon, "本月房价图")
    paint(name, history_max, "历史最高分布图")
    paint(name, change, "增率变化图")


def get_first(s):
    """Return the first pinyin letter of every character in *s*.

    The result is used as the city subdomain of the target URL. ``重`` is
    mapped to ``c`` explicitly instead of going through ``pinyin.get``.
    Tone marks are stripped so that vowel-initial syllables yield a plain
    ASCII letter rather than an accented one.
    """
    return ''.join(
        'c' if ch == '重' else pinyin.get(ch, format="strip")[0]
        for ch in s
    )


def get_really_time(time):
    """Convert a millisecond Unix timestamp to a ``YYYY-MM-DD`` string.

    The conversion uses the local timezone of the machine running the script.
    """
    your_dt = datetime.datetime.fromtimestamp(int(time) / 1000)
    return your_dt.strftime("%Y-%m-%d")


class reptile:
    """Crawler that downloads price data for one city and writes it to text files."""

    def __init__(self):
        self.__city = DEFAULT_CITY
        self.__header = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.55 Safari/537.36 Edg/96.0.1054.43'
        }

    def up_data(self, city):
        """Switch the target city; an empty name keeps the current one."""
        if city != '':
            self.__city = city
        else:
            print('没有得到新的城市名。')

    def write_in(self, data, *, fileName='', title='', time=False):
        """Write *data* to *fileName*, one record per line, no trailing newline.

        Args:
            data: With ``time=True``, an iterable of ``(timestamp_ms, value)``
                pairs written as ``YYYY-MM-DD:value``. Otherwise a mapping of
                name to an iterable of strings, written as the name and each
                string followed by a single space.
            fileName: Path of the file to (over)write.
            title: Optional first line of the file.
            time: Selects the record format described above.
        """
        if time:
            records = [str(get_really_time(stamp)) + ':' + str(value)
                       for stamp, value in data]
        else:
            records = [key + ' ' + ''.join(item + ' ' for item in values)
                       for key, values in data.items()]
        with open(fileName, 'w', encoding='utf-8') as fp:
            if title != '':
                fp.write(title + '\n')
            fp.write('\n'.join(records))

    def show_all(self):
        """Draw every chart from the files written by the crawl methods."""
        oneyear_m()
        main()

    def get_photo_data(self):
        """Download the city's overall price-trend series into ``zoushi.txt``.

        Raises:
            requests.RequestException: On network failure, timeout or an
                HTTP error status.
        """
        url = 'http://' + get_first(self.__city) + '.fangjia.com/trend/yearData?'
        param = {
            'defaultCityName': self.__city,
            'districtName': '',
            'region': '',
            'block': '',
            'keyword': ''
        }
        response = requests.get(url=url, params=param, headers=self.__header,
                                timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        series = response.json()['series']
        self.write_in(series[0]['data'], fileName='zoushi.txt', time=True)

    def get_which(self, choose='up'):
        """Scrape one community ranking table into ``data_<choose>.txt``.

        Args:
            choose: ``'up'`` selects the first ranking table; any other value
                selects the table whose container has the
                ``border-bottom:none`` style. The value is also used in the
                output file name.

        Raises:
            requests.RequestException: On network failure, timeout or an
                HTTP error status.
            IndexError: If the expected table is not found in the page.
        """
        url = 'http://' + get_first(self.__city) + '.fangjia.com/zoushi'
        response = requests.get(url=url, headers=self.__header,
                                timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
        page_txt = response.text

        if choose == 'up':
            # NOTE(review): this pattern ends on an opening <tbody>, unlike
            # the one below which ends on </tbody>. Left unchanged because it
            # cannot be verified against the live page from here; confirm.
            ex = '<div class="trend trend03">.*?<tbody>(.*?)<tbody>'
        else:
            ex = '<div class="trend trend03" style="border-bottom:none;">.*?<tbody>(.*?)</tbody>'
        tables = re.findall(ex, page_txt, re.S)
        if not tables:
            raise IndexError(
                'no ranking table found for choose=%r at %s' % (choose, url))

        rows = re.findall('<tr class=".*?">(.*?)</tr>', tables[0], re.S)
        # Join the row fragments so the cell patterns run on real HTML rather
        # than on the repr() of a list, which escapes special characters.
        rows_html = '\n'.join(rows)

        ex_name = '<td class="td02"><a href=".*?">(.*?)</a></td>'
        ex_data = '<td>(.*?)</td>'
        need_name = re.findall(ex_name, rows_html, re.S)
        # Drop the unit and period cells; four data cells remain per row.
        need_data = [cell for cell in re.findall(ex_data, rows_html, re.S)
                     if cell not in ('元/㎡', '周度')]

        d = {house_name: need_data[4 * i:4 * (i + 1)]
             for i, house_name in enumerate(need_name)}
        self.write_in(d, fileName='data_' + choose + '.txt')


if __name__ == '__main__':
    # Script entry point: crawl one city into its own directory, then plot.
    crawler = reptile()
    city = input("请输入城市\n").strip()
    # An empty answer keeps the crawler's default city (and prints a notice).
    crawler.up_data(city)
    city = city or DEFAULT_CITY
    print(get_first(city))
    # All output files are written into a directory named after the city.
    os.makedirs(city, exist_ok=True)
    os.chdir(city)
    crawler.get_which()
    crawler.get_which('down')
    crawler.get_photo_data()
    crawler.show_all()