Python教程

python期末大作业-图形化爬虫

本文主要是介绍python期末大作业-图形化爬虫,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!
  1. 爬虫部分
  2. 数据库部分
  3. ui部分
  4. 文件系统部分
  5. 数据简单分析部分

结果演示:

上述展示的是主页面,当点击提交的时候会进行线程的开启来进行数据的爬取,当然,在点击的时候也会有简单的检验功能,url的填写要注意格式:当写完完整的url之后有一个空格,再写要创建的表的表名。下面的两列为xpath,和其对应的表的列名。

        当点击查看数据库的时候,会弹出第二个图形化界面,其中有一个下拉菜单让我们选择要查看的表,数据会在下面的表格当中显示,并且可以通过在文本框当中写入查询的sql语句来实现部分数据的查询。

 当点击保存到本地文件系统的按钮时:

会弹出下面的页面:然后点击File,创建自己的文件的地址和名称,在右侧的只读文本框当中显示其路径,然后也要写要保存到本地的文件的sql语句,最终会将sql查询的结果显示在下面的文本框当中,也会保存到对应的文件当中。

 当点击数据分析按钮的时候:会在PyCharm当中显示一些数据,当然,这些数据是之前在测试的时候从淘宝爬取的一些品类的商品,然后通过读取数据库,从中得到各个品种的平均值,通过python图形化展示来将其展示出来。

        

 至此,该项目的各个模块功能阐述完毕,具体实现细节在下方的代码当中,如果有什么问题,我们可以一起探讨。谢谢各位的支持。

 

整个文件的部署:

 

爬虫部分:

        是通过Selenium无头浏览器和xpath结合起来确定数据的,和我们平时的爬虫差不多,但是最终要将数据存放到数据库当中,当然在数据库的部分会通过ui界面的输入,将表名和字段传入,然后动态的进行SQL语句的拼接来实现对数据的安全存储。

代码:script.py文件当中的结果:

from selenium.webdriver.chrome.options import Options
from selenium import webdriver
# from bbigwork import ThreadUi
from lxml import etree
import pymysql

# Module-level MySQL connection and cursor shared by the functions in this
# script. They are opened as a side effect of importing the module.
# NOTE(review): the credentials are hard-coded here -- consider loading them
# from configuration instead.
connection = pymysql.connect(user='whp', password='wms111', database='python', charset='utf8',
                             port=3306, host='localhost')
cur = connection.cursor()

def share_brower():
    """Create a headless Chrome WebDriver for the crawler.

    Returns:
        A ``webdriver.Chrome`` instance started with the ``--headless`` and
        ``--disable-gpu`` arguments and pointed at the Chrome binary at the
        hard-coded Windows install path below.
    """
    chrome_options = Options()
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--disable-gpu')
    # Raw string: the backslashes of a Windows path must not be parsed as
    # escape sequences ("\P", "\G", ... are invalid escapes in a plain string).
    chrome_options.binary_location = r'C:\Program Files (x86)\Google\Chrome\Application\chrome.exe'
    # ``options=`` replaces the deprecated ``chrome_options=`` keyword.
    return webdriver.Chrome(options=chrome_options)


def paMain(url, xpath):
    """Load ``url`` in a headless browser and evaluate each XPath on the page.

    Args:
        url: Address of the page to crawl.
        xpath: Iterable of XPath expressions.

    Returns:
        A list with one entry per expression, in the same order; each entry
        is whatever ``tree.xpath(expression)`` returns for the page source.
    """
    brower = share_brower()
    # try/finally so the browser is shut down even when loading, scrolling
    # or parsing raises; otherwise the Chrome process would be left running.
    try:
        brower.get(url)
        # Sets the driver's implicit-wait timeout to 10 seconds.
        brower.implicitly_wait(10)
        # Scroll the page far down before reading its source.
        brower.execute_script('document.documentElement.scrollTop=100000')
        tree = etree.HTML(brower.page_source)
        news = [tree.xpath(expression) for expression in xpath]
    finally:
        brower.quit()
    print(news)
    return news


def _quote_identifier(name):
    """Return ``name`` wrapped in backticks, doubling any embedded backtick."""
    return '`' + name.replace('`', '``') + '`'


# Every column of the created table is a varchar
def creaetTable(tableName, column, xpath):
    """Crawl a page and store the extracted values in a freshly created table.

    Args:
        tableName: ``"<url> <table name>"`` -- the page URL and the target
            table name separated by a single space.
        column: Column names of the new table, one per XPath expression.
        xpath: XPath expressions handed to ``paMain``.

    The number of stored rows is the length of the shortest XPath result,
    capped at 1000. When any expression matches nothing the function returns
    without touching the database. An existing table with the same name is
    dropped and recreated.

    Raises:
        IndexError: if ``tableName`` contains no space-separated table name.
    """
    parts = tableName.split(" ")
    url = parts[0]
    news = paMain(url, xpath)
    # Only as many rows as the shortest result list, and never more than 1000.
    row_count = min([1000] + [len(values) for values in news])
    if row_count == 0:
        # The cursor and connection are shared module-level state; closing
        # them here would make every later crawl fail, so just return.
        return

    print("最小的行数是"+str(row_count))
    table = parts[1]
    print(url+"   "+table)

    # Identifiers cannot be bound as parameters, so they are backtick-quoted.
    quoted_table = _quote_identifier(table)
    cur.execute('drop table if exists ' + quoted_table)
    columns_sql = ','.join(_quote_identifier(name) + ' varchar(150)' for name in column)
    sql = 'create table ' + quoted_table + ' (' + columns_sql + ');'
    print(sql)
    cur.execute(sql)
    connection.commit()
    print("表创建成功")

    # Values are bound as parameters so quotes inside the crawled text can
    # neither break the statement nor inject SQL.
    placeholders = ','.join(['%s'] * len(news))
    # '%' is doubled because the driver %-formats a statement that has parameters.
    insert_sql = ('insert into ' + quoted_table.replace('%', '%%')
                  + ' values(' + placeholders + ')')
    rows = [tuple(str(values[i]) for values in news) for i in range(row_count)]
    cur.executemany(insert_sql, rows)
    connection.commit()
    print("数据插入成功")

多线程爬取:ThreadScr.py

        多线程实现对数据的爬取,并且伴有图形化:

        

import threading
from bbigwork import script
import wx
import pymysql

class myThread(threading.Thread):
    """Worker thread that runs one crawl-and-store job off the GUI thread.

    The three constructor arguments are stored and forwarded unchanged to
    ``script.creaetTable`` when the thread runs.
    """

    def __init__(self, tableName, column, xpath):
        threading.Thread.__init__(self)
        self.tableName = tableName
        self.column = column
        self.xpath = xpath

    @staticmethod
    def _show_message(message):
        """Show a modal notification dialog; intended to run on the GUI thread."""
        dlg = wx.MessageDialog(None, message, u"提示", wx.YES_NO | wx.ICON_QUESTION)
        dlg.ShowModal()
        dlg.Destroy()

    def run(self) -> None:
        """Run the crawl, then report success or failure to the user."""
        try:
            script.creaetTable(self.tableName, self.column, self.xpath)
        except Exception:
            # Thread boundary: driver, database and input-format errors are not
            # RuntimeError, so catch broadly, keep the traceback on the
            # console, and tell the user instead of dying silently.
            import traceback
            traceback.print_exc()
            # wx widgets may only be used from the GUI thread; CallAfter
            # schedules the dialog there.
            wx.CallAfter(self._show_message, u"爬取失败,请重新检查,或者输入新的url")
        else:
            wx.CallAfter(self._show_message, u"爬取成功!!恭喜")

图形化:借助wxFormBuilder来实现的:ThreadUi.py

代码也与其他模块有接触:

import wx
import wx.xrc
from bbigwork import SqlUi
import pymysql
from bbigwork import ThreadScr, fileSave, analyse, script

# Initial database connection, opened as a side effect of importing this module.
# NOTE(review): the credentials are hard-coded here -- consider loading them
# from configuration instead.
connection = pymysql.connect(user='whp', password='wms111', database='python', charset='utf8',
                             port=3306, host='localhost')
cur = connection.cursor()


# 局限,只能爬取text的文件
class MyFrame2(wx.Frame):
    def __init__(self, parent):
        """Build the main window: two crawl forms and a row of tool buttons.

        The first form (URL field, submit button, five XPath/column-name
        pairs) and the second form (URL field, submit button, four pairs) are
        each laid out in a three-column grid; a final grid holds the
        "view database", "save to file" and "data analysis" buttons.

        Args:
            parent: Parent window passed to ``wx.Frame.__init__``.
        """
        wx.Frame.__init__(self, parent, id=wx.ID_ANY, title=wx.EmptyString, pos=wx.DefaultPosition,
                          size=wx.Size(751, 486), style=wx.DEFAULT_FRAME_STYLE | wx.TAB_TRAVERSAL)

        self.SetSizeHintsSz(wx.DefaultSize, wx.DefaultSize)
        # self.SetSizeHintsSz(-1, -1)
        bSizer1 = wx.BoxSizer(wx.VERTICAL)

        # ---- First form: URL row followed by five XPath / column-name rows ----
        gSizer1 = wx.GridSizer(6, 3, 0, 0)

        self.m_staticText5 = wx.StaticText(self, wx.ID_ANY, u"网站url(1)", wx.DefaultPosition, wx.DefaultSize, 0)
        self.m_staticText5.Wrap(-1)
        gSizer1.Add(self.m_staticText5, 0, wx.ALL, 5)

        # URL (plus table name) input of the first form
        self.m_textCtrl5 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(210, 30), 0)
        gSizer1.Add(self.m_textCtrl5, 0, wx.ALL, 5)

        # Submit button of the first form (bound to execte1 below)
        self.m_button61 = wx.Button(self, wx.ID_ANY, u"提交", wx.DefaultPosition, wx.DefaultSize, 0)
        gSizer1.Add(self.m_button61, 0, wx.ALL, 5)

        # Pair 1: XPath (m_textCtrl7) and column name (m_textCtrl8)
        self.m_staticText7 = wx.StaticText(self, wx.ID_ANY, u"xpath1和列名", wx.DefaultPosition, wx.DefaultSize, 0)
        self.m_staticText7.Wrap(-1)
        gSizer1.Add(self.m_staticText7, 0, wx.ALL, 5)

        self.m_textCtrl7 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(150, -1), 0)
        gSizer1.Add(self.m_textCtrl7, 0, wx.ALL, 5)

        self.m_textCtrl8 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(60, -1), 0)
        gSizer1.Add(self.m_textCtrl8, 0, wx.ALL, 5)

        # Pair 2: XPath (m_textCtrl9) and column name (m_textCtrl10)
        self.m_staticText8 = wx.StaticText(self, wx.ID_ANY, u"xpath2和列名", wx.DefaultPosition, wx.DefaultSize, 0)
        self.m_staticText8.Wrap(-1)
        gSizer1.Add(self.m_staticText8, 0, wx.ALL, 5)

        self.m_textCtrl9 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(150, -1), 0)
        gSizer1.Add(self.m_textCtrl9, 0, wx.ALL, 5)

        self.m_textCtrl10 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(60, -1), 0)
        gSizer1.Add(self.m_textCtrl10, 0, wx.ALL, 5)

        # Pair 3: XPath (m_textCtrl11) and column name (m_textCtrl12)
        self.m_staticText9 = wx.StaticText(self, wx.ID_ANY, u"xpath3和列名", wx.DefaultPosition, wx.DefaultSize, 0)
        self.m_staticText9.Wrap(-1)
        gSizer1.Add(self.m_staticText9, 0, wx.ALL, 5)

        self.m_textCtrl11 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(150, -1), 0)
        gSizer1.Add(self.m_textCtrl11, 0, wx.ALL, 5)

        self.m_textCtrl12 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(60, -1), 0)
        gSizer1.Add(self.m_textCtrl12, 0, wx.ALL, 5)

        # Pair 4: XPath (m_textCtrl13) and column name (m_textCtrl14)
        self.m_staticText10 = wx.StaticText(self, wx.ID_ANY, u"xpath4和列名", wx.DefaultPosition, wx.DefaultSize, 0)
        self.m_staticText10.Wrap(-1)
        gSizer1.Add(self.m_staticText10, 0, wx.ALL, 5)

        self.m_textCtrl13 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(150, -1), 0)
        gSizer1.Add(self.m_textCtrl13, 0, wx.ALL, 5)

        self.m_textCtrl14 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(60, -1), 0)
        gSizer1.Add(self.m_textCtrl14, 0, wx.ALL, 5)

        # Pair 5: XPath (m_textCtrl21) and column name (m_textCtrl22)
        self.m_staticText15 = wx.StaticText(self, wx.ID_ANY, u"xpath5和列名", wx.DefaultPosition, wx.DefaultSize, 0)
        self.m_staticText15.Wrap(-1)
        gSizer1.Add(self.m_staticText15, 0, wx.ALL, 5)

        self.m_textCtrl21 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(150, -1), 0)
        gSizer1.Add(self.m_textCtrl21, 0, wx.ALL, 5)

        self.m_textCtrl22 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(60, -1), 0)
        gSizer1.Add(self.m_textCtrl22, 0, wx.ALL, 5)

        bSizer1.Add(gSizer1, 1, wx.EXPAND, 20)

        # ---- Second form: URL row followed by four XPath / column-name rows ----
        gSizer3 = wx.GridSizer(5, 3, 0, 0)

        self.m_staticText2 = wx.StaticText(self, wx.ID_ANY, u"网站url(2)", wx.DefaultPosition, wx.DefaultSize, 0)
        self.m_staticText2.Wrap(-1)
        gSizer3.Add(self.m_staticText2, 0, wx.ALL, 5)

        # URL (plus table name) input of the second form
        self.m_textCtrl3 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(200, 30), 0)
        gSizer3.Add(self.m_textCtrl3, 0, wx.ALL, 5)

        # Submit button of the second form (bound to execute2 below)
        self.m_button8 = wx.Button(self, wx.ID_ANY, u"提交", wx.DefaultPosition, wx.DefaultSize, 0)
        gSizer3.Add(self.m_button8, 0, wx.ALL, 5)

        # Pair 1: XPath (m_textCtrl15) and column name (m_textCtrl16)
        self.m_staticText11 = wx.StaticText(self, wx.ID_ANY, u"xpath1AndColumnName", wx.DefaultPosition, wx.DefaultSize,
                                            0)
        self.m_staticText11.Wrap(-1)
        gSizer3.Add(self.m_staticText11, 0, wx.ALL, 5)

        self.m_textCtrl15 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(150, -1), 0)
        gSizer3.Add(self.m_textCtrl15, 0, wx.ALL, 5)

        self.m_textCtrl16 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(60, -1), 0)
        gSizer3.Add(self.m_textCtrl16, 0, wx.ALL, 5)

        # Pair 2: XPath (m_textCtrl17) and column name (m_textCtrl18)
        self.m_staticText12 = wx.StaticText(self, wx.ID_ANY, u"xpath2AndColumnName", wx.DefaultPosition, wx.DefaultSize,
                                            0)
        self.m_staticText12.Wrap(-1)
        gSizer3.Add(self.m_staticText12, 0, wx.ALL, 5)

        self.m_textCtrl17 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(150, -1), 0)
        gSizer3.Add(self.m_textCtrl17, 0, wx.ALL, 5)

        self.m_textCtrl18 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(60, -1), 0)
        gSizer3.Add(self.m_textCtrl18, 0, wx.ALL, 5)

        # Pair 3: XPath (m_textCtrl19) and column name (m_textCtrl20)
        self.m_staticText13 = wx.StaticText(self, wx.ID_ANY, u"xpath3AndColumnName", wx.DefaultPosition, wx.DefaultSize,
                                            0)
        self.m_staticText13.Wrap(-1)
        gSizer3.Add(self.m_staticText13, 0, wx.ALL, 5)

        self.m_textCtrl19 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(150, -1), 0)
        gSizer3.Add(self.m_textCtrl19, 0, wx.ALL, 5)

        self.m_textCtrl20 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(60, -1), 0)
        gSizer3.Add(self.m_textCtrl20, 0, wx.ALL, 5)

        # Pair 4: XPath (m_textCtrl23) and column name (m_textCtrl24)
        self.m_staticText17 = wx.StaticText(self, wx.ID_ANY, u"xpath4AndColumnName", wx.DefaultPosition, wx.DefaultSize,
                                            0)
        self.m_staticText17.Wrap(-1)
        gSizer3.Add(self.m_staticText17, 0, wx.ALL, 5)

        self.m_textCtrl23 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(150, -1), 0)
        gSizer3.Add(self.m_textCtrl23, 0, wx.ALL, 5)

        self.m_textCtrl24 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(60, -1), 0)
        gSizer3.Add(self.m_textCtrl24, 0, wx.ALL, 5)

        bSizer1.Add(gSizer3, 1, wx.EXPAND, 5)

        # ---- Bottom row: database viewer, file export and analysis buttons ----
        gSizer5 = wx.GridSizer(0, 3, 0, 0)

        self.m_button5 = wx.Button(self, wx.ID_ANY, u"查看数据库", wx.DefaultPosition, wx.DefaultSize, 0)
        gSizer5.Add(self.m_button5, 0, wx.ALL, 5)

        self.m_button4 = wx.Button(self, wx.ID_ANY, u"保存到本地文件", wx.DefaultPosition, wx.DefaultSize, 0)
        gSizer5.Add(self.m_button4, 0, wx.ALL, 5)

        self.m_button51 = wx.Button(self, wx.ID_ANY, u"数据分析", wx.DefaultPosition, wx.DefaultSize, 0)
        gSizer5.Add(self.m_button51, 0, wx.ALL, 5)


        bSizer1.Add(gSizer5, 0, wx.EXPAND, 5)

        self.SetSizer(bSizer1)
        self.Layout()

        self.Centre(wx.BOTH)

        # Wire each button to its handler method
        self.Bind(wx.EVT_BUTTON, self.openSql, self.m_button5)
        self.Bind(wx.EVT_BUTTON,self.execte1, self.m_button61)
        self.Bind(wx.EVT_BUTTON, self.execute2, self.m_button8)
        self.Bind(wx.EVT_BUTTON, self.save, self.m_button4)
        self.Bind(wx.EVT_BUTTON, self.any, self.m_button51)

    def any(self,event):
        """Handle the data-analysis button by delegating to ``analyse.run``."""
        analyse.run()

    # File-export section
    def save(self, event):
        """Handle the save-to-file button by opening the export window.

        This frame is passed as the parent. Using ``self`` instead of the
        module-level ``frame`` keeps the handler working when the class is
        instantiated outside the ``__main__`` block, where that global is
        never defined.
        """
        fileSave.run(self)

    # UI for querying the database
    def openSql(self, event):
        """Handle the view-database button by opening the query window.

        This frame is passed as the parent. Using ``self`` instead of the
        module-level ``frame`` keeps the handler working when the class is
        instantiated outside the ``__main__`` block, where that global is
        never defined.
        """
        SqlUi.Main(self)

    # Run the crawler for the first form
    def execte1(self, event):
        """Validate the first form and start a crawler thread for it.

        The URL field must hold ``"<url> <table name>"`` separated by a single
        space, because the worker splits the value on ``" "``. Each
        XPath/column-name pair must be either fully filled in or fully empty.
        A dialog is shown for every problem found; the crawl starts only when
        no problem was found and at least one pair is filled in.
        """
        def warn(message):
            # Modal notification owned by this frame.
            dlg = wx.MessageDialog(self, message, u"提示", wx.YES_NO | wx.ICON_QUESTION)
            dlg.ShowModal()
            dlg.Destroy()

        tableName = self.m_textCtrl5.GetValue()
        flag = False  # becomes True as soon as any input problem is found
        if tableName == "":
            flag = True
            warn(u"请输入url")
        else:
            # Reject a missing table name here; otherwise the worker thread
            # fails later on ``split(" ")[1]`` with nothing shown to the user.
            parts = tableName.split(" ")
            if len(parts) < 2 or parts[0] == "" or parts[1] == "":
                flag = True
                warn(u"请在url后加一个空格,再填写要创建的表名")

        # (XPath control, column-name control) in on-screen order.
        pairs = (
            (self.m_textCtrl7, self.m_textCtrl8),
            (self.m_textCtrl9, self.m_textCtrl10),
            (self.m_textCtrl11, self.m_textCtrl12),
            (self.m_textCtrl13, self.m_textCtrl14),
            (self.m_textCtrl21, self.m_textCtrl22),
        )
        xapth = []
        cloumn = []
        for number, (xpath_ctrl, column_ctrl) in enumerate(pairs, start=1):
            xp = xpath_ctrl.GetValue()
            co = column_ctrl.GetValue()
            if xp != "" and co != "":
                xapth.append(xp)
                cloumn.append(co)
            elif xp != "" or co != "":
                # Exactly one half of the pair is filled in.
                warn(u"请检查你的xpath%d,请一一对应输入信息" % number)
                flag = True
        print(xapth)
        print(cloumn)

        if not flag and len(cloumn) > 0:
            print("合法的")
            start = ThreadScr.myThread(tableName, cloumn, xapth)
            start.start()
        else:
            warn(u"请再次检查你的输入,有错误!")



    # Run the crawler for the second form
    def execute2(self,event):
        """Validate the second form and start a crawler thread for it.

        The URL field must hold ``"<url> <table name>"`` separated by a single
        space, because the worker splits the value on ``" "``. Each
        XPath/column-name pair must be either fully filled in or fully empty.
        A dialog is shown for every problem found; the crawl starts only when
        no problem was found and at least one pair is filled in.
        """
        def warn(message):
            # Modal notification owned by this frame.
            dlg = wx.MessageDialog(self, message, u"提示", wx.YES_NO | wx.ICON_QUESTION)
            dlg.ShowModal()
            dlg.Destroy()

        tableName = self.m_textCtrl3.GetValue()
        flag = False  # becomes True as soon as any input problem is found
        if tableName == "":
            flag = True
            warn(u"请输入url")
        else:
            # Reject a missing table name here; otherwise the worker thread
            # fails later on ``split(" ")[1]`` with nothing shown to the user.
            parts = tableName.split(" ")
            if len(parts) < 2 or parts[0] == "" or parts[1] == "":
                flag = True
                warn(u"请在url后加一个空格,再填写要创建的表名")

        # (XPath control, column-name control) in on-screen order.
        pairs = (
            (self.m_textCtrl15, self.m_textCtrl16),
            (self.m_textCtrl17, self.m_textCtrl18),
            (self.m_textCtrl19, self.m_textCtrl20),
            (self.m_textCtrl23, self.m_textCtrl24),
        )
        xapth = []
        cloumn = []
        for number, (xpath_ctrl, column_ctrl) in enumerate(pairs, start=1):
            xp = xpath_ctrl.GetValue()
            co = column_ctrl.GetValue()
            if xp != "" and co != "":
                xapth.append(xp)
                cloumn.append(co)
            elif xp != "" or co != "":
                # Exactly one half of the pair is filled in.
                warn(u"请检查你的xpath%d,请一一对应输入信息" % number)
                flag = True
        print(xapth)
        print(cloumn)

        if not flag and len(cloumn) > 0:
            print("合法的")
            start = ThreadScr.myThread(tableName, cloumn, xapth)
            start.start()
        else:
            # The original dialog text was empty; use the same wording as the
            # first form so the user sees why nothing happened.
            warn(u"请再次检查你的输入,有错误!")

    def __del__(self):
        """No-op finalizer; performs no cleanup."""
        pass


if __name__ == '__main__':
    # Start the GUI, then release both database connections (this module's
    # and script's) once the event loop ends or start-up fails.
    try:
        app = wx.App(False)
        frame = MyFrame2(None)
        frame.Show()
        app.MainLoop()
    finally:
        # Close each resource on its own so that one failing close (for
        # example a connection that is already closed) does not leave the
        # remaining ones open.
        for resource in (cur, connection, script.cur, script.connection):
            try:
                resource.close()
            except pymysql.Error as exc:
                print("close failed: %s" % exc)

数据库部分:SqlUi.py:使用的时候更改数据库的连接参数

import wx
import wx.xrc
import wx.grid
import pymysql
# Custom window id stored as an attribute on the wx module; it is passed as
# the id of the result grid created in MyFrame1.__init__.
wx._biao = 1000

class MyFrame1(wx.Frame):

    def __init__(self, parent):
        """Build the database viewer: table chooser, SQL box and result grid.

        Opens a dedicated MySQL connection, fills the drop-down with the
        output of ``show tables`` and creates an initial 5x5 read-only grid.

        Args:
            parent: Parent window passed to ``wx.Frame.__init__``.
        """
        wx.Frame.__init__(self, parent, id=wx.ID_ANY, title=wx.EmptyString, pos=wx.DefaultPosition,
                          size=wx.Size(700, 400), style=wx.DEFAULT_FRAME_STYLE | wx.TAB_TRAVERSAL)
        # Open the database connection used by this window
        self.connection = pymysql.connect(user='whp', password='wms111', database='python', charset='utf8',
                                         port=3306, host='localhost')
        self.cur = self.connection.cursor()
        # Current grid dimensions (columns / rows), kept in sync by the handlers
        self.weidth = 5
        self.high = 5
        # Initial column labels of the grid
        self.cloumnName = ['A', 'B', 'C', 'D', 'E']

        self.SetSizeHintsSz(wx.DefaultSize, wx.DefaultSize)

        self.sbSizer1 = wx.StaticBoxSizer(wx.StaticBox(self, wx.ID_ANY, wx.EmptyString), wx.VERTICAL)

        self.gSizer1 = wx.GridSizer(0, 2, 0, 0)

        self.m_staticText1 = wx.StaticText(self.sbSizer1.GetStaticBox(), wx.ID_ANY, u"    选择你要展示的的表:", wx.DefaultPosition,
                                           wx.DefaultSize, 0)
        self.m_staticText1.Wrap(-1)
        self.gSizer1.Add(self.m_staticText1, 0, wx.ALL, 5)

        # Collect the name of every table in the database for the drop-down
        self.cur.execute("show tables")
        all = self.cur.fetchall()
        choiceChoices = []
        for i in all:
            print(i[0])
            choiceChoices.append(i[0])

        self.choice = wx.Choice(self.sbSizer1.GetStaticBox(), wx.ID_ANY, wx.DefaultPosition, wx.DefaultSize, choiceChoices,
                                0)
        # Preselect the last table in the list
        self.choice.SetSelection(len(choiceChoices)-1)
        self.gSizer1.Add(self.choice, 0, wx.ALL, 5)

        self.sbSizer1.Add(self.gSizer1, 1, wx.EXPAND, 5)

        # Result grid; uses the custom id stored in wx._biao
        self.m_grid1 = wx.grid.Grid(self.sbSizer1.GetStaticBox(), wx._biao, wx.DefaultPosition, wx.Size(700, 250), 0)

        # Free-form SQL input and the button that runs it
        self.m_textCtrl3 = wx.TextCtrl(self.sbSizer1.GetStaticBox(), wx.ID_ANY, "", wx.DefaultPosition,
                                       wx.Size(300, 30), 0)
        self.gSizer1.Add(self.m_textCtrl3, 0, wx.ALL, 5)

        self.m_button3 = wx.Button(self.sbSizer1.GetStaticBox(), wx.ID_ANY, u"查询", wx.Point(600, -1), wx.DefaultSize, 0)
        self.gSizer1.Add(self.m_button3, 0, wx.ALL, 5)



        # Grid
        self.m_grid1.CreateGrid(self.high, self.weidth)
        self.m_grid1.EnableEditing(False)
        self.m_grid1.EnableGridLines(True)
        self.m_grid1.SetGridLineColour(wx.SystemSettings.GetColour(wx.SYS_COLOUR_WINDOWFRAME))
        self.m_grid1.EnableDragGridSize(False)
        self.m_grid1.SetMargins(0, 0)

        # Columns
        self.m_grid1.EnableDragColMove(True)
        self.m_grid1.EnableDragColSize(True)
        self.m_grid1.SetColLabelSize(30)
        for i in range(len(self.cloumnName)):
            self.m_grid1.SetColLabelValue(i, self.cloumnName[i])
        self.m_grid1.SetColLabelAlignment(wx.ALIGN_CENTRE, wx.ALIGN_CENTRE)

        # Rows
        self.m_grid1.AutoSizeRows()
        self.m_grid1.EnableDragRowSize(True)
        self.m_grid1.SetRowLabelSize(80)
        self.m_grid1.SetRowLabelAlignment(wx.ALIGN_CENTRE, wx.ALIGN_CENTRE)

        # Label Appearance

        # Cell Defaults
        self.m_grid1.SetDefaultCellTextColour(wx.SystemSettings.GetColour(wx.SYS_COLOUR_INACTIVECAPTIONTEXT))
        self.m_grid1.SetDefaultCellAlignment(wx.ALIGN_LEFT, wx.ALIGN_TOP)
        self.sbSizer1.Add(self.m_grid1, 0, wx.ALL, 5)

        self.SetSizer(self.sbSizer1)
        self.Layout()

        self.Centre(wx.BOTH)


        # Drop-down selection shows a table; the button runs the typed SQL
        self.Bind(wx.EVT_CHOICE, self.ChoiceS, self.choice)
        self.Bind(wx.EVT_BUTTON, self.getNews,self.m_button3)

    # Drop-down handler: show the data of the selected table
    def ChoiceS(self, event):
        """Display the table chosen in the drop-down in the grid.

        The grid is resized to the fetched rows and the table's columns,
        column labels are set to the column names, and every cell is filled
        with ``str(value)``.
        """
        selected = self.choice.GetStringSelection()
        if not selected:
            # Nothing is selected (for example the database has no tables).
            return

        _, cloumnName = self.weightAndLength(selected)
        value = self.getDatabseValue(selected)
        # Size the grid from the rows actually fetched, not from the separate
        # count(*) query, so the grid can never be smaller than the data
        # written into it below.
        high = len(value)
        width = len(cloumnName)

        # Rows: grow or shrink in a single call
        if high > self.high:
            self.m_grid1.AppendRows(high - self.high)
        elif high < self.high:
            self.m_grid1.DeleteRows(0, self.high - high)
        self.high = high

        # Columns: grow or shrink in a single call
        if width > self.weidth:
            self.m_grid1.AppendCols(width - self.weidth)
        elif width < self.weidth:
            self.m_grid1.DeleteCols(0, self.weidth - width)
        self.weidth = width

        for i, label in enumerate(cloumnName):
            self.m_grid1.SetColLabelValue(i, label)

        print(value[:2])
        for i, row in enumerate(value):
            for j, cell in enumerate(row):
                self.m_grid1.SetCellValue(i, j, str(cell))





    def weightAndLength(self, table):
        """Return ``(row_count, column_names)`` for ``table``.

        Args:
            table: Name of the table to inspect.

        Returns:
            A tuple of the ``count(*)`` value and a list holding the first
            field of every ``desc`` row (the column names).
        """
        # Identifiers cannot be bound as parameters; backtick-quote the name
        # (doubling embedded backticks) so unusual table names cannot break
        # or alter the statement.
        quoted = '`' + table.replace('`', '``') + '`'
        self.cur.execute("select count(*) from " + quoted)
        high = self.cur.fetchone()
        self.cur.execute("desc " + quoted)
        name = [row[0] for row in self.cur.fetchall()]
        print(str(high[0])+"  "+str(name))
        return high[0], name

    def getDatabseValue(self,table):
        """Return every row of ``table`` as fetched by ``select *``.

        Args:
            table: Name of the table to read.
        """
        # Identifiers cannot be bound as parameters; backtick-quote the name
        # (doubling embedded backticks) so unusual table names cannot break
        # or alter the statement.
        quoted = '`' + table.replace('`', '``') + '`'
        self.cur.execute("select * from " + quoted)
        return self.cur.fetchall()


    def getNews(self, event):
        """Run the SQL typed into the query box and show the result in the grid.

        A database error is reported in a dialog and leaves the grid
        unchanged. An empty result clears all rows and keeps the current
        columns.
        """
        sql = self.m_textCtrl3.GetValue()
        try:
            self.cur.execute(sql)
            allnews = self.cur.fetchall()
        except pymysql.Error as exc:
            # Report bad SQL to the user instead of raising out of the handler.
            dlg = wx.MessageDialog(self, str(exc), u"提示", wx.OK | wx.ICON_ERROR)
            dlg.ShowModal()
            dlg.Destroy()
            return

        high = len(allnews)
        # With no rows there is no first row to measure; keep the column count.
        width = len(allnews[0]) if allnews else self.weidth

        if self.high > high:
            self.m_grid1.DeleteRows(0, self.high - high)
        elif high > self.high:
            self.m_grid1.AppendRows(high - self.high)

        if self.weidth > width:
            self.m_grid1.DeleteCols(0, self.weidth - width)
        elif width > self.weidth:
            self.m_grid1.AppendCols(width - self.weidth)

        self.high = high
        self.weidth = width

        if not allnews:
            return

        # Label column i itself; the original wrote to i+1, which skipped
        # column 0 and addressed a column past the end on the last pass.
        for i in range(width):
            self.m_grid1.SetColLabelValue(i, '第%s项'% str(i))

        for i, row in enumerate(allnews):
            for j, cell in enumerate(row):
                self.m_grid1.SetCellValue(i, j, str(cell))

    def __del__(self):
        """No-op finalizer; performs no cleanup."""
        pass


# if __name__ == '__main__':
def Main(parent):
    """Open the database viewer window and run an event loop for it.

    Args:
        parent: Parent window handed to ``MyFrame1``.

    The viewer's cursor and connection are closed when the event loop ends
    or when start-up fails after the frame was created.
    """
    frame = None
    try:
        app = wx.App(False)
        frame = MyFrame1(parent)
        frame.Show()
        app.MainLoop()
    finally:
        # ``frame`` is still None when construction itself failed (for
        # example the database connection was refused); skipping the cleanup
        # then lets the original error surface instead of a NameError.
        if frame is not None:
            frame.cur.close()
            frame.connection.close()

文件系统:fileSave.py:

        通过sql语句将结果保存在选择的txt文件当中:

import os
import wx
import wx.xrc
from bbigwork import ThreadUi


class MyFrame1(wx.Frame):

    def __init__(self, parent):
        """Build the export window: file chooser, SQL input and result box.

        Args:
            parent: Parent window passed to ``wx.Frame.__init__``.
        """
        wx.Frame.__init__(self, parent, id=wx.ID_ANY, title=wx.EmptyString, pos=wx.DefaultPosition,
                          size=wx.Size(600, 370), style=wx.DEFAULT_FRAME_STYLE | wx.TAB_TRAVERSAL)

        # Target file path; stays None until the user picks a file, so that
        # commit can test it instead of hitting a missing attribute.
        self.path = None

        self.SetSizeHintsSz(wx.DefaultSize, wx.DefaultSize)

        bSizer2 = wx.BoxSizer(wx.VERTICAL)

        # Row 1: "File" button and a disabled text box showing the chosen path
        gSizer4 = wx.GridSizer(0, 2, 0, 0)

        self.m_button7 = wx.Button(self, wx.ID_ANY, u"File", wx.DefaultPosition, wx.DefaultSize, 0)
        gSizer4.Add(self.m_button7, 0, wx.ALL, 5)

        self.m_textCtrl21 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(280, 25), 0)
        self.m_textCtrl21.Enable(False)

        gSizer4.Add(self.m_textCtrl21, 0, wx.ALL, 5)

        bSizer2.Add(gSizer4, 0, wx.EXPAND, 5)

        # Row 2: SQL prompt, SQL input and the commit button
        gSizer6 = wx.GridSizer(0, 2, 0, 0)

        self.m_staticText13 = wx.StaticText(self, wx.ID_ANY, u"输入你的查询sql语句", wx.DefaultPosition, wx.DefaultSize, 0)
        self.m_staticText13.Wrap(-1)
        gSizer6.Add(self.m_staticText13, 0, wx.ALL, 5)

        self.m_textCtrl26 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(280, 25), 0)
        gSizer6.Add(self.m_textCtrl26, 0, wx.ALL, 5)

        self.m_button15 = wx.Button(self, wx.ID_ANY, u"commit", wx.DefaultPosition, wx.DefaultSize, 0)
        gSizer6.Add(self.m_button15, 0, wx.ALL, 5)

        bSizer2.Add(gSizer6, 0, wx.EXPAND, 5)

        # Multi-line box that shows the text written to the file
        self.m_textCtrl25 = wx.TextCtrl(self, wx.ID_ANY, wx.EmptyString, wx.DefaultPosition, wx.Size(580, 250), style=wx.TE_MULTILINE|wx.TE_RICH2)
        bSizer2.Add(self.m_textCtrl25, 0, wx.ALL, 5)

        self.SetSizer(bSizer2)
        self.Layout()

        self.Centre(wx.BOTH)
        self.Bind(wx.EVT_BUTTON, self.openFilePath, self.m_button7)
        self.Bind(wx.EVT_BUTTON,self.commit, self.m_button15)


    def commit(self,event):
        """Run the typed SQL, write the result to the chosen file and show it.

        Each row becomes one line; every value is followed by two spaces.
        When the SQL text or the target path is missing, a dialog is shown
        and nothing is executed or written.
        """
        sql = self.m_textCtrl26.GetValue()
        # ``path`` only exists once a file has been chosen.
        path = getattr(self, 'path', None)
        if not sql or not path:
            dlg = wx.MessageDialog(None, u"请输入sql语句和选择地址", u"提示", wx.YES_NO | wx.ICON_QUESTION)
            dlg.ShowModal()
            dlg.Destroy()
            # Stop here: without this the query would still run and
            # ``open`` would be called with an empty or missing path.
            return

        news = self.getMessage(sql)
        print(news)
        # Build the whole text first, then write it in one call.
        s = ''.join(
            ''.join(str(value) + '  ' for value in row) + '\n'
            for row in news
        )
        with open(path, 'w', encoding='utf-8') as op:
            op.write(s)

        self.m_textCtrl25.SetValue(s)

    def openFilePath(self, event):
        """Let the user choose the output ``.txt`` file and remember its path.

        On confirmation the path is stored in ``self.path`` and shown in the
        path text box; cancelling leaves both unchanged.
        """
        # A save-style dialog fits a "create file" chooser: it accepts a file
        # name that does not exist yet and warns before an existing file is
        # overwritten (commit opens the file in 'w' mode).
        dialog = wx.FileDialog(self, "创建文件:", os.getcwd(),
                               style=wx.FD_SAVE | wx.FD_OVERWRITE_PROMPT, wildcard='*.txt')
        try:
            if dialog.ShowModal() == wx.ID_OK:
                self.path = dialog.GetPath()  # remember the chosen path
                print(self.path)
                self.m_textCtrl21.SetValue(self.path)
        finally:
            # Dialogs are not destroyed automatically when closed.
            dialog.Destroy()


    # Fetch all rows requested by the query
    def getMessage(self, sql):
        """Execute ``sql`` on the shared cursor and return all fetched rows.

        Args:
            sql: SQL statement to execute.

        Returns:
            The rows from ``fetchall()``, or an empty tuple when the
            statement failed (after a dialog has informed the user).
        """
        # Local import: this module does not import pymysql at file level.
        import pymysql

        cur = ThreadUi.cur
        try:
            cur.execute(sql)
            return cur.fetchall()
        except (pymysql.Error, RuntimeError):
            # Database errors are pymysql.Error, not RuntimeError, so the
            # original handler never ran for bad SQL.
            dlg = wx.MessageDialog(None, u"请检查你的sql是否正确", u"提示", wx.YES_NO | wx.ICON_QUESTION)
            dlg.ShowModal()
            dlg.Destroy()
            # Return an empty result instead of an unbound local.
            return ()


    def __del__(self):
        """No-op finalizer; performs no cleanup."""
        pass


def run(parent):
    """Open the export window as a child of ``parent`` and run an event loop.

    NOTE(review): this creates a new ``wx.App`` and starts ``MainLoop`` on
    every call; the visible caller is a button handler of a GUI that is
    presumably already running its own loop -- confirm this nesting is intended.
    """
    app = wx.App(False)
    frame = MyFrame1(parent)
    frame.Show()
    app.MainLoop()


# if __name__ == '__main__':
#     app = wx.App(False)
#     frame = MyFrame1(None)
#     frame.Show()
#     app.MainLoop()

数据分析模块:analyse.py

因为在之前自己的实现当中爬取的是淘宝和京东,所以分析了一下各个数据商品类型的平均值:

通过简单的图表表示出来:

import numpy as np
from matplotlib import pyplot as plt
from bbigwork import ThreadUi


def run():
    """Plot the average price of each crawled product table as a bar chart.

    Reads the ``price`` column of every table in the hard-coded list through
    the cursor shared by ``ThreadUi``, converts each value with ``float``,
    truncates the average to an int and shows one bar per table. A table
    without rows is plotted as 0.
    """
    cur = ThreadUi.cur

    tableName = ['diannao', 'diandyashua', 'qiuxie', 'shouji', 'shubao']

    name = []
    aver = []
    for table in tableName:
        # Table names come from the constant list above, not from user input.
        cur.execute('select price from %s' % table)
        prices = [float(row[0]) for row in cur.fetchall()]
        name.append(table)
        # An empty table has no average; use 0 instead of dividing by zero.
        aver.append(int(sum(prices) / len(prices)) if prices else 0)

    print(name)
    print(aver)

    plt.bar(name, aver)
    plt.show()

这篇关于python期末大作业-图形化爬虫的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!