本文主要是介绍python爬取豆瓣250,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!
import urllib.request
import ssl
import re
import xlwt
import DBUtils
import xlrd
from xlutils.copy import copy
def getContent(ye):
    """Fetch one page of the Douban Top-250 list and return its HTML as text.

    ye: the "start" offset used for pagination (0, 25, 50, ...).
    Returns the decoded UTF-8 page body.
    """
    # Browser-like headers so the site serves the normal page.
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36",
        "Connection": "keep-alive",
    }
    page_url = "https://movie.douban.com/top250?start=%s&filter=" % ye
    request = urllib.request.Request(page_url, headers=request_headers)
    # Download and decode the response body.
    raw = urllib.request.urlopen(request).read()
    return raw.decode("utf-8")
def getItem(content):
    """Extract movie titles from one Top-250 page.

    Titles appear as the alt="..." attribute of images in the page markup.
    The last alt match on each page is dropped (presumably a non-title
    image — confirm against the current page markup).

    content: the page HTML as a string.
    Returns a list of title strings (possibly empty).
    """
    pattern = re.compile(r'alt="(.*?)"')
    titles = re.findall(pattern, content)
    # FIX: the original unconditionally called titles.pop(), which raises
    # IndexError when the page yields no matches; only drop when non-empty.
    if titles:
        titles.pop()
    return titles
def saveExcel():
    """Create the workbook 豆瓣.xls with one sheet and a single header row."""
    workbook = xlwt.Workbook()
    sheet = workbook.add_sheet("豆瓣250")
    # Write the header cells across row 0.
    for col, title in enumerate(["书名"]):
        sheet.write(0, col, title)
    workbook.save("豆瓣.xls")
def wb(list, x):
    """Append the given titles to 豆瓣.xls, one per row, starting at row *x*.

    xlwt cannot edit an existing file in place, so the workbook is opened
    with xlrd, duplicated with xlutils.copy, written to, and saved back
    under the same name (overwriting the old file).

    list: sequence of title strings to write into column 0.
    x: the first row index to write to.
    """
    # NOTE(review): the parameter name `list` shadows the builtin; kept
    # unchanged for interface compatibility with existing callers.
    book = xlrd.open_workbook("豆瓣.xls")
    writable = copy(book)
    sheet = writable.get_sheet(0)
    # FIX: the original enumerate() index was never used; iterate values.
    for title in list:
        sheet.write(x, 0, title)
        x += 1
    writable.save("豆瓣.xls")
def ye():
    """Crawl all ten Top-250 pages, saving titles to Excel and to MySQL.

    Returns the literal string "完成" when every page has been processed.
    """
    # FIX: the original local `ye` shadowed this function itself, and
    # `list` shadowed the builtin; renamed for clarity.
    offset = 0  # Douban paginates in steps of 25 via the start= parameter.
    row = 1     # Row 0 of the sheet holds the header, so data starts at 1.
    saveExcel()
    while offset < 250:
        content = getContent(offset)
        titles = getItem(content)
        wb(titles, row)
        for title in titles:
            # SECURITY NOTE: the title is interpolated straight into the SQL
            # string — a quote in a title breaks (or injects into) the
            # statement. DBUtils.insertData only accepts a raw SQL string;
            # switch it to parameterized queries to fix this properly.
            sql = "insert into tb_use(name) values ('%s');" % title
            DBUtils.insertData(sql)
        offset += 25
        row += 25
    return "完成"
# Run the full crawl and report its completion status.
result = ye()
print(result)
import pymysql.cursors
def getConnect():
    """Open and return a new MySQL connection to the `pymysql` database."""
    # NOTE(review): host is an empty string — confirm the intended MySQL
    # server address before deploying.
    return pymysql.connect(
        host="",
        user="root",
        password="123",
        database="pymysql",
        charset="utf8",
    )
def closeConnect(cursor, conn):
    """Close the cursor and then the connection, skipping any falsy handle."""
    for resource in (cursor, conn):
        if resource:
            resource.close()
def insertData(sql):
    """Execute an INSERT statement and commit it.

    sql: a complete SQL string. The caller is responsible for escaping —
    prefer parameterized queries where the calling interface allows it.
    Returns True if at least one row was affected, otherwise False.
    """
    conn = getConnect()
    cursor = conn.cursor()
    try:
        cursor.execute(sql)
        conn.commit()
        # FIX: read rowcount BEFORE closing — the original read it after
        # closeConnect(), relying on the closed cursor still exposing the
        # attribute.
        count = cursor.rowcount
    finally:
        # FIX: close in a finally so a failed execute cannot leak the
        # connection (original leaked it on any exception).
        closeConnect(cursor, conn)
    return count > 0
这篇关于python爬取豆瓣250的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!