Python教程

爬取意林杂志&&Python操作excel

本文主要介绍如何用 Python 爬取《意林》杂志的文章,以及如何用 Python(xlwt)生成 Excel 表格,对大家解决编程问题具有一定的参考价值,需要的程序员们随着小编一起来学习吧!

前言

刘太太对《意林》上的一篇文章印象很深,但现在只记得其中两句话。
我想着把《意林》的所有文章都爬下来,应该就能找到,
于是就写了这个小玩意。
BTW
刘太太已经是我npy啦!

import re
import requests
from bs4 import BeautifulSoup
import os

def GetLinkSum():
    """Collect the links listed in the table cells of the Yilin home page.

    Fetches the site root, selects every ``<a>`` that is a direct child of a
    ``<td>`` and keeps its ``href`` value.

    Returns:
        list[dict]: one ``{'link': href}`` dict per matching anchor that
        actually carries an ``href``, in document order.

    Raises:
        requests.RequestException: if the page cannot be fetched (including
            when the timeout expires).
    """
    url = 'https://www.yilinzazhi.com/'
    # Without a timeout a stalled connection would block the crawl forever.
    response = requests.get(url, timeout=30)
    soup = BeautifulSoup(response.text, 'lxml')
    # Anchors without an href would yield None and break the later
    # URL concatenation, so they are skipped here.
    result = [
        {'link': anchor.get('href')}
        for anchor in soup.select('td>a')
        if anchor.get('href')
    ]
    print('read:', len(result))
    return result
def articaltitle(aim):
    """Walk every issue page in *aim* and pass each article URL to ``artical``.

    Args:
        aim: sequence of dicts whose ``'link'`` value is an issue-index path
            relative to the site root (the shape ``GetLinkSum`` returns).

    Returns:
        None. Progress is reported with ``print`` only.
    """
    site = 'https://www.yilinzazhi.com/'
    for i, entry in enumerate(aim):
        issue_url = site + entry['link']
        # The part before 'index.html' is the issue's directory prefix; the
        # article hrefs on the issue page are appended to it.
        date = entry['link'].split('index.html')
        page = requests.get(issue_url)
        soup = BeautifulSoup(page.text, 'lxml')
        for anchor in soup.select('span > a'):
            article_url = site + date[0] + anchor.get('href')
            artical(article_url)
            print('第{0}期第{1}篇'.format(date[0], anchor.get('href')))
        print('i:', i)
def _safe_filename(title, fallback):
    """Return *title* with characters that Windows forbids in file names replaced."""
    cleaned = re.sub(r'[\\/:*?"<>|\r\n\t]', '_', title).strip(' .')
    return cleaned or fallback


def artical(url, out_dir="F:\\PyCode\\Notes\\YiLin"):
    """Download one article, report keyword hits and save its text to disk.

    The page's ``div > p`` paragraphs are searched for three hard-coded
    phrases; every hit prints the paragraph markup and the article URL.
    The paragraph text is then written, one paragraph per line, to
    ``<out_dir>/<first path segment of url>/<title>.txt``.

    Args:
        url: absolute article URL; its first path segment (index 3 after
            splitting on '/') names the output sub-directory.
        out_dir: root directory for the saved articles.

    Raises:
        requests.RequestException: if the page cannot be fetched.
        IndexError: if *url* has no path segment after the host.
        OSError: if the output directory or file cannot be created.
    """
    response = requests.get(url, timeout=30)
    # Decode the raw bytes as UTF-8 directly. Round-tripping ``.text`` through
    # ISO-8859-1 only works when requests happened to guess ISO-8859-1 and
    # raises UnicodeEncodeError for any other detected encoding.
    html = response.content.decode('utf-8', errors='replace')
    soup = BeautifulSoup(html, 'lxml')
    paragraphs = soup.select('div> p')

    markup = str(paragraphs)
    keywords = (
        ('str1', '异乡的情侣'),
        ('str2', '共同的兰州'),
        ('str3', '兰州'),
    )
    for label, keyword in keywords:
        if keyword in markup:
            print(markup)
            print(label + ':', url)

    dictionary = url.split('/')[3]
    heading = soup.select('body>div.wrap>div>div.blkContainer>div>h1')
    # Fall back to the page's own file name when the heading is missing, so a
    # single odd page does not abort the whole crawl.
    fallback = os.path.splitext(url.rsplit('/', 1)[-1])[0] or 'untitled'
    raw_title = heading[0].get_text() if heading else ''
    filename = _safe_filename(raw_title, fallback)

    folder = os.path.join(out_dir, dictionary)
    os.makedirs(folder, exist_ok=True)
    target = os.path.join(folder, '{0}.txt'.format(filename))
    with open(target, "w", encoding="utf-8") as f:
        # Write the paragraph text itself rather than splitting the list's
        # repr, which leaked '[', ', ' and ']' into the saved file.
        f.writelines(paragraph.get_text() + "\n" for paragraph in paragraphs)

if __name__ == '__main__':
    # Crawl entry point: collect the links from the home page, then visit
    # each linked issue page and process every article found on it.
    status=GetLinkSum()
    # articaltitle has no return statement, so aim is always None.
    aim=articaltitle(status)

import xlwt
def ReadParameter():
    """Ask on stdin for the bank and valve counts of the three sections.

    Six integers are read in order (bank count, then valves per bank, for
    each section) and echoed back as a three-line summary.

    Returns:
        list[list[int]]: ``[[banks, valves_per_bank], ...]``, one pair per
        section, in the order the prompts were asked.

    Raises:
        ValueError: if any answer is not a valid integer.
    """
    questions = (
        ("请输入加密段台架个数:", "请输入加密段每台架水阀数:"),
        ("请输入常规冷却段台架个数:", "请输入常规冷却段每台架水阀数:"),
        ("请输入微调段台架个数:", "请输入微调段每台架水阀个数:"),
    )
    summaries = (
        "加密段台架个数:{0},水阀个数{1}",
        "常规冷却段台架个数:{0},水阀个数{1}",
        "微调段台架个数:{0},水阀个数{1}",
    )

    # All six answers are collected before anything is echoed back.
    demo = []
    for bank_prompt, valve_prompt in questions:
        bank_count = int(input(bank_prompt))
        valve_count = int(input(valve_prompt))
        demo.append([bank_count, valve_count])

    print()
    for template, pair in zip(summaries, demo):
        print(template.format(pair[0], pair[1]))

    return demo
def change8(demo):
    """Advance a bit index by one, restarting at 0 once it reaches 8.

    Args:
        demo: current index.

    Returns:
        ``demo + 1``, or 0 when that value is 8 or greater.
    """
    advanced = demo + 1
    return advanced if advanced < 8 else 0
def change16(demo):
    """Advance a 1-based counter by one, restarting at 1 after 16.

    Args:
        demo: current counter value.

    Returns:
        ``demo + 1``, or 1 when that value is greater than 16.
    """
    advanced = demo + 1
    return 1 if advanced > 16 else advanced
def _valves_for_bank(base, bank_index):
    """Return the valves-per-bank value of the section that owns *bank_index*."""
    if bank_index < base[0][0]:
        return base[0][1]
    if bank_index < base[0][0] + base[1][0]:
        return base[1][1]
    return base[2][1]


def _write_top_spray_row(sheet, row, bank_no, valve_no, state):
    """Write one signal row and advance the running counters in *state*.

    *valve_no* is the 1-based valve number, or None for a reserved row.
    Reserved rows get no L2 variable name and do not consume an ``spr`` index.
    """
    if valve_no is None:
        sheet.write(row, 1, '预留')
    else:
        sheet.write(row, 1, 'BANK {0}-{1}上喷有效信号'.format(bank_no, valve_no))

    # Only the first bit of each byte carries the type and the byte offset.
    if state['bit'] == 0:
        sheet.write(row, 3, 'BYTE')
        sheet.write(row, 5, state['offset'])
        state['offset'] += 1
    else:
        sheet.write(row, 3, '---')
    sheet.write(row, 4, 'BIT{0}'.format(state['bit']))
    state['bit'] = change8(state['bit'])

    if valve_no is not None:
        sheet.write(row, 6, 'pIO->ctcRead.rtsAvl.spr[0][{0}]'.format(state['spr']))
        state['spr'] += 1

    sheet.write(row, 7, 'CTCL2_OUT_Bank{0}_Top.spr{1}'.format(bank_no, state['ctcl2']))
    state['ctcl2'] = change16(state['ctcl2'])


def First(base, name="DaDongHai"):
    """Generate the 'L1->L2(CTC)' variable table and save it as an .xls file.

    Every bank occupies a block of 16 rows below the five header rows: one
    row per valve, padded with reserved rows. Bit positions cycle 0-7 and a
    new byte offset (starting at 2) is emitted each time the bit returns
    to 0. The workbook is saved in the current directory as
    ``层冷L1-L2通讯变量表(<name>).xls``.

    Args:
        base: three ``[bank_count, valves_per_bank]`` pairs, one per section,
            in the order the banks are laid out.
        name: project name inserted into the output file name.
    """
    rows_per_bank = 16
    startpoint = 5  # first data row; rows 0-4 hold the banner and headers

    xls = xlwt.Workbook()
    sht1 = xls.add_sheet('L1->L2(CTC)', cell_overwrite_ok=True)
    # The reverse-direction sheet is created but left empty.
    xls.add_sheet('L2->L1(CTC)', cell_overwrite_ok=True)

    titles = ['Group Name', 'Pin Comment', '管脚注释', 'Type', 'Unit', 'Offset',
              'L2 Var Name', 'Wincc Var Name', 'Note']
    firstdata = ['', '数据包ID', '', 'INT', '', '0', '', '', 'IOMASTER读配置文件,自动添加']

    for column, width in ((0, 200), (1, 450), (6, 450), (7, 450), (8, 400)):
        sht1.col(column).width = width * 20

    # Banner rows spanning all nine columns.
    sht1.write_merge(startpoint - 5, startpoint - 5, 0, 8,
                     'DC TO CTC Data.Communication Table(CTC)')
    sht1.write_merge(startpoint - 4, startpoint - 4, 0, 8,
                     'CTC IP:10.{0}.10.161 端口:1611 二级服务器主机IP:10.{0}.10.184 端口:4611 数据长度:224 byte ID:1 DB:159'.format(24))
    sht1.write_merge(startpoint - 3, startpoint - 3, 0, 8,
                     'CTC IP:10.{0}.10.161 端口:2611 二级服务器主机IP:10.{0}.10.186 端口:4611 数据长度:224 byte ID:1 DB:159'.format(24))

    # Column titles and the fixed first data-description row.
    for column, (title, first_value) in enumerate(zip(titles, firstdata)):
        sht1.write(startpoint - 2, column, title)
        sht1.write(startpoint - 1, column, first_value)

    # Counters that run continuously across all banks.
    state = {'offset': 2, 'bit': 0, 'spr': 0, 'ctcl2': 1}

    # Top-spray signals.
    total_banks = base[0][0] + base[1][0] + base[2][0]
    for bank in range(total_banks):
        first_row = startpoint + bank * rows_per_bank
        sht1.write_merge(first_row, first_row + rows_per_bank - 1, 0, 0, '')
        valves = _valves_for_bank(base, bank)
        for slot in range(0, valves):
            _write_top_spray_row(sht1, first_row + slot, bank + 1, slot + 1, state)
        for slot in range(valves, rows_per_bank):
            _write_top_spray_row(sht1, first_row + slot, bank + 1, None, state)

    # Bottom-spray signals are not generated yet.
    xls.save('./层冷L1-L2通讯变量表({0}).xls'.format(name))
if __name__ == '__main__':
    # Interactive alternative to the hard-coded layout below:
    # base=ReadParameter()
    # print(base)
    # Sample layout: three [bank count, valves per bank] pairs.
    base=[[8,10],[8,4],[2,8]]
    First(base)



这篇关于爬取意林杂志&&Python操作excel的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!