Python教程

Python网络爬虫-青年大学习

本文主要是介绍Python网络爬虫-青年大学习,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!

前提

由于每周都要检查是否所有人都完成了本周的大学习,逐个核对比较麻烦,收集截图虽然也是一种办法,但同样繁琐,因此写了下面的爬虫程序。

整体思路

1、请求网页,获取Cookie(会在整个过程中使用),获取_jfinal_token

 # Load the login page once: it supplies both the session cookie and the
 # form token that the login request needs.
 url = 'http://mp.vol.jxmfkj.com/pub/login?returnUrl=/'
 response = requests.get(url, timeout=10)

 cookies = response.cookies.get_dict()

 # The token is taken from the first value='...' attribute in the page.
 marker = "value='"
 start = response.text.find(marker)
 if start == -1:
     # str.find returns -1 when the marker is missing; slicing from that
     # offset would silently produce a garbage token.
     raise SystemExit("login page did not contain a value='...' token")
 start += len(marker)
 end = response.text.find("'", start)
 if end == -1:
     raise SystemExit("login page token is not terminated by a quote")

 _jfinal_token = response.text[start:end]

 session = cookies.get("JSESSIONID")
 if session is None:
     raise SystemExit("login page did not set a JSESSIONID cookie")

2、模拟登录请求(初步判断其作用是使Cookie生效)

2.1、加密后的密码如何查看

1)首先打开登陆页面

2)打开F12中的网络

3)登录

4)在F12中找到如下包

 

 然后点击该请求,在右侧的“载荷”(Payload)中就可以看到加密后的密码

2.2 代码:

def step1(_jfinal_token, session, timeout=10):
    """Submit the login form using the token and cookie from the login page.

    Posts the credentials to ``/pub/login/submit`` and then requests the site
    root, both carrying the same ``JSESSIONID`` cookie.  Presumably this is
    what makes the session cookie valid for later requests (cannot be
    confirmed from this code alone).

    Args:
        _jfinal_token: Token value scraped from the login page.
        session: Bare ``JSESSIONID`` cookie value (no ``JSESSIONID=`` prefix).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    cookie = "JSESSIONID=" + session

    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Origin': 'http://mp.vol.jxmfkj.com',
        'Referer': 'http://mp.vol.jxmfkj.com/pub/login',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36',
        'Cookie': cookie,
    }
    # userCode / password are placeholders: fill in the back-office account
    # and the already-encrypted password captured from the browser's
    # network panel.
    body_value = {
        "_jfinal_token": _jfinal_token,
        "userCode": "后台账户",
        "password": "加密后的密码",
        "verifyCode": "",
    }
    requests.post(
        url="http://mp.vol.jxmfkj.com/pub/login/submit?returnUrl=/",
        data=body_value,
        headers=headers,
        timeout=timeout,
    )
    # Follow-up request to the site root; its response is not used.
    # NOTE(review): kept because the original flow issues it after login;
    # confirm whether the server actually requires it.
    requests.get(url="http://mp.vol.jxmfkj.com/", headers=headers, timeout=timeout)

3、模拟请求,爬取已完成人员的名单

def step2(session, timeout=10):
    """Fetch the class study-record list and return its ``list`` field.

    Only the first page is requested (``pageNumber=1&pageSize=50`` is fixed
    in the URL), so presumably at most 50 records come back.

    Args:
        session: Bare ``JSESSIONID`` cookie value (no ``JSESSIONID=`` prefix).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The value stored under the ``'list'`` key of the JSON response.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
        KeyError: If the JSON response has no ``'list'`` key.
    """
    cookie = "JSESSIONID=" + session
    # NOTE: this echoes the session cookie to stdout.
    print(cookie)
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip,deflate,br",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36",
        "Cookie": cookie,
        "Host": "mp.vol.jxmfkj.com",
        "Referer": "http://mp.vol.jxmfkj.com/portal/vol/jxgqtClassRecord/index",
    }
    response = requests.get(
        url="http://mp.vol.jxmfkj.com/portal/vol/jxgqtClassRecord/list?iclassId=9&inid=N0013000510051004&pageNumber=1&pageSize=50",
        headers=headers,
        timeout=timeout,
    )
    # Named ``payload`` rather than ``list`` to avoid shadowing the builtin.
    payload = response.json()
    return payload['list']

4、从MySQL读入班级或组织名单(学号、姓名等),在此之前需要先整理出已完成人员的名单

def step3(list):
    """Return the ``username`` value of every record, in input order.

    Args:
        list: Iterable of mappings that each contain a ``'username'`` key.

    Returns:
        A new list holding one username per input record.
    """
    return [record['username'] for record in list]
def step4():
    """Read every row of the ``info`` table from the local ``tt`` database.

    Returns:
        The result of ``cursor.fetchall()`` for ``select * from info``.

    Raises:
        Exception: Any error raised while querying is printed and then
            re-raised, so the caller sees the real failure instead of an
            ``UnboundLocalError`` on the return value.
    """
    conn = pymysql.connect(
        host='localhost',
        user='root',
        password='root',
        db='tt',
        charset='utf8'
    )
    try:
        # The cursor is used as a context manager so it is closed even when
        # the query fails.
        with conn.cursor() as cur:
            query = "select * from info "
            cur.execute(query)
            data = cur.fetchall()
    except Exception as e:
        print("exception", e)
        raise
    finally:
        # Release the connection on both the success and the failure path.
        conn.close()
    print('finish')
    return data

5、验证查看

 # Print the first column of every roster row that matches nobody in the
 # finished list.
 for row in data:
     # A row counts as done when either of its first two columns is found.
     matched = row[0] in finish or row[1] in finish
     if not matched:
         print(row[0])

!!!!!!!!!!!注意!!!!!!!!!!!!!!!!!

只为简便工作使用,切记不要恶意给服务器增加压力,切记不要恶意使用

完整代码

import requests
import pymysql

def step1(_jfinal_token, session, timeout=10):
    """Submit the login form using the token and cookie from the login page.

    Posts the credentials to ``/pub/login/submit`` and then requests the site
    root, both carrying the same ``JSESSIONID`` cookie.  Presumably this is
    what makes the session cookie valid for later requests (cannot be
    confirmed from this code alone).

    Args:
        _jfinal_token: Token value scraped from the login page.
        session: Bare ``JSESSIONID`` cookie value (no ``JSESSIONID=`` prefix).
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        None.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
    """
    cookie = "JSESSIONID=" + session

    headers = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Origin': 'http://mp.vol.jxmfkj.com',
        'Referer': 'http://mp.vol.jxmfkj.com/pub/login',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36',
        'Cookie': cookie,
    }
    # userCode / password are placeholders: fill in the back-office account
    # and the already-encrypted password captured from the browser's
    # network panel.
    body_value = {
        "_jfinal_token": _jfinal_token,
        "userCode": "后台账户",
        "password": "加密后的密码",
        "verifyCode": "",
    }
    requests.post(
        url="http://mp.vol.jxmfkj.com/pub/login/submit?returnUrl=/",
        data=body_value,
        headers=headers,
        timeout=timeout,
    )
    # Follow-up request to the site root; its response is not used.
    # NOTE(review): kept because the original flow issues it after login;
    # confirm whether the server actually requires it.
    requests.get(url="http://mp.vol.jxmfkj.com/", headers=headers, timeout=timeout)

def step2(session, timeout=10):
    """Fetch the class study-record list and return its ``list`` field.

    Only the first page is requested (``pageNumber=1&pageSize=50`` is fixed
    in the URL), so presumably at most 50 records come back.

    Args:
        session: Bare ``JSESSIONID`` cookie value (no ``JSESSIONID=`` prefix).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The value stored under the ``'list'`` key of the JSON response.

    Raises:
        requests.exceptions.RequestException: On connection failure or timeout.
        KeyError: If the JSON response has no ``'list'`` key.
    """
    cookie = "JSESSIONID=" + session
    # NOTE: this echoes the session cookie to stdout.
    print(cookie)
    headers = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip,deflate,br",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Connection": "keep-alive",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.84 Safari/537.36",
        "Cookie": cookie,
        "Host": "mp.vol.jxmfkj.com",
        "Referer": "http://mp.vol.jxmfkj.com/portal/vol/jxgqtClassRecord/index",
    }
    response = requests.get(
        url="http://mp.vol.jxmfkj.com/portal/vol/jxgqtClassRecord/list?iclassId=9&inid=N0013000510051004&pageNumber=1&pageSize=50",
        headers=headers,
        timeout=timeout,
    )
    # Named ``payload`` rather than ``list`` to avoid shadowing the builtin.
    payload = response.json()
    return payload['list']
def step3(list):
    """Return the ``username`` value of every record, in input order.

    Args:
        list: Iterable of mappings that each contain a ``'username'`` key.

    Returns:
        A new list holding one username per input record.
    """
    return [record['username'] for record in list]
def step4():
    """Read every row of the ``info`` table from the local ``tt`` database.

    Returns:
        The result of ``cursor.fetchall()`` for ``select * from info``.

    Raises:
        Exception: Any error raised while querying is printed and then
            re-raised, so the caller sees the real failure instead of an
            ``UnboundLocalError`` on the return value.
    """
    conn = pymysql.connect(
        host='localhost',
        user='root',
        password='root',
        db='tt',
        charset='utf8'
    )
    try:
        # The cursor is used as a context manager so it is closed even when
        # the query fails.
        with conn.cursor() as cur:
            query = "select * from info "
            cur.execute(query)
            data = cur.fetchall()
    except Exception as e:
        print("exception", e)
        raise
    finally:
        # Release the connection on both the success and the failure path.
        conn.close()
    print('finish')
    return data



if __name__ == '__main__':
    # Load the login page once: it supplies both the session cookie and the
    # form token that the login request needs.
    url = 'http://mp.vol.jxmfkj.com/pub/login?returnUrl=/'
    response = requests.get(url, timeout=10)

    cookies = response.cookies.get_dict()

    # The token is taken from the first value='...' attribute in the page.
    marker = "value='"
    start = response.text.find(marker)
    if start == -1:
        # str.find returns -1 when the marker is missing; slicing from that
        # offset would silently produce a garbage token.
        raise SystemExit("login page did not contain a value='...' token")
    start += len(marker)
    end = response.text.find("'", start)
    if end == -1:
        raise SystemExit("login page token is not terminated by a quote")
    _jfinal_token = response.text[start:end]

    session = cookies.get("JSESSIONID")
    if session is None:
        raise SystemExit("login page did not set a JSESSIONID cookie")

    step1(_jfinal_token, session)
    records = step2(session)
    # A set gives constant-time membership tests in the loop below.
    finished = set(step3(records))
    data = step4()

    # Print the first column of every roster row whose first two columns
    # (presumably student ID and name; verify against the ``info`` schema)
    # both fail to match a finished username.
    for row in data:
        if row[0] not in finished and row[1] not in finished:
            print(row[0])

 

这篇关于Python网络爬虫-青年大学习的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!