Java教程

tiktok

本文主要介绍如何使用 Selenium 和 lxml 采集 TikTok 用户主页下各视频的作者、发布时间、文案、点赞、评论和转发数据,并导出为 Excel 文件,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!
'''
常见问题
1.网速问题,有时候加载不出页面,需要盯着,下滑有时候也没数据
2.滑动验证码

'''
import datetime
import re
import time


def time_turn(timenum):
    """Format a Unix timestamp (seconds) as a local ``YYYY-MM-DD HH:MM:SS`` string.

    Args:
        timenum: The timestamp, either as a string of 1 to 10 digits or as a
            non-negative ``int``. The value is passed through ``str`` first,
            so the integer results of ``time_turns`` / ``today_start`` can be
            handed over directly.

    Returns:
        The formatted local time, or ``None`` (after printing a hint) when
        the value is not a run of 1 to 10 digits.
    """
    digits = str(timenum)
    # Only 1-10 digit values are accepted; anything else is rejected.
    if not (0 < len(digits) < 11 and digits.isdigit()):
        print('请输入11位以内的数字')
        return None
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(digits)))


def today_start():
    """Return the Unix timestamp (whole seconds) of today's local midnight."""
    midnight = datetime.datetime.combine(datetime.date.today(), datetime.time())
    return int(time.mktime(midnight.timetuple()))


def time_turns(time1, year=2022):
    """Convert a Chinese publish-time label into a Unix timestamp (seconds).

    A "发布" / "發布" / "發佈" marker is removed first. The labels understood
    afterwards are:

    * ``昨天 HH:MM``: that clock time yesterday (relative to ``today_start()``)
    * ``昨天``: 24 hours before now
    * ``今天更新`` / ``刚刚``: now
    * ``N小时…`` / ``N小時…`` / ``N分钟…`` / ``N分鐘…`` / ``N天…``: now minus N units
    * ``M月D日[ HH:MM[:SS]]``: that date in ``year`` (midnight without a time)
    * ``YYYY年M月D日[ HH:MM[:SS]]``: that date, only when ``YYYY == year``

    Args:
        time1: The label to convert; it is passed through ``str`` first.
        year: The year assumed for labels that carry none, and the only year
            accepted for labels that carry one. Defaults to 2022; pass the
            current year when scraping newer data.

    Returns:
        The timestamp as an ``int``. ``0`` is returned (after printing a
        notice) when the label names a different year or is not recognised.
    """
    label = str(time1)
    for marker in ('发布', '發布', '發佈'):
        label = label.replace(marker, '')
    label = label.strip()
    now = int(time.time())

    if label == '昨天':
        return now - 24 * 3600
    if label in ('今天更新', '刚刚'):
        return now

    yesterday = re.match(r'昨天\s*(\d{1,2}):(\d{1,2})', label)
    if yesterday:
        hour, minute = (int(part) for part in yesterday.groups())
        return (today_start() - 24 * 3600) + hour * 3600 + minute * 60

    # Relative labels: a count followed by a unit, e.g. "3小时前" or "2天前".
    unit_seconds = {
        '小时': 3600, '小時': 3600,
        '分钟': 60, '分鐘': 60,
        '天': 24 * 3600,
    }
    relative = re.match(r'(\d+)\s*(小时|小時|分钟|分鐘|天)', label)
    if relative:
        return now - int(relative.group(1)) * unit_seconds[relative.group(2)]

    # Absolute labels; the year and the clock time are both optional.
    dated = re.fullmatch(
        r'(?:(\d{4})年)?(\d{1,2})月(\d{1,2})日\s*'
        r'(?:(\d{1,2}):(\d{1,2})(?::(\d{1,2}))?)?',
        label,
    )
    if dated:
        label_year, month, day, hour, minute, second = dated.groups()
        if label_year is None or int(label_year) == int(year):
            try:
                moment = datetime.datetime(
                    int(year), int(month), int(day),
                    int(hour or 0), int(minute or 0), int(second or 0),
                )
            except ValueError:
                # Impossible date or time (e.g. month 13): treat as unrecognised.
                moment = None
            if moment is not None:
                return int(time.mktime(moment.timetuple()))

    print('不是今年的数据,不采集')
    return 0


import random
import pandas as pd
from selenium import webdriver
from lxml import etree
import time

# TikTok profile scraper: opens one profile page, scrolls to load more videos,
# visits every video detail page and exports the collected fields to
# tiktok.xlsx.


def _parse_count(text):
    """Convert a counter label such as '123', '1.2K' or '3M' to an int.

    Raises:
        ValueError: If the label is not a number (optionally with a K/M/B
            suffix).
    """
    text = str(text).strip().replace(',', '')
    multipliers = {'K': 1000, 'M': 1000000, 'B': 1000000000}
    # NOTE(review): large counters appear to be abbreviated on the page
    # (e.g. "1.2K"); plain int() raises ValueError on those - confirm the
    # suffix set against the live site.
    if text and text[-1].upper() in multipliers:
        return int(round(float(text[:-1]) * multipliers[text[-1].upper()]))
    return int(text)


url = 'https://www.tiktok.com/@xiaoqiww'
driver = webdriver.Chrome()
try:
    driver.get(url=url)
    time.sleep(5)
    # Scroll a couple of times, with a random pause, to load more videos.
    for page in range(1, 3):
        time.sleep(random.randint(3, 5))
        print(f'********************第{page}页******************')
        driver.execute_script('window.scrollBy(0,2200)')
    html = driver.page_source
    tree = etree.HTML(html)
    detail_links = tree.xpath('//div[@class="tiktok-yz6ijl-DivWrapper e1cg0wnj1"]//a//@href')
    print(len(detail_links))

    names = []
    publishtimes = []
    contents = []
    loves = []
    comments = []
    shares = []
    second_urls = []
    for second_url in detail_links:
        time.sleep(5)
        print(second_url)
        driver.get(second_url)

        html2 = driver.page_source
        tree2 = etree.HTML(html2)
        # Extract every field before appending anything, so a page that
        # failed to load is skipped as a whole and the columns stay aligned.
        try:
            # Author name
            name = tree2.xpath(
                '/html/body/div[2]/div[2]/div[2]/div[1]/div[1]/div/div[1]/div[1]/a[2]/h3//text()')[0]
            # Publish time: the last text node of the author anchor
            publishtime = tree2.xpath(
                '//div[@data-e2e="recommend-list-item-container"][1]'
                '//a[@class="tiktok-1lqhxf7-StyledAuthorAnchor emt6k1z1"]//text()')[-1]
            publishtime = time_turn(str(time_turns(publishtime)))
            # Caption text
            content = tree2.xpath(
                '/html/body/div[2]/div[2]/div[2]/div[1]/div[1]/div/div[1]/div[2]//text()')[0]
            # Likes
            love = _parse_count(tree2.xpath(
                '/html/body/div[2]/div[2]/div[2]/div[1]/div[1]/div/div[2]/div[2]/button[1]/strong//text()')[0])
            # Comments
            comment = _parse_count(tree2.xpath(
                '/html/body/div[2]/div[2]/div[2]/div[1]/div[1]/div/div[2]/div[2]/button[2]/strong//text()')[0])
            # Shares: the button shows the word "分享" instead of a number
            # when there are none, so that word is mapped to 0.
            share_text = tree2.xpath(
                '/html/body/div[2]/div[2]/div[2]/div[1]/div[1]/div/div[2]/div[2]/button[3]/strong//text()')[0]
            share = _parse_count(''.join(share_text).replace('分享', '0'))
        except (IndexError, ValueError) as err:
            print(f'页面解析失败,跳过: {second_url} ({err})')
            continue

        second_urls.append(second_url)
        names.append(name)
        publishtimes.append(publishtime)
        contents.append(content)
        loves.append(love)
        comments.append(comment)
        shares.append(share)

    data = {
        '详情页链接': second_urls,
        '姓名': names,
        '发布时间': publishtimes,
        '内容': contents,
        '点赞': loves,
        '评论': comments,
        '转发': shares,
    }
    print(data)
    s = pd.DataFrame(data=data)
    s.to_excel('tiktok.xlsx')
    print('保存成功')
finally:
    # Always close the browser, even when scraping fails part-way.
    driver.quit()
这篇关于tiktok的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!