# Note: personally tested and confirmed working (亲测有效).
# encoding: utf-8
"""Download a Bilibili video page's DASH video/audio streams and mux them with ffmpeg."""
import contextlib
import json
import os
import re
import subprocess

import requests  # used to issue the HTTP requests

# Default request headers (kept at module level for backward compatibility).
headers = {
    'Accept': '*/*',
    'Accept-Language': 'en-US,en;q=0.5',
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36'
}

# Seconds to wait for a connection / for the next bytes before giving up.
REQUEST_TIMEOUT = 30
# Bytes written to disk per chunk while streaming a download.
CHUNK_SIZE = 1024 * 1024


class BilibiliVideoSpider(object):
    """Fetch one video page, extract its stream URLs, download and merge them.

    Attributes:
        url: Address of the video page to scrape.
        output_root: Directory that receives the temporary ``video.mp4`` and
            ``audio.mp4`` files while downloading.
        headers: Per-instance copy of the default request headers.
    """

    def __init__(self, url, output_root):
        self.url = url
        self.output_root = output_root
        # Copy so per-request additions (Referer, Range) never leak into the
        # module-level defaults shared by other instances.
        self.headers = dict(headers)

    def _match(self, text, pattern):
        """Return the JSON decoded from group 1 of *pattern* found in *text*.

        Raises:
            ValueError: if the pattern does not occur in *text*, or if the
                captured text is not valid JSON (``json.JSONDecodeError`` is a
                ``ValueError`` subclass).
        """
        match = re.search(pattern, text)
        if match is None:
            print('this pattern was not matched !')
            raise ValueError(f'pattern not found in page: {pattern!r}')
        return json.loads(match.group(1))

    def getHtml(self):
        """Request the video page.

        Returns:
            The ``requests`` response when the status code is 200, otherwise
            ``None`` (non-200 status or a request-level failure).
        """
        try:
            response = requests.get(url=self.url, headers=self.headers, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            print('html reques error !', exc)
            return None
        print(f'status_code: {response.status_code}')
        if response.status_code == 200:
            return response
        return None

    def parseHtml(self, response):
        """Extract the stream URLs and the title from the page response.

        Args:
            response: Value returned by :meth:`getHtml`.

        Returns:
            Tuple ``(video_url, audio_url, video_name)``.

        Raises:
            ValueError: if *response* is ``None`` or the embedded JSON blobs
                cannot be located.
            KeyError, IndexError: if the JSON lacks the expected entries.
        """
        if response is None:
            raise ValueError('no page response to parse (request failed or non-200 status)')
        page = response.text
        # JSON describing the playable streams.
        playinfo = self._match(page, '__playinfo__=(.*?)</script><script>')
        # JSON describing the video itself (title etc.).
        initial_state = self._match(page, r'__INITIAL_STATE__=(.*?);\(function\(\)')
        dash = playinfo['data']['dash']
        # First listed stream of each kind. The original author notes the first
        # video entry is the highest resolution; that ordering is not verified here.
        video_url = dash['video'][0]['baseUrl']
        audio_url = dash['audio'][0]['baseUrl']
        video_name = initial_state['videoData']['title']
        return video_url, audio_url, video_name

    @staticmethod
    def _total_size(response, offset):
        """Return the full size of the resource behind *response*, or ``None``.

        Uses the total from ``Content-Range`` ("bytes start-end/total") when
        present; otherwise falls back to *offset* plus ``Content-Length``.
        """
        total = response.headers.get('Content-Range', '').rpartition('/')[2]
        if total.isdigit():
            return int(total)
        length = response.headers.get('Content-Length', '')
        if length.isdigit():
            return offset + int(length)
        return None

    def _download(self, url, dst, base_headers):
        """Stream *url* into the file *dst* and return the number of bytes written.

        The file is truncated first, so a leftover file from an earlier run is
        never appended to. If a response ends before the advertised total size
        is reached, the download resumes with a ``Range`` request from the
        current offset.

        Raises:
            requests.RequestException: on connection errors or HTTP error status.
            OSError: if the server stops making progress or ignores ``Range``
                on a resume attempt.
        """
        received = 0
        with open(dst, 'wb') as output:
            while True:
                # Fresh dict per request: the Range header must not persist
                # into any other request.
                request_headers = dict(base_headers, Range=f'bytes={received}-')
                with requests.get(url, headers=request_headers, stream=True,
                                  timeout=REQUEST_TIMEOUT) as response:
                    response.raise_for_status()
                    if received and response.status_code != 206:
                        # A full-body reply appended at a non-zero offset would
                        # corrupt the file.
                        raise OSError(f'server ignored the Range header, cannot resume: {url}')
                    total = self._total_size(response, received)
                    before = received
                    for chunk in response.iter_content(chunk_size=CHUNK_SIZE):
                        output.write(chunk)
                        received += len(chunk)
                if total is None or received >= total:
                    return received
                if received == before:
                    raise OSError(f'download stalled at {received} of {total} bytes: {url}')

    def downloadVideo(self, video_url, audio_url, video_name):
        """Download both streams, merge them into ``download.mp4``, remove the temp files.

        Args:
            video_url: Address of the video-only stream.
            audio_url: Address of the audio-only stream.
            video_name: Title, used only in the progress messages.
        """
        # The page address is sent as Referer with every stream request.
        # NOTE(review): presumably the CDN rejects requests without it - confirm.
        request_headers = dict(self.headers, Referer=self.url)
        video = os.path.join(self.output_root, 'video.mp4')
        audio = os.path.join(self.output_root, 'audio.mp4')
        # Destination directory: this script's directory, cut at the first
        # occurrence of 'shippingSchedule' if that appears in the path.
        # NOTE(review): 'shippingSchedule' looks like the author's project
        # folder name - confirm whether this should simply use output_root.
        root_path = os.path.abspath(os.path.dirname(__file__)).split('shippingSchedule')[0]
        video_dst = os.path.join(root_path, 'download.mp4')

        print('开始下载视频: ')
        try:
            video_size = self._download(video_url, video, request_headers)
            print('%s视频大小:' % video_name, video_size)
            audio_size = self._download(audio_url, audio, request_headers)
            print('%s音频大小:' % video_name, audio_size)
            print('视频下载完成')
            self.video_audio_merge(video, audio, video_dst)
        finally:
            # Always clean up the intermediate files, even after a failure.
            for path in (video, audio):
                with contextlib.suppress(FileNotFoundError):
                    os.remove(path)
        print(f'下载的视频: {video_dst}')

    def video_audio_merge(self, video_src, audio_src, video_dst):
        """Merge one video file and one audio file into *video_dst* using ffmpeg.

        All three arguments are complete file paths. Streams are copied without
        re-encoding and an existing destination is overwritten. The call blocks
        until ffmpeg exits, so the inputs may be deleted safely afterwards.

        Raises:
            FileNotFoundError: if the ``ffmpeg`` executable is not on PATH.
            subprocess.CalledProcessError: if ffmpeg exits with a non-zero status.
        """
        # Argument list with no shell: paths containing spaces or shell
        # metacharacters are passed through verbatim.
        command = [
            'ffmpeg', '-y', '-loglevel', 'quiet',
            '-i', video_src,
            '-i', audio_src,
            '-c', 'copy',
            video_dst,
        ]
        subprocess.run(command, check=True)

    def run(self):
        """Fetch the page, parse it, then download and merge the streams."""
        response = self.getHtml()
        video_url, audio_url, video_name = self.parseHtml(response)
        self.downloadVideo(video_url, audio_url, video_name)


def demo():
    """Download a sample video into the current directory."""
    url = 'https://www.bilibili.com/video/BV1Q5411p7bz?from=search&seid=14643382716113842219'
    output_root = './'
    b = BilibiliVideoSpider(url, output_root)
    b.run()


if __name__ == '__main__':
    demo()