Python爬虫,B站视频下载源码脚本工具助手学习参考

B站,bilibili,以二次元闻名的站点,同时也是学习的天堂,推荐大家学习,尤其是你想要视频学习的话,里面有很多大佬发布了学习视频资源,如果有意,也可以下载下来慢慢看,慢慢学,对于本渣渣来说,收藏从未停止,学习从未开始!

pic_001.png

这里推荐一个python基础学习视频教程,来自于莫烦python

https://www.bilibili.com/video/av16926522?p=1

pic_002.png

友情提示:本渣渣没看过,因为,我太懒了。。

对于喜欢的视频,本渣渣当然是先down下来啦,尤其是身为白嫖怪,下载==学会!

我已经学废了,那么,你呢?!

前面本渣渣已经分享过《宝藏下载神器,python一键下载B站视频》,强烈推荐使用,白嫖到底!!

不过调包侠还是有很多不方便之处,看不懂大佬的代码,不会修改调用!!

趁着有空,找了相关资料参考,找到接口重新写了一下B站视频下载爬虫,仅供参考和学习使用哈!

接口一:获取cid值 https://api.bilibili.com/x/player/pagelist?aid=69542806&jsonp=jsonp

   def get_cid(self,avid):
        headers = {"User-Agent": self.ua.random}
        url=f'https://api.bilibili.com/x/player/pagelist?aid={avid}&jsonp=jsonp'
        cid_json = requests.get(url=url,headers=headers).json()
        print(cid_json)
        cid = cid_json['data'][0]['cid']
        print(cid)
        return cid

 接口二:获取真实地址 https://api.bilibili.com/x/player/playurl?avid=69542806&cid=120570181&qn=32&type=&otype=json

    def get_vedio_url(self,avid,cid):
        url=f'https://api.bilibili.com/x/player/playurl?avid={avid}&cid={cid}&qn=32&type=&otype=json'
        print(url)
        headers = {"User-Agent": self.ua.random}
        vedio_url_json=requests.get(url=url,headers=headers).json()
        print(vedio_url_json)
        vedio_url=vedio_url_json['data']['durl'][0]['url']
        print(vedio_url)
        vedio_size=vedio_url_json['data']['durl'][0]['size']
        vedio_size=vedio_size/1024/1024
        vedio_size ="%.2fM" % vedio_size
        print(vedio_size)
        video_con=vedio_url,vedio_size
        return video_con

B站视频下载存在反爬,请注意请求头,一定要携带 Referer

    #下载视频
    def get_vedio(self,vedio_url,title):
        headers = {
            "Referer": "https://www.bilibili.com",
            "User-Agent": self.ua.random
        }
        print("开始下载视频..")
        r=requests.get(url=vedio_url,headers=headers)
        with open(f'{title}.flv',"wb") as f:
            f.write(r.content)
        print("下载视频完成!")

附完整源码参考:

# -*- coding: utf-8 -*-
#author:微信:huguo00289
import requests
from fake_useragent import UserAgent
import re


class Bz(object):
    """Small Bilibili downloader for a single video page.

    Workflow: ``get_html`` scrapes the title and av id from the video page,
    ``get_cid`` and ``get_vedio_url`` resolve the direct media URL through the
    player API, and ``get_vedio`` saves the media as ``<title>.flv``.
    """

    # Seconds to wait for a connection / for the next bytes of a response.
    # Without a timeout requests can block forever on a stalled connection.
    TIMEOUT = 10
    # Number of bytes written to disk per chunk while downloading.
    CHUNK_SIZE = 1024 * 1024

    def __init__(self,url):
        """Store the video page URL and create the random User-Agent source.

        Args:
            url: URL of the Bilibili video page to download from.
        """
        self.ua=UserAgent()
        self.url=url

    def _get(self, url, referer=None, stream=False):
        """Send a GET request with a random User-Agent and a timeout.

        Args:
            url: Address to request.
            referer: Optional value for the ``Referer`` header.
            stream: Passed through to ``requests.get``; when true the body is
                not downloaded until it is read.

        Returns:
            The ``requests.Response`` object.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        headers = {"User-Agent": self.ua.random}
        if referer is not None:
            headers["Referer"] = referer
        resp = requests.get(url=url, headers=headers,
                            timeout=self.TIMEOUT, stream=stream)
        # Fail loudly instead of parsing or saving an error page.
        resp.raise_for_status()
        return resp

    @staticmethod
    def _extract_avid(rurl):
        """Return the digits following ``video/av`` in ``rurl``.

        Unlike a pattern that requires a trailing slash, this also accepts
        URLs that end right after the id or continue with a query string.

        Raises:
            IndexError: If ``rurl`` contains no ``video/av<digits>`` part.
        """
        match = re.search(r"video/av(\d+)", rurl)
        if match is None:
            raise IndexError(f"no av id found in video URL: {rurl!r}")
        return match.group(1)

    def get_html(self):
        """Fetch the video page and extract title, canonical URL and av id.

        Returns:
            A ``(title, rurl, avid)`` tuple. ``title`` has file-name-illegal
            characters replaced by ``_``; ``rurl`` is the content of the
            page's ``itemprop="url"`` meta tag; ``avid`` is the av id parsed
            from ``rurl``.

        Raises:
            requests.HTTPError: If the page request fails.
            IndexError: If the title, the meta tag, or the av id is not found.
        """
        html = self._get(self.url).content.decode("utf-8")
        title=re.findall('<title data-vue-meta="true">(.+?)_哔哩哔哩',html)[0]
        title=self.filter(title)
        print(title)
        rurl=re.findall('<meta data-vue-meta="true" itemprop="url" content="(.+?)">',html)[0]
        print(rurl)
        avid = self._extract_avid(rurl)
        print(avid)
        vedio_parm=title,rurl,avid
        return vedio_parm

    # Replace characters that are not allowed in file names
    def filter(self, old_str):
        """Return ``old_str`` with each of ``| / < > : * ? \\ "`` replaced by ``_``."""
        pattern = r'[\|\/\<\>\:\*\?\\\"]'
        new_str = re.sub(pattern, "_", old_str)  # substitute illegal characters
        return new_str

    # Get the cid
    def get_cid(self,avid):
        """Look up the cid of a video from its av id.

        Returns the ``cid`` field of the first entry in the ``data`` list of
        the ``x/player/pagelist`` response.

        Raises:
            requests.HTTPError: If the API request fails.
            KeyError / IndexError: If the JSON has no ``data[0]['cid']``.
        """
        url=f'https://api.bilibili.com/x/player/pagelist?aid={avid}&jsonp=jsonp'
        cid_json = self._get(url).json()
        print(cid_json)
        cid = cid_json['data'][0]['cid']
        print(cid)
        return cid

    # Get the real FLV address of the video
    def get_vedio_url(self,avid,cid):
        """Resolve the direct media URL and human-readable size of a video.

        Calls the ``x/player/playurl`` endpoint (quality parameter fixed at
        ``qn=32``) and reads the first entry of ``data.durl``.

        Returns:
            A ``(url, size)`` tuple where ``size`` is the reported byte size
            converted to MiB and formatted like ``"12.34M"``.

        Raises:
            requests.HTTPError: If the API request fails.
            KeyError / IndexError: If the JSON has no ``data.durl[0]`` entry.
        """
        url=f'https://api.bilibili.com/x/player/playurl?avid={avid}&cid={cid}&qn=32&type=&otype=json'
        print(url)
        vedio_url_json = self._get(url).json()
        print(vedio_url_json)
        first_part = vedio_url_json['data']['durl'][0]
        vedio_url = first_part['url']
        print(vedio_url)
        # The API reports the size in bytes; show it in MiB with two decimals.
        vedio_size = f"{first_part['size']/1024/1024:.2f}M"
        print(vedio_size)
        video_con=vedio_url,vedio_size
        return video_con

    # Download the video
    def get_vedio(self,vedio_url,title):
        """Download the media at ``vedio_url`` into ``<title>.flv``.

        The response is streamed to disk in chunks so the whole video is never
        held in memory at once. A ``Referer`` header pointing at the site is
        sent with the request.

        Args:
            vedio_url: Direct media URL returned by ``get_vedio_url``.
            title: File name stem; ``.flv`` is appended.

        Raises:
            requests.HTTPError: If the server answers with an HTTP error
                status; in that case no file is created.
        """
        print("开始下载视频..")
        with self._get(vedio_url, referer="https://www.bilibili.com",
                       stream=True) as r:
            with open(f'{title}.flv',"wb") as f:
                for chunk in r.iter_content(chunk_size=self.CHUNK_SIZE):
                    if chunk:
                        f.write(chunk)
        print("下载视频完成!")


if __name__=="__main__":
    # Demo run: scrape one video page, resolve its media URL, save it locally.
    page_url = "https://www.bilibili.com/video/BV1xh411k7by"
    spider = Bz(page_url)

    # get_html returns (title, canonical url, av id); the url is not needed here.
    title, _canonical_url, avid = spider.get_html()
    cid = spider.get_cid(avid)

    # get_vedio_url returns (media url, formatted size); only the url is used.
    media_url, _size_label = spider.get_vedio_url(avid, cid)
    spider.get_vedio(media_url, title)

附上参考资料:https://blog.csdn.net/qq_41696843/article/details/102488100

https://www.52pojie.cn/forum.php?mod=viewthread&tid=1203086&highlight=b%D5%BE


再次提醒:功能仅供学习交流使用哈!