aqd网站视频下载

# -*- coding: utf-8 -*-

from selenium import webdriver
from selenium.webdriver.common.by import By
import requests
import ffmpy3
import os
import time
import sqlite3
import threading
import psutil
from pypushdeer import PushDeer
 
# NOTE(review): a push key is a credential and should not be committed to source.
# It can be supplied through the PUSHDEER_PUSHKEY environment variable; the literal
# is kept only as a backward-compatible fallback and should be rotated and removed.
pushdeer = PushDeer(pushkey=os.environ.get("PUSHDEER_PUSHKEY", "PDU28629Tv8oUtCPPMCBTn2RnVNsabQFAvlabyV7t"))

def download_video(video_url, video_local_path,
                   ffmpeg_executable='D:\\ffmpeg-master-latest-win64-gpl\\bin\\ffmpeg.exe'):
    """Save the stream at ``video_url`` to ``video_local_path`` using ffmpeg.

    Args:
        video_url: Input handed to ffmpeg unchanged (no extra input options).
        video_local_path: Output file path; an existing file is overwritten
            because ffmpeg is started with ``-y``.
        ffmpeg_executable: Path of the ffmpeg binary to launch. Defaults to the
            location this script was originally written for.

    Any error raised by ``ffmpy3`` while running ffmpeg propagates to the caller.
    """
    # NOTE: passing '-timeout 1800' as a global option was tried and produced
    # unexpected errors, so no ffmpeg-level timeout is set here.
    ffmpy3.FFmpeg(
        executable=ffmpeg_executable,
        inputs={video_url: None},
        outputs={video_local_path: None},
        global_options=['-y', '-hide_banner'],
    ).run()

# Directory that receives the downloaded .mp4 files.
video_file = r'H:\yiyiyi\videos'
# SQLite database holding one row per processed page (table ``contents``).
contents_path = r'H:\yiyiyi\contents.db'

# Resume right after the highest page id already recorded in the database.
conn = sqlite3.connect(contents_path)
try:
    cursor = conn.cursor()
    try:
        sql = 'select max(id) from contents;'
        cursor.execute(sql)
        # An aggregate query always yields exactly one row.
        max_id = cursor.fetchone()[0]
    finally:
        cursor.close()
finally:
    # Read-only query: nothing to commit, but always release the connection.
    conn.close()

try:
    page_num = max_id + 1
except TypeError:
    # max(id) is NULL (None) when the table has no rows yet: start from page 0.
    page_num = 0
is_page_exists = True
retry_times = 0

# To cap the crawl, add an upper bound such as `page_num <= 5000` to the loop
# condition below.


def _build_edge_options():
    """Return the Edge options used for every page load (headless, muted, quiet)."""
    options = webdriver.EdgeOptions()
    options.add_argument('--headless')
    options.add_argument("--mute-audio")
    # Both switches go in ONE call: the option is stored under its name, so a
    # second 'excludeSwitches' call would replace the first and silently drop
    # 'enable-automation' (the switch excluded to avoid automation detection).
    options.add_experimental_option('excludeSwitches', ['enable-automation', 'enable-logging'])
    # Plain ASCII '--' prefix; an en dash here makes the switch unrecognisable.
    options.add_argument('--disable-gpu')
    options.add_argument('log-level=3')  # INFO = 0, WARNING = 1, LOG_ERROR = 2, LOG_FATAL = 3 (default 0)
    return options


def _insert_record(row):
    """Insert one row into the ``contents`` table and commit it.

    ``row`` holds the eleven column values in the order used by the INSERT
    statement. The connection is closed even when the insert fails.
    """
    connection = sqlite3.connect(contents_path)
    try:
        connection.execute(
            "insert into contents (id,src_url,status,video_url,video_name,type,upload_time,download_time,view_nums,contents,tags) values (?,?,?,?,?,?,?,?,?,?,?)",
            row,
        )
        connection.commit()
    finally:
        connection.close()


# Walk the pages one id at a time: scrape the metadata, download the video and
# record the outcome. A failing page is retried; once the retry budget is used up
# it is recorded with status 'ng' and skipped.
while is_page_exists:
    print('Begin processing:', page_num)
    src_url = "https://192.151.197.205:13096/videos/play/" + str(page_num) + ".html"
    current_time = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))

    print("    加载网址:", src_url)

    try:
        driver = webdriver.Edge(options=_build_edge_options())
        try:
            driver.implicitly_wait(10)
            driver.get(src_url)

            # The page exposes the stream address as a JavaScript global.
            video_url = driver.execute_script("return video_url")
            print('    视频地址:', video_url)

            video_title = driver.find_element(by=By.CLASS_NAME, value="video-player-title").text
            print("    视频名称:", video_title)

            video_type = driver.find_element(by=By.CLASS_NAME, value="detail-video-cate").text.replace("分類：", "")
            print("    视频分类:", video_type)

            upload_time = driver.find_element(by=By.CLASS_NAME, value="detail-video-date").text.replace("上傳時間：", "")
            print("    视频上传时间:", upload_time)

            view_num = str(driver.find_element(by=By.CLASS_NAME, value="detail-video-views").text).replace("次觀看", "").replace(",", "").strip()
            print("    当前观看人数:", view_num)

            # The summary is optional: fall back to an empty string when it
            # cannot be read instead of failing the whole page.
            try:
                video_contents = driver.find_element(by=By.CLASS_NAME, value="video-content").text.replace("內容提要：", "")
            except Exception:
                video_contents = ''
            print("    视频简介:", video_contents)

            # NOTE(review): this reads the same element as video_title, so the
            # end-of-list marker checked below is only seen if it appears in the
            # title element - confirm the intended class name.
            video_next = driver.find_element(by=By.CLASS_NAME, value="video-player-title").text

            # Collect tag labels once each, in page order.
            video_tags = []
            for element in driver.find_elements(by=By.CLASS_NAME, value="btn.btn-default.btn-sm"):
                tag_text = element.text
                if tag_text not in video_tags:
                    video_tags.append(tag_text)
            # This button shares the tag styling but is not a tag.
            if "今日不再顯示" in video_tags:
                video_tags.remove("今日不再顯示")
            print("    视频标签:", ','.join(video_tags))

            # File name of the stream without the '.m3u8' suffix.
            video_name = (os.path.split(video_url)[1]).split('.m3u8')[0]

            print("    视频流下载...")
            video_local_path = os.path.join(video_file, (str(page_num) + "_" + str(video_name) + '.mp4'))

            # Run the download in a daemon thread so the wait can be bounded.
            # NOTE(review): neither a timeout nor an error raised inside the
            # worker is detected here, so the page is recorded as 'ok' either way.
            download_thread = threading.Thread(target=download_video, args=(video_url, video_local_path))
            download_thread.daemon = True
            download_thread.start()
            download_thread.join(timeout=1800)
        finally:
            # Always end the browser session, including when scraping failed,
            # so driver processes do not pile up across retries.
            driver.quit()

        time.sleep(3)

        _insert_record([page_num, src_url, 'ok', video_url, video_title, video_type, upload_time, current_time, view_num, video_contents, ','.join(video_tags)])
        print("    下载并保存完成!\n")

        # A page that succeeded must not eat into the next page's retry budget.
        retry_times = 0

        if '下一篇：沒有啦' in video_next:
            is_page_exists = False
        else:
            is_page_exists = True
            page_num += 1

    except Exception as e:
        # Report the failure instead of swallowing it silently.
        print("    错误:", src_url, e)
        if retry_times < 3:
            # Try the same page again on the next iteration.
            retry_times += 1
        else:
            # Retry budget exhausted: record the page as failed and move on.
            _insert_record([page_num, '', 'ng', '', '', '', '', current_time, '', '', ''])
            retry_times = 0
            page_num += 1
一	二	三	四	五	六	日
1	2	3	4	5	6	7
8	9	10	11	12	13	14
15	16	17	18	19	20	21
22	23	24	25	26	27	28
29	30	31
发送评论 编辑评论

推荐文章

发送评论编辑评论