# -*- coding: utf-8 -*-
from selenium import webdriver
from selenium.webdriver.common.by import By
import requests
import ffmpy3
import os
import time
import sqlite3
import threading
import psutil
from pypushdeer import PushDeer
# PushDeer client used for push notifications.
# The push key is read from the PUSHDEER_PUSHKEY environment variable when it
# is set; the literal below is kept only as a backward-compatible fallback.
# NOTE(review): a key committed to source should be treated as exposed --
# consider rotating it and dropping the fallback.
pushdeer = PushDeer(
    pushkey=os.environ.get(
        "PUSHDEER_PUSHKEY",
        "PDU28629Tv8oUtCPPMCBTn2RnVNsabQFAvlabyV7t",
    )
)
# Default location of the ffmpeg binary; pass ``ffmpeg_executable`` to
# download_video() to use a different installation.
FFMPEG_EXECUTABLE = 'D:\\ffmpeg-master-latest-win64-gpl\\bin\\ffmpeg.exe'


def download_video(video_url, video_local_path, ffmpeg_executable=FFMPEG_EXECUTABLE):
    """Download the stream at *video_url* into *video_local_path* with ffmpeg.

    Args:
        video_url: Input passed to ffmpeg (the caller passes an .m3u8 URL).
        video_local_path: Output file path; ``-y`` makes ffmpeg overwrite an
            existing file without asking.
        ffmpeg_executable: Path of the ffmpeg binary to run. Defaults to
            ``FFMPEG_EXECUTABLE``.

    Blocks until ffmpeg exits. Any exception raised by ``ffmpy3`` is
    propagated unchanged to the caller.
    """
    ffmpy3.FFmpeg(
        executable=ffmpeg_executable,
        inputs={video_url: None},
        outputs={video_local_path: None},
        global_options=['-y', '-hide_banner'],
    ).run()
    # NOTE: passing ffmpeg's own ``-timeout 1800`` global option was tried and
    # tended to fail with unexpected errors (no fix found), so no timeout is
    # given to ffmpeg here.
# Directory that receives the downloaded .mp4 files.
video_file = r'H:\yiyiyi\videos'
# SQLite database with one row per processed page (table ``contents``).
contents_path = r'H:\yiyiyi\contents.db'

# Resume after the highest page id already recorded in the database.
sql = 'select max(id) from contents;'
conn = sqlite3.connect(contents_path)
try:
    cursor = conn.cursor()
    cursor.execute(sql)
    # An aggregate without GROUP BY always yields exactly one row.
    last_id = cursor.fetchone()[0]
    cursor.close()
finally:
    # Release the connection even if the query fails; a read needs no commit.
    conn.close()

# max(id) is NULL (None) when the table is empty: start from page 0 then.
page_num = 0 if last_id is None else last_id + 1
# Loop flag for the main loop; cleared when the last page has been processed.
is_page_exists = True
# Number of retries already spent on the current page.
retry_times = 0
# Main scraping loop and its private helpers.
#
# For each page id, starting at ``page_num``: load the play page in headless
# Edge, read the stream URL and metadata, download the stream with
# download_video(), and record the result in the ``contents`` table. A page
# that fails is retried up to 3 times and then recorded with status 'ng'.

# Column order must match the value lists passed to _save_record().
_INSERT_SQL = "insert into contents (id,src_url,status,video_url,video_name,type,upload_time,download_time,view_nums,contents,tags) values (?,?,?,?,?,?,?,?,?,?,?)"


def _build_edge_options():
    """Return the Edge options used for every page load."""
    options = webdriver.EdgeOptions()
    options.add_argument('--headless')
    options.add_argument("--mute-audio")
    # add_experimental_option() keeps a single value per option name, so both
    # switches go into one list; two separate calls would leave only the last
    # one in effect. 'enable-automation' is excluded to avoid detection,
    # 'enable-logging' to silence driver console output.
    options.add_experimental_option('excludeSwitches', ['enable-automation', 'enable-logging'])
    # Plain ASCII "--" prefix (an en-dash here makes the switch unrecognised).
    options.add_argument('--disable-gpu')
    # INFO = 0, WARNING = 1, LOG_ERROR = 2, LOG_FATAL = 3; default is 0.
    options.add_argument('log-level=3')
    return options


def _scrape_page(driver):
    """Read the stream URL and metadata from the page loaded in *driver*.

    Prints each value as it is read and returns them in a dict with the keys
    ``video_url``, ``video_title``, ``video_type``, ``upload_time``,
    ``view_num``, ``video_contents``, ``video_next`` and ``video_tags``.
    Raises whatever Selenium raises when a mandatory element is missing.
    """
    # The page exposes the stream address in a JavaScript variable.
    video_url = driver.execute_script("return video_url")
    print(' 视频地址:', video_url)
    video_title = driver.find_element(by=By.CLASS_NAME, value="video-player-title").text
    print(" 视频名称:", video_title)
    video_type = driver.find_element(by=By.CLASS_NAME, value="detail-video-cate").text.replace("分類:", "")
    print(" 视频分类:", video_type)
    upload_time = driver.find_element(by=By.CLASS_NAME, value="detail-video-date").text.replace("上傳時間:", "")
    print(" 视频上传时间:", upload_time)
    view_text = str(driver.find_element(by=By.CLASS_NAME, value="detail-video-views").text)
    view_num = view_text.replace("次觀看", "").replace(",", "").strip()
    print(" 当前观看人数:", view_num)

    # The description is optional: an absent element yields an empty string.
    content_elements = driver.find_elements(by=By.CLASS_NAME, value="video-content")
    if content_elements:
        video_contents = content_elements[0].text.replace("內容提要:", "")
    else:
        video_contents = ''
    print(" 视频简介:", video_contents)

    # NOTE(review): this reads the same element as the title although the
    # caller searches it for the "no next page" marker -- confirm that the
    # class name is the intended one.
    video_next = driver.find_element(by=By.CLASS_NAME, value="video-player-title").text

    video_tags = []
    for element in driver.find_elements(by=By.CLASS_NAME, value="btn.btn-default.btn-sm"):
        tag_text = element.text
        # Keep each tag once and leave out the "今日不再顯示" entry, which the
        # original code also removed from the tag list.
        if tag_text != "今日不再顯示" and tag_text not in video_tags:
            video_tags.append(tag_text)
    print(" 视频标签:", ','.join(video_tags))

    return {
        'video_url': video_url,
        'video_title': video_title,
        'video_type': video_type,
        'upload_time': upload_time,
        'view_num': view_num,
        'video_contents': video_contents,
        'video_next': video_next,
        'video_tags': video_tags,
    }


def _download_with_timeout(video_url, video_local_path, timeout=1800):
    """Run download_video() in a daemon thread, waiting at most *timeout* s.

    An exception raised by the download is re-raised here so that the caller
    does not record a failed download as successful.
    """
    errors = []

    def _worker():
        try:
            download_video(video_url, video_local_path)
        except Exception as exc:  # handed over to the waiting thread below
            errors.append(exc)

    worker = threading.Thread(target=_worker, daemon=True)
    worker.start()
    worker.join(timeout=timeout)
    if errors:
        raise errors[0]
    if worker.is_alive():
        # NOTE(review): the download is still running in the background and
        # cannot be stopped from here; as before, the page is still recorded
        # as done. Consider stopping ffmpeg and treating this as a failure.
        print(" Warning: download still running after", timeout, "seconds:", video_local_path)


def _save_record(values):
    """Insert one row (11 values, in _INSERT_SQL column order) into ``contents``."""
    connection = sqlite3.connect(contents_path)
    try:
        connection.execute(_INSERT_SQL, values)
        connection.commit()
    finally:
        connection.close()


while is_page_exists:
    print('Begin processing:', page_num)
    src_url = "https://192.151.197.205:13096/videos/play/" + str(page_num) + ".html"
    current_time = time.strftime('%Y-%m-%d %H:%M:%S')
    print(" 加载网址:", src_url)
    try:
        driver = webdriver.Edge(options=_build_edge_options())
        try:
            driver.implicitly_wait(10)
            driver.get(src_url)
            page = _scrape_page(driver)
            video_url = page['video_url']
            # File name of the playlist without its ".m3u8" suffix.
            video_name = os.path.split(video_url)[1].split('.m3u8')[0]
            print(" 视频流下载...")
            video_local_path = os.path.join(video_file, str(page_num) + "_" + str(video_name) + '.mp4')
            _download_with_timeout(video_url, video_local_path)
        finally:
            # Always shut the browser down, also on errors, so that no
            # msedgedriver/Edge processes pile up across iterations.
            driver.quit()
        # Short pause after the browser shutdown, kept from the original flow.
        time.sleep(3)
        _save_record([
            page_num,
            src_url,
            'ok',
            video_url,
            page['video_title'],
            page['video_type'],
            page['upload_time'],
            current_time,
            page['view_num'],
            page['video_contents'],
            ','.join(page['video_tags']),
        ])
        print(" 下载并保存完成!\n")
        # Stop once the page reports that there is no following entry.
        is_page_exists = '下一篇:沒有啦' not in page['video_next']
        # Every page starts with a full retry budget.
        retry_times = 0
        page_num += 1
    except Exception as e:
        print(" 错误:", src_url, e)
        if retry_times < 3:
            # Try the same page again.
            retry_times += 1
        else:
            # Give up on this page: record it as failed and move on.
            _save_record([page_num, '', 'ng', '', '', '', '', current_time, '', '', ''])
            retry_times = 0
            page_num += 1
        is_page_exists = True
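# NOTE: stray web-page text ("暂无评论" = "No comments yet") picked up when this script was copied; not part of the program.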

