参考:https://cloud.tencent.com/developer/article/1768680
参考文章中的代码有些问题(请求头需要修改),修改后的代码如下:
import requests
import re  # regular expressions
import pprint
import json
from moviepy.editor import AudioFileClip, VideoFileClip
from bs4 import BeautifulSoup as bs

headers = {
    # Anti-hotlinking: tells the server which page the request came from.
    'referer': 'https://www.bilibili.com/a',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}

# Inline <script> that carries the player JSON. Raw string so that ``\.`` is a
# regex escape rather than an (invalid) Python string escape.
_PLAYINFO_PATTERN = re.compile(r'<script>window\.__playinfo__=(.*?)</script>')

# Characters rejected in Windows file names ('/' is also a path separator on
# POSIX), plus control whitespace that page titles may contain.
_INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\r\n\t]')


def send_request(url, timeout=30):
    """Send a GET request with the module-level ``headers``.

    :param url: address to fetch
    :param timeout: seconds passed to ``requests.get`` so a stalled
        connection cannot hang the script forever
    :return: the ``requests.Response`` object
    :raises requests.HTTPError: when the server answers with a 4xx/5xx status
    """
    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail early instead of silently saving an error page as media data.
    response.raise_for_status()
    return response


def get_video_data(html_data):
    """Parse the title and the audio/video stream URLs out of a video page.

    :param html_data: HTML text of the video page
    :return: list ``[title, audio_url, video_url]``
    :raises IndexError: when the title tag or the ``window.__playinfo__``
        script cannot be found in ``html_data``
    """
    # Extract the video title.
    soup = bs(html_data, 'lxml')
    title_tags = soup.find_all(name='h1', attrs={"class": "video-title special-text-indent"})
    if not title_tags:
        raise IndexError('video title <h1> not found in page HTML')
    title = title_tags[0].get_text()

    # Extract the JSON string embedded in the page and decode it.
    match = _PLAYINFO_PATTERN.search(html_data)
    if match is None:
        raise IndexError('window.__playinfo__ JSON not found in page HTML')
    json_data = json.loads(match.group(1))
    pprint.pprint(json_data)

    # Extract the audio stream URL.
    audio_url = json_data['data']['dash']['audio'][0]['backupUrl'][0]
    print('解析到的音频地址:', audio_url)

    # Extract the video (picture) stream URL.
    video_url = json_data['data']['dash']['video'][0]['backupUrl'][0]
    print('解析到的视频地址:', video_url)

    video_data = [title, audio_url, video_url]
    return video_data


def save_data(file_name, audio_url, video_url):
    """Download both streams and save them as ``<file_name>.mp3`` / ``.mp4``.

    :param file_name: output path without extension
    :param audio_url: URL of the audio stream
    :param video_url: URL of the video stream
    """
    print('正在请求音频数据')
    audio_data = send_request(audio_url).content
    print('正在请求视频数据')
    video_data = send_request(video_url).content

    with open(file_name + '.mp3', mode='wb') as f:
        f.write(audio_data)
        print('正在保存音频数据')
    with open(file_name + '.mp4', mode='wb') as f:
        f.write(video_data)
        print('正在保存视频数据')


def merge_data(video_name, output_path="output.mp4"):
    """Mux ``<video_name>.mp3`` and ``<video_name>.mp4`` into one video file.

    The longer of the two streams is trimmed to the length of the shorter one.

    :param video_name: common path (without extension) of the two input files
    :param output_path: path of the merged file; defaults to ``output.mp4``
    """
    print('视频合成开始:', video_name)
    # Context managers close the clip readers even if writing fails.
    with AudioFileClip(video_name + '.mp3') as audioclip, \
            VideoFileClip(video_name + '.mp4') as videoclip:
        # Durations of both streams.
        video_time = videoclip.duration
        audio_time = audioclip.duration

        # Trim whichever stream is longer.
        if video_time > audio_time:
            videoclip_new = videoclip.subclip(0, audio_time)
            audioclip_new = audioclip
        else:
            videoclip_new = videoclip
            audioclip_new = audioclip.subclip(0, video_time)

        # Attach the audio track and write the result.
        video_with_new_audio = videoclip_new.set_audio(audioclip_new)
        video_with_new_audio.write_videofile(output_path,
                                             codec='libx264',
                                             audio_codec='aac',
                                             temp_audiofile='temp-video.m4a',
                                             remove_temp=True
                                             )
    print('视频合成结束:', video_name)


def _sanitize_file_name(name):
    """Return ``name`` with characters that are unsafe in file names replaced."""
    cleaned = _INVALID_FILENAME_CHARS.sub('_', name).strip()
    return cleaned or 'video'


def main():
    """Download one Bilibili video and merge its audio and video streams."""
    url = 'https://www.bilibili.com/video/BV1bK421a7qG/?spm_id_from=333.1007.tianma.6-4-22.click'
    response = send_request(url)
    # Prefer the charset declared inside the page; keep the encoding chosen by
    # requests when the page declares none.
    encodings = requests.utils.get_encodings_from_content(response.text)
    if encodings:
        response.encoding = encodings[0]
    html_data = response.text

    title, audio_url, video_url = get_video_data(html_data)
    # The raw title may contain characters such as '/' or '?' that cannot be
    # used in a file name.
    file_name = _sanitize_file_name(title)
    save_data(file_name, audio_url, video_url)
    merge_data(file_name)


if __name__ == '__main__':
    main()
效果
小姐姐挺靓,就是左下角有水印。下面想办法去除水印,参考 CSDN 博客文章《python实战之去除视频水印&字幕》:
import os
import sys
import cv2
import numpy
from moviepy import editor

# Intermediate (silent) video written by OpenCV before the audio is re-attached.
TEMP_VIDEO = 'temp.mp4'


class WatermarkRemover():
    '''
    Remove a static watermark or subtitles from every video in a directory
    by building a mask and inpainting each frame with OpenCV.
    '''

    def __init__(self, video_path, output, threshold: int, kernel_size: int):
        '''
        :param video_path: directory containing the input videos
        :param output: directory that receives the processed videos
        :param threshold: threshold used for binary segmentation
        :param kernel_size: side length of the square dilation kernel
        '''
        self.threshold = threshold
        self.kernel_size = kernel_size
        self.video_path = video_path
        self.output = output

    def select_roi(self, img: numpy.ndarray, hint: str) -> list:
        '''
        Let the user draw a box around the watermark or subtitle area
        (confirm with SPACE or ENTER).

        :param img: image to display
        :param hint: title of the selection window
        :return: selected region as [x, y, width, height] in the coordinates
                 of the original (unscaled) image
        '''
        COFF = 0.7  # display scale of the selection window
        w, h = int(COFF * img.shape[1]), int(COFF * img.shape[0])
        resize_img = cv2.resize(img, (w, h))
        roi = cv2.selectROI(hint, resize_img, False, False)
        cv2.destroyAllWindows()
        # Map the box drawn on the scaled image back to full resolution.
        watermark_roi = [int(value / COFF) for value in roi[:4]]
        return watermark_roi

    def dilate_mask(self, mask: numpy.ndarray) -> numpy.ndarray:
        '''
        Dilate a mask to enlarge the area it covers.

        :param mask: mask image
        :return: dilated mask
        '''
        kernel = numpy.ones((self.kernel_size, self.kernel_size), numpy.uint8)
        mask = cv2.dilate(mask, kernel)
        return mask

    def generate_single_mask(self, img: numpy.ndarray, roi: list, threshold: int) -> numpy.ndarray:
        '''
        Build the mask of a single frame from the selected ROI.

        :param img: single BGR frame
        :param roi: region as [x, y, width, height]
        :param threshold: binarization threshold
        :return: mask that is 255 where ROI pixels are brighter than the
                 threshold and 0 elsewhere
        '''
        # Invalid region: exit the program.
        if len(roi) != 4:
            print('NULL ROI!')
            sys.exit()

        # Copy only the grayscale pixels that lie inside the ROI.
        roi_img = numpy.zeros((img.shape[0], img.shape[1]), numpy.uint8)
        row_start, row_end = int(roi[1]), int(roi[1] + roi[3])
        col_start, col_end = int(roi[0]), int(roi[0] + roi[2])
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        roi_img[row_start:row_end, col_start:col_end] = gray[row_start:row_end, col_start:col_end]

        # Threshold segmentation.
        _, mask = cv2.threshold(roi_img, threshold, 255, cv2.THRESH_BINARY)
        return mask

    def generate_watermark_mask(self, video_path: str) -> numpy.ndarray:
        '''
        Sample several frames of a video, build one mask per sampled frame
        and AND them together into the final watermark mask.

        :param video_path: path of the video file
        :return: dilated watermark mask
        :raises ValueError: if no frame can be read from the video
        '''
        video = cv2.VideoCapture(video_path)
        try:
            success, frame = video.read()
            if not success:
                raise ValueError('cannot read any frame from %r' % (video_path,))

            roi = self.select_roi(frame, 'select watermark ROI')
            mask = numpy.full((frame.shape[0], frame.shape[1]), 255, numpy.uint8)

            # Sample roughly five frames. Clamp to 1 so that clips shorter
            # than five frames do not cause a modulo by zero.
            step = max(1, int(video.get(cv2.CAP_PROP_FRAME_COUNT) // 5))
            index = 0
            while success:
                if index % step == 0:
                    mask = cv2.bitwise_and(mask, self.generate_single_mask(frame, roi, self.threshold))
                success, frame = video.read()
                index += 1
        finally:
            video.release()

        return self.dilate_mask(mask)

    def generate_subtitle_mask(self, frame: numpy.ndarray, roi: list) -> numpy.ndarray:
        '''
        Build the subtitle mask of a single frame from the selected ROI.

        :param frame: single frame
        :param roi: region as [x, y, width, height]
        :return: dilated subtitle mask
        '''
        # Only the vertical extent of the ROI is used; the band spans the
        # full frame width.
        mask = self.generate_single_mask(frame, [0, roi[1], frame.shape[1], roi[3]], self.threshold)
        return self.dilate_mask(mask)

    def inpaint_image(self, img: numpy.ndarray, mask: numpy.ndarray) -> numpy.ndarray:
        '''
        Inpaint the masked pixels of a frame.

        :param img: single frame
        :param mask: mask marking the pixels to repair
        :return: repaired frame
        '''
        telea = cv2.inpaint(img, mask, 1, cv2.INPAINT_TELEA)
        return telea

    def merge_audio(self, input_path: str, output_path: str, temp_path: str):
        '''
        Combine the audio of the original video with the processed video.

        :param input_path: path of the original video
        :param output_path: path of the muxed output file
        :param temp_path: path of the silent processed video
        '''
        with editor.VideoFileClip(input_path) as video:
            audio = video.audio
            # The source clip must stay open while its audio is being written.
            with editor.VideoFileClip(temp_path) as opencv_video:
                clip = opencv_video.set_audio(audio)
                clip.write_videofile(output_path)

    def _list_input_videos(self) -> list:
        '''Create the output directory and return the sorted input file paths.'''
        os.makedirs(self.output, exist_ok=True)
        return [os.path.join(self.video_path, i) for i in sorted(os.listdir(self.video_path))]

    def _open_writer(self, video):
        '''Open a writer on TEMP_VIDEO with the fps and frame size of ``video``.'''
        fps = video.get(cv2.CAP_PROP_FPS)
        size = (int(video.get(cv2.CAP_PROP_FRAME_WIDTH)), int(video.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        return cv2.VideoWriter(TEMP_VIDEO, cv2.VideoWriter_fourcc(*'mp4v'), fps, size)

    def _mux_output(self, name: str, suffix: str):
        '''Mux TEMP_VIDEO with the audio of ``name`` into the output directory.'''
        # splitext keeps dots that are part of the base name.
        stem = os.path.splitext(os.path.basename(name))[0]
        output_path = os.path.join(self.output, stem + suffix)
        try:
            self.merge_audio(name, output_path, TEMP_VIDEO)
        finally:
            if os.path.exists(TEMP_VIDEO):
                os.remove(TEMP_VIDEO)

    def remove_video_watermark(self):
        '''
        Remove the watermark from every video in ``self.video_path``.

        The mask is generated from the first video and reused for the rest.
        '''
        filenames = self._list_input_videos()
        mask = None

        for i, name in enumerate(filenames):
            if i == 0:
                # Generate the watermark mask.
                mask = self.generate_watermark_mask(name)

            video = cv2.VideoCapture(name)
            video_writer = self._open_writer(video)
            try:
                # Process the video frame by frame.
                success, frame = video.read()
                while success:
                    frame = self.inpaint_image(frame, mask)
                    video_writer.write(frame)
                    success, frame = video.read()
            finally:
                video.release()
                video_writer.release()

            self._mux_output(name, '_no_watermark.mp4')

    def remove_video_subtitle(self):
        '''
        Remove subtitles from every video in ``self.video_path``.

        The subtitle band is selected on the first frame of the first video
        and reused for the rest.

        :raises ValueError: if the first video has no readable frame
        '''
        filenames = self._list_input_videos()
        roi = []

        for i, name in enumerate(filenames):
            video = cv2.VideoCapture(name)
            video_writer = self._open_writer(video)
            try:
                # Process the video frame by frame.
                success, frame = video.read()
                if i == 0:
                    if not success:
                        raise ValueError('cannot read any frame from %r' % (name,))
                    roi = self.select_roi(frame, 'select subtitle ROI')

                while success:
                    mask = self.generate_subtitle_mask(frame, roi)
                    frame = self.inpaint_image(frame, mask)
                    video_writer.write(frame)
                    success, frame = video.read()
            finally:
                video.release()
                video_writer.release()

            # Write into the configured output directory.
            self._mux_output(name, '_no_sub.mp4')


if __name__ == '__main__':
    # Remove the watermark from every video in ./video and write to ./output.
    video_path = 'video'
    output_path = 'output'
    remover = WatermarkRemover(video_path, output_path, threshold=80, kernel_size=5)
    remover.remove_video_watermark()

    # To remove subtitles instead, call remover.remove_video_subtitle().
效果一般吧: