# Scraped page header - Spaces status: Sleeping; file size: 6,759 bytes.
import gradio as gr
import cv2
import numpy as np
import subprocess
import os
import torch
import whisper
from deep_translator import GoogleTranslator
from math import floor
import tempfile
# ---------------------------
# Video processing functions
# ---------------------------
def draw_grid(frame, width, height, num_lines=5, line_color=(255, 255, 0), line_thickness=1):
    """Return a copy of ``frame`` with a labelled coordinate grid drawn on it.

    The frame is divided into ``num_lines`` equal bands along each axis.
    Every interior division gets a line plus a small label showing its pixel
    coordinate, and the origin is marked with ``(0,0)``. The input frame is
    not modified.
    """
    canvas = frame.copy()
    font = cv2.FONT_HERSHEY_SIMPLEX

    # Vertical lines, each labelled with its x coordinate near the bottom edge.
    for step in range(1, num_lines):
        x_pos = floor(step * width / num_lines)
        cv2.line(canvas, (x_pos, 0), (x_pos, height), line_color, line_thickness)
        cv2.putText(canvas, str(x_pos), (x_pos + 5, height - 10), font, 0.5, line_color, 1, cv2.LINE_AA)

    # Horizontal lines, each labelled with its y coordinate near the left edge.
    for step in range(1, num_lines):
        y_pos = floor(step * height / num_lines)
        cv2.line(canvas, (0, y_pos), (width, y_pos), line_color, line_thickness)
        cv2.putText(canvas, str(y_pos), (10, y_pos - 5), font, 0.5, line_color, 1, cv2.LINE_AA)

    # Origin marker in the top-left corner.
    cv2.putText(canvas, '(0,0)', (10, 20), font, 0.6, line_color, 2, cv2.LINE_AA)
    return canvas
def is_ffmpeg_available():
    """Return True when ``ffmpeg -version`` can be launched and exits cleanly.

    Returns False when the executable is missing, cannot be executed (for
    example a file named ``ffmpeg`` without execute permission), or exits
    with a non-zero status.
    """
    try:
        subprocess.run(
            ["ffmpeg", "-version"],
            # The banner is never inspected, so discard it instead of buffering it.
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=True,
        )
    except (subprocess.CalledProcessError, OSError):
        # OSError covers FileNotFoundError as well as PermissionError and
        # other launch failures, all of which mean "ffmpeg is not usable".
        return False
    return True
def remove_watermark_from_frame(frame, mask_coords):
    """Inpaint a rectangular region of ``frame`` and return the result.

    Args:
        frame: Image array; its first two dimensions are used as
            (height, width).
        mask_coords: ``None`` (nothing to remove), a flat ``(x1, y1, x2, y2)``
            sequence, or a pair of points ``((x1, y1), (x2, y2))``.

    Returns:
        The inpainted image, or ``frame`` itself when ``mask_coords`` is
        ``None`` or the rectangle does not overlap the frame.
    """
    if mask_coords is None:
        return frame

    # Tell the flat form from the pair-of-points form by length, so NumPy
    # integers and floats are accepted, not only built-in ints.
    if len(mask_coords) == 4:
        x1, y1, x2, y2 = mask_coords
    else:
        (x1, y1), (x2, y2) = mask_coords

    frame_h, frame_w = frame.shape[:2]

    # Accept the corners in either order, then clamp to the frame so that
    # negative values cannot wrap around through Python slice semantics.
    left, right = sorted((int(x1), int(x2)))
    top, bottom = sorted((int(y1), int(y2)))
    left, right = max(left, 0), min(right, frame_w)
    top, bottom = max(top, 0), min(bottom, frame_h)

    # Nothing to inpaint: skip the inpainting call entirely.
    if left >= right or top >= bottom:
        return frame

    mask = np.zeros((frame_h, frame_w), dtype=np.uint8)
    mask[top:bottom, left:right] = 255
    return cv2.inpaint(frame, mask, 3, cv2.INPAINT_TELEA)
def extract_first_frame(video_file_path):
    """Read and return the first frame of a video, or ``None`` if the read fails."""
    capture = cv2.VideoCapture(video_file_path)
    ok, first_frame = capture.read()
    capture.release()
    return first_frame if ok else None
def frames_to_video(frames, output_path, fps, frame_size):
    """Write ``frames`` to ``output_path`` as an ``mp4v``-encoded video.

    Args:
        frames: Iterable of image arrays to write in order.
        output_path: Destination file path.
        fps: Frame rate passed to the writer.
        frame_size: ``(width, height)`` passed to the writer.

    Returns:
        True when the writer opened and all frames were handed to it,
        False when the writer could not be opened.
    """
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
    try:
        # Without this check a failed open would still be reported as success.
        if not writer.isOpened():
            return False
        for frame in frames:
            writer.write(frame)
    finally:
        # Always release so the file is finalized even if a write raises.
        writer.release()
    return True
def remove_watermark_process(video_path, coords_input=None):
    """Inpaint the watermark region on every frame and return a new video file.

    Frames are processed and written one at a time, so memory use does not
    grow with the length of the video. OpenCV writes picture data only, so
    the audio of ``video_path`` is muxed back in with ffmpeg afterwards;
    later steps that read the audio track would otherwise get a silent file.

    Args:
        video_path: Path of the input video.
        coords_input: Watermark rectangle forwarded to
            ``remove_watermark_from_frame``; ``None`` leaves frames untouched.

    Returns:
        ``(output_path, message)``. ``output_path`` is ``None`` when ffmpeg
        is unavailable or the input cannot be opened.
    """
    if not is_ffmpeg_available():
        return None, "ffmpeg not available"

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        return None, "Could not open video"

    # Only the file names are needed; close our own handles right away so
    # OpenCV and ffmpeg can write to the paths.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
        silent_output = tmp.name
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
        temp_output = tmp.name

    # Fall back to a common default when the reported frame rate is 0.
    fps = cap.get(cv2.CAP_PROP_FPS) or 30.0
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    writer = cv2.VideoWriter(silent_output, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            writer.write(remove_watermark_from_frame(frame, coords_input))
    finally:
        cap.release()
        writer.release()

    # Take the processed picture from the first input and the audio (if any)
    # from the original; the trailing "?" makes the audio mapping optional.
    mux_cmd = [
        "ffmpeg", "-y",
        "-i", silent_output,
        "-i", video_path,
        "-map", "0:v:0",
        "-map", "1:a?",
        "-c:v", "copy",
        "-c:a", "aac",
        "-shortest",
        temp_output,
    ]
    try:
        subprocess.run(mux_cmd, check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    except subprocess.CalledProcessError:
        # Best effort: still hand back the processed (silent) video.
        try:
            os.remove(temp_output)
        except OSError:
            pass
        return silent_output, "Watermark removed (audio could not be restored)"

    try:
        os.remove(silent_output)
    except OSError:
        pass
    return temp_output, "Watermark removed"
def _split_segments(segments, max_words_per_segment):
    """Split transcript segments into chunks of at most ``max_words_per_segment`` words.

    Each segment's time span is divided evenly between its chunks. Segments
    with no words produce no output.
    """
    pieces = []
    for seg in segments:
        words = seg["text"].strip().split()
        if not words:
            continue
        start = seg["start"]
        num_splits = (len(words) + max_words_per_segment - 1) // max_words_per_segment
        split_duration = (seg["end"] - start) / num_splits
        for i in range(num_splits):
            chunk = words[i * max_words_per_segment:(i + 1) * max_words_per_segment]
            pieces.append({
                "start": start + i * split_duration,
                "end": start + (i + 1) * split_duration,
                "text": " ".join(chunk),
            })
    return pieces


def _format_srt_timestamp(t):
    """Format ``t`` seconds as an SRT timestamp ``HH:MM:SS,mmm``."""
    # Round once on whole milliseconds; truncating the fractional part
    # separately turns values such as 2.3 into ",299" through float error.
    total_ms = int(round(t * 1000))
    hours, remainder = divmod(total_ms, 3_600_000)
    minutes, remainder = divmod(remainder, 60_000)
    seconds, millis = divmod(remainder, 1000)
    return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"


def add_subtitles_process(video_path, video_voice_language='en', goal_transcript='en'):
    """Transcribe a video, optionally translate the text, and burn in subtitles.

    Args:
        video_path: Path of the input video.
        video_voice_language: Language code of the spoken audio.
        goal_transcript: Language code for the subtitles; when it differs
            from the audio language each subtitle line is translated.

    Returns:
        Path of a new ``.mp4`` file with the subtitles rendered into the picture.

    Raises:
        subprocess.CalledProcessError: If the ffmpeg invocation fails.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = whisper.load_model("small").to(device=device)

    # The two libraries disagree on the Hebrew code: this file's language
    # table uses 'iw' (the translator's code), while Whisper expects 'he'.
    # NOTE(review): confirm against the installed library versions.
    whisper_language = "he" if video_voice_language == "iw" else video_voice_language
    source_code = "iw" if video_voice_language == "he" else video_voice_language
    target_code = "iw" if goal_transcript == "he" else goal_transcript

    result = model.transcribe(video_path, language=whisper_language)
    segments = _split_segments(result["segments"], 6)

    translator = None
    if target_code != source_code:
        translator = GoogleTranslator(source=source_code, target=target_code)

    # Only the names are needed; close our own handles immediately.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".srt") as tmp:
        srt_file = tmp.name
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp:
        final_output = tmp.name

    try:
        with open(srt_file, "w", encoding="utf-8") as f:
            for i, seg in enumerate(segments, start=1):
                text = seg["text"].strip()
                if translator is not None:
                    try:
                        translated = translator.translate(text)
                    except Exception:
                        # Deliberate best effort: a failed translation keeps
                        # the original text rather than aborting the run.
                        translated = None
                    if translated:
                        text = translated
                f.write(
                    f"{i}\n{_format_srt_timestamp(seg['start'])} --> "
                    f"{_format_srt_timestamp(seg['end'])}\n{text}\n\n"
                )

        font_name = 'Arial'
        # Inside a filtergraph ':' separates options and '\' is an escape, so
        # normalize the path and quote it (matters for Windows-style paths).
        escaped_srt = srt_file.replace("\\", "/").replace(":", "\\:")
        cmd = [
            "ffmpeg", "-y",
            "-i", video_path,
            "-vf", f"subtitles='{escaped_srt}':force_style='FontName={font_name},FontSize=20,PrimaryColour=&HFFFFFF&,BackColour=&H000000&,BorderStyle=3,Outline=1,Shadow=0'",
            "-c:a", "copy",
            final_output
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    finally:
        # The subtitle file is only an intermediate artifact.
        try:
            os.remove(srt_file)
        except OSError:
            pass
    return final_output
# ---------------------------
# Gradio interface
# ---------------------------
def process_video(video_file_path, wm_coords=None, video_lang='en', subs_lang='en'):
    """Remove a watermark from a video and burn in (optionally translated) subtitles.

    Args:
        video_file_path: Path of the uploaded video, or an object exposing
            the path through a ``name`` attribute.
        wm_coords: Watermark rectangle as the text ``"x1,y1,x2,y2"``; empty
            or ``None`` skips watermark removal.
        video_lang: Spoken language, as a language code or as one of the
            display labels that are keys of ``lang_options``.
        subs_lang: Subtitle language, same conventions as ``video_lang``.

    Returns:
        Path of the final video file.

    Raises:
        ValueError: If ``wm_coords`` is not four comma-separated integers.
        RuntimeError: If the watermark step could not produce a video.
    """
    # NOTE(review): depending on the Gradio version, a File input may pass a
    # file-like object instead of a plain path - confirm; both are handled.
    video_path = getattr(video_file_path, "name", video_file_path)

    # Watermark removal
    wm_coords_tuple = None
    if wm_coords and wm_coords.strip():
        try:
            wm_coords_tuple = tuple(int(part) for part in wm_coords.split(','))
        except ValueError as err:
            raise ValueError("Watermark coordinates must be four integers: x1,y1,x2,y2") from err
        if len(wm_coords_tuple) != 4:
            raise ValueError("Watermark coordinates must be four integers: x1,y1,x2,y2")

    # The dropdowns supply display labels; translate them to language codes
    # and let values that are already codes pass through unchanged.
    video_code = lang_options.get(video_lang, video_lang)
    subs_code = lang_options.get(subs_lang, subs_lang)

    no_wm_path, wm_msg = remove_watermark_process(video_path, coords_input=wm_coords_tuple)
    if no_wm_path is None:
        # Surface the reason instead of passing None to the subtitle step.
        raise RuntimeError(wm_msg)

    # Adding subtitles
    return add_subtitles_process(no_wm_path, video_voice_language=video_code, goal_transcript=subs_code)
# Build the GUI in Gradio
# Maps the display label shown in the dropdowns to its language code.
lang_options = {
    '注讘专讬转': 'iw',
    '讗谞讙诇讬转': 'en',
    '讛讬谞讚讬': 'hi',
    '住驻专讚讬转': 'es',
    '爪专驻转讬转': 'fr',
    '讙专诪谞讬转': 'de',
    '注专讘讬转': 'ar',
}

demo = gr.Interface(
    process_video,
    inputs=[
        gr.File(label="讘讞专 拽讜讘抓 讜讬讚讗讜"),
        gr.Textbox(
            label="拽讜讗讜专讚讬谞讟讜转 诇讛住专转 住讬诪谉 诪讬诐 (x1,y1,x2,y2)",
            placeholder="诇诪砖诇: 0,0,200,50",
        ),
        gr.Dropdown(list(lang_options.keys()), value='讗谞讙诇讬转', label="砖驻转 讗讜讚讬讜 诪拽讜专讬转"),
        gr.Dropdown(list(lang_options.keys()), value='注讘专讬转', label="砖驻转 讻转讜讘讬讜转"),
    ],
    outputs=gr.Video(label="讜讬讚讗讜 住讜驻讬 注诐 讻转讜讘讬讜转"),
    title="馃幀 讻诇讬 注讬讘讜讚 讜讬讚讗讜 - 讛住专转 住讬诪谉 诪讬诐 讜讛讜住驻转 讻转讜讘讬讜转",
    description="讛注诇讛 讜讬讚讗讜, 讘讞专 讗讝讜专 住讬诪谉 诪讬诐, 讘爪注 转诪诇讜诇 讜讛讜住驻转 讻转讜讘讬讜转.",
)
# Start serving the interface as soon as the module is executed.
demo.launch()
|