"""Gradio video tool: remove a rectangular watermark via inpainting, then
transcribe the audio with Whisper, optionally translate, and burn subtitles
in with ffmpeg."""

import gradio as gr
import cv2
import numpy as np
import subprocess
import os
import torch
import whisper
from deep_translator import GoogleTranslator
from math import floor
import tempfile


# ---------------------------
# Video processing helpers
# ---------------------------

def draw_grid(frame, width, height, num_lines=5, line_color=(255, 255, 0), line_thickness=1):
    """Return a copy of *frame* overlaid with a labelled coordinate grid.

    Helps the user pick watermark coordinates: vertical lines are labelled
    with their x pixel position, horizontal lines with their y position,
    and the origin is marked '(0,0)'.

    Args:
        frame: BGR image (numpy array) to annotate; not modified in place.
        width, height: frame dimensions in pixels.
        num_lines: number of grid divisions per axis.
        line_color: BGR color tuple for lines and labels.
        line_thickness: line width in pixels.
    """
    marked_frame = frame.copy()
    # Vertical grid lines, x coordinate printed near the bottom edge.
    for i in range(1, num_lines):
        x = floor(i * width / num_lines)
        cv2.line(marked_frame, (x, 0), (x, height), line_color, line_thickness)
        cv2.putText(marked_frame, str(x), (x + 5, height - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, line_color, 1, cv2.LINE_AA)
    # Horizontal grid lines, y coordinate printed on the left edge.
    for i in range(1, num_lines):
        y = floor(i * height / num_lines)
        cv2.line(marked_frame, (0, y), (width, y), line_color, line_thickness)
        cv2.putText(marked_frame, str(y), (10, y - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, line_color, 1, cv2.LINE_AA)
    cv2.putText(marked_frame, '(0,0)', (10, 20),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, line_color, 2, cv2.LINE_AA)
    return marked_frame


def is_ffmpeg_available():
    """Return True if the ffmpeg binary is on PATH and runs."""
    try:
        subprocess.run(["ffmpeg", "-version"],
                       stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
        return True
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False


def remove_watermark_from_frame(frame, mask_coords):
    """Inpaint the rectangular region *mask_coords* out of *frame*.

    Args:
        frame: BGR image (numpy array).
        mask_coords: None (no-op), a flat ``(x1, y1, x2, y2)`` tuple, or a
            pair of points ``((x1, y1), (x2, y2))``.

    Returns:
        The frame with the region reconstructed via Telea inpainting, or the
        original frame when mask_coords is None.
    """
    if mask_coords is None:
        return frame
    # Accept both a flat 4-tuple and a pair of corner points.
    # np.integer included so numpy-derived coordinates are also recognised.
    if isinstance(mask_coords[0], (int, np.integer)):
        x1, y1, x2, y2 = mask_coords
    else:
        (x1, y1), (x2, y2) = mask_coords
    mask = np.zeros(frame.shape[:2], dtype=np.uint8)
    mask[y1:y2, x1:x2] = 255
    return cv2.inpaint(frame, mask, 3, cv2.INPAINT_TELEA)


def extract_first_frame(video_file_path):
    """Return the first frame of the video as a BGR array, or None on failure."""
    cap = cv2.VideoCapture(video_file_path)
    ret, frame = cap.read()
    cap.release()
    return frame if ret else None


def frames_to_video(frames, output_path, fps, frame_size):
    """Encode an iterable of BGR frames into an mp4 file.

    Args:
        frames: iterable of numpy frames, all matching *frame_size*.
        output_path: destination .mp4 path.
        fps: output frame rate.
        frame_size: (width, height) tuple.
    """
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, frame_size)
    try:
        for frame in frames:
            out.write(frame)
    finally:
        out.release()
    return True


def remove_watermark_process(video_path, coords_input=None):
    """Remove the watermark from every frame and re-encode the video.

    Args:
        video_path: path of the input video.
        coords_input: rectangle passed through to remove_watermark_from_frame,
            or None to copy frames unchanged.

    Returns:
        (output_path, message) on success, or (None, message) when ffmpeg is
        not available.
    """
    if not is_ffmpeg_available():
        return None, "ffmpeg not available"
    temp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
    cap = cv2.VideoCapture(video_path)
    # Some containers report fps=0; fall back to a sane default so the
    # output video is not broken.
    fps = cap.get(cv2.CAP_PROP_FPS) or 25.0
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(temp_output, fourcc, fps, (width, height))
    try:
        # Stream frame-by-frame to the writer instead of buffering the whole
        # video in memory (the original accumulated every frame in a list).
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            out.write(remove_watermark_from_frame(frame, coords_input))
    finally:
        cap.release()
        out.release()
    return temp_output, "Watermark removed"


def add_subtitles_process(video_path, video_voice_language='en', goal_transcript='en'):
    """Transcribe *video_path* with Whisper, translate if needed, and burn
    the subtitles into a new mp4 with ffmpeg.

    Args:
        video_path: input video path.
        video_voice_language: Google-style language code of the spoken audio.
        goal_transcript: Google-style language code for the subtitles.

    Returns:
        Path of the subtitled mp4 (a fresh temp file).

    Raises:
        subprocess.CalledProcessError: if the ffmpeg burn-in step fails.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = whisper.load_model("small").to(device=device)

    # Whisper uses ISO 639-1 'he' for Hebrew, while Google Translate keeps
    # the legacy 'iw' code — map so both libraries get a code they accept.
    whisper_lang = 'he' if video_voice_language == 'iw' else video_voice_language
    result = model.transcribe(video_path, language=whisper_lang)
    segments = result["segments"]

    # Re-split long segments so each subtitle shows at most a few words,
    # distributing the original segment's duration evenly across the splits.
    max_words_per_segment = 6
    new_segments = []
    for seg in segments:
        start = seg["start"]
        end = seg["end"]
        words = seg["text"].strip().split()
        duration = end - start
        num_splits = (len(words) + max_words_per_segment - 1) // max_words_per_segment
        split_duration = duration / num_splits if num_splits > 0 else duration
        for i in range(num_splits):
            split_text = " ".join(words[i * max_words_per_segment:(i + 1) * max_words_per_segment])
            new_segments.append({
                "start": start + i * split_duration,
                "end": start + (i + 1) * split_duration,
                "text": split_text,
            })
    segments = new_segments

    translator = GoogleTranslator(source=video_voice_language, target=goal_transcript)
    srt_file = tempfile.NamedTemporaryFile(delete=False, suffix=".srt").name

    def format_timestamp(t):
        # SRT timestamp: HH:MM:SS,mmm
        hours = int(t // 3600)
        minutes = int((t % 3600) // 60)
        seconds = int(t % 60)
        millis = int((t % 1) * 1000)
        return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"

    with open(srt_file, "w", encoding="utf-8") as f:
        for i, seg in enumerate(segments, start=1):
            text = seg["text"].strip()
            if goal_transcript != video_voice_language:
                # Translation is best-effort: on failure keep the original
                # text rather than aborting the whole job. Narrowed from a
                # bare except so KeyboardInterrupt/SystemExit still propagate.
                try:
                    text = translator.translate(text)
                except Exception:
                    pass
            f.write(f"{i}\n{format_timestamp(seg['start'])} --> {format_timestamp(seg['end'])}\n{text}\n\n")

    final_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
    font_name = 'Arial'
    cmd = [
        "ffmpeg", "-y", "-i", video_path,
        "-vf",
        f"subtitles={srt_file}:force_style='FontName={font_name},FontSize=20,PrimaryColour=&HFFFFFF&,BackColour=&H000000&,BorderStyle=3,Outline=1,Shadow=0'",
        "-c:a", "copy",
        final_output,
    ]
    subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return final_output


# ---------------------------
# Gradio interface
# ---------------------------

def process_video(video_file_path, wm_coords=None, video_lang='en', subs_lang='en'):
    """Pipeline entry point: remove watermark, then transcribe + subtitle.

    Args:
        video_file_path: path string or file-like object from gr.File.
        wm_coords: optional "x1,y1,x2,y2" string from the textbox.
        video_lang: audio language (Hebrew UI label or a language code).
        subs_lang: subtitle language (Hebrew UI label or a language code).

    Returns:
        Path of the final subtitled video.
    """
    # Older Gradio versions hand over a tempfile-like object; newer ones a
    # plain path string — accept both.
    path = getattr(video_file_path, 'name', video_file_path)

    # BUG FIX: the dropdowns return the Hebrew *label*, not the language
    # code — resolve through lang_options (fall through unchanged if a raw
    # code was passed directly, keeping the old call signature working).
    video_code = lang_options.get(video_lang, video_lang)
    subs_code = lang_options.get(subs_lang, subs_lang)

    # Watermark removal (skipped when no coordinates were supplied).
    wm_text = wm_coords.strip() if wm_coords else ""
    wm_coords_tuple = tuple(map(int, wm_text.split(','))) if wm_text else None
    no_wm_path, wm_msg = remove_watermark_process(path, coords_input=wm_coords_tuple)
    if no_wm_path is None:
        # Surface the failure instead of crashing later with a TypeError.
        raise RuntimeError(f"Watermark removal failed: {wm_msg}")

    # Transcription + subtitle burn-in.
    return add_subtitles_process(no_wm_path,
                                 video_voice_language=video_code,
                                 goal_transcript=subs_code)


# UI label -> Google Translate language code.
lang_options = {'עברית':'iw','אנגלית':'en','הינדי':'hi','ספרדית':'es','צרפתית':'fr','גרמנית':'de','ערבית':'ar'}

gr.Interface(
    process_video,
    inputs=[
        gr.File(label="בחר קובץ וידאו"),
        gr.Textbox(label="קואורדינטות להסרת סימן מים (x1,y1,x2,y2)", placeholder="למשל: 0,0,200,50"),
        gr.Dropdown(list(lang_options.keys()), value='אנגלית', label="שפת אודיו מקורית"),
        gr.Dropdown(list(lang_options.keys()), value='עברית', label="שפת כתוביות")
    ],
    outputs=gr.Video(label="וידאו סופי עם כתוביות"),
    title="🎬 כלי עיבוד וידאו - הסרת סימן מים והוספת כתוביות",
    description="העלה וידאו, בחר אזור סימן מים, בצע תמלול והוספת כתוביות."
).launch()