File size: 6,759 Bytes
eb70d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bbaf394
 
 
 
 
 
 
 
eb70d7f
bbaf394
 
 
 
 
 
 
eb70d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bbaf394
eb70d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d6be01
eb70d7f
 
1d6be01
eb70d7f
 
 
 
 
 
1d6be01
eb70d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import gradio as gr
import cv2
import numpy as np
import subprocess
import os
import torch
import whisper
from deep_translator import GoogleTranslator
from math import floor
import tempfile

# ---------------------------
# Video processing functions
# ---------------------------

def draw_grid(frame, width, height, num_lines=5, line_color=(255, 255, 0), line_thickness=1):
    """Return a copy of *frame* overlaid with a labeled coordinate grid.

    Vertical lines (labeled with their x pixel coordinate) are drawn first,
    then horizontal lines (labeled with their y coordinate), dividing the
    frame into *num_lines* bands each way. The origin marker '(0,0)' is
    stamped at the top-left corner.
    """
    annotated = frame.copy()
    # Vertical grid lines, labeled near the bottom edge.
    for idx in range(1, num_lines):
        col = floor(idx * width / num_lines)
        cv2.line(annotated, (col, 0), (col, height), line_color, line_thickness)
        cv2.putText(annotated, str(col), (col + 5, height - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, line_color, 1, cv2.LINE_AA)
    # Horizontal grid lines, labeled near the left edge.
    for idx in range(1, num_lines):
        row = floor(idx * height / num_lines)
        cv2.line(annotated, (0, row), (width, row), line_color, line_thickness)
        cv2.putText(annotated, str(row), (10, row - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, line_color, 1, cv2.LINE_AA)
    cv2.putText(annotated, '(0,0)', (10, 20),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, line_color, 2, cv2.LINE_AA)
    return annotated

def is_ffmpeg_available():
    """Report whether the ``ffmpeg`` executable can be invoked.

    Returns True when ``ffmpeg -version`` runs and exits successfully,
    False when the binary is missing or exits with a non-zero status.
    """
    try:
        subprocess.run(
            ["ffmpeg", "-version"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False
    return True


def remove_watermark_from_frame(frame, mask_coords):
    """Inpaint the rectangular region *mask_coords* out of *frame*.

    *mask_coords* is either a flat (x1, y1, x2, y2) tuple of ints or a
    pair of corner points ((x1, y1), (x2, y2)). When it is None the frame
    is returned untouched.
    """
    if mask_coords is None:
        return frame

    # Distinguish the flat int tuple from the pair-of-points form.
    if isinstance(mask_coords[0], int):
        x1, y1, x2, y2 = mask_coords
    else:
        (x1, y1), (x2, y2) = mask_coords

    # White rectangle on a black single-channel mask marks the fill region.
    region = np.zeros(frame.shape[:2], dtype=np.uint8)
    region[y1:y2, x1:x2] = 255
    return cv2.inpaint(frame, region, 3, cv2.INPAINT_TELEA)


def extract_first_frame(video_file_path):
    """Read and return the first frame of the video at *video_file_path*.

    Returns None when the file cannot be opened or yields no frame.
    """
    capture = cv2.VideoCapture(video_file_path)
    success, first = capture.read()
    capture.release()
    return first if success else None

def frames_to_video(frames, output_path, fps, frame_size):
    """Encode *frames* as an mp4 (mp4v codec) at *output_path*.

    *frame_size* is a (width, height) tuple; every frame must match it.
    Always returns True.
    """
    codec = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(output_path, codec, fps, frame_size)
    for image in frames:
        writer.write(image)
    writer.release()
    return True

def remove_watermark_process(video_path, coords_input=None):
    """Remove a watermark from every frame of the video at *video_path*.

    *coords_input* is forwarded to ``remove_watermark_from_frame`` (a
    rectangle spec, or None to leave frames untouched). Cleaned frames are
    written to a temporary mp4.

    Returns a (output_path, message) tuple; output_path is None on failure.
    """
    if not is_ffmpeg_available():
        return None, "ffmpeg not available"

    cap = cv2.VideoCapture(video_path)
    # The original silently produced an empty video for unreadable input.
    if not cap.isOpened():
        cap.release()
        return None, "could not open video"

    temp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Stream each cleaned frame straight to the writer instead of buffering
    # the whole clip in a list (the original held every frame in memory,
    # which blows up on long videos).
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(temp_output, fourcc, fps, (width, height))
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            out.write(remove_watermark_from_frame(frame, coords_input))
    finally:
        cap.release()
        out.release()

    return temp_output, "Watermark removed"

def _format_srt_timestamp(t):
    """Format a time *t* in seconds as an SRT timestamp ``HH:MM:SS,mmm``."""
    hours = int(t // 3600)
    minutes = int((t % 3600) // 60)
    seconds = int(t % 60)
    millis = int((t % 1) * 1000)
    return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"


def _split_segments(segments, max_words_per_segment=6):
    """Split transcription segments so no subtitle exceeds *max_words_per_segment* words.

    Each original segment's time span is divided evenly among its word
    chunks. Returns a new list of {"start", "end", "text"} dicts.
    """
    new_segments = []
    for seg in segments:
        start = seg["start"]
        end = seg["end"]
        words = seg["text"].strip().split()
        duration = end - start
        # Ceiling division: number of subtitle chunks for this segment.
        num_splits = (len(words) + max_words_per_segment - 1) // max_words_per_segment
        split_duration = duration / num_splits if num_splits > 0 else duration
        for i in range(num_splits):
            chunk = " ".join(words[i * max_words_per_segment:(i + 1) * max_words_per_segment])
            new_segments.append({
                "start": start + i * split_duration,
                "end": start + (i + 1) * split_duration,
                "text": chunk,
            })
    return new_segments


def add_subtitles_process(video_path, video_voice_language='en', goal_transcript='en'):
    """Transcribe *video_path* with Whisper, optionally translate, and burn
    the subtitles into a new mp4.

    Parameters:
        video_path: path of the input video.
        video_voice_language: language code of the spoken audio.
        goal_transcript: language code the subtitles should be written in.

    Returns the path of the subtitled mp4.
    Raises subprocess.CalledProcessError if ffmpeg fails.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = whisper.load_model("small").to(device=device)

    result = model.transcribe(video_path, language=video_voice_language)
    segments = _split_segments(result["segments"])

    translator = GoogleTranslator(source=video_voice_language, target=goal_transcript)
    srt_file = tempfile.NamedTemporaryFile(delete=False, suffix=".srt").name

    with open(srt_file, "w", encoding="utf-8") as f:
        for i, seg in enumerate(segments, start=1):
            text = seg["text"].strip()
            if goal_transcript != video_voice_language:
                # Best-effort translation: keep the original text when the
                # online translator is unreachable or rejects the input.
                # (Was a bare ``except:``, which also swallowed
                # KeyboardInterrupt/SystemExit.)
                try:
                    text = translator.translate(text)
                except Exception:
                    pass
            f.write(f"{i}\n{_format_srt_timestamp(seg['start'])} --> {_format_srt_timestamp(seg['end'])}\n{text}\n\n")

    final_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
    font_name = 'Arial'
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vf", f"subtitles={srt_file}:force_style='FontName={font_name},FontSize=20,PrimaryColour=&HFFFFFF&,BackColour=&H000000&,BorderStyle=3,Outline=1,Shadow=0'",
        "-c:a", "copy",
        final_output
    ]
    subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return final_output

# ---------------------------
# Gradio interface
# ---------------------------

def process_video(video_file_path, wm_coords=None, video_lang='en', subs_lang='en'):
    """Full pipeline behind the Gradio UI.

    1. Optionally remove a watermark from the region given by *wm_coords*
       ("x1,y1,x2,y2"; surrounding whitespace is tolerated).
    2. Transcribe the audio and burn (translated) subtitles into the video.

    Returns the path of the final subtitled video.
    Raises ValueError when *wm_coords* is malformed.
    """
    # Parse watermark coordinates. The original broke on "0, 0, 200, 50"
    # (spaces) and never validated the element count.
    coords_tuple = None
    if wm_coords and wm_coords.strip():
        try:
            coords_tuple = tuple(int(p.strip()) for p in wm_coords.split(','))
        except ValueError:
            raise ValueError("Coordinates must be integers: x1,y1,x2,y2")
        if len(coords_tuple) != 4:
            raise ValueError("Exactly four coordinates are required: x1,y1,x2,y2")

    # Watermark removal
    no_wm_path, wm_msg = remove_watermark_process(video_file_path, coords_input=coords_tuple)
    # Fall back to the original video when removal fails (e.g. ffmpeg
    # missing); the original code passed None straight into transcription
    # and crashed.
    source_for_subs = no_wm_path if no_wm_path is not None else video_file_path

    # Subtitle generation and burn-in
    final_video_path = add_subtitles_process(source_for_subs, video_voice_language=video_lang, goal_transcript=subs_lang)
    return final_video_path


# Build the Gradio GUI.
# Maps display names to language codes accepted by Whisper/GoogleTranslator.
# NOTE(review): labels appear mojibake-encoded in this file; 'iw' is
# presumably the legacy code for Hebrew used by Google services — confirm.
lang_options = {'注讘专讬转':'iw','讗谞讙诇讬转':'en','讛讬谞讚讬':'hi','住驻专讚讬转':'es','爪专驻转讬转':'fr','讙专诪谞讬转':'de','注专讘讬转':'ar'}

# Side effect at import time: wires the four inputs to process_video and
# starts the Gradio server immediately.
gr.Interface(
    process_video,
    inputs=[
        gr.File(label="讘讞专 拽讜讘抓 讜讬讚讗讜"),
        gr.Textbox(label="拽讜讗讜专讚讬谞讟讜转 诇讛住专转 住讬诪谉 诪讬诐 (x1,y1,x2,y2)", placeholder="诇诪砖诇: 0,0,200,50"),
        gr.Dropdown(list(lang_options.keys()), value='讗谞讙诇讬转', label="砖驻转 讗讜讚讬讜 诪拽讜专讬转"),
        gr.Dropdown(list(lang_options.keys()), value='注讘专讬转', label="砖驻转 讻转讜讘讬讜转")
    ],
    outputs=gr.Video(label="讜讬讚讗讜 住讜驻讬 注诐 讻转讜讘讬讜转"),
    title="馃幀 讻诇讬 注讬讘讜讚 讜讬讚讗讜 - 讛住专转 住讬诪谉 诪讬诐 讜讛讜住驻转 讻转讜讘讬讜转",
    description="讛注诇讛 讜讬讚讗讜, 讘讞专 讗讝讜专 住讬诪谉 诪讬诐, 讘爪注 转诪诇讜诇 讜讛讜住驻转 讻转讜讘讬讜转."
).launch()