File size: 6,759 Bytes
eb70d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bbaf394
 
 
 
 
 
 
 
eb70d7f
bbaf394
 
 
 
 
 
 
eb70d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bbaf394
eb70d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1d6be01
eb70d7f
 
1d6be01
eb70d7f
 
 
 
 
 
1d6be01
eb70d7f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import gradio as gr
import cv2
import numpy as np
import subprocess
import os
import torch
import whisper
from deep_translator import GoogleTranslator
from math import floor
import tempfile

# ---------------------------
# Video processing functions
# ---------------------------

def draw_grid(frame, width, height, num_lines=5, line_color=(255, 255, 0), line_thickness=1):
    """Return a copy of *frame* overlaid with a labeled coordinate grid.

    Vertical lines (labeled with their x pixel coordinate) are drawn first,
    then horizontal lines (labeled with their y coordinate), dividing the
    frame into *num_lines* bands each way. The origin marker '(0,0)' is
    stamped at the top-left corner.
    """
    annotated = frame.copy()
    # Vertical grid lines, labeled near the bottom edge.
    for idx in range(1, num_lines):
        col = floor(idx * width / num_lines)
        cv2.line(annotated, (col, 0), (col, height), line_color, line_thickness)
        cv2.putText(annotated, str(col), (col + 5, height - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, line_color, 1, cv2.LINE_AA)
    # Horizontal grid lines, labeled near the left edge.
    for idx in range(1, num_lines):
        row = floor(idx * height / num_lines)
        cv2.line(annotated, (0, row), (width, row), line_color, line_thickness)
        cv2.putText(annotated, str(row), (10, row - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, line_color, 1, cv2.LINE_AA)
    cv2.putText(annotated, '(0,0)', (10, 20),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, line_color, 2, cv2.LINE_AA)
    return annotated

def is_ffmpeg_available():
    """Report whether the ``ffmpeg`` executable can be invoked.

    Returns True when ``ffmpeg -version`` runs and exits successfully,
    False when the binary is missing or exits with a non-zero status.
    """
    try:
        subprocess.run(
            ["ffmpeg", "-version"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        return False
    return True


def remove_watermark_from_frame(frame, mask_coords):
    """Inpaint the rectangular region *mask_coords* out of *frame*.

    *mask_coords* is either a flat (x1, y1, x2, y2) tuple of ints or a
    pair of corner points ((x1, y1), (x2, y2)). When it is None the frame
    is returned untouched.
    """
    if mask_coords is None:
        return frame

    # Distinguish the flat int tuple from the pair-of-points form.
    if isinstance(mask_coords[0], int):
        x1, y1, x2, y2 = mask_coords
    else:
        (x1, y1), (x2, y2) = mask_coords

    # White rectangle on a black single-channel mask marks the fill region.
    region = np.zeros(frame.shape[:2], dtype=np.uint8)
    region[y1:y2, x1:x2] = 255
    return cv2.inpaint(frame, region, 3, cv2.INPAINT_TELEA)


def extract_first_frame(video_file_path):
    """Read and return the first frame of the video at *video_file_path*.

    Returns None when the file cannot be opened or yields no frame.
    """
    capture = cv2.VideoCapture(video_file_path)
    success, first = capture.read()
    capture.release()
    return first if success else None

def frames_to_video(frames, output_path, fps, frame_size):
    """Encode *frames* as an mp4 (mp4v codec) at *output_path*.

    *frame_size* is a (width, height) tuple; every frame must match it.
    Always returns True.
    """
    codec = cv2.VideoWriter_fourcc(*'mp4v')
    writer = cv2.VideoWriter(output_path, codec, fps, frame_size)
    for image in frames:
        writer.write(image)
    writer.release()
    return True

def remove_watermark_process(video_path, coords_input=None):
    """Remove a watermark from every frame of the video at *video_path*.

    *coords_input* is forwarded to ``remove_watermark_from_frame`` (a
    rectangle spec, or None to leave frames untouched). Cleaned frames are
    written to a temporary mp4.

    Returns a (output_path, message) tuple; output_path is None on failure.
    """
    if not is_ffmpeg_available():
        return None, "ffmpeg not available"

    cap = cv2.VideoCapture(video_path)
    # The original silently produced an empty video for unreadable input.
    if not cap.isOpened():
        cap.release()
        return None, "could not open video"

    temp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    # Stream each cleaned frame straight to the writer instead of buffering
    # the whole clip in a list (the original held every frame in memory,
    # which blows up on long videos).
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(temp_output, fourcc, fps, (width, height))
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            out.write(remove_watermark_from_frame(frame, coords_input))
    finally:
        cap.release()
        out.release()

    return temp_output, "Watermark removed"

def _format_srt_timestamp(t):
    """Format a time *t* in seconds as an SRT timestamp ``HH:MM:SS,mmm``."""
    hours = int(t // 3600)
    minutes = int((t % 3600) // 60)
    seconds = int(t % 60)
    millis = int((t % 1) * 1000)
    return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"


def _split_segments(segments, max_words_per_segment=6):
    """Split transcription segments so no subtitle exceeds *max_words_per_segment* words.

    Each original segment's time span is divided evenly among its word
    chunks. Returns a new list of {"start", "end", "text"} dicts.
    """
    new_segments = []
    for seg in segments:
        start = seg["start"]
        end = seg["end"]
        words = seg["text"].strip().split()
        duration = end - start
        # Ceiling division: number of subtitle chunks for this segment.
        num_splits = (len(words) + max_words_per_segment - 1) // max_words_per_segment
        split_duration = duration / num_splits if num_splits > 0 else duration
        for i in range(num_splits):
            chunk = " ".join(words[i * max_words_per_segment:(i + 1) * max_words_per_segment])
            new_segments.append({
                "start": start + i * split_duration,
                "end": start + (i + 1) * split_duration,
                "text": chunk,
            })
    return new_segments


def add_subtitles_process(video_path, video_voice_language='en', goal_transcript='en'):
    """Transcribe *video_path* with Whisper, optionally translate, and burn
    the subtitles into a new mp4.

    Parameters:
        video_path: path of the input video.
        video_voice_language: language code of the spoken audio.
        goal_transcript: language code the subtitles should be written in.

    Returns the path of the subtitled mp4.
    Raises subprocess.CalledProcessError if ffmpeg fails.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = whisper.load_model("small").to(device=device)

    result = model.transcribe(video_path, language=video_voice_language)
    segments = _split_segments(result["segments"])

    translator = GoogleTranslator(source=video_voice_language, target=goal_transcript)
    srt_file = tempfile.NamedTemporaryFile(delete=False, suffix=".srt").name

    with open(srt_file, "w", encoding="utf-8") as f:
        for i, seg in enumerate(segments, start=1):
            text = seg["text"].strip()
            if goal_transcript != video_voice_language:
                # Best-effort translation: keep the original text when the
                # online translator is unreachable or rejects the input.
                # (Was a bare ``except:``, which also swallowed
                # KeyboardInterrupt/SystemExit.)
                try:
                    text = translator.translate(text)
                except Exception:
                    pass
            f.write(f"{i}\n{_format_srt_timestamp(seg['start'])} --> {_format_srt_timestamp(seg['end'])}\n{text}\n\n")

    final_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
    font_name = 'Arial'
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vf", f"subtitles={srt_file}:force_style='FontName={font_name},FontSize=20,PrimaryColour=&HFFFFFF&,BackColour=&H000000&,BorderStyle=3,Outline=1,Shadow=0'",
        "-c:a", "copy",
        final_output
    ]
    subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return final_output

# ---------------------------
# Gradio interface
# ---------------------------

def process_video(video_file_path, wm_coords=None, video_lang='en', subs_lang='en'):
    """Full pipeline behind the Gradio UI.

    1. Optionally remove a watermark from the region given by *wm_coords*
       ("x1,y1,x2,y2"; surrounding whitespace is tolerated).
    2. Transcribe the audio and burn (translated) subtitles into the video.

    Returns the path of the final subtitled video.
    Raises ValueError when *wm_coords* is malformed.
    """
    # Parse watermark coordinates. The original broke on "0, 0, 200, 50"
    # (spaces) and never validated the element count.
    coords_tuple = None
    if wm_coords and wm_coords.strip():
        try:
            coords_tuple = tuple(int(p.strip()) for p in wm_coords.split(','))
        except ValueError:
            raise ValueError("Coordinates must be integers: x1,y1,x2,y2")
        if len(coords_tuple) != 4:
            raise ValueError("Exactly four coordinates are required: x1,y1,x2,y2")

    # Watermark removal
    no_wm_path, wm_msg = remove_watermark_process(video_file_path, coords_input=coords_tuple)
    # Fall back to the original video when removal fails (e.g. ffmpeg
    # missing); the original code passed None straight into transcription
    # and crashed.
    source_for_subs = no_wm_path if no_wm_path is not None else video_file_path

    # Subtitle generation and burn-in
    final_video_path = add_subtitles_process(source_for_subs, video_voice_language=video_lang, goal_transcript=subs_lang)
    return final_video_path


# Build the Gradio GUI.
# Maps display names to language codes accepted by Whisper/GoogleTranslator.
# NOTE(review): labels appear mojibake-encoded in this file; 'iw' is
# presumably the legacy code for Hebrew used by Google services — confirm.
lang_options = {'注讘专讬转':'iw','讗谞讙诇讬转':'en','讛讬谞讚讬':'hi','住驻专讚讬转':'es','爪专驻转讬转':'fr','讙专诪谞讬转':'de','注专讘讬转':'ar'}

# Side effect at import time: wires the four inputs to process_video and
# starts the Gradio server immediately.
gr.Interface(
    process_video,
    inputs=[
        gr.File(label="讘讞专 拽讜讘抓 讜讬讚讗讜"),
        gr.Textbox(label="拽讜讗讜专讚讬谞讟讜转 诇讛住专转 住讬诪谉 诪讬诐 (x1,y1,x2,y2)", placeholder="诇诪砖诇: 0,0,200,50"),
        gr.Dropdown(list(lang_options.keys()), value='讗谞讙诇讬转', label="砖驻转 讗讜讚讬讜 诪拽讜专讬转"),
        gr.Dropdown(list(lang_options.keys()), value='注讘专讬转', label="砖驻转 讻转讜讘讬讜转")
    ],
    outputs=gr.Video(label="讜讬讚讗讜 住讜驻讬 注诐 讻转讜讘讬讜转"),
    title="馃幀 讻诇讬 注讬讘讜讚 讜讬讚讗讜 - 讛住专转 住讬诪谉 诪讬诐 讜讛讜住驻转 讻转讜讘讬讜转",
    description="讛注诇讛 讜讬讚讗讜, 讘讞专 讗讝讜专 住讬诪谉 诪讬诐, 讘爪注 转诪诇讜诇 讜讛讜住驻转 讻转讜讘讬讜转."
).launch()