# Auto-Sub / app.py
# Author: sahar-yaccov — "Update app.py" (commit bbaf394, verified)
import gradio as gr
import cv2
import numpy as np
import subprocess
import os
import torch
import whisper
from deep_translator import GoogleTranslator
from math import floor
import tempfile
# ---------------------------
# Video processing functions
# ---------------------------
def draw_grid(frame, width, height, num_lines=5, line_color=(255, 255, 0), line_thickness=1):
    """Return a copy of `frame` overlaid with a labeled coordinate grid.

    Draws `num_lines - 1` vertical and horizontal lines, annotates each with
    its pixel coordinate, and marks the origin so users can pick watermark
    coordinates visually.
    """
    annotated = frame.copy()
    # Vertical grid lines, x coordinate printed near the bottom edge.
    for idx in range(1, num_lines):
        col = floor(idx * width / num_lines)
        cv2.line(annotated, (col, 0), (col, height), line_color, line_thickness)
        cv2.putText(annotated, str(col), (col + 5, height - 10),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, line_color, 1, cv2.LINE_AA)
    # Horizontal grid lines, y coordinate printed near the left edge.
    for idx in range(1, num_lines):
        row = floor(idx * height / num_lines)
        cv2.line(annotated, (0, row), (width, row), line_color, line_thickness)
        cv2.putText(annotated, str(row), (10, row - 5),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, line_color, 1, cv2.LINE_AA)
    # Mark the origin so the coordinate system orientation is obvious.
    cv2.putText(annotated, '(0,0)', (10, 20),
                cv2.FONT_HERSHEY_SIMPLEX, 0.6, line_color, 2, cv2.LINE_AA)
    return annotated
def is_ffmpeg_available():
    """Return True if the `ffmpeg` executable exists and runs, else False."""
    try:
        subprocess.run(
            ["ffmpeg", "-version"],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        # Either ffmpeg exited non-zero or it is not on PATH at all.
        return False
    return True
def remove_watermark_from_frame(frame, mask_coords):
    """Inpaint the rectangular watermark region out of a single frame.

    Parameters:
        frame: BGR image (numpy array) as read by cv2.
        mask_coords: None (frame returned untouched), a flat
            (x1, y1, x2, y2) tuple, or a nested ((x1, y1), (x2, y2)) pair.

    Returns the inpainted frame (the input frame when mask_coords is None).
    """
    if mask_coords is None:
        return frame
    # Accept both the flat and the nested coordinate formats.
    # isinstance check includes np.integer so numpy-typed coords also work.
    if isinstance(mask_coords[0], (int, np.integer)):
        x1, y1, x2, y2 = (int(v) for v in mask_coords)
    else:
        (x1, y1), (x2, y2) = mask_coords
    h, w = frame.shape[:2]
    # Clamp to the frame so out-of-range user input cannot index outside
    # the mask or produce a degenerate region.
    x1, x2 = max(0, min(x1, w)), max(0, min(x2, w))
    y1, y2 = max(0, min(y1, h)), max(0, min(y2, h))
    mask = np.zeros((h, w), dtype=np.uint8)
    mask[y1:y2, x1:x2] = 255
    return cv2.inpaint(frame, mask, 3, cv2.INPAINT_TELEA)
def extract_first_frame(video_file_path):
    """Read and return the first frame of the video, or None if it cannot be read."""
    capture = cv2.VideoCapture(video_file_path)
    try:
        ok, frame = capture.read()
    finally:
        capture.release()
    return frame if ok else None
def frames_to_video(frames, output_path, fps, frame_size):
    """Encode an iterable of frames into an mp4 file at `output_path`.

    Uses the 'mp4v' codec; `frame_size` is (width, height). Always returns True.
    """
    writer = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, frame_size)
    for single_frame in frames:
        writer.write(single_frame)
    writer.release()
    return True
def remove_watermark_process(video_path, coords_input=None):
    """Re-encode the video with the watermark region inpainted in every frame.

    Parameters:
        video_path: path to the source video.
        coords_input: rectangle passed through to remove_watermark_from_frame,
            or None to copy frames unmodified.

    Returns (output_path, message), or (None, message) when ffmpeg is missing.
    NOTE(review): cv2.VideoWriter output carries no audio track, so the
    source audio is dropped here; the subtitle step later re-muxes from
    this file, so audio loss propagates — confirm whether that is intended.
    """
    if not is_ffmpeg_available():
        return None, "ffmpeg not available"
    temp_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
    cap = cv2.VideoCapture(video_path)
    fps = cap.get(cv2.CAP_PROP_FPS)
    width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Stream frames straight into the writer instead of collecting them all
    # in a list first — keeps memory flat regardless of video length.
    writer = cv2.VideoWriter(temp_output, cv2.VideoWriter_fourcc(*'mp4v'),
                             fps, (width, height))
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            writer.write(remove_watermark_from_frame(frame, coords_input))
    finally:
        # Release both handles even if decoding/inpainting raises mid-stream.
        cap.release()
        writer.release()
    return temp_output, "Watermark removed"
def add_subtitles_process(video_path, video_voice_language='en', goal_transcript='en'):
    """Transcribe the video with Whisper, optionally translate the text, and
    burn the subtitles into a new mp4.

    Parameters:
        video_path: path to the input video.
        video_voice_language: language code of the spoken audio (Whisper hint).
        goal_transcript: language code for the rendered subtitles; when it
            differs from the voice language, lines are translated via
            GoogleTranslator.

    Returns the path of the subtitled mp4 (a temp file).
    Raises subprocess.CalledProcessError if the ffmpeg burn-in step fails.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model = whisper.load_model("small").to(device=device)
    result = model.transcribe(video_path, language=video_voice_language)
    segments = result["segments"]

    # Re-split long segments into chunks of at most 6 words, distributing the
    # original segment's time span evenly across the chunks so short lines
    # stay readable on screen.
    max_words_per_segment = 6
    new_segments = []
    for seg in segments:
        start = seg["start"]
        end = seg["end"]
        text = seg["text"].strip()
        words = text.split()
        duration = end - start
        num_splits = (len(words) + max_words_per_segment - 1) // max_words_per_segment
        split_duration = duration / num_splits if num_splits > 0 else duration
        for i in range(num_splits):
            split_text = " ".join(words[i * max_words_per_segment:(i + 1) * max_words_per_segment])
            new_segments.append({
                "start": start + i * split_duration,
                "end": start + (i + 1) * split_duration,
                "text": split_text,
            })
    segments = new_segments

    translator = GoogleTranslator(source=video_voice_language, target=goal_transcript)
    srt_file = tempfile.NamedTemporaryFile(delete=False, suffix=".srt").name

    def format_timestamp(t):
        # SRT timestamp format: HH:MM:SS,mmm
        hours = int(t // 3600)
        minutes = int((t % 3600) // 60)
        seconds = int(t % 60)
        millis = int((t % 1) * 1000)
        return f"{hours:02}:{minutes:02}:{seconds:02},{millis:03}"

    with open(srt_file, "w", encoding="utf-8") as f:
        for i, seg in enumerate(segments, start=1):
            text = seg["text"].strip()
            if goal_transcript != video_voice_language:
                # Best-effort translation: keep the untranslated line on
                # failure rather than aborting the whole subtitle pass.
                # (Was a bare `except:`, which also swallowed KeyboardInterrupt.)
                try:
                    text = translator.translate(text)
                except Exception:
                    pass
            f.write(f"{i}\n{format_timestamp(seg['start'])} --> {format_timestamp(seg['end'])}\n{text}\n\n")

    # Burn the SRT into the video; audio stream is copied untouched.
    final_output = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
    font_name = 'Arial'
    cmd = [
        "ffmpeg", "-y",
        "-i", video_path,
        "-vf", f"subtitles={srt_file}:force_style='FontName={font_name},FontSize=20,PrimaryColour=&HFFFFFF&,BackColour=&H000000&,BorderStyle=3,Outline=1,Shadow=0'",
        "-c:a", "copy",
        final_output
    ]
    subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return final_output
# ---------------------------
# Gradio interface
# ---------------------------
def process_video(video_file_path, wm_coords=None, video_lang='en', subs_lang='en'):
    """Full pipeline: optional watermark removal, then burned-in subtitles.

    Parameters:
        video_file_path: path to the uploaded video.
        wm_coords: "x1,y1,x2,y2" string or empty/None to skip removal.
        video_lang: language code of the spoken audio.
        subs_lang: language code for the rendered subtitles.

    Returns the path of the final subtitled video.
    """
    # Watermark removal: parse "x1,y1,x2,y2" into an int tuple when given.
    wm_coords_tuple = tuple(map(int, wm_coords.split(','))) if wm_coords else None
    no_wm_path, wm_msg = remove_watermark_process(video_file_path, coords_input=wm_coords_tuple)
    # If removal failed (e.g. ffmpeg missing), fall back to the original
    # video instead of handing None to the subtitle step and crashing.
    if no_wm_path is None:
        no_wm_path = video_file_path
    # Subtitle generation
    return add_subtitles_process(no_wm_path, video_voice_language=video_lang, goal_transcript=subs_lang)
# Build the Gradio GUI.
lang_options = {'注讘专讬转':'iw','讗谞讙诇讬转':'en','讛讬谞讚讬':'hi','住驻专讚讬转':'es','爪专驻转讬转':'fr','讙专诪谞讬转':'de','注专讘讬转':'ar'}

def _process_video_ui(video_file_path, wm_coords, video_lang_name, subs_lang_name):
    """Adapter between the UI and process_video.

    Bug fix: the Dropdowns return their display labels, but Whisper and
    GoogleTranslator need language codes ('en', 'iw', ...) — map the labels
    through lang_options before calling the pipeline.
    """
    return process_video(
        video_file_path,
        wm_coords=wm_coords,
        video_lang=lang_options.get(video_lang_name, video_lang_name),
        subs_lang=lang_options.get(subs_lang_name, subs_lang_name),
    )

gr.Interface(
    _process_video_ui,
    inputs=[
        gr.File(label="讘讞专 拽讜讘抓 讜讬讚讗讜"),
        gr.Textbox(label="拽讜讗讜专讚讬谞讟讜转 诇讛住专转 住讬诪谉 诪讬诐 (x1,y1,x2,y2)", placeholder="诇诪砖诇: 0,0,200,50"),
        gr.Dropdown(list(lang_options.keys()), value='讗谞讙诇讬转', label="砖驻转 讗讜讚讬讜 诪拽讜专讬转"),
        gr.Dropdown(list(lang_options.keys()), value='注讘专讬转', label="砖驻转 讻转讜讘讬讜转")
    ],
    outputs=gr.Video(label="讜讬讚讗讜 住讜驻讬 注诐 讻转讜讘讬讜转"),
    title="馃幀 讻诇讬 注讬讘讜讚 讜讬讚讗讜 - 讛住专转 住讬诪谉 诪讬诐 讜讛讜住驻转 讻转讜讘讬讜转",
    description="讛注诇讛 讜讬讚讗讜, 讘讞专 讗讝讜专 住讬诪谉 诪讬诐, 讘爪注 转诪诇讜诇 讜讛讜住驻转 讻转讜讘讬讜转."
).launch()