Spaces:

Lenylvt
/

VideoSubtitleCreation-API

Running on Zero

App Files Files Community

Lenylvt committed on Feb 17, 2024

Commit

4f0841f

verified ·

1 Parent(s): 42666b7

Update app.py

Browse files

Files changed (1) hide show

app.py +89 -42

app.py CHANGED Viewed

@@ -1,50 +1,97 @@
 import gradio as gr
 import os
-import ffmpeg
-import pysrt
 import pandas as pd
 import requests
-import io
-from transformers import MarianMTModel, MarianTokenizer
-from gradio_client import Client
-# Initialize Gradio Client for Whisper JAX
-client = Client(src="sanchit-gandhi/whisper-jax")
-def fetch_languages(url):
-    response = requests.get(url)
-    if response.status_code == 200:
-        csv_content = response.content.decode('utf-8')
-        df = pd.read_csv(io.StringIO(csv_content), delimiter="|", skiprows=2, header=None).dropna(axis=1, how='all')
-        df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
-        df['ISO 639-1'] = df['ISO 639-1'].str.strip()
-        language_options = [(row['ISO 639-1'], f"{row['ISO 639-1']} - {row['Language Name']}") for index, row in df.iterrows()]
-        return language_options
-    else:
-        return []
-def transcript_audio(audio_file, task, return_timestamps, api_name="/predict_1"):
-    prediction = client.predict(audio_file=audio_file, task=task, return_timestamps=return_timestamps, api_name=api_name)
-    return prediction['transcription'], prediction['transcription_time_s']
-def process_video(input_video, video_language, target_language):
-    transcription, _ = transcript_audio(input_video, "transcribe", True)
-    srt_path = text_to_srt(transcription)
-    translated_srt_path = translate_srt(srt_path, video_language, target_language)
-    output_video = add_subtitle_to_video(input_video, translated_srt_path)
-    return output_video
-language_url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
-video_language_options = fetch_languages(language_url)
-with gr.Blocks() as app:
-    with gr.Row():
-        input_video = gr.Video(label="Video File")
-        video_language = gr.Dropdown(choices=video_language_options, label="Language of the Video")
-        target_language = gr.Dropdown(choices=video_language_options, label="Language Translated")
-        process_btn = gr.Button("Process Video")
-    output_video = gr.Video(label="Video with Translated Subtitles")
-    process_btn.click(fn=process_video, inputs=[input_video, video_language, target_language], outputs=output_video)
-app.launch()

 import gradio as gr
+from faster_whisper import WhisperModel
+import logging
 import os
+from moviepy.editor import VideoFileClip
+import ffmpeg  # Make sure to install ffmpeg-python
+from transformers import MarianMTModel, MarianTokenizer
 import pandas as pd
+import pysrt
 import requests
+# Configure logging for debugging purposes
+logging.basicConfig()
+logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
+# Fetch and parse language options from the provided URL
+url = "https://huggingface.co/Lenylvt/LanguageISO/resolve/main/iso.md"
+df = pd.read_csv(url, delimiter="|", skiprows=2, header=None).dropna(axis=1, how='all')
+df.columns = ['ISO 639-1', 'ISO 639-2', 'Language Name', 'Native Name']
+df['ISO 639-1'] = df['ISO 639-1'].str.strip()
+# Prepare language options for the dropdown
+language_options = [(row['ISO 639-1'], f"{row['Language Name']} ({row['ISO 639-1']})") for index, row in df.iterrows()]
+def format_timestamp(seconds):
+    """Convert seconds to HH:MM:SS.mmm format."""
+    hours = int(seconds // 3600)
+    minutes = int((seconds % 3600) // 60)
+    seconds_remainder = seconds % 60
+    return f"{hours:02d}:{minutes:02d}:{seconds_remainder:06.3f}"
+def extract_audio(video_path):
+    """Extract audio from video to a temporary audio file."""
+    output_audio_path = '/tmp/audio.wav'
+    ffmpeg.input(video_path).output(output_audio_path, acodec='pcm_s16le', ac=1, ar='16k').run(quiet=True)
+    return output_audio_path
+def transcribe_and_optionally_translate(video_file, source_language, target_language, model_size, allow_modification):
+    audio_file = extract_audio(video_file)
+    # Transcription
+    device = "cpu"
+    compute_type = "int8"
+    model = WhisperModel(model_size, device=device, compute_type=compute_type)
+    segments, _ = model.transcribe(audio_file, source_language=source_language)
+    transcription = " ".join([segment.text for segment in segments])
+    # Translation
+    if source_language != target_language:
+        model_name = f"Helsinki-NLP/opus-mt-{source_language}-{target_language}"
+        tokenizer = MarianTokenizer.from_pretrained(model_name)
+        model = MarianMTModel.from_pretrained(model_name)
+        translated = model.generate(**tokenizer(transcription, return_tensors="pt", padding=True, truncation=True, max_length=512))
+        transcription = tokenizer.decode(translated[0], skip_special_tokens=True)
+    return transcription, allow_modification
+def add_hard_subtitle_to_video(input_video, transcript):
+    """Add hard subtitles to video."""
+    temp_subtitle_path = '/tmp/subtitle.srt'
+    with open(temp_subtitle_path, 'w', encoding='utf-8') as file:
+        file.write(transcript)  # Assuming transcript is in SRT format
+    output_video_path = f"/tmp/output_video.mp4"
+    ffmpeg.input(input_video).output(output_video_path, vf=f"subtitles={temp_subtitle_path}").run(quiet=True)
+    return output_video_path
+# Gradio Interface
+def process_video(video, source_language, target_language, model_size='base', allow_modification=False, modified_transcript=None):
+    transcript, can_modify = transcribe_and_optionally_translate(video, source_language, target_language, model_size, allow_modification)
+    if can_modify and modified_transcript:
+        transcript = modified_transcript  # Use the modified transcript if provided
+    # Add hard subtitles to the video
+    output_video = add_hard_subtitle_to_video(video, transcript)
+    return output_video
+# Setup the Gradio app
+app = gr.Interface(
+    fn=process_video,
+    inputs=[
+        gr.Video(label="Upload Video"),
+        gr.Dropdown(choices=language_options, label="Source Language"),
+        gr.Dropdown(choices=language_options, label="Target Language"),
+        gr.Dropdown(choices=["base", "small", "medium", "large", "large-v2", "large-v3"], label="Model Size"),
+        gr.Checkbox(label="Allow Transcript Modification?", value=False),
+        gr.TextArea(label="Modified Transcript (if allowed)")
+    ],
+    outputs=gr.Video(label="Processed Video with Hard Subtitles"),
+    title="Video Transcription and Translation Tool",
+    description="Transcribe or translate your video content. Optionally, edit the transcription before adding hard subtitles."
+)
+if __name__ == "__main__":
+    app.launch()