Spaces:

garyuzair
/

VideoTranscriber

Runtime error

App Files Files Community

garyuzair committed on May 30, 2025

Commit

f874e3c

verified ·

1 Parent(s): 176dbeb

Create app.py

Browse files

Files changed (1) hide show

app.py +109 -0

app.py ADDED Viewed

	@@ -0,0 +1,109 @@

+import gradio as gr
+from transformers import pipeline
+import tempfile
+import os
+import time
+import ffmpeg
+# Cache the model with CPU optimization
+def load_model():
+    """Build the speech-recognition pipeline used for transcription.
+
+    Returns:
+        The object returned by ``pipeline`` for the
+        "automatic-speech-recognition" task, configured with the
+        "openai/whisper-tiny" model on the "cpu" device.
+    """
+    settings = {
+        "model": "openai/whisper-tiny",
+        "device": "cpu",  # Force CPU usage
+    }
+    return pipeline("automatic-speech-recognition", **settings)
+# Build the pipeline once at import time; transcribe_video() reuses this
+# module-level object for every request instead of reloading the model.
+model = load_model()
+def extract_audio(video_path):
+    """Extract the audio track of a video into a temporary WAV file.
+
+    The output is written with one channel (``ac=1``), a 16000 Hz sample
+    rate (``ar=16000``) and the ``pcm_s16le`` codec.
+
+    Args:
+        video_path: Path of the video file passed to FFmpeg as input.
+
+    Returns:
+        Path of the temporary ``.wav`` file. The caller must delete it.
+
+    Raises:
+        Exception: Whatever the FFmpeg run raises is re-raised after the
+            unusable output file has been removed.
+    """
+    # mkstemp creates the file atomically; the deprecated mktemp only
+    # returned a name that another process could claim first.
+    fd, audio_path = tempfile.mkstemp(suffix=".wav")
+    os.close(fd)  # FFmpeg reopens the path itself
+    try:
+        (
+            ffmpeg
+            .input(video_path)
+            .output(audio_path, ac=1, ar=16000, acodec='pcm_s16le')
+            .overwrite_output()  # the placeholder file already exists
+            .run(quiet=True, cmd="ffmpeg")
+        )
+    except Exception:
+        # Do not leave a partial or empty WAV behind on failure.
+        if os.path.exists(audio_path):
+            os.unlink(audio_path)
+        raise
+    return audio_path
+def transcribe_video(video_file):
+    """Transcribe the speech in a video supplied as raw bytes.
+
+    The bytes are written to a temporary ``.mp4`` file, the audio is
+    extracted with ``extract_audio``, and the resulting file is passed to
+    the module-level ``model``. Both temporary files are removed even when
+    a step fails.
+
+    Args:
+        video_file: The video content as a bytes-like object; its length
+            is also used to report the size in the status message.
+
+    Returns:
+        A ``(transcript, status)`` tuple: the ``"text"`` entry of the model
+        result and a message with the input size in MB and elapsed seconds.
+    """
+    start_time = time.time()
+    # Create temp video file (delete=False so FFmpeg can open it by path
+    # after this handle is closed)
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_video:
+        tmp_video.write(video_file)
+        video_path = tmp_video.name
+    # Extract audio; the temp video is removed whether or not this succeeds
+    try:
+        audio_path = extract_audio(video_path)
+    finally:
+        os.unlink(video_path)
+    # Transcribe; the temp audio is removed whether or not this succeeds
+    try:
+        result = model(audio_path)
+    finally:
+        os.unlink(audio_path)
+    transcript = result["text"]
+    process_time = time.time() - start_time
+    # Get file size
+    file_size = len(video_file) / (1024 * 1024)  # in MB
+    return transcript, f"✅ Processed {file_size:.1f}MB video in {process_time:.1f} seconds"
+# Gradio interface
+with gr.Blocks(title="Free Video Transcriber", theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🎥 Free Video Transcriber")
+    gr.Markdown("Upload any video to transcribe using Whisper Tiny (optimized for CPU)")
+    # Two-column layout: upload controls on the left, results on the right.
+    with gr.Row():
+        with gr.Column():
+            video_input = gr.Video(label="Upload Video", sources=["upload"])
+            transcribe_btn = gr.Button("Transcribe Video", variant="primary")
+        with gr.Column():
+            transcript_output = gr.Textbox(label="Transcript", lines=10, interactive=True)
+            status_output = gr.Textbox(label="Status", interactive=False)
+            download_btn = gr.DownloadButton(label="Download Transcript")
+    # Processing function
+    def process_video(video):
+        """Transcribe the uploaded video and prepare the transcript download.
+
+        Args:
+            video: Value delivered by ``video_input``; it is opened below as
+                a file path, or is ``None`` when nothing was uploaded.
+
+        Returns:
+            A 3-tuple matching the three wired outputs: transcript text,
+            status message, and an update for the download button.
+        """
+        if video is None:
+            # Three outputs are wired to this handler, so every branch must
+            # return three values; gr.update() leaves the button unchanged.
+            return "", "Please upload a video file first", gr.update()
+        # Get video bytes
+        with open(video, "rb") as f:
+            video_bytes = f.read()
+        transcript, status = transcribe_video(video_bytes)
+        # The download button serves a file, so persist the transcript to a
+        # temporary .txt file and hand over its path rather than the text.
+        with tempfile.NamedTemporaryFile(
+            "w", suffix=".txt", delete=False, encoding="utf-8"
+        ) as transcript_file:
+            transcript_file.write(transcript)
+        return transcript, status, gr.update(value=transcript_file.name, visible=True)
+    # Set up button actions
+    transcribe_btn.click(
+        fn=process_video,
+        inputs=video_input,
+        outputs=[transcript_output, status_output, download_btn]
+    )
+    # Info section
+    with gr.Accordion("ℹ️ About this app", open=False):
+        gr.Markdown("""
+        **How it works:**
+        - Uses OpenAI's Whisper Tiny model optimized for CPU
+        - Extracts audio from video using FFmpeg
+        - Transcribes audio to text
+        - Works with MP4, MOV, AVI, MKV, WEBM formats
+        **Performance notes:**
+        - 1 min video: ~10-20 seconds
+        - 5 min video: ~1-2 minutes
+        - 10 min video: ~2-4 minutes
+        **Optimized for:** Hugging Face Spaces free tier (CPU only)
+        """)
+# Launch the app only when this file is executed as a script, so importing
+# the module does not start the Gradio server.
+if __name__ == "__main__":
+    demo.launch()