garyuzair committed on
Commit
f874e3c
·
verified ·
1 Parent(s): 176dbeb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -0
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import tempfile
4
+ import os
5
+ import time
6
+ import ffmpeg
7
+
8
# Build the speech-recognition pipeline; the module keeps one shared instance.
def load_model():
    """Create the Whisper Tiny speech-recognition pipeline pinned to the CPU.

    Returns:
        The object returned by ``transformers.pipeline`` for the
        ``"automatic-speech-recognition"`` task.
    """
    return pipeline(
        "automatic-speech-recognition",
        model="openai/whisper-tiny",
        device="cpu",  # Force CPU usage
        # Whisper processes audio in 30-second windows. Chunking lets the
        # pipeline handle the multi-minute recordings this app advertises
        # instead of truncating or rejecting them.
        chunk_length_s=30,
    )


# Load model at startup
model = load_model()
18
+
19
def extract_audio(video_path):
    """Extract the audio track of a video into a temporary WAV file.

    The output is mono (``ac=1``), 16 kHz (``ar=16000``), 16-bit PCM
    (``pcm_s16le``).

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the newly created ``.wav`` file. The caller is responsible
        for deleting it.

    Raises:
        Exception: Whatever the FFmpeg run raises is propagated after the
            partially written output file has been removed.
    """
    # mkstemp creates the file atomically; the deprecated mktemp only returns
    # a name and is open to races between name generation and file creation.
    fd, audio_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # FFmpeg reopens the path itself.
    try:
        (
            ffmpeg
            .input(video_path)
            .output(audio_path, ac=1, ar=16000, acodec="pcm_s16le")
            .overwrite_output()  # The placeholder file already exists.
            .run(quiet=True, cmd="ffmpeg")
        )
    except Exception:
        # Do not leave the placeholder behind when extraction fails.
        os.unlink(audio_path)
        raise
    return audio_path
29
+
30
def transcribe_video(video_file):
    """Transcribe the speech in a video supplied as raw bytes.

    The bytes are written to a temporary ``.mp4`` file, the audio is
    extracted with ``extract_audio`` and passed to the module-level
    ``model``. Both temporary files are removed even when a step fails.

    Args:
        video_file: The video contents as a bytes-like object.

    Returns:
        A ``(transcript, status)`` tuple: the recognized text and a message
        reporting the input size in MB and the elapsed processing time.
    """
    start_time = time.time()
    video_path = None
    audio_path = None

    try:
        # Create temp video file
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_video:
            # Record the name first so cleanup still happens if the write fails.
            video_path = tmp_video.name
            tmp_video.write(video_file)

        # Extract audio
        audio_path = extract_audio(video_path)

        # Transcribe
        result = model(audio_path)
        transcript = result["text"]
    finally:
        # Clean up whichever temporary files were actually created.
        for path in (video_path, audio_path):
            if path is not None and os.path.exists(path):
                os.unlink(path)

    process_time = time.time() - start_time

    # Get file size
    file_size = len(video_file) / (1024 * 1024)  # in MB

    return transcript, f"✅ Processed {file_size:.1f}MB video in {process_time:.1f} seconds"
55
+
56
# Gradio interface
with gr.Blocks(title="Free Video Transcriber", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎥 Free Video Transcriber")
    gr.Markdown("Upload any video to transcribe using Whisper Tiny (optimized for CPU)")

    with gr.Row():
        with gr.Column():
            video_input = gr.Video(label="Upload Video", sources=["upload"])
            transcribe_btn = gr.Button("Transcribe Video", variant="primary")

        with gr.Column():
            transcript_output = gr.Textbox(label="Transcript", lines=10, interactive=True)
            status_output = gr.Textbox(label="Status", interactive=False)
            download_btn = gr.DownloadButton(label="Download Transcript")

    # Processing function
    def process_video(video):
        """Transcribe the uploaded video and prepare the transcript download.

        Args:
            video: Path of the uploaded video file, or ``None`` when nothing
                has been uploaded.

        Returns:
            A 3-tuple matching the click handler's outputs: transcript text,
            status message, and an update for the download button.
        """
        if video is None:
            # Always return one value per declared output component.
            return "", "Please upload a video file first", gr.update(value=None)

        # Get video bytes
        with open(video, "rb") as f:
            video_bytes = f.read()

        transcript, status = transcribe_video(video_bytes)

        # The download button serves a file, so persist the transcript to disk
        # and hand over its path rather than the text itself.
        with tempfile.NamedTemporaryFile(
            mode="w",
            suffix=".txt",
            prefix="transcript_",
            delete=False,
            encoding="utf-8",
        ) as transcript_file:
            transcript_file.write(transcript)

        return transcript, status, gr.update(value=transcript_file.name, visible=True)

    # Set up button actions
    transcribe_btn.click(
        fn=process_video,
        inputs=video_input,
        outputs=[transcript_output, status_output, download_btn]
    )

    # Info section
    with gr.Accordion("ℹ️ About this app", open=False):
        gr.Markdown("""
        **How it works:**
        - Uses OpenAI's Whisper Tiny model optimized for CPU
        - Extracts audio from video using FFmpeg
        - Transcribes audio to text
        - Works with MP4, MOV, AVI, MKV, WEBM formats

        **Performance notes:**
        - 1 min video: ~10-20 seconds
        - 5 min video: ~1-2 minutes
        - 10 min video: ~2-4 minutes

        **Optimized for:** Hugging Face Spaces free tier (CPU only)
        """)

# Launch the app
if __name__ == "__main__":
    demo.launch()