Offex committed on
Commit
d816888
·
verified ·
1 Parent(s): ef9a67d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -25
app.py CHANGED
@@ -6,7 +6,7 @@ import subprocess
6
  from faster_whisper import WhisperModel
7
 
8
  # ===============================
9
- # 1. Whisper Model (Lazy Load)
10
  # ===============================
11
  model = None
12
 
@@ -26,7 +26,7 @@ def get_ffmpeg_path():
26
  return path if path else "/usr/bin/ffmpeg"
27
 
28
  # ===============================
29
- # 3. Convert Video → Audio
30
  # ===============================
31
  def extract_audio(video_path):
32
  audio_path = "uploaded_audio.wav"
@@ -46,7 +46,7 @@ def extract_audio(video_path):
46
  return audio_path
47
 
48
  # ===============================
49
- # 4. Download Audio from ANY URL
50
  # ===============================
51
  def download_audio_from_url(url):
52
  output = "url_audio.%(ext)s"
@@ -58,7 +58,6 @@ def download_audio_from_url(url):
58
  "postprocessors": [{
59
  "key": "FFmpegExtractAudio",
60
  "preferredcodec": "wav",
61
- "preferredquality": "192",
62
  }],
63
  "quiet": True,
64
  "nocheckcertificate": True,
@@ -70,17 +69,16 @@ def download_audio_from_url(url):
70
  return "url_audio.wav"
71
 
72
  # ===============================
73
- # 5. Main Transcribe Logic
74
  # ===============================
75
- def transcribe_media(url_input, file_input):
76
 
77
  try:
78
  audio_path = None
79
 
80
- # ---------- FILE UPLOAD ----------
81
  if file_input:
82
  ext = os.path.splitext(file_input)[1].lower()
83
-
84
  if ext in [".mp3", ".wav", ".m4a"]:
85
  audio_path = file_input
86
  else:
@@ -91,21 +89,28 @@ def transcribe_media(url_input, file_input):
91
  audio_path = download_audio_from_url(url_input)
92
 
93
  else:
94
- return "⚠️ Please paste a link or upload a file."
95
 
96
  if not os.path.exists(audio_path):
97
  return "❌ Audio processing failed."
98
 
99
  model = load_model()
100
 
101
- segments, _ = model.transcribe(
 
 
 
102
  audio_path,
103
  beam_size=1,
104
- vad_filter=True
 
105
  )
106
 
 
 
107
  text = " ".join(seg.text for seg in segments)
108
- return text.strip() if text else "⚠️ No speech detected."
 
109
 
110
  except Exception as e:
111
  return f"❌ Error: {str(e)}"
@@ -116,7 +121,7 @@ def transcribe_media(url_input, file_input):
116
  css = """
117
  .container {max-width: 900px; margin: auto;}
118
  .gr-button-primary {
119
- background: linear-gradient(90deg,#ff416c,#ff4b2b);
120
  border: none;
121
  color: white;
122
  }
@@ -124,18 +129,12 @@ css = """
124
 
125
  with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
126
  with gr.Column(elem_classes="container"):
127
- gr.Markdown("## 🚀 Universal Video Transcript Tool")
128
- gr.Markdown(
129
- "Supports **YouTube, TikTok, Instagram, Facebook, Twitter/X**\n\n"
130
- "**OR** upload video/audio file."
131
- )
132
 
133
  with gr.Tabs():
134
  with gr.TabItem("🔗 Paste Link"):
135
- url_in = gr.Textbox(
136
- label="Video URL",
137
- placeholder="https://youtube.com / tiktok.com / instagram.com"
138
- )
139
  btn_url = gr.Button("🎧 Transcribe Link", variant="primary")
140
 
141
  with gr.TabItem("📂 Upload File"):
@@ -145,9 +144,28 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
145
  )
146
  btn_file = gr.Button("📂 Transcribe File", variant="primary")
147
 
148
- output = gr.Code(label="Transcript Output", language="markdown", lines=15)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
149
 
150
- btn_url.click(transcribe_media, [url_in, gr.State(None)], output)
151
- btn_file.click(transcribe_media, [gr.State(None), file_in], output)
152
 
153
  demo.launch()
 
6
  from faster_whisper import WhisperModel
7
 
8
  # ===============================
9
+ # 1. Whisper Model
10
  # ===============================
11
  model = None
12
 
 
26
  return path if path else "/usr/bin/ffmpeg"
27
 
28
  # ===============================
29
+ # 3. Extract Audio
30
  # ===============================
31
  def extract_audio(video_path):
32
  audio_path = "uploaded_audio.wav"
 
46
  return audio_path
47
 
48
  # ===============================
49
+ # 4. Download Audio
50
  # ===============================
51
  def download_audio_from_url(url):
52
  output = "url_audio.%(ext)s"
 
58
  "postprocessors": [{
59
  "key": "FFmpegExtractAudio",
60
  "preferredcodec": "wav",
 
61
  }],
62
  "quiet": True,
63
  "nocheckcertificate": True,
 
69
  return "url_audio.wav"
70
 
71
  # ===============================
72
+ # 5. Transcribe Function
73
  # ===============================
74
+ def transcribe_media(url_input, file_input, language_choice):
75
 
76
  try:
77
  audio_path = None
78
 
79
+ # ---------- FILE ----------
80
  if file_input:
81
  ext = os.path.splitext(file_input)[1].lower()
 
82
  if ext in [".mp3", ".wav", ".m4a"]:
83
  audio_path = file_input
84
  else:
 
89
  audio_path = download_audio_from_url(url_input)
90
 
91
  else:
92
+ return "⚠️ Please provide URL or Upload file."
93
 
94
  if not os.path.exists(audio_path):
95
  return "❌ Audio processing failed."
96
 
97
  model = load_model()
98
 
99
+ # Language Handling
100
+ language = None if language_choice == "Auto Detect" else language_choice
101
+
102
+ segments, info = model.transcribe(
103
  audio_path,
104
  beam_size=1,
105
+ vad_filter=True,
106
+ language=language
107
  )
108
 
109
+ detected_lang = info.language
110
+
111
  text = " ".join(seg.text for seg in segments)
112
+
113
+ return f"🌍 Detected Language: {detected_lang}\n\n{text.strip()}"
114
 
115
  except Exception as e:
116
  return f"❌ Error: {str(e)}"
 
121
  css = """
122
  .container {max-width: 900px; margin: auto;}
123
  .gr-button-primary {
124
+ background: linear-gradient(90deg,#667eea,#764ba2);
125
  border: none;
126
  color: white;
127
  }
 
129
 
130
  with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
131
  with gr.Column(elem_classes="container"):
132
+ gr.Markdown("## 🚀 Universal Transcript Tool")
133
+ gr.Markdown("Supports YouTube, TikTok, Instagram, Facebook, Twitter/X")
 
 
 
134
 
135
  with gr.Tabs():
136
  with gr.TabItem("🔗 Paste Link"):
137
+ url_in = gr.Textbox(label="Video URL")
 
 
 
138
  btn_url = gr.Button("🎧 Transcribe Link", variant="primary")
139
 
140
  with gr.TabItem("📂 Upload File"):
 
144
  )
145
  btn_file = gr.Button("📂 Transcribe File", variant="primary")
146
 
147
+ # 🌍 Language Selector
148
+ language_selector = gr.Dropdown(
149
+ choices=[
150
+ "Auto Detect",
151
+ "en", # English
152
+ "hi", # Hindi
153
+ "ur", # Urdu
154
+ "ar", # Arabic
155
+ "fr", # French
156
+ "de", # German
157
+ "es", # Spanish
158
+ "ru", # Russian
159
+ "ja", # Japanese
160
+ "zh" # Chinese
161
+ ],
162
+ value="Auto Detect",
163
+ label="🌍 Select Transcript Language"
164
+ )
165
+
166
+ output = gr.Code(label="Transcript Output", lines=15)
167
 
168
+ btn_url.click(transcribe_media, [url_in, gr.State(None), language_selector], output)
169
+ btn_file.click(transcribe_media, [gr.State(None), file_in, language_selector], output)
170
 
171
  demo.launch()