Offex committed on
Commit
67748bb
·
verified ·
1 Parent(s): fbfb3b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +240 -123
app.py CHANGED
@@ -3,201 +3,318 @@ import yt_dlp
3
  import os
4
  import shutil
5
  import subprocess
 
6
  from faster_whisper import WhisperModel
7
  from indic_transliteration import sanscript
8
  from indic_transliteration.sanscript import transliterate
 
9
 
10
  # ===============================
11
- # Whisper Model (lazy load)
12
  # ===============================
13
- model = None
 
14
 
15
- def load_model():
16
- global model
17
- if model is None:
18
- model = WhisperModel("base", device="cpu", compute_type="int8")
19
- return model
 
 
 
 
 
 
 
 
 
 
20
 
21
- # ===============================
22
- # FFmpeg path
23
- # ===============================
24
  def get_ffmpeg():
25
  return shutil.which("ffmpeg") or "/usr/bin/ffmpeg"
26
 
27
  # ===============================
28
- # SAFE: Download video only (NO postprocessing)
29
  # ===============================
30
- def download_video_only(url):
31
- video_path = "downloaded_video.mp4"
32
-
33
  if os.path.exists(video_path):
34
  os.remove(video_path)
35
 
36
  ydl_opts = {
37
- "format": "best",
38
  "outtmpl": video_path,
39
  "quiet": True,
40
  "nocheckcertificate": True,
 
 
 
41
  }
42
 
43
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
44
- ydl.download([url])
45
-
46
- return video_path
 
 
 
 
 
47
 
48
  # ===============================
49
- # SAFE: Extract audio manually (NO ffprobe)
50
  # ===============================
51
- def extract_audio_safe(video_path):
52
- audio_path = "extracted_audio.wav"
53
-
54
  if os.path.exists(audio_path):
55
  os.remove(audio_path)
56
 
57
- subprocess.run(
58
- [
59
- get_ffmpeg(),
60
- "-y",
61
- "-i", video_path,
62
- "-vn",
63
- "-ac", "1",
64
- "-ar", "16000",
65
- audio_path
66
- ],
67
- stdout=subprocess.DEVNULL,
68
- stderr=subprocess.DEVNULL
69
- )
70
 
71
- return audio_path
 
 
 
 
 
 
 
 
72
 
73
  # ===============================
74
- # Hindi script normalizer
75
  # ===============================
76
- def normalize_script(text, lang):
77
- if lang == "hi":
78
- try:
79
- return transliterate(text, sanscript.ARABIC, sanscript.DEVANAGARI)
80
- except:
81
- return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  return text
83
 
84
  # ===============================
85
- # Transcription logic (STABLE)
86
  # ===============================
87
- def transcribe(url, file, lang_choice):
88
  try:
89
- # -------- FILE MODE --------
90
  if file:
91
  ext = os.path.splitext(file)[1].lower()
92
- if ext in [".mp3", ".wav", ".m4a"]:
93
- audio = file
 
94
  else:
95
- audio = extract_audio_safe(file)
96
-
97
- # -------- URL MODE --------
98
  elif url:
99
- video = download_video_only(url)
100
- audio = extract_audio_safe(video)
101
-
102
  else:
103
  return "โš ๏ธ Please paste a URL or upload a file."
104
 
105
  # Safety check
106
- if not os.path.exists(audio) or os.path.getsize(audio) < 10000:
107
- return "โŒ Audio extraction failed. Please try again."
108
-
109
- model = load_model()
110
- language = None if lang_choice == "Auto Detect" else lang_choice
111
-
112
  segments, info = model.transcribe(
113
- audio,
114
- beam_size=1,
 
 
 
115
  vad_filter=True,
116
- language=language
 
117
  )
118
 
119
- raw_text = " ".join(s.text for s in segments)
120
- final_text = normalize_script(raw_text, info.language)
 
 
 
121
 
122
- return f"๐ŸŒ Detected Language: {info.language}\n\n{final_text.strip()}"
 
 
 
 
123
 
124
  except Exception as e:
125
- if "instagram" in str(e).lower():
126
- return "โŒ Instagram URL is blocked on Hugging Face. Please upload the video file instead."
127
- return f"โŒ Error: {str(e)}"
 
 
 
 
 
 
 
128
 
129
  # ===============================
130
- # MODERN UI
131
  # ===============================
132
- css = """
 
133
  body {
134
- background: radial-gradient(circle at top, #0f2027, #203a43, #2c5364);
 
135
  }
136
- .glass {
137
- background: rgba(255,255,255,0.08);
138
- backdrop-filter: blur(18px);
139
- border-radius: 18px;
140
- padding: 25px;
141
- box-shadow: 0 20px 40px rgba(0,0,0,0.4);
142
  }
143
  .gr-button-primary {
144
- background: linear-gradient(135deg,#00c6ff,#0072ff);
145
  border: none;
146
  color: white;
147
  font-weight: 600;
 
 
 
148
  }
149
- .gr-input, .gr-textarea {
150
- background: rgba(255,255,255,0.12) !important;
151
- color: white !important;
152
  }
153
- h1, h2, label, .markdown-text {
154
- color: #ffffff !important;
 
 
 
155
  }
156
- footer {display:none;}
157
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
- with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
160
- with gr.Column(elem_classes="glass"):
161
- gr.Markdown("## ๐Ÿš€ Universal Transcript Tool (STABLE)")
 
 
 
 
 
 
 
 
 
 
162
  gr.Markdown(
163
- "โœ” YouTube โœ” TikTok โœ” Facebook โœ” Twitter/X\n\n"
164
- "โš ๏ธ Instagram URL blocked on Hugging Face โ†’ **Upload video instead**\n\n"
165
- "**No random ffprobe errors. Ever.**"
166
  )
167
 
168
  with gr.Tabs():
169
- with gr.TabItem("๐Ÿ”— Paste Link"):
170
- url = gr.Textbox(label="Video URL")
171
- btn_url = gr.Button("๐ŸŽง Transcribe Link", variant="primary")
172
-
173
- with gr.TabItem("๐Ÿ“‚ Upload File"):
174
- file = gr.File(
175
- label="Upload Video / Audio",
176
- file_types=[".mp4", ".mkv", ".mov", ".webm", ".avi", ".mp3", ".wav"]
177
  )
178
- btn_file = gr.Button("๐Ÿ“‚ Transcribe File", variant="primary")
179
-
180
- lang = gr.Dropdown(
181
- label="๐ŸŒ Transcript Language",
182
- choices=[
183
- "Auto Detect",
184
- "hi",
185
- "ur",
186
- "en",
187
- "ar",
188
- "fr",
189
- "de",
190
- "es",
191
- "ru",
192
- "ja",
193
- "zh"
194
- ],
195
- value="Auto Detect"
 
 
 
 
 
 
 
 
 
 
 
196
  )
197
 
198
- output = gr.Code(label="Transcript Output", lines=14)
 
 
 
 
 
 
 
 
 
 
199
 
200
- btn_url.click(transcribe, [url, gr.State(None), lang], output)
201
- btn_file.click(transcribe, [gr.State(None), file, lang], output)
202
 
203
- demo.launch()
 
 
3
  import os
4
  import shutil
5
  import subprocess
6
+ import tempfile
7
  from faster_whisper import WhisperModel
8
  from indic_transliteration import sanscript
9
  from indic_transliteration.sanscript import transliterate
10
+ import torch
11
 
12
  # ===============================
13
+ # ๐Ÿ”’ GLOBALS & CONFIG
14
  # ===============================
15
+ MODEL_CACHE_DIR = "/tmp/qwen_whisper_cache"
16
+ os.makedirs(MODEL_CACHE_DIR, exist_ok=True)
17
 
18
+ # Lazy-loaded model (shared across calls)
19
+ _model = None
20
+
21
+ def load_whisper_model():
22
+ global _model
23
+ if _model is None:
24
+ print("๐Ÿ“ฅ Loading Whisper 'base' model (CPU/int8)...")
25
+ _model = WhisperModel(
26
+ "base",
27
+ device="cpu",
28
+ compute_type="int8",
29
+ download_root=MODEL_CACHE_DIR
30
+ )
31
+ print("โœ… Model loaded.")
32
+ return _model
33
 
 
 
 
34
  def get_ffmpeg():
35
  return shutil.which("ffmpeg") or "/usr/bin/ffmpeg"
36
 
37
  # ===============================
38
+ # ๐Ÿ“ฅ SAFE DOWNLOAD (YouTube, TikTok, etc.)
39
  # ===============================
40
+ def download_video(url):
41
+ video_path = os.path.join(tempfile.gettempdir(), "downloaded_video.mp4")
 
42
  if os.path.exists(video_path):
43
  os.remove(video_path)
44
 
45
  ydl_opts = {
46
+ "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
47
  "outtmpl": video_path,
48
  "quiet": True,
49
  "nocheckcertificate": True,
50
+ "noplaylist": True, "extract_audio": False,
51
+ "retries": 10,
52
+ "fragment_retries": 10,
53
  }
54
 
55
+ try:
56
+ with yt_dlp.YoutubeDL(ydl_opts) as ydl:
57
+ info = ydl.extract_info(url, download=True)
58
+ # Ensure file exists
59
+ if not os.path.exists(video_path):
60
+ raise FileNotFoundError("Download failed: no file created")
61
+ return video_path, info.get("title", "Untitled")
62
+ except Exception as e:
63
+ raise RuntimeError(f"Download failed: {str(e)}")
64
 
65
  # ===============================
66
+ # ๐ŸŽง EXTRACT AUDIO (robust)
67
  # ===============================
68
+ def extract_audio(video_path):
69
+ audio_path = os.path.join(tempfile.gettempdir(), "extracted_audio.wav")
 
70
  if os.path.exists(audio_path):
71
  os.remove(audio_path)
72
 
73
+ cmd = [
74
+ get_ffmpeg(),
75
+ "-y",
76
+ "-i", video_path,
77
+ "-vn",
78
+ "-ac", "1",
79
+ "-ar", "16000",
80
+ "-c:a", "pcm_s16le",
81
+ audio_path
82
+ ]
 
 
 
83
 
84
+ try:
85
+ result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
86
+ if result.returncode != 0:
87
+ raise RuntimeError(f"FFmpeg failed: {result.stderr}")
88
+ if not os.path.exists(audio_path) or os.path.getsize(audio_path) < 5000:
89
+ raise RuntimeError("Audio extraction produced empty/invalid file")
90
+ return audio_path
91
+ except subprocess.TimeoutExpired:
92
+ raise RuntimeError("Audio extraction timed out (>60s)")
93
 
94
  # ===============================
95
+ # ๐ŸŒ LANGUAGE-AWARE TRANSLITERATION & NORMALIZATION
96
  # ===============================
97
+ def normalize_to_hindi(text):
98
+ """Convert any script to Devanagari + clean up"""
99
+ if not text.strip(): return ""
100
+
101
+ # Step 1: Transliterate non-Devanagari scripts to Devanagari
102
+ try:
103
+ # Try Arabic → Devanagari (for Urdu)
104
+ text = transliterate(text, sanscript.ARABIC, sanscript.DEVANAGARI)
105
+ # Try Roman → Devanagari (for Hindi/English mixed)
106
+ text = transliterate(text, sanscript.ITRANS, sanscript.DEVANAGARI)
107
+ except Exception:
108
+ pass # fallback to raw text
109
+
110
+ # Step 2: Clean punctuation & spacing
111
+ import re
112
+ text = re.sub(r'[^\u0900-\u097F\u0020\u002E\u002C\u003F\u0021\u003B\u003A\u002D\u0028\u0029]', '', text)
113
+ text = re.sub(r'\s+', ' ', text).strip()
114
+ text = re.sub(r'\.\s*\.', '.', text) # fix .. → .
115
+ text = re.sub(r'\?\s*\?', '?', text)
116
+ text = re.sub(r'!\s*!', '!', text)
117
+
118
+ # Step 3: Add proper full stops at end if missing
119
+ if text and text[-1] not in "।.!?":
120
+ text += "।"
121
+
122
  return text
123
 
124
  # ===============================
125
+ # ๐ŸŽฏ CORE TRANSCRIBE FUNCTION (ALWAYS OUTPUT HINDI)
126
  # ===============================
127
+ def transcribe_to_hindi(url=None, file=None, lang_choice="Auto Detect"):
128
  try:
129
+ # ======== INPUT HANDLING ========
130
  if file:
131
  ext = os.path.splitext(file)[1].lower()
132
+ if ext in [".mp3", ".wav", ".m4a", ".ogg"]:
133
+ audio_path = file
134
+ title = os.path.basename(file)
135
  else:
136
+ video_path = file
137
+ audio_path = extract_audio(video_path)
138
+ title = os.path.basename(video_path)
139
  elif url:
140
+ video_path, title = download_video(url)
141
+ audio_path = extract_audio(video_path)
 
142
  else:
143
  return "⚠️ Please paste a URL or upload a file."
144
 
145
  # Safety check
146
+ if not os.path.exists(audio_path) or os.path.getsize(audio_path) < 5000:
147
+ return "โŒ Audio file too small or missing. Try again."
148
+ # ======== TRANSCRIPTION ========
149
+ model = load_whisper_model()
 
 
150
  segments, info = model.transcribe(
151
+ audio_path,
152
+ beam_size=5,
153
+ best_of=3,
154
+ patience=1.0,
155
+ temperature=(0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
156
  vad_filter=True,
157
+ word_timestamps=False,
158
+ language=None # Auto-detect
159
  )
160
 
161
+ raw_text = " ".join([seg.text for seg in segments]).strip()
162
+
163
+ # ======== FORCE HINDI OUTPUT ========
164
+ # Even if detected language is en/ur/tam, convert to Hindi script
165
+ final_text = normalize_to_hindi(raw_text)
166
 
167
+ # Optional: Add title & metadata
168
+ header = f"๐ŸŽฌ {title[:50]}{'...' if len(title) > 50 else ''}\n"
169
+ header += f"๐ŸŒ Detected: {info.language or 'Unknown'} โ†’ ๐Ÿ‡ฎ๐Ÿ‡ณ Output: Hindi (Devanagari)\n\n"
170
+
171
+ return header + final_text
172
 
173
  except Exception as e:
174
+ err_msg = str(e).lower()
175
+ if "instagram" in err_msg:
176
+ return (
177
+ "โŒ Instagram URLs are blocked on Hugging Face.\n\n"
178
+ "โœ… Solution: Download the video manually (e.g., via online downloader), then upload it here."
179
+ )
180
+ elif "timeout" in err_msg or "network" in err_msg:
181
+ return "⚠️ Network timeout. Try again or upload file directly."
182
+ else:
183
+ return f"โŒ Error: {str(e)[:200]}..."
184
 
185
  # ===============================
186
+ # ๐ŸŽจ MODERN UI (HUGGING FACE OPTIMIZED)
187
  # ===============================
188
+ CSS = """
189
+ /* Glassmorphism + Dark Gradient */
190
  body {
191
+ background: radial-gradient(circle at top, #0c1445, #1a2a6c, #2c3e50);
192
+ font-family: 'Inter', system-ui, sans-serif;
193
  }
194
+ .glass-card {
195
+ background: rgba(255, 255, 255, 0.07);
196
+ backdrop-filter: blur(18px);
197
+ border-radius: 20px; padding: 28px;
198
+ box-shadow: 0 12px 32px rgba(0, 0, 0, 0.4);
199
+ border: 1px solid rgba(255, 255, 255, 0.1);
200
  }
201
  .gr-button-primary {
202
+ background: linear-gradient(135deg, #6a11cb 0%, #2575fc 100%);
203
  border: none;
204
  color: white;
205
  font-weight: 600;
206
+ padding: 12px 24px;
207
+ border-radius: 12px;
208
+ transition: all 0.3s ease;
209
  }
210
+ .gr-button-primary:hover {
211
+ transform: translateY(-2px);
212
+ box-shadow: 0 6px 15px rgba(37, 117, 252, 0.4);
213
  }
214
+ .gr-input, .gr-textarea, .gr-dropdown {
215
+ background: rgba(255, 255, 255, 0.08) !important;
216
+ color: #e0e0ff !important;
217
+ border: 1px solid rgba(255, 255, 255, 0.15) !important;
218
+ border-radius: 10px;
219
  }
220
+ .gr-markdown p, .gr-markdown h2 {
221
+ color: #f0f4ff !important;
222
+ }
223
+ footer { display: none !important; }
224
+ .title {
225
+ font-size: 2.2rem;
226
+ font-weight: 800;
227
+ background: linear-gradient(90deg, #ffd700, #ff8c00);
228
+ -webkit-background-clip: text;
229
+ background-clip: text;
230
+ color: transparent;
231
+ margin-bottom: 12px;
232
+ }
233
+ .subtitle {
234
+ color: #a0d2eb;
235
+ font-size: 1.1rem;
236
+ margin-bottom: 24px;
237
+ }
238
+ .feature-badge {
239
+ display: inline-block;
240
+ background: rgba(106, 17, 203, 0.3);
241
+ color: #ffd700;
242
+ padding: 3px 10px;
243
+ border-radius: 20px;
244
+ font-size: 0.85rem;
245
+ margin: 0 4px;
246
+ }"""
247
 
248
+ with gr.Blocks(
249
+ css=CSS,
250
+ theme=gr.themes.Default(
251
+ primary_hue=gr.themes.Color(c100="#6a11cb", c200="#2575fc", c300="#1a5fb4"),
252
+ secondary_hue=gr.themes.Color(c100="#ff9e00", c200="#ff7b00"),
253
+ neutral_hue=gr.themes.Color(c100="#1e293b", c200="#0f172a"),
254
+ ),
255
+ title="๐Ÿ—ฃ๏ธ AI Hindi Transcript Studio",
256
+ ) as demo:
257
+ with gr.Column(elem_classes=["glass-card"]):
258
+ gr.HTML("<div class='title'>AI Hindi Transcript Studio</div>")
259
+ gr.HTML("<div class='subtitle'>Upload or paste any video → Get clean Devanagari Hindi transcript instantly</div>")
260
+
261
  gr.Markdown(
262
+ "✨ Supports: YouTube, TikTok, Facebook, Twitter/X, Instagram (via upload), local files<br>"
263
+ "⚡ Zero ffprobe errors • Auto-script conversion • Real-time cleanup"
 
264
  )
265
 
266
  with gr.Tabs():
267
+ with gr.TabItem("๐Ÿ”— URL"):
268
+ url_input = gr.Textbox(
269
+ label="๐ŸŽฅ Video URL",
270
+ placeholder="https://youtu.be/...",
271
+ info="Instagram? Upload file instead (HF restriction)"
 
 
 
272
  )
273
+ btn_url = gr.Button("๐Ÿ”Š Transcribe to Hindi", variant="primary", size="lg")
274
+
275
+ with gr.TabItem("๐Ÿ“‚ File"):
276
+ file_input = gr.File(
277
+ label="๐Ÿ“ Upload Video/Audio",
278
+ file_types=["video", "audio"],
279
+ info="MP4, MOV, MP3, WAV, M4A, etc."
280
+ )
281
+ btn_file = gr.Button("๐Ÿ“– Convert to Hindi", variant="primary", size="lg")
282
+
283
+ lang_dummy = gr.Dropdown(
284
+ choices=["Auto (→ Hindi)"],
285
+ value="Auto (→ Hindi)",
286
+ interactive=False,
287
+ visible=False
288
+ ) # Hidden — we force Hindi output
289
+
290
+ output_box = gr.Textbox(
291
+ label="๐Ÿ“ Hindi Transcript (Devanagari)",
292
+ lines=16,
293
+ max_lines=25,
294
+ show_copy_button=True,
295
+ interactive=False, elem_classes=["gr-textarea"]
296
+ )
297
+
298
+ gr.Markdown(
299
+ "<div style='text-align:center; margin-top:20px; color:#a0d2eb; font-size:0.9rem;'>"
300
+ "๐Ÿš€ Powered by Faster-Whisper + Indic Transliteration | Deployed on Hugging Face Spaces"
301
+ "</div>"
302
  )
303
 
304
+ # Event bindings
305
+ btn_url.click(
306
+ fn=transcribe_to_hindi,
307
+ inputs=[url_input, gr.State(None), lang_dummy],
308
+ outputs=output_box
309
+ )
310
+ btn_file.click(
311
+ fn=transcribe_to_hindi,
312
+ inputs=[gr.State(None), file_input, lang_dummy],
313
+ outputs=output_box
314
+ )
315
 
316
+ # Optional: Enable queue for HF Spaces
317
+ demo.queue(concurrency_count=2, max_size=10)
318
 
319
+ if __name__ == "__main__":
320
+ demo.launch(server_name="0.0.0.0", server_port=7860)