Offex committed on
Commit
fbfb3b5
·
verified ·
1 Parent(s): eeb950d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +96 -80
app.py CHANGED
@@ -4,63 +4,38 @@ import os
4
  import shutil
5
  import subprocess
6
  from faster_whisper import WhisperModel
7
-
8
- # 🔤 Hindi Script Fix
9
  from indic_transliteration import sanscript
10
  from indic_transliteration.sanscript import transliterate
11
 
12
  # ===============================
13
- # 1. Whisper Model (Lazy Load)
14
  # ===============================
15
  model = None
16
 
17
  def load_model():
18
  global model
19
  if model is None:
20
- print("📥 Loading Whisper Model...")
21
  model = WhisperModel("base", device="cpu", compute_type="int8")
22
- print("✅ Model Loaded")
23
  return model
24
 
25
  # ===============================
26
- # 2. FFmpeg Path
27
  # ===============================
28
  def get_ffmpeg():
29
  return shutil.which("ffmpeg") or "/usr/bin/ffmpeg"
30
 
31
  # ===============================
32
- # 3. Video Audio
33
  # ===============================
34
- def extract_audio(video_path):
35
- audio_path = "uploaded_audio.wav"
36
- if os.path.exists(audio_path):
37
- os.remove(audio_path)
38
 
39
- cmd = [
40
- get_ffmpeg(),
41
- "-i", video_path,
42
- "-vn",
43
- "-ac", "1",
44
- "-ar", "16000",
45
- audio_path,
46
- "-y"
47
- ]
48
- subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
49
- return audio_path
50
-
51
- # ===============================
52
- # 4. Download Audio from URL
53
- # ===============================
54
- def download_audio_from_url(url):
55
- output = "url_audio"
56
 
57
  ydl_opts = {
58
- "format": "bestaudio/best",
59
- "outtmpl": output,
60
- "postprocessors": [{
61
- "key": "FFmpegExtractAudio",
62
- "preferredcodec": "wav",
63
- }],
64
  "quiet": True,
65
  "nocheckcertificate": True,
66
  }
@@ -68,10 +43,35 @@ def download_audio_from_url(url):
68
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
69
  ydl.download([url])
70
 
71
- return "url_audio.wav"
72
 
73
  # ===============================
74
- # 5. Hindi Script Normalizer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  # ===============================
76
  def normalize_script(text, lang):
77
  if lang == "hi":
@@ -82,91 +82,108 @@ def normalize_script(text, lang):
82
  return text
83
 
84
  # ===============================
85
- # 6. Main Transcribe Logic
86
  # ===============================
87
- def transcribe_media(url_input, file_input, language_choice):
88
-
89
  try:
90
- audio_path = None
91
-
92
- # ---------- FILE ----------
93
- if file_input:
94
- ext = os.path.splitext(file_input)[1].lower()
95
  if ext in [".mp3", ".wav", ".m4a"]:
96
- audio_path = file_input
97
  else:
98
- audio_path = extract_audio(file_input)
99
 
100
- # ---------- URL ----------
101
- elif url_input and url_input.strip():
102
- audio_path = download_audio_from_url(url_input)
 
103
 
104
  else:
105
- return "⚠️ Please paste a link or upload a file."
106
 
107
- if not os.path.exists(audio_path):
108
- return "❌ Audio processing failed."
 
109
 
110
  model = load_model()
111
-
112
- # Language handling
113
- language = None if language_choice == "Auto Detect" else language_choice
114
 
115
  segments, info = model.transcribe(
116
- audio_path,
117
  beam_size=1,
118
  vad_filter=True,
119
  language=language
120
  )
121
 
122
- detected_lang = info.language
123
- raw_text = " ".join(seg.text for seg in segments)
124
- final_text = normalize_script(raw_text, detected_lang)
125
 
126
- return f"🌍 Detected Language: {detected_lang}\n\n{final_text.strip()}"
127
 
128
  except Exception as e:
 
 
129
  return f"❌ Error: {str(e)}"
130
 
131
  # ===============================
132
- # 7. UI
133
  # ===============================
134
  css = """
135
- .container {max-width: 900px; margin: auto;}
 
 
 
 
 
 
 
 
 
136
  .gr-button-primary {
137
- background: linear-gradient(90deg,#667eea,#764ba2);
138
  border: none;
139
  color: white;
 
 
 
 
 
 
 
 
140
  }
 
141
  """
142
 
143
- with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
144
- with gr.Column(elem_classes="container"):
145
- gr.Markdown("## 🚀 Universal Transcript Tool")
146
  gr.Markdown(
147
- "Supports **YouTube, TikTok, Instagram, Facebook, Twitter/X**\n\n"
148
- "Hindi output is always **Devanagari** 🇮🇳"
 
149
  )
150
 
151
  with gr.Tabs():
152
  with gr.TabItem("🔗 Paste Link"):
153
- url_in = gr.Textbox(label="Video URL")
154
  btn_url = gr.Button("🎧 Transcribe Link", variant="primary")
155
 
156
  with gr.TabItem("📂 Upload File"):
157
- file_in = gr.File(
158
  label="Upload Video / Audio",
159
  file_types=[".mp4", ".mkv", ".mov", ".webm", ".avi", ".mp3", ".wav"]
160
  )
161
  btn_file = gr.Button("📂 Transcribe File", variant="primary")
162
 
163
- # 🌍 Language Selector
164
- language_selector = gr.Dropdown(
165
  choices=[
166
  "Auto Detect",
167
- "hi", # Hindi (Devanagari)
168
- "ur", # Urdu
169
- "en", # English
170
  "ar",
171
  "fr",
172
  "de",
@@ -175,13 +192,12 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
175
  "ja",
176
  "zh"
177
  ],
178
- value="Auto Detect",
179
- label="🌍 Select Transcript Language"
180
  )
181
 
182
- output = gr.Code(label="Transcript Output", lines=15)
183
 
184
- btn_url.click(transcribe_media, [url_in, gr.State(None), language_selector], output)
185
- btn_file.click(transcribe_media, [gr.State(None), file_in, language_selector], output)
186
 
187
  demo.launch()
 
4
  import shutil
5
  import subprocess
6
  from faster_whisper import WhisperModel
 
 
7
  from indic_transliteration import sanscript
8
  from indic_transliteration.sanscript import transliterate
9
 
10
  # ===============================
11
+ # Whisper Model (lazy load)
12
  # ===============================
13
model = None


def load_model():
    """Return the shared Whisper model, building it on first use.

    The instance is stored in the module-level ``model`` variable and
    handed back unchanged by every later call. It is created as the
    "base" model on CPU with int8 compute.
    """
    global model
    if model is not None:
        return model
    model = WhisperModel("base", device="cpu", compute_type="int8")
    return model
20
 
21
  # ===============================
22
+ # FFmpeg path
23
  # ===============================
24
def get_ffmpeg():
    """Return the ffmpeg executable to run.

    Prefers the binary found on PATH and falls back to
    ``/usr/bin/ffmpeg`` when none is found.
    """
    located = shutil.which("ffmpeg")
    if located:
        return located
    return "/usr/bin/ffmpeg"
26
 
27
  # ===============================
28
+ # SAFE: Download video only (NO postprocessing)
29
  # ===============================
30
+ def download_video_only(url):
31
+ video_path = "downloaded_video.mp4"
 
 
32
 
33
+ if os.path.exists(video_path):
34
+ os.remove(video_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
 
36
  ydl_opts = {
37
+ "format": "best",
38
+ "outtmpl": video_path,
 
 
 
 
39
  "quiet": True,
40
  "nocheckcertificate": True,
41
  }
 
43
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
44
  ydl.download([url])
45
 
46
+ return video_path
47
 
48
  # ===============================
49
+ # SAFE: Extract audio manually (NO ffprobe)
50
+ # ===============================
51
def extract_audio_safe(video_path):
    """Convert a media file to a mono 16 kHz WAV by calling ffmpeg.

    Args:
        video_path: Path of the input media file, passed to ffmpeg via ``-i``.

    Returns:
        The fixed output path ``"extracted_audio.wav"``. The file is only
        left on disk when ffmpeg exited with status 0, so callers must
        check that it exists before using it.
    """
    audio_path = "extracted_audio.wav"

    def _discard_output():
        # Remove-and-ignore avoids the exists()/remove() race.
        try:
            os.remove(audio_path)
        except FileNotFoundError:
            pass

    # Drop the result of any earlier call so a failed run can never be
    # mistaken for a fresh extraction.
    _discard_output()

    completed = subprocess.run(
        [
            get_ffmpeg(),
            "-y",
            "-i", video_path,
            "-vn",           # skip video streams
            "-ac", "1",      # one audio channel
            "-ar", "16000",  # 16 kHz sample rate
            audio_path,
        ],
        # ffmpeg reads interactive commands from stdin; give it nothing so
        # it cannot consume or wait on the server process's input.
        stdin=subprocess.DEVNULL,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    # A non-zero exit can leave a truncated file behind; remove it so the
    # caller's existence check reports the failure.
    if completed.returncode != 0:
        _discard_output()

    return audio_path
72
+
73
+ # ===============================
74
+ # Hindi script normalizer
75
  # ===============================
76
  def normalize_script(text, lang):
77
  if lang == "hi":
 
82
  return text
83
 
84
  # ===============================
85
+ # Transcription logic (STABLE)
86
  # ===============================
87
def transcribe(url, file, lang_choice):
    """Transcribe an uploaded media file or the media behind a URL.

    An uploaded file takes priority over the URL. Files ending in
    ``.mp3``, ``.wav`` or ``.m4a`` are given to the model directly; any
    other file, and every URL download, is first converted with
    ``extract_audio_safe``. The transcript is passed through
    ``normalize_script`` together with the detected language.

    Args:
        url: Media URL, or None/empty when no link was given. Surrounding
            whitespace is ignored.
        file: Path of the uploaded file, or None when nothing was uploaded.
        lang_choice: ``"Auto Detect"`` to let the model pick the language,
            otherwise the language code forwarded to the model.

    Returns:
        A display string: the detected language followed by the transcript
        on success, otherwise a warning or error message. Exceptions are
        turned into an error message instead of being raised.
    """
    try:
        # A link box holding only whitespace counts as empty, so the user
        # gets the "paste a URL" prompt rather than a downloader error.
        if isinstance(url, str):
            url = url.strip()

        # -------- FILE MODE --------
        if file:
            ext = os.path.splitext(file)[1].lower()
            if ext in (".mp3", ".wav", ".m4a"):
                # Already audio: no extraction step needed.
                audio = file
            else:
                audio = extract_audio_safe(file)

        # -------- URL MODE --------
        elif url:
            video = download_video_only(url)
            audio = extract_audio_safe(video)

        else:
            return "⚠️ Please paste a URL or upload a file."

        # Safety check: a missing or tiny file means extraction failed.
        if not os.path.exists(audio) or os.path.getsize(audio) < 10000:
            return "❌ Audio extraction failed. Please try again."

        whisper = load_model()
        language = None if lang_choice == "Auto Detect" else lang_choice

        segments, info = whisper.transcribe(
            audio,
            beam_size=1,
            vad_filter=True,
            language=language,
        )

        raw_text = " ".join(s.text for s in segments)
        final_text = normalize_script(raw_text, info.language)

        return f"🌍 Detected Language: {info.language}\n\n{final_text.strip()}"

    except Exception as e:
        message = str(e)
        if "instagram" in message.lower():
            return "❌ Instagram URL is blocked on Hugging Face. Please upload the video file instead."
        return f"❌ Error: {message}"
128
 
129
  # ===============================
130
+ # MODERN UI
131
  # ===============================
132
  css = """
133
+ body {
134
+ background: radial-gradient(circle at top, #0f2027, #203a43, #2c5364);
135
+ }
136
+ .glass {
137
+ background: rgba(255,255,255,0.08);
138
+ backdrop-filter: blur(18px);
139
+ border-radius: 18px;
140
+ padding: 25px;
141
+ box-shadow: 0 20px 40px rgba(0,0,0,0.4);
142
+ }
143
  .gr-button-primary {
144
+ background: linear-gradient(135deg,#00c6ff,#0072ff);
145
  border: none;
146
  color: white;
147
+ font-weight: 600;
148
+ }
149
+ .gr-input, .gr-textarea {
150
+ background: rgba(255,255,255,0.12) !important;
151
+ color: white !important;
152
+ }
153
+ h1, h2, label, .markdown-text {
154
+ color: #ffffff !important;
155
  }
156
+ footer {display:none;}
157
  """
158
 
159
+ with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
160
+ with gr.Column(elem_classes="glass"):
161
+ gr.Markdown("## 🚀 Universal Transcript Tool (STABLE)")
162
  gr.Markdown(
163
+ " YouTube TikTok Facebook Twitter/X\n\n"
164
+ "⚠️ Instagram URL blocked on Hugging Face → **Upload video instead**\n\n"
165
+ "**No random ffprobe errors. Ever.**"
166
  )
167
 
168
  with gr.Tabs():
169
  with gr.TabItem("🔗 Paste Link"):
170
+ url = gr.Textbox(label="Video URL")
171
  btn_url = gr.Button("🎧 Transcribe Link", variant="primary")
172
 
173
  with gr.TabItem("📂 Upload File"):
174
+ file = gr.File(
175
  label="Upload Video / Audio",
176
  file_types=[".mp4", ".mkv", ".mov", ".webm", ".avi", ".mp3", ".wav"]
177
  )
178
  btn_file = gr.Button("📂 Transcribe File", variant="primary")
179
 
180
+ lang = gr.Dropdown(
181
+ label="🌍 Transcript Language",
182
  choices=[
183
  "Auto Detect",
184
+ "hi",
185
+ "ur",
186
+ "en",
187
  "ar",
188
  "fr",
189
  "de",
 
192
  "ja",
193
  "zh"
194
  ],
195
+ value="Auto Detect"
 
196
  )
197
 
198
+ output = gr.Code(label="Transcript Output", lines=14)
199
 
200
+ btn_url.click(transcribe, [url, gr.State(None), lang], output)
201
+ btn_file.click(transcribe, [gr.State(None), file, lang], output)
202
 
203
  demo.launch()