Offex committed on
Commit
b292d46
·
verified ·
1 Parent(s): 892bbcb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -70
app.py CHANGED
@@ -4,47 +4,59 @@ import os
4
  import shutil
5
  import subprocess
6
  from faster_whisper import WhisperModel
 
 
7
  from indic_transliteration import sanscript
8
  from indic_transliteration.sanscript import transliterate
9
 
10
  # ===============================
11
- # Whisper Model
12
  # ===============================
13
  model = None
14
 
15
  def load_model():
16
  global model
17
  if model is None:
 
18
  model = WhisperModel("base", device="cpu", compute_type="int8")
 
19
  return model
20
 
21
  # ===============================
22
- # FFmpeg
23
  # ===============================
24
  def get_ffmpeg():
25
  return shutil.which("ffmpeg") or "/usr/bin/ffmpeg"
26
 
27
  # ===============================
28
- # Video → Audio
29
  # ===============================
30
  def extract_audio(video_path):
31
- audio = "uploaded_audio.wav"
32
- if os.path.exists(audio):
33
- os.remove(audio)
34
- subprocess.run(
35
- [get_ffmpeg(), "-i", video_path, "-vn", "-ac", "1", "-ar", "16000", audio, "-y"],
36
- stdout=subprocess.DEVNULL,
37
- stderr=subprocess.DEVNULL
38
- )
39
- return audio
 
 
 
 
 
 
40
 
41
  # ===============================
42
- # URL → Audio
43
  # ===============================
44
- def download_audio(url):
 
 
45
  ydl_opts = {
46
  "format": "bestaudio/best",
47
- "outtmpl": "url_audio",
48
  "postprocessors": [{
49
  "key": "FFmpegExtractAudio",
50
  "preferredcodec": "wav",
@@ -52,12 +64,14 @@ def download_audio(url):
52
  "quiet": True,
53
  "nocheckcertificate": True,
54
  }
 
55
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
56
  ydl.download([url])
 
57
  return "url_audio.wav"
58
 
59
  # ===============================
60
- # Script Fix
61
  # ===============================
62
  def normalize_script(text, lang):
63
  if lang == "hi":
@@ -68,95 +82,91 @@ def normalize_script(text, lang):
68
  return text
69
 
70
  # ===============================
71
- # Transcribe
72
  # ===============================
73
- def transcribe(url, file, lang_choice):
 
74
  try:
75
- if file:
76
- ext = os.path.splitext(file)[1].lower()
77
- audio = file if ext in [".mp3", ".wav", ".m4a"] else extract_audio(file)
78
- elif url:
79
- audio = download_audio(url)
 
 
 
 
 
 
 
 
 
80
  else:
81
- return "โš ๏ธ Please provide a URL or upload a file."
 
 
 
82
 
83
  model = load_model()
84
- language = None if lang_choice == "Auto Detect" else lang_choice
 
 
85
 
86
  segments, info = model.transcribe(
87
- audio,
88
  beam_size=1,
89
  vad_filter=True,
90
  language=language
91
  )
92
 
93
- text = " ".join(s.text for s in segments)
94
- text = normalize_script(text, info.language)
 
95
 
96
- return f"๐ŸŒ Language: {info.language}\n\n{text.strip()}"
97
 
98
  except Exception as e:
99
  return f"โŒ Error: {str(e)}"
100
 
101
  # ===============================
102
- # MODERN UI
103
  # ===============================
104
  css = """
105
- body {
106
- background: radial-gradient(circle at top, #0f2027, #203a43, #2c5364);
107
- }
108
- .glass {
109
- background: rgba(255,255,255,0.08);
110
- backdrop-filter: blur(18px);
111
- border-radius: 18px;
112
- padding: 25px;
113
- box-shadow: 0 20px 40px rgba(0,0,0,0.4);
114
- }
115
  .gr-button-primary {
116
- background: linear-gradient(135deg,#00c6ff,#0072ff);
117
  border: none;
118
  color: white;
119
- font-weight: 600;
120
- }
121
- .gr-input, .gr-textarea {
122
- background: rgba(255,255,255,0.12) !important;
123
- color: white !important;
124
- }
125
- h1, h2, label, .markdown-text {
126
- color: #ffffff !important;
127
  }
128
  """
129
 
130
- with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
131
- with gr.Column(elem_classes="glass"):
132
  gr.Markdown("## ๐Ÿš€ Universal Transcript Tool")
133
  gr.Markdown(
134
- "Modern UI โ€ข YouTube โ€ข TikTok โ€ข Instagram (Upload) โ€ข Fast Whisper\n\n"
135
- "**Note:** Instagram URLs may be blocked on Hugging Face."
136
  )
137
 
138
  with gr.Tabs():
139
  with gr.TabItem("๐Ÿ”— Paste Link"):
140
- url = gr.Textbox(
141
- label="Video URL",
142
- placeholder="YouTube / TikTok link"
143
- )
144
  btn_url = gr.Button("๐ŸŽง Transcribe Link", variant="primary")
145
 
146
  with gr.TabItem("๐Ÿ“‚ Upload File"):
147
- file = gr.File(
148
  label="Upload Video / Audio",
149
  file_types=[".mp4", ".mkv", ".mov", ".webm", ".avi", ".mp3", ".wav"]
150
  )
151
  btn_file = gr.Button("๐Ÿ“‚ Transcribe File", variant="primary")
152
 
153
- lang = gr.Dropdown(
154
- label="๐ŸŒ Transcript Language",
155
  choices=[
156
  "Auto Detect",
157
- "hi",
158
- "ur",
159
- "en",
160
  "ar",
161
  "fr",
162
  "de",
@@ -165,15 +175,13 @@ with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
165
  "ja",
166
  "zh"
167
  ],
168
- value="Auto Detect"
 
169
  )
170
 
171
- output = gr.Code(
172
- label="Transcript Output",
173
- lines=14
174
- )
175
 
176
- btn_url.click(transcribe, [url, gr.State(None), lang], output)
177
- btn_file.click(transcribe, [gr.State(None), file, lang], output)
178
 
179
  demo.launch()
 
4
  import shutil
5
  import subprocess
6
  from faster_whisper import WhisperModel
7
+
8
+ # 🔤 Hindi Script Fix
9
  from indic_transliteration import sanscript
10
  from indic_transliteration.sanscript import transliterate
11
 
12
  # ===============================
13
+ # 1. Whisper Model (Lazy Load)
14
  # ===============================
15
  model = None
16
 
17
  def load_model():
18
  global model
19
  if model is None:
20
+ print("๐Ÿ“ฅ Loading Whisper Model...")
21
  model = WhisperModel("base", device="cpu", compute_type="int8")
22
+ print("โœ… Model Loaded")
23
  return model
24
 
25
  # ===============================
26
+ # 2. FFmpeg Path
27
  # ===============================
28
  def get_ffmpeg():
29
  return shutil.which("ffmpeg") or "/usr/bin/ffmpeg"
30
 
31
  # ===============================
32
+ # 3. Video → Audio
33
  # ===============================
34
  def extract_audio(video_path):
35
+ audio_path = "uploaded_audio.wav"
36
+ if os.path.exists(audio_path):
37
+ os.remove(audio_path)
38
+
39
+ cmd = [
40
+ get_ffmpeg(),
41
+ "-i", video_path,
42
+ "-vn",
43
+ "-ac", "1",
44
+ "-ar", "16000",
45
+ audio_path,
46
+ "-y"
47
+ ]
48
+ subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
49
+ return audio_path
50
 
51
  # ===============================
52
+ # 4. Download Audio from URL
53
  # ===============================
54
+ def download_audio_from_url(url):
55
+ output = "url_audio"
56
+
57
  ydl_opts = {
58
  "format": "bestaudio/best",
59
+ "outtmpl": output,
60
  "postprocessors": [{
61
  "key": "FFmpegExtractAudio",
62
  "preferredcodec": "wav",
 
64
  "quiet": True,
65
  "nocheckcertificate": True,
66
  }
67
+
68
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
69
  ydl.download([url])
70
+
71
  return "url_audio.wav"
72
 
73
  # ===============================
74
+ # 5. Hindi Script Normalizer
75
  # ===============================
76
  def normalize_script(text, lang):
77
  if lang == "hi":
 
82
  return text
83
 
84
  # ===============================
85
+ # 6. Main Transcribe Logic
86
  # ===============================
87
+ def transcribe_media(url_input, file_input, language_choice):
88
+
89
  try:
90
+ audio_path = None
91
+
92
+ # ---------- FILE ----------
93
+ if file_input:
94
+ ext = os.path.splitext(file_input)[1].lower()
95
+ if ext in [".mp3", ".wav", ".m4a"]:
96
+ audio_path = file_input
97
+ else:
98
+ audio_path = extract_audio(file_input)
99
+
100
+ # ---------- URL ----------
101
+ elif url_input and url_input.strip():
102
+ audio_path = download_audio_from_url(url_input)
103
+
104
  else:
105
+ return "โš ๏ธ Please paste a link or upload a file."
106
+
107
+ if not os.path.exists(audio_path):
108
+ return "โŒ Audio processing failed."
109
 
110
  model = load_model()
111
+
112
+ # Language handling
113
+ language = None if language_choice == "Auto Detect" else language_choice
114
 
115
  segments, info = model.transcribe(
116
+ audio_path,
117
  beam_size=1,
118
  vad_filter=True,
119
  language=language
120
  )
121
 
122
+ detected_lang = info.language
123
+ raw_text = " ".join(seg.text for seg in segments)
124
+ final_text = normalize_script(raw_text, detected_lang)
125
 
126
+ return f"๐ŸŒ Detected Language: {detected_lang}\n\n{final_text.strip()}"
127
 
128
  except Exception as e:
129
  return f"โŒ Error: {str(e)}"
130
 
131
  # ===============================
132
+ # 7. UI
133
  # ===============================
134
  css = """
135
+ .container {max-width: 900px; margin: auto;}
 
 
 
 
 
 
 
 
 
136
  .gr-button-primary {
137
+ background: linear-gradient(90deg,#667eea,#764ba2);
138
  border: none;
139
  color: white;
 
 
 
 
 
 
 
 
140
  }
141
  """
142
 
143
+ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
144
+ with gr.Column(elem_classes="container"):
145
  gr.Markdown("## ๐Ÿš€ Universal Transcript Tool")
146
  gr.Markdown(
147
+ "Supports **YouTube, TikTok, Instagram, Facebook, Twitter/X**\n\n"
148
+ "Hindi output is always **Devanagari** ๐Ÿ‡ฎ๐Ÿ‡ณ"
149
  )
150
 
151
  with gr.Tabs():
152
  with gr.TabItem("๐Ÿ”— Paste Link"):
153
+ url_in = gr.Textbox(label="Video URL")
 
 
 
154
  btn_url = gr.Button("๐ŸŽง Transcribe Link", variant="primary")
155
 
156
  with gr.TabItem("๐Ÿ“‚ Upload File"):
157
+ file_in = gr.File(
158
  label="Upload Video / Audio",
159
  file_types=[".mp4", ".mkv", ".mov", ".webm", ".avi", ".mp3", ".wav"]
160
  )
161
  btn_file = gr.Button("๐Ÿ“‚ Transcribe File", variant="primary")
162
 
163
+ # 🌐 Language Selector
164
+ language_selector = gr.Dropdown(
165
  choices=[
166
  "Auto Detect",
167
+ "hi", # Hindi (Devanagari)
168
+ "ur", # Urdu
169
+ "en", # English
170
  "ar",
171
  "fr",
172
  "de",
 
175
  "ja",
176
  "zh"
177
  ],
178
+ value="Auto Detect",
179
+ label="๐ŸŒ Select Transcript Language"
180
  )
181
 
182
+ output = gr.Code(label="Transcript Output", lines=15)
 
 
 
183
 
184
+ btn_url.click(transcribe_media, [url_in, gr.State(None), language_selector], output)
185
+ btn_file.click(transcribe_media, [gr.State(None), file_in, language_selector], output)
186
 
187
  demo.launch()