Offex committed on
Commit
892bbcb
·
verified ·
1 Parent(s): 8ef29f5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -78
app.py CHANGED
@@ -4,59 +4,47 @@ import os
4
  import shutil
5
  import subprocess
6
  from faster_whisper import WhisperModel
7
-
8
- # 🔤 Hindi Script Fix
9
  from indic_transliteration import sanscript
10
  from indic_transliteration.sanscript import transliterate
11
 
12
  # ===============================
13
- # 1. Whisper Model (Lazy Load)
14
  # ===============================
15
  model = None
16
 
17
  def load_model():
18
  global model
19
  if model is None:
20
- print("📥 Loading Whisper Model...")
21
  model = WhisperModel("base", device="cpu", compute_type="int8")
22
- print("✅ Model Loaded")
23
  return model
24
 
25
  # ===============================
26
- # 2. FFmpeg Path
27
  # ===============================
28
  def get_ffmpeg():
29
  return shutil.which("ffmpeg") or "/usr/bin/ffmpeg"
30
 
31
  # ===============================
32
- # 3. Video → Audio
33
  # ===============================
34
  def extract_audio(video_path):
35
- audio_path = "uploaded_audio.wav"
36
- if os.path.exists(audio_path):
37
- os.remove(audio_path)
38
-
39
- cmd = [
40
- get_ffmpeg(),
41
- "-i", video_path,
42
- "-vn",
43
- "-ac", "1",
44
- "-ar", "16000",
45
- audio_path,
46
- "-y"
47
- ]
48
- subprocess.run(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
49
- return audio_path
50
 
51
  # ===============================
52
- # 4. Download Audio from URL
53
  # ===============================
54
- def download_audio_from_url(url):
55
- output = "url_audio"
56
-
57
  ydl_opts = {
58
  "format": "bestaudio/best",
59
- "outtmpl": output,
60
  "postprocessors": [{
61
  "key": "FFmpegExtractAudio",
62
  "preferredcodec": "wav",
@@ -64,14 +52,12 @@ def download_audio_from_url(url):
64
  "quiet": True,
65
  "nocheckcertificate": True,
66
  }
67
-
68
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
69
  ydl.download([url])
70
-
71
  return "url_audio.wav"
72
 
73
  # ===============================
74
- # 5. Hindi Script Normalizer
75
  # ===============================
76
  def normalize_script(text, lang):
77
  if lang == "hi":
@@ -82,91 +68,95 @@ def normalize_script(text, lang):
82
  return text
83
 
84
  # ===============================
85
- # 6. Main Transcribe Logic
86
  # ===============================
87
- def transcribe_media(url_input, file_input, language_choice):
88
-
89
  try:
90
- audio_path = None
91
-
92
- # ---------- FILE ----------
93
- if file_input:
94
- ext = os.path.splitext(file_input)[1].lower()
95
- if ext in [".mp3", ".wav", ".m4a"]:
96
- audio_path = file_input
97
- else:
98
- audio_path = extract_audio(file_input)
99
-
100
- # ---------- URL ----------
101
- elif url_input and url_input.strip():
102
- audio_path = download_audio_from_url(url_input)
103
-
104
  else:
105
- return "⚠️ Please paste a link or upload a file."
106
-
107
- if not os.path.exists(audio_path):
108
- return "❌ Audio processing failed."
109
 
110
  model = load_model()
111
-
112
- # Language handling
113
- language = None if language_choice == "Auto Detect" else language_choice
114
 
115
  segments, info = model.transcribe(
116
- audio_path,
117
  beam_size=1,
118
  vad_filter=True,
119
  language=language
120
  )
121
 
122
- detected_lang = info.language
123
- raw_text = " ".join(seg.text for seg in segments)
124
- final_text = normalize_script(raw_text, detected_lang)
125
 
126
- return f"🌍 Detected Language: {detected_lang}\n\n{final_text.strip()}"
127
 
128
  except Exception as e:
129
  return f"❌ Error: {str(e)}"
130
 
131
  # ===============================
132
- # 7. UI
133
  # ===============================
134
  css = """
135
- .container {max-width: 900px; margin: auto;}
 
 
 
 
 
 
 
 
 
136
  .gr-button-primary {
137
- background: linear-gradient(90deg,#667eea,#764ba2);
138
  border: none;
139
  color: white;
 
 
 
 
 
 
 
 
140
  }
141
  """
142
 
143
- with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
144
- with gr.Column(elem_classes="container"):
145
  gr.Markdown("## 🚀 Universal Transcript Tool")
146
  gr.Markdown(
147
- "Supports **YouTube, TikTok, Instagram, Facebook, Twitter/X**\n\n"
148
- "Hindi output is always **Devanagari** 🇮🇳"
149
  )
150
 
151
  with gr.Tabs():
152
  with gr.TabItem("🔗 Paste Link"):
153
- url_in = gr.Textbox(label="Video URL")
 
 
 
154
  btn_url = gr.Button("🎧 Transcribe Link", variant="primary")
155
 
156
  with gr.TabItem("📂 Upload File"):
157
- file_in = gr.File(
158
  label="Upload Video / Audio",
159
  file_types=[".mp4", ".mkv", ".mov", ".webm", ".avi", ".mp3", ".wav"]
160
  )
161
  btn_file = gr.Button("📂 Transcribe File", variant="primary")
162
 
163
- # 🌍 Language Selector
164
- language_selector = gr.Dropdown(
165
  choices=[
166
  "Auto Detect",
167
- "hi", # Hindi (Devanagari)
168
- "ur", # Urdu
169
- "en", # English
170
  "ar",
171
  "fr",
172
  "de",
@@ -175,13 +165,15 @@ with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
175
  "ja",
176
  "zh"
177
  ],
178
- value="Auto Detect",
179
- label="🌍 Select Transcript Language"
180
  )
181
 
182
- output = gr.Code(label="Transcript Output", lines=15)
 
 
 
183
 
184
- btn_url.click(transcribe_media, [url_in, gr.State(None), language_selector], output)
185
- btn_file.click(transcribe_media, [gr.State(None), file_in, language_selector], output)
186
 
187
  demo.launch()
 
4
  import shutil
5
  import subprocess
6
  from faster_whisper import WhisperModel
 
 
7
  from indic_transliteration import sanscript
8
  from indic_transliteration.sanscript import transliterate
9
 
10
  # ===============================
11
+ # Whisper Model
12
  # ===============================
13
  model = None
14
 
def load_model():
    """Return the shared Whisper model, constructing it on the first call.

    The instance is stored in the module-level ``model`` variable; later
    calls return that same object without constructing a new one.
    """
    global model
    if model is not None:
        return model
    model = WhisperModel("base", device="cpu", compute_type="int8")
    return model
20
 
21
  # ===============================
22
+ # FFmpeg
23
  # ===============================
def get_ffmpeg():
    """Return the ffmpeg executable found on PATH, else "/usr/bin/ffmpeg"."""
    located = shutil.which("ffmpeg")
    if located:
        return located
    return "/usr/bin/ffmpeg"
26
 
27
  # ===============================
28
+ # Video → Audio
29
  # ===============================
def extract_audio(video_path):
    """Convert ``video_path`` to a mono 16 kHz WAV file and return its path.

    The output is always written to "uploaded_audio.wav" in the current
    working directory; an existing file of that name is removed first.

    Args:
        video_path: Path of the media file handed to ffmpeg as input.

    Returns:
        The output path, "uploaded_audio.wav".

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status or leaves no
            output file behind.
    """
    audio = "uploaded_audio.wav"
    if os.path.exists(audio):
        os.remove(audio)

    # "-y" is placed before the input: ffmpeg warns that options trailing
    # the last output file may be ignored.
    command = [
        get_ffmpeg(),
        "-y",
        "-i", video_path,
        "-vn",
        "-ac", "1",
        "-ar", "16000",
        audio,
    ]
    result = subprocess.run(
        command,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )

    # Fail here with a clear message instead of returning a path that does
    # not exist and letting a later step trip over it.
    if result.returncode != 0 or not os.path.exists(audio):
        raise RuntimeError(f"ffmpeg could not extract audio from {video_path!r}")
    return audio
 
 
 
 
 
 
40
 
41
  # ===============================
42
+ # URL Audio
43
  # ===============================
44
+ def download_audio(url):
 
 
45
  ydl_opts = {
46
  "format": "bestaudio/best",
47
+ "outtmpl": "url_audio",
48
  "postprocessors": [{
49
  "key": "FFmpegExtractAudio",
50
  "preferredcodec": "wav",
 
52
  "quiet": True,
53
  "nocheckcertificate": True,
54
  }
 
55
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
56
  ydl.download([url])
 
57
  return "url_audio.wav"
58
 
59
  # ===============================
60
+ # Script Fix
61
  # ===============================
62
  def normalize_script(text, lang):
63
  if lang == "hi":
 
68
  return text
69
 
70
  # ===============================
71
+ # Transcribe
72
  # ===============================
def transcribe(url, file, lang_choice):
    """Transcribe an uploaded file or a media URL and return display text.

    Args:
        url: Media URL string, or None/blank when a file is supplied.
        file: Path of the uploaded file, or None when a URL is supplied.
            When both are given, the file is used.
        lang_choice: "Auto Detect", or a language code that is passed to
            the model as ``language``.

    Returns:
        A string holding the language reported by the model followed by
        the transcript; a warning string when neither input was given; or
        an "❌ Error: ..." string if any processing step raised.
    """
    # A whitespace-only URL counts as "no URL" so it is reported as missing
    # input rather than being handed to the downloader.
    if isinstance(url, str):
        url = url.strip()

    if not file and not url:
        return "⚠️ Please provide a URL or upload a file."

    try:
        if file:
            ext = os.path.splitext(file)[1].lower()
            # These audio formats go to the model as-is; everything else
            # is converted to WAV first.
            audio = file if ext in (".mp3", ".wav", ".m4a") else extract_audio(file)
        else:
            audio = download_audio(url)

        # Local name differs from the module-level ``model`` cache to avoid
        # shadowing it inside this function.
        whisper = load_model()
        language = None if lang_choice == "Auto Detect" else lang_choice

        segments, info = whisper.transcribe(
            audio,
            beam_size=1,
            vad_filter=True,
            language=language,
        )

        text = " ".join(s.text for s in segments)
        text = normalize_script(text, info.language)

        return f"🌍 Language: {info.language}\n\n{text.strip()}"

    except Exception as e:
        # UI boundary: surface the failure as text in the output widget.
        return f"❌ Error: {str(e)}"
100
 
101
  # ===============================
102
+ # MODERN UI
103
  # ===============================
104
  css = """
105
+ body {
106
+ background: radial-gradient(circle at top, #0f2027, #203a43, #2c5364);
107
+ }
108
+ .glass {
109
+ background: rgba(255,255,255,0.08);
110
+ backdrop-filter: blur(18px);
111
+ border-radius: 18px;
112
+ padding: 25px;
113
+ box-shadow: 0 20px 40px rgba(0,0,0,0.4);
114
+ }
115
  .gr-button-primary {
116
+ background: linear-gradient(135deg,#00c6ff,#0072ff);
117
  border: none;
118
  color: white;
119
+ font-weight: 600;
120
+ }
121
+ .gr-input, .gr-textarea {
122
+ background: rgba(255,255,255,0.12) !important;
123
+ color: white !important;
124
+ }
125
+ h1, h2, label, .markdown-text {
126
+ color: #ffffff !important;
127
  }
128
  """
129
 
130
+ with gr.Blocks(css=css, theme=gr.themes.Base()) as demo:
131
+ with gr.Column(elem_classes="glass"):
132
  gr.Markdown("## 🚀 Universal Transcript Tool")
133
  gr.Markdown(
134
+ "Modern UI • YouTube TikTok Instagram (Upload) • Fast Whisper\n\n"
135
+ "**Note:** Instagram URLs may be blocked on Hugging Face."
136
  )
137
 
138
  with gr.Tabs():
139
  with gr.TabItem("🔗 Paste Link"):
140
+ url = gr.Textbox(
141
+ label="Video URL",
142
+ placeholder="YouTube / TikTok link"
143
+ )
144
  btn_url = gr.Button("🎧 Transcribe Link", variant="primary")
145
 
146
  with gr.TabItem("📂 Upload File"):
147
+ file = gr.File(
148
  label="Upload Video / Audio",
149
  file_types=[".mp4", ".mkv", ".mov", ".webm", ".avi", ".mp3", ".wav"]
150
  )
151
  btn_file = gr.Button("📂 Transcribe File", variant="primary")
152
 
153
+ lang = gr.Dropdown(
154
+ label="🌍 Transcript Language",
155
  choices=[
156
  "Auto Detect",
157
+ "hi",
158
+ "ur",
159
+ "en",
160
  "ar",
161
  "fr",
162
  "de",
 
165
  "ja",
166
  "zh"
167
  ],
168
+ value="Auto Detect"
 
169
  )
170
 
171
+ output = gr.Code(
172
+ label="Transcript Output",
173
+ lines=14
174
+ )
175
 
176
+ btn_url.click(transcribe, [url, gr.State(None), lang], output)
177
+ btn_file.click(transcribe, [gr.State(None), file, lang], output)
178
 
179
  demo.launch()