Kalp97 committed on
Commit
b045623
·
verified ·
1 Parent(s): 9ea9b32

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -311
app.py CHANGED
@@ -3,8 +3,6 @@ import whisper
3
  import yt_dlp
4
  import os
5
  import tempfile
6
- import requests
7
- import requests
8
 
9
  models = {}
10
 
@@ -40,286 +38,6 @@ def devanagari_to_roman(text):
40
  return ''.join(result)
41
 
42
 
43
- RAPIDAPI_KEY = "47b2f0d88bmsh7842ac99f4b2a3ep12df5djsn6f61065d9692"
44
- RAPIDAPI_HOST = "instagram-reels-downloader-api.p.rapidapi.com"
45
-
46
- def download_instagram_audio(url):
47
- """Download Instagram reel via RapidAPI then extract audio"""
48
- headers = {
49
- "x-rapidapi-key": RAPIDAPI_KEY,
50
- "x-rapidapi-host": RAPIDAPI_HOST,
51
- "Content-Type": "application/json"
52
- }
53
- resp = requests.get(
54
- f"https://{RAPIDAPI_HOST}/download",
55
- headers=headers,
56
- params={"url": url},
57
- timeout=30
58
- )
59
- if resp.status_code != 200:
60
- raise Exception(f"RapidAPI error {resp.status_code}: {resp.text[:200]}")
61
-
62
- data = resp.json()
63
- video_url = None
64
- if isinstance(data, dict):
65
- video_url = (data.get('url') or data.get('video_url') or
66
- data.get('download_url') or
67
- (data.get('data') or {}).get('url') or
68
- (data.get('data') or {}).get('video_url'))
69
- elif isinstance(data, list) and len(data) > 0:
70
- item = data[0]
71
- video_url = item.get('url') or item.get('video_url') or item.get('download_url')
72
-
73
- if not video_url:
74
- raise Exception(f"No download URL in response: {str(data)[:300]}")
75
-
76
- tmp_dir = tempfile.mkdtemp()
77
- video_path = os.path.join(tmp_dir, 'ig_video.mp4')
78
- audio_path = os.path.join(tmp_dir, 'audio.mp3')
79
-
80
- vid_resp = requests.get(video_url, timeout=60, stream=True,
81
- headers={"User-Agent":"Mozilla/5.0"})
82
- with open(video_path, 'wb') as f:
83
- for chunk in vid_resp.iter_content(chunk_size=8192):
84
- f.write(chunk)
85
-
86
- os.system(f'ffmpeg -i "{video_path}" -q:a 0 -map a "{audio_path}" -y -loglevel quiet')
87
- if os.path.exists(audio_path) and os.path.getsize(audio_path) > 0:
88
- return audio_path, 'instagram_reel'
89
- return video_path, 'instagram_reel'
90
-
91
-
92
- RAPIDAPI_KEY = "47b2f0d88bmsh7842ac99f4b2a3ep12df5djsn6f61065d9692"
93
- RAPIDAPI_HOST = "instagram-reels-downloader-api.p.rapidapi.com"
94
-
95
- def download_instagram_via_rapidapi(url):
96
- """Download Instagram reel using RapidAPI — reliable, no IP blocks"""
97
- headers = {
98
- "x-rapidapi-key": RAPIDAPI_KEY,
99
- "x-rapidapi-host": RAPIDAPI_HOST,
100
- "Content-Type": "application/json"
101
- }
102
- params = {"url": url}
103
- resp = requests.get(
104
- f"https://{RAPIDAPI_HOST}/download",
105
- headers=headers,
106
- params=params,
107
- timeout=30
108
- )
109
- if resp.status_code != 200:
110
- raise Exception(f"RapidAPI error {resp.status_code}: {resp.text[:200]}")
111
- data = resp.json()
112
- # Extract direct video URL from response
113
- video_url = None
114
- if isinstance(data, dict):
115
- video_url = (data.get('url') or data.get('download_url') or
116
- data.get('video_url') or data.get('link'))
117
- if not video_url and data.get('data'):
118
- d = data['data']
119
- if isinstance(d, list) and len(d) > 0:
120
- video_url = d[0].get('url') or d[0].get('download_url')
121
- elif isinstance(d, dict):
122
- video_url = d.get('url') or d.get('download_url')
123
- if not video_url:
124
- raise Exception(f"No video URL in response: {str(data)[:300]}")
125
- # Download the actual video file
126
- tmp_dir = tempfile.mkdtemp()
127
- tmp_path = os.path.join(tmp_dir, 'instagram.mp4')
128
- video_resp = requests.get(video_url, timeout=60, stream=True)
129
- with open(tmp_path, 'wb') as f:
130
- for chunk in video_resp.iter_content(chunk_size=8192):
131
- f.write(chunk)
132
- return tmp_path, data.get('title', 'Instagram video')
133
-
134
- def download_from_url(url):
135
- """Instagram → RapidAPI, everything else → yt-dlp"""
136
- if 'instagram.com' in url.lower():
137
- return download_instagram_audio(url)
138
-
139
- tmp_dir = tempfile.mkdtemp()
140
- output_path = os.path.join(tmp_dir, 'audio.%(ext)s')
141
- ydl_opts = {
142
- 'format': 'bestaudio/best',
143
- 'outtmpl': output_path,
144
- 'quiet': True,
145
- 'no_warnings': True,
146
- 'postprocessors': [{
147
- 'key': 'FFmpegExtractAudio',
148
- 'preferredcodec': 'mp3',
149
- 'preferredquality': '192',
150
- }],
151
- 'http_headers': {
152
- 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
153
- },
154
- 'socket_timeout': 60,
155
- 'retries': 3,
156
- 'geo_bypass': True,
157
- }
158
- try:
159
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
160
- info = ydl.extract_info(url, download=True)
161
- title = info.get('title', 'video')
162
- except Exception as e:
163
- err = str(e)
164
- if 'private' in err.lower():
165
- raise Exception("This account is private. Only public posts can be downloaded.")
166
- elif 'not found' in err.lower() or '404' in err:
167
- raise Exception("Video not found. Check the URL and make sure the post still exists.")
168
- else:
169
- raise Exception(f"Download failed: {err}")
170
-
171
- for f in os.listdir(tmp_dir):
172
- if f.endswith('.mp3'):
173
- return os.path.join(tmp_dir, f), title
174
- raise Exception("Download succeeded but audio file not found.")
175
-
176
-
177
- def transcribe(file, model_name, language, show_timestamps, translate):
178
- if file is None:
179
- return "Please upload a video or audio file.", ""
180
- try:
181
- m = load_model(model_name)
182
- # Hinglish: use large-v3 model for best code-switching support
183
- hinglish_mode = (language == "Hinglish (Roman)")
184
- if hinglish_mode:
185
- lang = "hi" # Force Hindi so Whisper writes Devanagari accurately
186
- m = load_model("large-v3") # Override with large-v3 for Hinglish
187
- elif language == "Auto Detect":
188
- lang = None
189
- else:
190
- lang = language.lower()
191
- whisper_task = "translate" if translate == "Translate to English" else "transcribe"
192
- # Hinglish: nudge toward correct Roman script via initial_prompt
193
- initial_prompt = None
194
- if hinglish_mode:
195
- initial_prompt = (
196
- "This is a Hinglish conversation mixing Hindi and English. "
197
- "Transcribe everything in Roman/Latin script only. No Devanagari. "
198
- "Write full Hindi words correctly in Roman letters — never abbreviate. "
199
- "Common words: India, main, mein, sab, aaj, kal, hai, hain, rahe, "
200
- "isliye, kyunki, lekin, aur, yeh, woh, kya, bhi, toh, bas, paisa, "
201
- "log, kaam, din, raat, ghar, baat, baar, bahut, accha, theek. "
202
- "Example: India mein sab log dropshipping isliye sikha rahe hain "
203
- "kyunki paisa e-books mein ban raha hai."
204
- )
205
- result = m.transcribe(
206
- file.name, language=lang, task=whisper_task,
207
- verbose=False, initial_prompt=initial_prompt
208
- )
209
- plain = result["text"].strip()
210
-
211
- # Hinglish: if any Devanagari slipped through, transliterate it
212
- if hinglish_mode:
213
- try:
214
- from indic_transliteration import sanscript
215
- from indic_transliteration.transliterate import transliterate
216
- plain = transliterate(plain, sanscript.DEVANAGARI, sanscript.ITRANS)
217
- except Exception:
218
- # Fallback: simple character-level Devanagari → Roman map
219
- plain = devanagari_to_roman(plain)
220
- if show_timestamps:
221
- lines = []
222
- for seg in result["segments"]:
223
- start = format_time(seg["start"])
224
- end = format_time(seg["end"])
225
- seg_text = seg['text'].strip()
226
- if hinglish_mode:
227
- try:
228
- from indic_transliteration import sanscript
229
- from indic_transliteration.transliterate import transliterate
230
- seg_text = transliterate(seg_text, sanscript.DEVANAGARI, sanscript.ITRANS)
231
- except Exception:
232
- seg_text = devanagari_to_roman(seg_text)
233
- lines.append(f"[{start} → {end}] {seg_text}")
234
- return "\n".join(lines), plain
235
- return plain, plain
236
- except Exception as e:
237
- return f"Error: {str(e)}", ""
238
-
239
- def save_transcript(text):
240
- if not text:
241
- return None
242
- path = "/tmp/transcript.txt"
243
- with open(path, "w", encoding="utf-8") as f:
244
- f.write(text)
245
- return path
246
-
247
-
248
-
249
- def download_video_only(url):
250
- """Download video — uses RapidAPI for Instagram, yt-dlp for others"""
251
- if not url or not url.strip():
252
- return None, "Please paste a valid URL."
253
- url = url.strip()
254
- supported = ['instagram.com','youtube.com','youtu.be','twitter.com',
255
- 'x.com','facebook.com','fb.watch']
256
- if not any(s in url.lower() for s in supported):
257
- return None, "Unsupported URL."
258
- # Use RapidAPI for Instagram
259
- if 'instagram.com' in url.lower():
260
- try:
261
- tmp_path, title = download_instagram_via_rapidapi(url)
262
- return tmp_path, title
263
- except Exception as e:
264
- return None, str(e)
265
- tmp_dir = tempfile.mkdtemp()
266
- output_path = os.path.join(tmp_dir, 'video.%(ext)s')
267
- ydl_opts = {
268
- 'format': 'bestvideo+bestaudio/best',
269
- 'outtmpl': output_path,
270
- 'quiet': True,
271
- 'no_warnings': True,
272
- 'merge_output_format': 'mp4',
273
- 'http_headers': {
274
- 'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
275
- 'Accept-Language': 'en-US,en;q=0.9',
276
- 'Accept': '*/*',
277
- 'Referer': 'https://www.instagram.com/',
278
- },
279
- 'extractor_args': {
280
- 'instagram': {'api_version': 'v1'},
281
- },
282
- 'socket_timeout': 30,
283
- 'retries': 3,
284
- 'geo_bypass': True,
285
- }
286
- try:
287
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
288
- info = ydl.extract_info(url, download=True)
289
- title = info.get('title', 'video')
290
- for f in os.listdir(tmp_dir):
291
- if f.endswith('.mp4'):
292
- return os.path.join(tmp_dir, f), title
293
- return None, "Download succeeded but file not found."
294
- except Exception as e:
295
- return None, f"Download failed: {str(e)}"
296
-
297
- def transcribe_url(url, model_name, language, show_timestamps, translate):
298
- """Download from URL then transcribe"""
299
- if not url or not url.strip():
300
- return "Please paste a valid URL.", ""
301
- url = url.strip()
302
-
303
- # Validate URL is from supported platforms
304
- supported = ['instagram.com', 'youtube.com', 'youtu.be', 'twitter.com',
305
- 'x.com', 'facebook.com', 'fb.watch', 'fb.com', 'tiktok.com']
306
- if not any(s in url.lower() for s in supported):
307
- return "Unsupported URL. Please use Instagram, YouTube, Twitter/X, or Facebook links.", ""
308
-
309
- tmp_path = None
310
- try:
311
- tmp_path, title = download_from_url(url)
312
- # Create a mock file object with .name attribute
313
- class FileObj:
314
- def __init__(self, path):
315
- self.name = path
316
- result = transcribe(FileObj(tmp_path), model_name, language, show_timestamps, translate)
317
- return result
318
- except Exception as e:
319
- return f"Error: {str(e)}", ""
320
- finally:
321
- if tmp_path and os.path.exists(tmp_path):
322
- os.unlink(tmp_path)
323
 
324
  custom_css = """
325
  @import url('https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@1&family=Geist:wght@300;400;500;600&display=swap');
@@ -529,16 +247,6 @@ by Kalpi Edition
529
 
530
  with gr.Row():
531
  with gr.Column(scale=5):
532
- url_input = gr.Textbox(
533
- label="Paste URL (Instagram · YouTube · Twitter/X · Facebook)",
534
- placeholder="https://www.instagram.com/reel/...",
535
- lines=1
536
- )
537
- with gr.Row():
538
- url_btn = gr.Button("Transcribe URL →", variant="primary")
539
- download_btn_url = gr.Button("Download video", variant="secondary")
540
- download_output = gr.File(label="Download", visible=False)
541
- gr.Markdown("<div style='text-align:center;font-size:11px;color:#555;margin:4px 0'>— or upload a file —</div>")
542
  file_input = gr.File(
543
  label="Drop your file here — MP4 · MOV · MP3 · WAV · M4A"
544
  )
@@ -548,7 +256,8 @@ by Kalpi Edition
548
  "tiny — Fastest",
549
  "base — Fast",
550
  "small — Balanced",
551
- "medium — Best accuracy"
 
552
  ],
553
  value="tiny — Fastest",
554
  label="Model"
@@ -595,24 +304,6 @@ by Kalpi Edition
595
  inputs=[file_input, model_choice, language, timestamps, translate],
596
  outputs=[output, plain_output]
597
  )
598
- url_btn.click(
599
- fn=transcribe_url,
600
- inputs=[url_input, model_choice, language, timestamps, translate],
601
- outputs=[output, plain_output]
602
- )
603
-
604
- def handle_download(url):
605
- path, title = download_video_only(url)
606
- if path:
607
- return gr.File(value=path, visible=True, label=f"Download: {title}")
608
- return gr.File(visible=False)
609
-
610
- download_btn_url.click(
611
- fn=handle_download,
612
- inputs=[url_input],
613
- outputs=[download_output]
614
- )
615
- download_btn.click(fn=save_transcript, inputs=plain_output, outputs=download_file)
616
 
617
  if __name__ == "__main__":
618
  demo.launch(css=custom_css)
 
3
  import yt_dlp
4
  import os
5
  import tempfile
 
 
6
 
7
  models = {}
8
 
 
38
  return ''.join(result)
39
 
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  custom_css = """
43
  @import url('https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@1&family=Geist:wght@300;400;500;600&display=swap');
 
247
 
248
  with gr.Row():
249
  with gr.Column(scale=5):
 
 
 
 
 
 
 
 
 
 
250
  file_input = gr.File(
251
  label="Drop your file here — MP4 · MOV · MP3 · WAV · M4A"
252
  )
 
256
  "tiny — Fastest",
257
  "base — Fast",
258
  "small — Balanced",
259
+ "medium — Best accuracy",
260
+ "large-v3 — Most accurate (very slow)"
261
  ],
262
  value="tiny — Fastest",
263
  label="Model"
 
304
  inputs=[file_input, model_choice, language, timestamps, translate],
305
  outputs=[output, plain_output]
306
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
307
 
308
  if __name__ == "__main__":
309
  demo.launch(css=custom_css)