Spaces:

Kalp97
/

KalpTranscript

Running

App Files Community

Kalp97 committed on Mar 29

Commit

b045623

verified ·

1 Parent(s): 9ea9b32

Update app.py

Browse files

Files changed (1) hide show

app.py +2 -311

app.py CHANGED Viewed

@@ -3,8 +3,6 @@ import whisper
 import yt_dlp
 import os
 import tempfile
-import requests
-import requests
 models = {}
@@ -40,286 +38,6 @@ def devanagari_to_roman(text):
     return ''.join(result)
-RAPIDAPI_KEY = "47b2f0d88bmsh7842ac99f4b2a3ep12df5djsn6f61065d9692"
-RAPIDAPI_HOST = "instagram-reels-downloader-api.p.rapidapi.com"
-def download_instagram_audio(url):
-    """Download Instagram reel via RapidAPI then extract audio"""
-    headers = {
-        "x-rapidapi-key": RAPIDAPI_KEY,
-        "x-rapidapi-host": RAPIDAPI_HOST,
-        "Content-Type": "application/json"
-    }
-    resp = requests.get(
-        f"https://{RAPIDAPI_HOST}/download",
-        headers=headers,
-        params={"url": url},
-        timeout=30
-    )
-    if resp.status_code != 200:
-        raise Exception(f"RapidAPI error {resp.status_code}: {resp.text[:200]}")
-    data = resp.json()
-    video_url = None
-    if isinstance(data, dict):
-        video_url = (data.get('url') or data.get('video_url') or
-                     data.get('download_url') or
-                     (data.get('data') or {}).get('url') or
-                     (data.get('data') or {}).get('video_url'))
-    elif isinstance(data, list) and len(data) > 0:
-        item = data[0]
-        video_url = item.get('url') or item.get('video_url') or item.get('download_url')
-    if not video_url:
-        raise Exception(f"No download URL in response: {str(data)[:300]}")
-    tmp_dir = tempfile.mkdtemp()
-    video_path = os.path.join(tmp_dir, 'ig_video.mp4')
-    audio_path = os.path.join(tmp_dir, 'audio.mp3')
-    vid_resp = requests.get(video_url, timeout=60, stream=True,
-        headers={"User-Agent":"Mozilla/5.0"})
-    with open(video_path, 'wb') as f:
-        for chunk in vid_resp.iter_content(chunk_size=8192):
-            f.write(chunk)
-    os.system(f'ffmpeg -i "{video_path}" -q:a 0 -map a "{audio_path}" -y -loglevel quiet')
-    if os.path.exists(audio_path) and os.path.getsize(audio_path) > 0:
-        return audio_path, 'instagram_reel'
-    return video_path, 'instagram_reel'
-RAPIDAPI_KEY = "47b2f0d88bmsh7842ac99f4b2a3ep12df5djsn6f61065d9692"
-RAPIDAPI_HOST = "instagram-reels-downloader-api.p.rapidapi.com"
-def download_instagram_via_rapidapi(url):
-    """Download Instagram reel using RapidAPI — reliable, no IP blocks"""
-    headers = {
-        "x-rapidapi-key": RAPIDAPI_KEY,
-        "x-rapidapi-host": RAPIDAPI_HOST,
-        "Content-Type": "application/json"
-    }
-    params = {"url": url}
-    resp = requests.get(
-        f"https://{RAPIDAPI_HOST}/download",
-        headers=headers,
-        params=params,
-        timeout=30
-    )
-    if resp.status_code != 200:
-        raise Exception(f"RapidAPI error {resp.status_code}: {resp.text[:200]}")
-    data = resp.json()
-    # Extract direct video URL from response
-    video_url = None
-    if isinstance(data, dict):
-        video_url = (data.get('url') or data.get('download_url') or
-                    data.get('video_url') or data.get('link'))
-        if not video_url and data.get('data'):
-            d = data['data']
-            if isinstance(d, list) and len(d) > 0:
-                video_url = d[0].get('url') or d[0].get('download_url')
-            elif isinstance(d, dict):
-                video_url = d.get('url') or d.get('download_url')
-    if not video_url:
-        raise Exception(f"No video URL in response: {str(data)[:300]}")
-    # Download the actual video file
-    tmp_dir = tempfile.mkdtemp()
-    tmp_path = os.path.join(tmp_dir, 'instagram.mp4')
-    video_resp = requests.get(video_url, timeout=60, stream=True)
-    with open(tmp_path, 'wb') as f:
-        for chunk in video_resp.iter_content(chunk_size=8192):
-            f.write(chunk)
-    return tmp_path, data.get('title', 'Instagram video')
-def download_from_url(url):
-    """Instagram → RapidAPI, everything else → yt-dlp"""
-    if 'instagram.com' in url.lower():
-        return download_instagram_audio(url)
-    tmp_dir = tempfile.mkdtemp()
-    output_path = os.path.join(tmp_dir, 'audio.%(ext)s')
-    ydl_opts = {
-        'format': 'bestaudio/best',
-        'outtmpl': output_path,
-        'quiet': True,
-        'no_warnings': True,
-        'postprocessors': [{
-            'key': 'FFmpegExtractAudio',
-            'preferredcodec': 'mp3',
-            'preferredquality': '192',
-        }],
-        'http_headers': {
-            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
-        },
-        'socket_timeout': 60,
-        'retries': 3,
-        'geo_bypass': True,
-    }
-    try:
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            info = ydl.extract_info(url, download=True)
-            title = info.get('title', 'video')
-    except Exception as e:
-        err = str(e)
-        if 'private' in err.lower():
-            raise Exception("This account is private. Only public posts can be downloaded.")
-        elif 'not found' in err.lower() or '404' in err:
-            raise Exception("Video not found. Check the URL and make sure the post still exists.")
-        else:
-            raise Exception(f"Download failed: {err}")
-    for f in os.listdir(tmp_dir):
-        if f.endswith('.mp3'):
-            return os.path.join(tmp_dir, f), title
-    raise Exception("Download succeeded but audio file not found.")
-def transcribe(file, model_name, language, show_timestamps, translate):
-    if file is None:
-        return "Please upload a video or audio file.", ""
-    try:
-        m = load_model(model_name)
-        # Hinglish: use large-v3 model for best code-switching support
-        hinglish_mode = (language == "Hinglish (Roman)")
-        if hinglish_mode:
-            lang = "hi"  # Force Hindi so Whisper writes Devanagari accurately
-            m = load_model("large-v3")  # Override with large-v3 for Hinglish
-        elif language == "Auto Detect":
-            lang = None
-        else:
-            lang = language.lower()
-        whisper_task = "translate" if translate == "Translate to English" else "transcribe"
-        # Hinglish: nudge toward correct Roman script via initial_prompt
-        initial_prompt = None
-        if hinglish_mode:
-            initial_prompt = (
-                "This is a Hinglish conversation mixing Hindi and English. "
-                "Transcribe everything in Roman/Latin script only. No Devanagari. "
-                "Write full Hindi words correctly in Roman letters — never abbreviate. "
-                "Common words: India, main, mein, sab, aaj, kal, hai, hain, rahe, "
-                "isliye, kyunki, lekin, aur, yeh, woh, kya, bhi, toh, bas, paisa, "
-                "log, kaam, din, raat, ghar, baat, baar, bahut, accha, theek. "
-                "Example: India mein sab log dropshipping isliye sikha rahe hain "
-                "kyunki paisa e-books mein ban raha hai."
-            )
-        result = m.transcribe(
-            file.name, language=lang, task=whisper_task,
-            verbose=False, initial_prompt=initial_prompt
-        )
-        plain = result["text"].strip()
-        # Hinglish: if any Devanagari slipped through, transliterate it
-        if hinglish_mode:
-            try:
-                from indic_transliteration import sanscript
-                from indic_transliteration.transliterate import transliterate
-                plain = transliterate(plain, sanscript.DEVANAGARI, sanscript.ITRANS)
-            except Exception:
-                # Fallback: simple character-level Devanagari → Roman map
-                plain = devanagari_to_roman(plain)
-        if show_timestamps:
-            lines = []
-            for seg in result["segments"]:
-                start = format_time(seg["start"])
-                end = format_time(seg["end"])
-                seg_text = seg['text'].strip()
-                if hinglish_mode:
-                    try:
-                        from indic_transliteration import sanscript
-                        from indic_transliteration.transliterate import transliterate
-                        seg_text = transliterate(seg_text, sanscript.DEVANAGARI, sanscript.ITRANS)
-                    except Exception:
-                        seg_text = devanagari_to_roman(seg_text)
-                lines.append(f"[{start} → {end}]  {seg_text}")
-            return "\n".join(lines), plain
-        return plain, plain
-    except Exception as e:
-        return f"Error: {str(e)}", ""
-def save_transcript(text):
-    if not text:
-        return None
-    path = "/tmp/transcript.txt"
-    with open(path, "w", encoding="utf-8") as f:
-        f.write(text)
-    return path
-def download_video_only(url):
-    """Download video — uses RapidAPI for Instagram, yt-dlp for others"""
-    if not url or not url.strip():
-        return None, "Please paste a valid URL."
-    url = url.strip()
-    supported = ['instagram.com','youtube.com','youtu.be','twitter.com',
-                 'x.com','facebook.com','fb.watch']
-    if not any(s in url.lower() for s in supported):
-        return None, "Unsupported URL."
-    # Use RapidAPI for Instagram
-    if 'instagram.com' in url.lower():
-        try:
-            tmp_path, title = download_instagram_via_rapidapi(url)
-            return tmp_path, title
-        except Exception as e:
-            return None, str(e)
-    tmp_dir = tempfile.mkdtemp()
-    output_path = os.path.join(tmp_dir, 'video.%(ext)s')
-    ydl_opts = {
-        'format': 'bestvideo+bestaudio/best',
-        'outtmpl': output_path,
-        'quiet': True,
-        'no_warnings': True,
-        'merge_output_format': 'mp4',
-        'http_headers': {
-            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 17_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.0 Mobile/15E148 Safari/604.1',
-            'Accept-Language': 'en-US,en;q=0.9',
-            'Accept': '*/*',
-            'Referer': 'https://www.instagram.com/',
-        },
-        'extractor_args': {
-            'instagram': {'api_version': 'v1'},
-        },
-        'socket_timeout': 30,
-        'retries': 3,
-        'geo_bypass': True,
-    }
-    try:
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            info = ydl.extract_info(url, download=True)
-            title = info.get('title', 'video')
-        for f in os.listdir(tmp_dir):
-            if f.endswith('.mp4'):
-                return os.path.join(tmp_dir, f), title
-        return None, "Download succeeded but file not found."
-    except Exception as e:
-        return None, f"Download failed: {str(e)}"
-def transcribe_url(url, model_name, language, show_timestamps, translate):
-    """Download from URL then transcribe"""
-    if not url or not url.strip():
-        return "Please paste a valid URL.", ""
-    url = url.strip()
-    # Validate URL is from supported platforms
-    supported = ['instagram.com', 'youtube.com', 'youtu.be', 'twitter.com',
-                 'x.com', 'facebook.com', 'fb.watch', 'fb.com', 'tiktok.com']
-    if not any(s in url.lower() for s in supported):
-        return "Unsupported URL. Please use Instagram, YouTube, Twitter/X, or Facebook links.", ""
-    tmp_path = None
-    try:
-        tmp_path, title = download_from_url(url)
-        # Create a mock file object with .name attribute
-        class FileObj:
-            def __init__(self, path):
-                self.name = path
-        result = transcribe(FileObj(tmp_path), model_name, language, show_timestamps, translate)
-        return result
-    except Exception as e:
-        return f"Error: {str(e)}", ""
-    finally:
-        if tmp_path and os.path.exists(tmp_path):
-            os.unlink(tmp_path)
 custom_css = """
 @import url('https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@1&family=Geist:wght@300;400;500;600&display=swap');
@@ -529,16 +247,6 @@ by Kalpi Edition
     with gr.Row():
         with gr.Column(scale=5):
-            url_input = gr.Textbox(
-                label="Paste URL (Instagram · YouTube · Twitter/X · Facebook)",
-                placeholder="https://www.instagram.com/reel/...",
-                lines=1
-            )
-            with gr.Row():
-                url_btn = gr.Button("Transcribe URL →", variant="primary")
-                download_btn_url = gr.Button("Download video", variant="secondary")
-            download_output = gr.File(label="Download", visible=False)
-            gr.Markdown("<div style='text-align:center;font-size:11px;color:#555;margin:4px 0'>— or upload a file —</div>")
             file_input = gr.File(
                 label="Drop your file here — MP4 · MOV · MP3 · WAV · M4A"
             )
@@ -548,7 +256,8 @@ by Kalpi Edition
                         "tiny   — Fastest",
                         "base   — Fast",
                         "small  — Balanced",
-                        "medium — Best accuracy"
                     ],
                     value="tiny   — Fastest",
                     label="Model"
@@ -595,24 +304,6 @@ by Kalpi Edition
         inputs=[file_input, model_choice, language, timestamps, translate],
         outputs=[output, plain_output]
     )
-    url_btn.click(
-        fn=transcribe_url,
-        inputs=[url_input, model_choice, language, timestamps, translate],
-        outputs=[output, plain_output]
-    )
-    def handle_download(url):
-        path, title = download_video_only(url)
-        if path:
-            return gr.File(value=path, visible=True, label=f"Download: {title}")
-        return gr.File(visible=False)
-    download_btn_url.click(
-        fn=handle_download,
-        inputs=[url_input],
-        outputs=[download_output]
-    )
-    download_btn.click(fn=save_transcript, inputs=plain_output, outputs=download_file)
 if __name__ == "__main__":
     demo.launch(css=custom_css)

 import yt_dlp
 import os
 import tempfile
 models = {}
     return ''.join(result)
 custom_css = """
 @import url('https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@1&family=Geist:wght@300;400;500;600&display=swap');
     with gr.Row():
         with gr.Column(scale=5):
             file_input = gr.File(
                 label="Drop your file here — MP4 · MOV · MP3 · WAV · M4A"
             )
                         "tiny   — Fastest",
                         "base   — Fast",
                         "small  — Balanced",
+                        "medium — Best accuracy",
+                        "large-v3 — Most accurate (very slow)"
                     ],
                     value="tiny   — Fastest",
                     label="Model"
         inputs=[file_input, model_choice, language, timestamps, translate],
         outputs=[output, plain_output]
     )
 if __name__ == "__main__":
     demo.launch(css=custom_css)