Spaces:

Kalp97
/

KalpTranscript

Sleeping

App Files Community

Kalp97 committed on Mar 22

Commit

1b24485

verified ·

1 Parent(s): a2e872f

Upload 2 files

Browse files

Files changed (2) hide show

app.py +125 -0
requirements.txt +2 -0

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import gradio as gr
 import whisper
 models = {}
@@ -34,6 +35,42 @@ def devanagari_to_roman(text):
         result.append(DEVA_MAP.get(ch, ch))
     return ''.join(result)
 def transcribe(file, model_name, language, show_timestamps, translate):
     if file is None:
         return "Please upload a video or audio file.", ""
@@ -104,6 +141,67 @@ def save_transcript(text):
         f.write(text)
     return path
 custom_css = """
 @import url('https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@1&family=Geist:wght@300;400;500;600&display=swap');
@@ -312,6 +410,16 @@ by Kalpi Edition
     with gr.Row():
         with gr.Column(scale=5):
             file_input = gr.File(
                 label="Drop your file here — MP4 · MOV · MP3 · WAV · M4A"
             )
@@ -368,6 +476,23 @@ by Kalpi Edition
         inputs=[file_input, model_choice, language, timestamps, translate],
         outputs=[output, plain_output]
     )
     download_btn.click(fn=save_transcript, inputs=plain_output, outputs=download_file)
 if __name__ == "__main__":

 import gradio as gr
 import whisper
+import yt_dlp
 models = {}
         result.append(DEVA_MAP.get(ch, ch))
     return ''.join(result)
def download_from_url(url):
    """Download the audio track behind *url* as an MP3 using yt-dlp.

    The audio is written into a freshly created temporary directory and
    converted to MP3 by yt-dlp's ``FFmpegExtractAudio`` post-processor.

    Args:
        url: Address of the media page to download from.

    Returns:
        A ``(path, title)`` tuple: the path of the MP3 inside the temporary
        directory, and the title reported by yt-dlp (``'video'`` when no
        title is available). The caller owns the file and its directory
        and is responsible for deleting them.

    Raises:
        Exception: If yt-dlp fails, or if no ``.mp3`` file exists after an
            apparently successful run. The temporary directory is removed
            before the exception propagates, so failures leave nothing
            behind on disk.
    """
    import shutil  # local import: only needed for failure cleanup

    tmp_dir = tempfile.mkdtemp()
    ydl_opts = {
        'format': 'bestaudio/best',
        # yt-dlp fills in %(ext)s itself; the post-processor then yields .mp3
        'outtmpl': os.path.join(tmp_dir, 'audio.%(ext)s'),
        'quiet': True,
        'no_warnings': True,
        'extract_flat': False,
        'postprocessors': [{
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3',
            'preferredquality': '192',
        }],
        # Present a desktop-browser User-Agent. The value is fixed, not
        # rotated between requests.
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        },
        'cookiefile': None,
        'socket_timeout': 30,
    }
    try:
        try:
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=True)
        except Exception as e:
            # Keep the original cause attached for debugging.
            raise Exception(f"Could not download from URL: {str(e)}") from e

        # Guard against a missing info dict or an explicit None title.
        title = (info or {}).get('title') or 'video'

        # Find the downloaded mp3
        for name in os.listdir(tmp_dir):
            if name.endswith('.mp3'):
                return os.path.join(tmp_dir, name), title
        raise Exception("Download succeeded but audio file not found.")
    except BaseException:
        # Nothing usable is being handed back, so do not leak the directory.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise
 def transcribe(file, model_name, language, show_timestamps, translate):
     if file is None:
         return "Please upload a video or audio file.", ""
         f.write(text)
     return path
def download_video_only(url):
    """Download the video behind *url* as an MP4 using yt-dlp.

    Args:
        url: Address pasted by the user. Surrounding whitespace is ignored
            and a missing scheme is tolerated.

    Returns:
        ``(path, title)`` on success, where *path* points at the MP4 inside
        a dedicated temporary directory and *title* is the title reported
        by yt-dlp (``'video'`` when unavailable).
        ``(None, message)`` on any failure, where *message* is a
        human-readable explanation. This function does not raise for
        validation or download errors.
    """
    import shutil  # local imports: not needed by the rest of the module
    from urllib.parse import urlparse

    if not url or not url.strip():
        return None, "Please paste a valid URL."
    url = url.strip()

    supported = ('instagram.com', 'youtube.com', 'youtu.be', 'twitter.com',
                 'x.com', 'facebook.com', 'fb.watch')
    # Compare against the parsed hostname rather than searching the whole
    # string: a substring test lets 'x.com' match 'netflix.com' and accepts
    # any URL that merely mentions a supported domain in its path or query.
    try:
        parsed = urlparse(url)
        if not parsed.netloc:
            # No scheme given (e.g. "youtube.com/watch?v=..."): reparse as
            # a network-path reference so the host is still recognised.
            parsed = urlparse('//' + url)
        host = (parsed.hostname or '').lower()
    except ValueError:
        host = ''
    if not any(host == d or host.endswith('.' + d) for d in supported):
        return None, "Unsupported URL."

    tmp_dir = tempfile.mkdtemp()
    ydl_opts = {
        'format': 'bestvideo+bestaudio/best',
        'outtmpl': os.path.join(tmp_dir, 'video.%(ext)s'),
        'quiet': True,
        'no_warnings': True,
        'merge_output_format': 'mp4',
        'http_headers': {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        },
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=True)
        title = (info or {}).get('title') or 'video'
        for name in os.listdir(tmp_dir):
            if name.endswith('.mp4'):
                return os.path.join(tmp_dir, name), title
        message = "Download succeeded but file not found."
    except Exception as e:
        message = f"Download failed: {str(e)}"

    # Only reached on failure: nothing is returned from the directory, so
    # remove it instead of leaking it.
    shutil.rmtree(tmp_dir, ignore_errors=True)
    return None, message
def transcribe_url(url, model_name, language, show_timestamps, translate):
    """Download the audio behind *url* and run it through ``transcribe``.

    Args:
        url: Address pasted by the user. Surrounding whitespace is ignored
            and a missing scheme is tolerated.
        model_name: Passed through unchanged to ``transcribe``.
        language: Passed through unchanged to ``transcribe``.
        show_timestamps: Passed through unchanged to ``transcribe``.
        translate: Passed through unchanged to ``transcribe``.

    Returns:
        Whatever ``transcribe`` returns on success. For an empty URL, an
        unsupported host, or any exception raised while downloading or
        transcribing, a ``(message, "")`` tuple describing the problem.
    """
    from contextlib import suppress  # local imports: used only here
    from types import SimpleNamespace
    from urllib.parse import urlparse

    if not url or not url.strip():
        return "Please paste a valid URL.", ""
    url = url.strip()

    # Validate URL is from supported platforms
    supported = ('instagram.com', 'youtube.com', 'youtu.be', 'twitter.com',
                 'x.com', 'facebook.com', 'fb.watch', 'fb.com', 'tiktok.com')
    # Match on the parsed hostname: a substring test lets 'x.com' match
    # 'netflix.com' and accepts any URL that merely mentions a supported
    # domain somewhere in its path or query string.
    try:
        parsed = urlparse(url)
        if not parsed.netloc:
            # No scheme given: reparse as a network-path reference.
            parsed = urlparse('//' + url)
        host = (parsed.hostname or '').lower()
    except ValueError:
        host = ''
    if not any(host == d or host.endswith('.' + d) for d in supported):
        return "Unsupported URL. Please use Instagram, YouTube, Twitter/X, or Facebook links.", ""

    tmp_path = None
    try:
        tmp_path, _title = download_from_url(url)
        # transcribe() takes an upload-like object; give it one exposing
        # the downloaded file's path as ``.name``.
        return transcribe(SimpleNamespace(name=tmp_path), model_name,
                          language, show_timestamps, translate)
    except Exception as e:
        return f"Error: {str(e)}", ""
    finally:
        if tmp_path:
            # Best-effort cleanup that must never mask the result above.
            with suppress(OSError):
                os.unlink(tmp_path)
            # download_from_url places the file in its own temp directory;
            # rmdir only succeeds if that directory is now empty, so an
            # unexpected non-empty parent is left untouched.
            with suppress(OSError):
                os.rmdir(os.path.dirname(tmp_path))
 custom_css = """
 @import url('https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@1&family=Geist:wght@300;400;500;600&display=swap');
     with gr.Row():
         with gr.Column(scale=5):
+            url_input = gr.Textbox(
+                label="Paste URL (Instagram · YouTube · Twitter/X · Facebook)",
+                placeholder="https://www.instagram.com/reel/...",
+                lines=1
+            )
+            with gr.Row():
+                url_btn = gr.Button("Transcribe URL →", variant="primary")
+                download_btn_url = gr.Button("Download video", variant="secondary")
+            download_output = gr.File(label="Download", visible=False)
+            gr.Markdown("<div style='text-align:center;font-size:11px;color:#555;margin:4px 0'>— or upload a file —</div>")
             file_input = gr.File(
                 label="Drop your file here — MP4 · MOV · MP3 · WAV · M4A"
             )
         inputs=[file_input, model_choice, language, timestamps, translate],
         outputs=[output, plain_output]
     )
+    url_btn.click(
+        fn=transcribe_url,
+        inputs=[url_input, model_choice, language, timestamps, translate],
+        outputs=[output, plain_output]
+    )
+    def handle_download(url):
+        path, title = download_video_only(url)
+        if path:
+            return gr.File(value=path, visible=True, label=f"Download: {title}")
+        return gr.File(visible=False)
+    download_btn_url.click(
+        fn=handle_download,
+        inputs=[url_input],
+        outputs=[download_output]
+    )
     download_btn.click(fn=save_transcript, inputs=plain_output, outputs=download_file)
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -4,3 +4,5 @@ openai-whisper
 gunicorn
 indic-transliteration

 gunicorn
 indic-transliteration
+yt-dlp