Spaces:

DARKWICK
/

Solutions_Architect

Sleeping

App Files Files Community

DARKWICK committed on Oct 18, 2025

Commit

a5861bb

verified ·

1 Parent(s): b2dcd00

Upload 3 files

Browse files

Files changed (3) hide show

Dockerfile.txt +20 -0
app.py +140 -0
requirements.txt +3 -0

Dockerfile.txt ADDED Viewed

	@@ -0,0 +1,20 @@

+# Container image for the Gradio transcription app (app.py).
+FROM python:3.11-slim
+# ffmpeg is used by yt-dlp's FFmpegExtractAudio post-processor in app.py.
+# NOTE(review): git and build-essential are presumably only needed if a
+# dependency has to be built from source — TODO confirm they are required.
+# The apt package lists are removed in the same layer after installing.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ffmpeg git build-essential \
+ && rm -rf /var/lib/apt/lists/*
+WORKDIR /app
+# Dependencies are installed before app.py is copied, so editing app.py does
+# not invalidate the pip install layer.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY app.py .
+# Port the Gradio server is served on.
+ENV PORT=7860
+EXPOSE 7860
+# Defaults for the settings app.py reads from the environment at import time.
+ENV WHISPER_MODEL=small
+ENV COMPUTE_TYPE=int8
+ENV MAX_DURATION_SEC=1800
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,140 @@

+import os
+import tempfile
+from pathlib import Path
+import gradio as gr
+import yt_dlp
+from faster_whisper import WhisperModel
+# -------- Settings you can tweak --------
+DEFAULT_MODEL = os.getenv("WHISPER_MODEL", "small")  # small | medium | large-v3 (requires more RAM)
+COMPUTE_TYPE = os.getenv("COMPUTE_TYPE", "int8")     # int8 | int8_float16 | float16 | float32
+MAX_DURATION_SEC = int(os.getenv("MAX_DURATION_SEC", "1800"))  # 30 min cap to keep things predictable
+# ---------------------------------------
+# Lazy-load model once per container
+_model = None
+def get_model():
+    global _model
+    if _model is None:
+        _model = WhisperModel(DEFAULT_MODEL, compute_type=COMPUTE_TYPE)
+    return _model
+def _download_youtube_audio(url: str, workdir: str) -> str:
+    """
+    Download YouTube audio and convert to WAV mono 16 kHz using FFmpegExtractAudio.
+    Returns path to the WAV file.
+    """
+    outtmpl = str(Path(workdir) / "%(id)s.%(ext)s")
+    ydl_opts = {
+        "format": "bestaudio/best",
+        "outtmpl": outtmpl,
+        "noplaylist": True,
+        "quiet": True,
+        "no_warnings": True,
+        "postprocessors": [
+            {
+                "key": "FFmpegExtractAudio",
+                "preferredcodec": "wav",
+                "preferredquality": "5",
+            }
+        ],
+        # ensure mono @ 16 kHz
+        "postprocessor_args": ["-ac", "1", "-ar", "16000"],
+    }
+    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+        info = ydl.extract_info(url, download=True)
+        duration = info.get("duration") or 0
+        if duration and duration > MAX_DURATION_SEC:
+            raise gr.Error(f"Video too long ({duration//60} min). Max allowed is {MAX_DURATION_SEC//60} min.")
+    # Find the produced .wav in the temp dir (name can vary)
+    wavs = list(Path(workdir).glob("*.wav"))
+    if not wavs:
+        raise gr.Error("Audio extraction failed. Try a different video.")
+    return str(wavs[0])
+def _write_srt(segments, path: str):
+    def srt_timestamp(t):
+        # t in seconds -> "HH:MM:SS,mmm"
+        h = int(t // 3600)
+        m = int((t % 3600) // 60)
+        s = int(t % 60)
+        ms = int((t - int(t)) * 1000)
+        return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
+    with open(path, "w", encoding="utf-8") as f:
+        for i, seg in enumerate(segments, start=1):
+            f.write(f"{i}\n")
+            f.write(f"{srt_timestamp(seg.start)} --> {srt_timestamp(seg.end)}\n")
+            f.write(seg.text.strip() + "\n\n")
+def transcribe(youtube_url, upload_file, model_size, language, translate_to_english):
+    if not youtube_url and not upload_file:
+        raise gr.Error("Provide a YouTube URL or upload a file.")
+    # Update model on-the-fly if user changes it
+    global _model
+    if _model is None or getattr(_model, "_model_size", None) != model_size:
+        _model = WhisperModel(model_size, compute_type=COMPUTE_TYPE)
+        _model._model_size = model_size  # tag for reuse
+    with tempfile.TemporaryDirectory() as td:
+        if youtube_url:
+            audio_path = _download_youtube_audio(youtube_url.strip(), td)
+        else:
+            # Save uploaded file and (optionally) convert via ffmpeg if needed
+            src = Path(td) / Path(upload_file.name).name
+            with open(src, "wb") as w:
+                w.write(upload_file.read())
+            # Let faster-whisper/ffmpeg handle decoding directly
+            audio_path = str(src)
+        # Transcribe
+        segments, info = _model.transcribe(
+            audio_path,
+            language=None if language == "auto" else language,
+            task="translate" if translate_to_english else "transcribe",
+            vad_filter=True
+        )
+        # Collect text and also write SRT
+        segs = list(segments)
+        full_text = "".join(s.text for s in segs).strip()
+        srt_path = Path(td) / "subtitles.srt"
+        _write_srt(segs, srt_path)
+        return full_text, str(srt_path)
+# ---- Gradio UI ----
+with gr.Blocks(title="YouTube → Text (Whisper)") as demo:
+    gr.Markdown("## 🎬 YouTube → 📝 Text\nPaste a YouTube link **or** upload a media file to get a transcript.")
+    with gr.Row():
+        youtube_url = gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=...")
+    with gr.Row():
+        upload_file = gr.File(label="Or upload a video/audio file", file_count="single")
+    with gr.Row():
+        model_size = gr.Dropdown(
+            ["small", "medium", "large-v3"],
+            value=DEFAULT_MODEL,
+            label="Model size (larger = more accurate, slower)"
+        )
+        language = gr.Dropdown(
+            ["auto","en","ar","fr","de","es","hi","ur","fa","ru","zh"],
+            value="auto",
+            label="Language (auto-detect or force)"
+        )
+        translate_to_english = gr.Checkbox(value=False, label="Translate to English")
+    run_btn = gr.Button("Transcribe", variant="primary")
+    transcript = gr.Textbox(label="Transcript", lines=12)
+    srt_file = gr.File(label="Download SRT (subtitles)")
+    run_btn.click(
+        transcribe,
+        inputs=[youtube_url, upload_file, model_size, language, translate_to_english],
+        outputs=[transcript, srt_file]
+    )
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+# NOTE(review): no versions are pinned, so each build installs whatever is
+# current — consider pinning once a known-good set is established.
+# Web UI (imported as gr in app.py)
+gradio
+# Video download and audio extraction (imported as yt_dlp in app.py)
+yt-dlp
+# Speech recognition (provides WhisperModel used in app.py)
+faster-whisper