Spaces:

redhairedshanks1
/

VoiceScript

Sleeping

App Files Files Community

Antigravity Agent committed 25 days ago

Commit

33708d5

1 Parent(s): a77fb27

Update to Faster-Whisper Large V3 and Gradio

Browse files

Files changed (3) hide show

Dockerfile +27 -0
app.py +77 -0
requirements.txt +5 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,27 @@

+FROM python:3.10-slim
+# Install system dependencies; --no-install-recommends avoids pulling in
+# optional packages that only enlarge the image.
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    ffmpeg \
+    git \
+    && rm -rf /var/lib/apt/lists/*
+# Hugging Face Docker Spaces run the container as UID 1000. Create that user
+# with a real home directory so pip's user site and the model download cache
+# are writable at runtime.
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
+# Set working directory
+WORKDIR $HOME/app
+# Copy requirements and install
+COPY --chown=user requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy application code
+COPY --chown=user . .
+# Expose the Gradio port
+EXPOSE 7860
+# Set environment variables for Gradio
+ENV GRADIO_SERVER_NAME="0.0.0.0"
+ENV GRADIO_SERVER_PORT=7860
+# Do not buffer stdout so print() output shows up in the container logs
+ENV PYTHONUNBUFFERED=1
+# Run the application
+CMD ["python", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,77 @@

+import os
+import tempfile
+import gradio as gr
+from faster_whisper import WhisperModel
+import torch
+# Initialize model
+device = "cuda" if torch.cuda.is_available() else "cpu"
+compute_type = "float16" if torch.cuda.is_available() else "int8"
+print(f"Loading Whisper Large V3 on {device} ({compute_type})...")
+model = WhisperModel("large-v3", device=device, compute_type=compute_type)
+def format_timestamp(seconds):
+    h = int(seconds // 3600)
+    m = int((seconds % 3600) // 60)
+    s = int(seconds % 60)
+    ms = int((seconds % 1) * 1000)
+    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
+def segments_to_srt(segments):
+    lines = []
+    for i, seg in enumerate(segments, 1):
+        start = format_timestamp(seg.start)
+        end = format_timestamp(seg.end)
+        lines.append(str(i))
+        lines.append(f"{start} --> {end}")
+        lines.append(seg.text.strip())
+        lines.append("")
+    return "\n".join(lines)
+def transcribe(audio_path, task="transcribe", language=None):
+    if audio_path is None:
+        return "Please upload an audio file.", None
+    options = {"task": task}
+    if language and language != "auto":
+        options["language"] = language
+    segments, info = model.transcribe(audio_path, beam_size=5, **options)
+    segments_list = list(segments)
+    full_text = " ".join([s.text.strip() for s in segments_list])
+    srt_content = segments_to_srt(segments_list)
+    # Save SRT to a temporary file
+    temp_srt = tempfile.NamedTemporaryFile(delete=False, suffix=".srt")
+    temp_srt.write(srt_content.encode("utf-8"))
+    temp_srt.close()
+    return full_text, temp_srt.name
+# Gradio UI
+# One row with two columns: the first column holds the inputs and the
+# trigger button, the second holds the transcript and the SRT download.
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🎙️ VoiceScript - Whisper Large V3")
+    gr.Markdown("Fast and accurate transcription powered by Faster-Whisper Large V3.")
+    with gr.Row():
+        with gr.Column():
+            # type="filepath" hands transcribe() a path on disk rather than raw samples.
+            audio_input = gr.Audio(type="filepath", label="Upload Audio/Video")
+            task_input = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
+            # "auto" is the sentinel transcribe() treats as "no language specified".
+            lang_input = gr.Dropdown(["auto", "en", "es", "fr", "de", "zh", "ja", "ko", "hi"], label="Language (optional)", value="auto")
+            transcribe_btn = gr.Button("Transcribe", variant="primary")
+        with gr.Column():
+            text_output = gr.Textbox(label="Transcript", lines=10)
+            file_output = gr.File(label="Download SRT")
+    # Inputs map positionally onto transcribe(audio_path, task, language);
+    # its (text, srt_path) return value fills the two outputs in order.
+    transcribe_btn.click(
+        fn=transcribe,
+        inputs=[audio_input, task_input, lang_input],
+        outputs=[text_output, file_output]
+    )
+# Start the server only when run as a script; host and port match the
+# GRADIO_SERVER_NAME / GRADIO_SERVER_PORT values set in the Dockerfile.
+if __name__ == "__main__":
+    demo.launch(server_name="0.0.0.0", server_port=7860)

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+# Runtime dependencies for app.py. No versions are pinned.
+faster-whisper
+gradio
+torch
+# NOTE(review): torchaudio and ffmpeg-python are not imported by app.py as
+# shown in this commit - confirm they are still needed.
+torchaudio
+ffmpeg-python