Antigravity Agent committed on
Commit
33708d5
·
1 Parent(s): a77fb27

Update to Faster-Whisper Large V3 and Gradio

Browse files
Files changed (3) hide show
  1. Dockerfile +27 -0
  2. app.py +77 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Install system dependencies (ffmpeg and git).
# --no-install-recommends keeps optional packages out of the image, and the
# apt lists are removed in the same layer so they never reach the final image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        git \
    && rm -rf /var/lib/apt/lists/*

# Set working directory
WORKDIR /app

# Copy requirements first so the dependency layer is cached independently of
# application code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Expose the Gradio port
EXPOSE 7860

# Set environment variables for Gradio (bind on all interfaces, port 7860)
ENV GRADIO_SERVER_NAME="0.0.0.0"
ENV GRADIO_SERVER_PORT=7860

# Run the application
CMD ["python", "app.py"]
app.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import gradio as gr
4
+ from faster_whisper import WhisperModel
5
+ import torch
6
+
7
# Initialize model: choose the device and numeric precision together --
# float16 when CUDA is available, int8 on CPU otherwise.
device, compute_type = (
    ("cuda", "float16") if torch.cuda.is_available() else ("cpu", "int8")
)

print(f"Loading Whisper Large V3 on {device} ({compute_type})...")
model = WhisperModel("large-v3", device=device, compute_type=compute_type)
13
+
14
def format_timestamp(seconds):
    """Format a duration in seconds as an SRT timestamp ``HH:MM:SS,mmm``.

    Args:
        seconds: Offset from the start of the media, in seconds (int or
            float). Negative values are clamped to zero.

    Returns:
        The timestamp string with zero-padded hours, minutes, seconds and a
        comma-separated millisecond field.
    """
    # Round once to whole milliseconds and split with integer arithmetic.
    # Truncating the fractional part separately loses a millisecond to float
    # error (e.g. 1.001 would render as ",000") and never carries into the
    # seconds field.
    total_ms = max(0, int(round(float(seconds) * 1000)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, s = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
20
+
21
def segments_to_srt(segments):
    """Render transcription segments as SRT subtitle text.

    Args:
        segments: Iterable of objects exposing ``start``, ``end`` and
            ``text`` attributes.

    Returns:
        A string in which each cue is a 1-based index line, a
        ``start --> end`` timing line, the stripped text, and a blank
        separator line.
    """
    cue_lines = []
    for index, segment in enumerate(segments, start=1):
        begin = format_timestamp(segment.start)
        finish = format_timestamp(segment.end)
        # The trailing empty string produces the blank line between cues.
        cue_lines.extend(
            [str(index), f"{begin} --> {finish}", segment.text.strip(), ""]
        )
    return "\n".join(cue_lines)
31
+
32
def transcribe(audio_path, task="transcribe", language=None):
    """Transcribe an audio file and write the result to an SRT file.

    Args:
        audio_path: Path of the audio file to process, or ``None`` when
            nothing was uploaded.
        task: Task name forwarded to ``model.transcribe``; the UI offers
            ``"transcribe"`` and ``"translate"``.
        language: Language code forwarded to the model. ``None``, an empty
            string, or ``"auto"`` leaves the language unspecified.

    Returns:
        A ``(full_text, srt_path)`` tuple: the segment texts joined by
        spaces, and the path of a temporary ``.srt`` file. When
        ``audio_path`` is ``None`` the tuple is a prompt message and ``None``.
    """
    if audio_path is None:
        return "Please upload an audio file.", None

    options = {"task": task}
    if language and language != "auto":
        options["language"] = language

    segments, _info = model.transcribe(audio_path, beam_size=5, **options)

    # Materialize once: the segments are iterated twice below.
    segments_list = list(segments)
    full_text = " ".join(s.text.strip() for s in segments_list)
    srt_content = segments_to_srt(segments_list)

    # Save the SRT to a temporary file. delete=False keeps it on disk after
    # the handle closes so the caller can serve it; the ``with`` block
    # guarantees the handle is closed even if the write fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".srt") as temp_srt:
        temp_srt.write(srt_content.encode("utf-8"))

    return full_text, temp_srt.name
52
+
53
# Gradio UI: inputs in the left column, results in the right column.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎙️ VoiceScript - Whisper Large V3")
    gr.Markdown("Fast and accurate transcription powered by Faster-Whisper Large V3.")

    with gr.Row():
        # Input controls.
        with gr.Column():
            audio_input = gr.Audio(
                type="filepath",
                label="Upload Audio/Video",
            )
            task_input = gr.Radio(
                ["transcribe", "translate"],
                label="Task",
                value="transcribe",
            )
            lang_input = gr.Dropdown(
                ["auto", "en", "es", "fr", "de", "zh", "ja", "ko", "hi"],
                label="Language (optional)",
                value="auto",
            )
            transcribe_btn = gr.Button("Transcribe", variant="primary")

        # Outputs: the plain transcript and the downloadable subtitle file.
        with gr.Column():
            text_output = gr.Textbox(label="Transcript", lines=10)
            file_output = gr.File(label="Download SRT")

    # Wire the button to transcribe(); input order matches its parameters.
    transcribe_btn.click(
        fn=transcribe,
        inputs=[audio_input, task_input, lang_input],
        outputs=[text_output, file_output],
    )

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
77
+
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
# Speech-to-text backend (app.py imports WhisperModel from it)
faster-whisper
# Web UI framework
gradio
# Imported by app.py for the CUDA availability check
torch
# NOTE(review): torchaudio and ffmpeg-python are not imported by app.py as
# shown in this commit -- confirm they are still needed.
torchaudio
ffmpeg-python