Spaces:
Sleeping
Sleeping
| import os | |
| import tempfile | |
| import gradio as gr | |
| from faster_whisper import WhisperModel | |
| import torch | |
# Single-slot cache: the most recently loaded model and the name it was loaded under.
_cached_model_name = None
_cached_model = None


def get_model(model_name):
    """Return a ``WhisperModel`` for ``model_name``, loading it only when needed.

    Only one model is kept at a time. Asking for a different name than the
    cached one loads the new model and replaces the cached entry.

    Args:
        model_name: Model identifier handed straight to ``WhisperModel``.

    Returns:
        The cached or freshly loaded ``WhisperModel`` instance.
    """
    global _cached_model_name, _cached_model

    # Fast path: the requested model is already loaded.
    if _cached_model_name == model_name and _cached_model is not None:
        return _cached_model

    # Pick device and precision together: float16 on CUDA, int8 on CPU.
    if torch.cuda.is_available():
        device, compute_type = "cuda", "float16"
    else:
        device, compute_type = "cpu", "int8"

    print(f"Loading {model_name} on {device} ({compute_type})...")
    # Using 4 CPU threads to maximize performance on free tier
    _cached_model = WhisperModel(
        model_name,
        device=device,
        compute_type=compute_type,
        cpu_threads=4,
    )
    _cached_model_name = model_name
    return _cached_model
def format_timestamp(seconds):
    """Format a time offset in seconds as an SRT timestamp (``HH:MM:SS,mmm``).

    The value is rounded once to whole milliseconds and then split with
    integer arithmetic. Extracting the fractional part in floating point and
    truncating it loses a millisecond for many inputs (for example 2.3 would
    come out as ``,299``).

    Args:
        seconds: Offset from the start of the media, in seconds. Negative
            values are clamped to zero because the format has no sign.

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"`` for ``3661.5``.
    """
    # Work in integer milliseconds from here on so rounding happens exactly once
    # and a value such as 59.9996 carries cleanly into the next second.
    total_ms = max(0, int(round(seconds * 1000)))
    total_seconds, ms = divmod(total_ms, 1000)
    total_minutes, s = divmod(total_seconds, 60)
    h, m = divmod(total_minutes, 60)
    return f"{h:02d}:{m:02d}:{s:02d},{ms:03d}"
def segments_to_srt(segments):
    """Render transcription segments as the text of an SRT subtitle file.

    Each segment becomes one cue: a 1-based index line, a
    ``start --> end`` line, the stripped segment text, and a blank separator
    line.

    Args:
        segments: Iterable of objects exposing ``start``, ``end`` and ``text``.

    Returns:
        The cue lines joined with newlines; an empty string when there are
        no segments.
    """
    cue_lines = []
    for number, seg in enumerate(segments, start=1):
        time_range = f"{format_timestamp(seg.start)} --> {format_timestamp(seg.end)}"
        # The trailing empty string produces the blank line that separates cues.
        cue_lines.extend((str(number), time_range, seg.text.strip(), ""))
    return "\n".join(cue_lines)
def transcribe(audio_path, model_name, task="transcribe", language=None):
    """Transcribe (or translate) an audio file and write the result as SRT.

    Args:
        audio_path: Path to the media file. ``None`` or an empty string means
            nothing was provided.
        model_name: Model identifier passed to ``get_model``.
        task: Value forwarded as the ``task`` option of the model's
            ``transcribe`` call; the UI offers ``"transcribe"`` and
            ``"translate"``.
        language: Language code to force. ``None``, an empty string or
            ``"auto"`` leaves the option unset.

    Returns:
        A ``(text, srt_path)`` tuple. ``text`` is the stripped segment texts
        joined by single spaces and ``srt_path`` is the path of a temporary
        ``.srt`` file. The file is created with ``delete=False``, so it is
        not removed automatically. When no audio was provided the tuple is
        ``("Please upload an audio file.", None)``.
    """
    # Nothing to transcribe: covers None as well as an empty path, and avoids
    # loading a model for a request that cannot succeed.
    if not audio_path:
        return "Please upload an audio file.", None

    model = get_model(model_name)
    print(f"Transcribing {audio_path} using {model_name}...")

    options = {
        "task": task,
        "beam_size": 1,  # Set to 1 for absolute maximum speed on CPU
        "best_of": 1,
        "vad_filter": True,  # Filter out non-speech/silence to speed up
    }
    if language and language != "auto":
        options["language"] = language

    segments, _info = model.transcribe(audio_path, **options)

    # Collect segments one at a time so each can be logged as it arrives.
    # faster-whisper documents `segments` as a lazy generator, so this loop is
    # where the decoding work actually happens.
    segments_list = []
    for segment in segments:
        segments_list.append(segment)
        print(f"[{format_timestamp(segment.start)}] {segment.text}")

    full_text = " ".join(seg.text.strip() for seg in segments_list)
    srt_content = segments_to_srt(segments_list)

    # Save SRT to a temporary file. The context manager closes the handle even
    # if the write fails; delete=False keeps the file on disk for download.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".srt") as temp_srt:
        temp_srt.write(srt_content.encode("utf-8"))

    return full_text, temp_srt.name
# Gradio UI
# NOTE(review): the widget nesting below was reconstructed from a copy of this
# file whose indentation had been stripped -- confirm the layout (inputs and
# button in the left column, outputs in the right) against the running app.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🎙️ VoiceScript - CPU Optimized")
    gr.Markdown("Fast transcription using models optimized for CPU performance (`large-v3-turbo` and `distil-large-v3`).")

    with gr.Row():
        # Input controls.
        with gr.Column():
            audio_input = gr.Audio(type="filepath", label="Upload Audio/Video")
            model_input = gr.Radio(
                choices=["large-v3-turbo", "distil-large-v3"],
                label="Model",
                value="large-v3-turbo",
            )
            task_input = gr.Radio(
                choices=["transcribe", "translate"],
                label="Task",
                value="transcribe",
            )
            lang_input = gr.Dropdown(
                choices=["auto", "en", "es", "fr", "de", "zh", "ja", "ko", "hi"],
                label="Language (optional)",
                value="auto",
            )
            transcribe_btn = gr.Button("Transcribe", variant="primary")

        # Results: plain transcript plus the downloadable subtitle file.
        with gr.Column():
            text_output = gr.Textbox(label="Transcript", lines=10)
            file_output = gr.File(label="Download SRT")

    # The input order must match the positional parameters of transcribe().
    transcribe_btn.click(
        fn=transcribe,
        inputs=[audio_input, model_input, task_input, lang_input],
        outputs=[text_output, file_output],
    )


if __name__ == "__main__":
    # Listen on all interfaces, port 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)