| import gradio as gr |
| import whisper |
|
|
| |
| |
| |
# Name of the Whisper checkpoint to load.
WHISPER_MODEL_NAME = "small"

# Load the speech-recognition model once at import time so that every
# transcription request reuses the same instance.
model = whisper.load_model(WHISPER_MODEL_NAME)
|
|
|
|
| |
| |
| |
def transcribe(audio_path, src_lang, tgt_lang):
    """Transcribe an audio file to text using the module-level ``model``.

    Args:
        audio_path: Path of the audio file to transcribe. ``None`` or an
            empty string means no audio was supplied.
        src_lang: Selected input language. Currently unused.
        tgt_lang: Selected output language. Currently unused.

    Returns:
        A ``(text, audio)`` tuple. ``text`` is the stripped transcription,
        ``"No audio provided"`` when there is no input, or
        ``"No speech detected"`` when the transcription is empty.
        ``audio`` is always ``None``.
    """
    # Test falsiness rather than ``is None`` so that an empty path is
    # reported as missing input instead of being passed to the model.
    if not audio_path:
        return "No audio provided", None

    result = model.transcribe(audio_path)

    # ``or ""`` also covers a "text" key that is present but set to None.
    text = (result.get("text") or "").strip()

    return text or "No speech detected", None
|
|
|
|
| |
| |
| |
# Build the demo UI: two audio widgets, two language selectors, a trigger
# button, and the text/audio outputs that ``transcribe`` fills in.
with gr.Blocks(title="Multilingual Voice Clone") as demo:
    # The heading emoji is written as an escape so the glyph survives a
    # wrong file encoding; the character previously here was mis-encoded.
    # NOTE(review): the original emoji could not be recovered, a microphone
    # was chosen as the replacement -- confirm.
    gr.Markdown("# \U0001F399 Multilingual Voice Cloning Demo")
    gr.Markdown("**Phase 2: Speech-to-Text (Whisper ASR)**")

    with gr.Row():
        # Microphone recording; handed to ``transcribe`` as a file path.
        audio_input = gr.Audio(
            sources=["microphone"],
            type="filepath",
            label="Speak (Input Speech)",
        )

        # Voice-sample widget; not bound to a name and not wired to any
        # event in this block.
        gr.Audio(
            sources=["upload"],
            type="filepath",
            label="Upload Voice Sample (used later)",
            interactive=False,
        )

    with gr.Row():
        src_lang = gr.Dropdown(
            ["Marathi", "Hindi", "English"],
            value="English",
            label="Input Language (for later)",
        )

        tgt_lang = gr.Dropdown(
            ["Hindi", "English", "Tamil"],
            value="Hindi",
            label="Output Language (for later)",
        )

    btn = gr.Button("Generate Voice")

    text_out = gr.Textbox(
        label="Transcribed Text",
        placeholder="Speech-to-text output will appear here",
    )

    audio_out = gr.Audio(
        label="Cloned Voice Output (coming in Phase 4)",
    )

    # Clicking the button runs ``transcribe`` on the recording and the two
    # language selections, and writes its results to the outputs below.
    btn.click(
        fn=transcribe,
        inputs=[audio_input, src_lang, tgt_lang],
        outputs=[text_out, audio_out],
    )
|
|
# Start the web server only when this file is executed as a script, so that
# importing the module does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()
|
|