# (Hugging Face Space page residue removed: "Spaces: Sleeping")
import gradio as gr
import torch
from transformers import pipeline

# Load the Lozi fine-tuned Whisper model once at startup. On failure the
# transcriber is left as None so the UI can report a friendly error instead
# of crashing at import time.
print("Loading Whisper Lozi model...")

transcriber = None
try:
    # GPU index 0 when CUDA is present, otherwise -1 selects the CPU.
    _device = 0 if torch.cuda.is_available() else -1
    transcriber = pipeline(
        "automatic-speech-recognition",
        model="simzacademy/whisper-small-lozi1",
        device=_device,
    )
except Exception as e:
    print(f"Error loading model: {e}")
else:
    print("Model loaded successfully!")
def transcribe_audio(audio):
    """Convert a Lozi speech recording to text.

    Args:
        audio: Audio file path or tuple (sample_rate, audio_data), as
            delivered by the Gradio ``Audio`` component.

    Returns:
        The transcribed text, or a human-readable error message.
    """
    # Guard clauses: model never loaded, or no audio was supplied.
    if transcriber is None:
        return "Error: Model failed to load. Please check your installation."
    if audio is None:
        return "Please provide an audio file or recording."

    try:
        # The ASR pipeline returns a dict with the transcript under "text".
        return transcriber(audio)["text"]
    except Exception as exc:
        return f"Error during transcription: {str(exc)}"
# ---- Gradio interface ------------------------------------------------------
# Left column: audio input + transcribe button; right column: text output.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # π€ Lozi Speech-to-Text Interface
        ### Powered by Whisper Small Lozi Model
        This interface uses the `simzacademy/whisper-small-lozi1` model to transcribe
        Lozi language speech to text.
        """
    )

    with gr.Row():
        with gr.Column():
            # Accepts both live microphone recordings and uploaded files;
            # type="filepath" hands transcribe_audio a path on disk.
            audio_in = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Record or Upload Audio",
            )
            run_btn = gr.Button("π Transcribe", variant="primary", size="lg")
        with gr.Column():
            result_box = gr.Textbox(
                label="Transcription",
                placeholder="Your transcription will appear here...",
                lines=10,
            )

    gr.Markdown(
        """
        ### π Instructions:
        1. **Record**: Click the microphone icon to record audio directly
        2. **Upload**: Or click to upload an audio file (MP3, WAV, etc.)
        3. **Transcribe**: Click the "Transcribe" button to convert speech to text
        4. **View**: The transcribed text will appear on the right
        ### βΉοΈ Notes:
        - Speak clearly in Lozi for best results
        - The model works best with clear audio and minimal background noise
        - First transcription may take longer as the model loads
        """
    )

    # Clicking the button runs the transcription.
    run_btn.click(fn=transcribe_audio, inputs=audio_in, outputs=result_box)

    # NOTE(review): the original comment claimed this lets the Enter key
    # trigger transcription, but the handler only re-enables the button
    # whenever the audio input changes — it does not transcribe.
    audio_in.change(fn=lambda: gr.update(interactive=True), outputs=run_btn)
# Launch the web server when run as a script.
if __name__ == "__main__":
    demo.launch(
        share=False,              # flip to True for a public gradio.live link
        server_name="0.0.0.0",    # bind all interfaces so LAN clients can connect
        server_port=7860,
    )