Spaces:
Sleeping
Sleeping
| from transformers import pipeline | |
| import gradio as gr | |
| from pydub import AudioSegment | |
| import os | |
# Checkpoint handed to the transformers pipeline; the pipeline is built once
# at import time and reused by every transcription call.
MODEL_ID = "srirama/whisper-small-hi"  # change to "your-username/the-name-you-picked"
pipe = pipeline(model=MODEL_ID)
def split_audio(audio_path, chunk_length_ms):
    """Split an audio file into consecutive fixed-length chunks.

    Args:
        audio_path: Location of the audio file, loaded with
            ``AudioSegment.from_file``.
        chunk_length_ms: Length of each chunk in milliseconds; must be
            positive.

    Returns:
        A list of audio slices in playback order. Every slice covers
        ``chunk_length_ms`` except possibly the last one, which holds
        whatever remains. The list is empty for zero-length audio.

    Raises:
        ValueError: If ``chunk_length_ms`` is not positive.
    """
    if chunk_length_ms <= 0:
        # A zero step makes range() fail with an unrelated-looking message and
        # a negative step silently produces no chunks, so reject both here.
        raise ValueError(
            f"chunk_length_ms must be positive, got {chunk_length_ms!r}"
        )

    audio = AudioSegment.from_file(audio_path)
    # pydub measures both len(segment) and slice bounds in milliseconds, so
    # the range offsets can be used directly as slice starts.
    return [
        audio[start:start + chunk_length_ms]
        for start in range(0, len(audio), chunk_length_ms)
    ]
def transcribe(audio_path):
    """Transcribe a recording by running the model on 20-second chunks.

    The audio is cut into chunks with ``split_audio``; each chunk is written
    to a temporary WAV file, passed to ``pipe``, and the resulting texts are
    concatenated.

    Args:
        audio_path: Path of the recording to transcribe. ``None`` or an empty
            path (for example a submission with nothing recorded) produces an
            empty transcript instead of an error from the audio loader.

    Returns:
        The chunk transcripts joined by single spaces, with leading and
        trailing whitespace removed.
    """
    # Imported here so this function does not depend on a file-level import.
    import tempfile

    if not audio_path:
        return ""

    chunk_length_ms = 20 * 1000  # 20 seconds in milliseconds
    audio_chunks = split_audio(audio_path, chunk_length_ms)

    texts = []
    # Every call works in its own scratch directory: fixed file names in the
    # working directory would collide when two requests run at the same time.
    # The context manager also removes the files if export or inference fails.
    with tempfile.TemporaryDirectory() as scratch_dir:
        for i, chunk in enumerate(audio_chunks):
            temp_file = os.path.join(scratch_dir, f"temp_chunk_{i}.wav")
            # export() returns the file object it wrote to; close it so the
            # handle is not left open while the pipeline reads the file.
            chunk.export(temp_file, format="wav").close()
            texts.append(pipe(temp_file)["text"])

    return " ".join(texts).strip()
# Gradio front end: records from the microphone, hands the saved file path to
# transcribe(), and shows the returned transcript as text.
iface = gr.Interface(
    title="Whisper Small English",
    description="Realtime demo for Dental speech recognition using a fine-tuned Whisper small model.",
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs="text",
)

# Start serving the interface.
iface.launch()