"""Gradio demo that transcribes an uploaded WAV file with a Vosk model."""

import json
import wave
from typing import Optional

import gradio as gr
from vosk import KaldiRecognizer, Model

# Set up the Vosk model
model = Model("vosk-model-small-uz-0.22")

# Number of audio frames handed to the recognizer per loop iteration.
_CHUNK_FRAMES = 4000

# Returned when the upload is not a WAV file in the accepted layout.
_FORMAT_ERROR = "Audio file must be WAV format mono PCM."


def recognize_from_file(audio_file: Optional[str]) -> str:
    """Transcribe a WAV file and return the recognized text.

    Args:
        audio_file: Path to the audio file to transcribe. ``None`` or an
            empty path (nothing uploaded) is reported as a message rather
            than raising.

    Returns:
        The recognized text, with segments separated by single spaces, or a
        human-readable message when the input is missing or is not a
        single-channel, 2-byte-sample, uncompressed WAV file.
    """
    if not audio_file:
        return "No audio file provided."

    # Keep the try body minimal: only opening/parsing the header can raise
    # wave.Error (not a WAV file) or EOFError (empty/truncated file).
    try:
        wf = wave.open(audio_file, "rb")
    except (wave.Error, EOFError):
        return _FORMAT_ERROR

    # The context manager closes the file on every exit path, including the
    # early return for an unsupported layout.
    with wf:
        if (
            wf.getnchannels() != 1
            or wf.getsampwidth() != 2
            or wf.getcomptype() != "NONE"
        ):
            return _FORMAT_ERROR

        recognizer = KaldiRecognizer(model, wf.getframerate())
        segments = []

        while True:
            data = wf.readframes(_CHUNK_FRAMES)
            if not data:
                break
            # A truthy AcceptWaveform means a segment result is ready to be
            # collected via Result(); otherwise keep feeding audio.
            if recognizer.AcceptWaveform(data):
                segment = json.loads(recognizer.Result())
                segments.append(segment.get("text", ""))

        # Collect whatever the recognizer still holds after the last chunk.
        final_segment = json.loads(recognizer.FinalResult())
        segments.append(final_segment.get("text", ""))

    # Drop empty segments so the output has no doubled or trailing spaces.
    return " ".join(text for text in segments if text)


iface = gr.Interface(
    fn=recognize_from_file,
    inputs=gr.Audio(type="filepath"),
    outputs="text",
    title="Speech Recognition from Audio File",
    description="Upload a WAV file for recognition.",
)

iface.launch()