Spaces:
Running
Running
File size: 2,058 Bytes
a159b10 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 | from dotenv import load_dotenv
load_dotenv()
import gradio as gr
from counter.word_counter import WordCounter
from model.speech_recognizer import SpeechRecognizer
# Single module-level WordCounter; process_audio and reset_counter both act on it.
counter = WordCounter()
# Starts as None; get_recognizer() replaces it with a SpeechRecognizer on first call.
recognizer = None
def get_recognizer():
    """Return the module-level SpeechRecognizer, building it on first use.

    The instance is created with ``model_size="tiny"`` the first time this
    function runs and is stored in the global ``recognizer``; every later
    call returns that same stored object.
    """
    global recognizer
    # Fast path: an instance was already created by an earlier call.
    if recognizer is not None:
        return recognizer
    recognizer = SpeechRecognizer(model_size="tiny")
    return recognizer
def process_audio(audio, target_word):
    """Transcribe one audio chunk and increment the counter if the word is detected.

    Args:
        audio: Audio value for the current chunk, passed straight to the
            recognizer's ``transcribe_audio``. ``None`` means there is
            nothing to process.
        target_word: Word entered by the user. Leading and trailing
            whitespace is ignored; a missing or blank value means no word
            has been set.

    Returns:
        The value of ``counter.get_count()`` after this chunk was handled.
        The count is unchanged when the input is skipped or when
        processing fails.
    """
    # Normalize before the guard so a whitespace-only entry is treated as
    # empty and "word " is matched as "word".
    word = (target_word or "").strip()
    if audio is None or not word:
        return counter.get_count()

    try:
        text = get_recognizer().transcribe_audio(audio)
        if counter.detect_word(text, word):
            counter.increment()
    except Exception as e:
        # Deliberately best-effort: a failure on one chunk is reported and
        # the current count is still returned to the caller.
        print(f"Error processing audio: {e}")

    return counter.get_count()
def reset_counter():
    """Reset the module-level counter and return 0.

    The returned 0 is a literal; it is not read back from the counter.
    """
    counter.reset()
    cleared_value = 0
    return cleared_value
def on_start_recording():
    """Make sure the recognizer exists when recording starts.

    Calls get_recognizer() and discards its result. Always returns None.
    """
    # Only the side effect matters here: the recognizer is created now if it
    # has not been created yet.
    get_recognizer()
if __name__ == "__main__":
    # Gives elements carrying the "audio-tall" class a minimum height of 200px.
    page_css = ".audio-tall { min-height: 200px !important; }"

    with gr.Blocks(css=page_css) as interface:
        # Page heading and a one-line usage hint.
        gr.Markdown("# Word Counter")
        gr.Markdown("Enter a word to count, then click the microphone to start recording")

        # One row per control, in order: word input, count, microphone, reset.
        with gr.Row():
            target_word = gr.Textbox(label="Target Word", placeholder="Enter word to count")

        with gr.Row():
            counter_display = gr.Number(label="Count", value=0, interactive=False)

        with gr.Row():
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                streaming=True,
                format="wav",
                min_width=400,
                elem_classes="audio-tall",
            )

        with gr.Row():
            reset_btn = gr.Button("Reset", variant="stop")

        # Event wiring. start_recording has no inputs or outputs; the other
        # two handlers write their return value to the count display.
        audio_input.start_recording(fn=on_start_recording)
        audio_input.stream(fn=process_audio, inputs=[audio_input, target_word], outputs=counter_display)
        reset_btn.click(fn=reset_counter, outputs=counter_display)

    interface.launch()
|