fizzarif7 commited on
Commit
e1c4ed0
·
verified ·
1 Parent(s): 79e74f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -16
app.py CHANGED
@@ -32,24 +32,18 @@ image_model = genai.GenerativeModel(
32
  hf_client = InferenceClient(token=hf_token)
33
 
34
  # -------------------- Utility Functions --------------------
35
- def transcribe_speech():
 
36
  recognizer = sr.Recognizer()
37
- mic = sr.Microphone()
38
-
39
- with mic as source:
40
  try:
41
- recognizer.adjust_for_ambient_noise(source)
42
- print("🎀 Listening...")
43
- audio = recognizer.listen(source, timeout=5)
44
- print("🧠 Transcribing...")
45
- text = recognizer.recognize_google(audio)
46
- return text
47
- except sr.WaitTimeoutError:
48
- return "❗ Timeout. Try again."
49
  except sr.UnknownValueError:
50
- return "❗ Could not understand audio."
51
- except sr.RequestError as e:
52
- return f"❗ API error: {e}"
 
53
 
54
  def generate_image_from_text(prompt):
55
  try:
@@ -343,7 +337,8 @@ with gr.Blocks(
343
  with gr.Row():
344
  char_count = gr.Number(label="πŸ‘₯ Number of Characters", precision=0, value=2, elem_id="char-count")
345
  character_names = gr.Textbox(label="πŸ§™β€β™‚οΈ Character Names", elem_id="char-names")
346
- char_speak_btn = gr.Button("🎀 Speak", elem_id="speak-char-names")
 
347
  with gr.Row():
348
  dialogue = gr.Textbox(label="πŸ’¬ Dialogue (optional)", placeholder="e.g. 'Protect the forest!'", elem_id="dialogue-input")
349
  dialogue_speaker = gr.Textbox(label="πŸ—£οΈ Who says the dialogue?", placeholder="e.g. Bramble", elem_id = "dialogue-speaker")
@@ -439,5 +434,11 @@ with gr.Blocks(
439
  inputs=[explanation_output],
440
  outputs=[tts_audio]
441
  )
 
 
 
 
 
 
442
 
443
  demo.launch()
 
32
  hf_client = InferenceClient(token=hf_token)
33
 
34
  # -------------------- Utility Functions --------------------
35
def transcribe_audio(audio):
    """Transcribe a recorded audio file to text via Google's speech API.

    Parameters:
        audio (str | None): Filesystem path to the recording (Gradio
            ``type="filepath"`` microphone input), or None when the user
            pressed "Transcribe" without recording anything.

    Returns:
        str: The recognized text, or a human-readable error message.
    """
    import speech_recognition as sr

    # Guard: Gradio passes None when no recording was made; without this,
    # sr.AudioFile(None) raises an exception the handlers below don't catch.
    if not audio:
        return "No audio recorded."

    recognizer = sr.Recognizer()
    # AudioFile accepts WAV/AIFF/FLAC paths; Gradio's microphone component
    # saves WAV, which matches the caller's type="filepath".
    with sr.AudioFile(audio) as source:
        audio_data = recognizer.record(source)
    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Speech not understood."
    except sr.RequestError:
        return "Speech recognition API error."
47
 
48
  def generate_image_from_text(prompt):
49
  try:
 
337
  with gr.Row():
338
  char_count = gr.Number(label="πŸ‘₯ Number of Characters", precision=0, value=2, elem_id="char-count")
339
  character_names = gr.Textbox(label="πŸ§™β€β™‚οΈ Character Names", elem_id="char-names")
340
+ mic_char_names = gr.Audio(source="microphone", type="filepath", label="🎀 Speak Character Names")
341
+ transcribe_btn_names = gr.Button("πŸ“ Transcribe")
342
  with gr.Row():
343
  dialogue = gr.Textbox(label="πŸ’¬ Dialogue (optional)", placeholder="e.g. 'Protect the forest!'", elem_id="dialogue-input")
344
  dialogue_speaker = gr.Textbox(label="πŸ—£οΈ Who says the dialogue?", placeholder="e.g. Bramble", elem_id = "dialogue-speaker")
 
434
  inputs=[explanation_output],
435
  outputs=[tts_audio]
436
  )
437
+ transcribe_btn_names.click(
438
+ fn=transcribe_audio,
439
+ inputs=[mic_char_names],
440
+ outputs=[character_names]
441
+ )
442
+
443
 
444
  demo.launch()