Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -32,24 +32,18 @@ image_model = genai.GenerativeModel(
|
|
| 32 |
hf_client = InferenceClient(token=hf_token)
|
| 33 |
|
| 34 |
# -------------------- Utility Functions --------------------
|
| 35 |
-
def
|
|
|
|
| 36 |
recognizer = sr.Recognizer()
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
with mic as source:
|
| 40 |
try:
|
| 41 |
-
recognizer.
|
| 42 |
-
print("π€ Listening...")
|
| 43 |
-
audio = recognizer.listen(source, timeout=5)
|
| 44 |
-
print("π§ Transcribing...")
|
| 45 |
-
text = recognizer.recognize_google(audio)
|
| 46 |
-
return text
|
| 47 |
-
except sr.WaitTimeoutError:
|
| 48 |
-
return "β Timeout. Try again."
|
| 49 |
except sr.UnknownValueError:
|
| 50 |
-
return "
|
| 51 |
-
except sr.RequestError
|
| 52 |
-
return
|
|
|
|
| 53 |
|
| 54 |
def generate_image_from_text(prompt):
|
| 55 |
try:
|
|
@@ -343,7 +337,8 @@ with gr.Blocks(
|
|
| 343 |
with gr.Row():
|
| 344 |
char_count = gr.Number(label="π₯ Number of Characters", precision=0, value=2, elem_id="char-count")
|
| 345 |
character_names = gr.Textbox(label="π§ββοΈ Character Names", elem_id="char-names")
|
| 346 |
-
|
|
|
|
| 347 |
with gr.Row():
|
| 348 |
dialogue = gr.Textbox(label="π¬ Dialogue (optional)", placeholder="e.g. 'Protect the forest!'", elem_id="dialogue-input")
|
| 349 |
dialogue_speaker = gr.Textbox(label="π£οΈ Who says the dialogue?", placeholder="e.g. Bramble", elem_id = "dialogue-speaker")
|
|
@@ -439,5 +434,11 @@ with gr.Blocks(
|
|
| 439 |
inputs=[explanation_output],
|
| 440 |
outputs=[tts_audio]
|
| 441 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 442 |
|
| 443 |
demo.launch()
|
|
|
|
| 32 |
hf_client = InferenceClient(token=hf_token)
|
| 33 |
|
| 34 |
# -------------------- Utility Functions --------------------
|
| 35 |
+
def transcribe_audio(audio):
    """Transcribe a recorded audio file to text with Google speech recognition.

    Args:
        audio: Path to the recorded audio file (the microphone component that
            feeds this function is configured with ``type="filepath"``).
            May be ``None`` or empty when nothing has been recorded yet.

    Returns:
        The recognized text on success, otherwise a short human-readable
        message describing why transcription failed. The result is always a
        string because it is written straight into a text box.
    """
    # Guard first: the button can be clicked before anything is recorded, and
    # opening a missing path would raise instead of producing a message.
    if not audio:
        return "No audio recorded."

    # Imported lazily so the app can start even if the optional speech
    # dependency is only needed when the user actually transcribes.
    import speech_recognition as sr

    recognizer = sr.Recognizer()

    # Reading the file can fail independently of the recognition request
    # (missing file, unsupported or corrupted audio format).
    try:
        with sr.AudioFile(audio) as source:
            audio_data = recognizer.record(source)
    except (ValueError, OSError):
        return "Could not read the recorded audio."

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        # The service responded but could not make out any words.
        return "Speech not understood."
    except sr.RequestError:
        # The service was unreachable or rejected the request.
        return "Speech recognition API error."
|
| 46 |
+
|
| 47 |
|
| 48 |
def generate_image_from_text(prompt):
|
| 49 |
try:
|
|
|
|
| 337 |
with gr.Row():
|
| 338 |
char_count = gr.Number(label="π₯ Number of Characters", precision=0, value=2, elem_id="char-count")
|
| 339 |
character_names = gr.Textbox(label="π§ββοΈ Character Names", elem_id="char-names")
|
| 340 |
+
mic_char_names = gr.Audio(source="microphone", type="filepath", label="π€ Speak Character Names")
|
| 341 |
+
transcribe_btn_names = gr.Button("π Transcribe")
|
| 342 |
with gr.Row():
|
| 343 |
dialogue = gr.Textbox(label="π¬ Dialogue (optional)", placeholder="e.g. 'Protect the forest!'", elem_id="dialogue-input")
|
| 344 |
dialogue_speaker = gr.Textbox(label="π£οΈ Who says the dialogue?", placeholder="e.g. Bramble", elem_id = "dialogue-speaker")
|
|
|
|
| 434 |
inputs=[explanation_output],
|
| 435 |
outputs=[tts_audio]
|
| 436 |
)
|
| 437 |
+
transcribe_btn_names.click(
|
| 438 |
+
fn=transcribe_audio,
|
| 439 |
+
inputs=[mic_char_names],
|
| 440 |
+
outputs=[character_names]
|
| 441 |
+
)
|
| 442 |
+
|
| 443 |
|
| 444 |
demo.launch()
|