Spaces:
Runtime error
Runtime error
fixes
Browse files
app.py
CHANGED
|
@@ -88,9 +88,9 @@ def text_to_speech(text):
|
|
| 88 |
return text, (16000, audio.squeeze())
|
| 89 |
|
| 90 |
|
| 91 |
-
def transcribe(image, audio):
|
| 92 |
if not image or not audio:
|
| 93 |
-
return
|
| 94 |
|
| 95 |
sr, y = audio
|
| 96 |
|
|
@@ -102,11 +102,18 @@ def transcribe(image, audio):
|
|
| 102 |
|
| 103 |
transcription_text = transcriber({"sampling_rate": sr, "raw": y})["text"]
|
| 104 |
|
| 105 |
-
return generate_answer(image, transcription_text)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
|
| 108 |
qa_interface = gr.Interface(
|
| 109 |
-
fn=
|
| 110 |
inputs=[
|
| 111 |
gr.Image(type="pil"),
|
| 112 |
gr.Textbox(label="Вопрос (на русском)", placeholder="Ваш вопрос"),
|
|
@@ -120,7 +127,7 @@ qa_interface = gr.Interface(
|
|
| 120 |
)
|
| 121 |
|
| 122 |
speech_interface = gr.Interface(
|
| 123 |
-
fn=transcribe,
|
| 124 |
inputs=[
|
| 125 |
gr.Image(type="pil"),
|
| 126 |
gr.Audio(sources="microphone", label="Голосовой ввод"),
|
|
|
|
| 88 |
return text, (16000, audio.squeeze())
|
| 89 |
|
| 90 |
|
| 91 |
+
def transcribe_pipeline(image, audio):
|
| 92 |
if not image or not audio:
|
| 93 |
+
return None, None
|
| 94 |
|
| 95 |
sr, y = audio
|
| 96 |
|
|
|
|
| 102 |
|
| 103 |
transcription_text = transcriber({"sampling_rate": sr, "raw": y})["text"]
|
| 104 |
|
| 105 |
+
return text_to_speech(generate_answer(image, transcription_text))
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def text_pipeline(image, question):
|
| 109 |
+
if not image or not question:
|
| 110 |
+
return None, None
|
| 111 |
+
|
| 112 |
+
return text_to_speech(generate_answer(image, question))
|
| 113 |
|
| 114 |
|
| 115 |
qa_interface = gr.Interface(
|
| 116 |
+
fn=text_pipeline,
|
| 117 |
inputs=[
|
| 118 |
gr.Image(type="pil"),
|
| 119 |
gr.Textbox(label="Вопрос (на русском)", placeholder="Ваш вопрос"),
|
|
|
|
| 127 |
)
|
| 128 |
|
| 129 |
speech_interface = gr.Interface(
|
| 130 |
+
fn=transcribe_pipeline,
|
| 131 |
inputs=[
|
| 132 |
gr.Image(type="pil"),
|
| 133 |
gr.Audio(sources="microphone", label="Голосовой ввод"),
|