Konstantin Dorichev committed on
Block based app
Browse files
app.py
CHANGED
|
@@ -6,9 +6,11 @@ transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-bas
|
|
| 6 |
|
| 7 |
|
| 8 |
def transcribe(stream, new_chunk):
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
|
|
|
|
|
|
| 12 |
|
| 13 |
# Convert to mono if stereo
|
| 14 |
if y.ndim > 1:
|
|
@@ -17,20 +19,27 @@ def transcribe(stream, new_chunk):
|
|
| 17 |
y = y.astype(np.float32)
|
| 18 |
y /= np.max(np.abs(y))
|
| 19 |
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
demo.launch()
|
|
|
|
| 6 |
|
| 7 |
|
| 8 |
def transcribe(stream, new_chunk=None):
    """Transcribe uploaded audio to text with the Whisper ASR pipeline.

    Parameters:
        stream: ``(sample_rate, samples)`` tuple as produced by ``gr.Audio``,
            or ``None`` when no audio has been provided.
        new_chunk: unused; kept with a default for backward compatibility.
            The Transcribe button wires only one input
            (``btn.click(fn=transcribe, inputs=audio, ...)``), so without a
            default every click raised ``TypeError: missing argument``.

    Returns:
        The transcribed text, or ``""`` when ``stream`` is ``None``.
    """
    if stream is None:
        return ""

    sr, y = stream

    # Convert to mono if stereo.
    # NOTE(review): this line was elided in the diff hunk; ``mean(axis=1)``
    # is the canonical form from the Gradio ASR guide — confirm upstream.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # Normalize to [-1, 1] float32, guarding against division by zero on
    # silent (all-zero) audio, which previously produced a NaN array.
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    text = transcriber({"sampling_rate": sr, "raw": y})["text"]
    return text
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def clear(audio, transcribed):
    """Reset both the audio input and the transcription output.

    The incoming component values are ignored; Gradio writes the returned
    ``(None, None)`` pair back into the two components listed as outputs
    of the Clear button's click handler.
    """
    return None, None
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# UI layout: an audio upload plus Clear/Transcribe buttons in a left column,
# with the transcription text area beside them.
# NOTE(review): nesting below is reconstructed from a whitespace-mangled
# paste — ``transcribed`` is assumed to sit in the outer Row, next to the
# Column; confirm against the original file.
with gr.Blocks() as demo:
    gr.HTML(value="<h1>Transcribe Audio to Text Demo</h1>")
    with gr.Row():
        with gr.Column():
            audio = gr.Audio(sources=["upload"], streaming=False, label="wav")
            with gr.Row():
                clr = gr.Button(value="Clear", variant="huggingface")
                btn = gr.Button(value="Transcribe", variant="primary")
        transcribed = gr.TextArea(label="Transcribed", lines=9)

    # Wire handlers: Transcribe fills the text area from the audio input;
    # Clear resets both components via the (None, None) returned by clear().
    btn.click(fn=transcribe, inputs=audio, outputs=transcribed)
    clr.click(fn=clear, inputs=[audio, transcribed], outputs=[audio, transcribed])

demo.launch()
|