forgot to save
app.py
CHANGED
@@ -1,55 +1,68 @@
+
 import gradio as gr
 import numpy as np
 from faster_whisper import WhisperModel
+from faster_whisper.transcribe import Segment
 
 audio_model = WhisperModel("tiny.en", device="cpu", compute_type="int8")
+transcription = ['']
+buffer = np.array([])
 
-def transcribe(
-
-
-
-
-
-        print("Text:", text)
-    else:
+def transcribe(SampleRate, data):
+    global buffer
+    if SampleRate * 3 >= len(buffer):
+        print("buffer big")
+        segments, info = audio_model.transcribe(buffer, beam_size=5)
+        result = (list(segments))
         text = ""
-        print("No text found")
-        print(result)
-    return text
 
-
-
+        if result and len(result) > 0:
+            text = result[0].text
+            print("Text:", text)
+        else:
+            text = ""
+            print("No text found")
+            print(result)
+
+
+        buffer = np.array([])
+        return(text)
+    else:
+        buffer = np.concatenate([buffer, data])
+        print("buffer small")
+        return None
+
 
+def normaliseData(audioInput, stream):
+    sr, y = audioInput
+
     # Convert to mono if stereo
     if y.ndim > 1:
         y = y.mean(axis=1)
+
     y = y.astype(np.float32)
     y /= np.max(np.abs(y))
 
-
-
-        buffer = np.concatenate([buffer_state, y])
+    if stream is not None:
+        stream = np.concatenate([stream, y])
     else:
-
+        stream = y
 
-
-
-
-
-        chunk = buffer[:min_samples]
-        words = transcribe(sr, chunk)
-        buffer = buffer[min_samples:]  # Remove processed samples
+    words = transcribe(sr, y)
+
+    # Return the stream as state and a string representation of the array for display
+    return stream, words,
 
-    return buffer, words
 
 with gr.Blocks() as demo:
     audioInput = gr.Audio(sources=["microphone"], streaming=True)
     audioOutput = gr.Textbox(label="Output")
-
+    state = gr.State()
 
     audioInput.stream(
         fn=normaliseData,
-        inputs=[audioInput,
-        outputs=[
+        inputs=[audioInput, state],
+        outputs=[state, audioOutput] # try switching it arround
     )
 demo.launch()
+
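For reference, a minimal sketch of how this streaming setup is commonly wired up with a gr.State buffer and faster-whisper. This is not the committed code: the function name normalise_and_transcribe is illustrative, it assumes the intent of the SampleRate * 3 check is to wait until roughly three seconds of audio have accumulated before transcribing, and it passes the accumulated stream (rather than only the latest chunk) to the model.

import gradio as gr
import numpy as np
from faster_whisper import WhisperModel

audio_model = WhisperModel("tiny.en", device="cpu", compute_type="int8")

def normalise_and_transcribe(audio_chunk, stream):
    # The streaming Audio component delivers (sample_rate, samples) tuples.
    sr, y = audio_chunk

    # Convert to mono and normalise to [-1, 1] float32.
    if y.ndim > 1:
        y = y.mean(axis=1)
    y = y.astype(np.float32)
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    # Accumulate chunks in the gr.State value.
    stream = y if stream is None else np.concatenate([stream, y])

    # Wait until roughly three seconds of audio have built up (assumed intent
    # of the SampleRate * 3 check in the commit, with the comparison flipped).
    if len(stream) < sr * 3:
        return stream, ""

    # faster-whisper expects a 16 kHz float32 waveform when given a raw array,
    # so a real app would resample `stream` from `sr` to 16000 here.
    segments, _info = audio_model.transcribe(stream, beam_size=5)
    text = " ".join(segment.text for segment in segments)

    # Reset the buffer once it has been transcribed.
    return np.array([], dtype=np.float32), text

with gr.Blocks() as demo:
    audioInput = gr.Audio(sources=["microphone"], streaming=True)
    audioOutput = gr.Textbox(label="Output")
    state = gr.State()

    audioInput.stream(
        fn=normalise_and_transcribe,
        inputs=[audioInput, state],
        # outputs must line up with the returned tuple: (new state, textbox text)
        outputs=[state, audioOutput],
    )
demo.launch()

The order of outputs=[state, audioOutput] has to match the order of the values returned by the callback (return stream, words), which appears to be what the "# try switching it arround" comment in the commit is experimenting with.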