Spaces:

not-lain
/

ggwave

Running

not-lain committed on Feb 26

Commit

4e6cca2

1 Parent(s): 5032c25

switch to filepath

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,6 +1,7 @@
 import ggwave
 import gradio as gr
 import numpy as np
 def encode_text_to_audio(text, protocol_id=1, volume=20):
@@ -26,23 +27,24 @@ def encode_text_to_audio(text, protocol_id=1, volume=20):
         raise gr.Error(f"Encoding failed: {str(e)}")
-def decode_audio_to_text(audio_data):
     """
     Decode audio waveform to text using ggwave
     Args:
-        audio_data: Tuple of (sample_rate, audio_array) from Gradio
     Returns:
         Decoded text string
     """
-    if audio_data is None:
         return "No audio provided"
     try:
-        sample_rate, audio = audio_data
-        if audio.dtype != np.float32:
-            audio = audio.astype(np.float32)
         if sample_rate != 48000:
             duration = len(audio) / sample_rate
             new_length = int(duration * 48000)
@@ -116,7 +118,7 @@ encode_interface = gr.Interface(
 decode_interface = gr.Interface(
     fn=decode_audio_to_text,
     inputs=gr.Audio(
-        label="Upload Audio File", type="numpy", sources=["upload", "microphone"]
     ),
     outputs=gr.Textbox(label="Decoded Text", lines=5),
     title="📥 Decode Audio to Text",

 import ggwave
 import gradio as gr
 import numpy as np
+from pydub import AudioSegment
 def encode_text_to_audio(text, protocol_id=1, volume=20):
         raise gr.Error(f"Encoding failed: {str(e)}")
+def decode_audio_to_text(filepath):
     """
     Decode audio waveform to text using ggwave
     Args:
+        filepath: Path to the audio file from Gradio
     Returns:
         Decoded text string
     """
+    if filepath is None:
         return "No audio provided"
     try:
+        segment = AudioSegment.from_file(filepath).set_channels(1)
+        sample_rate = segment.frame_rate
+        audio = np.array(segment.get_array_of_samples(), dtype=np.float32)
+        audio /= 2 ** (segment.sample_width * 8 - 1)
         if sample_rate != 48000:
             duration = len(audio) / sample_rate
             new_length = int(duration * 48000)
 decode_interface = gr.Interface(
     fn=decode_audio_to_text,
     inputs=gr.Audio(
+        label="Upload Audio File", type="filepath", sources=["upload", "microphone"]
     ),
     outputs=gr.Textbox(label="Decoded Text", lines=5),
     title="📥 Decode Audio to Text",