switch to filepath
Browse files
app.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
import ggwave
|
| 2 |
import gradio as gr
|
| 3 |
import numpy as np
|
|
|
|
| 4 |
|
| 5 |
|
| 6 |
def encode_text_to_audio(text, protocol_id=1, volume=20):
|
|
@@ -26,23 +27,24 @@ def encode_text_to_audio(text, protocol_id=1, volume=20):
|
|
| 26 |
raise gr.Error(f"Encoding failed: {str(e)}")
|
| 27 |
|
| 28 |
|
| 29 |
-
def decode_audio_to_text(
|
| 30 |
"""
|
| 31 |
Decode audio waveform to text using ggwave
|
| 32 |
|
| 33 |
Args:
|
| 34 |
-
|
| 35 |
|
| 36 |
Returns:
|
| 37 |
Decoded text string
|
| 38 |
"""
|
| 39 |
-
if
|
| 40 |
return "No audio provided"
|
| 41 |
|
| 42 |
try:
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
|
|
|
| 46 |
if sample_rate != 48000:
|
| 47 |
duration = len(audio) / sample_rate
|
| 48 |
new_length = int(duration * 48000)
|
|
@@ -116,7 +118,7 @@ encode_interface = gr.Interface(
|
|
| 116 |
decode_interface = gr.Interface(
|
| 117 |
fn=decode_audio_to_text,
|
| 118 |
inputs=gr.Audio(
|
| 119 |
-
label="Upload Audio File", type="
|
| 120 |
),
|
| 121 |
outputs=gr.Textbox(label="Decoded Text", lines=5),
|
| 122 |
title="📥 Decode Audio to Text",
|
|
|
|
| 1 |
import ggwave
|
| 2 |
import gradio as gr
|
| 3 |
import numpy as np
|
| 4 |
+
from pydub import AudioSegment
|
| 5 |
|
| 6 |
|
| 7 |
def encode_text_to_audio(text, protocol_id=1, volume=20):
|
|
|
|
| 27 |
raise gr.Error(f"Encoding failed: {str(e)}")
|
| 28 |
|
| 29 |
|
| 30 |
+
def decode_audio_to_text(filepath):
|
| 31 |
"""
|
| 32 |
Decode audio waveform to text using ggwave
|
| 33 |
|
| 34 |
Args:
|
| 35 |
+
filepath: Path to the audio file from Gradio
|
| 36 |
|
| 37 |
Returns:
|
| 38 |
Decoded text string
|
| 39 |
"""
|
| 40 |
+
if filepath is None:
|
| 41 |
return "No audio provided"
|
| 42 |
|
| 43 |
try:
|
| 44 |
+
segment = AudioSegment.from_file(filepath).set_channels(1)
|
| 45 |
+
sample_rate = segment.frame_rate
|
| 46 |
+
audio = np.array(segment.get_array_of_samples(), dtype=np.float32)
|
| 47 |
+
audio /= 2 ** (segment.sample_width * 8 - 1)
|
| 48 |
if sample_rate != 48000:
|
| 49 |
duration = len(audio) / sample_rate
|
| 50 |
new_length = int(duration * 48000)
|
|
|
|
| 118 |
decode_interface = gr.Interface(
|
| 119 |
fn=decode_audio_to_text,
|
| 120 |
inputs=gr.Audio(
|
| 121 |
+
label="Upload Audio File", type="filepath", sources=["upload", "microphone"]
|
| 122 |
),
|
| 123 |
outputs=gr.Textbox(label="Decoded Text", lines=5),
|
| 124 |
title="📥 Decode Audio to Text",
|