ggwave / app.py
not-lain's picture
switch to filepath
4e6cca2
import ggwave
import gradio as gr
import numpy as np
from pydub import AudioSegment
def encode_text_to_audio(text, protocol_id=1, volume=20):
    """
    Encode text to audio waveform using ggwave

    Args:
        text: Text to encode. Empty or missing text produces no audio.
        protocol_id: ggwave protocol (0-8, default 1)
        volume: Audio volume (0-100, default 20)

    Returns:
        Tuple of (sample_rate, audio_data) for Gradio audio output, or
        None when there is no text to encode.

    Raises:
        gr.Error: If the encoding step fails for any reason.
    """
    if not text:
        return None
    try:
        # UI sliders and API/MCP clients may deliver whole numbers as floats
        # (e.g. 1.0); coerce so ggwave always receives plain integers.
        waveform = ggwave.encode(
            text,
            protocolId=int(protocol_id),
            volume=int(volume),
        )
        # The returned bytes are interpreted as float32 samples and reported
        # to Gradio with a 48 kHz sample rate.
        audio_data = np.frombuffer(waveform, dtype=np.float32)
        return 48000, audio_data
    except Exception as e:
        # Keep the original exception attached as the cause for debugging.
        raise gr.Error(f"Encoding failed: {str(e)}") from e
def _resample_linear(audio, sample_rate, target_rate=48000):
    """Linearly resample a mono signal to target_rate and return float32 samples."""
    audio = np.asarray(audio, dtype=np.float32)
    if sample_rate == target_rate or len(audio) == 0:
        return audio
    new_length = int(len(audio) * target_rate / sample_rate)
    # Position of every output sample expressed on the input's index axis.
    positions = np.arange(new_length) * (sample_rate / target_rate)
    resampled = np.interp(positions, np.arange(len(audio)), audio)
    # np.interp returns float64; the decoder is fed raw float32 bytes, so the
    # dtype must be restored before the samples are serialized.
    return resampled.astype(np.float32)


def decode_audio_to_text(filepath):
    """
    Decode audio waveform to text using ggwave

    The file is loaded with pydub, mixed down to mono, scaled to floats by
    its sample width, resampled to 48 kHz when needed, and then fed to a
    ggwave instance in 1024-sample chunks until a payload is decoded.

    Args:
        filepath: Path to the audio file from Gradio

    Returns:
        Decoded text string, or a human-readable message when no audio was
        provided, no signal was found, or decoding failed.
    """
    if filepath is None:
        return "No audio provided"
    try:
        segment = AudioSegment.from_file(filepath).set_channels(1)
        audio = np.array(segment.get_array_of_samples(), dtype=np.float32)
        # Scale integer PCM samples by the full-scale value of the sample width.
        audio /= 2 ** (segment.sample_width * 8 - 1)
        audio = _resample_linear(audio, segment.frame_rate)

        chunk_size = 1024
        decoded_text = ""
        instance = ggwave.init()
        try:
            for i in range(0, len(audio), chunk_size):
                chunk_bytes = audio[i : i + chunk_size].tobytes()
                res = ggwave.decode(instance, chunk_bytes)
                if res is None:
                    continue
                try:
                    decoded_text = res.decode("utf-8")
                    break
                except UnicodeDecodeError:
                    # Payload was not valid UTF-8; keep scanning the audio.
                    continue
        finally:
            # Always release the ggwave instance, even if decoding raised.
            ggwave.free(instance)

        if decoded_text:
            return decoded_text
        return "❌ No ggwave signal detected in audio"
    except Exception as e:
        return f"❌ Decoding failed: {str(e)}"
# Gradio interface for the "encode" tab: text + protocol + volume -> audio.
encode_interface = gr.Interface(
    fn=encode_text_to_audio,
    inputs=[
        gr.Textbox(
            label="Text to Encode",
            placeholder="Enter text to convert to audio...",
            lines=3,
        ),
        gr.Slider(
            minimum=0,
            maximum=8,
            value=1,
            step=1,
            label="Protocol ID",
            info="ggwave protocol (0-8, affects speed/reliability)",
        ),
        gr.Slider(
            minimum=0,
            maximum=100,
            value=20,
            step=1,
            label="Volume",
            info="Audio volume (0-100)",
        ),
    ],
    outputs=gr.Audio(label="Generated Audio", type="numpy"),
    title="📤 Encode Text to Audio",
    description="Convert text into an audio signal using ggwave protocol",
    # Each example row is [text, protocol_id, volume], matching `inputs`.
    examples=[
        ["Hello World", 1, 20],
        ["GGWave is cool!", 1, 30],
        ["Testing 123", 2, 25],
    ],
    theme="default",
)
# Gradio interface for the "decode" tab: uploaded/recorded audio -> text.
decode_interface = gr.Interface(
    fn=decode_audio_to_text,
    # type="filepath" hands the handler a path on disk rather than raw samples.
    inputs=gr.Audio(
        label="Upload Audio File", type="filepath", sources=["upload", "microphone"]
    ),
    outputs=gr.Textbox(label="Decoded Text", lines=5),
    title="📥 Decode Audio to Text",
    description="Extract text from an audio signal using ggwave protocol",
    examples=[
        ["./audio.wav"],
    ],
    theme="default",
)
# Combine the encode and decode interfaces into a single tabbed app.
demo = gr.TabbedInterface(
    [encode_interface, decode_interface],
    tab_names=["📤 Encode", "📥 Decode"],
    title="🎵 GGWave Audio Encoder/Decoder",
)
demo.launch(debug=True, mcp_server=True)