import ggwave
import gradio as gr
import numpy as np
from pydub import AudioSegment
def encode_text_to_audio(text, protocol_id=1, volume=20):
    """
    Encode text to an audio waveform using ggwave.

    Raises gr.Error when ggwave cannot encode the text or when protocol_id
    or volume cannot be converted to an integer.

    Args:
        text: Text to encode
        protocol_id: ggwave protocol index, 0 to 8, default 1
        volume: Audio volume, 0 to 100, default 20

    Returns:
        Tuple of sample rate and float32 sample array for the Gradio audio
        output, or None when text is empty
    """
    if not text:
        return None

    try:
        # UI sliders may hand over floats such as 1.0; give ggwave plain ints.
        waveform = ggwave.encode(
            text, protocolId=int(protocol_id), volume=int(volume)
        )
        # The returned bytes are reinterpreted as 32-bit float samples.
        audio_data = np.frombuffer(waveform, dtype=np.float32)
    except Exception as e:
        # Surface the failure in the Gradio UI while keeping the original
        # traceback attached as the cause.
        raise gr.Error(f"Encoding failed: {str(e)}") from e

    # The waveform is reported to Gradio at a 48 kHz sample rate.
    return 48000, audio_data
def decode_audio_to_text(filepath):
    """
    Decode a ggwave audio signal stored in a file back into text.

    The file is loaded with pydub, mixed down to mono, scaled by the sample
    width, resampled to 48 kHz when needed and fed to ggwave in chunks of
    1024 float32 samples until a payload is decoded.

    Args:
        filepath: Path to the audio file from Gradio, or None

    Returns:
        Decoded text string, or a status message when no file was given,
        no signal was found, or decoding failed
    """
    if filepath is None:
        return "No audio provided"

    target_rate = 48000
    chunk_size = 1024

    try:
        segment = AudioSegment.from_file(filepath).set_channels(1)
        sample_rate = segment.frame_rate

        # Scale integer PCM samples by 2**(bits - 1) of the sample width.
        audio = np.array(segment.get_array_of_samples(), dtype=np.float32)
        audio /= 2 ** (segment.sample_width * 8 - 1)

        # Skip resampling for empty audio: np.interp rejects empty inputs.
        if sample_rate != target_rate and len(audio) > 0:
            duration = len(audio) / sample_rate
            new_length = int(duration * target_rate)
            # Sample positions stay within the valid index range
            # [0, len(audio) - 1].
            positions = np.linspace(0, len(audio) - 1, new_length)
            # np.interp returns float64; cast back so the bytes handed to
            # ggwave have the same float32 layout as the non-resampled path.
            audio = np.interp(positions, np.arange(len(audio)), audio).astype(
                np.float32
            )

        instance = ggwave.init()
        decoded_text = ""
        try:
            for start in range(0, len(audio), chunk_size):
                chunk_bytes = audio[start : start + chunk_size].tobytes()
                res = ggwave.decode(instance, chunk_bytes)
                if res is None:
                    continue
                try:
                    decoded_text = res.decode("utf-8")
                except UnicodeDecodeError:
                    # Payload was not valid UTF-8; keep scanning the rest.
                    continue
                break
        finally:
            # Always release the ggwave instance, even if decoding raised.
            ggwave.free(instance)

        # NOTE(review): the leading "β" in the messages below looks like a
        # mis-encoded symbol; left unchanged - confirm the intended character.
        if decoded_text:
            return decoded_text
        return "β No ggwave signal detected in audio"
    except Exception as e:
        # UI boundary: report the failure as text instead of raising.
        return f"β Decoding failed: {str(e)}"
# Encoder tab: one text box and two sliders in, one audio clip out.
encode_interface = gr.Interface(
    title="π€ Encode Text to Audio",
    description="Convert text into an audio signal using ggwave protocol",
    fn=encode_text_to_audio,
    # Input order lines up with encode_text_to_audio(text, protocol_id, volume).
    inputs=[
        gr.Textbox(
            lines=3,
            label="Text to Encode",
            placeholder="Enter text to convert to audio...",
        ),
        gr.Slider(
            label="Protocol ID",
            info="ggwave protocol (0-8, affects speed/reliability)",
            minimum=0,
            maximum=8,
            step=1,
            value=1,
        ),
        gr.Slider(
            label="Volume",
            info="Audio volume (0-100)",
            minimum=0,
            maximum=100,
            step=1,
            value=20,
        ),
    ],
    # The handler returns a (sample_rate, samples) pair, hence type="numpy".
    outputs=gr.Audio(type="numpy", label="Generated Audio"),
    # Each example row is [text, protocol id, volume].
    examples=[
        ["Hello World", 1, 20],
        ["GGWave is cool!", 1, 30],
        ["Testing 123", 2, 25],
    ],
    theme="default",
)
# Decoder tab: one audio input (upload or microphone) in, decoded text out.
decode_interface = gr.Interface(
    title="π₯ Decode Audio to Text",
    description="Extract text from an audio signal using ggwave protocol",
    fn=decode_audio_to_text,
    # type="filepath" hands the handler a path on disk rather than raw samples.
    inputs=gr.Audio(
        type="filepath",
        sources=["upload", "microphone"],
        label="Upload Audio File",
    ),
    outputs=gr.Textbox(lines=5, label="Decoded Text"),
    # Single example clip, referenced by a relative path.
    examples=[
        ["./audio.wav"],
    ],
    theme="default",
)
# Combine both interfaces into one app; tab_names follows the same order as
# the interface list.
demo = gr.TabbedInterface(
    [encode_interface, decode_interface],
    title="π΅ GGWave Audio Encoder/Decoder",
    tab_names=["π€ Encode", "π₯ Decode"],
)

# Start the app at module level with debug output and the MCP server enabled.
demo.launch(mcp_server=True, debug=True)
|