not-lain committed on
Commit
4e6cca2
·
1 Parent(s): 5032c25

switch to filepath

Browse files
Files changed (1) hide show
  1. app.py +9 -7
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import ggwave
2
  import gradio as gr
3
  import numpy as np
 
4
 
5
 
6
  def encode_text_to_audio(text, protocol_id=1, volume=20):
@@ -26,23 +27,24 @@ def encode_text_to_audio(text, protocol_id=1, volume=20):
26
  raise gr.Error(f"Encoding failed: {str(e)}")
27
 
28
 
29
- def decode_audio_to_text(audio_data):
30
  """
31
  Decode audio waveform to text using ggwave
32
 
33
  Args:
34
- audio_data: Tuple of (sample_rate, audio_array) from Gradio
35
 
36
  Returns:
37
  Decoded text string
38
  """
39
- if audio_data is None:
40
  return "No audio provided"
41
 
42
  try:
43
- sample_rate, audio = audio_data
44
- if audio.dtype != np.float32:
45
- audio = audio.astype(np.float32)
 
46
  if sample_rate != 48000:
47
  duration = len(audio) / sample_rate
48
  new_length = int(duration * 48000)
@@ -116,7 +118,7 @@ encode_interface = gr.Interface(
116
  decode_interface = gr.Interface(
117
  fn=decode_audio_to_text,
118
  inputs=gr.Audio(
119
- label="Upload Audio File", type="numpy", sources=["upload", "microphone"]
120
  ),
121
  outputs=gr.Textbox(label="Decoded Text", lines=5),
122
  title="📥 Decode Audio to Text",
 
1
  import ggwave
2
  import gradio as gr
3
  import numpy as np
4
+ from pydub import AudioSegment
5
 
6
 
7
  def encode_text_to_audio(text, protocol_id=1, volume=20):
 
27
  raise gr.Error(f"Encoding failed: {str(e)}")
28
 
29
 
30
+ def decode_audio_to_text(filepath):
31
  """
32
  Decode audio waveform to text using ggwave
33
 
34
  Args:
35
+ filepath: Path to the audio file from Gradio
36
 
37
  Returns:
38
  Decoded text string
39
  """
40
+ if filepath is None:
41
  return "No audio provided"
42
 
43
  try:
44
+ segment = AudioSegment.from_file(filepath).set_channels(1)
45
+ sample_rate = segment.frame_rate
46
+ audio = np.array(segment.get_array_of_samples(), dtype=np.float32)
47
+ audio /= 2 ** (segment.sample_width * 8 - 1)
48
  if sample_rate != 48000:
49
  duration = len(audio) / sample_rate
50
  new_length = int(duration * 48000)
 
118
  decode_interface = gr.Interface(
119
  fn=decode_audio_to_text,
120
  inputs=gr.Audio(
121
+ label="Upload Audio File", type="filepath", sources=["upload", "microphone"]
122
  ),
123
  outputs=gr.Textbox(label="Decoded Text", lines=5),
124
  title="📥 Decode Audio to Text",