LiKenun committed on
Commit
bb6107f
·
1 Parent(s): 0fea237

Enable audio file retrieval by URL for automatic speech recognition (ASR) sample

Browse files
Files changed (2) hide show
  1. app.py +9 -2
  2. utils.py +16 -1
app.py CHANGED
@@ -7,7 +7,7 @@ from image_classification import image_classification
7
  from image_to_text import image_to_text
8
  from text_to_image import text_to_image
9
  from text_to_speech import text_to_speech
10
- from utils import request_image
11
 
12
 
13
  class App:
@@ -74,9 +74,16 @@ class App:
74
  inputs=text_to_speech_text,
75
  outputs=text_to_speech_output
76
  )
77
- with gr.Tab("Audio Transcription or Automatic Speech Recognition (ASR)"):
78
  gr.Markdown("Transcribe audio to text.")
 
 
79
  audio_transcription_audio_input = gr.Audio(label="Audio")
 
 
 
 
 
80
  audio_transcription_generate_button = gr.Button("Transcribe")
81
  audio_transcription_output = gr.Textbox(label="Text")
82
  audio_transcription_generate_button.click(
 
7
  from image_to_text import image_to_text
8
  from text_to_image import text_to_image
9
  from text_to_speech import text_to_speech
10
+ from utils import request_image, request_audio
11
 
12
 
13
  class App:
 
74
  inputs=text_to_speech_text,
75
  outputs=text_to_speech_output
76
  )
77
+ with gr.Tab("Automatic Speech Recognition (ASR)"):
78
  gr.Markdown("Transcribe audio to text.")
79
+ audio_transcription_url_input = gr.Textbox(label="Audio URL")
80
+ audio_transcription_audio_request_button = gr.Button("Get Audio")
81
  audio_transcription_audio_input = gr.Audio(label="Audio")
82
+ audio_transcription_audio_request_button.click(
83
+ fn=request_audio,
84
+ inputs=audio_transcription_url_input,
85
+ outputs=audio_transcription_audio_input
86
+ )
87
  audio_transcription_generate_button = gr.Button("Transcribe")
88
  audio_transcription_output = gr.Textbox(label="Text")
89
  audio_transcription_generate_button.click(
utils.py CHANGED
@@ -27,7 +27,7 @@ def get_pytorch_device() -> str:
27
 
28
  def request_image(url: str) -> Image:
29
  try:
30
- response = requests.get(url, timeout=int(getenv("REQUEST_TIMEOUT")))
31
  response.raise_for_status()
32
  return open_image(BytesIO(response.content))
33
  except requests.HTTPError as e:
@@ -37,6 +37,21 @@ def request_image(url: str) -> Image:
37
  except requests.RequestException as e:
38
  raise gr.Error(f"Failed to fetch image from URL: {str(e)}")
39
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  def save_image_to_temp_file(image: Image) -> str:
41
  image_format = image.format if image.format else 'PNG'
42
  format_extension = image_format.lower() if image_format else 'png'
 
27
 
28
  def request_image(url: str) -> Image:
29
  try:
30
+ response = requests.get(url, timeout=int(getenv("REQUEST_TIMEOUT", "45")))
31
  response.raise_for_status()
32
  return open_image(BytesIO(response.content))
33
  except requests.HTTPError as e:
 
37
  except requests.RequestException as e:
38
  raise gr.Error(f"Failed to fetch image from URL: {str(e)}")
39
 
40
+ def request_audio(url: str) -> tuple[int, np.ndarray]:
41
+ try:
42
+ response = requests.get(url, timeout=int(getenv("REQUEST_TIMEOUT", "45")))
43
+ response.raise_for_status()
44
+ audio_array, sample_rate = librosa.load(BytesIO(response.content), sr=None)
45
+ return (sample_rate, audio_array)
46
+ except requests.HTTPError as e:
47
+ raise gr.Error(f"Failed to fetch audio from URL because of HTTP error: {e.response.status_code} {e.response.text}")
48
+ except requests.Timeout as e:
49
+ raise gr.Error(f"Failed to fetch audio from URL because the request timed out.")
50
+ except requests.RequestException as e:
51
+ raise gr.Error(f"Failed to fetch audio from URL: {str(e)}")
52
+ except Exception as e:
53
+ raise gr.Error(f"Failed to load audio file: {str(e)}")
54
+
55
  def save_image_to_temp_file(image: Image) -> str:
56
  image_format = image.format if image.format else 'PNG'
57
  format_extension = image_format.lower() if image_format else 'png'