Krish-05 commited on
Commit
073f4d8
·
verified ·
1 Parent(s): a9ca228

Update streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +34 -89
streamlit_app.py CHANGED
@@ -1,24 +1,18 @@
1
  import logging
2
  import logging.handlers
3
- import threading
4
  import time
5
- import urllib.request
6
  import os
7
- from pathlib import Path
8
- from typing import List
9
  import io
10
  import soundfile as sf
11
  import requests
12
 
13
- import av
14
  import numpy as np
15
  import pydub
16
  import streamlit as st
17
  from twilio.rest import Client
18
 
19
- from streamlit_webrtc import WebRtcMode, webrtc_streamer, AudioProcessorBase
20
-
21
- HERE = Path(__file__).parent
22
 
23
  logger = logging.getLogger(__name__)
24
 
@@ -31,49 +25,7 @@ if 'audio_processor_instance' not in st.session_state:
31
  st.session_state.audio_processor_instance = None
32
 
33
 
34
- # --- Utility Functions (from original code, kept for completeness) ---
35
def download_file(url, download_to: Path, expected_size=None):
    """Download *url* to *download_to*, showing Streamlit progress UI.

    Retained for completeness; the Whisper model itself is loaded by the
    FastAPI server, so this helper may be unused in the current workflow.

    Parameters
    ----------
    url : str
        Source URL to fetch.
    download_to : Path
        Destination file path; parent directories are created as needed.
    expected_size : int | None
        If given and the existing file already has this exact size, the
        download is skipped silently. If the file exists and no size is
        given, the user is asked whether to download again.
    """
    if download_to.exists():
        if expected_size:
            if download_to.stat().st_size == expected_size:
                return
        else:
            st.info(f"{url} is already downloaded.")
            if not st.button("Download again?"):
                return

    download_to.parent.mkdir(parents=True, exist_ok=True)

    weights_warning, progress_bar = None, None
    try:
        weights_warning = st.warning("Downloading %s..." % url)
        progress_bar = st.progress(0)
        with open(download_to, "wb") as output_file:
            with urllib.request.urlopen(url) as response:
                # Content-Length may be absent (header lookup returns None);
                # fall back to 0 so we still stream without int(None) crashing.
                length = int(response.info()["Content-Length"] or 0)
                counter = 0.0
                MEGABYTES = 2.0 ** 20.0
                while True:
                    data = response.read(8192)
                    if not data:
                        break
                    counter += len(data)
                    output_file.write(data)

                    weights_warning.warning(
                        "Downloading %s... (%6.2f/%6.2f MB)"
                        % (url, counter / MEGABYTES, length / MEGABYTES)
                    )
                    # Guard: unknown/zero length would raise ZeroDivisionError.
                    if length:
                        progress_bar.progress(min(counter / length, 1.0))
    finally:
        # Clear the transient UI elements even if the download failed.
        if weights_warning is not None:
            weights_warning.empty()
        if progress_bar is not None:
            progress_bar.empty()
75
-
76
-
77
  @st.cache_data
78
  def get_ice_servers():
79
  """Fetches ICE servers for WebRTC connection."""
@@ -82,7 +34,9 @@ def get_ice_servers():
82
  auth_token = os.environ["TWILIO_AUTH_TOKEN"]
83
  except KeyError:
84
  logger.warning(
85
- "Twilio credentials are not set. Fallback to a free STUN server from Google."
 
 
86
  )
87
  return [{"urls": ["stun:stun.l.google.com:19302"]}]
88
 
@@ -91,31 +45,6 @@ def get_ice_servers():
91
  return token.ice_servers
92
 
93
 
94
- # --- Custom Audio Processor for streamlit-webrtc ---
95
class AudioBufferProcessor(AudioProcessorBase):
    """Accumulates incoming WebRTC audio frames into a pydub buffer.

    Frames are appended only while ``st.session_state.is_recording`` is
    truthy; each frame is converted to mono at 16 kHz before buffering.
    Buffer access is guarded by a lock because ``recv`` runs on the
    streamlit-webrtc worker thread while the buffer is drained from the
    Streamlit script thread.
    """

    def __init__(self) -> None:
        self._audio_buffer = pydub.AudioSegment.empty()
        self._lock = threading.Lock()

    def recv(self, frame: av.AudioFrame) -> None:
        # Drop frames unless the user has pressed "Start Recording".
        if not st.session_state.is_recording:
            return
        segment = pydub.AudioSegment(
            data=frame.to_ndarray().tobytes(),
            sample_width=frame.format.bytes,
            frame_rate=frame.sample_rate,
            channels=len(frame.layout.channels),
        )
        # Downmix to mono and resample to 16 kHz before appending.
        normalized = segment.set_channels(1).set_frame_rate(16000)
        with self._lock:
            self._audio_buffer += normalized

    def get_and_clear_buffered_audio(self) -> pydub.AudioSegment:
        """Return everything buffered so far and reset the buffer."""
        with self._lock:
            buffered = self._audio_buffer
            self._audio_buffer = pydub.AudioSegment.empty()
        return buffered
117
-
118
-
119
  def main():
120
  st.header("Whisper Speech-to-Text with Recording")
121
  st.markdown(
@@ -123,9 +52,11 @@ def main():
123
  Click "Start Recording" to begin capturing audio from your microphone.
124
  Click "Stop Recording" to end the capture, save the audio,
125
  and send it to the Whisper model for transcription.
 
126
  """
127
  )
128
 
 
129
  webrtc_ctx = webrtc_streamer(
130
  key="audio_recorder",
131
  mode=WebRtcMode.SENDONLY,
@@ -135,9 +66,11 @@ def main():
135
  async_processing=True
136
  )
137
 
 
138
  if webrtc_ctx.audio_processor and st.session_state.audio_processor_instance is None:
139
  st.session_state.audio_processor_instance = webrtc_ctx.audio_processor
140
 
 
141
  if webrtc_ctx.state.playing:
142
  st.success("Microphone connected. Ready to record.")
143
  else:
@@ -148,54 +81,64 @@ def main():
148
  col1, col2 = st.columns(2)
149
 
150
  with col1:
 
151
  start_button = st.button(
152
  "Start Recording",
 
153
  disabled=st.session_state.is_recording or not webrtc_ctx.state.playing
154
  )
155
  with col2:
 
156
  stop_button = st.button(
157
  "Stop Recording",
 
158
  disabled=not st.session_state.is_recording
159
  )
160
 
161
  # Placeholder for the animated text area
162
- transcription_text_area = st.text_area("Transcription Result", value="", height=150, disabled=True)
 
 
163
 
 
164
  if start_button:
165
  if webrtc_ctx.state.playing:
166
  st.session_state.is_recording = True
167
- st.session_state.transcribed_text = ""
168
- # Clear text area immediately
169
- transcription_text_area.empty()
170
  st.info("Recording... Click 'Stop Recording' to transcribe.")
171
  logger.info("Recording started.")
172
- st.rerun()
173
  else:
174
  st.error("Cannot start recording: Microphone not connected. Please allow microphone access.")
175
 
176
  if stop_button:
177
- if st.session_state.is_recording:
178
  st.session_state.is_recording = False
179
  st.info("Processing recording... Please wait.")
180
  logger.info("Recording stopped. Processing audio...")
181
 
 
182
  if st.session_state.audio_processor_instance:
183
  recorded_audio = st.session_state.audio_processor_instance.get_and_clear_buffered_audio()
184
 
185
  if len(recorded_audio) > 0:
 
186
  wav_file_buffer = io.BytesIO()
187
  audio_array = np.array(recorded_audio.get_array_of_samples())
188
  audio_array = audio_array.astype(np.float32)
189
  sf.write(wav_file_buffer, audio_array, recorded_audio.frame_rate, format='WAV', subtype='PCM_16')
190
- wav_file_buffer.seek(0)
191
 
 
192
  WHISPER_API_URL = "http://localhost:1990/transcribe_audio/"
193
  try:
194
  files = {'audio_file': ('recorded_audio.wav', wav_file_buffer, 'audio/wav')}
195
- response = requests.post(WHISPER_API_URL, files=files, timeout=120)
196
- response.raise_for_status()
197
  transcription_data = response.json()
198
  full_transcribed_text = transcription_data.get("transcription", "No transcription found.")
 
199
  st.session_state.transcribed_text = full_transcribed_text
200
 
201
  # --- Character-by-character display logic ---
@@ -230,7 +173,8 @@ def main():
230
  logger.warning("No audio recorded after stopping.")
231
  else:
232
  st.error("Audio processor instance not found. Please refresh the app and allow microphone access.")
233
- st.rerun()
 
234
 
235
 
236
  if __name__ == "__main__":
@@ -247,7 +191,8 @@ if __name__ == "__main__":
247
  st_webrtc_logger = logging.getLogger("streamlit_webrtc")
248
  st_webrtc_logger.setLevel(logging.DEBUG if DEBUG else logging.INFO)
249
 
250
- fsevents_logger = logging.getLogger("fsevents")
251
- fsevents_logger.setLevel(logging.WARNING)
 
252
 
253
  main()
 
1
  import logging
2
  import logging.handlers
 
3
  import time
 
4
  import os
 
 
5
  import io
6
  import soundfile as sf
7
  import requests
8
 
 
9
  import numpy as np
10
  import pydub
11
  import streamlit as st
12
  from twilio.rest import Client
13
 
14
+ from streamlit_webrtc import WebRtcMode, webrtc_streamer
15
+ from stt_module import AudioBufferProcessor # Import our custom processor
 
16
 
17
  logger = logging.getLogger(__name__)
18
 
 
25
  st.session_state.audio_processor_instance = None
26
 
27
 
28
+ # --- Utility Functions ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  @st.cache_data
30
  def get_ice_servers():
31
  """Fetches ICE servers for WebRTC connection."""
 
34
  auth_token = os.environ["TWILIO_AUTH_TOKEN"]
35
  except KeyError:
36
  logger.warning(
37
+ "Twilio credentials (TWILIO_ACCOUNT_SID, TWILIO_AUTH_TOKEN) are not set. "
38
+ "Falling back to a free STUN server from Google. "
39
+ "This might be less reliable for WebRTC connections."
40
  )
41
  return [{"urls": ["stun:stun.l.google.com:19302"]}]
42
 
 
45
  return token.ice_servers
46
 
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  def main():
49
  st.header("Whisper Speech-to-Text with Recording")
50
  st.markdown(
 
52
  Click "Start Recording" to begin capturing audio from your microphone.
53
  Click "Stop Recording" to end the capture, save the audio,
54
  and send it to the Whisper model for transcription.
55
+ The transcribed text will appear character by character below.
56
  """
57
  )
58
 
59
+ # Initialize the webrtc_streamer once.
60
  webrtc_ctx = webrtc_streamer(
61
  key="audio_recorder",
62
  mode=WebRtcMode.SENDONLY,
 
66
  async_processing=True
67
  )
68
 
69
+ # Store the audio_processor instance in session_state for later retrieval
70
  if webrtc_ctx.audio_processor and st.session_state.audio_processor_instance is None:
71
  st.session_state.audio_processor_instance = webrtc_ctx.audio_processor
72
 
73
+ # Display status of the WebRTC connection
74
  if webrtc_ctx.state.playing:
75
  st.success("Microphone connected. Ready to record.")
76
  else:
 
81
  col1, col2 = st.columns(2)
82
 
83
  with col1:
84
+ # Disable "Start Recording" if already recording or mic not connected
85
  start_button = st.button(
86
  "Start Recording",
87
+ key="start_rec_btn",
88
  disabled=st.session_state.is_recording or not webrtc_ctx.state.playing
89
  )
90
  with col2:
91
+ # Disable "Stop Recording" if not recording
92
  stop_button = st.button(
93
  "Stop Recording",
94
+ key="stop_rec_btn",
95
  disabled=not st.session_state.is_recording
96
  )
97
 
98
  # Placeholder for the animated text area
99
+ # Initialize it with current session state text
100
+ transcription_text_area = st.text_area("Transcription Result", value=st.session_state.transcribed_text, height=150, disabled=True)
101
+
102
 
103
+ # Logic for Start/Stop buttons
104
  if start_button:
105
  if webrtc_ctx.state.playing:
106
  st.session_state.is_recording = True
107
+ st.session_state.transcribed_text = "" # Clear previous text
108
+ transcription_text_area.empty() # Clear the display
 
109
  st.info("Recording... Click 'Stop Recording' to transcribe.")
110
  logger.info("Recording started.")
111
+ st.rerun() # Use st.rerun() to immediately update UI state
112
  else:
113
  st.error("Cannot start recording: Microphone not connected. Please allow microphone access.")
114
 
115
  if stop_button:
116
+ if st.session_state.is_recording: # Only process if recording was active
117
  st.session_state.is_recording = False
118
  st.info("Processing recording... Please wait.")
119
  logger.info("Recording stopped. Processing audio...")
120
 
121
+ # Retrieve all buffered audio from the processor instance
122
  if st.session_state.audio_processor_instance:
123
  recorded_audio = st.session_state.audio_processor_instance.get_and_clear_buffered_audio()
124
 
125
  if len(recorded_audio) > 0:
126
+ # Save the audio to an in-memory WAV file
127
  wav_file_buffer = io.BytesIO()
128
  audio_array = np.array(recorded_audio.get_array_of_samples())
129
  audio_array = audio_array.astype(np.float32)
130
  sf.write(wav_file_buffer, audio_array, recorded_audio.frame_rate, format='WAV', subtype='PCM_16')
131
+ wav_file_buffer.seek(0) # Rewind the buffer to the beginning
132
 
133
+ # Send the WAV file to the FastAPI Whisper endpoint
134
  WHISPER_API_URL = "http://localhost:1990/transcribe_audio/"
135
  try:
136
  files = {'audio_file': ('recorded_audio.wav', wav_file_buffer, 'audio/wav')}
137
+ response = requests.post(WHISPER_API_URL, files=files, timeout=120) # Increased timeout for transcription
138
+ response.raise_for_status() # Raise an exception for HTTP errors (4xx or 5xx)
139
  transcription_data = response.json()
140
  full_transcribed_text = transcription_data.get("transcription", "No transcription found.")
141
+
142
  st.session_state.transcribed_text = full_transcribed_text
143
 
144
  # --- Character-by-character display logic ---
 
173
  logger.warning("No audio recorded after stopping.")
174
  else:
175
  st.error("Audio processor instance not found. Please refresh the app and allow microphone access.")
176
+ # Trigger a rerun to update button states and display transcription
177
+ st.rerun()
178
 
179
 
180
  if __name__ == "__main__":
 
191
  st_webrtc_logger = logging.getLogger("streamlit_webrtc")
192
  st_webrtc_logger.setLevel(logging.DEBUG if DEBUG else logging.INFO)
193
 
194
+ # Removed the fsevents logger tuning: fsevents (macOS file-system event logging) is not relevant to this app
195
+ # fsevents_logger = logging.getLogger("fsevents")
196
+ # fsevents_logger.setLevel(logging.WARNING)
197
 
198
  main()