Viper51 committed on
Commit
00a48da
·
verified ·
1 Parent(s): 5ebc805

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +78 -10
src/streamlit_app.py CHANGED
@@ -21,13 +21,18 @@ from pydantic import BaseModel, Field
21
  from typing import Optional
22
  import os
23
  try:
24
- from gtts import gTTS
25
- except Exception as e:
26
- st.error(f"error importing gtts: {e}")
27
  try:
28
  from google.cloud import texttospeech
29
  except Exception:
30
  texttospeech = None
 
 
 
 
 
31
  # --- Pydantic Models (from your code) ---
32
 
33
  class questions(BaseModel):
@@ -174,6 +179,54 @@ If a good followup question can be asked generate it but only if it is a genuine
174
  # --- MODIFIED Streamlit Audio/Visual Function ---
175
 
176
  import io # Make sure 'import io' is at the top of your file
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
177
 
178
  # --- REPLACED: Official Google Cloud TTS Function ---
179
 
@@ -404,14 +457,29 @@ if st.session_state.stage not in ['start', 'processing_resume']:
404
  # --- REPLACEMENT: Text Input Area ---
405
  user_text = None # Initialize user_text
406
  is_disabled = (st.session_state.stage == 'finished')
407
-
408
- with st.form(key="answer_form", clear_on_submit=True):
409
- answer = st.text_input("Your answer:", disabled=is_disabled)
410
- submit_button = st.form_submit_button(label="Submit Answer", disabled=is_disabled)
 
 
 
 
 
 
 
 
 
 
 
411
 
412
- if submit_button and answer:
413
- user_text = answer
414
- st.session_state.chat_history.append(f"**You:** {user_text}")
 
 
 
 
415
  # --- END OF REPLACEMENT ---
416
 
417
 
 
21
  from typing import Optional
22
  import os
23
  try:
24
+ from google.cloud import speech
25
+ except Exception:
26
+ speech = None
27
  try:
28
  from google.cloud import texttospeech
29
  except Exception:
30
  texttospeech = None
31
+ try:
32
+ from streamlit_mic_recorder import mic_recorder
33
+ except Exception:
34
+ mic_recorder = None
35
+
36
  # --- Pydantic Models (from your code) ---
37
 
38
  class questions(BaseModel):
 
179
  # --- MODIFIED Streamlit Audio/Visual Function ---
180
 
181
  import io # Make sure 'import io' is at the top of your file
182
def _wav_sample_rate(audio_bytes):
    """Return the sample rate (Hz) declared in a WAV header, or None.

    None is returned when the bytes cannot be parsed as a WAV container
    (empty input, a different container format, or an unsupported codec).
    """
    # Local imports keep this helper self-contained.
    import io
    import wave

    try:
        with wave.open(io.BytesIO(audio_bytes), "rb") as wav_file:
            return wav_file.getframerate()
    except (wave.Error, EOFError, TypeError):
        return None


@st.cache_data
def speech_to_text(audio_bytes):
    """
    Transcribe audio bytes using Google Cloud Speech-to-Text.

    On success the transcript is appended to
    ``st.session_state.chat_history`` (prefixed with ``**You:**``) and
    returned. ``None`` is returned when the speech library is missing,
    ``GOOGLE_API_KEY`` is not set, nothing could be recognised, or the
    API call raised.

    Because of ``@st.cache_data``, the body (including the chat-history
    append) only runs the first time a given audio payload is seen; later
    calls with identical bytes return the cached value.

    Args:
        audio_bytes: Raw bytes of the recording (expected to be 16-bit PCM WAV).

    Returns:
        The transcribed text, or ``None`` if no transcript was produced.
    """
    if speech is None:
        st.warning("google-cloud-speech library not found, transcription is disabled.")
        return None

    # Get the API key from the environment (where HF secrets put it)
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        st.error("GOOGLE_API_KEY not found in secrets. Cannot initialize STT.")
        return None

    # Pass the key explicitly to the client
    client_options = {"api_key": api_key}
    client = speech.SpeechClient(client_options=client_options)

    # LINEAR16 corresponds to 16-bit PCM WAV.
    # NOTE(review): confirm the recorder is configured to emit WAV; other
    # container formats will not decode as LINEAR16.
    #
    # The sample rate must agree with the WAV header or the API rejects the
    # request, so read it from the header instead of assuming 16 kHz. Fall
    # back to 16000 only when the header cannot be parsed.
    sample_rate = _wav_sample_rate(audio_bytes) or 16000

    audio = speech.RecognitionAudio(content=audio_bytes)
    config = speech.RecognitionConfig(
        encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
        language_code="en-US",
        sample_rate_hertz=sample_rate,
    )

    try:
        st.info("Transcribing audio... (this may take a moment)")
        response = client.recognize(config=config, audio=audio)

        # Each result covers a consecutive portion of the audio; join the top
        # alternative of every result so long answers are not truncated to
        # their first segment.
        segments = [
            result.alternatives[0].transcript.strip()
            for result in response.results
            if result.alternatives
        ]
        transcript = " ".join(segment for segment in segments if segment)

        if not transcript:
            st.warning("Could not understand audio.")
            return None

        st.session_state.chat_history.append(f"**You:** {transcript}")
        return transcript

    except Exception as e:
        # Top-level boundary for the remote call: surface the failure to the
        # user instead of crashing the Streamlit script run.
        st.error(f"Error during speech-to-text: {e}")
        st.info("This usually means the 'Cloud Speech-to-Text API' is not enabled or your mic is not outputting the correct audio format.")
        return None
230
 
231
  # --- REPLACED: Official Google Cloud TTS Function ---
232
 
 
457
  # --- REPLACEMENT: Text Input Area ---
458
  user_text = None # Initialize user_text
459
  is_disabled = (st.session_state.stage == 'finished')
460
+
461
+ if mic_recorder is None:
462
+ st.error("streamlit_mic_recorder library failed to import. Voice input is disabled.")
463
+ st.info("Please add 'streamlit-mic-recorder' to your requirements.txt")
464
+
465
+ elif is_disabled:
466
+ st.info("Interview is finished. Start a new interview to speak.")
467
+
468
+ else:
469
+ st.write("Your turn to speak:")
470
+ audio_bytes_dict = mic_recorder(
471
+ start_prompt="Start Recording ⏺️",
472
+ stop_prompt="Stop Recording ⏹️",
473
+ key='recorder'
474
+ )
475
 
476
+ if audio_bytes_dict:
477
+ # The component returns a dictionary, get the bytes
478
+ audio_bytes = audio_bytes_dict['bytes']
479
+
480
+ with st.spinner("Transcribing your answer..."):
481
+ # Use our NEW Google Cloud STT function
482
+ user_text = speech_to_text(audio_bytes)
483
  # --- END OF REPLACEMENT ---
484
 
485