Spaces:
Sleeping
Sleeping
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +78 -10
src/streamlit_app.py
CHANGED
|
@@ -21,13 +21,18 @@ from pydantic import BaseModel, Field
|
|
| 21 |
from typing import Optional
|
| 22 |
import os
|
| 23 |
try:
|
| 24 |
-
from
|
| 25 |
-
except Exception
|
| 26 |
-
|
| 27 |
try:
|
| 28 |
from google.cloud import texttospeech
|
| 29 |
except Exception:
|
| 30 |
texttospeech = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
# --- Pydantic Models (from your code) ---
|
| 32 |
|
| 33 |
class questions(BaseModel):
|
|
@@ -174,6 +179,54 @@ If a good followup question can be asked generate it but only if it is a genuine
|
|
| 174 |
# --- MODIFIED Streamlit Audio/Visual Function ---
|
| 175 |
|
| 176 |
import io # Make sure 'import io' is at the top of your file
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
# --- REPLACED: Official Google Cloud TTS Function ---
|
| 179 |
|
|
@@ -404,14 +457,29 @@ if st.session_state.stage not in ['start', 'processing_resume']:
|
|
| 404 |
# --- REPLACEMENT: Text Input Area ---
|
| 405 |
user_text = None # Initialize user_text
|
| 406 |
is_disabled = (st.session_state.stage == 'finished')
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
|
| 412 |
-
if
|
| 413 |
-
|
| 414 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 415 |
# --- END OF REPLACEMENT ---
|
| 416 |
|
| 417 |
|
|
|
|
| 21 |
from typing import Optional
|
| 22 |
import os
|
| 23 |
try:
|
| 24 |
+
from google.cloud import speech
|
| 25 |
+
except Exception:
|
| 26 |
+
speech = None
|
| 27 |
try:
|
| 28 |
from google.cloud import texttospeech
|
| 29 |
except Exception:
|
| 30 |
texttospeech = None
|
| 31 |
+
try:
|
| 32 |
+
from streamlit_mic_recorder import mic_recorder
|
| 33 |
+
except Exception:
|
| 34 |
+
mic_recorder = None
|
| 35 |
+
|
| 36 |
# --- Pydantic Models (from your code) ---
|
| 37 |
|
| 38 |
class questions(BaseModel):
|
|
|
|
| 179 |
# --- MODIFIED Streamlit Audio/Visual Function ---
|
| 180 |
|
| 181 |
import io # Make sure 'import io' is at the top of your file
|
| 182 |
+
@st.cache_data
|
| 183 |
+
def speech_to_text(audio_bytes):
|
| 184 |
+
"""
|
| 185 |
+
Transcribes audio bytes using Google Cloud Speech-to-Text
|
| 186 |
+
and returns the transcribed text.
|
| 187 |
+
"""
|
| 188 |
+
if speech is None:
|
| 189 |
+
st.warning("google-cloud-speech library not found, transcription is disabled.")
|
| 190 |
+
return None
|
| 191 |
+
|
| 192 |
+
# Get the API key from the environment (where HF secrets put it)
|
| 193 |
+
api_key = os.environ.get("GOOGLE_API_KEY")
|
| 194 |
+
|
| 195 |
+
# Check if the key exists
|
| 196 |
+
if not api_key:
|
| 197 |
+
st.error("GOOGLE_API_KEY not found in secrets. Cannot initialize STT.")
|
| 198 |
+
return None
|
| 199 |
+
|
| 200 |
+
# Pass the key explicitly to the client
|
| 201 |
+
client_options = {"api_key": api_key}
|
| 202 |
+
client = speech.SpeechClient(client_options=client_options)
|
| 203 |
+
|
| 204 |
+
# Configure the audio
|
| 205 |
+
# Note: streamlit-mic-recorder outputs WAV, which is LINEAR16
|
| 206 |
+
audio = speech.RecognitionAudio(content=audio_bytes)
|
| 207 |
+
config = speech.RecognitionConfig(
|
| 208 |
+
encoding=speech.RecognitionConfig.AudioEncoding.LINEAR16,
|
| 209 |
+
language_code="en-US",
|
| 210 |
+
sample_rate_hertz=16000 # This is a common sample rate
|
| 211 |
+
)
|
| 212 |
+
|
| 213 |
+
try:
|
| 214 |
+
# Detects speech in the audio file
|
| 215 |
+
st.info("Transcribing audio... (this may take a moment)")
|
| 216 |
+
response = client.recognize(config=config, audio=audio)
|
| 217 |
+
|
| 218 |
+
if response.results:
|
| 219 |
+
transcript = response.results[0].alternatives[0].transcript
|
| 220 |
+
st.session_state.chat_history.append(f"**You:** {transcript}")
|
| 221 |
+
return transcript
|
| 222 |
+
else:
|
| 223 |
+
st.warning("Could not understand audio.")
|
| 224 |
+
return None
|
| 225 |
+
|
| 226 |
+
except Exception as e:
|
| 227 |
+
st.error(f"Error during speech-to-text: {e}")
|
| 228 |
+
st.info("This usually means the 'Cloud Speech-to-Text API' is not enabled or your mic is not outputting the correct audio format.")
|
| 229 |
+
return None
|
| 230 |
|
| 231 |
# --- REPLACED: Official Google Cloud TTS Function ---
|
| 232 |
|
|
|
|
| 457 |
# --- REPLACEMENT: Text Input Area ---
# Voice-input section: records from the mic and transcribes the answer.
# user_text stays None unless a recording was captured and transcribed.
user_text = None
is_disabled = (st.session_state.stage == 'finished')

if mic_recorder is None:
    # The optional recorder component did not import; voice input is off.
    st.error("streamlit_mic_recorder library failed to import. Voice input is disabled.")
    st.info("Please add 'streamlit-mic-recorder' to your requirements.txt")
elif is_disabled:
    st.info("Interview is finished. Start a new interview to speak.")
else:
    st.write("Your turn to speak:")
    recording = mic_recorder(
        start_prompt="Start Recording ⏺️",
        stop_prompt="Stop Recording ⏹️",
        key='recorder',
    )

    if recording:
        # mic_recorder returns a dict; the raw audio lives under 'bytes'.
        wav_bytes = recording['bytes']
        with st.spinner("Transcribing your answer..."):
            # Hand the recording to the Google Cloud STT helper.
            user_text = speech_to_text(wav_bytes)
# --- END OF REPLACEMENT ---
|
| 484 |
|
| 485 |
|