Spaces:

Viper51
/

Interviewer.ai

Sleeping

App Files Community

Viper51 committed on Nov 2, 2025

Commit

f97a556

verified ·

1 Parent(s): 9aaa036

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +59 -21

src/streamlit_app.py CHANGED Viewed

@@ -24,6 +24,10 @@ try:
     from gtts import gTTS
 except Exception as e:
     st.error(f"error importing gtts: {e}")
 # --- Pydantic Models (from your code) ---
 class questions(BaseModel):
@@ -171,35 +175,68 @@ If a good followup question can be asked generate it but only if it is a genuine
 import io # Make sure 'import io' is at the top of your file
 def text_to_speech_and_display(text, autoplay=True):
-    """Converts text to speech, displays text, and plays audio."""
     if not text:
         return
     try:
-        # Display the caption
         if 'chat_history' not in st.session_state:
             st.session_state.chat_history = []
         st.session_state.chat_history.append(f"**Interviewer:** {text}")
-        # Generate audio if gTTS available
-        if gTTS is None:
-            st.warning("gTTS library not found, audio playback is disabled.")
-            return
-        tts = gTTS(text=text, lang='en', slow=False)
-        audio_fp = io.BytesIO()
-        tts.write_to_fp(audio_fp)
-        audio_fp.seek(0)
-        # Display audio player
-        st.audio(audio_fp, format='audio/mp3', autoplay=autoplay)
     except Exception as e:
-        # This will catch the 403 error
-        st.error(f"Error in text-to-speech (gTTS): {e}")
-        st.info("This often happens because gTTS is blocked on servers like Hugging Face.")
-# --- DELETED speech_to_text function ---
 # We are replacing it with a text_input
@@ -314,18 +351,19 @@ elif st.session_state.stage == 'processing_resume':
             st.warning("AI Calls are in TEST MODE.")
             # 1. Generate DUMMY Questions
-            st.session_state.questions = generate_questions_from_resume(st.session_state.resume_text, gen_q_model)
             # 2. Get DUMMY AI Introduction
             intro_output = get_introduction(intro_model)
-            st.session_state.current_question = intro_output['question']
             # 3. Move to next stage and display intro
             st.session_state.stage = 'awaiting_intro'
-            text_to_speech_and_display(intro_output['intro'])
-            text_to_speech_and_display(intro_output['question'])
             # Clean up the resume text from session state
             if 'resume_text' in st.session_state:

     from gtts import gTTS
 except Exception as e:
     st.error(f"error importing gtts: {e}")
+try:
+    from google.cloud import texttospeech
+except Exception:
+    texttospeech = None
 # --- Pydantic Models (from your code) ---
 class questions(BaseModel):
 import io # Make sure 'import io' is at the top of your file
+# --- REPLACED: Official Google Cloud TTS Function ---
+@st.cache_data
+def synthesize_speech(text):
+    """
+    Synthesizes speech from the given text using Google Cloud TTS
+    and returns the audio content as bytes.
+    """
+    if texttospeech is None:
+        st.warning("google-cloud-texttospeech library not found, audio playback is disabled.")
+        return None
+    # Instantiates a client
+    client = texttospeech.TextToSpeechClient()
+    # Set the text input to be synthesized
+    synthesis_input = texttospeech.SynthesisInput(text=text)
+    # Build the voice request
+    voice = texttospeech.VoiceSelectionParams(
+        language_code="en-US", ssml_gender=texttospeech.SsmlVoiceGender.NEUTRAL
+    )
+    # Select the type of audio file you want
+    audio_config = texttospeech.AudioConfig(
+        audio_encoding=texttospeech.AudioEncoding.MP3
+    )
+    # Perform the text-to-speech request
+    response = client.synthesize_speech(
+        input=synthesis_input, voice=voice, audio_config=audio_config
+    )
+    return response.audio_content
 def text_to_speech_and_display(text, autoplay=True):
+    """
+    Displays the text and plays the synthesized audio.
+    """
     if not text:
         return
     try:
+        # 1. Display the caption in chat
         if 'chat_history' not in st.session_state:
             st.session_state.chat_history = []
         st.session_state.chat_history.append(f"**Interviewer:** {text}")
+        # 2. Synthesize speech
+        audio_content = synthesize_speech(text)
+        # 3. Display audio player
+        if audio_content:
+            st.audio(audio_content, format='audio/mp3', autoplay=autoplay)
+        else:
+            st.info("Audio generation is disabled or failed.")
     except Exception as e:
+        # This will catch any API errors (like 403, 404, etc.)
+        st.error(f"Error during text-to-speech: {e}")
+        st.info("This usually means the 'Cloud Text-to-Speech API' is not enabled in your Google Cloud project.")
+# --- END OF REPLACEMENT ---
 # We are replacing it with a text_input
             st.warning("AI Calls are in TEST MODE.")
             # 1. Generate DUMMY Questions
+            resume_text = st.session_state.resume_text
+            st.session_state.questions = generate_questions_from_resume(resume_text, gen_q_model)
             # 2. Get DUMMY AI Introduction
             intro_output = get_introduction(intro_model)
+            st.session_state.current_question = intro_output.question
             # 3. Move to next stage and display intro
             st.session_state.stage = 'awaiting_intro'
+            text_to_speech_and_display(intro_output.intro)
+            text_to_speech_and_display(intro_output.question)
             # Clean up the resume text from session state
             if 'resume_text' in st.session_state: