Parimal Kalpande committed on
Commit
2c970f4
·
1 Parent(s): 2c4a6dc
DOCKERFILE CHANGED
@@ -4,20 +4,24 @@ FROM python:3.11-slim
4
  # Set the working directory in the container
5
  WORKDIR /app
6
 
7
- # Copy the requirements file into the container
8
- COPY requirements.txt .
9
-
10
- # Install any needed system dependencies (like for audio)
11
  RUN apt-get update && apt-get install -y --no-install-recommends \
12
  ffmpeg \
 
13
  && rm -rf /var/lib/apt/lists/*
14
 
 
 
 
15
  # Install the Python dependencies
16
  RUN pip install --no-cache-dir -r requirements.txt
17
 
18
  # Copy the rest of the application's code into the container
19
  COPY . .
20
 
 
 
 
21
  # Expose the port that Gradio runs on
22
  EXPOSE 7860
23
 
 
4
  # Set the working directory in the container
5
  WORKDIR /app
6
 
7
+ # Install system dependencies
 
 
 
8
  RUN apt-get update && apt-get install -y --no-install-recommends \
9
  ffmpeg \
10
+ git \
11
  && rm -rf /var/lib/apt/lists/*
12
 
13
+ # Copy the requirements file into the container
14
+ COPY requirements.txt .
15
+
16
  # Install the Python dependencies
17
  RUN pip install --no-cache-dir -r requirements.txt
18
 
19
  # Copy the rest of the application's code into the container
20
  COPY . .
21
 
22
+ # Create necessary directories
23
+ RUN mkdir -p uploads reports
24
+
25
  # Expose the port that Gradio runs on
26
  EXPOSE 7860
27
 
README_HF.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AI Interview Coach - Hugging Face Spaces Deployment
2
+
3
+ This is an AI-powered interview coaching application that helps users practice for Product Manager interviews.
4
+
5
+ ## Features
6
+ - Interactive voice-based interview simulation
7
+ - Document upload for personalized questions
8
+ - Real-time feedback and evaluation
9
+ - PDF report generation
10
+
11
+ ## Setup for Hugging Face Spaces
12
+
13
+ ### Required Environment Variables
14
+ Add these secrets in your Hugging Face Space settings:
15
+
16
+ ```
17
+ GROQ_API_KEY=your_groq_api_key_here
18
+ ```
19
+
20
+ ### Known Limitations in HF Spaces
21
+ - TTS (Text-to-Speech) audio generation may be disabled due to system dependencies
22
+ - Some audio features might not work in the containerized environment
23
+
24
+ ## Local Development
25
+ To run locally, ensure you have:
26
+ 1. A valid GROQ API key set as environment variable
27
+ 2. All required system dependencies installed
28
+ 3. The voice model files in the `voice_model/` directory
app.py CHANGED
@@ -36,11 +36,18 @@ def start_interview(interview_type, doc_file, name, num_questions):
36
  initial_state["current_question_text"] = first_question
37
  greeting = f"Hello {initial_state['name']}. We'll go through {int(num_questions)} questions today. Here is your first question:"
38
  tts_prompt = f"{greeting} {first_question}"
39
- ai_voice_path = text_to_speech_file(tts_prompt)
 
 
 
 
 
 
 
40
  return {
41
  state: initial_state,
42
  chatbot: gr.update(value=[[None, f"{greeting}\n\n{first_question}"]]),
43
- audio_out: gr.update(value=ai_voice_path, autoplay=True),
44
  audio_in: gr.update(interactive=True),
45
  start_btn: gr.update(interactive=False)
46
  }
@@ -60,10 +67,17 @@ def handle_interview_turn(user_audio, chatbot_history, current_state):
60
  end_message = "This concludes the interview. Generating your final report now."
61
  chatbot_history.append([None, end_message])
62
  pdf_path = generate_pdf_file(current_state)
63
- ai_voice_path = text_to_speech_file(end_message)
 
 
 
 
 
 
 
64
  yield {
65
  chatbot: chatbot_history,
66
- audio_out: gr.update(value=ai_voice_path, autoplay=True),
67
  download_pdf_btn: gr.update(value=pdf_path, visible=True)
68
  }
69
  else:
@@ -73,11 +87,18 @@ def handle_interview_turn(user_audio, chatbot_history, current_state):
73
  q_num = current_state["current_question_num"]
74
  transition_message = f"Thank you. Here is question {q_num}:\n\n{next_question}"
75
  chatbot_history.append([None, transition_message])
76
- ai_voice_path = text_to_speech_file(transition_message)
 
 
 
 
 
 
 
77
  yield {
78
  state: current_state,
79
  chatbot: chatbot_history,
80
- audio_out: gr.update(value=ai_voice_path, autoplay=True),
81
  audio_in: gr.update(interactive=True)
82
  }
83
 
@@ -133,4 +154,9 @@ with gr.Blocks(theme=gr.themes.Default()) as app:
133
  if __name__ == "__main__":
134
  os.makedirs(config.UPLOAD_FOLDER, exist_ok=True)
135
  os.makedirs(config.REPORT_FOLDER, exist_ok=True)
136
- app.launch(debug=True)
 
 
 
 
 
 
36
  initial_state["current_question_text"] = first_question
37
  greeting = f"Hello {initial_state['name']}. We'll go through {int(num_questions)} questions today. Here is your first question:"
38
  tts_prompt = f"{greeting} {first_question}"
39
+
40
+ # Try to generate TTS audio, but don't fail if it's not available
41
+ try:
42
+ ai_voice_path = text_to_speech_file(tts_prompt)
43
+ except Exception as e:
44
+ print(f"TTS generation failed: {e}")
45
+ ai_voice_path = None
46
+
47
  return {
48
  state: initial_state,
49
  chatbot: gr.update(value=[[None, f"{greeting}\n\n{first_question}"]]),
50
+ audio_out: gr.update(value=ai_voice_path, autoplay=True if ai_voice_path else False),
51
  audio_in: gr.update(interactive=True),
52
  start_btn: gr.update(interactive=False)
53
  }
 
67
  end_message = "This concludes the interview. Generating your final report now."
68
  chatbot_history.append([None, end_message])
69
  pdf_path = generate_pdf_file(current_state)
70
+
71
+ # Try to generate TTS audio, but don't fail if it's not available
72
+ try:
73
+ ai_voice_path = text_to_speech_file(end_message)
74
+ except Exception as e:
75
+ print(f"TTS generation failed: {e}")
76
+ ai_voice_path = None
77
+
78
  yield {
79
  chatbot: chatbot_history,
80
+ audio_out: gr.update(value=ai_voice_path, autoplay=True if ai_voice_path else False),
81
  download_pdf_btn: gr.update(value=pdf_path, visible=True)
82
  }
83
  else:
 
87
  q_num = current_state["current_question_num"]
88
  transition_message = f"Thank you. Here is question {q_num}:\n\n{next_question}"
89
  chatbot_history.append([None, transition_message])
90
+
91
+ # Try to generate TTS audio, but don't fail if it's not available
92
+ try:
93
+ ai_voice_path = text_to_speech_file(transition_message)
94
+ except Exception as e:
95
+ print(f"TTS generation failed: {e}")
96
+ ai_voice_path = None
97
+
98
  yield {
99
  state: current_state,
100
  chatbot: chatbot_history,
101
+ audio_out: gr.update(value=ai_voice_path, autoplay=True if ai_voice_path else False),
102
  audio_in: gr.update(interactive=True)
103
  }
104
 
 
154
  if __name__ == "__main__":
155
  os.makedirs(config.UPLOAD_FOLDER, exist_ok=True)
156
  os.makedirs(config.REPORT_FOLDER, exist_ok=True)
157
+ # Configure for Hugging Face Spaces deployment
158
+ app.launch(
159
+ server_name="0.0.0.0",
160
+ server_port=7860,
161
+ share=False
162
+ )
check_env.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Environment validation script for AI Interview Coach
4
+ Run this before deploying to check for common issues
5
+ """
6
+
7
+ import os
8
+ import sys
9
+
10
+ def check_environment():
11
+ """Check if the environment is properly configured"""
12
+ issues = []
13
+
14
+ # Check for required environment variables
15
+ if not os.environ.get("GROQ_API_KEY"):
16
+ issues.append("❌ GROQ_API_KEY environment variable is not set")
17
+ else:
18
+ print("✅ GROQ_API_KEY is set")
19
+
20
+ # Check for required directories
21
+ required_dirs = ['uploads', 'reports']
22
+ for dir_name in required_dirs:
23
+ if not os.path.exists(dir_name):
24
+ issues.append(f"❌ Directory '{dir_name}' does not exist")
25
+ else:
26
+ print(f"✅ Directory '{dir_name}' exists")
27
+
28
+ # Check for voice model file
29
+ voice_model_path = './voice_model/en_US-lessac-medium.onnx'
30
+ if not os.path.exists(voice_model_path):
31
+ issues.append(f"⚠️ Voice model file not found at {voice_model_path} (TTS will be disabled)")
32
+ else:
33
+ print("✅ Voice model file found")
34
+
35
+ # Try to import critical modules
36
+ try:
37
+ import gradio
38
+ print("✅ Gradio imported successfully")
39
+ except ImportError:
40
+ issues.append("❌ Gradio not installed")
41
+
42
+ try:
43
+ from groq import Groq
44
+ print("✅ Groq imported successfully")
45
+ except ImportError:
46
+ issues.append("❌ Groq not installed")
47
+
48
+ # Summary
49
+ if issues:
50
+ print("\n🚨 Issues found:")
51
+ for issue in issues:
52
+ print(f" {issue}")
53
+ print(f"\nFound {len(issues)} issue(s) that need to be addressed.")
54
+ return False
55
+ else:
56
+ print("\n🎉 All checks passed! Ready for deployment.")
57
+ return True
58
+
59
+ if __name__ == "__main__":
60
+ success = check_environment()
61
+ sys.exit(0 if success else 1)
config.py CHANGED
@@ -8,7 +8,7 @@ OLLAMA_MODEL = 'llama3.1'
8
  INTERVIEW_TYPES = ['Product Sense', 'Technical', 'General Product Interview', 'Group Discussion (GD)', 'Root case analysis']
9
 
10
  # -- Piper TTS Configuration --
11
- PIPER_VOICE_MODEL = './voice_model/en_US-lessac-medium.onnx'
12
 
13
  # -- Directories --
14
  UPLOAD_FOLDER = 'uploads'
 
8
  INTERVIEW_TYPES = ['Product Sense', 'Technical', 'General Product Interview', 'Group Discussion (GD)', 'Root case analysis']
9
 
10
  # -- Piper TTS Configuration --
11
+ PIPER_VOICE_MODEL = os.path.join(os.path.dirname(__file__), 'voice_model', 'en_US-lessac-medium.onnx')
12
 
13
  # -- Directories --
14
  UPLOAD_FOLDER = 'uploads'
modules/llm_handler.py CHANGED
@@ -6,7 +6,11 @@ from groq import Groq
6
  from modules.web_search import search_for_example_answers
7
 
8
  # Initialize the Groq client
9
- client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 
 
 
 
10
  MODEL = "llama3-70b-8192" # Use the more powerful 70B model for detailed analysis
11
 
12
  def generate_question(interview_type, document_text):
 
6
  from modules.web_search import search_for_example_answers
7
 
8
  # Initialize the Groq client
9
+ groq_api_key = os.environ.get("GROQ_API_KEY")
10
+ if not groq_api_key:
11
+ raise ValueError("GROQ_API_KEY environment variable is required but not set")
12
+
13
+ client = Groq(api_key=groq_api_key)
14
  MODEL = "llama3-70b-8192" # Use the more powerful 70B model for detailed analysis
15
 
16
  def generate_question(interview_type, document_text):
modules/stt_handler.py CHANGED
@@ -12,18 +12,22 @@ def transcribe_audio(audio_filepath):
12
  with sr.AudioFile(audio_filepath) as source:
13
  audio_data = recognizer.record(source)
14
  print("Transcribing with Whisper...")
 
15
  text = recognizer.recognize_whisper(audio_data, language="english")
16
  print(f"User transcribed as: {text}")
17
  return text
18
  except sr.UnknownValueError:
19
  print("STT Error: Whisper could not understand the audio.")
20
- return "[Could not understand audio]"
21
  except sr.RequestError as e:
22
  print(f"STT Error: Could not request results from Whisper service; {e}")
23
- return f"[Transcription error: Whisper service issue - {e}]"
 
 
 
24
  except Exception as e:
25
  print(f"STT Error: An unexpected error occurred during transcription: {e}")
26
- return f"[Transcription error: {e}]"
27
  finally:
28
  if os.path.exists(audio_filepath):
29
  try:
 
12
  with sr.AudioFile(audio_filepath) as source:
13
  audio_data = recognizer.record(source)
14
  print("Transcribing with Whisper...")
15
+ # Use Whisper for transcription
16
  text = recognizer.recognize_whisper(audio_data, language="english")
17
  print(f"User transcribed as: {text}")
18
  return text
19
  except sr.UnknownValueError:
20
  print("STT Error: Whisper could not understand the audio.")
21
+ return "[Could not understand audio - please try speaking more clearly]"
22
  except sr.RequestError as e:
23
  print(f"STT Error: Could not request results from Whisper service; {e}")
24
+ return "[Transcription service temporarily unavailable - please try again]"
25
+ except ImportError as e:
26
+ print(f"STT Error: Whisper not available: {e}")
27
+ return "[Speech recognition not available in this environment]"
28
  except Exception as e:
29
  print(f"STT Error: An unexpected error occurred during transcription: {e}")
30
+ return f"[Transcription error: Please try again]"
31
  finally:
32
  if os.path.exists(audio_filepath):
33
  try:
modules/tts_handler.py CHANGED
@@ -9,8 +9,14 @@ import tempfile
9
 
10
  def text_to_speech_file(text_to_speak):
11
  print(f"AI generating audio for: {text_to_speak}")
12
- piper_executable = 'piper' # Use system PATH
 
 
13
  try:
 
 
 
 
14
  with tempfile.NamedTemporaryFile(delete=False, suffix=".raw") as raw_file:
15
  raw_filename = raw_file.name
16
 
@@ -27,5 +33,6 @@ def text_to_speech_file(text_to_speak):
27
  os.remove(raw_filename)
28
  return wav_filename
29
  except Exception as e:
30
- print(f"An error occurred during TTS generation: {e}")
 
31
  return None
 
9
 
10
  def text_to_speech_file(text_to_speak):
11
  print(f"AI generating audio for: {text_to_speak}")
12
+
13
+ # For Hugging Face Spaces deployment, we'll disable TTS audio generation
14
+ # since piper-tts requires system dependencies that may not be available
15
  try:
16
+ # Check if piper executable exists
17
+ piper_executable = 'piper'
18
+
19
+ # Try to use piper if available, otherwise skip audio generation
20
  with tempfile.NamedTemporaryFile(delete=False, suffix=".raw") as raw_file:
21
  raw_filename = raw_file.name
22
 
 
33
  os.remove(raw_filename)
34
  return wav_filename
35
  except Exception as e:
36
+ print(f"TTS not available in this environment: {e}")
37
+ # Return None to disable audio playback in deployment
38
  return None
requirements.txt CHANGED
@@ -1,16 +1,16 @@
1
- ollama
2
  openai-whisper
3
- gradio
4
  pydub
5
  soundfile
6
- pyaudio
7
- piper-tts
8
  PyMuPDF
9
  python-docx
10
  reportlab
11
  speechrecognition
12
  duckduckgo-search
13
- ddgs
14
  matplotlib
15
  regex
16
- groq
 
 
 
 
 
1
+ gradio==4.44.0
2
  openai-whisper
 
3
  pydub
4
  soundfile
 
 
5
  PyMuPDF
6
  python-docx
7
  reportlab
8
  speechrecognition
9
  duckduckgo-search
 
10
  matplotlib
11
  regex
12
+ groq
13
+ # Removed problematic dependencies for HF Spaces:
14
+ # - ollama (local service, not available in HF Spaces)
15
+ # - pyaudio (often causes build issues)
16
+ # - piper-tts (system dependencies issues)