APPLE committed on
Commit
994c209
·
1 Parent(s): bc917e5

add LLM fallback chain

Browse files
Files changed (1) hide show
  1. src/main.py +230 -145
src/main.py CHANGED
@@ -12,7 +12,14 @@ from src.rag import get_context
12
  load_dotenv()
13
 
14
  app = FastAPI()
15
- client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 
 
 
 
 
 
 
16
 
17
  app.add_middleware(
18
  CORSMiddleware,
@@ -22,7 +29,7 @@ app.add_middleware(
22
  )
23
 
24
  request_counts = defaultdict(list)
25
- RATE_LIMIT = 20
26
  RATE_WINDOW = 60
27
 
28
  def check_rate_limit(ip: str):
@@ -34,11 +41,168 @@ def check_rate_limit(ip: str):
34
 
35
  interview_sessions = {}
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  SKILL_PROMPTS = {
38
  "dsa": """You are a senior DSA interviewer. Structure the interview exactly as follows:
39
 
40
  PHASE 1 - INTRODUCTION (first 2 exchanges):
41
- - Warmly greet the candidate
42
  - Ask them to introduce themselves and their background
43
  - Ask about their experience with data structures and algorithms
44
 
@@ -46,100 +210,81 @@ PHASE 2 - CODING PROBLEMS (next 4-5 exchanges):
46
  - Start with an easy problem, then medium, then hard
47
  - ALWAYS first ask the candidate to explain their approach before coding
48
  - If the approach is correct, say "Great approach! Now go ahead and code it in the editor."
49
- - If the approach is unclear or wrong, ask probing questions: "What is the time complexity?", "Can you think of a more optimal approach?", "What about edge cases?"
50
  - Do NOT let them code until the approach is solid
51
  - After they submit code, review it: check correctness, complexity, edge cases
52
  - Ask follow-up optimization questions
53
 
54
  PHASE 3 - BEHAVIORAL (last 2-3 exchanges):
55
  - Ask about a challenging technical problem they solved
56
- - Ask about working under pressure or tight deadlines
57
  - Wrap up and give brief honest feedback
58
 
59
- Keep each response to 2-3 sentences. Be encouraging but rigorous. Ask one thing at a time.""",
60
 
61
- "system_design": """You are a principal engineer conducting a system design interview. Structure as follows:
62
 
63
  PHASE 1 - INTRODUCTION (first 2 exchanges):
64
- - Greet the candidate warmly
65
- - Ask them to introduce themselves and describe the most complex system they've built
66
- - Understand their background and scale of systems they've worked with
67
-
68
- PHASE 2 - SYSTEM DESIGN PROBLEMS (next 5-6 exchanges):
69
- - Give a design problem (e.g., design Twitter feed, URL shortener, chat system)
70
- - Guide through: requirements clarification high level design → deep dive → scaling
71
- - Probe on: database choices, caching strategies, load balancing, fault tolerance
72
- - Ask "why" for every major decision they make
73
- - Challenge their assumptions with scale questions
74
-
75
- PHASE 3 - BEHAVIORAL & SITUATIONAL (last 2-3 exchanges):
76
- - Ask about a time they had to make a difficult technical trade-off
77
- - Ask about dealing with system outages or failures
78
  - Close with feedback
79
 
80
  Keep responses to 2-3 sentences. Ask one question at a time.""",
81
 
82
- "behavioral": """You are an experienced HR and behavioral interviewer. Structure as follows:
83
 
84
  PHASE 1 - INTRODUCTION (first 2 exchanges):
85
- - Warmly welcome the candidate
86
- - Ask them to walk you through their background and career journey
87
- - Ask what motivated them to pursue this type of role
88
-
89
- PHASE 2 - BEHAVIORAL DEEP DIVE (next 5-6 exchanges):
90
- - Use STAR method questions: Situation, Task, Action, Result
91
- - Cover: leadership, conflict resolution, failure & learning, teamwork, innovation
92
- - Probe deeper when answers are vague: "What was YOUR specific contribution?", "What would you do differently?"
93
- - Ask situational questions: "If your team disagreed with your technical decision, how would you handle it?"
94
- - Ask about culture fit: values, work style, collaboration preferences
95
-
96
- PHASE 3 - ROLE FIT & CLOSE (last 2 exchanges):
97
- - Ask about their career goals and how this role fits
98
- - Give them a chance to ask questions, then close with feedback
99
-
100
- Keep responses to 2-3 sentences. Be empathetic but thorough. Ask one question at a time.""",
101
  }
102
 
103
  def build_company_prompt(company: str, role: str, context: str) -> str:
104
- return f"""You are a senior {role} interviewer at {company}. You have access to real interview experiences from {company} candidates below. Use these to mirror the exact interview style, question types, difficulty, and culture of {company}.
105
 
106
  --- REAL {company.upper()} INTERVIEW EXPERIENCES ---
107
  {context}
108
  --- END ---
109
 
110
- Structure this interview EXACTLY as follows:
111
-
112
  PHASE 1 - INTRODUCTION & RESUME (exchanges 1-3):
113
- - Warmly greet the candidate as a {company} interviewer. Introduce yourself as "Rohan" — a senior {role} interviewer at {company}. Sound human and natural, not robotic.
114
- - Ask them to introduce themselves
115
- - Ask resume-based questions specific to {role} at {company}: past projects, tech stack, scale of systems built
116
- - Ask why they want to join {company} specifically — probe for genuine motivation
117
- - Reference {company}'s culture and values naturally in conversation
118
 
119
  PHASE 2 - CODING ROUNDS (exchanges 4-8):
120
- - Ask problems that {company} is KNOWN to ask for {role} (use the experiences above for reference)
121
- - ALWAYS ask for the approach FIRST before any coding: "Walk me through your approach before you start coding."
122
- - If approach is correct and clear: "Good thinking. Go ahead and implement it in the code editor."
123
- - If approach is vague or suboptimal: keep probing — "What's the time complexity?", "Can we do better?", "What about edge cases?" — do NOT proceed to coding until the approach is solid
124
- - After code is submitted: review for correctness, complexity, style, edge cases
125
- - Ask at least one follow-up: "Can you optimize this further?" or "How would this behave with 1 billion inputs?"
126
 
127
  PHASE 3 - BEHAVIORAL & FIT (exchanges 9-11):
128
- - Ask behavioral questions that {company} specifically focuses on (e.g., Amazon = Leadership Principles, Microsoft = growth mindset, collaboration)
129
- - Ask situational questions relevant to {role}: "Tell me about a time you disagreed with your manager on a technical decision."
130
- - Ask role-specific scenarios: on-call incidents, cross-team collaboration, shipping under pressure
131
- - Assess culture fit for {company} specifically
132
 
133
  PHASE 4 - CLOSE (exchange 12):
134
- - Ask if the candidate has questions
135
- - Give honest, constructive feedback like a real {company} interviewer would
136
- - Mention next steps as {company} would
137
 
138
- Rules:
139
- - Keep each response to 2-3 sentences max (they will be spoken aloud)
140
- - Ask ONE thing at a time
141
- - Be rigorous but encouraging — match {company}'s interview culture
142
- - Always reference the real experiences above when choosing questions"""
143
 
144
 
145
  @app.post("/api/start-interview")
@@ -149,52 +294,40 @@ async def start_interview(request: Request):
149
  mode = body.get("mode", "behavioral")
150
  company = (body.get("company") or "").lower()
151
  role = body.get("role", "SDE")
 
152
 
153
  session_id = str(time.time())
154
 
155
  if mode == "company" and company:
156
- context = get_context(company, f"{role} interview questions experience {company} coding behavioral", n=8)
157
  system_prompt = build_company_prompt(company.capitalize(), role, context)
158
  else:
159
  system_prompt = SKILL_PROMPTS.get(mode, SKILL_PROMPTS["behavioral"])
160
 
 
 
 
161
  interview_sessions[session_id] = {
162
  "messages": [{"role": "system", "content": system_prompt}],
163
  "mode": mode,
164
  "company": company,
165
  "role": role,
166
- "exchange_count": 0
 
 
167
  }
168
 
169
- response = client.chat.completions.create(
170
- model="llama-3.3-70b-versatile",
171
- messages=interview_sessions[session_id]["messages"],
172
- max_tokens=150
173
- )
174
- ai_message = response.choices[0].message.content
175
  interview_sessions[session_id]["messages"].append({"role": "assistant", "content": ai_message})
176
-
177
  return {"session_id": session_id, "message": ai_message}
178
 
179
 
180
  @app.post("/api/transcribe")
181
  async def transcribe_audio(request: Request, audio: UploadFile = File(...)):
182
  check_rate_limit(request.client.host)
183
- with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as tmp:
184
- content = await audio.read()
185
- tmp.write(content)
186
- tmp_path = tmp.name
187
- try:
188
- with open(tmp_path, "rb") as f:
189
- transcription = client.audio.transcriptions.create(
190
- file=(os.path.basename(tmp_path), f.read()),
191
- model="whisper-large-v3-turbo",
192
- response_format="text",
193
- language="en"
194
- )
195
- return {"text": transcription}
196
- finally:
197
- os.unlink(tmp_path)
198
 
199
 
200
  @app.post("/api/respond")
@@ -211,7 +344,7 @@ async def get_response(request: Request):
211
  session = interview_sessions[session_id]
212
  full_message = user_message
213
  if code.strip():
214
- full_message += f"\n\n[Candidate submitted code]:\n```\n{code}\n```\nPlease review this code — check correctness, time/space complexity, edge cases, and coding style. Then continue the interview."
215
 
216
  session["messages"].append({"role": "user", "content": full_message})
217
  session["exchange_count"] += 1
@@ -222,19 +355,12 @@ async def get_response(request: Request):
222
  if context:
223
  session["messages"].append({
224
  "role": "system",
225
- "content": f"Relevant additional context from real interview experiences:\n{context}"
226
  })
227
 
228
  stage = "introduction" if count <= 3 else "technical" if count <= 8 else "behavioral" if count <= 11 else "closing"
229
-
230
- response = client.chat.completions.create(
231
- model="llama-3.3-70b-versatile",
232
- messages=session["messages"],
233
- max_tokens=200
234
- )
235
- ai_message = response.choices[0].message.content
236
  session["messages"].append({"role": "assistant", "content": ai_message})
237
-
238
  return {"message": ai_message, "stage": stage, "exchange_count": count}
239
 
240
 
@@ -246,53 +372,12 @@ async def synthesize_speech(request: Request):
246
  if not text:
247
  raise HTTPException(status_code=400, detail="No text provided")
248
 
249
- eleven_key = os.environ.get("ELEVENLABS_API_KEY")
250
-
251
- # PRIMARY: Groq Orpheus
252
- try:
253
- response = client.audio.speech.create(
254
- model="canopylabs/orpheus-v1-english",
255
- voice="daniel",
256
- input=text[:500],
257
- response_format="wav"
258
- )
259
- audio_bytes = response.read()
260
- return StreamingResponse(
261
- iter([audio_bytes]),
262
- media_type="audio/wav"
263
- )
264
-
265
- except Exception as groq_error:
266
- # FALLBACK: ElevenLabs if rate limited and key exists
267
- if eleven_key and ("rate_limit" in str(groq_error).lower() or "429" in str(groq_error)):
268
- try:
269
- async with httpx.AsyncClient() as http:
270
- r = await http.post(
271
- "https://api.elevenlabs.io/v1/text-to-speech/onwK4e9ZLuTAKqWW03F9",
272
- headers={
273
- "xi-api-key": eleven_key,
274
- "Content-Type": "application/json"
275
- },
276
- json={
277
- "text": text[:500],
278
- "model_id": "eleven_turbo_v2",
279
- "voice_settings": {
280
- "stability": 0.5,
281
- "similarity_boost": 0.75
282
- }
283
- },
284
- timeout=15.0
285
- )
286
- if r.status_code == 200:
287
- return StreamingResponse(
288
- iter([r.content]),
289
- media_type="audio/mpeg"
290
- )
291
- except Exception:
292
- pass
293
-
294
- # FINAL FALLBACK: tell frontend to use browser TTS
295
- raise HTTPException(status_code=503, detail="tts_unavailable")
296
 
297
 
298
  @app.get("/")
 
12
  load_dotenv()
13
 
14
  app = FastAPI()
15
+
16
# Provider credentials for the fallback chains below; each is optional —
# an empty value simply disables that provider.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY", "")
DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY", "")

# Groq client is created once at startup; None when no key is configured,
# which the fallback helpers check before attempting a Groq call.
groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
23
 
24
  app.add_middleware(
25
  CORSMiddleware,
 
29
  )
30
 
31
# Per-client request timestamps, keyed by IP (see check_rate_limit usage).
request_counts = defaultdict(list)
RATE_LIMIT = 30   # max requests allowed per client within RATE_WINDOW
RATE_WINDOW = 60  # window length in seconds
34
 
35
  def check_rate_limit(ip: str):
 
41
 
42
  interview_sessions = {}
43
 
44
# ── LLM with fallback chain: Groq → Gemini → OpenRouter ──
async def call_llm(messages: list, max_tokens: int = 200) -> str:
    """Return a chat completion for `messages`, trying providers in order.

    Order: Groq (primary) → Google Gemini → OpenRouter free tier.
    A Groq failure only falls through when it looks like a rate limit;
    any other Groq error is re-raised so real bugs surface.

    Raises:
        HTTPException: 503 when every configured provider fails.
    """
    # 1. Try Groq
    if groq_client:
        try:
            res = groq_client.chat.completions.create(
                model="llama-3.3-70b-versatile",
                messages=messages,
                max_tokens=max_tokens
            )
            return res.choices[0].message.content
        except Exception as e:
            if "rate_limit" not in str(e).lower() and "429" not in str(e):
                raise  # bare raise keeps the original traceback intact
            print("Groq LLM limit hit, trying Gemini...")

    # 2. Try Google Gemini
    if GOOGLE_API_KEY:
        try:
            # Build the payload before opening the HTTP client; the
            # conversion is pure CPU work and needs no connection.
            payload = _gemini_payload(messages, max_tokens)
            async with httpx.AsyncClient() as http:
                r = await http.post(
                    f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GOOGLE_API_KEY}",
                    json=payload,
                    timeout=30.0
                )
            if r.status_code == 200:
                data = r.json()
                return data["candidates"][0]["content"]["parts"][0]["text"]
            print(f"Gemini error: {r.status_code} {r.text}")
        except Exception as e:
            print(f"Gemini failed: {e}")

    # 3. Try OpenRouter
    if OPENROUTER_API_KEY:
        try:
            async with httpx.AsyncClient() as http:
                r = await http.post(
                    "https://openrouter.ai/api/v1/chat/completions",
                    headers={
                        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                        "Content-Type": "application/json"
                    },
                    json={
                        "model": "meta-llama/llama-3.3-70b-instruct:free",
                        "messages": messages,
                        "max_tokens": max_tokens
                    },
                    timeout=30.0
                )
            if r.status_code == 200:
                return r.json()["choices"][0]["message"]["content"]
            print(f"OpenRouter error: {r.status_code}")
        except Exception as e:
            print(f"OpenRouter failed: {e}")

    raise HTTPException(status_code=503, detail="All LLM providers exhausted")


def _gemini_payload(messages: list, max_tokens: int) -> dict:
    """Convert OpenAI-style chat messages into a Gemini generateContent payload."""
    system_msg = next((m["content"] for m in messages if m["role"] == "system"), "")
    contents = []
    for m in messages:
        if m["role"] == "system":
            continue  # system prompt goes into systemInstruction, not contents
        role = "user" if m["role"] == "user" else "model"
        contents.append({"role": role, "parts": [{"text": m["content"]}]})
    if not contents:
        # Gemini rejects an empty contents list; seed a minimal user turn.
        contents = [{"role": "user", "parts": [{"text": "Hello"}]}]

    payload = {
        "contents": contents,
        "generationConfig": {"maxOutputTokens": max_tokens}
    }
    if system_msg:
        payload["systemInstruction"] = {"parts": [{"text": system_msg}]}
    return payload
118
+
119
+
120
# ── STT with fallback: Groq Whisper → Deepgram ──
async def transcribe(audio_content: bytes, filename: str) -> str:
    """Transcribe `audio_content` to text, trying Groq Whisper then Deepgram.

    Raises:
        HTTPException: 503 when no configured provider succeeds.
    """
    # 1. Groq Whisper — pass the uploaded bytes directly as a (name, bytes)
    #    tuple. The previous temp-file roundtrip wrote the bytes to disk only
    #    to read them straight back, and leaked the file when the API call
    #    raised before the unlink.
    if groq_client:
        try:
            result = groq_client.audio.transcriptions.create(
                file=(filename, audio_content),
                model="whisper-large-v3-turbo",
                response_format="text",
                language="en"
            )
            return result
        except Exception as e:
            print(f"Groq STT failed: {e}")

    # 2. Deepgram
    if DEEPGRAM_API_KEY:
        try:
            async with httpx.AsyncClient() as http:
                r = await http.post(
                    "https://api.deepgram.com/v1/listen?model=nova-3&smart_format=true&language=en",
                    headers={
                        "Authorization": f"Token {DEEPGRAM_API_KEY}",
                        "Content-Type": "audio/webm"
                    },
                    content=audio_content,
                    timeout=20.0
                )
            if r.status_code == 200:
                return r.json()["results"]["channels"][0]["alternatives"][0]["transcript"]
        except Exception as e:
            print(f"Deepgram STT failed: {e}")

    raise HTTPException(status_code=503, detail="All STT providers exhausted")
159
+
160
+
161
# ── TTS with fallback chain: Groq Orpheus → ElevenLabs → Browser signal ──
async def synthesize(text: str):
    """Render `text` to speech.

    Returns a `(audio_bytes, media_type)` pair, or `(None, None)` when no
    provider produced audio — the caller then signals browser-side TTS.
    Input is truncated to 500 characters for every provider.
    """
    # 1. Groq Orpheus
    if groq_client:
        try:
            speech = groq_client.audio.speech.create(
                model="canopylabs/orpheus-v1-english",
                voice="daniel",
                input=text[:500],
                response_format="wav"
            )
            return speech.read(), "audio/wav"
        except Exception as e:
            print(f"Groq TTS failed: {e}")

    # 2. ElevenLabs
    if ELEVENLABS_API_KEY:
        request_headers = {
            "xi-api-key": ELEVENLABS_API_KEY,
            "Content-Type": "application/json"
        }
        request_body = {
            "text": text[:500],
            "model_id": "eleven_turbo_v2",
            "voice_settings": {"stability": 0.5, "similarity_boost": 0.75}
        }
        try:
            async with httpx.AsyncClient() as session:
                resp = await session.post(
                    "https://api.elevenlabs.io/v1/text-to-speech/onwK4e9ZLuTAKqWW03F9",
                    headers=request_headers,
                    json=request_body,
                    timeout=15.0
                )
            if resp.status_code == 200:
                return resp.content, "audio/mpeg"
        except Exception as e:
            print(f"ElevenLabs TTS failed: {e}")

    # No provider succeeded.
    return None, None
199
+
200
+
201
  SKILL_PROMPTS = {
202
  "dsa": """You are a senior DSA interviewer. Structure the interview exactly as follows:
203
 
204
  PHASE 1 - INTRODUCTION (first 2 exchanges):
205
+ - Warmly greet the candidate. Introduce yourself as Alex.
206
  - Ask them to introduce themselves and their background
207
  - Ask about their experience with data structures and algorithms
208
 
 
210
  - Start with an easy problem, then medium, then hard
211
  - ALWAYS first ask the candidate to explain their approach before coding
212
  - If the approach is correct, say "Great approach! Now go ahead and code it in the editor."
213
+ - If the approach is unclear or wrong, probe: "What is the time complexity?", "Can you think of a more optimal approach?"
214
  - Do NOT let them code until the approach is solid
215
  - After they submit code, review it: check correctness, complexity, edge cases
216
  - Ask follow-up optimization questions
217
 
218
  PHASE 3 - BEHAVIORAL (last 2-3 exchanges):
219
  - Ask about a challenging technical problem they solved
 
220
  - Wrap up and give brief honest feedback
221
 
222
+ Keep each response to 2-3 sentences. Ask one thing at a time.""",
223
 
224
+ "system_design": """You are a principal engineer conducting a system design interview. Introduce yourself as Alex.
225
 
226
  PHASE 1 - INTRODUCTION (first 2 exchanges):
227
+ - Greet warmly, introduce as Alex
228
+ - Ask them to describe the most complex system they've built
229
+
230
+ PHASE 2 - DESIGN PROBLEMS (next 5-6 exchanges):
231
+ - Give a design problem
232
+ - Guide: requirements high level deep dive scaling
233
+ - Probe on: databases, caching, load balancing, fault tolerance
234
+ - Ask "why" for every major decision
235
+
236
+ PHASE 3 - CLOSE (last 2 exchanges):
237
+ - Ask about a difficult technical trade-off they made
 
 
 
238
  - Close with feedback
239
 
240
  Keep responses to 2-3 sentences. Ask one question at a time.""",
241
 
242
+ "behavioral": """You are an experienced HR interviewer. Introduce yourself as Alex.
243
 
244
  PHASE 1 - INTRODUCTION (first 2 exchanges):
245
+ - Warmly welcome, introduce as Alex
246
+ - Ask them to walk through their background
247
+
248
+ PHASE 2 - BEHAVIORAL (next 5-6 exchanges):
249
+ - Use STAR method: Situation, Task, Action, Result
250
+ - Cover: leadership, conflict, failure & learning, teamwork
251
+ - Probe deeper: "What was YOUR specific contribution?"
252
+
253
+ PHASE 3 - CLOSE (last 2 exchanges):
254
+ - Ask about career goals
255
+ - Close with feedback
256
+
257
+ Keep responses to 2-3 sentences. Ask one question at a time.""",
 
 
 
258
  }
259
 
260
def build_company_prompt(company: str, role: str, context: str) -> str:
    """Build the system prompt for a company-specific mock interview.

    `context` is retrieved text of real interview experiences (see the
    `get_context` call site); it is embedded verbatim between the markers
    so the model can mirror that company's question style.
    """
    return f"""You are a senior {role} interviewer at {company}. Your name is Alex.

--- REAL {company.upper()} INTERVIEW EXPERIENCES ---
{context}
--- END ---

PHASE 1 - INTRODUCTION & RESUME (exchanges 1-3):
- Introduce yourself as Alex, senior {role} interviewer at {company}
- Ask candidate to introduce themselves
- Ask resume-based questions: past projects, tech stack, scale
- Ask why they want to join {company}

PHASE 2 - CODING ROUNDS (exchanges 4-8):
- Ask problems {company} is KNOWN to ask for {role}
- ALWAYS ask for approach FIRST before coding
- If approach correct: "Good. Go ahead and implement it in the code editor."
- If vague: probe on complexity, edge cases, optimization
- After code submitted: review correctness, complexity, style

PHASE 3 - BEHAVIORAL & FIT (exchanges 9-11):
- Ask behavioral questions {company} focuses on
- Amazon = Leadership Principles, Microsoft = growth mindset

PHASE 4 - CLOSE (exchange 12):
- Give honest constructive feedback

Rules: 2-3 sentences max per response. ONE question at a time."""
 
 
 
 
288
 
289
 
290
  @app.post("/api/start-interview")
 
294
  mode = body.get("mode", "behavioral")
295
  company = (body.get("company") or "").lower()
296
  role = body.get("role", "SDE")
297
+ user_name = body.get("user_name", "")
298
 
299
  session_id = str(time.time())
300
 
301
  if mode == "company" and company:
302
+ context = get_context(company, f"{role} interview questions {company} coding behavioral", n=8)
303
  system_prompt = build_company_prompt(company.capitalize(), role, context)
304
  else:
305
  system_prompt = SKILL_PROMPTS.get(mode, SKILL_PROMPTS["behavioral"])
306
 
307
+ if user_name:
308
+ system_prompt += f"\n\nThe candidate's name is {user_name}. Use their name naturally."
309
+
310
  interview_sessions[session_id] = {
311
  "messages": [{"role": "system", "content": system_prompt}],
312
  "mode": mode,
313
  "company": company,
314
  "role": role,
315
+ "user_name": user_name,
316
+ "exchange_count": 0,
317
+ "start_time": time.time()
318
  }
319
 
320
+ ai_message = await call_llm(interview_sessions[session_id]["messages"], max_tokens=150)
 
 
 
 
 
321
  interview_sessions[session_id]["messages"].append({"role": "assistant", "content": ai_message})
 
322
  return {"session_id": session_id, "message": ai_message}
323
 
324
 
325
  @app.post("/api/transcribe")
326
  async def transcribe_audio(request: Request, audio: UploadFile = File(...)):
327
  check_rate_limit(request.client.host)
328
+ content = await audio.read()
329
+ text = await transcribe(content, audio.filename or "rec.webm")
330
+ return {"text": text}
 
 
 
 
 
 
 
 
 
 
 
 
331
 
332
 
333
  @app.post("/api/respond")
 
344
  session = interview_sessions[session_id]
345
  full_message = user_message
346
  if code.strip():
347
+ full_message += f"\n\n[Candidate submitted code]:\n```\n{code}\n```\nReview: correctness, time/space complexity, edge cases, style. Then continue."
348
 
349
  session["messages"].append({"role": "user", "content": full_message})
350
  session["exchange_count"] += 1
 
355
  if context:
356
  session["messages"].append({
357
  "role": "system",
358
+ "content": f"Relevant context from real interview experiences:\n{context}"
359
  })
360
 
361
  stage = "introduction" if count <= 3 else "technical" if count <= 8 else "behavioral" if count <= 11 else "closing"
362
+ ai_message = await call_llm(session["messages"], max_tokens=200)
 
 
 
 
 
 
363
  session["messages"].append({"role": "assistant", "content": ai_message})
 
364
  return {"message": ai_message, "stage": stage, "exchange_count": count}
365
 
366
 
 
372
  if not text:
373
  raise HTTPException(status_code=400, detail="No text provided")
374
 
375
+ audio_bytes, media_type = await synthesize(text)
376
+ if audio_bytes:
377
+ return StreamingResponse(iter([audio_bytes]), media_type=media_type)
378
+
379
+ # Signal frontend to use browser TTS
380
+ raise HTTPException(status_code=503, detail="tts_unavailable")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
 
382
 
383
  @app.get("/")