APPLE committed on
Commit
994c209
·
1 Parent(s): bc917e5

add LLM fallback chain

Browse files
Files changed (1) hide show
  1. src/main.py +230 -145
src/main.py CHANGED
@@ -12,7 +12,14 @@ from src.rag import get_context
12
  load_dotenv()
13
 
14
  app = FastAPI()
15
- client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
 
 
 
 
 
 
 
16
 
17
  app.add_middleware(
18
  CORSMiddleware,
@@ -22,7 +29,7 @@ app.add_middleware(
22
  )
23
 
24
  request_counts = defaultdict(list)
25
- RATE_LIMIT = 20
26
  RATE_WINDOW = 60
27
 
28
  def check_rate_limit(ip: str):
@@ -34,11 +41,168 @@ def check_rate_limit(ip: str):
34
 
35
  interview_sessions = {}
36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
  SKILL_PROMPTS = {
38
  "dsa": """You are a senior DSA interviewer. Structure the interview exactly as follows:
39
 
40
  PHASE 1 - INTRODUCTION (first 2 exchanges):
41
- - Warmly greet the candidate
42
  - Ask them to introduce themselves and their background
43
  - Ask about their experience with data structures and algorithms
44
 
@@ -46,100 +210,81 @@ PHASE 2 - CODING PROBLEMS (next 4-5 exchanges):
46
  - Start with an easy problem, then medium, then hard
47
  - ALWAYS first ask the candidate to explain their approach before coding
48
  - If the approach is correct, say "Great approach! Now go ahead and code it in the editor."
49
- - If the approach is unclear or wrong, ask probing questions: "What is the time complexity?", "Can you think of a more optimal approach?", "What about edge cases?"
50
  - Do NOT let them code until the approach is solid
51
  - After they submit code, review it: check correctness, complexity, edge cases
52
  - Ask follow-up optimization questions
53
 
54
  PHASE 3 - BEHAVIORAL (last 2-3 exchanges):
55
  - Ask about a challenging technical problem they solved
56
- - Ask about working under pressure or tight deadlines
57
  - Wrap up and give brief honest feedback
58
 
59
- Keep each response to 2-3 sentences. Be encouraging but rigorous. Ask one thing at a time.""",
60
 
61
- "system_design": """You are a principal engineer conducting a system design interview. Structure as follows:
62
 
63
  PHASE 1 - INTRODUCTION (first 2 exchanges):
64
- - Greet the candidate warmly
65
- - Ask them to introduce themselves and describe the most complex system they've built
66
- - Understand their background and scale of systems they've worked with
67
-
68
- PHASE 2 - SYSTEM DESIGN PROBLEMS (next 5-6 exchanges):
69
- - Give a design problem (e.g., design Twitter feed, URL shortener, chat system)
70
- - Guide through: requirements clarification high level design → deep dive → scaling
71
- - Probe on: database choices, caching strategies, load balancing, fault tolerance
72
- - Ask "why" for every major decision they make
73
- - Challenge their assumptions with scale questions
74
-
75
- PHASE 3 - BEHAVIORAL & SITUATIONAL (last 2-3 exchanges):
76
- - Ask about a time they had to make a difficult technical trade-off
77
- - Ask about dealing with system outages or failures
78
  - Close with feedback
79
 
80
  Keep responses to 2-3 sentences. Ask one question at a time.""",
81
 
82
- "behavioral": """You are an experienced HR and behavioral interviewer. Structure as follows:
83
 
84
  PHASE 1 - INTRODUCTION (first 2 exchanges):
85
- - Warmly welcome the candidate
86
- - Ask them to walk you through their background and career journey
87
- - Ask what motivated them to pursue this type of role
88
-
89
- PHASE 2 - BEHAVIORAL DEEP DIVE (next 5-6 exchanges):
90
- - Use STAR method questions: Situation, Task, Action, Result
91
- - Cover: leadership, conflict resolution, failure & learning, teamwork, innovation
92
- - Probe deeper when answers are vague: "What was YOUR specific contribution?", "What would you do differently?"
93
- - Ask situational questions: "If your team disagreed with your technical decision, how would you handle it?"
94
- - Ask about culture fit: values, work style, collaboration preferences
95
-
96
- PHASE 3 - ROLE FIT & CLOSE (last 2 exchanges):
97
- - Ask about their career goals and how this role fits
98
- - Give them a chance to ask questions, then close with feedback
99
-
100
- Keep responses to 2-3 sentences. Be empathetic but thorough. Ask one question at a time.""",
101
  }
102
 
103
  def build_company_prompt(company: str, role: str, context: str) -> str:
104
- return f"""You are a senior {role} interviewer at {company}. You have access to real interview experiences from {company} candidates below. Use these to mirror the exact interview style, question types, difficulty, and culture of {company}.
105
 
106
  --- REAL {company.upper()} INTERVIEW EXPERIENCES ---
107
  {context}
108
  --- END ---
109
 
110
- Structure this interview EXACTLY as follows:
111
-
112
  PHASE 1 - INTRODUCTION & RESUME (exchanges 1-3):
113
- - Warmly greet the candidate as a {company} interviewer. Introduce yourself as "Rohan" — a senior {role} interviewer at {company}. Sound human and natural, not robotic.
114
- - Ask them to introduce themselves
115
- - Ask resume-based questions specific to {role} at {company}: past projects, tech stack, scale of systems built
116
- - Ask why they want to join {company} specifically — probe for genuine motivation
117
- - Reference {company}'s culture and values naturally in conversation
118
 
119
  PHASE 2 - CODING ROUNDS (exchanges 4-8):
120
- - Ask problems that {company} is KNOWN to ask for {role} (use the experiences above for reference)
121
- - ALWAYS ask for the approach FIRST before any coding: "Walk me through your approach before you start coding."
122
- - If approach is correct and clear: "Good thinking. Go ahead and implement it in the code editor."
123
- - If approach is vague or suboptimal: keep probing — "What's the time complexity?", "Can we do better?", "What about edge cases?" — do NOT proceed to coding until the approach is solid
124
- - After code is submitted: review for correctness, complexity, style, edge cases
125
- - Ask at least one follow-up: "Can you optimize this further?" or "How would this behave with 1 billion inputs?"
126
 
127
  PHASE 3 - BEHAVIORAL & FIT (exchanges 9-11):
128
- - Ask behavioral questions that {company} specifically focuses on (e.g., Amazon = Leadership Principles, Microsoft = growth mindset, collaboration)
129
- - Ask situational questions relevant to {role}: "Tell me about a time you disagreed with your manager on a technical decision."
130
- - Ask role-specific scenarios: on-call incidents, cross-team collaboration, shipping under pressure
131
- - Assess culture fit for {company} specifically
132
 
133
  PHASE 4 - CLOSE (exchange 12):
134
- - Ask if the candidate has questions
135
- - Give honest, constructive feedback like a real {company} interviewer would
136
- - Mention next steps as {company} would
137
 
138
- Rules:
139
- - Keep each response to 2-3 sentences max (they will be spoken aloud)
140
- - Ask ONE thing at a time
141
- - Be rigorous but encouraging — match {company}'s interview culture
142
- - Always reference the real experiences above when choosing questions"""
143
 
144
 
145
  @app.post("/api/start-interview")
@@ -149,52 +294,40 @@ async def start_interview(request: Request):
149
  mode = body.get("mode", "behavioral")
150
  company = (body.get("company") or "").lower()
151
  role = body.get("role", "SDE")
 
152
 
153
  session_id = str(time.time())
154
 
155
  if mode == "company" and company:
156
- context = get_context(company, f"{role} interview questions experience {company} coding behavioral", n=8)
157
  system_prompt = build_company_prompt(company.capitalize(), role, context)
158
  else:
159
  system_prompt = SKILL_PROMPTS.get(mode, SKILL_PROMPTS["behavioral"])
160
 
 
 
 
161
  interview_sessions[session_id] = {
162
  "messages": [{"role": "system", "content": system_prompt}],
163
  "mode": mode,
164
  "company": company,
165
  "role": role,
166
- "exchange_count": 0
 
 
167
  }
168
 
169
- response = client.chat.completions.create(
170
- model="llama-3.3-70b-versatile",
171
- messages=interview_sessions[session_id]["messages"],
172
- max_tokens=150
173
- )
174
- ai_message = response.choices[0].message.content
175
  interview_sessions[session_id]["messages"].append({"role": "assistant", "content": ai_message})
176
-
177
  return {"session_id": session_id, "message": ai_message}
178
 
179
 
180
  @app.post("/api/transcribe")
181
  async def transcribe_audio(request: Request, audio: UploadFile = File(...)):
182
  check_rate_limit(request.client.host)
183
- with tempfile.NamedTemporaryFile(delete=False, suffix=".webm") as tmp:
184
- content = await audio.read()
185
- tmp.write(content)
186
- tmp_path = tmp.name
187
- try:
188
- with open(tmp_path, "rb") as f:
189
- transcription = client.audio.transcriptions.create(
190
- file=(os.path.basename(tmp_path), f.read()),
191
- model="whisper-large-v3-turbo",
192
- response_format="text",
193
- language="en"
194
- )
195
- return {"text": transcription}
196
- finally:
197
- os.unlink(tmp_path)
198
 
199
 
200
  @app.post("/api/respond")
@@ -211,7 +344,7 @@ async def get_response(request: Request):
211
  session = interview_sessions[session_id]
212
  full_message = user_message
213
  if code.strip():
214
- full_message += f"\n\n[Candidate submitted code]:\n```\n{code}\n```\nPlease review this code — check correctness, time/space complexity, edge cases, and coding style. Then continue the interview."
215
 
216
  session["messages"].append({"role": "user", "content": full_message})
217
  session["exchange_count"] += 1
@@ -222,19 +355,12 @@ async def get_response(request: Request):
222
  if context:
223
  session["messages"].append({
224
  "role": "system",
225
- "content": f"Relevant additional context from real interview experiences:\n{context}"
226
  })
227
 
228
  stage = "introduction" if count <= 3 else "technical" if count <= 8 else "behavioral" if count <= 11 else "closing"
229
-
230
- response = client.chat.completions.create(
231
- model="llama-3.3-70b-versatile",
232
- messages=session["messages"],
233
- max_tokens=200
234
- )
235
- ai_message = response.choices[0].message.content
236
  session["messages"].append({"role": "assistant", "content": ai_message})
237
-
238
  return {"message": ai_message, "stage": stage, "exchange_count": count}
239
 
240
 
@@ -246,53 +372,12 @@ async def synthesize_speech(request: Request):
246
  if not text:
247
  raise HTTPException(status_code=400, detail="No text provided")
248
 
249
- eleven_key = os.environ.get("ELEVENLABS_API_KEY")
250
-
251
- # PRIMARY: Groq Orpheus
252
- try:
253
- response = client.audio.speech.create(
254
- model="canopylabs/orpheus-v1-english",
255
- voice="daniel",
256
- input=text[:500],
257
- response_format="wav"
258
- )
259
- audio_bytes = response.read()
260
- return StreamingResponse(
261
- iter([audio_bytes]),
262
- media_type="audio/wav"
263
- )
264
-
265
- except Exception as groq_error:
266
- # FALLBACK: ElevenLabs if rate limited and key exists
267
- if eleven_key and ("rate_limit" in str(groq_error).lower() or "429" in str(groq_error)):
268
- try:
269
- async with httpx.AsyncClient() as http:
270
- r = await http.post(
271
- "https://api.elevenlabs.io/v1/text-to-speech/onwK4e9ZLuTAKqWW03F9",
272
- headers={
273
- "xi-api-key": eleven_key,
274
- "Content-Type": "application/json"
275
- },
276
- json={
277
- "text": text[:500],
278
- "model_id": "eleven_turbo_v2",
279
- "voice_settings": {
280
- "stability": 0.5,
281
- "similarity_boost": 0.75
282
- }
283
- },
284
- timeout=15.0
285
- )
286
- if r.status_code == 200:
287
- return StreamingResponse(
288
- iter([r.content]),
289
- media_type="audio/mpeg"
290
- )
291
- except Exception:
292
- pass
293
-
294
- # FINAL FALLBACK: tell frontend to use browser TTS
295
- raise HTTPException(status_code=503, detail="tts_unavailable")
296
 
297
 
298
  @app.get("/")
 
12
  load_dotenv()
13
 
14
  app = FastAPI()
15
+
16
# Provider credentials for the fallback chains below; each is optional —
# an empty value simply disables that provider.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY", "")
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY", "")
DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY", "")

# Groq client is created once at startup; None when no key is configured,
# which the fallback helpers check before attempting a Groq call.
groq_client = Groq(api_key=GROQ_API_KEY) if GROQ_API_KEY else None
23
 
24
  app.add_middleware(
25
  CORSMiddleware,
 
29
  )
30
 
31
# Per-client request timestamps, keyed by IP (see check_rate_limit usage).
request_counts = defaultdict(list)
RATE_LIMIT = 30   # max requests allowed per client within RATE_WINDOW
RATE_WINDOW = 60  # window length in seconds
34
 
35
  def check_rate_limit(ip: str):
 
41
 
42
  interview_sessions = {}
43
 
44
# ── LLM with fallback chain: Groq → Gemini → OpenRouter ──
async def call_llm(messages: list, max_tokens: int = 200) -> str:
    """Return a chat completion for `messages`, trying providers in order.

    Order: Groq (primary) → Google Gemini → OpenRouter free tier.
    A Groq failure only falls through when it looks like a rate limit;
    any other Groq error is re-raised so real bugs surface.

    Raises:
        HTTPException: 503 when every configured provider fails.
    """
    # 1. Try Groq
    if groq_client:
        try:
            res = groq_client.chat.completions.create(
                model="llama-3.3-70b-versatile",
                messages=messages,
                max_tokens=max_tokens
            )
            return res.choices[0].message.content
        except Exception as e:
            if "rate_limit" not in str(e).lower() and "429" not in str(e):
                raise  # bare raise keeps the original traceback intact
            print("Groq LLM limit hit, trying Gemini...")

    # 2. Try Google Gemini
    if GOOGLE_API_KEY:
        try:
            # Build the payload before opening the HTTP client; the
            # conversion is pure CPU work and needs no connection.
            payload = _gemini_payload(messages, max_tokens)
            async with httpx.AsyncClient() as http:
                r = await http.post(
                    f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key={GOOGLE_API_KEY}",
                    json=payload,
                    timeout=30.0
                )
            if r.status_code == 200:
                data = r.json()
                return data["candidates"][0]["content"]["parts"][0]["text"]
            print(f"Gemini error: {r.status_code} {r.text}")
        except Exception as e:
            print(f"Gemini failed: {e}")

    # 3. Try OpenRouter
    if OPENROUTER_API_KEY:
        try:
            async with httpx.AsyncClient() as http:
                r = await http.post(
                    "https://openrouter.ai/api/v1/chat/completions",
                    headers={
                        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
                        "Content-Type": "application/json"
                    },
                    json={
                        "model": "meta-llama/llama-3.3-70b-instruct:free",
                        "messages": messages,
                        "max_tokens": max_tokens
                    },
                    timeout=30.0
                )
            if r.status_code == 200:
                return r.json()["choices"][0]["message"]["content"]
            print(f"OpenRouter error: {r.status_code}")
        except Exception as e:
            print(f"OpenRouter failed: {e}")

    raise HTTPException(status_code=503, detail="All LLM providers exhausted")


def _gemini_payload(messages: list, max_tokens: int) -> dict:
    """Convert OpenAI-style chat messages into a Gemini generateContent payload."""
    system_msg = next((m["content"] for m in messages if m["role"] == "system"), "")
    contents = []
    for m in messages:
        if m["role"] == "system":
            continue  # system prompt goes into systemInstruction, not contents
        role = "user" if m["role"] == "user" else "model"
        contents.append({"role": role, "parts": [{"text": m["content"]}]})
    if not contents:
        # Gemini rejects an empty contents list; seed a minimal user turn.
        contents = [{"role": "user", "parts": [{"text": "Hello"}]}]

    payload = {
        "contents": contents,
        "generationConfig": {"maxOutputTokens": max_tokens}
    }
    if system_msg:
        payload["systemInstruction"] = {"parts": [{"text": system_msg}]}
    return payload
118
+
119
+
120
# ── STT with fallback: Groq Whisper → Deepgram ──
async def transcribe(audio_content: bytes, filename: str) -> str:
    """Transcribe `audio_content` to text, trying Groq Whisper then Deepgram.

    Raises:
        HTTPException: 503 when no configured provider succeeds.
    """
    # 1. Groq Whisper — pass the uploaded bytes directly as a (name, bytes)
    #    tuple. The previous temp-file roundtrip wrote the bytes to disk only
    #    to read them straight back, and leaked the file when the API call
    #    raised before the unlink.
    if groq_client:
        try:
            result = groq_client.audio.transcriptions.create(
                file=(filename, audio_content),
                model="whisper-large-v3-turbo",
                response_format="text",
                language="en"
            )
            return result
        except Exception as e:
            print(f"Groq STT failed: {e}")

    # 2. Deepgram
    if DEEPGRAM_API_KEY:
        try:
            async with httpx.AsyncClient() as http:
                r = await http.post(
                    "https://api.deepgram.com/v1/listen?model=nova-3&smart_format=true&language=en",
                    headers={
                        "Authorization": f"Token {DEEPGRAM_API_KEY}",
                        "Content-Type": "audio/webm"
                    },
                    content=audio_content,
                    timeout=20.0
                )
            if r.status_code == 200:
                return r.json()["results"]["channels"][0]["alternatives"][0]["transcript"]
        except Exception as e:
            print(f"Deepgram STT failed: {e}")

    raise HTTPException(status_code=503, detail="All STT providers exhausted")
159
+
160
+
161
# ── TTS with fallback chain: Groq Orpheus → ElevenLabs → Browser signal ──
async def synthesize(text: str):
    """Render `text` to speech.

    Returns a `(audio_bytes, media_type)` pair, or `(None, None)` when no
    provider produced audio — the caller then signals browser-side TTS.
    Input is truncated to 500 characters for every provider.
    """
    # 1. Groq Orpheus
    if groq_client:
        try:
            speech = groq_client.audio.speech.create(
                model="canopylabs/orpheus-v1-english",
                voice="daniel",
                input=text[:500],
                response_format="wav"
            )
            return speech.read(), "audio/wav"
        except Exception as e:
            print(f"Groq TTS failed: {e}")

    # 2. ElevenLabs
    if ELEVENLABS_API_KEY:
        request_headers = {
            "xi-api-key": ELEVENLABS_API_KEY,
            "Content-Type": "application/json"
        }
        request_body = {
            "text": text[:500],
            "model_id": "eleven_turbo_v2",
            "voice_settings": {"stability": 0.5, "similarity_boost": 0.75}
        }
        try:
            async with httpx.AsyncClient() as session:
                resp = await session.post(
                    "https://api.elevenlabs.io/v1/text-to-speech/onwK4e9ZLuTAKqWW03F9",
                    headers=request_headers,
                    json=request_body,
                    timeout=15.0
                )
            if resp.status_code == 200:
                return resp.content, "audio/mpeg"
        except Exception as e:
            print(f"ElevenLabs TTS failed: {e}")

    # No provider succeeded.
    return None, None
199
+
200
+
201
  SKILL_PROMPTS = {
202
  "dsa": """You are a senior DSA interviewer. Structure the interview exactly as follows:
203
 
204
  PHASE 1 - INTRODUCTION (first 2 exchanges):
205
+ - Warmly greet the candidate. Introduce yourself as Alex.
206
  - Ask them to introduce themselves and their background
207
  - Ask about their experience with data structures and algorithms
208
 
 
210
  - Start with an easy problem, then medium, then hard
211
  - ALWAYS first ask the candidate to explain their approach before coding
212
  - If the approach is correct, say "Great approach! Now go ahead and code it in the editor."
213
+ - If the approach is unclear or wrong, probe: "What is the time complexity?", "Can you think of a more optimal approach?"
214
  - Do NOT let them code until the approach is solid
215
  - After they submit code, review it: check correctness, complexity, edge cases
216
  - Ask follow-up optimization questions
217
 
218
  PHASE 3 - BEHAVIORAL (last 2-3 exchanges):
219
  - Ask about a challenging technical problem they solved
 
220
  - Wrap up and give brief honest feedback
221
 
222
+ Keep each response to 2-3 sentences. Ask one thing at a time.""",
223
 
224
+ "system_design": """You are a principal engineer conducting a system design interview. Introduce yourself as Alex.
225
 
226
  PHASE 1 - INTRODUCTION (first 2 exchanges):
227
+ - Greet warmly, introduce as Alex
228
+ - Ask them to describe the most complex system they've built
229
+
230
+ PHASE 2 - DESIGN PROBLEMS (next 5-6 exchanges):
231
+ - Give a design problem
232
+ - Guide: requirements high level deep dive scaling
233
+ - Probe on: databases, caching, load balancing, fault tolerance
234
+ - Ask "why" for every major decision
235
+
236
+ PHASE 3 - CLOSE (last 2 exchanges):
237
+ - Ask about a difficult technical trade-off they made
 
 
 
238
  - Close with feedback
239
 
240
  Keep responses to 2-3 sentences. Ask one question at a time.""",
241
 
242
+ "behavioral": """You are an experienced HR interviewer. Introduce yourself as Alex.
243
 
244
  PHASE 1 - INTRODUCTION (first 2 exchanges):
245
+ - Warmly welcome, introduce as Alex
246
+ - Ask them to walk through their background
247
+
248
+ PHASE 2 - BEHAVIORAL (next 5-6 exchanges):
249
+ - Use STAR method: Situation, Task, Action, Result
250
+ - Cover: leadership, conflict, failure & learning, teamwork
251
+ - Probe deeper: "What was YOUR specific contribution?"
252
+
253
+ PHASE 3 - CLOSE (last 2 exchanges):
254
+ - Ask about career goals
255
+ - Close with feedback
256
+
257
+ Keep responses to 2-3 sentences. Ask one question at a time.""",
 
 
 
258
  }
259
 
260
def build_company_prompt(company: str, role: str, context: str) -> str:
    """Build the system prompt for a company-specific mock interview.

    `context` is retrieved text of real interview experiences (see the
    `get_context` call site); it is embedded verbatim between the markers
    so the model can mirror that company's question style.
    """
    return f"""You are a senior {role} interviewer at {company}. Your name is Alex.

--- REAL {company.upper()} INTERVIEW EXPERIENCES ---
{context}
--- END ---

PHASE 1 - INTRODUCTION & RESUME (exchanges 1-3):
- Introduce yourself as Alex, senior {role} interviewer at {company}
- Ask candidate to introduce themselves
- Ask resume-based questions: past projects, tech stack, scale
- Ask why they want to join {company}

PHASE 2 - CODING ROUNDS (exchanges 4-8):
- Ask problems {company} is KNOWN to ask for {role}
- ALWAYS ask for approach FIRST before coding
- If approach correct: "Good. Go ahead and implement it in the code editor."
- If vague: probe on complexity, edge cases, optimization
- After code submitted: review correctness, complexity, style

PHASE 3 - BEHAVIORAL & FIT (exchanges 9-11):
- Ask behavioral questions {company} focuses on
- Amazon = Leadership Principles, Microsoft = growth mindset

PHASE 4 - CLOSE (exchange 12):
- Give honest constructive feedback

Rules: 2-3 sentences max per response. ONE question at a time."""
 
 
 
 
288
 
289
 
290
  @app.post("/api/start-interview")
 
294
  mode = body.get("mode", "behavioral")
295
  company = (body.get("company") or "").lower()
296
  role = body.get("role", "SDE")
297
+ user_name = body.get("user_name", "")
298
 
299
  session_id = str(time.time())
300
 
301
  if mode == "company" and company:
302
+ context = get_context(company, f"{role} interview questions {company} coding behavioral", n=8)
303
  system_prompt = build_company_prompt(company.capitalize(), role, context)
304
  else:
305
  system_prompt = SKILL_PROMPTS.get(mode, SKILL_PROMPTS["behavioral"])
306
 
307
+ if user_name:
308
+ system_prompt += f"\n\nThe candidate's name is {user_name}. Use their name naturally."
309
+
310
  interview_sessions[session_id] = {
311
  "messages": [{"role": "system", "content": system_prompt}],
312
  "mode": mode,
313
  "company": company,
314
  "role": role,
315
+ "user_name": user_name,
316
+ "exchange_count": 0,
317
+ "start_time": time.time()
318
  }
319
 
320
+ ai_message = await call_llm(interview_sessions[session_id]["messages"], max_tokens=150)
 
 
 
 
 
321
  interview_sessions[session_id]["messages"].append({"role": "assistant", "content": ai_message})
 
322
  return {"session_id": session_id, "message": ai_message}
323
 
324
 
325
  @app.post("/api/transcribe")
326
  async def transcribe_audio(request: Request, audio: UploadFile = File(...)):
327
  check_rate_limit(request.client.host)
328
+ content = await audio.read()
329
+ text = await transcribe(content, audio.filename or "rec.webm")
330
+ return {"text": text}
 
 
 
 
 
 
 
 
 
 
 
 
331
 
332
 
333
  @app.post("/api/respond")
 
344
  session = interview_sessions[session_id]
345
  full_message = user_message
346
  if code.strip():
347
+ full_message += f"\n\n[Candidate submitted code]:\n```\n{code}\n```\nReview: correctness, time/space complexity, edge cases, style. Then continue."
348
 
349
  session["messages"].append({"role": "user", "content": full_message})
350
  session["exchange_count"] += 1
 
355
  if context:
356
  session["messages"].append({
357
  "role": "system",
358
+ "content": f"Relevant context from real interview experiences:\n{context}"
359
  })
360
 
361
  stage = "introduction" if count <= 3 else "technical" if count <= 8 else "behavioral" if count <= 11 else "closing"
362
+ ai_message = await call_llm(session["messages"], max_tokens=200)
 
 
 
 
 
 
363
  session["messages"].append({"role": "assistant", "content": ai_message})
 
364
  return {"message": ai_message, "stage": stage, "exchange_count": count}
365
 
366
 
 
372
  if not text:
373
  raise HTTPException(status_code=400, detail="No text provided")
374
 
375
+ audio_bytes, media_type = await synthesize(text)
376
+ if audio_bytes:
377
+ return StreamingResponse(iter([audio_bytes]), media_type=media_type)
378
+
379
+ # Signal frontend to use browser TTS
380
+ raise HTTPException(status_code=503, detail="tts_unavailable")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
381
 
382
 
383
  @app.get("/")