lex-interviewer-chat

Sleeping

File size: 5,334 Bytes

634ca1b

"""Lex Fridman AI Interviewer — Qwen3-8B / Llama 3.3 70B (HF API) + Gemini 2.5 Pro (Google API)."""
import re
import os
import gradio as gr
from huggingface_hub import InferenceClient

# Base instructions placed in the system message of every conversation.
SYSTEM_PROMPT = (
    "You are an AI interviewer in the style of Lex Fridman. "
    "You conduct deep, long-form interviews. "
    "Ask one question at a time. "
    "Listen to the answer and follow up on the most interesting thread."
)

# Generation settings used for Hugging Face chat completions.
MAX_TOKENS = 800
TEMPERATURE = 1.0

# Dropdown label -> Hugging Face model id, for the models served through the
# HF Inference API.
MODEL_MAP = {
    "Qwen3-8B (via HF API)": "Qwen/Qwen3-8B",
    "Llama 3.3 70B (via HF API)": "meta-llama/Llama-3.3-70B-Instruct",
}

# Labels offered in the model dropdown: every HF-hosted label in MODEL_MAP
# (in insertion order), followed by Gemini, which has no MODEL_MAP entry.
MODEL_OPTIONS = [*MODEL_MAP, "Gemini 2.5 Pro (via Google API)"]

# Module-level Hugging Face inference client, created once at import time and
# used by query_hf_model for every chat completion. No model or token is passed
# here: the model id is supplied per call, and authentication relies on the
# client picking up the Space's built-in token on its own.
# NOTE(review): confirm a token is actually available in the deployment
# environment; nothing in this file sets one explicitly.
hf_client = InferenceClient()


def strip_think_tags(text):
    """Remove ``<think>`` reasoning markup from model output.

    Complete ``<think>...</think>`` blocks are dropped together with their
    contents. Any unpaired closing or opening tag left afterwards is removed
    on its own, keeping the surrounding text. Leading and trailing whitespace
    is trimmed after each pass.
    """
    # DOTALL lets a reasoning block span several lines; the non-greedy match
    # keeps separate blocks from being merged into one.
    cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
    # Closing tags go first, then opening tags, matching the original order.
    for stray_tag in (r"</think>", r"<think>"):
        cleaned = re.sub(stray_tag, "", cleaned).strip()
    return cleaned


def query_hf_model(messages, model_choice):
    """Generate a reply through the Hugging Face Inference API.

    Args:
        messages: Chat messages as a list of ``{"role": ..., "content": ...}``
            dicts, passed unchanged to ``hf_client.chat_completion``.
        model_choice: Dropdown label, looked up in ``MODEL_MAP``. Labels that
            are not in the map fall back to ``"Qwen/Qwen3-8B"``.

    Returns:
        The reply text with ``<think>`` markup removed. If the message content
        is empty once cleaned, the message's ``reasoning_content`` (when
        present) is used instead. Returns ``"No response generated"`` when
        neither yields any text, and ``"Error: <details>"`` if the API call or
        response handling raises.
    """
    model_id = MODEL_MAP.get(model_choice, "Qwen/Qwen3-8B")
    try:
        response = hf_client.chat_completion(
            model=model_id,
            messages=messages,
            max_tokens=MAX_TOKENS,
            temperature=TEMPERATURE,
        )
        reply = response.choices[0].message
        content = reply.content or ""
        reasoning = getattr(reply, "reasoning_content", "") or ""
        # Clean each candidate BEFORE testing it for emptiness: a reply that
        # consists only of a <think> block (or whitespace) is truthy as raw
        # text but empty once stripped, and would otherwise reach the chat
        # window as a blank message.
        for candidate in (content, reasoning):
            cleaned = strip_think_tags(candidate)
            if cleaned:
                return cleaned
        return "No response generated"
    except Exception as e:
        # UI boundary: surface the failure in the chat instead of crashing.
        return f"Error: {e}"


def query_gemini(messages):
    """Generate a reply with Gemini 2.5 Pro through the Google REST API.

    Args:
        messages: Chat messages as a list of ``{"role": ..., "content": ...}``
            dicts. A ``"system"`` message becomes the request's
            ``systemInstruction`` (the last one wins if several are present);
            ``"user"`` and ``"assistant"`` messages become ``contents`` entries
            with roles ``"user"`` and ``"model"``. Other roles are ignored.

    Returns:
        The text parts of the first candidate joined with single spaces,
        skipping parts flagged as ``thought``. Returns
        ``"No response generated"`` when the response has no usable text,
        ``"API error: <message>"`` when the response has no ``candidates``
        key, and ``"Error: <details>"`` when the API key is missing or the
        request/response handling raises.
    """
    api_key = os.environ.get("GOOGLE_API_KEY", "")
    if not api_key:
        return "Error: GOOGLE_API_KEY not set in Space secrets"

    # Imported only once a request will actually be made.
    import requests

    system = ""
    contents = []
    for msg in messages:
        if msg["role"] == "system":
            system = msg["content"]
        elif msg["role"] == "user":
            contents.append({"role": "user", "parts": [{"text": msg["content"]}]})
        elif msg["role"] == "assistant":
            # The Gemini API names the assistant role "model".
            contents.append({"role": "model", "parts": [{"text": msg["content"]}]})

    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent"
    # The key travels in a header, not in the URL: requests' exception messages
    # include the request URL, and the except branch below shows the exception
    # text to the chat user, which would otherwise leak the key.
    headers = {"x-goog-api-key": api_key}
    body = {
        "contents": contents,
        "generationConfig": {
            "temperature": 1.0,
            "maxOutputTokens": 4096,
            "thinkingConfig": {"thinkingBudget": 1024},
        },
    }
    if system:
        body["systemInstruction"] = {"parts": [{"text": system}]}

    try:
        r = requests.post(url, headers=headers, json=body, timeout=120)
        d = r.json()
        if "candidates" in d:
            candidates = d["candidates"]
            # An empty candidate list is treated as "no text" rather than
            # surfacing an IndexError message to the user.
            parts = candidates[0].get("content", {}).get("parts", []) if candidates else []
            text_parts = [p["text"] for p in parts if "text" in p and not p.get("thought", False)]
            return " ".join(text_parts) if text_parts else "No response generated"
        return f"API error: {d.get('error', {}).get('message', 'unknown')}"
    except Exception as e:
        # UI boundary: surface the failure in the chat instead of crashing.
        return f"Error: {e}"


def chat_fn(message, history, model_choice, guest_context):
    """Assemble the conversation for one chat turn and send it to the chosen model.

    The message list starts with the system prompt (extended with the guest
    context when one is given), continues with the user/assistant turns from
    ``history`` that have non-empty content, and ends with the new ``message``.
    Model labels containing ``"Gemini"`` are handled by ``query_gemini``;
    every other label goes to ``query_hf_model``.
    """
    extra_context = guest_context.strip() if guest_context else ""
    system = SYSTEM_PROMPT
    if extra_context:
        system = SYSTEM_PROMPT + f"\n\nGuest context: {extra_context}"

    # Keep only well-formed prior turns; other roles and empty contents are skipped.
    prior_turns = [
        {"role": turn.get("role", ""), "content": turn.get("content", "")}
        for turn in history
        if turn.get("role", "") in ("user", "assistant") and turn.get("content", "")
    ]
    messages = [
        {"role": "system", "content": system},
        *prior_turns,
        {"role": "user", "content": message},
    ]

    if "Gemini" in model_choice:
        return query_gemini(messages)
    return query_hf_model(messages, model_choice)


# Custom stylesheet for the page: caps the Gradio container width at 980px
# and hides the footer element.
CSS = """
.gradio-container { max-width: 980px !important; }
footer { display: none !important; }
"""

# Page layout. Components appear in the order they are created inside the
# Blocks context: header text, model dropdown, guest-context box, chat panel.
with gr.Blocks(theme=gr.themes.Soft(), css=CSS) as demo:
    # Header and short usage instructions.
    gr.Markdown(
        """
# 🎙️ Lex Fridman AI Interviewer

**Deep, long-form interviews powered by AI** · Qwen3-8B / Llama 3.3 70B / Gemini 2.5 Pro

You are the guest — the AI asks questions in Lex Fridman's interview style.
Paste any context about yourself to get more relevant questions.
"""
    )
    # Model selector; defaults to the first entry of MODEL_OPTIONS.
    with gr.Row():
        model_choice = gr.Dropdown(
            choices=MODEL_OPTIONS,
            value=MODEL_OPTIONS[0],
            label="Model",
        )
    # Optional free-text background about the guest; chat_fn appends it to the
    # system prompt when it is non-blank.
    guest_context = gr.Textbox(
        label="Guest Context (optional)",
        placeholder="Paste guest bio, paper abstract, or describe yourself.",
        lines=2,
    )
    # Chat panel backed by chat_fn. The two additional inputs correspond to
    # chat_fn's model_choice and guest_context parameters, in that order.
    # Each example row is [message, model label, guest context].
    gr.ChatInterface(
        fn=chat_fn,
        type="messages",
        additional_inputs=[model_choice, guest_context],
        examples=[
            ["I think the most important thing about neural networks is that they're actually quite simple.", "Qwen3-8B (via HF API)", "Guest: Andrej Karpathy, AI researcher"],
            ["The biggest challenge in robotics isn't the hardware — it's the software.", "Llama 3.3 70B (via HF API)", "Robotics engineer working on humanoid locomotion"],
            ["Hello, I'm excited to be here.", "Gemini 2.5 Pro (via Google API)", ""],
        ],
    )

# Launch the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()