File size: 5,334 Bytes
634ca1b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
"""Lex Fridman AI Interviewer — Qwen3-8B / Llama 3.3 70B (HF API) + Gemini 2.5 Pro (Google API)."""
import re
import os
import gradio as gr
from huggingface_hub import InferenceClient

# Persona and ground rules given to whichever model is selected.
SYSTEM_PROMPT = (
    "You are an AI interviewer in the style of Lex Fridman. "
    "You conduct deep, long-form interviews. "
    "Ask one question at a time. "
    "Listen to the answer and follow up on the most interesting thread."
)

# Sampling settings used for the Hugging Face chat-completion call.
MAX_TOKENS = 800
TEMPERATURE = 1.0

# Dropdown label -> Hugging Face model id for the models served via the HF API.
MODEL_MAP = {
    "Qwen3-8B (via HF API)": "Qwen/Qwen3-8B",
    "Llama 3.3 70B (via HF API)": "meta-llama/Llama-3.3-70B-Instruct",
}

# Labels shown in the model dropdown: the HF-backed models in declaration
# order, followed by the Gemini option, which has no entry in MODEL_MAP.
MODEL_OPTIONS = [*MODEL_MAP, "Gemini 2.5 Pro (via Google API)"]

# Shared Hugging Face inference client, created once at import time and reused
# for every chat request. No token is passed explicitly; the client is expected
# to pick up the Space's built-in token on its own.
# NOTE(review): confirm the Space actually exposes a token to the client.
hf_client = InferenceClient()


def strip_think_tags(text):
    """Return *text* with model "thinking" markup removed.

    Complete ``<think>...</think>`` spans (which may cover several lines) are
    dropped together with their contents. Any unpaired opening or closing tag
    left over afterwards is deleted on its own, keeping the text around it.
    Leading and trailing whitespace is trimmed from the result.
    """
    cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL)
    # Stray tags are plain literals, so a simple replace is enough; the closing
    # tag is handled before the opening one.
    for stray_tag in ("</think>", "<think>"):
        cleaned = cleaned.replace(stray_tag, "")
    return cleaned.strip()


def query_hf_model(messages, model_choice):
    """Get the next interviewer reply from a model served by the HF Inference API.

    Args:
        messages: Chat messages (dicts with "role" and "content") passed
            straight through to ``hf_client.chat_completion``.
        model_choice: Dropdown label; looked up in ``MODEL_MAP``. Unknown
            labels fall back to "Qwen/Qwen3-8B".

    Returns:
        The reply with ``<think>`` markup stripped. The message content is
        preferred; if it is empty (or contains nothing but thinking markup)
        the message's ``reasoning_content`` attribute, when present, is used
        instead. "No response generated" is returned when neither yields any
        text, and "Error: ..." when the API call or response handling raises.
    """
    model_id = MODEL_MAP.get(model_choice, "Qwen/Qwen3-8B")
    try:
        response = hf_client.chat_completion(
            model=model_id,
            messages=messages,
            max_tokens=MAX_TOKENS,
            temperature=TEMPERATURE,
        )
        reply = response.choices[0].message
        content = reply.content or ""
        reasoning = getattr(reply, "reasoning_content", "") or ""
        # Clean each candidate before deciding it is usable: content that is
        # only a <think> block strips down to "", which must not be returned
        # as an empty chat message.
        for candidate in (content, reasoning):
            cleaned = strip_think_tags(candidate) if candidate else ""
            if cleaned:
                return cleaned
        return "No response generated"
    except Exception as e:
        # Surface the failure in the chat window instead of crashing the UI.
        return f"Error: {e}"


def query_gemini(messages):
    """Get the next interviewer reply from Gemini 2.5 Pro via Google's REST API.

    Args:
        messages: Chat messages as dicts with "role" and "content" keys.
            "system" content becomes the system instruction (the last one
            wins if several are present), "user" turns are sent as-is and
            "assistant" turns are sent under the Gemini role "model".
            Messages with any other role are ignored.

    Returns:
        The reply text on success. Failures are reported as strings rather
        than raised: "Error: ..." for a missing GOOGLE_API_KEY or any
        exception, "API error: ..." when the response has no "candidates"
        field, and "No response generated" when no non-thought text parts
        came back.
    """
    api_key = os.environ.get("GOOGLE_API_KEY", "")
    if not api_key:
        return "Error: GOOGLE_API_KEY not set in Space secrets"

    # Imported lazily, and only once a key is known to be configured.
    import requests

    system = ""
    contents = []
    gemini_roles = {"user": "user", "assistant": "model"}
    for msg in messages:
        role = msg["role"]
        if role == "system":
            system = msg["content"]
        elif role in gemini_roles:
            contents.append(
                {"role": gemini_roles[role], "parts": [{"text": msg["content"]}]}
            )

    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent"
    # The key travels in a header rather than the URL query string: request
    # exceptions can include the URL in their message, and that message is
    # returned to the chat user below.
    headers = {"x-goog-api-key": api_key}
    try:
        body = {
            "contents": contents,
            "generationConfig": {
                "temperature": 1.0,
                "maxOutputTokens": 4096,
                "thinkingConfig": {"thinkingBudget": 1024},
            },
        }
        if system:
            body["systemInstruction"] = {"parts": [{"text": system}]}

        r = requests.post(url, headers=headers, json=body, timeout=120)
        d = r.json()
        if "candidates" in d:
            candidates = d["candidates"]
            # An empty candidate list is treated as "no reply" rather than
            # surfacing an IndexError to the user.
            first = candidates[0] if candidates else {}
            parts = first.get("content", {}).get("parts", [])
            # Keep only visible answer text; parts flagged as thoughts are skipped.
            text_parts = [p["text"] for p in parts if "text" in p and not p.get("thought", False)]
            return " ".join(text_parts) if text_parts else "No response generated"
        return f"API error: {d.get('error', {}).get('message', 'unknown')}"
    except Exception as e:
        # Surface the failure in the chat window instead of crashing the UI.
        return f"Error: {e}"


def chat_fn(message, history, model_choice, guest_context):
    """Build the full conversation and route it to the selected model backend.

    Args:
        message: The guest's latest message.
        history: Earlier turns as dicts with "role" and "content"; only
            "user"/"assistant" turns with non-empty content are forwarded.
        model_choice: Dropdown label. Any label containing "Gemini" goes to
            ``query_gemini``; everything else goes to ``query_hf_model``.
        guest_context: Optional free-text background on the guest, appended
            to the system prompt when it is not blank.

    Returns:
        The reply string produced by the chosen backend.
    """
    guest_notes = guest_context.strip() if guest_context else ""
    system_text = SYSTEM_PROMPT
    if guest_notes:
        system_text += f"\n\nGuest context: {guest_notes}"

    prior_turns = [
        {"role": turn.get("role", ""), "content": turn.get("content", "")}
        for turn in history
        if turn.get("role", "") in ("user", "assistant") and turn.get("content", "")
    ]
    messages = [
        {"role": "system", "content": system_text},
        *prior_turns,
        {"role": "user", "content": message},
    ]

    if "Gemini" not in model_choice:
        return query_hf_model(messages, model_choice)
    return query_gemini(messages)


# Page-level CSS: cap the app width and hide the default footer.
CSS = """
.gradio-container { max-width: 980px !important; }
footer { display: none !important; }
"""

# Intro text rendered at the top of the page.
_HEADER_MARKDOWN = """
# 🎙️ Lex Fridman AI Interviewer

**Deep, long-form interviews powered by AI** · Qwen3-8B / Llama 3.3 70B / Gemini 2.5 Pro

You are the guest — the AI asks questions in Lex Fridman's interview style.
Paste any context about yourself to get more relevant questions.
"""

# Clickable examples; each row is [opening message, model label, guest context],
# matching the chat input followed by the additional inputs.
_EXAMPLE_ROWS = [
    ["I think the most important thing about neural networks is that they're actually quite simple.", "Qwen3-8B (via HF API)", "Guest: Andrej Karpathy, AI researcher"],
    ["The biggest challenge in robotics isn't the hardware — it's the software.", "Llama 3.3 70B (via HF API)", "Robotics engineer working on humanoid locomotion"],
    ["Hello, I'm excited to be here.", "Gemini 2.5 Pro (via Google API)", ""],
]

# Components are created top to bottom in the order they appear on the page:
# header, model picker, guest-context box, then the chat itself.
with gr.Blocks(css=CSS, theme=gr.themes.Soft()) as demo:
    gr.Markdown(_HEADER_MARKDOWN)
    with gr.Row():
        model_choice = gr.Dropdown(
            label="Model",
            choices=MODEL_OPTIONS,
            value=MODEL_OPTIONS[0],
        )
    guest_context = gr.Textbox(
        lines=2,
        label="Guest Context (optional)",
        placeholder="Paste guest bio, paper abstract, or describe yourself.",
    )
    # The dropdown and textbox values are passed to chat_fn after
    # (message, history) on every turn.
    gr.ChatInterface(
        fn=chat_fn,
        type="messages",
        examples=_EXAMPLE_ROWS,
        additional_inputs=[model_choice, guest_context],
    )

# Launch the app only when this file is run as a script; importing the module
# builds `demo` without launching it.
if __name__ == "__main__":
    demo.launch()