# bobber's picture
# Sync space files (2026-04-05)
# 634ca1b verified
"""Lex Fridman AI Interviewer — Qwen3-8B / Llama 3.3 70B (HF API) + Gemini 2.5 Pro (Google API)."""
import re
import os
import gradio as gr
from huggingface_hub import InferenceClient
# Persona instructions sent as the system message of every conversation.
SYSTEM_PROMPT = (
    "You are an AI interviewer in the style of Lex Fridman. "
    "You conduct deep, long-form interviews. "
    "Ask one question at a time. "
    "Listen to the answer and follow up on the most interesting thread."
)

# Sampling settings used for the Hugging Face chat-completion requests.
MAX_TOKENS = 800
TEMPERATURE = 1.0

# Dropdown label -> Hugging Face model id for the models served via the HF API.
MODEL_MAP = {
    "Qwen3-8B (via HF API)": "Qwen/Qwen3-8B",
    "Llama 3.3 70B (via HF API)": "meta-llama/Llama-3.3-70B-Instruct",
}

# Labels offered in the model dropdown: the HF-backed models in declaration
# order, followed by the Gemini entry (which has no MODEL_MAP id).
MODEL_OPTIONS = [*MODEL_MAP, "Gemini 2.5 Pro (via Google API)"]
# Shared Hugging Face inference client, created once at import time and used by
# query_hf_model for every chat-completion request. No token is passed here.
# NOTE(review): the original note says InferenceClient auto-uses the Space's
# built-in token — confirm a token is actually available in the deployment.
hf_client = InferenceClient()
def strip_think_tags(text):
    """Remove model "thinking" markup from *text*.

    Three passes run in order, trimming surrounding whitespace after each:
    complete ``<think>...</think>`` spans (which may cover several lines),
    then any leftover ``</think>`` tags, then any leftover ``<think>`` tags.
    In the last two passes only the tag itself is dropped; text next to an
    unmatched tag is kept.
    """
    passes = (
        (r"<think>.*?</think>", re.DOTALL),
        (r"</think>", 0),
        (r"<think>", 0),
    )
    cleaned = text
    for pattern, flags in passes:
        cleaned = re.sub(pattern, "", cleaned, flags=flags).strip()
    return cleaned
def query_hf_model(messages, model_choice):
    """Generate a reply through the Hugging Face Inference API.

    Args:
        messages: Chat history as a list of ``{"role", "content"}`` dicts,
            passed unchanged to ``hf_client.chat_completion``.
        model_choice: Dropdown label, looked up in ``MODEL_MAP``. Labels not
            present there fall back to ``"Qwen/Qwen3-8B"``.

    Returns:
        The reply text with think tags removed, ``"No response generated"``
        when nothing is left to show, or ``"Error: ..."`` if the request
        raised. Errors are returned as text rather than raised because the
        result is displayed directly in the chat.
    """
    model_id = MODEL_MAP.get(model_choice, "Qwen/Qwen3-8B")
    try:
        response = hf_client.chat_completion(
            model=model_id,
            messages=messages,
            max_tokens=MAX_TOKENS,
            temperature=TEMPERATURE,
        )
        reply = response.choices[0].message
        content = reply.content or ""
        # Fall back to `reasoning_content` (when the message has one) if the
        # regular content is empty.
        reasoning = getattr(reply, "reasoning_content", "") or ""
        # Strip BEFORE testing for emptiness: a reply made up solely of a
        # <think>...</think> block would otherwise come back as "".
        cleaned = strip_think_tags(content or reasoning)
        return cleaned or "No response generated"
    except Exception as e:
        return f"Error: {e}"
def query_gemini(messages):
    """Generate a reply through the Google Gemini ``generateContent`` REST API.

    Args:
        messages: List of ``{"role", "content"}`` dicts. A ``"system"``
            message becomes the request's ``systemInstruction`` (the last one
            wins), ``"user"`` maps to role ``"user"`` and ``"assistant"`` to
            role ``"model"``. Any other role is ignored.

    Returns:
        The non-thought text parts of the first candidate joined by spaces,
        ``"No response generated"`` if that candidate has no such parts,
        ``"API error: ..."`` if the response carries no candidates, or
        ``"Error: ..."`` if the key is missing or the request raised.
    """
    api_key = os.environ.get("GOOGLE_API_KEY", "")
    if not api_key:
        return "Error: GOOGLE_API_KEY not set in Space secrets"

    # Imported lazily, and only once a key is known to exist, so the
    # missing-key path does not depend on `requests`.
    import requests

    system = ""
    contents = []
    for msg in messages:
        role = msg["role"]
        if role == "system":
            system = msg["content"]
        elif role == "user":
            contents.append({"role": "user", "parts": [{"text": msg["content"]}]})
        elif role == "assistant":
            contents.append({"role": "model", "parts": [{"text": msg["content"]}]})

    url = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-pro:generateContent"
    # The key travels in a header rather than the query string: `requests`
    # exceptions can embed the request URL in their message, and the handler
    # below returns that message to the chat, which would expose the key.
    headers = {"x-goog-api-key": api_key}
    body = {
        "contents": contents,
        "generationConfig": {
            "temperature": 1.0,
            "maxOutputTokens": 4096,
            "thinkingConfig": {"thinkingBudget": 1024},
        },
    }
    if system:
        body["systemInstruction"] = {"parts": [{"text": system}]}

    try:
        r = requests.post(url, headers=headers, json=body, timeout=120)
        d = r.json()
        # Truthiness check (not just key presence) so an empty candidate list
        # is reported as an API error instead of an IndexError message.
        candidates = d.get("candidates")
        if candidates:
            parts = candidates[0].get("content", {}).get("parts", [])
            # Skip parts flagged as "thought"; only answer text is shown.
            text_parts = [p["text"] for p in parts if "text" in p and not p.get("thought", False)]
            return " ".join(text_parts) if text_parts else "No response generated"
        return f"API error: {d.get('error', {}).get('message', 'unknown')}"
    except Exception as e:
        return f"Error: {e}"
def chat_fn(message, history, model_choice, guest_context):
    """Build the full conversation and route it to the selected backend.

    The system message is ``SYSTEM_PROMPT``, extended with the trimmed guest
    context when one is given. Prior turns are copied from *history* only if
    their role is ``"user"`` or ``"assistant"`` and their content is
    non-empty; the new *message* is appended as the final user turn. Labels
    containing ``"Gemini"`` go to ``query_gemini``; everything else goes to
    ``query_hf_model``.
    """
    system_text = SYSTEM_PROMPT
    trimmed_context = guest_context.strip() if guest_context else ""
    if trimmed_context:
        system_text += f"\n\nGuest context: {trimmed_context}"

    conversation = [{"role": "system", "content": system_text}]
    for turn in history:
        role = turn.get("role", "")
        content = turn.get("content", "")
        # Drop turns with an unexpected role or nothing to say.
        if role not in ("user", "assistant") or not content:
            continue
        conversation.append({"role": role, "content": content})
    conversation.append({"role": "user", "content": message})

    if "Gemini" in model_choice:
        return query_gemini(conversation)
    return query_hf_model(conversation, model_choice)
# Custom stylesheet handed to gr.Blocks below: caps the container width at
# 980px and hides the page footer.
CSS = """
.gradio-container { max-width: 980px !important; }
footer { display: none !important; }
"""
# Assemble the page: intro text, a row of settings, then the chat itself.
# NOTE(review): indentation was lost in this copy of the file; the dropdown and
# the context textbox are assumed to sit together inside the row — confirm.
with gr.Blocks(theme=gr.themes.Soft(), css=CSS) as demo:
    # Intro banner shown above the controls.
    gr.Markdown(
        """
# 🎙️ Lex Fridman AI Interviewer
**Deep, long-form interviews powered by AI** · Qwen3-8B / Llama 3.3 70B / Gemini 2.5 Pro
You are the guest — the AI asks questions in Lex Fridman's interview style.
Paste any context about yourself to get more relevant questions.
"""
    )
    with gr.Row():
        # Backend selector; defaults to the first entry of MODEL_OPTIONS.
        model_choice = gr.Dropdown(
            choices=MODEL_OPTIONS,
            value=MODEL_OPTIONS[0],
            label="Model",
        )
        # Optional free-text background that chat_fn appends to the system prompt.
        guest_context = gr.Textbox(
            label="Guest Context (optional)",
            placeholder="Paste guest bio, paper abstract, or describe yourself.",
            lines=2,
        )
    # Chat widget driven by chat_fn. The two components above supply its
    # model_choice and guest_context arguments; chat_fn reads each history
    # item as a dict with "role" and "content" keys.
    gr.ChatInterface(
        fn=chat_fn,
        type="messages",
        additional_inputs=[model_choice, guest_context],
        # Each example row is [message, model label, guest context].
        examples=[
            ["I think the most important thing about neural networks is that they're actually quite simple.", "Qwen3-8B (via HF API)", "Guest: Andrej Karpathy, AI researcher"],
            ["The biggest challenge in robotics isn't the hardware — it's the software.", "Llama 3.3 70B (via HF API)", "Robotics engineer working on humanoid locomotion"],
            ["Hello, I'm excited to be here.", "Gemini 2.5 Pro (via Google API)", ""],
        ],
    )
# Launch the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()