Spaces:
Sleeping
Sleeping
File size: 5,334 Bytes
"""Lex Fridman AI Interviewer — Qwen3-8B / Llama 3.3 70B (HF API) + Gemini 2.5 Pro."""
import re
import os
import gradio as gr
from huggingface_hub import InferenceClient
# Interviewer persona sent as the system message on every turn.
SYSTEM_PROMPT = (
    "You are an AI interviewer in the style of Lex Fridman. "
    "You conduct deep, long-form interviews. "
    "Ask one question at a time. "
    "Listen to the answer and follow up on the most interesting thread."
)

# Generation settings used for the Hugging Face chat-completion call.
MAX_TOKENS = 800
TEMPERATURE = 1.0

# Dropdown label -> Hugging Face model id, for the HF-routed models only.
MODEL_MAP = dict(
    [
        ("Qwen3-8B (via HF API)", "Qwen/Qwen3-8B"),
        ("Llama 3.3 70B (via HF API)", "meta-llama/Llama-3.3-70B-Instruct"),
    ]
)

# Labels offered in the model dropdown: the HF-routed models in map order,
# followed by the Gemini entry, which has no MODEL_MAP id.
MODEL_OPTIONS = [*MODEL_MAP, "Gemini 2.5 Pro (via Google API)"]
# Shared Hugging Face Inference API client, created once at import time and
# reused by query_hf_model for every request.
# No token is passed explicitly; presumably the client picks up credentials
# from the environment (e.g. the Space's token) — TODO confirm.
hf_client = InferenceClient()
def strip_think_tags(text):
    """Remove ``<think>`` reasoning markup from model output.

    Complete ``<think>...</think>`` spans are dropped together with their
    contents (spans may cross line breaks). Any unpaired closing or opening
    tag that remains is then deleted on its own, leaving the text around it
    in place. Surrounding whitespace is trimmed after each step.
    """
    cleaned = re.sub(r"<think>.*?</think>", "", text, flags=re.DOTALL).strip()
    # Leftover tags are plain literals, so a straight replace is enough.
    for stray_tag in ("</think>", "<think>"):
        cleaned = cleaned.replace(stray_tag, "").strip()
    return cleaned
def query_hf_model(messages, model_choice):
    """Run one chat completion through the Hugging Face Inference API.

    Args:
        messages: Chat history as a list of ``{"role", "content"}`` dicts,
            passed unchanged to ``hf_client.chat_completion``.
        model_choice: Dropdown label, looked up in ``MODEL_MAP``. Labels not
            present in the map fall back to ``"Qwen/Qwen3-8B"``.

    Returns:
        The reply text with ``<think>`` markup stripped; the string
        ``"No response generated"`` when nothing displayable remains; or an
        ``"Error: ..."`` string when the API call or response handling raises.
    """
    model_id = MODEL_MAP.get(model_choice, "Qwen/Qwen3-8B")
    try:
        response = hf_client.chat_completion(
            model=model_id,
            messages=messages,
            max_tokens=MAX_TOKENS,
            temperature=TEMPERATURE,
        )
        reply = response.choices[0].message
        content = reply.content or ""
        reasoning = getattr(reply, "reasoning_content", "") or ""
        # Prefer the visible answer; use the reasoning channel only when the
        # answer is empty.
        cleaned = strip_think_tags(content or reasoning)
        # Test the stripped text, not the raw text: a reply made up solely of
        # a <think> span would otherwise come back as an empty string.
        return cleaned or "No response generated"
    except Exception as e:
        # UI boundary: report the failure in the chat instead of raising.
        return f"Error: {e}"
def query_gemini(messages):
    """Send the conversation to Gemini 2.5 Pro through the Google REST API.

    Args:
        messages: List of ``{"role", "content"}`` dicts. A ``"system"`` entry
            becomes the request's ``systemInstruction`` (the last one wins);
            ``"user"`` and ``"assistant"`` entries become ``contents`` turns
            with roles ``"user"`` and ``"model"``. Other roles are ignored.

    Returns:
        The concatenated non-thought text parts of the first candidate;
        ``"No response generated"`` when that candidate has no such parts;
        an ``"API error: ..."`` string when the response carries no
        candidates; or an ``"Error: ..."`` string when the key is missing or
        the request / response handling raises.
    """
    api_key = os.environ.get("GOOGLE_API_KEY", "")
    if not api_key:
        return "Error: GOOGLE_API_KEY not set in Space secrets"

    # Imported lazily so the missing-key path does not need the HTTP library.
    import requests

    role_map = {"user": "user", "assistant": "model"}
    system = ""
    contents = []
    for msg in messages:
        role = msg["role"]
        if role == "system":
            system = msg["content"]
        elif role in role_map:
            contents.append(
                {"role": role_map[role], "parts": [{"text": msg["content"]}]}
            )

    url = (
        "https://generativelanguage.googleapis.com/v1beta/models/"
        "gemini-2.5-pro:generateContent"
    )
    # The key travels in a header rather than the query string: requests
    # exceptions embed the full URL, and the except clause below echoes the
    # exception text into the chat, which would expose the key.
    headers = {"x-goog-api-key": api_key}
    body = {
        "contents": contents,
        "generationConfig": {
            "temperature": 1.0,
            "maxOutputTokens": 4096,
            "thinkingConfig": {"thinkingBudget": 1024},
        },
    }
    if system:
        body["systemInstruction"] = {"parts": [{"text": system}]}

    try:
        r = requests.post(url, headers=headers, json=body, timeout=120)
        d = r.json()
        candidates = d.get("candidates")
        # An absent or empty candidate list is reported as an API error
        # instead of surfacing an IndexError.
        if candidates:
            parts = candidates[0].get("content", {}).get("parts", [])
            text_parts = [
                p["text"]
                for p in parts
                if "text" in p and not p.get("thought", False)
            ]
            # Parts are consecutive fragments of one reply; join them without
            # a separator so no extra spaces are injected.
            return "".join(text_parts) if text_parts else "No response generated"
        return f"API error: {d.get('error', {}).get('message', 'unknown')}"
    except Exception as e:
        # UI boundary: report the failure in the chat instead of raising.
        return f"Error: {e}"
def chat_fn(message, history, model_choice, guest_context):
    """Build the message list for one chat turn and route it to a backend.

    The system prompt is ``SYSTEM_PROMPT``, extended with a
    ``Guest context:`` paragraph when ``guest_context`` holds non-blank text.
    Prior ``history`` entries are kept only when their role is ``"user"`` or
    ``"assistant"`` and their content is truthy; the new ``message`` is
    appended as the final user turn.

    Returns whatever the chosen backend returns: ``query_gemini`` when the
    label contains ``"Gemini"``, otherwise ``query_hf_model``.
    """
    context = guest_context.strip() if guest_context else ""
    system_text = SYSTEM_PROMPT
    if context:
        system_text = f"{system_text}\n\nGuest context: {context}"

    messages = [{"role": "system", "content": system_text}]
    for turn in history:
        role = turn.get("role", "")
        content = turn.get("content", "")
        # Skip anything that is not a non-empty user/assistant turn.
        if role not in ("user", "assistant") or not content:
            continue
        messages.append({"role": role, "content": content})
    messages.append({"role": "user", "content": message})

    if "Gemini" in model_choice:
        return query_gemini(messages)
    return query_hf_model(messages, model_choice)
# Page-level styling: cap the layout width and hide the Gradio footer.
CSS = """
.gradio-container { max-width: 980px !important; }
footer { display: none !important; }
"""

# Header text rendered at the top of the page.
_INTRO_MARKDOWN = """
# 🎙️ Lex Fridman AI Interviewer
**Deep, long-form interviews powered by AI** · Qwen3-8B / Llama 3.3 70B / Gemini 2.5 Pro
You are the guest — the AI asks questions in Lex Fridman's interview style.
Paste any context about yourself to get more relevant questions.
"""

# Example rows: [opening message, model label, guest context].
_EXAMPLES = [
    [
        "I think the most important thing about neural networks is that they're actually quite simple.",
        "Qwen3-8B (via HF API)",
        "Guest: Andrej Karpathy, AI researcher",
    ],
    [
        "The biggest challenge in robotics isn't the hardware — it's the software.",
        "Llama 3.3 70B (via HF API)",
        "Robotics engineer working on humanoid locomotion",
    ],
    [
        "Hello, I'm excited to be here.",
        "Gemini 2.5 Pro (via Google API)",
        "",
    ],
]

with gr.Blocks(theme=gr.themes.Soft(), css=CSS) as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # Model picker and optional guest bio share one row above the chat.
    with gr.Row():
        model_choice = gr.Dropdown(
            choices=MODEL_OPTIONS,
            value=MODEL_OPTIONS[0],
            label="Model",
        )
        guest_context = gr.Textbox(
            label="Guest Context (optional)",
            placeholder="Paste guest bio, paper abstract, or describe yourself.",
            lines=2,
        )

    # The two controls above are passed to chat_fn as extra arguments after
    # the message and history.
    gr.ChatInterface(
        fn=chat_fn,
        type="messages",
        additional_inputs=[model_choice, guest_context],
        examples=_EXAMPLES,
    )

if __name__ == "__main__":
    demo.launch()
|