# david-research-assistant / app_improved.py
# Davidvandijcke
# revamped chatbox
# dc3da36
#!/usr/bin/env python3
"""
David Van Dijcke - Professional Research Assistant
Modern, sleek chat interface with custom styling
"""
import os
from typing import List, Tuple
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from dotenv import load_dotenv
import google.generativeai as genai
# Load environment variables
load_dotenv()
class ProfessionalAssistant:
    """Professional assistant that speaks as an expert about David's work.

    On construction the assistant configures Gemini (when ``GOOGLE_API_KEY``
    is set), loads the PDFs found under ``documents/`` and pre-builds the
    prompt context that is sent with every question.

    NOTE(review): ``question_count`` is stored on the instance. If a single
    instance serves several browser sessions, the counter is shared between
    them and refreshing the page will not reset it -- confirm this is the
    intended behaviour.
    """

    # (model name, startup banner) pairs, tried in order of preference.
    _MODEL_CANDIDATES = (
        ("gemini-2.0-flash-exp", "Using Gemini 2.0 Flash Experimental"),
        ("gemini-2.5-pro", "Using Gemini 2.5 Pro"),
        ("gemini-1.5-flash", "Using Gemini 1.5 Flash"),
    )

    def __init__(self):
        """Configure the model, load the papers and reset the question counter."""
        api_key = os.getenv("GOOGLE_API_KEY")
        if api_key:
            genai.configure(api_key=api_key)
            self.model = self._select_model()
        else:
            # Without a key the assistant still starts; chat() explains why
            # it cannot answer.
            self.model = None

        # Load all papers
        self.papers = self._load_all_papers()
        # Pre-load context
        self.context = self._create_context()
        # Question limit
        self.question_limit = 15
        self.question_count = 0

    def _select_model(self):
        """Return the first Gemini model that can be constructed.

        Candidates are tried in ``_MODEL_CANDIDATES`` order. A failure on any
        candidate but the last falls through to the next one; a failure on
        the last candidate propagates to the caller.
        """
        last_index = len(self._MODEL_CANDIDATES) - 1
        for index, (name, banner) in enumerate(self._MODEL_CANDIDATES):
            try:
                model = genai.GenerativeModel(name)
            except Exception:
                # Deliberately not a bare ``except``: Ctrl-C and SystemExit
                # must not be mistaken for "model unavailable".
                if index == last_index:
                    raise
                continue
            print(banner)
            return model

    def _load_all_papers(self) -> dict:
        """Load every known PDF that exists under ``documents/``.

        Returns:
            A dict mapping a short paper key to ``{"text", "title", "pages"}``.
            Files that are missing are skipped silently; files that fail to
            parse are reported with ``print`` and skipped.
        """
        papers = {}
        pdf_dir = "documents"
        paper_files = {
            "r3d": ("r3d_arxiv_4apr2025.pdf", "R3D (Job Market Paper)"),
            "frechet": ("frechet_anova_arxiv_submission.pdf", "A Test for Jumps in Metric-Space Conditional Means"),
            "cv": ("CV_DavidVanDijcke.pdf", "CV"),
            "fdr": ("fdr.pdf", "Free Discontinuity Regression"),
            "disco": ("disco.pdf", "Distributional Synthetic Controls"),
            "rto": ("rto.pdf", "Return to Office"),
            "prodf": ("prodf.pdf", "Revenue Production Functions"),
            "unmasking": ("unmasking_partisanship.pdf", "Unmasking Partisanship"),
            "ukraine": ("van-dijcke-et-al-public-response-to-government-alerts-saves-lives-during-russian-invasion-of-ukraine.pdf", "Ukraine Alerts"),
        }
        for key, (filename, title) in paper_files.items():
            pdf_path = os.path.join(pdf_dir, filename)
            if not os.path.exists(pdf_path):
                continue
            try:
                loader = PyPDFLoader(pdf_path)
                pages = loader.load()
                text = "\n\n".join([p.page_content for p in pages])
                papers[key] = {
                    "text": text,
                    "title": title,
                    "pages": len(pages),
                }
                print(f"Loaded {title}: {len(pages)} pages")
            except Exception as e:
                # Best effort: one unreadable PDF must not stop the app.
                print(f"Error loading {filename}: {e}")
        return papers

    def _create_context(self) -> str:
        """Create comprehensive context from all loaded papers.

        Papers in the priority list come first, in that order; any other
        loaded paper follows in load order so that nothing that was loaded is
        dropped from the context. Each paper contributes a titled excerpt:
        the first 30000 characters for ``r3d`` and 15000 for the others.
        """
        priority_order = ["r3d", "cv", "frechet", "fdr", "disco", "rto", "prodf"]
        ordered_keys = [key for key in priority_order if key in self.papers]
        ordered_keys += [key for key in self.papers if key not in priority_order]

        context_parts = []
        for key in ordered_keys:
            paper = self.papers[key]
            # Add substantial excerpts
            excerpt_length = 30000 if key == "r3d" else 15000
            context_parts.append(f"\n[{paper['title']}]\n{paper['text'][:excerpt_length]}")
        return "\n\n".join(context_parts)

    def _specific_context(self, message: str) -> str:
        """Return a longer excerpt of the paper the question is about, if any.

        Questions mentioning "job market" or "r3d" get up to 50000 characters
        of the R3D paper; otherwise questions mentioning "fdr" or
        "discontinuity" get up to 30000 characters of the FDR paper. Returns
        an empty string when nothing matches or the paper was not loaded.
        """
        message_lower = message.lower()
        if "job market" in message_lower or "r3d" in message_lower:
            if "r3d" in self.papers:
                return f"\n[R3D - Job Market Paper]\n{self.papers['r3d']['text'][:50000]}\n"
        elif "fdr" in message_lower or "discontinuity" in message_lower:
            if "fdr" in self.papers:
                return f"\n[FDR Paper]\n{self.papers['fdr']['text'][:30000]}\n"
        return ""

    def _build_prompt(self, message: str, history: List[Tuple[str, str]]) -> str:
        """Assemble the full prompt: instructions, recent turns, context, question."""
        conversation = "Previous conversation:\n"
        for human, assistant in history[-3:]:  # Last 3 exchanges
            conversation += f"User: {human}\nAssistant: {assistant}\n\n"

        specific_context = self._specific_context(message)

        return f"""You are an expert assistant helping visitors learn about David Van Dijcke's research.
CRITICAL INSTRUCTIONS:
- You are NOT David - you are an expert explaining his work to website visitors
- Speak in third person about David (use "David" or "Van Dijcke", not "I" or "my")
- Be conversational but professional
- Give concise, informative answers (2-3 paragraphs max unless asked for details)
- Don't say "based on the provided papers" - just state facts confidently
- Focus on what makes his work innovative and important
- When discussing papers with multiple authors, always mention coauthors (other than David) in parentheses
Key facts:
- David is an econometrician on the 2025-26 job market from University of Michigan
- His job market paper is R3D (Regression Discontinuity Design with Distribution-Valued Outcomes)
- He specializes in functional data analysis and optimal transport methods
{conversation}
Full research context:
{self.context}
{specific_context}
Current question: {message}
Provide a concise, expert response:"""

    def chat(self, message: str, history: List[Tuple[str, str]]) -> Tuple[List[Tuple[str, str]], str]:
        """Answer one question and append the exchange to the chat history.

        Args:
            message: The visitor's question. Blank input is ignored.
            history: Previous ``(user, assistant)`` exchanges; ``None`` is
                treated as an empty history. The list is extended in place.

        Returns:
            ``(history, "")`` -- the updated history and an empty string that
            clears the input box.
        """
        if history is None:
            history = []
        if not message or not message.strip():
            return history, ""

        # Check question limit
        if self.question_count >= self.question_limit:
            response = (
                f"I've reached the question limit for this session "
                f"({self.question_limit} questions). "
                "Please refresh the page to start a new conversation."
            )
            history.append((message, response))
            return history, ""

        if not self.model:
            response = "I need a Google API key to provide detailed answers about David's research."
            history.append((message, response))
            return history, ""

        prompt = self._build_prompt(message, history)
        try:
            response = self.model.generate_content(prompt)
            answer = response.text
            # Only successful answers count against the limit.
            self.question_count += 1
            # Add remaining questions info if getting close to limit
            remaining = self.question_limit - self.question_count
            if 0 < remaining <= 3:
                noun = "question" if remaining == 1 else "questions"
                answer += f"\n\n*({remaining} {noun} remaining in this session)*"
            history.append((message, answer))
            return history, ""
        except Exception as e:
            # Keep the visitor-facing text generic, but leave a trace of the
            # real failure in the server log instead of dropping it.
            print(f"Error generating response: {e}")
            error_response = "I encountered an error. Please try rephrasing your question."
            history.append((message, error_response))
            return history, ""
# Create modern interface with Blocks
def create_interface():
    """Build the Gradio Blocks chat UI and wire it to a ProfessionalAssistant.

    The page consists of a header, a row of example-question buttons, the
    chatbot transcript and an input row with Send / Clear buttons. One
    ``ProfessionalAssistant`` is created here and used by every handler.

    Returns:
        The assembled ``gr.Blocks`` app; the caller is expected to launch it.
    """
    assistant = ProfessionalAssistant()
    # Modern CSS with better styling
    custom_css = """
/* Import modern fonts */
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap');
/* Root variables for consistent theming */
:root {
--primary-gradient: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
--primary-color: #667eea;
--secondary-color: #764ba2;
--text-primary: #1a202c;
--text-secondary: #4a5568;
--bg-primary: #ffffff;
--bg-secondary: #f7fafc;
--border-color: #e2e8f0;
--shadow-sm: 0 1px 3px 0 rgba(0, 0, 0, 0.1), 0 1px 2px 0 rgba(0, 0, 0, 0.06);
--shadow-md: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
--shadow-lg: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
}
/* Global styles */
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body, .gradio-container {
font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif !important;
background: var(--bg-secondary) !important;
height: 100vh !important;
overflow: hidden !important;
}
/* Main container layout */
.main-container {
display: flex !important;
flex-direction: column !important;
height: 100vh !important;
max-width: 100% !important;
margin: 0 !important;
padding: 0 !important;
background: white !important;
}
/* Header section */
.header-section {
background: var(--primary-gradient) !important;
padding: 1.5rem 2rem !important;
color: white !important;
box-shadow: var(--shadow-md) !important;
flex-shrink: 0 !important;
}
.header-content h1 {
font-size: 1.875rem !important;
font-weight: 700 !important;
margin-bottom: 0.5rem !important;
letter-spacing: -0.025em !important;
}
.header-content p {
font-size: 1rem !important;
opacity: 0.95 !important;
line-height: 1.5 !important;
}
.header-content .subtitle {
margin-top: 0.75rem !important;
padding-top: 0.75rem !important;
border-top: 1px solid rgba(255, 255, 255, 0.2) !important;
font-size: 0.875rem !important;
opacity: 0.9 !important;
}
/* Chat container */
.chat-container {
flex: 1 !important;
display: flex !important;
flex-direction: column !important;
overflow: hidden !important;
background: var(--bg-secondary) !important;
}
/* Chatbot messages area */
#chatbot {
flex: 1 !important;
overflow-y: auto !important;
padding: 1.5rem !important;
background: transparent !important;
}
#chatbot .message-wrap {
display: flex !important;
margin-bottom: 1rem !important;
animation: slideIn 0.3s ease-out !important;
}
#chatbot .user {
justify-content: flex-end !important;
}
#chatbot .bot {
justify-content: flex-start !important;
}
#chatbot .message {
max-width: 70% !important;
word-wrap: break-word !important;
}
#chatbot .user .message {
background: var(--primary-gradient) !important;
color: white !important;
padding: 0.875rem 1.25rem !important;
border-radius: 1.25rem 1.25rem 0.25rem 1.25rem !important;
box-shadow: var(--shadow-md) !important;
font-size: 0.9375rem !important;
line-height: 1.5 !important;
}
#chatbot .bot .message {
background: white !important;
color: var(--text-primary) !important;
padding: 0.875rem 1.25rem !important;
border-radius: 1.25rem 1.25rem 1.25rem 0.25rem !important;
box-shadow: var(--shadow-md) !important;
font-size: 0.9375rem !important;
line-height: 1.6 !important;
border: 1px solid var(--border-color) !important;
}
/* Input section */
.input-section {
padding: 1.5rem !important;
background: white !important;
border-top: 1px solid var(--border-color) !important;
box-shadow: 0 -4px 6px -1px rgba(0, 0, 0, 0.05) !important;
}
.input-row {
display: flex !important;
gap: 1rem !important;
align-items: flex-end !important;
}
/* Text input */
#msg-input textarea {
background: var(--bg-secondary) !important;
border: 2px solid var(--border-color) !important;
border-radius: 1rem !important;
padding: 0.75rem 1rem !important;
font-size: 0.9375rem !important;
font-family: inherit !important;
transition: all 0.2s ease !important;
resize: none !important;
line-height: 1.5 !important;
}
#msg-input textarea:focus {
outline: none !important;
border-color: var(--primary-color) !important;
background: white !important;
box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
}
/* Buttons */
.custom-button {
background: var(--primary-gradient) !important;
color: white !important;
border: none !important;
border-radius: 1rem !important;
padding: 0.75rem 2rem !important;
font-weight: 600 !important;
font-size: 0.9375rem !important;
cursor: pointer !important;
transition: all 0.2s ease !important;
box-shadow: var(--shadow-md) !important;
min-height: 3rem !important;
display: inline-flex !important;
align-items: center !important;
justify-content: center !important;
}
.custom-button:hover {
transform: translateY(-2px) !important;
box-shadow: var(--shadow-lg) !important;
}
.custom-button:active {
transform: translateY(0) !important;
}
.secondary-button {
background: var(--bg-secondary) !important;
color: var(--text-secondary) !important;
border: 2px solid var(--border-color) !important;
box-shadow: none !important;
}
.secondary-button:hover {
background: white !important;
color: var(--text-primary) !important;
border-color: var(--primary-color) !important;
box-shadow: var(--shadow-sm) !important;
}
/* Examples section */
.examples-container {
padding: 1rem 1.5rem !important;
background: var(--bg-secondary) !important;
border-bottom: 1px solid var(--border-color) !important;
}
.examples-title {
font-size: 0.875rem !important;
font-weight: 600 !important;
color: var(--text-secondary) !important;
margin-bottom: 0.75rem !important;
text-transform: uppercase !important;
letter-spacing: 0.05em !important;
}
.examples-grid {
display: flex !important;
flex-wrap: wrap !important;
gap: 0.5rem !important;
}
.example-button {
background: white !important;
color: var(--text-primary) !important;
border: 1px solid var(--border-color) !important;
border-radius: 0.75rem !important;
padding: 0.5rem 1rem !important;
font-size: 0.875rem !important;
cursor: pointer !important;
transition: all 0.2s ease !important;
white-space: nowrap !important;
}
.example-button:hover {
background: var(--primary-gradient) !important;
color: white !important;
border-color: transparent !important;
transform: translateY(-1px) !important;
box-shadow: var(--shadow-sm) !important;
}
/* Scrollbar styling */
#chatbot::-webkit-scrollbar {
width: 8px !important;
}
#chatbot::-webkit-scrollbar-track {
background: var(--bg-secondary) !important;
border-radius: 4px !important;
}
#chatbot::-webkit-scrollbar-thumb {
background: #cbd5e0 !important;
border-radius: 4px !important;
}
#chatbot::-webkit-scrollbar-thumb:hover {
background: #a0aec0 !important;
}
/* Animations */
@keyframes slideIn {
from {
opacity: 0;
transform: translateY(10px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
/* Hide Gradio footer */
footer {
display: none !important;
}
/* Responsive design */
@media (max-width: 768px) {
.header-content h1 {
font-size: 1.5rem !important;
}
#chatbot .message {
max-width: 85% !important;
}
.custom-button {
padding: 0.75rem 1.5rem !important;
}
.examples-grid {
flex-direction: column !important;
}
.example-button {
width: 100% !important;
text-align: center !important;
}
}
/* Fix container heights */
.gradio-container .contain {
height: 100vh !important;
display: flex !important;
flex-direction: column !important;
}
.gradio-container .contain > div {
flex: 1 !important;
display: flex !important;
flex-direction: column !important;
}
"""
    # Example questions
    examples = [
        "What is David's job market paper about?",
        "What makes R3D innovative?",
        "Tell me about practical applications",
        "What other research has David done?",
        "Why is David a strong candidate?"
    ]
    with gr.Blocks(css=custom_css, title="David Van Dijcke - Research Assistant") as demo:
        with gr.Column(elem_classes=["main-container"]):
            # Header
            with gr.Row(elem_classes=["header-section"]):
                with gr.Column(elem_classes=["header-content"]):
                    gr.HTML("""
<h1>🎓 David Van Dijcke - Research Assistant</h1>
<p>Welcome! I'm here to help you learn about David Van Dijcke's innovative econometric research.</p>
<div class="subtitle">
<strong>Job Market Paper:</strong> R3D - Regression Discontinuity Design with Distribution-Valued Outcomes<br>
<em>Note: This session allows up to 15 questions. Refresh to start a new session.</em>
</div>
""")
            # Chat area
            with gr.Column(elem_classes=["chat-container"]):
                # Examples section
                with gr.Row(elem_classes=["examples-container"]):
                    with gr.Column():
                        gr.HTML('<div class="examples-title">Quick Questions</div>')
                        with gr.Row(elem_classes=["examples-grid"]):
                            # Keep a handle on every button so the click
                            # handlers below do not have to dig them back out
                            # of the layout tree by position.
                            example_buttons = [
                                gr.Button(
                                    example,
                                    elem_classes=["example-button"],
                                    size="sm"
                                )
                                for example in examples
                            ]
                # Chatbot
                chatbot = gr.Chatbot(
                    elem_id="chatbot",
                    bubble_full_width=False,
                    show_label=False,
                    height=None,
                    container=False
                )
            # Input section
            with gr.Row(elem_classes=["input-section"]):
                with gr.Row(elem_classes=["input-row"]):
                    msg = gr.Textbox(
                        placeholder="Ask about David's research, methods, papers, or academic background...",
                        show_label=False,
                        lines=2,
                        max_lines=4,
                        scale=4,
                        elem_id="msg-input",
                        autofocus=True
                    )
                    with gr.Column(scale=1):
                        with gr.Row():
                            submit = gr.Button("Send", elem_classes=["custom-button"])
                            clear = gr.Button("Clear", elem_classes=["secondary-button"])

        # Event handlers
        def respond(message, chat_history):
            """Forward one question to the assistant; returns (history, "")."""
            return assistant.chat(message, chat_history)

        # Submit handlers: Enter in the textbox and the Send button do the same.
        msg.submit(respond, [msg, chatbot], [chatbot, msg])
        submit.click(respond, [msg, chatbot], [chatbot, msg])
        # Clear button empties both the transcript and the input box.
        clear.click(lambda: ([], ""), outputs=[chatbot, msg])
        # Example buttons: clicking a quick question fills the input box with
        # its text and resets the chat history. ``x=example`` binds the text at
        # definition time so each button keeps its own question.
        for example, example_btn in zip(examples, example_buttons):
            example_btn.click(
                lambda x=example: (x, []),
                outputs=[msg, chatbot]
            )
    return demo
if __name__ == "__main__":
    # Script entry point: build the UI, then serve it with the options below.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "show_error": True,
        "show_api": False,
    }
    interface = create_interface()
    interface.launch(**launch_options)