# Hugging Face Space upload metadata (not code):
# Ayush239's picture
# Upload folder using huggingface_hub
# 00eef43 verified
import sys
import json
from openai import OpenAI
import gradio as gr
from typing import Dict, List
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent))
from helpers import load_all_documents, PushoverNotifier, get_config
from rag_system import RAGSystem
from evaluation import RAGEvaluator
class DigitalTwin:
    """Conversational agent that role-plays a person (a "digital twin") on
    their website.

    Answers are grounded in local documents (loaded from ``me/``), optionally
    retrieved through a RAG pipeline, and the OpenAI tool-calling API is used
    to capture visitor contact details and log questions the twin cannot
    answer (both forwarded via Pushover notifications).
    """

    def __init__(self):
        """Load config, documents, and (optionally) the RAG system.

        Raises:
            ValueError: if no documents are found in the ``me/`` directory.
        """
        self.config = get_config()
        self.openai = OpenAI(api_key=self.config["openai_api_key"])
        self.name = self.config["name"]
        self.notifier = PushoverNotifier(self.config["pushover_user"], self.config["pushover_token"])
        # Per-process contact-capture state, updated by record_user_details.
        # NOTE(review): this is shared across all chat sessions of this
        # process — concurrent visitors share one flag; confirm intended.
        self.email_collected = False
        self.user_email = None
        self.user_name = None

        print("Loading knowledge base...")
        app_dir = Path(__file__).parent
        self.documents = load_all_documents(str(app_dir / "me"))
        if not self.documents:
            raise ValueError("No documents loaded! Please add content to the me/ directory.")

        if self.config["rag_enabled"]:
            print("Initializing RAG system...")
            data_dir = str(app_dir / "data")
            self.rag_system = RAGSystem(self.openai, data_dir=data_dir)
            self.rag_system.load_knowledge_base(
                self.documents,
                chunk_size=self.config["chunk_size"],
                overlap=self.config["chunk_overlap"]
            )
            print("RAG system ready!")
        else:
            self.rag_system = None

        self.evaluator = RAGEvaluator(self.openai)

        # OpenAI function-calling schemas. Each "name" must match a method on
        # this class: handle_tool_calls dispatches via getattr(self, name).
        self.tools = [
            {
                "type": "function",
                "function": {
                    "name": "record_user_details",
                    "description": "Record user contact information. IMPORTANT: You must ask for their name if they haven't provided it yet. Only call this tool after you have collected both email and name.",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "email": {"type": "string", "description": "The email address of this user"},
                            "name": {"type": "string", "description": "The user's full name"},
                            "notes": {"type": "string", "description": "A brief 1-line summary of what the user was asking about or interested in"}
                        },
                        "required": ["email", "name", "notes"],
                        "additionalProperties": False
                    }
                }
            },
            {
                "type": "function",
                "function": {
                    "name": "record_unknown_question",
                    "description": "Always use this tool to record any question that couldn't be answered",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "question": {"type": "string", "description": "The question that couldn't be answered"}
                        },
                        "required": ["question"],
                        "additionalProperties": False
                    }
                }
            },
            {
                "type": "function",
                "function": {
                    "name": "search_knowledge_base",
                    "description": "Search the knowledge base for specific information",
                    "parameters": {
                        "type": "object",
                        "properties": {
                            "query": {"type": "string", "description": "The search query"},
                            "focus_area": {"type": "string", "description": "Optional: specific area to focus on"}
                        },
                        "required": ["query"],
                        "additionalProperties": False
                    }
                }
            }
        ]

    def record_user_details(self, email: str, name: str, notes: str) -> Dict:
        """Tool: store the visitor's contact details and notify via Pushover."""
        self.email_collected = True
        self.user_email = email
        self.user_name = name
        self.notifier.send(f"New Contact: {name} <{email}>\nInterest: {notes}")
        return {"recorded": "ok", "message": f"Perfect! Thanks {name}. I'll be in touch soon."}

    def record_unknown_question(self, question: str) -> Dict:
        """Tool: log a question the twin could not answer via Pushover."""
        self.notifier.send(f"Unanswered: {question}")
        return {"recorded": "ok", "message": "I'll make a note of that question."}

    def search_knowledge_base(self, query: str, focus_area: str = None) -> Dict:
        """Tool: retrieve relevant chunks from the RAG knowledge base.

        Args:
            query: Free-text search query.
            focus_area: Optional topic prefix used to bias retrieval.

        Returns:
            Dict with ``success`` flag, truncated ``results``, and a message.
        """
        if not self.rag_system:
            return {"success": False, "message": "RAG system not available"}
        enhanced_query = f"{focus_area}: {query}" if focus_area else query
        context = self.rag_system.retriever.retrieve(
            enhanced_query,
            method=self.config["rag_method"],
            top_k=self.config["top_k"],
            expand_query=self.config["query_expansion"],
            query_expander=self.rag_system.query_expander if self.config["query_expansion"] else None
        )
        results = []
        for doc in context:
            text = doc["text"]
            # Fix: only add an ellipsis when the text was actually truncated.
            snippet = text[:300] + "..." if len(text) > 300 else text
            results.append({"source": doc["source"], "text": snippet, "score": doc["retrieval_score"]})
        return {"success": True, "results": results, "message": f"Found {len(results)} relevant pieces"}

    def handle_tool_calls(self, tool_calls) -> List[Dict]:
        """Execute each requested tool and build the matching 'tool' messages.

        Unknown tool names produce an error payload instead of raising, so a
        hallucinated tool call cannot crash the chat loop.
        """
        results = []
        for tool_call in tool_calls:
            tool_name = tool_call.function.name
            arguments = json.loads(tool_call.function.arguments)
            print(f"[TOOL] Tool called: {tool_name}", flush=True)
            tool_func = getattr(self, tool_name, None)
            result = tool_func(**arguments) if tool_func else {"error": f"Unknown tool: {tool_name}"}
            results.append({
                "role": "tool",
                "content": json.dumps(result),
                "tool_call_id": tool_call.id
            })
        return results

    def get_system_prompt(self, rag_context: List[Dict] = None) -> str:
        """Build the system prompt for one chat turn.

        Args:
            rag_context: Retrieved documents to inline; when ``None``, the
                full document set is embedded instead.

        Returns:
            The complete system prompt string.
        """
        prompt = f"""You are acting as {self.name}. You are answering questions on {self.name}'s website, particularly questions related to {self.name}'s career, background, skills and experience.
Your responsibility is to represent {self.name} for interactions on the website as faithfully as possible.
Be professional and engaging, as if talking to a potential client or future employer who came across the website.
"""
        if rag_context:
            # RAG path: only the retrieved chunks go into the prompt.
            prompt += "\n## Retrieved Information:\n"
            for doc in rag_context:
                prompt += f"\n[{doc['source']}]:\n{doc['text']}\n"
        else:
            # Fallback path: embed every loaded document verbatim.
            all_context = "\n\n".join([f"## {k.title()}:\n{v}" for k, v in self.documents.items()])
            prompt += f"\n{all_context}\n"
        prompt += f"""
## Important Instructions:
- If you don't know the answer to any question, use your record_unknown_question tool
- If you need more specific information, use your search_knowledge_base tool
"""
        if not self.email_collected:
            prompt += """- If the user is engaging positively, naturally steer towards getting in touch
- Ask for BOTH their name and email address (ask for name first if they only provide email)
- When using record_user_details tool, include a 1-line summary of what they were interested in
- Only call the tool after you have collected both name and email
"""
        else:
            prompt += f"""- You have already collected contact from {self.user_name or 'this user'} ({self.user_email})
- Continue naturally without repeatedly asking for contact details
"""
        prompt += f"\n\nWith this context, please chat with the user, always staying in character as {self.name}."
        return prompt

    def chat(self, message: str, history: List) -> str:
        """Run one chat turn: optional retrieval, then a tool-calling loop.

        Args:
            message: The user's latest message.
            history: Gradio history in either tuple-pairs or messages format.

        Returns:
            The assistant's reply text.
        """
        # Normalize Gradio history (list of (user, bot) pairs or of
        # {"role", "content"} dicts) into OpenAI message dicts.
        converted_history = []
        for h in history:
            if isinstance(h, (list, tuple)) and len(h) == 2:
                user_msg, bot_msg = h
                if user_msg:
                    converted_history.append({"role": "user", "content": user_msg})
                if bot_msg:
                    converted_history.append({"role": "assistant", "content": bot_msg})
            elif isinstance(h, dict):
                converted_history.append({k: v for k, v in h.items() if k in ["role", "content"]})
        history = converted_history

        use_rag = self.config["rag_enabled"] and self.rag_system
        rag_context = None
        if use_rag:
            # Cheap LLM gate: only retrieve when the query actually asks
            # about background/experience/skills.
            query_check = self.openai.chat.completions.create(
                model="gpt-4o-mini",
                messages=[{"role": "user", "content": f"Is this query asking for specific information about someone's background, experience, or skills? Answer only 'yes' or 'no'.\n\nQuery: {message}"}],
                temperature=0
            )
            # Fix: accept answers like "Yes." — exact equality was fragile.
            should_retrieve = query_check.choices[0].message.content.strip().lower().startswith("yes")
            if should_retrieve:
                print("[RAG] Using RAG for this query")
                rag_context = self.rag_system.retriever.retrieve(
                    message,
                    method=self.config["rag_method"],
                    top_k=self.config["top_k"],
                    expand_query=self.config["query_expansion"],
                    query_expander=self.rag_system.query_expander if self.config["query_expansion"] else None
                )

        system_prompt = self.get_system_prompt(rag_context)
        messages = [{"role": "system", "content": system_prompt}] + history + [{"role": "user", "content": message}]

        # Tool-calling loop, bounded to avoid infinite tool chains.
        response = None
        max_iterations = 5
        for _ in range(max_iterations):
            response = self.openai.chat.completions.create(model="gpt-4o-mini", messages=messages, tools=self.tools, temperature=0.7)
            if response.choices[0].finish_reason == "tool_calls":
                message_obj = response.choices[0].message
                results = self.handle_tool_calls(message_obj.tool_calls)
                messages.append(message_obj)
                messages.extend(results)
            else:
                break

        # Fix: if the loop exhausted max_iterations while the model was still
        # requesting tools, the final message's content can be None — return
        # a graceful fallback instead of None (which Gradio renders badly).
        content = response.choices[0].message.content if response else None
        return content if content else "I'm sorry, I wasn't able to complete that request. Could you try rephrasing?"
# --- Application bootstrap: build the twin, wire up the Gradio UI ---------
print("Initializing Digital Twin...")
twin = DigitalTwin()
print("Digital Twin ready!")


def chat_wrapper(message, history):
    """Thin adapter exposing the twin's chat method to Gradio."""
    return twin.chat(message, history)


# UI styling is kept in named locals so the Blocks call stays readable.
_theme = gr.themes.Soft(primary_hue="blue", secondary_hue="slate")
_css = "#chatbot {height: 600px;} .contain {max-width: 900px; margin: auto;}"

with gr.Blocks(theme=_theme, css=_css) as demo:
    gr.Markdown(f"""# Chat with {twin.name}
Welcome! I'm an AI assistant representing {twin.name}. Ask me anything about background, experience, skills, or interests.
Features: Advanced RAG - Context-aware - Smart contact collection - Real-time notifications""")
    chatbot = gr.ChatInterface(
        fn=chat_wrapper,
        chatbot=gr.Chatbot(elem_id="chatbot"),
        textbox=gr.Textbox(
            placeholder=f"Ask me about {twin.name}'s experience, skills, or background...",
            container=False,
            scale=7,
        ),
        title=None,
        description=None,
    )
    gr.Markdown(f"""---
Powered by Advanced RAG - OpenAI GPT-4 - Hybrid Search and Reranking
RAG Configuration: {twin.config['rag_method'].upper()} - Top {twin.config['top_k']} docs - Query expansion: {'ON' if twin.config['query_expansion'] else 'OFF'}""")

if __name__ == "__main__":
    # Bind on all interfaces (Spaces/containers); share disabled.
    demo.launch(share=False, server_name="0.0.0.0", server_port=7867)