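# Hugging Face page metadata, kept as comments so the module still parses:
# uploader: mhtkmr; commit message: "Upload folder using huggingface_hub";
# revision: 79d67ab (verified)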
from dotenv import load_dotenv
from openai import OpenAI
import json
import os
import requests
from pypdf import PdfReader
from pathlib import Path
import gradio as gr
import time
load_dotenv(override=True)
def push(text):
    """Send ``text`` as a Pushover notification.

    The application token and user key are read from the ``PUSHOVER_TOKEN``
    and ``PUSHOVER_USER`` environment variables at call time.

    Args:
        text: Message body to deliver.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection error, timeout, ...). The HTTP status of
            the response is not inspected.
    """
    requests.post(
        "https://api.pushover.net/1/messages.json",
        data={
            "token": os.getenv("PUSHOVER_TOKEN"),
            "user": os.getenv("PUSHOVER_USER"),
            "message": text,
        },
        # Without a timeout, requests waits indefinitely on a stalled
        # connection, which would freeze the chat turn that triggered us.
        timeout=10,
    )
def record_user_details(email, name="Name not provided", notes="not provided"):
    """Record a visitor's contact details by pushing a notification.

    Args:
        email: Email address supplied by the visitor.
        name: Visitor's name; a placeholder is used when it was not given.
        notes: Free-form context about the conversation.

    Returns:
        ``{"recorded": "ok"}`` once ``push`` has returned.
    """
    push(f"Recording {name} with email {email} and notes {notes}")
    return {"recorded": "ok"}
def record_unknown_question(question):
    """Record a question that could not be answered by pushing a notification.

    Args:
        question: The question text to report.

    Returns:
        ``{"recorded": "ok"}`` once ``push`` has returned.
    """
    push(f"Recording {question}")
    return {"recorded": "ok"}
# Function schema advertised to the model for `record_user_details`.
# Only `email` is mandatory; `name` and `notes` are optional extras.
record_user_details_json = {
    "name": "record_user_details",
    "description": "Use this tool to record that a user is interested in being in touch and provided an email address",
    "parameters": {
        "type": "object",
        "properties": {
            "email": {
                "type": "string",
                "description": "The email address of this user",
            },
            "name": {
                "type": "string",
                "description": "The user's name, if they provided it",
            },
            "notes": {
                "type": "string",
                "description": "Any additional information about the conversation that's worth recording to give context",
            },
        },
        "required": ["email"],
        "additionalProperties": False,
    },
}
# Function schema advertised to the model for `record_unknown_question`.
# It takes a single mandatory `question` string.
record_unknown_question_json = {
    "name": "record_unknown_question",
    "description": "Always use this tool to record any question that couldn't be answered as you didn't know the answer",
    "parameters": {
        "type": "object",
        "properties": {
            "question": {
                "type": "string",
                "description": "The question that couldn't be answered",
            },
        },
        "required": ["question"],
        "additionalProperties": False,
    },
}
# Tool list passed as `tools=` to the chat-completions call: each function
# schema is wrapped in the {"type": "function", ...} envelope.
tools = [
    {"type": "function", "function": schema}
    for schema in (record_user_details_json, record_unknown_question_json)
]
def load_pdf_with_cache(pdf_path: str, cache_dir="me/cache") -> str:
    """Return the text of a PDF, reusing a plain-text cache when one exists.

    The cache file is ``<cache_dir>/<pdf stem>.txt``. When it exists, its
    contents are returned and the PDF is not opened at all. Otherwise the PDF
    is parsed page by page and the result is written to the cache file.

    NOTE: the cache is keyed on the file stem only and is never invalidated,
    so two PDFs sharing a stem share one cache entry, and edits to a PDF are
    not picked up until its cache file is deleted.

    Args:
        pdf_path: Path to the PDF (string or ``Path``).
        cache_dir: Directory holding cached text; created if missing.

    Returns:
        The extracted text, with a newline appended after every page that
        yielded any text. Pages with no extractable text are skipped.
    """
    pdf_path = Path(pdf_path)
    cache_dir = Path(cache_dir)
    cache_dir.mkdir(parents=True, exist_ok=True)
    cache_file = cache_dir / f"{pdf_path.stem}.txt"

    # Fast path: use cached text if available.
    if cache_file.exists():
        return cache_file.read_text(encoding="utf-8")

    # Slow path: parse the PDF, keeping only pages that produced text.
    reader = PdfReader(str(pdf_path))
    page_texts = (page.extract_text() for page in reader.pages)
    text = "".join(f"{page_text}\n" for page_text in page_texts if page_text)

    # Save the cache for future cold starts.
    cache_file.write_text(text, encoding="utf-8")
    return text
class Me:
    """Chat persona that answers website visitors on the owner's behalf.

    On construction the LinkedIn export, the resume and a hand-written summary
    are loaded from the ``me/`` directory and folded into a system prompt that
    is built once and reused for every chat turn.
    """

    # Model used for every OpenAI chat-completions call made by this class.
    OPENAI_MODEL = "gpt-5-nano"

    # The only module-level functions the model is allowed to trigger. Tool
    # names come back from the model, so they must never be resolved against
    # the full module namespace.
    ALLOWED_TOOLS = frozenset({"record_user_details", "record_unknown_question"})

    def __init__(self):
        """Create the OpenAI client and load the profile documents."""
        self.openai = OpenAI()
        # NOTE: no Groq client (`self.groq`) is configured here; `stream_llm`
        # therefore always ends up on the OpenAI path.
        self.cur_model = 'gpt'
        self.name = "Mohit Kumar"
        self.linkedin = load_pdf_with_cache("me/linkedin.pdf")
        with open("me/summary.txt", "r", encoding="utf-8") as f:
            self.summary = f.read()
        self.resume = load_pdf_with_cache("me/mkt_v1_2pg.pdf")
        print("Linkedin and resume loaded successfully.", flush=True)
        # The prompt only depends on the documents above, so build it once.
        self._system_prompt = self.system_prompt()

    def _stream_openai(self, messages):
        """Start a streaming OpenAI completion for ``messages`` (no tools)."""
        return self.openai.chat.completions.create(
            model=self.OPENAI_MODEL,
            messages=messages,
            stream=True,
        )

    def stream_llm(self, messages):
        """Return a streaming chat completion for ``messages``.

        Groq is attempted only when ``self.cur_model == 'groq'``; any failure
        there (including the Groq client not being configured on this
        instance) is logged and the request falls back to OpenAI. For every
        other ``cur_model`` value OpenAI is used directly.
        """
        if self.cur_model == 'groq':
            try:
                return self.groq.chat.completions.create(
                    model=self.groq_model_name,
                    messages=messages,
                    stream=True,
                )
            except Exception as e:
                # Deliberately broad: whatever went wrong with Groq, the
                # visitor should still get an answer from the fallback.
                print("Groq streaming failed:", e, flush=True)
        else:
            print("self.cur_model changed. Using gpt nano for streaming.", flush=True)
        return self._stream_openai(messages)

    def handle_tool_call(self, tool_calls):
        """Run the tools requested by the model and build the reply messages.

        Args:
            tool_calls: Iterable of tool-call objects exposing ``id``,
                ``function.name`` and ``function.arguments`` (a JSON string).

        Returns:
            One ``{"role": "tool", ...}`` message per tool call, in order.
            A call naming a tool outside ``ALLOWED_TOOLS`` (or one that is not
            defined in this module) is answered with an empty JSON object.
        """
        results = []
        for tool_call in tool_calls:
            tool_name = tool_call.function.name
            raw_arguments = tool_call.function.arguments
            # An empty argument string is treated as "no arguments".
            arguments = json.loads(raw_arguments) if raw_arguments else {}
            print(f"Tool called: {tool_name}", flush=True)
            # Resolve the function only for allow-listed names; the name is
            # model-supplied and must not reach arbitrary module globals.
            tool = globals().get(tool_name) if tool_name in self.ALLOWED_TOOLS else None
            if not callable(tool):
                print(f"Ignoring unavailable tool: {tool_name}", flush=True)
                result = {}
            else:
                result = tool(**arguments)
            results.append({"role": "tool", "content": json.dumps(result), "tool_call_id": tool_call.id})
        return results

    def system_prompt(self):
        """Build the system prompt from the name, summary, LinkedIn and resume."""
        name = self.name
        system_prompt = (
            f"You are acting as {name}. You are answering questions on {name}'s website, "
            f"particularly questions related to {name}'s career, background, skills and experience. "
            f"Your responsibility is to represent {name} for interactions on the website as faithfully as possible. "
            f"You are given a summary of {name}'s background and LinkedIn profile which you can use to answer questions. "
            "Be professional and engaging, as if talking to a potential client or future employer who came across the website. "
            "If you don't know the answer to any question, use your record_unknown_question tool to record the question that you couldn't answer, even if it's about something trivial or unrelated to career. "
            "If the user is engaging in discussion, try to steer them towards getting in touch via email; ask for their email and record it using your record_user_details tool. "
            "Do not be too pushy about getting in touch via email. my email id is strictly mohit.in@outlook.com, do not use any other email id. You can provide my linkedin profile url as a contact option along with my email id. "
            "Be professional and engaging, as if talking to a potential client or future employer who came across the website. Answer in a concise and to the point manner."
        )
        system_prompt += f"\n\n## Summary:\n{self.summary}\n\n## LinkedIn Profile:\n{self.linkedin}\n\n## Resume:\n{self.resume}\n\n"
        system_prompt += f"With this context, please chat with the user, always staying in character as {name}."
        return system_prompt

    def chat(self, message, history):
        """Answer one user message, running any tool calls the model requests.

        Args:
            message: The visitor's latest message.
            history: Earlier conversation turns as role/content message dicts.

        Returns:
            The text content of the model's final (non-tool-call) reply.
        """
        messages = [
            {"role": "system", "content": self._system_prompt},
            *(history or []),
            {"role": "user", "content": message},
        ]
        # Keep asking the model until it stops requesting tools.
        while True:
            response = self.openai.chat.completions.create(
                model=self.OPENAI_MODEL, messages=messages, tools=tools
            )
            choice = response.choices[0]
            if choice.finish_reason != "tool_calls":
                return choice.message.content
            # Named separately so the `message` parameter is not shadowed.
            assistant_message = choice.message
            results = self.handle_tool_call(assistant_message.tool_calls)
            # The assistant's tool request must precede the tool results.
            messages.append(assistant_message)
            messages.extend(results)
if __name__ == "__main__":
    # Build the persona once (loads the documents and the system prompt),
    # then serve its `chat` method through a Gradio chat UI that passes
    # history as role/content message dicts.
    me = Me()
    gr.ChatInterface(me.chat, type="messages").launch()