Spaces:

mhtkmr
/

professional-clone

Sleeping

App Files Files Community

professional-clone / app.py

mhtkmr

Upload folder using huggingface_hub

79d67ab verified about 1 month ago

raw

history blame contribute delete

9.47 kB

	from dotenv import load_dotenv
	from openai import OpenAI
	import json
	import os
	import requests
	from pypdf import PdfReader
	from pathlib import Path
	import gradio as gr
	import time


	# Load variables from a local .env file into the process environment before
	# anything reads os.getenv(); override=True lets values from .env replace
	# variables that are already set in the environment.
	load_dotenv(override=True)

	def push(text):
	    """Send ``text`` as a Pushover notification.

	    The API token and user key are read from the ``PUSHOVER_TOKEN`` and
	    ``PUSHOVER_USER`` environment variables at call time.

	    Delivery is best-effort: the request is bounded by a timeout so a stalled
	    connection cannot hang the caller, and network-level failures are logged
	    instead of raised so a notification problem does not abort the chat turn
	    that triggered it.

	    Args:
	        text: The message body to deliver.

	    Returns:
	        None.
	    """
	    payload = {
	        "token": os.getenv("PUSHOVER_TOKEN"),
	        "user": os.getenv("PUSHOVER_USER"),
	        "message": text,
	    }
	    try:
	        # Without an explicit timeout, requests waits indefinitely.
	        requests.post(
	            "https://api.pushover.net/1/messages.json",
	            data=payload,
	            timeout=10,
	        )
	    except requests.RequestException as exc:
	        print(f"Pushover notification failed: {exc}", flush=True)


	def record_user_details(email, name="Name not provided", notes="not provided"):
	    """Forward a visitor's contact details via ``push`` and acknowledge.

	    Args:
	        email: The visitor's email address.
	        name: The visitor's name, when one was given.
	        notes: Free-form context about the conversation.

	    Returns:
	        The dict ``{"recorded": "ok"}``.
	    """
	    notification = f"Recording {name} with email {email} and notes {notes}"
	    push(notification)
	    acknowledgement = {"recorded": "ok"}
	    return acknowledgement

	def record_unknown_question(question):
	    """Forward a question that could not be answered via ``push``.

	    Args:
	        question: The question text to record.

	    Returns:
	        The dict ``{"recorded": "ok"}``.
	    """
	    notification = f"Recording {question}"
	    push(notification)
	    acknowledgement = {"recorded": "ok"}
	    return acknowledgement

	# Function schema describing record_user_details to the model.
	# Only "email" is required; "name" and "notes" are optional extras.
	record_user_details_json = {
	    "name": "record_user_details",
	    "description": "Use this tool to record that a user is interested in being in touch and provided an email address",
	    "parameters": {
	        "type": "object",
	        "properties": {
	            "email": {
	                "type": "string",
	                "description": "The email address of this user",
	            },
	            "name": {
	                "type": "string",
	                "description": "The user's name, if they provided it",
	            },
	            "notes": {
	                "type": "string",
	                "description": "Any additional information about the conversation that's worth recording to give context",
	            },
	        },
	        "required": ["email"],
	        "additionalProperties": False,
	    },
	}

	# Function schema describing record_unknown_question to the model.
	# It takes a single required string argument, "question".
	record_unknown_question_json = {
	    "name": "record_unknown_question",
	    "description": "Always use this tool to record any question that couldn't be answered as you didn't know the answer",
	    "parameters": {
	        "type": "object",
	        "properties": {
	            "question": {
	                "type": "string",
	                "description": "The question that couldn't be answered",
	            },
	        },
	        "required": ["question"],
	        "additionalProperties": False,
	    },
	}

	# Tool list passed as ``tools=`` to the chat-completions call: each function
	# schema is wrapped in the {"type": "function", "function": ...} envelope.
	tools = [
	    {"type": "function", "function": schema}
	    for schema in (record_user_details_json, record_unknown_question_json)
	]


	def load_pdf_with_cache(pdf_path: str, cache_dir="me/cache") -> str:
	    """Return the text of a PDF, reusing a plain-text cache when it is current.

	    The cache lives at ``<cache_dir>/<pdf stem>.txt``. It is used when it is at
	    least as new as the PDF (by modification time), or when the PDF itself is
	    absent. Otherwise the PDF is parsed page by page, the extracted text is
	    written back to the cache, and that text is returned. Previously an
	    existing cache was always trusted, so an updated PDF kept serving stale
	    text.

	    Args:
	        pdf_path: Path to the PDF file.
	        cache_dir: Directory holding cached text files; created if missing.

	    Returns:
	        The extracted text, with a newline appended after each non-empty page.
	    """
	    source = Path(pdf_path)
	    cache_root = Path(cache_dir)
	    cache_root.mkdir(parents=True, exist_ok=True)

	    cache_file = cache_root / f"{source.stem}.txt"

	    if cache_file.exists():
	        # A cache older than the PDF is stale and must be rebuilt. A cache
	        # without its PDF is still served, as before.
	        cache_is_current = (
	            not source.exists()
	            or cache_file.stat().st_mtime >= source.stat().st_mtime
	        )
	        if cache_is_current:
	            return cache_file.read_text(encoding="utf-8")

	    # Slow path: parse the PDF and keep only pages that yielded text.
	    reader = PdfReader(str(source))
	    page_texts = (page.extract_text() for page in reader.pages)
	    text = "".join(f"{page_text}\n" for page_text in page_texts if page_text)

	    # Save the result so later cold starts can skip parsing.
	    cache_file.write_text(text, encoding="utf-8")

	    return text



	class Me:
	    """Chat persona that answers website visitors in the owner's voice.

	    On construction it loads the LinkedIn export, the summary text and the
	    resume from the ``me/`` directory and builds the system prompt once.
	    ``chat`` is the callback handed to the Gradio chat interface.
	    """

	    # Model used for every OpenAI call made by this class.
	    _OPENAI_MODEL = "gpt-5-nano"
	    # The only function names the model may invoke through handle_tool_call.
	    _ALLOWED_TOOLS = frozenset({"record_user_details", "record_unknown_question"})
	    # Upper bound on consecutive tool-call rounds within one chat() turn.
	    _MAX_TOOL_ROUNDS = 8

	    def __init__(self):
	        """Create the OpenAI client and load the profile documents from disk."""
	        self.openai = OpenAI()
	        # No Groq client is configured here; stream_llm falls back to OpenAI
	        # whenever its Groq request fails.
	        self.cur_model = 'gpt'
	        self.name = "Mohit Kumar"
	        self.linkedin = load_pdf_with_cache("me/linkedin.pdf")
	        with open("me/summary.txt", "r", encoding="utf-8") as f:
	            self.summary = f.read()
	        self.resume = load_pdf_with_cache("me/mkt_v1_2pg.pdf")
	        print("Linkedin and resume loaded successfully.", flush=True)
	        # Built once: the prompt depends only on the documents loaded above.
	        self._system_prompt = self.system_prompt()

	    def _stream_openai(self, messages):
	        """Start a streaming OpenAI chat completion for ``messages``."""
	        return self.openai.chat.completions.create(
	            model=self._OPENAI_MODEL,
	            messages=messages,
	            stream=True,
	        )

	    def stream_llm(self, messages):
	        """Return a streaming chat completion for ``messages``.

	        When ``self.cur_model`` is ``'groq'`` a Groq stream is attempted first
	        and any failure falls back to OpenAI; for every other value OpenAI is
	        used directly. Tools are not passed to streaming requests.
	        """
	        if self.cur_model == 'groq':
	            try:
	                return self.groq.chat.completions.create(
	                    model=self.groq_model_name,
	                    messages=messages,
	                    stream=True,
	                )
	            except Exception as e:
	                # Deliberate catch-all: any Groq failure, including a client
	                # that was never set up on this instance, uses the fallback.
	                print("Groq streaming failed:", e, flush=True)
	                return self._stream_openai(messages)
	        print("self.cur_model changed. Using gpt nano for streaming.", flush=True)
	        return self._stream_openai(messages)

	    def _run_tool(self, tool_name, raw_arguments):
	        """Run one allow-listed tool and return its JSON-serialisable result.

	        Malformed arguments and unknown tool names yield an ``{"error": ...}``
	        dict for the model to read, instead of raising.
	        """
	        try:
	            arguments = json.loads(raw_arguments)
	        except (TypeError, ValueError) as exc:
	            return {"error": f"Invalid JSON arguments for {tool_name}: {exc}"}
	        if not isinstance(arguments, dict):
	            return {"error": f"Arguments for {tool_name} must be a JSON object"}

	        # The name comes from the model, so it is resolved only when it is on
	        # the allow-list; an unrestricted globals() lookup would let the model
	        # call any module-level callable.
	        tool = globals().get(tool_name) if tool_name in self._ALLOWED_TOOLS else None
	        if not callable(tool):
	            return {"error": f"Unknown tool: {tool_name}"}
	        return tool(**arguments)

	    def handle_tool_call(self, tool_calls):
	        """Execute the model's tool calls and build the ``tool`` role replies.

	        Args:
	            tool_calls: Tool-call objects exposing ``id``, ``function.name``
	                and ``function.arguments`` (a JSON string).

	        Returns:
	            One ``{"role": "tool", "content": <json>, "tool_call_id": <id>}``
	            dict per call, in the same order.
	        """
	        results = []
	        for tool_call in tool_calls:
	            tool_name = tool_call.function.name
	            print(f"Tool called: {tool_name}", flush=True)
	            result = self._run_tool(tool_name, tool_call.function.arguments)
	            results.append({"role": "tool", "content": json.dumps(result), "tool_call_id": tool_call.id})
	        return results

	    def system_prompt(self):
	        """Build the system prompt from the name, summary, LinkedIn and resume.

	        The instruction text is assembled from adjacent string literals so the
	        source file's indentation can never leak into the prompt.
	        """
	        name = self.name
	        instructions = (
	            f"You are acting as {name}. You are answering questions on {name}'s website, "
	            f"particularly questions related to {name}'s career, background, skills and experience. "
	            f"Your responsibility is to represent {name} for interactions on the website as faithfully as possible. "
	            f"You are given a summary of {name}'s background and LinkedIn profile which you can use to answer questions. "
	            "Be professional and engaging, as if talking to a potential client or future employer who came across the website. "
	            "If you don't know the answer to any question, use your record_unknown_question tool to record the question that you couldn't answer, even if it's about something trivial or unrelated to career. "
	            "If the user is engaging in discussion, try to steer them towards getting in touch via email; ask for their email and record it using your record_user_details tool. "
	            "Do not be too pushy about getting in touch via email. my email id is strictly mohit.in@outlook.com, do not use any other email id. You can provide my linkedin profile url as a contact option along with my email id. "
	            "Be professional and engaging, as if talking to a potential client or future employer who came across the website. Answer in a concise and to the point manner."
	        )
	        context = f"\n\n## Summary:\n{self.summary}\n\n## LinkedIn Profile:\n{self.linkedin}\n\n## Resume:\n{self.resume}\n\n"
	        closing = f"With this context, please chat with the user, always staying in character as {name}."
	        return instructions + context + closing

	    def chat(self, message, history):
	        """Answer one user message, running any tools the model requests.

	        Args:
	            message: The new user message.
	            history: Earlier conversation turns as a list of message dicts.

	        Returns:
	            The text content of the model's final reply.
	        """
	        messages = [{"role": "system", "content": self._system_prompt}] + history + [{"role": "user", "content": message}]

	        # Bounded so a model that keeps requesting tools cannot loop forever.
	        for _ in range(self._MAX_TOOL_ROUNDS):
	            response = self.openai.chat.completions.create(
	                model=self._OPENAI_MODEL, messages=messages, tools=tools
	            )
	            choice = response.choices[0]
	            if choice.finish_reason != "tool_calls":
	                return choice.message.content
	            # Feed the assistant's tool request and the tool outputs back in.
	            assistant_message = choice.message
	            messages.append(assistant_message)
	            messages.extend(self.handle_tool_call(assistant_message.tool_calls))

	        # Round limit reached: request a plain-text answer with tools disabled.
	        response = self.openai.chat.completions.create(
	            model=self._OPENAI_MODEL,
	            messages=messages,
	            tools=tools,
	            tool_choice="none",
	        )
	        return response.choices[0].message.content


	if __name__ == "__main__":
	    # Script entry point: build the persona once, then expose its chat()
	    # callback through a Gradio chat interface using message-dict history.
	    me = Me()
	    demo = gr.ChatInterface(fn=me.chat, type="messages")
	    demo.launch()