Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
app.py
CHANGED
|
@@ -4,7 +4,9 @@ import json
|
|
| 4 |
import os
|
| 5 |
import requests
|
| 6 |
from pypdf import PdfReader
|
|
|
|
| 7 |
import gradio as gr
|
|
|
|
| 8 |
|
| 9 |
|
| 10 |
load_dotenv(override=True)
|
|
@@ -73,26 +75,93 @@ tools = [{"type": "function", "function": record_user_details_json},
|
|
| 73 |
{"type": "function", "function": record_unknown_question_json}]
|
| 74 |
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
class Me:
|
| 77 |
|
| 78 |
def __init__(self):
|
| 79 |
self.openai = OpenAI()
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
self.name = "Mohit Kumar"
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
|
|
|
| 87 |
with open("me/summary.txt", "r", encoding="utf-8") as f:
|
| 88 |
self.summary = f.read()
|
| 89 |
-
reader = PdfReader("me/mkt_v1_2pg.pdf")
|
| 90 |
-
self.resume = ""
|
| 91 |
-
for page in reader.pages:
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
def handle_tool_call(self, tool_calls):
|
| 98 |
results = []
|
|
@@ -121,10 +190,27 @@ Be professional and engaging, as if talking to a potential client or future empl
|
|
| 121 |
return system_prompt
|
| 122 |
|
| 123 |
def chat(self, message, history):
|
| 124 |
-
messages = [{"role": "system", "content": self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 125 |
done = False
|
|
|
|
| 126 |
while not done:
|
| 127 |
-
response = self.openai.chat.completions.create(model="gpt-5-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
if response.choices[0].finish_reason=="tool_calls":
|
| 129 |
message = response.choices[0].message
|
| 130 |
tool_calls = message.tool_calls
|
|
@@ -134,6 +220,16 @@ Be professional and engaging, as if talking to a potential client or future empl
|
|
| 134 |
else:
|
| 135 |
done = True
|
| 136 |
return response.choices[0].message.content
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
|
| 138 |
|
| 139 |
if __name__ == "__main__":
|
|
|
|
| 4 |
import os
|
| 5 |
import requests
|
| 6 |
from pypdf import PdfReader
|
| 7 |
+
from pathlib import Path
|
| 8 |
import gradio as gr
|
| 9 |
+
import time
|
| 10 |
|
| 11 |
|
| 12 |
load_dotenv(override=True)
|
|
|
|
| 75 |
{"type": "function", "function": record_unknown_question_json}]
|
| 76 |
|
| 77 |
|
| 78 |
+
def load_pdf_with_cache(pdf_path: str, cache_dir="me/cache") -> str:
    """Return the text of a PDF, reusing a plain-text cache when it is fresh.

    The cache lives at ``<cache_dir>/<pdf stem>.txt``. It is used when it
    exists and is at least as new as the PDF (by modification time), or when
    the PDF itself is missing and the cache is the only available source.
    Otherwise the PDF is parsed page by page and the cache is rewritten.

    Args:
        pdf_path: Path to the PDF file to read.
        cache_dir: Directory holding the cached ``.txt`` extractions.

    Returns:
        The extracted text, one newline appended after each non-empty page.

    Raises:
        Whatever ``PdfReader`` raises when the PDF has to be parsed and is
        missing or unreadable. Failures while writing the cache are reported
        on stdout and do not propagate.
    """
    pdf_path = Path(pdf_path)
    cache_dir = Path(cache_dir)
    cache_file = cache_dir / f"{pdf_path.stem}.txt"

    if cache_file.exists():
        # A cache older than its PDF would silently serve outdated text after
        # the PDF is replaced, so only trust it when it is not older.
        cache_is_fresh = (
            not pdf_path.exists()
            or cache_file.stat().st_mtime >= pdf_path.stat().st_mtime
        )
        if cache_is_fresh:
            return cache_file.read_text(encoding="utf-8")

    # Slow path: parse the PDF, skipping pages that yield no text.
    reader = PdfReader(str(pdf_path))
    page_texts = (page.extract_text() for page in reader.pages)
    text = "".join(f"{page_text}\n" for page_text in page_texts if page_text)

    # Caching is only an optimisation for later cold starts: an unwritable
    # cache directory must not prevent the text from being returned.
    try:
        cache_dir.mkdir(parents=True, exist_ok=True)
        cache_file.write_text(text, encoding="utf-8")
    except OSError as exc:
        print(f"Could not write PDF cache {cache_file}: {exc}", flush=True)

    return text
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
|
| 107 |
class Me:
|
| 108 |
|
| 109 |
def __init__(self):
    """Create the OpenAI client and load the profile texts for this persona.

    Reads the LinkedIn and resume PDFs through ``load_pdf_with_cache`` and the
    summary from ``me/summary.txt``, then builds the system prompt once and
    stores it on ``self._system_prompt``.
    """
    self.openai = OpenAI()
    # NOTE: only the OpenAI client is created here; no ``self.groq`` client
    # is set up in this constructor.
    self.cur_model = 'gpt'
    self.name = "Mohit Kumar"

    self.linkedin = load_pdf_with_cache("me/linkedin.pdf")

    with open("me/summary.txt", "r", encoding="utf-8") as summary_file:
        self.summary = summary_file.read()

    self.resume = load_pdf_with_cache("me/mkt_v1_2pg.pdf")
    print("Linkedin and resume loaded successfully.", flush=True)

    # Built last, after the profile attributes above have been assigned.
    self._system_prompt = self.system_prompt()
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def stream_llm(self, messages):
    """Open a streaming chat completion for ``messages``.

    Groq is attempted only when ``self.cur_model == 'groq'``; if that call
    raises, the error is printed and the request is retried against OpenAI.
    For any other ``cur_model`` value the OpenAI ``gpt-5-nano`` stream is
    used directly. Tools are not passed to either provider.

    Returns:
        Whatever the selected client's ``chat.completions.create`` returns
        for ``stream=True``.
    """
    def _openai_stream():
        # Single definition of the OpenAI request shared by both paths.
        return self.openai.chat.completions.create(
            model="gpt-5-nano",
            messages=messages,
            # tools=tools,
            stream=True,
        )

    if self.cur_model != 'groq':
        print("self.cur_model changed. Using gpt nano for streaming.", flush=True)
        return _openai_stream()

    try:
        return self.groq.chat.completions.create(
            model=self.groq_model_name,
            messages=messages,
            # tools=tools,
            stream=True,
        )
    except Exception as e:
        print("Groq streaming failed:", e, flush=True)
        return _openai_stream()
|
| 165 |
|
| 166 |
def handle_tool_call(self, tool_calls):
|
| 167 |
results = []
|
|
|
|
| 190 |
return system_prompt
|
| 191 |
|
| 192 |
def chat(self, message, history):
|
| 193 |
+
messages = [{"role": "system", "content": self._system_prompt}] + history + [{"role": "user", "content": message}]
|
| 194 |
+
# if not history:
|
| 195 |
+
# # messages = [{"role": "system", "content": self.system_prompt()}] + history + [{"role": "user", "content": message}]
|
| 196 |
+
# messages = [{"role": "system", "content": self.system_prompt()}]
|
| 197 |
+
# else:
|
| 198 |
+
# messages = []
|
| 199 |
+
# messages += history
|
| 200 |
+
# messages.append({"role": "user", "content": message})
|
| 201 |
done = False
|
| 202 |
+
|
| 203 |
while not done:
|
| 204 |
+
response = self.openai.chat.completions.create(model="gpt-5-nano", messages=messages, tools=tools)
|
| 205 |
+
# try:
|
| 206 |
+
# # response = self.gemini.chat.completions.create(model="gemini-2.5-flash", messages=messages, tools=tools)
|
| 207 |
+
# response = self.groq.chat.completions.create(model=self.groq_model_name, messages=messages, tools=tools)
|
| 208 |
+
# print("Groq successful")
|
| 209 |
+
# except Exception as e:
|
| 210 |
+
# print("Groq failed:", e, flush=True)
|
| 211 |
+
# self.cur_model = 'openai'
|
| 212 |
+
# response = self.openai.chat.completions.create(model="gpt-5-nano", messages=messages, tools=tools)
|
| 213 |
+
# print("GPT 5 nano successful.", flush=True)
|
| 214 |
if response.choices[0].finish_reason=="tool_calls":
|
| 215 |
message = response.choices[0].message
|
| 216 |
tool_calls = message.tool_calls
|
|
|
|
| 220 |
else:
|
| 221 |
done = True
|
| 222 |
return response.choices[0].message.content
|
| 223 |
+
# stream = self.stream_llm(messages)
|
| 224 |
+
|
| 225 |
+
# partial = ""
|
| 226 |
+
# for chunk in stream:
|
| 227 |
+
# delta = chunk.choices[0].delta
|
| 228 |
+
|
| 229 |
+
# if delta and delta.content:
|
| 230 |
+
# partial += delta.content
|
| 231 |
+
# yield partial
|
| 232 |
+
# time.sleep(0.01)
|
| 233 |
|
| 234 |
|
| 235 |
if __name__ == "__main__":
|