Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,766 +1,177 @@
|
|
| 1 |
-
# edunatives_full.py
|
| 2 |
-
from __future__ import annotations
|
| 3 |
-
import os
|
| 4 |
-
import re
|
| 5 |
-
import uuid
|
| 6 |
import json
|
| 7 |
-
import
|
| 8 |
-
import fitz
|
| 9 |
import docx
|
| 10 |
-
import
|
| 11 |
-
from datetime import datetime, timezone
|
| 12 |
-
from typing import List, Dict, Any, Optional, Tuple
|
| 13 |
-
from dataclasses import dataclass
|
| 14 |
-
|
| 15 |
import gradio as gr
|
| 16 |
from openai import OpenAI
|
| 17 |
-
import
|
| 18 |
-
from weaviate.
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
# -------------------- Configuration (edit these or set env vars) --------------------
|
| 22 |
-
MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b")
|
| 23 |
-
# SECURITY: never ship a real credential as the fallback value. The key must
# be supplied through the DEEPINFRA_API_KEY environment variable (e.g. a
# Space secret). The key that used to be hard-coded here has been published
# and should be rotated.
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY", "")
|
| 24 |
-
BASE_URL = os.getenv("BASE_URL", "https://api.deepinfra.com/v1/openai")
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
|
|
|
|
|
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
# -------------------- Clients --------------------
|
| 33 |
-
llm_client = OpenAI(api_key=DEEPINFRA_API_KEY, base_url=BASE_URL)
|
| 34 |
-
|
| 35 |
-
weaviate_client = weaviate.Client(
|
| 36 |
url=WEAVIATE_URL,
|
| 37 |
-
auth_client_secret=
|
| 38 |
)
|
|
|
|
| 39 |
|
| 40 |
-
#
|
| 41 |
-
# Inclusive Unicode code-point ranges that is_arabic() treats as Arabic script.
ARABIC_RANGE = (
    (0x0600, 0x06FF), (0x0750, 0x077F), (0x08A0, 0x08FF),
    (0xFB50, 0xFDFF), (0xFE70, 0xFEFF), (0x1EE00, 0x1EEFF)
)


def is_arabic(text: str) -> bool:
    """Return True if at least one character of *text* lies in ARABIC_RANGE.

    A falsy *text* (``None`` or ``""``) yields False.
    """
    return any(
        low <= ord(char) <= high
        for char in text or ""
        for low, high in ARABIC_RANGE
    )
|
| 52 |
-
|
| 53 |
-
def get_rfc3339_time() -> str:
    """Return the current UTC time as an ISO-8601 string with a ``Z`` suffix."""
    stamp = datetime.now(timezone.utc).isoformat()
    # isoformat() renders UTC as "+00:00"; swap it for the "Z" designator.
    return stamp.replace("+00:00", "Z")
|
| 55 |
-
|
| 56 |
-
# -------------------- Simple KB --------------------
|
| 57 |
-
# Canned answers keyed by intent label, then by language code ("en" / "ar").
KB: Dict[str, Dict[str, str]] = {
    "student_registration": {
        "en": (
            "**How to register / create an account (Student)**\n\n"
            "1. Go to the EduNatives site and choose Sign Up.\n"
            "2. Use your university email if possible and verify it.\n"
            "3. Complete your profile (major, skills, interests).\n"
            "4. Enable notifications for internships/scholarships."
        ),
        "ar": (
            "**طريقة التسجيل وإنشاء حساب (طلاب)**\n\n"
            "١. اذهب إلى موقع EduNatives واختر Sign Up.\n"
            "٢. يفضل استخدام إيميل الجامعة وتأكيده.\n"
            "٣. أكمل ملفك الشخصي (التخصص، المهارات، الاهتمامات).\n"
            "٤. فعّل التنبيهات لفرص التدريب والمنح."
        ),
    },
    "student_internships": {
        "en": (
            "**Finding internships & scholarships**\n\n"
            "- Use the search filters: field, location, duration, paid/unpaid.\n"
            "- Follow companies and set up alerts for new opportunities.\n"
            "- Keep your profile and resume updated."
        ),
        "ar": (
            "**كيفية العثور على تدريب أو منحة**\n\n"
            "- استخدم فلاتر البحث: التخصص، المكان، المدة، مدفوع/غير مدفوع.\n"
            "- تابع الشركات وفعّل التنبيهات للفرص الجديدة.\n"
            "- حافظ على تحديث ملفك الشخصي وسيرتك الذاتية."
        ),
    },
}
|
| 89 |
-
|
| 90 |
-
# keys to detect intents (simple)
|
| 91 |
-
# Intent label -> lowercase keywords (English and Arabic). Insertion order
# matters: route_intent() returns the first label with a matching keyword.
KEYS = {
    "student_registration": [
        "register", "sign up", "signup", "create account", "account",
        "تسجيل", "انشاء", "إنشاء", "حساب",
    ],
    "student_internships": [
        "intern", "internship", "training", "scholar", "scholarship",
        "grant", "opportunity", "تدريب", "منحة", "فرصة",
    ],
    "Job": ["job", "وظيفة", "وظائف", "وظايف"],
    "Application": ["apply", "application", "cover letter", "تقديم", "طلب"],
    "Memory": ["memory", "conversation history", "ذاكرة"],
    "Opportunities": ["opportunity", "فرص", "opportunities"],
    "Project": ["project", "مشروع"],
    "Team": ["team", "فريق"],
}
|
| 101 |
-
|
| 102 |
-
@dataclass
class Route:
    """Routing decision produced by route_intent()."""

    # Target audience; "general" when no audience is forced.
    audience: str
    # Matched KEYS label, or "general" when nothing matched.
    intent: str
    # "ar" or "en".
    language: str
|
| 107 |
-
|
| 108 |
-
def route_intent(text: str, forced_audience: Optional[str] = None) -> Route:
    """Classify *text* into a Route by keyword lookup.

    The intent is the first KEYS label (in dict order) having a keyword that
    occurs as a substring of the lowercased text; "general" if none does.
    The language is "ar" when is_arabic(text) is true, otherwise "en".
    The audience is *forced_audience* when truthy, otherwise "general".
    """
    lowered = (text or "").lower()
    matched = next(
        (
            label
            for label, keywords in KEYS.items()
            if any(keyword in lowered for keyword in keywords)
        ),
        None,
    )
    return Route(
        audience=forced_audience if forced_audience else "general",
        intent=matched if matched else "general",
        language="ar" if is_arabic(text) else "en",
    )
|
| 122 |
-
|
| 123 |
-
# -------------------- Skill extraction (simple regex baseline) --------------------
|
| 124 |
-
# Canonical spellings of the skills the baseline extractor recognises.
# "JavaScript" is listed before "Java" so the longer name wins the alternation.
_SKILL_NAMES = (
    "Python", "Machine Learning", "Deep Learning", "NLP", "Data Science",
    "SQL", "Docker", "Kubernetes", "React", "JavaScript", "Java", "C++", "C#",
    "TensorFlow", "PyTorch", "Pandas", "NumPy", "Tableau", "Excel",
)
_SKILL_CANONICAL = {name.lower(): name for name in _SKILL_NAMES}

# Lookarounds replace \b: a trailing \b can never match after "+" or "#"
# unless a word character follows, so "C++" and "C#" were effectively
# undetectable in normal text ("C++ and", "C#.").
_SKILL_REGEX = re.compile(
    r"(?<!\w)(" + "|".join(re.escape(name) for name in _SKILL_NAMES) + r")(?!\w)",
    re.IGNORECASE,
)


def extract_skills_from_text(cv_text: str) -> List[str]:
    """Return the known skills mentioned in *cv_text*.

    Matching is case-insensitive. Each skill appears once, in its canonical
    spelling (e.g. "SQL", "JavaScript"), ordered by first occurrence.
    A falsy *cv_text* yields an empty list.
    """
    found = {}
    for match in _SKILL_REGEX.finditer(cv_text or ""):
        raw = match.group(0)
        # Fall back to the raw match for exotic case-folded letters that
        # IGNORECASE accepts but str.lower() does not normalise.
        found.setdefault(_SKILL_CANONICAL.get(raw.lower(), raw), None)
    return list(found)
|
| 132 |
-
|
| 133 |
-
# -------------------- File processing --------------------
|
| 134 |
-
def process_uploaded_file(file_obj: Any) -> Optional[dict]:
    """Extract text and skills from an uploaded CV.

    *file_obj* may be a filesystem path (``str`` / ``os.PathLike``), an object
    exposing the path as ``.name``, or a mapping with a ``"name"`` key.

    Returns:
        ``None`` when *file_obj* is falsy; otherwise either
        ``{"content": str, "skills": [...], "filename": "..."}`` or
        ``{"error": "..."}``. Supported extensions: .pdf, .docx, .txt.
    """
    if not file_obj:
        return None

    # Resolve the path. PathLike is checked first because pathlib.Path also
    # has a ``.name`` attribute, which is only the basename.
    if isinstance(file_obj, (str, os.PathLike)):
        file_path = os.fspath(file_obj)
    else:
        file_path = getattr(file_obj, "name", None)
        if file_path is None:
            try:
                file_path = file_obj["name"]
            except (TypeError, KeyError, IndexError):
                file_path = None

    if not isinstance(file_path, str) or not os.path.exists(file_path):
        return {"error": "Uploaded file not accessible."}

    filename = os.path.basename(file_path)
    lowered = filename.lower()
    try:
        if lowered.endswith(".pdf"):
            with fitz.open(file_path) as doc:
                text_content = "".join(page.get_text() for page in doc)
        elif lowered.endswith(".docx"):
            paragraphs = docx.Document(file_path).paragraphs
            text_content = "".join(p.text + "\n" for p in paragraphs)
        elif lowered.endswith(".txt"):
            with open(file_path, "r", encoding="utf-8") as f:
                text_content = f.read()
        else:
            return {"error": f"Unsupported file type: {filename}"}

        skills = extract_skills_from_text(text_content)
        return {"content": text_content.strip(), "skills": skills, "filename": filename}
    except Exception as e:
        # Deliberate best-effort boundary: any parser or I/O failure is
        # reported to the chat as a message rather than crashing the UI.
        return {"error": f"Error processing file {filename}: {e}"}
|
| 171 |
-
|
| 172 |
-
# -------------------- Weaviate schema helpers --------------------
|
| 173 |
-
def class_exists(class_name: str) -> bool:
    """Return True if the Weaviate schema lists a class named *class_name*.

    A schema response that is not a dict is treated as having no classes.
    """
    schema = weaviate_client.schema.get()
    if not isinstance(schema, dict):
        return False
    return any(
        entry.get("class") == class_name
        for entry in schema.get("classes", [])
    )
|
| 180 |
-
|
| 181 |
def ensure_collections():
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
{"name": "description", "dataType": ["text"]},
|
| 194 |
-
{"name": "skills", "dataType": ["string[]"]},
|
| 195 |
-
{"name": "salaryDetails", "dataType": ["text"]},
|
| 196 |
-
{"name": "workplaceType", "dataType": ["text"]},
|
| 197 |
-
],
|
| 198 |
-
}
|
| 199 |
-
weaviate_client.schema.create_class(job_class)
|
| 200 |
-
|
| 201 |
-
if not class_exists("Application"):
|
| 202 |
-
app_class = {
|
| 203 |
-
"class": "Application",
|
| 204 |
-
"properties": [
|
| 205 |
-
{"name": "applicationId", "dataType": ["string"]},
|
| 206 |
-
{"name": "jobId", "dataType": ["string"]},
|
| 207 |
-
{"name": "applicantName", "dataType": ["text"]},
|
| 208 |
-
{"name": "applicantEmail", "dataType": ["text"]},
|
| 209 |
-
{"name": "coverLetter", "dataType": ["text"]},
|
| 210 |
-
{"name": "cvText", "dataType": ["text"]},
|
| 211 |
-
{"name": "skills", "dataType": ["string[]"]},
|
| 212 |
-
{"name": "createdAt", "dataType": ["date"]},
|
| 213 |
-
],
|
| 214 |
-
}
|
| 215 |
-
weaviate_client.schema.create_class(app_class)
|
| 216 |
-
|
| 217 |
-
if not class_exists("Memory"):
|
| 218 |
-
mem_class = {
|
| 219 |
-
"class": "Memory",
|
| 220 |
-
"properties": [
|
| 221 |
-
{"name": "memoryId", "dataType": ["string"]},
|
| 222 |
-
{"name": "sessionId", "dataType": ["string"]},
|
| 223 |
-
{"name": "text", "dataType": ["text"]},
|
| 224 |
-
{"name": "createdAt", "dataType": ["date"]},
|
| 225 |
-
],
|
| 226 |
-
}
|
| 227 |
-
weaviate_client.schema.create_class(mem_class)
|
| 228 |
|
| 229 |
-
if not class_exists("Opportunities"):
|
| 230 |
-
opp_class = {
|
| 231 |
-
"class": "Opportunities",
|
| 232 |
-
"properties": [
|
| 233 |
-
{"name": "oppId", "dataType": ["string"]},
|
| 234 |
-
{"name": "title", "dataType": ["text"]},
|
| 235 |
-
{"name": "description", "dataType": ["text"]},
|
| 236 |
-
{"name": "skills", "dataType": ["string[]"]},
|
| 237 |
-
],
|
| 238 |
-
}
|
| 239 |
-
weaviate_client.schema.create_class(opp_class)
|
| 240 |
-
|
| 241 |
-
if not class_exists("Project"):
|
| 242 |
-
proj_class = {
|
| 243 |
-
"class": "Project",
|
| 244 |
-
"properties": [
|
| 245 |
-
{"name": "projectId", "dataType": ["string"]},
|
| 246 |
-
{"name": "title", "dataType": ["text"]},
|
| 247 |
-
{"name": "description", "dataType": ["text"]},
|
| 248 |
-
{"name": "skills", "dataType": ["string[]"]},
|
| 249 |
-
],
|
| 250 |
-
}
|
| 251 |
-
weaviate_client.schema.create_class(proj_class)
|
| 252 |
-
|
| 253 |
-
if not class_exists("Team"):
|
| 254 |
-
team_class = {
|
| 255 |
-
"class": "Team",
|
| 256 |
-
"properties": [
|
| 257 |
-
{"name": "teamId", "dataType": ["string"]},
|
| 258 |
-
{"name": "name", "dataType": ["text"]},
|
| 259 |
-
{"name": "projectId", "dataType": ["string"]},
|
| 260 |
-
{"name": "members", "dataType": ["string[]"]},
|
| 261 |
-
{"name": "skills", "dataType": ["string[]"]},
|
| 262 |
-
{"name": "creatorId", "dataType": ["string"]},
|
| 263 |
-
{"name": "createdAt", "dataType": ["date"]},
|
| 264 |
-
{"name": "idea", "dataType": ["text"]},
|
| 265 |
-
],
|
| 266 |
-
}
|
| 267 |
-
weaviate_client.schema.create_class(team_class)
|
| 268 |
-
|
| 269 |
-
# ensure schema exists
|
| 270 |
ensure_collections()
|
| 271 |
|
| 272 |
-
#
|
| 273 |
-
def
|
| 274 |
-
""
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 296 |
|
| 297 |
-
#
|
| 298 |
-
def
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
"You are an expert assistant. Use ONLY the Retrieved Data above to answer the question, "
|
| 309 |
-
"summarize, and include 'Next Steps' for the user."
|
| 310 |
)
|
|
|
|
| 311 |
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
try:
|
| 318 |
-
resp = llm_client.chat.completions.create(
|
| 319 |
-
model=MODEL_NAME,
|
| 320 |
-
messages=[
|
| 321 |
-
{"role": "system", "content": "You are EduNatives Assistant. Be concise and practical."},
|
| 322 |
-
{"role": "user", "content": prompt}
|
| 323 |
-
],
|
| 324 |
-
temperature=0.2,
|
| 325 |
-
max_tokens=1200,
|
| 326 |
-
)
|
| 327 |
-
answer = resp.choices[0].message.content or ""
|
| 328 |
-
except Exception as e:
|
| 329 |
-
print("[RAG LLM Error]", e)
|
| 330 |
-
answer = ""
|
| 331 |
-
return answer, retrieved
|
| 332 |
-
|
| 333 |
-
# -------------------- Embeddings & Recommendations --------------------
|
| 334 |
-
def compute_embedding(text: str) -> List[float]:
    """Return the embedding vector for *text*, or ``[]`` on any failure.

    Blank input returns ``[]`` immediately instead of spending an API call
    that can only fail or produce a meaningless vector.
    """
    if not (text or "").strip():
        return []
    try:
        resp = llm_client.embeddings.create(
            model="Qwen/Qwen3-Embedding-8B",
            input=text,
            encoding_format="float"
        )
        # Accept both a plain-dict response and an object with ``.data``.
        if isinstance(resp, dict):
            data = resp.get("data", [])
            if data and isinstance(data[0], dict):
                return data[0].get("embedding", [])
        if hasattr(resp, "data") and resp.data:
            return resp.data[0].embedding
    except Exception as e:
        # Network/API boundary: log and fall through to the empty result so
        # callers can degrade gracefully.
        print("[compute_embedding] error:", e)
    return []
|
| 350 |
-
|
| 351 |
-
def cosine_similarity(a: List[float], b: List[float]) -> float:
    """Return the cosine similarity of vectors *a* and *b*.

    Returns 0.0 when either vector is empty, the shapes differ, a norm is
    zero or non-finite, or the inputs cannot be converted to floats.
    """
    try:
        va = np.array(a, dtype=float)
        vb = np.array(b, dtype=float)
        # Check shapes explicitly instead of relying on np.dot raising.
        if va.size == 0 or vb.size == 0 or va.shape != vb.shape:
            return 0.0
        denom = np.linalg.norm(va) * np.linalg.norm(vb)
        if denom == 0 or not np.isfinite(denom):
            return 0.0
        return float(np.dot(va, vb) / denom)
    except (TypeError, ValueError) as e:
        print("[cosine_similarity] error:", e)
        return 0.0
|
| 364 |
-
|
| 365 |
-
def recommend_jobs_by_embedding(cv_text: str, top_k: int = 5, jobs_fetch_limit: int = 200) -> str:
    """Rank stored jobs against a CV by embedding cosine similarity.

    The CV is represented by its extracted skills (or its first 500
    characters when no skill is detected). Up to *jobs_fetch_limit* jobs are
    fetched, each is embedded from its skills, title and description, and the
    *top_k* best matches are returned as a Markdown string. Failures are
    reported as a "⚠️ ..." message instead of being raised.
    """
    skills = extract_skills_from_text(cv_text or "")
    user_text = " ".join(skills) if skills else (cv_text or "")[:500]
    user_emb = compute_embedding(user_text)
    if not user_emb:
        return "⚠️ Unable to compute embedding for your CV. Check API keys."

    # fetch jobs
    try:
        # NOTE(review): "*" is kept as the property list exactly as before;
        # confirm the GraphQL Get query accepts it, or list the Job
        # properties explicitly.
        res = weaviate_client.query.get("Job", ["*"]).with_limit(jobs_fetch_limit).do()
        hits = res.get("data", {}).get("Get", {}).get("Job", [])
    except Exception as e:
        print("[recommend_jobs] Weaviate fetch error:", e)
        return "⚠️ Could not fetch jobs from the database."
    if not hits:
        return "⚠️ No jobs found in the database."

    scored_jobs = []
    for hit in hits:
        # GraphQL Get results are flat property dicts; the previous
        # h.get("properties", {}) therefore always produced {}. Keep
        # supporting a nested "properties" key in case a caller provides it.
        props = hit.get("properties") or hit
        title = props.get("title") or ""
        job_text_parts = []
        if props.get("skills"):
            job_text_parts.append(" ".join(props.get("skills")))
        if title:
            job_text_parts.append(title)
        if props.get("description"):
            job_text_parts.append((props.get("description") or "")[:2000])
        job_text = " ".join(job_text_parts).strip() or title
        job_emb = compute_embedding(job_text)
        if not job_emb:
            continue
        scored_jobs.append((cosine_similarity(user_emb, job_emb), props))

    if not scored_jobs:
        return "⚠️ No jobs could be embedded / compared."

    scored_jobs.sort(key=lambda pair: pair[0], reverse=True)
    lines = []
    for score, props in scored_jobs[:top_k]:
        title = props.get("title", "No title")
        company = props.get("companyName", "Unknown company")
        job_id = props.get("jobId", "")
        salary = props.get("salaryDetails") or "Not specified"
        skills_list = props.get("skills") or []
        description = (props.get("description") or "").strip()
        lines.append(
            f"**{title}** at *{company}* \n"
            f"- Job ID: `{job_id}` \n"
            f"- Score: {score:.3f} \n"
            f"- Salary: {salary} \n"
            f"- Skills: {skills_list} \n"
            f"- Description: {description[:600]}{'...' if len(description) > 600 else ''} \n"
            f"---"
        )
    return "\n\n".join(lines)
|
| 422 |
-
|
| 423 |
-
# -------------------- Weaviate save/update helpers --------------------
|
| 424 |
-
def save_application_to_weaviate(app: dict) -> bool:
    """Store *app* as an "Application" object, using its applicationId as UUID.

    Returns True on success; on any exception the error is printed and
    False is returned.
    """
    try:
        weaviate_client.data_object.create(app, "Application", uuid=app.get("applicationId"))
    except Exception as e:
        print("[save_application] error:", e)
        return False
    return True
|
| 431 |
-
|
| 432 |
-
def save_team_to_weaviate(team_props: dict) -> bool:
    """Store *team_props* as a "Team" object, using its teamId as UUID.

    Returns True on success; on any exception the error is printed and
    False is returned.
    """
    try:
        weaviate_client.data_object.create(team_props, "Team", uuid=team_props.get("teamId"))
    except Exception as e:
        print("[save_team] error:", e)
        return False
    return True
|
| 439 |
-
|
| 440 |
-
def update_team_add_member(team_name: str, member_name: str, skills: List[str]) -> str:
    """Add *member_name* and *skills* to the team named *team_name*.

    The team is looked up by exact name (first hit only). The member is
    appended only if not already present, and skills are merged without
    duplicates in first-seen order. Returns a user-facing status message;
    errors are printed and reported as a failure message, never raised.
    """
    try:
        query = weaviate_client.query.get("Team", ["teamId", "name", "members", "skills"]).with_where({
            "path": ["name"],
            "operator": "Equal",
            "valueString": team_name
        }).with_limit(1)
        res = query.do()
        hits = res.get("data", {}).get("Get", {}).get("Team", [])
        if not hits:
            return "⚠️ Team not found."

        record = hits[0]
        # GraphQL Get results are flat property dicts; the previous
        # obj.get("properties", {}) always produced {} and hence a None
        # team id. A nested "properties" key is still honoured if present.
        props = record.get("properties") or record
        team_id = props.get("teamId")
        if not team_id:
            print("[update_team_add_member] error: team has no teamId")
            return "⚠️ Failed to add member to team."

        members = list(props.get("members") or [])
        if member_name not in members:
            members.append(member_name)
        # dict.fromkeys de-duplicates while keeping a stable order.
        skills_list = list(dict.fromkeys(list(props.get("skills") or []) + list(skills or [])))

        weaviate_client.data_object.update({"members": members, "skills": skills_list}, "Team", uuid=team_id)
        return f"✅ {member_name} added to team '{team_name}'."
    except Exception as e:
        print("[update_team_add_member] error:", e)
        return "⚠️ Failed to add member to team."
|
| 463 |
-
|
| 464 |
-
# -------------------- Session / State machine --------------------
|
| 465 |
-
def initial_session() -> dict:
    """Return a fresh session: state "idle" with an empty data dict."""
    session = {"state": "idle", "data": {}}
    return session
|
| 467 |
-
|
| 468 |
-
def handle_uploaded_cv_for_session(session: dict, uploaded_file: Any) -> Tuple[str, dict]:
    """Process an uploaded CV and advance the conversation accordingly.

    Stores the CV text and detected skills in ``session["data"]``, then:
    in state "apply_wait_cv" moves to "apply_jobtitle" and asks for a job
    title; in state "recommend_wait_cv" returns job recommendations and a
    fresh session; otherwise just acknowledges the upload.

    Returns:
        ``(bot_message, session)``.
    """
    if not uploaded_file:
        return "⚠️ No file received.", session

    parsed = process_uploaded_file(uploaded_file)
    if not parsed:
        return "⚠️ Error processing uploaded CV: unknown error", session
    if "error" in parsed:
        return f"⚠️ Error processing uploaded CV: {parsed.get('error')}", session

    data = session["data"]
    data["cvText"] = parsed.get("content", "")
    data["cvSkills"] = parsed.get("skills", [])

    state = session.get("state")
    if state == "apply_wait_cv":
        session["state"] = "apply_jobtitle"
        detected = data["cvSkills"]
        return f"CV received. Detected skills: {detected}. Which job title do you want to apply for? (type job title or 'any')", session
    if state == "recommend_wait_cv":
        rec_text = recommend_jobs_by_embedding(data["cvText"], top_k=5)
        # The recommend flow ends here, so hand back a fresh session.
        return f"Here are recommended jobs based on your CV:\n\n{rec_text}", initial_session()
    return "CV uploaded and processed. What would you like to do next?", session
|
| 486 |
-
|
| 487 |
-
def handle_user_message(session: dict, user_text: str, uploaded_file: Any = None) -> Tuple[str, dict, bool]:
|
| 488 |
-
session = session or initial_session()
|
| 489 |
-
st = session.get("state", "idle")
|
| 490 |
-
text = (user_text or "").strip()
|
| 491 |
-
|
| 492 |
-
# quick reset
|
| 493 |
-
if text.lower() in ("cancel", "exit", "quit", "restart", "reset"):
|
| 494 |
-
return "Conversation reset. How can I help you now?", initial_session(), False
|
| 495 |
-
|
| 496 |
-
# file upload route
|
| 497 |
-
if uploaded_file:
|
| 498 |
-
bot_msg, new_session = handle_uploaded_cv_for_session(session, uploaded_file)
|
| 499 |
-
return bot_msg, new_session, False
|
| 500 |
-
|
| 501 |
-
# IDLE
|
| 502 |
-
if st == "idle":
|
| 503 |
-
low = text.lower()
|
| 504 |
-
if low in ("hi", "hello", "hey", "مرحبا", "ازيك", "السلام عليكم"):
|
| 505 |
-
return "👋 Hello! How can I support you today? You can ask about jobs, teams, or recommendations.", session, False
|
| 506 |
-
if low in ("who are you?", "who are you", "انت مين", "من انت"):
|
| 507 |
-
return ("👋 I am EduNatives Assistant — your friendly academic and career guide."), session, False
|
| 508 |
-
|
| 509 |
-
route = route_intent(text)
|
| 510 |
-
# 1) KB first
|
| 511 |
-
if route.intent in KB:
|
| 512 |
-
return KB[route.intent].get(route.language, KB[route.intent].get("en", "")), session, False
|
| 513 |
-
|
| 514 |
-
# 2) If intent is a RAG-related class -> call rag
|
| 515 |
-
if route.intent in {"Job", "Application", "Memory", "Opportunities", "Project", "Team"}:
|
| 516 |
-
try:
|
| 517 |
-
rag_ans, items = rag_answer(text, route.intent, top_k=5)
|
| 518 |
-
if rag_ans:
|
| 519 |
-
return rag_ans, session, False
|
| 520 |
-
except Exception as e:
|
| 521 |
-
print("[handle_user_message] rag error:", e)
|
| 522 |
-
|
| 523 |
-
# 3) fallback to LLM normal chat
|
| 524 |
-
try:
|
| 525 |
-
resp = llm_client.chat.completions.create(
|
| 526 |
-
model=MODEL_NAME,
|
| 527 |
-
messages=[
|
| 528 |
-
{"role": "system", "content": "You are EduNatives Assistant. Be concise and helpful."},
|
| 529 |
-
{"role": "user", "content": text},
|
| 530 |
-
],
|
| 531 |
-
temperature=0.5,
|
| 532 |
-
max_tokens=800
|
| 533 |
-
)
|
| 534 |
-
answer = resp.choices[0].message.content or ""
|
| 535 |
-
return answer, session, False
|
| 536 |
-
except Exception as e:
|
| 537 |
-
print("[handle_user_message] LLM error:", e)
|
| 538 |
-
return "⚠️ Sorry, I couldn't process that right now. Try again later.", session, False
|
| 539 |
-
|
| 540 |
-
# ---------- APPLY FLOW ----------
|
| 541 |
-
if st == "apply_name":
|
| 542 |
-
session["data"]["applicantName"] = text or "Applicant"
|
| 543 |
session["state"] = "apply_email"
|
| 544 |
-
return "
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
m = re.search(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)", text)
|
| 548 |
-
session["data"]["applicantEmail"] = m.group(1) if m else text
|
| 549 |
session["state"] = "apply_cover"
|
| 550 |
-
return "
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
| 592 |
-
|
| 593 |
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
session["data"]["owner"] = text
|
| 621 |
-
session["state"] = "team_create_skills"
|
| 622 |
-
return "Owner saved. Please list the team's skills (comma-separated).", session, False
|
| 623 |
-
|
| 624 |
-
if st == "team_create_skills":
|
| 625 |
-
session["data"]["skills"] = [s.strip() for s in text.split(",") if s.strip()]
|
| 626 |
-
session["state"] = "team_create_course"
|
| 627 |
-
return "Skills saved. (Optional) Enter course/subject name or type 'skip'.", session, False
|
| 628 |
-
|
| 629 |
-
if st == "team_create_course":
|
| 630 |
-
session["data"]["course"] = "" if text.lower() == "skip" else text
|
| 631 |
-
session["state"] = "team_create_idea"
|
| 632 |
-
return "Please write a short idea/description for the project.", session, False
|
| 633 |
-
|
| 634 |
-
if st == "team_create_idea":
|
| 635 |
-
session["data"]["idea"] = text
|
| 636 |
-
team_props = {
|
| 637 |
-
"teamId": str(uuid.uuid4()),
|
| 638 |
-
"name": session["data"].get("team_name"),
|
| 639 |
-
"projectId": None,
|
| 640 |
-
"members": [session["data"].get("owner")],
|
| 641 |
-
"skills": session["data"].get("skills", []),
|
| 642 |
-
"creatorId": session["data"].get("owner"),
|
| 643 |
-
"createdAt": get_rfc3339_time(),
|
| 644 |
-
"idea": session["data"].get("idea", "")
|
| 645 |
-
}
|
| 646 |
-
saved = save_team_to_weaviate(team_props)
|
| 647 |
-
session = initial_session()
|
| 648 |
-
return (f"🎉 Team '{team_props['name']}' created! Members: {team_props['members']}" if saved
|
| 649 |
-
else "⚠️ Failed to create team. Try again later."), session, False
|
| 650 |
-
|
| 651 |
-
if st == "team_join_name":
|
| 652 |
-
session["data"]["team_name"] = text
|
| 653 |
-
session["state"] = "team_join_member"
|
| 654 |
-
return "What's your name (to add you to the team)?", session, False
|
| 655 |
-
|
| 656 |
-
if st == "team_join_member":
|
| 657 |
-
session["data"]["member_name"] = text
|
| 658 |
-
session["state"] = "team_join_skills"
|
| 659 |
-
return "Enter your skills (comma-separated).", session, False
|
| 660 |
-
|
| 661 |
-
if st == "team_join_skills":
|
| 662 |
-
skills = [s.strip() for s in text.split(",") if s.strip()]
|
| 663 |
-
resp = update_team_add_member(session["data"].get("team_name"), session["data"].get("member_name"), skills)
|
| 664 |
-
session = initial_session()
|
| 665 |
-
return resp, session, False
|
| 666 |
-
|
| 667 |
-
# ---------- RECOMMEND FLOW ----------
|
| 668 |
-
if st == "recommend_wait_cv":
|
| 669 |
-
return "Please upload your CV (use the Upload button).", session, True
|
| 670 |
-
|
| 671 |
-
# default fallback
|
| 672 |
-
return "Sorry — I didn't understand that. You can say 'apply', 'create team', 'join team' or 'recommend'.", session, False
|
| 673 |
-
|
| 674 |
-
# -------------------- UI wiring (Gradio) --------------------
|
| 675 |
-
def format_chat_html(history: List[Dict[str, str]]) -> str:
    """Render the chat history as HTML bubbles inside a ``.chatbot`` div.

    User messages are HTML-escaped before insertion so typed markup cannot
    inject elements or scripts into the page. Any other role is rendered
    from Markdown (with the ``tables`` extension). A falsy *history* renders
    an empty container.
    """
    from html import escape  # local import: no new file-level dependency

    parts = ["<div class='chatbot'>"]
    for msg in history or []:
        content = msg["content"]
        if msg["role"] == "user":
            parts.append(f"<div class='user-bubble'>{escape(content)}</div>")
        else:
            # NOTE(review): Markdown output is inserted unsanitised; it
            # originates from the assistant, not from the user.
            rendered = markdown.markdown(content, extensions=['tables'])
            parts.append(f"<div class='bot-bubble'>{rendered}</div>")
    parts.append("</div>")
    return "".join(parts)
|
| 687 |
-
|
| 688 |
-
# minimal CSS + UI
|
| 689 |
-
with gr.Blocks(css="""
.chatbot {height: 520px; overflow: auto;}
.user-bubble {background-color: #DCF8C6; padding: 10px; border-radius: 12px; max-width: 75%; float: right; clear: both; margin: 5px; word-wrap: break-word;}
.bot-bubble {background-color: #F1F0F0; padding: 10px; border-radius: 12px; max-width: 75%; float: left; clear: both; margin: 5px; word-wrap: break-word;}
.chatbox-container {display: flex; gap: 8px; margin-top: 10px;}
""") as demo:

    gr.Markdown("# 💬 EduNatives — Conversational Job Portal")

    chat_html = gr.HTML(format_chat_html([]))

    with gr.Row(elem_classes="chatbox-container"):
        user_input = gr.Textbox(placeholder="Type your message here (e.g. 'apply', 'create team', 'recommend')", lines=2)
        send_btn = gr.Button("Send", variant="primary")

    # The row itself stays visible: the handlers only toggle the two child
    # components, so a hidden parent row would keep the uploader invisible
    # even after a handler asked for it to be shown.
    with gr.Row() as file_row:
        cv_uploader = gr.File(label="Upload CV (.pdf/.docx/.txt)", file_count="single", file_types=[".pdf", ".docx", ".txt"], visible=False)
        upload_btn = gr.Button("Upload CV", visible=False)

    with gr.Row():
        clear_btn = gr.Button("Reset Conversation")
        instructions = gr.Markdown("Commands: `apply`, `create team`, `join team`, `recommend` — the bot will guide you step-by-step.")

    chat_history_state = gr.State([])
    session_state = gr.State(initial_session())

    def append_to_history(history: List[Dict[str, str]], role: str, content: str) -> List[Dict[str, str]]:
        """Append one ``{"role", "content"}`` message to *history* and return it."""
        history = history or []
        history.append({"role": role, "content": content})
        return history

    def handle_send(message: str, history: List[Dict[str, str]], session: dict):
        """Send-button callback: route the typed message through the state machine.

        Returns values for: textbox (cleared), chat HTML, history state,
        session state, and the visibility of the uploader and its button.
        """
        history = history or []
        session = session or initial_session()
        if message and message.strip():
            history = append_to_history(history, "user", message.strip())
        bot_reply, new_session, show_uploader = handle_user_message(session, message or "", uploaded_file=None)
        history = append_to_history(history, "assistant", bot_reply or "…")
        html = format_chat_html(history)
        return "", html, history, new_session, gr.update(visible=show_uploader), gr.update(visible=show_uploader)

    def handle_upload(file_obj, history: List[Dict[str, str]], session: dict):
        """Upload-button callback: pass the selected CV to the state machine.

        Returns values for: chat HTML, history state, session state, and the
        visibility of the uploader and its button.
        """
        history = history or []
        session = session or initial_session()
        if file_obj is None:
            # No file selected: report it and leave session and uploader
            # visibility untouched instead of feeding an empty message into
            # the state machine.
            history = append_to_history(history, "assistant", "⚠️ No file received.")
            return format_chat_html(history), history, session, gr.update(), gr.update()
        filename = getattr(file_obj, "name", "uploaded_file")
        history = append_to_history(history, "user", f"📎 Uploaded file: {filename}")
        bot_reply, new_session, show_uploader = handle_user_message(session, "", uploaded_file=file_obj)
        history = append_to_history(history, "assistant", bot_reply or "…")
        html = format_chat_html(history)
        return html, history, new_session, gr.update(visible=show_uploader), gr.update(visible=show_uploader)

    def handle_reset(history, session):
        """Reset-button callback: clear history and session, hide the uploader."""
        new_hist = []
        new_session = initial_session()
        html = format_chat_html(new_hist)
        return html, new_hist, new_session, gr.update(visible=False), gr.update(visible=False)

    send_btn.click(
        fn=handle_send,
        inputs=[user_input, chat_history_state, session_state],
        outputs=[user_input, chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
        queue=True
    )
    upload_btn.click(
        fn=handle_upload,
        inputs=[cv_uploader, chat_history_state, session_state],
        outputs=[chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
        queue=True
    )
    clear_btn.click(
        fn=handle_reset,
        inputs=[chat_history_state, session_state],
        outputs=[chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
        queue=False
    )

if __name__ == "__main__":
    demo.launch(debug=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import json
|
| 2 |
+
import weaviate
|
| 3 |
+
import fitz
|
| 4 |
import docx
|
| 5 |
+
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
import gradio as gr
|
| 7 |
from openai import OpenAI
|
| 8 |
+
from weaviate.classes.init import Auth
|
| 9 |
+
from weaviate.classes.config import Property, DataType
|
| 10 |
+
from sklearn.metrics.pairwise import cosine_similarity
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
|
| 12 |
+
# --- Config ---
|
| 13 |
+
# Connection settings come from the environment; each falls back to a
# placeholder default when the variable is not set.
WEAVIATE_URL = os.environ.get("WEAVIATE_URL", "http://localhost:8080")
WEAVIATE_API_KEY = os.environ.get("WEAVIATE_API_KEY", "YOUR_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "YOUR_KEY")
|
| 16 |
|
| 17 |
+
# --- Clients ---
|
| 18 |
+
def _connect_weaviate(url, api_key):
    """Open a connected Weaviate v4 client for ``url``.

    ``weaviate.WeaviateClient`` has no ``url`` parameter and must be
    connected explicitly, so the v4 connect helpers are used instead: a
    localhost URL goes through ``connect_to_local``, anything else is
    treated as a Weaviate Cloud cluster URL.
    """
    from urllib.parse import urlparse

    parsed = urlparse(url)
    host = parsed.hostname or "localhost"
    auth = Auth.api_key(api_key) if api_key else None
    if host in ("localhost", "127.0.0.1"):
        return weaviate.connect_to_local(
            host=host,
            port=parsed.port or 8080,
            auth_credentials=auth,
        )
    # Older 4.x releases expose the cloud helper under its previous name.
    connect_cloud = getattr(weaviate, "connect_to_weaviate_cloud", None) or weaviate.connect_to_wcs
    return connect_cloud(cluster_url=url, auth_credentials=auth)


client = _connect_weaviate(WEAVIATE_URL, WEAVIATE_API_KEY)
openai_client = OpenAI(api_key=OPENAI_API_KEY)
|
| 23 |
|
| 24 |
+
# --- Ensure Collections ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
def ensure_collections():
    """Create every collection the app uses that does not exist yet.

    Each collection is declared by the names of its properties; all
    properties are of type ``DataType.TEXT``. Collections that already
    exist are left untouched.
    """
    text_fields_by_collection = {
        "Job": ("title", "description"),
        "Application": ("name", "email"),
        "Memory": ("content",),
        "Opportunities": ("details",),
        "Project": ("name", "description"),
        "Team": ("member", "role"),
    }
    for cname, field_names in text_fields_by_collection.items():
        if client.collections.exists(cname):
            continue
        props = [Property(name=field, data_type=DataType.TEXT) for field in field_names]
        client.collections.create(name=cname, properties=props)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
# Runs at import time, so missing collections are created before the UI is built.
ensure_collections()
|
| 39 |
|
| 40 |
+
# --- Embeddings ---
|
| 41 |
+
def get_embedding(text):
    """Return the ``text-embedding-3-small`` embedding of ``text``.

    Sends one request through ``openai_client`` and returns the embedding
    of the first (only) item in the response.
    """
    response = openai_client.embeddings.create(
        model="text-embedding-3-small",
        input=text,
    )
    first_item = response.data[0]
    return first_item.embedding
|
| 44 |
+
|
| 45 |
+
def recommend_jobs_by_embedding(cv_text, jobs, top_n=3):
    """Rank ``jobs`` by embedding similarity to a CV and return the best ones.

    Args:
        cv_text: Plain text of the candidate's CV.
        jobs: Iterable of dicts; each must have a ``"description"`` key.
        top_n: Maximum number of jobs to return.

    Returns:
        Up to ``top_n`` job dicts, most similar first. Returns ``[]`` without
        calling the embedding API when there are no jobs or ``top_n`` is not
        positive (``cosine_similarity`` rejects an empty matrix).
    """
    jobs = list(jobs)
    if not jobs or top_n <= 0:
        return []
    cv_embedding = get_embedding(cv_text)
    job_embeddings = [get_embedding(job["description"]) for job in jobs]
    scores = cosine_similarity([cv_embedding], job_embeddings)[0]
    ranked = sorted(zip(jobs, scores), key=lambda pair: pair[1], reverse=True)
    return [job for job, _ in ranked[:top_n]]
|
| 51 |
+
|
| 52 |
+
# --- File Upload Handling ---
|
| 53 |
+
def process_uploaded_file(file_path):
    """Extract plain text from an uploaded CV.

    Supported extensions (case-insensitive) are ``.pdf`` (read with
    ``fitz``), ``.docx`` (read with ``docx``) and ``.txt``. Any other
    extension yields an empty string.

    Args:
        file_path: Path of the uploaded file on disk.

    Returns:
        The extracted text with leading and trailing whitespace removed.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        with fitz.open(file_path) as pdf:
            text = "".join(page.get_text() for page in pdf)
    elif ext == ".docx":
        paragraphs = docx.Document(file_path).paragraphs
        text = "".join(para.text + "\n" for para in paragraphs)
    elif ext == ".txt":
        # utf-8-sig drops a leading BOM; errors="replace" keeps a file with
        # stray non-UTF-8 bytes from aborting the whole upload.
        with open(file_path, "r", encoding="utf-8-sig", errors="replace") as f:
            text = f.read()
    else:
        text = ""
    return text.strip()
|
| 68 |
+
|
| 69 |
+
# --- Session Management ---
|
| 70 |
+
def initial_session():
    """Build a new session: idle state, no collected data, empty history."""
    return dict(state="idle", data={}, history=[])
|
| 72 |
+
|
| 73 |
+
def handle_uploaded_cv_for_session(session, file_path):
    """Extract the text of an uploaded CV and store it on the session.

    The text is saved under ``session["data"]["cv_text"]``; the same
    session object is returned.
    """
    extracted = process_uploaded_file(file_path)
    collected = session["data"]
    collected["cv_text"] = extracted
    return session
|
| 77 |
+
|
| 78 |
+
# --- KB ---
|
| 79 |
+
# Canned answers; handle_user_message returns one when its key appears in
# the lower-cased user message.
KB_RESPONSES = dict(
    student_registration="You can register as a student on the portal...",
    student_internships="Internships are listed under opportunities section...",
)
|
| 83 |
|
| 84 |
+
# --- RAG Query ---
|
| 85 |
+
def rag_query(collection, query_text):
    """Return the three objects in ``collection`` nearest to ``query_text``.

    The query text is embedded with ``get_embedding`` and sent as a
    near-vector search through the v4 collections API (the v3
    ``client.query.get(...).do()`` builder does not exist on a v4 client).

    Returns:
        A JSON-serialisable dict shaped like the v3 GraphQL response,
        ``{"data": {"Get": {collection: [properties, ...]}}}``, so the
        caller can keep passing it to ``json.dumps``.
    """
    query_embedding = get_embedding(query_text)
    response = client.collections.get(collection).query.near_vector(
        near_vector=query_embedding,
        limit=3,
    )
    hits = [dict(obj.properties) for obj in response.objects]
    return {"data": {"Get": {collection: hits}}}
|
| 89 |
+
|
| 90 |
+
# --- LLM Chat ---
|
| 91 |
+
def llm_chat(prompt):
    """Send ``prompt`` to ``gpt-4o-mini`` and return the reply text.

    The request carries a fixed system message followed by the prompt as
    the user message; the content of the first choice is returned.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful chatbot."},
        {"role": "user", "content": prompt},
    ]
    completion = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
|
| 97 |
|
| 98 |
+
# --- Flows ---
|
| 99 |
+
def apply_flow(session, message):
    """Advance the job-application dialogue by one step.

    The flow is driven by ``session["state"]``:
    ``apply_name`` -> ``apply_email`` -> ``apply_cover`` -> ``idle``.
    In any other state the flow is (re)started and the name is requested.

    Starting the flow updates the session in place instead of replacing
    it, so unrelated data such as an uploaded ``cv_text`` and the
    ``history`` key survive; only answers from a previous application are
    cleared.

    Args:
        session: Session dict; mutated in place.
        message: The user's latest message, stored verbatim as the answer
            to the current step.

    Returns:
        ``(reply, session)``.
    """
    state = session.get("state", "idle")
    data = session.setdefault("data", {})

    if state == "apply_name":
        data["name"] = message
        session["state"] = "apply_email"
        return "Please provide your email.", session
    if state == "apply_email":
        data["email"] = message
        session["state"] = "apply_cover"
        return "Please provide your cover letter.", session
    if state == "apply_cover":
        data["cover"] = message
        session["state"] = "idle"
        return "Your application has been recorded.", session

    # Fresh application: drop answers left over from an earlier one.
    for field in ("name", "email", "cover"):
        data.pop(field, None)
    session["state"] = "apply_name"
    return "Let's start your application. What's your name?", session
|
| 114 |
+
|
| 115 |
+
def team_flow(session, message):
    """Return a fixed acknowledgement; the session is passed back unchanged."""
    reply = "Team flow triggered. Add member info."
    return reply, session
|
| 117 |
+
|
| 118 |
+
def recommend_flow(session, message):
    """Recommend jobs for the CV stored on the session.

    Args:
        session: Session dict; the CV text is read from
            ``session["data"]["cv_text"]``.
        message: The user's message (unused).

    Returns:
        ``(reply, session)``. When no CV text is available (missing, or
        empty because nothing could be extracted from the upload) the
        reply asks for an upload instead of sending an empty string to the
        embedding API.
    """
    cv_text = session.get("data", {}).get("cv_text")
    if not cv_text:
        return "Please upload your CV first.", session
    jobs = [
        {"title": "AI Intern", "description": "Work on NLP"},
        {"title": "ML Engineer", "description": "Build models"},
    ]
    recs = recommend_jobs_by_embedding(cv_text, jobs)
    return f"Recommended jobs: {[j['title'] for j in recs]}", session
|
| 124 |
+
|
| 125 |
+
# --- Main Handler ---
|
| 126 |
+
def handle_user_message(session, message):
    """Route one user message to the matching handler.

    Order of checks:
      1. An application already in progress (state ``apply_*``) - the
         message is the answer to the pending question, so it goes straight
         back to ``apply_flow`` even though it will not contain "apply".
      2. Canned knowledge-base answers whose key occurs in the message.
      3. A vector search when a collection name occurs in the message.
      4. Flow triggers ("apply", "team", "recommend").
      5. Otherwise the message is sent to the LLM.

    Args:
        session: Session dict.
        message: Raw user text; ``None`` is treated as an empty string.

    Returns:
        ``(reply, session)``.
    """
    message = message or ""
    lower = message.lower()

    # Continue a multi-step application before any keyword matching.
    if str(session.get("state", "")).startswith("apply_"):
        return apply_flow(session, message)

    # KB check
    for key, answer in KB_RESPONSES.items():
        if key in lower:
            return answer, session

    # RAG check
    # NOTE(review): "team" also matches the "Team" collection here, so the
    # team_flow trigger below can never be reached - confirm which of the two
    # is intended to win.
    for collection in ["Job", "Application", "Memory", "Opportunities", "Project", "Team"]:
        if collection.lower() in lower:
            results = rag_query(collection, message)
            return f"RAG Results from {collection}: {json.dumps(results, indent=2)}", session

    # Flow triggers
    if "apply" in lower:
        return apply_flow(session, message)
    if "team" in lower:
        return team_flow(session, message)
    if "recommend" in lower:
        return recommend_flow(session, message)

    # Default LLM
    return llm_chat(message), session
|
| 150 |
+
|
| 151 |
+
# --- Gradio App ---
|
| 152 |
+
# Process-wide session used only by chat_with_bot(). The UI below keeps a
# separate session per visitor in gr.State, so one user's CV and application
# answers are not visible to another.
session = initial_session()


def _upload_path(file):
    """Return the path of a Gradio upload.

    Depending on the Gradio version the File component passes either a
    plain path string or a temp-file wrapper exposing ``.name``.
    """
    return getattr(file, "name", file)


def chat_with_bot(message, file=None):
    """Answer ``message`` (or ingest ``file``) using the process-wide session.

    When ``file`` is given, its text is stored as the CV and a fixed
    confirmation is returned; otherwise the message is routed through
    ``handle_user_message``.
    """
    global session
    if file is not None:
        session = handle_uploaded_cv_for_session(session, _upload_path(file))
        return "CV uploaded successfully!"
    reply, session = handle_user_message(session, message)
    return reply


with gr.Blocks(title="Edunatives Chatbot") as demo:
    gr.Markdown("# 🎓 Edunatives Chatbot")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type your message here...")
    file_upload = gr.File(label="Upload CV (PDF/DOCX/TXT)")
    clear = gr.Button("Clear Chat")
    session_state = gr.State(initial_session())

    def respond(message, history, file, user_session):
        """Handle one submit: ingest a pending upload, then answer the text.

        The file component is cleared afterwards; otherwise the same upload
        would be seen again on every later submit and the typed message
        would never be processed.
        """
        history = list(history or [])
        user_session = user_session or initial_session()
        if file is not None:
            path = _upload_path(file)
            user_session = handle_uploaded_cv_for_session(user_session, path)
            history.append(
                (f"📎 Uploaded file: {os.path.basename(path)}", "CV uploaded successfully!")
            )
        if message and message.strip():
            reply, user_session = handle_user_message(user_session, message)
            history.append((message, reply))
        return history, "", None, user_session

    msg.submit(
        respond,
        [msg, chatbot, file_upload, session_state],
        [chatbot, msg, file_upload, session_state],
    )
    clear.click(lambda: ([], ""), None, [chatbot, msg])
|
| 176 |
+
|
| 177 |
+
# Serve the app on all network interfaces, port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|