Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,129 +1,105 @@
|
|
| 1 |
from __future__ import annotations
|
| 2 |
import os
|
|
|
|
|
|
|
| 3 |
import json
|
| 4 |
import time
|
| 5 |
-
|
| 6 |
-
|
| 7 |
import markdown
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
import gradio as gr
|
| 9 |
from openai import OpenAI
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
|
| 11 |
-
#
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
# SECURITY: the DeepInfra key is read from the environment rather than being
# committed in source. Configure DEEPINFRA_API_KEY as a secret of the
# deployment; a key that was ever hard-coded here should be treated as leaked
# and rotated. An unset variable yields an empty string.
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY", "")
BASE_URL = "https://api.deepinfra.com/v1/openai"
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
| 20 |
|
| 21 |
-
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
(
|
| 26 |
-
(0xFB50, 0xFDFF), (0xFE70, 0xFEFF), (0x1EE00, 0x1EEFF)
|
| 27 |
)
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
audience: str
|
| 32 |
-
intent: str
|
| 33 |
-
language: str
|
| 34 |
-
|
| 35 |
-
# Canned Markdown answers, keyed by intent and then by language code
# ("en" / "ar"). Every answer is a bold title, a blank line, and then one
# step or bullet per line.
KB: Dict[str, Dict[str, str]] = {
    "student_registration": {
        "en": "\n".join([
            "**How to register / create an account (Student)**",
            "",
            "1. Go to the EduNatives site and choose Sign Up.",
            "2. Use your university email if possible and verify it.",
            "3. Complete your profile (major, skills, interests).",
            "4. Enable notifications for internships/scholarships.",
        ]),
        "ar": "\n".join([
            "**طريقة التسجيل وإنشاء حساب (طلاب)**",
            "",
            "١. اذهب إلى موقع EduNatives واختر Sign Up.",
            "٢. يفضل استخدام إيميل الجامعة وتأكيده.",
            "٣. أكمل ملفك الشخصي (التخصص، المهارات، الاهتمامات).",
            "٤. فعّل التنبيهات لفرص التدريب والمنح.",
        ]),
    },
    "student_internships": {
        "en": "\n".join([
            "**Finding internships & scholarships**",
            "",
            "- Use the search filters: field, location, duration, paid/unpaid.",
            "- Follow companies and set up alerts for new opportunities.",
            "- Keep your profile and resume updated.",
        ]),
        "ar": "\n".join([
            "**كيفية العثور على تدريب أو منحة**",
            "",
            "- استخدم فلاتر البحث: التخصص، المكان، المدة، مدفوع/غير مدفوع.",
            "- تابع الشركات وفعّل التنبيهات للفرص الجديدة.",
            "- حافظ على تحديث ملفك الشخصي وسيرتك الذاتية.",
        ]),
    },
}
|
| 67 |
-
|
| 68 |
-
# Trigger keywords per intent. Each list is the English keywords followed by
# the Arabic ones, in the same order as before.
KEYS = {
    "student_registration": (
        ["register", "sign up", "signup", "create account", "account"]
        + ["تسجيل", "انشاء", "إنشاء", "حساب", "اعمل حساب", "سجل"]
    ),
    "student_internships": (
        ["intern", "internship", "training", "scholar", "scholarship", "grant", "opportunity"]
        + ["تدريب", "تدريبي", "منحة", "منح", "فرصة", "فرص", "انترنشيب"]
    ),
    "student_mentors": (
        ["mentor", "advisor", "professor", "supervisor", "faculty", "connect"]
        + ["منتور", "مشرف", "دكتور", "أستاذ", "استاذ", "التواصل", "اكلم"]
    ),
    "university_publish": (
        ["publish", "paper", "research", "preprint", "conference", "event", "seminar", "webinar"]
        + ["نشر", "أبحاث", "ابحاث", "بحث", "مؤتمر", "فعالية", "فعاليات", "ندوة", "ورشة"]
    ),
    "university_connect": (
        ["students", "connect with students", "reach students", "collaborate"]
        + ["طلاب", "تواصل مع الطلاب", "التواصل مع الطلاب", "تعاون"]
    ),
    "company_post_jobs": (
        ["job", "jobs", "post job", "hiring", "hire", "internships", "graduate"]
        + ["وظيفة", "وظائف", "اعلان", "إعلان", "نشر وظيفة", "توظيف", "فرص تدريب", "خريجين"]
    ),
    "company_find_talent": (
        ["talent", "candidate", "recruit", "search", "find", "pipeline"]
        + ["موهبة", "مواهب", "مرشحين", "تعيين", "تجنيد", "ابحث", "دور على"]
    ),
}
|
| 98 |
-
|
| 99 |
-
# Maps every intent key to the audience it belongs to. Built from
# audience -> intents groups; the resulting key order is students,
# universities, companies.
AUDIENCE_MAP = {
    intent: audience
    for audience, intents in (
        ("student", ("student_registration", "student_internships", "student_mentors")),
        ("university", ("university_publish", "university_connect")),
        ("company", ("company_post_jobs", "company_find_talent")),
    )
    for intent in intents
}
|
| 108 |
|
| 109 |
# Base system prompt: assistant persona, per-audience goals, and general
# answering rules. The fragments are joined without any separator.
SYSTEM_PROMPT_BASE = "".join([
    "You are **EduNatives Assistant**, a helpful, friendly, and precise academic/career guide. ",
    "You serve three primary audiences: Students, Universities/Researchers, and Companies.\n\n",
    "Goals by audience:\n",
    "- Students: registration/account help; finding internships/scholarships; connecting with mentors or professors.\n",
    "- Universities/Researchers: publish research or announce events; connect/collaborate with students.\n",
    "- Companies: post jobs/internships/graduate roles; discover student talent.\n\n",
    "General rules:\n",
    "- Reply in the user's language (Arabic if the user writes Arabic; otherwise English).\n",
    "- Be concise, step-by-step, and action-oriented (lists, bullets, checklists).\n",
    "- If information is unavailable, state that clearly and suggest the next best step.\n",
])
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
)
|
| 125 |
|
| 126 |
-
# --- Core Functions ---
|
| 127 |
def is_arabic(text: str) -> bool:
|
| 128 |
for ch in text:
|
| 129 |
code = ord(ch)
|
|
@@ -132,145 +108,675 @@ def is_arabic(text: str) -> bool:
|
|
| 132 |
return True
|
| 133 |
return False
|
| 134 |
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
|
| 167 |
try:
|
| 168 |
-
resp =
|
| 169 |
model=MODEL_NAME,
|
| 170 |
-
messages=
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
|
|
|
|
|
|
| 174 |
)
|
| 175 |
-
|
| 176 |
except Exception as e:
|
| 177 |
-
print(f"[
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 188 |
)
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 193 |
)
|
| 194 |
|
|
|
|
| 195 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
with gr.Blocks(css="""
|
| 198 |
-
.chatbot {height:
|
| 199 |
.user-bubble {background-color: #DCF8C6; padding: 10px; border-radius: 12px; max-width: 75%; float: right; clear: both; margin: 5px; word-wrap: break-word;}
|
| 200 |
.bot-bubble {background-color: #F1F0F0; padding: 10px; border-radius: 12px; max-width: 75%; float: left; clear: both; margin: 5px; word-wrap: break-word;}
|
| 201 |
.chatbox-container {display: flex; gap: 8px; margin-top: 10px;}
|
| 202 |
-
/* Basic styling for tables inside the bot bubble */
|
| 203 |
-
.bot-bubble table {border-collapse: collapse; width: 100%;}
|
| 204 |
-
.bot-bubble th, .bot-bubble td {border: 1px solid #ddd; padding: 8px; text-align: left;}
|
| 205 |
-
.bot-bubble th {background-color: #e9e9e9;}
|
| 206 |
""") as demo:
|
| 207 |
|
| 208 |
-
gr.Markdown("#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
with gr.Row():
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
choices=["Auto", "Student", "University-Research", "Company"],
|
| 214 |
-
value="Auto",
|
| 215 |
-
interactive=True,
|
| 216 |
-
info="Select your role. 'Auto' detects it from your message."
|
| 217 |
-
)
|
| 218 |
-
clear_btn = gr.Button("🧹 Clear Chat")
|
| 219 |
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
chat_history_state = gr.State([])
|
| 224 |
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
html
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
|
| 254 |
-
|
| 255 |
-
|
| 256 |
-
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
history
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 274 |
|
|
|
|
| 275 |
# Launch the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
import os
|
| 3 |
+
import re
|
| 4 |
+
import uuid
|
| 5 |
import json
|
| 6 |
import time
|
| 7 |
+
import fitz # PyMuPDF
|
| 8 |
+
import docx
|
| 9 |
import markdown
|
| 10 |
+
from datetime import datetime, timezone
|
| 11 |
+
from typing import List, Dict, Any, Optional
|
| 12 |
+
from dataclasses import dataclass
|
| 13 |
+
|
| 14 |
import gradio as gr
|
| 15 |
from openai import OpenAI
|
| 16 |
+
import weaviate
|
| 17 |
+
from weaviate.classes.init import Auth
|
| 18 |
+
from weaviate.classes.config import Configure, Property, DataType
|
| 19 |
+
from weaviate.classes.query import Filter
|
| 20 |
|
| 21 |
+
# -------------------- Configuration --------------------
|
| 22 |
+
# Every setting can be overridden through an environment variable of the same
# name; the second argument is the value used when the variable is unset.
MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b")
# SECURITY: API keys have no in-source default. Provide DEEPINFRA_API_KEY and
# WEAVIATE_API_KEY as deployment secrets; keys that were previously committed
# as defaults should be treated as leaked and rotated.
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY", "")
BASE_URL = os.getenv("BASE_URL", "https://api.deepinfra.com/v1/openai")

WEAVIATE_URL = os.getenv("WEAVIATE_URL", "htorgbgpt4w63nvf1yeuw.c0.us-west3.gcp.weaviate.cloud")
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY", "")

MEMORY_FILE = os.getenv("MEMORY_FILE", "chat_memory.json")
LOG_FILE = os.getenv("LOG_FILE", "interaction_logs.json")
|
| 31 |
+
# -------------------- Clients --------------------
|
| 32 |
+
# OpenAI-compatible client, pointed at BASE_URL and authenticated with
# DEEPINFRA_API_KEY.
llm_client = OpenAI(api_key=DEEPINFRA_API_KEY, base_url=BASE_URL)

# Weaviate Cloud connection for the cluster at WEAVIATE_URL, authenticated
# with WEAVIATE_API_KEY. NOTE(review): this looks like a module-level
# statement, so the connection would be attempted when the module is
# imported — confirm.
weaviate_client = weaviate.connect_to_weaviate_cloud(
    cluster_url=WEAVIATE_URL,
    auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
)
|
| 38 |
|
| 39 |
+
# -------------------- Helpers --------------------
|
| 40 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
|
| 42 |
# Base system prompt sent with every LLM request: assistant persona,
# per-audience goals, and general answering rules. The fragments are joined
# without any separator.
SYSTEM_PROMPT_BASE = "".join([
    "You are **EduNatives Assistant**, a helpful, friendly, and precise academic/career guide. ",
    "You serve three primary audiences: Students, Universities/Researchers, and Companies.\n\n",
    "Goals by audience:\n",
    "- Students: registration/account help; finding internships/scholarships; connecting with mentors or professors; querying projects; applying for jobs/opportunities; joining project teams.\n",
    "- Universities/Researchers: publish research or announce events; connect/collaborate with students.\n",
    "- Companies: post jobs/internships/graduate roles; discover student talent.\n\n",
    "General rules:\n",
    "- Reply in the user's language (Arabic if the user writes Arabic; otherwise English).\n",
    "- Use data from Weaviate collections (Job, Opportunities, Project) when relevant.\n",
    "- Be concise, step-by-step, and action-oriented (lists, bullets, checklists).\n",
    "- If information is unavailable, state that clearly and suggest the next best step.\n",
    "- For CV analysis, extract skills/experience and recommend matching opportunities.\n",
    "- Assist with applications and team matching.\n",
    "- Ensure that all generated prompts are phrased using positive reinforcement.",
])
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
def get_rfc3339_time() -> str:
    """Return the current UTC time as an RFC 3339 timestamp ending in ``Z``.

    Returns:
        An ISO-8601 string for the current instant in UTC, with the
        ``+00:00`` offset replaced by the ``Z`` suffix.
    """
    # `timezone.utc` is used because the file imports only `datetime` and
    # `timezone`; the bare name `UTC` was never imported and raised NameError.
    return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
|
| 63 |
+
|
| 64 |
+
# --- CV Skills Extraction (Regex baseline, can replace with NLP model later) ---
|
| 65 |
+
# Skill names recognised in CV text (matched case-insensitively).
_KNOWN_SKILLS = (
    "Natural Language Processing", "Building Information Modeling",
    "Search Engine Optimization", "Search Engine Marketing",
    "Aerospace Engineering & Management", "Computational Fluid Dynamics",
    "Kotlin Multiplatform", "Google Cloud Platform", "Social Media Marketing",
    "Aerospace Engineering", "Microsoft SQL Server", "Amazon Web Services",
    "Finite Element Analysis", "Technology-based Management",
    "Autodesk Inventor", "Emotional Intelligence",
    "Aerospace Engineering & Operations Management", "Content Marketing",
    "Presentation Skills", "Interpersonal Skills", "Critical Thinking",
    "Financial Modeling", "Decision Making", "Process Improvement",
    "Time Management", "Lean Manufacturing", "Project Management",
    "Microsoft Excel", "Data Visualization", "Computer Vision",
    "Machine Learning", "Deep Learning", "Attention to Detail", "Six Sigma",
    "Risk Analysis", "Data Analysis", "Data Science", "Communication",
    "Collaboration", "Teamwork", "Leadership", "Management", "Adaptability",
    "Creativity", "Innovation", "Negotiation", "Android", "Angular",
    "Ansible", "Apache", "ArcGIS", "Arduino", "Asana", "ASP.NET", "AutoCAD",
    "Azure", "Bash", "BIM", "Business Analysis", "C++", "C#", "CAM",
    "Cassandra", "CATIA", "CentOS", "Chef", "CI/CD", "Civil 3D", "CRM",
    "CSS", "Data Mining", "Django", "Docker", "Elasticsearch",
    "Email Marketing", "ERP", "ETABS", "ETL", "Express.js", "Facebook Ads",
    "Firebase", "Flask", "Flutter", "FPGA", "Fusion 360", "GCP", "Git",
    "GitHub", "GitLab", "Go", "Google Ads", "Google Analytics", "GraphQL",
    "Hadoop", "HTML", "HubSpot", "iOS", "Java", "JavaScript", "Jenkins",
    "Jira", "Jupyter Notebook", "Kanban", "Keras", "Kotlin", "Kubernetes",
    "LabVIEW", "Laravel", "LESS", "Linux", "Lua", "macOS", "Marketo",
    "MATLAB", "Matplotlib", "MongoDB", "Multisim", "MySQL", "Nginx", "NLP",
    "Node.js", "NoSQL", "Nuxt.js", "NumPy", "Next.js", "Objective-C",
    "Oracle Database", "Oracle", "OrCAD", "Pandas", "Perl", "PHP", "PLC",
    "Plotly", "PostgreSQL", "Power BI", "PowerShell", "Problem Solving",
    "Puppet", "PSpice", "Python", "PyTorch", "Raspberry Pi", "React Native",
    "React", "Red Hat", "Redis", "Revit", "Ruby on Rails", "Ruby", "Rust",
    "Salesforce", "SAP2000", "SAP", "Sass", "SCADA", "Scala", "Scikit-learn",
    "Scrum", "Seaborn", "SEM", "SEO", "Simulink", "SketchUp", "Slack",
    "SolidWorks", "Spring Boot", "SQL", "SQLAlchemy", "SwiftUI", "Swift",
    "Tableau", "Terraform", "TensorFlow", "Trello", "TypeScript", "Ubuntu",
    "Verilog", "VHDL", "Vue.js", "Waterfall", "Windows", "WordPress",
    "Xamarin", "Analytical Skills",
)

# The pattern is generated from the list above:
# * names are escaped with re.escape, so "C++", "C#", "ASP.NET" need no
#   hand-written escaping;
# * longer names come first, so "Aerospace Engineering & Operations
#   Management" is not shadowed by "Aerospace Engineering";
# * (?<!\w) / (?!\w) replace \b, because \b after "+" or "#" only matches
#   when a word character follows, which made "C++" and "C#" undetectable
#   in ordinary text.
_SKILL_REGEX = re.compile(
    r"(?<!\w)("
    + "|".join(
        re.escape(name)
        for name in sorted(set(_KNOWN_SKILLS), key=lambda name: (-len(name), name))
    )
    + r")(?!\w)",
    re.IGNORECASE,
)


def extract_skills_from_text(cv_text: str) -> List[str]:
    """Return the distinct known skills mentioned in ``cv_text``.

    Matching is case-insensitive and each skill is reported once.

    Args:
        cv_text: Raw text of a CV; ``None`` or an empty string yields ``[]``.

    Returns:
        The matched skills, lower-cased and then passed through
        ``str.capitalize()``, in sorted order so the result is the same on
        every run.
    """
    found = {m.group(0).lower() for m in _SKILL_REGEX.finditer(cv_text or "")}
    return [skill.capitalize() for skill in sorted(found)]
|
| 70 |
+
|
| 71 |
+
# --- Process uploaded file (PDF, DOCX, TXT) ---
|
| 72 |
+
def process_uploaded_file(file_obj: Any) -> dict | None:
    """Extract text and skills from an uploaded PDF, DOCX or TXT file.

    Args:
        file_obj: Either a path (``str`` / ``os.PathLike``) or an object whose
            ``.name`` attribute holds the path of the uploaded file.
            NOTE(review): which of the two Gradio passes depends on the
            component configuration — confirm against the UI code.

    Returns:
        ``None`` when ``file_obj`` is falsy. Otherwise a dict that is either
        ``{"content", "skills", "filename"}`` on success or ``{"error": ...}``
        when the extension is unsupported or reading/parsing fails.
    """
    if not file_obj:
        return None

    # Accept plain paths as well as file-like objects; previously a path
    # string raised AttributeError here, outside the try block.
    if isinstance(file_obj, (str, os.PathLike)):
        file_path = os.fspath(file_obj)
    else:
        file_path = file_obj.name
    filename = os.path.basename(file_path)
    lowered = filename.lower()

    try:
        if lowered.endswith(".pdf"):
            with fitz.open(file_path) as doc:
                text_content = "".join(page.get_text() for page in doc)
        elif lowered.endswith(".docx"):
            paragraphs = docx.Document(file_path).paragraphs
            text_content = "".join(p.text + "\n" for p in paragraphs)
        elif lowered.endswith(".txt"):
            with open(file_path, "r", encoding="utf-8") as f:
                text_content = f.read()
        else:
            return {"error": f"Unsupported file type: {filename}"}

        skills = extract_skills_from_text(text_content)
        return {"content": text_content.strip(), "skills": skills, "filename": filename}

    except Exception as e:
        # Best-effort boundary: any parser/IO failure is reported to the
        # caller as an error dict instead of propagating.
        return {"error": f"Error processing file {filename}: {e}"}
|
| 98 |
+
# Inclusive (first, last) code-point pairs of the Unicode blocks treated as
# Arabic script.
ARABIC_RANGE = (
    (0x0600, 0x06FF),    # Arabic
    (0x0750, 0x077F),    # Arabic Supplement
    (0x08A0, 0x08FF),    # Arabic Extended-A
    (0xFB50, 0xFDFF),    # Arabic Presentation Forms-A
    (0xFE70, 0xFEFF),    # Arabic Presentation Forms-B
    (0x1EE00, 0x1EEFF),  # Arabic Mathematical Alphabetic Symbols
)
|
| 102 |
|
|
|
|
| 103 |
def is_arabic(text: str) -> bool:
|
| 104 |
for ch in text:
|
| 105 |
code = ord(ch)
|
|
|
|
| 108 |
return True
|
| 109 |
return False
|
| 110 |
|
| 111 |
+
# --- Chat history HTML formatter (for Gradio) ---
|
| 112 |
+
def format_chat_html(history: List[Dict[str, str]]) -> str:
    """Render the chat history as the HTML shown in the chat panel.

    Args:
        history: Messages in display order; each is a dict with ``"role"``
            and ``"content"`` keys. Any role other than ``"user"`` is
            rendered as a bot message.

    Returns:
        One ``<div class='chatbot'>`` wrapper containing a ``user-bubble`` or
        ``bot-bubble`` div per message.
    """
    from html import escape  # local import keeps this block self-contained

    parts = ["<div class='chatbot'>"]
    for msg in history:
        role = msg["role"]
        content = msg["content"]
        if role == "user":
            # User text is untrusted: escape it so typed markup is displayed
            # literally instead of being injected into the page.
            parts.append(f"<div class='user-bubble'>{escape(content)}</div>")
        else:
            # Bot text is Markdown (tables enabled) converted to HTML.
            # NOTE(review): the converted HTML is inserted as-is; it is not
            # sanitized here.
            rendered = markdown.markdown(content, extensions=['tables'])
            parts.append(f"<div class='bot-bubble'>{rendered}</div>")
    parts.append("</div>")
    return "".join(parts)
|
| 124 |
+
# ================================
|
| 125 |
+
# Part 2 — Weaviate Collections + Query + RAG
|
| 126 |
+
# ================================
|
| 127 |
+
|
| 128 |
+
# -------------------- Ensure collections --------------------
|
| 129 |
+
def ensure_collections():
    """Create the Application, Team and Memory collections if they are missing.

    Each collection is checked with ``collections.exists`` and, when absent,
    created with the properties listed below and no vectorizer.
    """
    # collection name -> ordered (property name, data type) pairs
    schemas = {
        "Application": [
            ("applicationId", DataType.TEXT),
            ("jobId", DataType.TEXT),
            ("applicantName", DataType.TEXT),
            ("applicantEmail", DataType.TEXT),
            ("coverLetter", DataType.TEXT),
            ("cvText", DataType.TEXT),
            ("skills", DataType.TEXT_ARRAY),
            ("createdAt", DataType.DATE),
        ],
        "Team": [
            ("teamId", DataType.TEXT),
            ("name", DataType.TEXT),
            ("projectId", DataType.TEXT),
            ("members", DataType.TEXT_ARRAY),
            ("skills", DataType.TEXT_ARRAY),
            ("createdAt", DataType.DATE),
            ("creatorId", DataType.TEXT),
        ],
        "Memory": [
            ("memoryId", DataType.TEXT),
            ("sessionId", DataType.TEXT),
            ("text", DataType.TEXT),
            ("createdAt", DataType.DATE),
        ],
    }

    for collection_name, fields in schemas.items():
        if weaviate_client.collections.exists(collection_name):
            continue
        weaviate_client.collections.create(
            name=collection_name,
            properties=[
                Property(name=field_name, data_type=field_type)
                for field_name, field_type in fields
            ],
            vectorizer_config=Configure.Vectorizer.none()
        )
|
| 175 |
+
|
| 176 |
+
ensure_collections()
|
| 177 |
+
|
| 178 |
+
# -------------------- Query Weaviate --------------------
|
| 179 |
+
def query_weaviate_collection(class_name: str, query_text: str, limit: int = 5) -> List[dict]:
    """Search one Weaviate collection and return the matching objects' properties.

    A BM25 keyword search runs first. If it yields nothing, a second query
    fetches objects whose ``title`` or ``skills`` match ``*query_text*``.

    Args:
        class_name: Name of the collection to query.
        query_text: Text used for both the BM25 query and the wildcard filter.
        limit: Maximum number of objects requested from each query.

    Returns:
        A list of property dicts; an empty list when nothing matches or when
        any error occurs (the error is printed).
    """
    try:
        target = weaviate_client.collections.get(class_name)

        keyword_hits = target.query.bm25(query=query_text, limit=limit)
        results = [hit.properties for hit in keyword_hits.objects]
        if results:
            return results

        # Nothing from BM25: fall back to a wildcard match on title/skills.
        pattern = f"*{query_text}*"
        wildcard = Filter.any_of([
            Filter.by_property("title").like(pattern),
            Filter.by_property("skills").like(pattern)
        ])
        loose_hits = target.query.fetch_objects(limit=limit, filters=wildcard)
        return [hit.properties for hit in loose_hits.objects]
    except Exception as e:
        print(f"[Weaviate Query Error] {e}")
        return []
|
| 200 |
+
|
| 201 |
+
# -------------------- RAG Prompt Builder --------------------
|
| 202 |
+
def build_rag_prompt(user_question: str, retrieved_items: List[dict], class_name: str) -> str:
    """Build the user-message prompt for a retrieval-augmented answer.

    Args:
        user_question: The question exactly as the user asked it.
        retrieved_items: Property dicts of the objects retrieved for it.
        class_name: Collection the items came from. ``"Job"`` items are
            reduced to a fixed set of labelled fields; items of any other
            collection are included whole, with values stringified.

    Returns:
        The prompt text: the question, the analysis instructions, and a
        "Retrieved Data" section with one JSON record per item.
    """
    context_parts = []
    for i, item in enumerate(retrieved_items, 1):
        if class_name == "Job":
            details = {
                "Title": item.get("title"),
                "Company": item.get("companyName"),
                "Job Type": item.get("jobType"),
                # `or []` also covers a key that is present but null, which
                # previously made join() raise TypeError.
                "Employment": ", ".join(map(str, item.get("employmentType") or [])),
                "Location": item.get("workplaceType"),
                "Description": item.get("description"),
                "Skills": item.get("skills", []),
                "Requirements": item.get("requirements"),
                "Salary": str(item.get("salaryDetails", {})),
            }
        else:
            details = {k: str(v) for k, v in item.items()}

        # default=str is deliberate: stored values such as dates are not
        # JSON-serializable and only need a readable form here.
        record_json = json.dumps(details, indent=2, ensure_ascii=False, default=str)
        context_parts.append(f"--- Record {i} ---\n{record_json}")

    context_block = "\n\n".join(context_parts)

    # The instructions refer to a "Retrieved Data" section; it is appended at
    # the end of the prompt. Previously context_block was built but never
    # inserted, so the model never saw the retrieved records.
    return f"""
User Question: "{user_question}"
You are an expert AI assistant and a skilled data analyst. Your primary mission is to take structured data (in JSON format), analyze it completely, and present all its information to the user in a clear, comprehensive, and conversational summary.

**Primary Directive:** Your ONLY source of information for this task is the structured JSON data provided below under "Retrieved Data". If the data section is empty, you must state that no results were found that match the search and stop. Do not use your general knowledge under any circumstances.

**Your Core Instructions:**
1. **Analyze the Entire Object:** When you receive a JSON object, your first step is to read and understand every single key and value, including nested objects and arrays. Do not ignore any piece of information.
2. **Group Related Information:** Organize your output logically. For example, group company details together, role requirements together, dates and deadlines together, etc. Use clear Markdown headings (`###`) for these logical groups to improve readability.
3. **Convert Data into Natural Language:** Do not just list the data. Convert it into readable, engaging sentences. For example, instead of `workplaceType: "HYBRID"`, say "This is a hybrid role, which offers the flexibility of working both remotely and from the office."
4. **Handle All Data Types Intelligently:**
* For **arrays** (like `skills` or `categories`), list them as clear bullet points or integrate them into a sentence.
* For **nested objects** (like `salaryDetails` or `careerLevel`), explain the contents of the object clearly.
* For **booleans** (true/false), explain their meaning in context (e.g., `published: true` should be "This position is currently published and accepting applications.").
5. **Add a Concluding Call to Action:** After presenting all the details, conclude with a helpful "Next Steps" or "How to Apply" section. For a job, this should be a practical guide. For a project, it might be "How to Get Involved."

**Retrieved Data:**
{context_block}
"""
|
| 241 |
+
|
| 242 |
+
# -------------------- RAG Answer --------------------
|
| 243 |
+
def rag_answer(user_question: str, class_name: str, top_k: int = 5) -> tuple[str, List[dict]]:
    """Answer a question from records retrieved out of one Weaviate collection.

    Args:
        user_question: The user's question; also used as the search text.
        class_name: Collection to search.
        top_k: Maximum number of records to retrieve.

    Returns:
        ``(answer, retrieved)``. ``("", [])`` when nothing was retrieved;
        ``("", retrieved)`` when retrieval worked but the LLM call failed
        (the error is printed).
    """
    retrieved = query_weaviate_collection(class_name, user_question, limit=top_k)
    if not retrieved:
        return "", []

    prompt = build_rag_prompt(user_question, retrieved, class_name)

    try:
        resp = llm_client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT_BASE},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            max_tokens=4096
        )
        # The message content can be None; normalise it to an empty string.
        answer = resp.choices[0].message.content or ""
    except Exception as e:
        print(f"[RAG LLM Error] {e}")
        answer = ""

    return answer, retrieved
|
| 266 |
+
# ================================
|
| 267 |
+
# ================================
|
| 268 |
+
# Part 3 — Conversation State Machine + Embedding Recommendations
|
| 269 |
+
# ================================
|
| 270 |
+
|
| 271 |
+
import numpy as np # used for cosine similarity in recommendations
|
| 272 |
+
|
| 273 |
+
# -------------------- Embedding helpers --------------------
|
| 274 |
+
def compute_embedding(text: str) -> List[float]:
    """Embed ``text`` through the OpenAI-compatible client.

    Requests the ``Qwen/Qwen3-Embedding-8B`` model with float encoding and
    accepts either a dict-shaped or an attribute-shaped response.

    Returns:
        The embedding of the first result, or an empty list when the call
        fails (the error is printed) or the response carries no data.
    """
    try:
        result = llm_client.embeddings.create(
            model="Qwen/Qwen3-Embedding-8B",
            input=text,
            encoding_format="float"
        )

        # Dict-shaped response: {"data": [{"embedding": [...]}, ...]}
        if isinstance(result, dict):
            rows = result.get("data", [])
            if rows and isinstance(rows[0], dict):
                return rows[0].get("embedding", [])

        # Object-shaped response: result.data[0].embedding
        payload = getattr(result, "data", None)
        if payload:
            return payload[0].embedding
    except Exception as e:
        print("[compute_embedding] error:", e)
    return []
|
| 296 |
+
|
| 297 |
+
def cosine_similarity(a: List[float], b: List[float]) -> float:
    """Return the cosine similarity of two vectors.

    Returns ``0.0`` when either vector is empty, when either has zero norm,
    or when the computation raises (for example on mismatched lengths); in
    the last case the error is printed.
    """
    try:
        left = np.array(a, dtype=float)
        right = np.array(b, dtype=float)
        if not (left.size and right.size):
            return 0.0

        scale = np.linalg.norm(left) * np.linalg.norm(right)
        if scale == 0:
            return 0.0
        return float(np.dot(left, right) / scale)
    except Exception as e:
        print("[cosine_similarity] error:", e)
        return 0.0
|
| 310 |
+
|
| 311 |
+
# -------------------- Recommendations by embedding --------------------
|
| 312 |
+
def _job_embedding_text(props: dict) -> str:
    """Build the text embedded for one job: skills, title, first 2000 chars of description."""
    parts = []
    if props.get("skills"):
        # map(str, ...) keeps join() from raising on non-string entries.
        parts.append(" ".join(map(str, props.get("skills"))))
    if props.get("title"):
        parts.append(props.get("title"))
    if props.get("description"):
        parts.append((props.get("description") or "")[:2000])
    return " ".join(parts).strip() or (props.get("title") or "")


def _format_job_recommendation(score: float, props: dict) -> str:
    """Format one scored job as a Markdown entry ending with a ``---`` rule."""
    title = props.get("title", "No title")
    company = props.get("companyName", "Unknown company")
    job_id = props.get("jobId", "")
    salary = props.get("salary") or props.get("salaryDetails") or "Not specified"
    skills_list = props.get("skills") or []
    description = (props.get("description") or "").strip()
    # The description is truncated to 600 characters; the skills list is
    # shown in full.
    return (
        f"**{title}** at *{company}* \n"
        f"- Job ID: `{job_id}` \n"
        f"- Score: {score:.3f} \n"
        f"- Salary: {salary} \n"
        f"- Skills: {skills_list} \n"
        f"- Description: {description[:600]}{'...' if len(description)>600 else ''} \n"
        f"---"
    )


def recommend_jobs_by_embedding(cv_text: str, top_k: int = 5, jobs_fetch_limit: int = 200) -> str:
    """Recommend the jobs whose embeddings are closest to the user's CV.

    Steps:
      1. Represent the user by the skills extracted from the CV, or by the
         first 500 characters of the CV when no skill is recognised.
      2. Embed that text.
      3. Fetch up to ``jobs_fetch_limit`` objects from the ``Job`` collection.
      4. Embed each job (skills + title + description) and score it by
         cosine similarity; jobs whose embedding fails are skipped.
      5. Return the ``top_k`` best jobs as Markdown.

    Note: one embedding request is made per fetched job. For large
    collections the job embeddings should be precomputed and stored.

    Args:
        cv_text: Raw CV text; ``None`` is treated as empty.
        top_k: Number of jobs to include in the result.
        jobs_fetch_limit: Maximum number of jobs fetched and scored.

    Returns:
        Markdown with one entry per recommended job, or a warning message
        starting with "⚠️" when any step cannot be completed.
    """
    # prepare user text
    skills = extract_skills_from_text(cv_text or "")
    user_text = " ".join(skills) if skills else (cv_text or "")[:500]
    user_emb = compute_embedding(user_text)
    if not user_emb:
        return "⚠️ Unable to compute embedding for your CV. Try again or check API keys."

    # fetch jobs from weaviate
    try:
        jobs_col = weaviate_client.collections.get("Job")
        fetched = jobs_col.query.fetch_objects(limit=jobs_fetch_limit)
        if not fetched.objects:
            return "⚠️ No jobs found in the database."
    except Exception as e:
        print("[recommend_jobs_by_embedding] Weaviate fetch error:", e)
        return "⚠️ Could not fetch jobs from the database."

    scored_jobs = []
    for obj in fetched.objects:
        props = obj.properties
        job_emb = compute_embedding(_job_embedding_text(props))
        if not job_emb:
            # skip if embedding failed
            continue
        scored_jobs.append((cosine_similarity(user_emb, job_emb), props))

    if not scored_jobs:
        return "⚠️ No jobs could be embedded / compared."

    # Best score first; the sort is stable, so ties keep fetch order.
    scored_jobs.sort(key=lambda pair: pair[0], reverse=True)

    return "\n\n".join(
        _format_job_recommendation(score, props)
        for score, props in scored_jobs[:top_k]
    )
|
| 386 |
|
| 387 |
+
# -------------------- Conversation Session helpers --------------------
|
| 388 |
+
def initial_session() -> dict:
    """
    Build a brand-new conversation session.

    The session is a plain dict with two keys:

    - "state": the current step of the conversation state machine, one of
        "idle",
        "apply_name", "apply_email", "apply_cover", "apply_wait_cv",
        "apply_jobtitle", "apply_confirm",
        "team_action", "team_create_name", "team_create_owner",
        "team_create_skills", "team_create_course", "team_create_idea",
        "team_join_name", "team_join_member", "team_join_skills",
        "recommend_wait_cv"
    - "data": the fields collected so far by the active flow.

    A new dict (with a new, empty "data" dict) is created on every call.
    """
    fresh_session = dict(state="idle", data={})
    return fresh_session
|
| 400 |
|
| 401 |
+
def handle_uploaded_cv_for_session(session: dict, uploaded_file: Any) -> tuple[str, dict]:
    """
    Process a CV uploaded while a conversation flow is in progress.

    The file is passed to ``process_uploaded_file``; the extracted text and
    skills are stored in ``session["data"]`` under "cvText" and "cvSkills".
    What happens next depends on ``session["state"]``:

    - "apply_wait_cv": the state advances to "apply_jobtitle" and the user is
      asked which job title to apply for.
    - "recommend_wait_cv": recommendations are computed from the CV text and
      the session is reset to a fresh one.
    - any other state: the CV is stored and a generic prompt is returned.

    Args:
        session: current session dict (mutated in place unless it is reset).
        uploaded_file: the uploaded file object/path; falsy means "no file".

    Returns:
        ``(bot_message, updated_session)``.
    """
    if not uploaded_file:
        return "⚠️ No file received.", session

    # NOTE(review): process_uploaded_file is assumed to return a dict holding
    # either an "error" key or "content" / "profile" keys — confirm against it.
    doc_info = process_uploaded_file(uploaded_file)
    if not doc_info or "error" in doc_info:
        reason = doc_info.get("error") if doc_info else "unknown error"
        return f"⚠️ Error processing uploaded CV: {reason}", session

    # Tolerate a session that lost its "data" key instead of raising KeyError.
    data = session.setdefault("data", {})
    data["cvText"] = doc_info.get("content", "")
    # "profile" (or its "skills") may be present but None; always store a list
    # because later steps iterate over cvSkills.
    data["cvSkills"] = (doc_info.get("profile") or {}).get("skills") or []

    st = session.get("state")
    if st == "apply_wait_cv":
        session["state"] = "apply_jobtitle"
        detected = data["cvSkills"]
        return f"CV received. Detected skills: {detected}. Which job title do you want to apply for? (type job title or 'any')", session

    if st == "recommend_wait_cv":
        # Compute recommendations, then finish the flow with a clean session.
        rec_text = recommend_jobs_by_embedding(data["cvText"], top_k=5)
        session = initial_session()
        return f"Here are recommended jobs based on your CV:\n\n{rec_text}", session

    # No flow is waiting for a CV: just acknowledge the upload.
    return "CV uploaded and processed. What would you like to do next?", session
|
| 430 |
+
|
| 431 |
+
# -------------------- Main message handler (state machine) --------------------
|
| 432 |
+
# Words that abort whatever flow is active and start over.
_RESET_WORDS = ("cancel", "exit", "quit", "restart", "reset")
_GREETINGS = ("hi", "hello", "hey", "مرحبا", "ازيك", "السلام عليكم")
_IDENTITY_QUESTIONS = ("who are you?", "who are you", "انت مين", "من انت")
# Flow triggers: matched as substrings of the lower-cased message.
_APPLY_KEYWORDS = ("apply", "i want to apply", "i'd like to apply", "أريد التقديم", "عايز اقدم", "اريد التقديم")
_TEAM_KEYWORDS = ("team", "create team", "join team", "create", "join", "انضم", "انشاء فريق")
_RECOMMEND_KEYWORDS = ("recommend", "recommendation", "jobs for me", "رشح", "ترشيح", "recommend me jobs")
# States in which the next expected input is a CV upload.
_CV_WAIT_STATES = ("apply_wait_cv", "recommend_wait_cv")
_NOT_UNDERSTOOD = "Sorry — I didn't understand that. You can say 'apply', 'create team', 'join team' or 'recommend'."


def _handle_idle(session: dict, text: str) -> tuple[str, dict, bool]:
    """Answer a message received while no guided flow is active."""
    low = text.lower()

    # 1) greetings / identity
    if low in _GREETINGS:
        return "👋 Hello! How can I support you today? You can ask about jobs, teams, or recommendations.", session, False
    if low in _IDENTITY_QUESTIONS:
        return ("👋 I am EduNatives Assistant — your friendly academic and career guide. "
                "I help students, universities, and companies connect through opportunities, projects, and mentoring."), session, False

    # Nothing to route: do not spend a KB / RAG / LLM call on blank input.
    if not text:
        return _NOT_UNDERSTOOD, session, False

    # 2) guided flows. These are checked BEFORE any KB / RAG / LLM lookup so a
    #    lookup hit or an LLM outage can never make the flows unreachable.
    if any(k in low for k in _APPLY_KEYWORDS):
        session["state"] = "apply_name"
        session["data"] = {}
        return "Okay — let's start your application. What's your full name?", session, False

    if any(k in low for k in _TEAM_KEYWORDS):
        session["state"] = "team_action"
        session["data"] = {}
        return "Do you want to create a team or join an existing team? (reply 'create' or 'join')", session, False

    if any(k in low for k in _RECOMMEND_KEYWORDS):
        session["state"] = "recommend_wait_cv"
        session["data"] = {}
        return "Please upload your CV to get job recommendations (use the Upload button).", session, True

    # 3) Knowledge Base fallback
    kb_answer = kb_fallback(route_intent(text))
    if kb_answer:
        return kb_answer, session, False

    # 4) RAG (jobs search); a failure here is logged and the next step is tried
    try:
        rag_ans, _ = rag_answer(text, "Job", top_k=5)
    except Exception as e:
        print("[handle_user_message] rag error:", e)
    else:
        if rag_ans:
            return rag_ans, session, False

    # 5) free-form LLM answer (the response is actually returned to the user)
    try:
        resp = llm_client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT_BASE},
                {"role": "user", "content": text},
            ],
            temperature=0.3,
            max_tokens=4096,
        )
        llm_answer = (resp.choices[0].message.content or "").strip()
    except Exception as e:
        print("[handle_user_message] free LLM error:", e)
        return "⚠️ Sorry, I couldn't process that. Try again later.", session, False
    if llm_answer:
        return llm_answer, session, False

    # 6) ultimate fallback
    return _NOT_UNDERSTOOD, session, False


def _handle_apply_flow(session: dict, st: str, text: str) -> tuple[str, dict, bool] | None:
    """Advance the job-application flow; return None when *st* is not an apply state."""
    data = session.setdefault("data", {})

    if st == "apply_name":
        data["applicantName"] = text or "Applicant"
        session["state"] = "apply_email"
        return "Thanks. What's your email address?", session, False

    if st == "apply_email":
        # Pull the address out of a longer sentence; keep the raw text otherwise.
        m = re.search(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)", text)
        data["applicantEmail"] = m.group(1) if m else text
        session["state"] = "apply_cover"
        return "Got it. Please type a short cover letter (or type 'skip' to skip).", session, False

    if st == "apply_cover":
        data["coverLetter"] = "" if text.lower() == "skip" else text
        session["state"] = "apply_wait_cv"
        return "Please upload your CV now (use the Upload button).", session, True

    if st == "apply_wait_cv":
        # A text message arrived while a CV is expected: re-prompt and keep the
        # uploader visible so the user is not stuck in this state.
        return "Please upload your CV now (use the Upload button).", session, True

    if st == "apply_jobtitle":
        data["targetJobTitle"] = text
        found = query_weaviate_collection("Job", text, limit=3)
        cv_skills = [s.lower() for s in (data.get("cvSkills") or [])]
        session["state"] = "apply_confirm"
        if not found:
            data["targetJobId"] = None
            return f"I couldn't find a job with that title. Do you want to apply for '{text}' anyway? (yes/no)", session, False

        job = found[0]  # best match only
        job_skills = {s.lower() for s in (job.get("skills") or [])}
        overlap = sum(1 for s in cv_skills if s in job_skills)
        data["targetJobId"] = job.get("jobId")
        if overlap > 0:
            return (f"I found a job: {job.get('title')} at {job.get('companyName')}. "
                    f"Detected {overlap} overlapping skills. Do you want to confirm application? (yes/no)"), session, False
        return (f"I found {job.get('title')} at {job.get('companyName')}, but your CV skills do not overlap. "
                "Do you still want to proceed? (yes/no)"), session, False

    if st == "apply_confirm":
        # Anything other than an explicit yes cancels the application.
        if text.lower() not in ("yes", "y", "نعم"):
            return "Application cancelled. If you want to do something else, tell me.", initial_session(), False
        app = {
            "applicationId": str(uuid.uuid4()),
            "jobId": data.get("targetJobId"),
            "applicantName": data.get("applicantName"),
            "applicantEmail": data.get("applicantEmail"),
            "coverLetter": data.get("coverLetter", ""),
            "cvText": data.get("cvText", ""),
            "skills": data.get("cvSkills", []),
            "createdAt": get_rfc3339_time(),
        }
        ok = save_application_to_weaviate(app)
        return ("🎉 Your application has been submitted successfully. Good luck!" if ok
                else "⚠️ Failed to save application. Please try again later."), initial_session(), False

    return None


def _handle_team_flow(session: dict, st: str, text: str) -> tuple[str, dict, bool] | None:
    """Advance the create-team / join-team flows; return None when *st* is not a team state."""
    data = session.setdefault("data", {})

    if st == "team_action":
        low = text.lower()
        # Accept both Arabic spellings; the idle trigger uses the one without hamza.
        if "create" in low or "إنشاء" in low or "انشاء" in low:
            session["state"] = "team_create_name"
            session["data"] = {}
            return "Great — what's the team name?", session, False
        if "join" in low or "انضم" in low:
            session["state"] = "team_join_name"
            session["data"] = {}
            return "Okay — what's the name of the team you want to join?", session, False
        return "Please say 'create' to create a team or 'join' to join a team.", session, False

    if st == "team_create_name":
        data["team_name"] = text
        session["state"] = "team_create_owner"
        return "Team name saved. Who is the team owner (your name)?", session, False

    if st == "team_create_owner":
        data["owner"] = text
        session["state"] = "team_create_skills"
        return "Owner saved. Please list the team's skills (comma-separated).", session, False

    if st == "team_create_skills":
        data["skills"] = [s.strip() for s in text.split(",") if s.strip()]
        session["state"] = "team_create_course"
        return "Skills saved. (Optional) Enter course/subject name or type 'skip'.", session, False

    if st == "team_create_course":
        data["course"] = "" if text.lower() == "skip" else text
        session["state"] = "team_create_idea"
        return "Please write a short idea/description for the project.", session, False

    if st == "team_create_idea":
        data["idea"] = text
        team_props = {
            "teamId": str(uuid.uuid4()),
            "name": data.get("team_name"),
            "projectId": None,
            "members": [data.get("owner")],
            "skills": data.get("skills", []),
            "creatorId": data.get("owner"),
            "createdAt": get_rfc3339_time(),
            "idea": data.get("idea", ""),
        }
        saved = save_team_to_weaviate(team_props)
        return (f"🎉 Team '{team_props['name']}' created! Members: {team_props['members']}" if saved
                else "⚠️ Failed to create team. Try again later."), initial_session(), False

    if st == "team_join_name":
        data["team_name"] = text
        session["state"] = "team_join_member"
        return "What's your name (to add you to the team)?", session, False

    if st == "team_join_member":
        data["member_name"] = text
        session["state"] = "team_join_skills"
        return "Enter your skills (comma-separated).", session, False

    if st == "team_join_skills":
        skills = [s.strip() for s in text.split(",") if s.strip()]
        resp = update_team_add_member(data.get("team_name"), data.get("member_name"), skills)
        return resp, initial_session(), False

    return None


def handle_user_message(session: dict, user_text: str, uploaded_file: Any = None) -> tuple[str, dict, bool]:
    """
    Main conversation handler (state machine).

    Order of processing:
      1. reset words ("cancel", "exit", ...) start a fresh session;
      2. an uploaded file is routed to ``handle_uploaded_cv_for_session``;
      3. otherwise the message is dispatched on ``session["state"]``:
         idle -> greetings, flow triggers, KB, RAG, free-form LLM, fallback;
         apply_* / team_* -> the matching step of the guided flow;
         recommend_wait_cv -> re-prompt for the CV.

    Args:
        session: session dict as built by ``initial_session``; falsy -> new one.
        user_text: raw text typed by the user (may be empty or None).
        uploaded_file: file object/path when the call comes from an upload.

    Returns:
        ``(bot_reply, new_session, show_file_uploader)``.
    """
    session = session or initial_session()
    st = session.get("state", "idle")
    text = (user_text or "").strip()

    # quick reset
    if text.lower() in _RESET_WORDS:
        return "Conversation reset. How can I help you now?", initial_session(), False

    # file upload
    if uploaded_file:
        bot_msg, new_session = handle_uploaded_cv_for_session(session, uploaded_file)
        # Keep the uploader visible when processing failed and a CV is still
        # required; otherwise the user could not retry.
        still_waiting = new_session.get("state") in _CV_WAIT_STATES
        return bot_msg, new_session, still_waiting

    if st == "idle":
        return _handle_idle(session, text)

    if st == "recommend_wait_cv":
        return "Please upload your CV (use the Upload button).", session, True

    for flow in (_handle_apply_flow, _handle_team_flow):
        outcome = flow(session, st, text)
        if outcome is not None:
            return outcome

    # unknown state
    return _NOT_UNDERSTOOD, session, False
|
| 654 |
+
|
| 655 |
+
|
| 656 |
+
# ================================
|
| 657 |
+
# Part 4 — Gradio Chat UI wiring
|
| 658 |
+
# ================================
|
| 659 |
+
|
| 660 |
+
import atexit
# Ensure the Weaviate connection is closed when the app exits.
# The lambda looks up `weaviate_client` only when the exit hook runs, not at
# registration time.
atexit.register(lambda: weaviate_client.close())
|
| 663 |
+
|
| 664 |
+
# initial session state per user
|
| 665 |
+
def create_initial_session_for_state():
    """Return a fresh session dict from ``initial_session``; used as the UI's starting session state."""
    starting_session = initial_session()
    return starting_session
|
| 667 |
+
|
| 668 |
+
# helper to append to chat history (list of dicts)
|
| 669 |
+
def append_to_history(history: List[Dict[str, str]], role: str, content: str) -> List[Dict[str, str]]:
    """
    Add one chat message to the history and return the history.

    A falsy history (None or an empty list) is replaced by a new list; a
    non-empty list is extended in place and returned as the same object.
    Each message is stored as ``{"role": role, "content": content}``.
    """
    messages = history if history else []
    entry = {"role": role, "content": content}
    messages.append(entry)
    return messages
|
| 673 |
+
|
| 674 |
+
# UI
|
| 675 |
with gr.Blocks(css="""
.chatbot {height: 520px; overflow: auto;}
.user-bubble {background-color: #DCF8C6; padding: 10px; border-radius: 12px; max-width: 75%; float: right; clear: both; margin: 5px; word-wrap: break-word;}
.bot-bubble {background-color: #F1F0F0; padding: 10px; border-radius: 12px; max-width: 75%; float: left; clear: both; margin: 5px; word-wrap: break-word;}
.chatbox-container {display: flex; gap: 8px; margin-top: 10px;}
""") as demo:

    gr.Markdown("# 💬 EduNatives — Conversational Job Portal")

    # chat HTML (we use custom formatted HTML)
    chat_html = gr.HTML(format_chat_html([]))

    # input row
    with gr.Row(elem_classes="chatbox-container"):
        user_input = gr.Textbox(placeholder="Type your message here (e.g. 'apply', 'create team', 'recommend')", lines=2)
        send_btn = gr.Button("Send", variant="primary")

    # File upload row. The row itself must stay visible: the handlers only
    # toggle the two children, and a hidden parent row would keep them hidden
    # no matter what their own `visible` flag says.
    with gr.Row() as file_row:
        cv_uploader = gr.File(label="Upload CV (.pdf/.docx/.txt)", file_count="single", file_types=[".pdf", ".docx", ".txt"], visible=False)
        upload_btn = gr.Button("Upload CV", visible=False)

    # control buttons
    with gr.Row():
        clear_btn = gr.Button("Reset Conversation")
        instructions = gr.Markdown("Commands: `apply`, `create team`, `join team`, `recommend` — the bot will guide you step-by-step.")

    # persistent state across turns
    chat_history_state = gr.State([])
    session_state = gr.State(create_initial_session_for_state())

    # -------------------- handlers --------------------
    def handle_send(message: str, history: List[Dict[str, str]], session: dict):
        """
        Called when the user presses Send.

        Passes the text to handle_user_message (no file), appends the user
        message and the bot reply to the history, and returns, in output order:
        cleared textbox, rendered chat HTML, history, session, and the
        visibility updates for the uploader and the upload button.
        """
        history = history or []
        session = session or initial_session()

        # append user message (blank input is not echoed into the chat)
        if message and message.strip():
            history = append_to_history(history, "user", message.strip())

        bot_reply, new_session, show_uploader = handle_user_message(session, message or "", uploaded_file=None)

        history = append_to_history(history, "assistant", bot_reply or "…")
        html = format_chat_html(history)

        return "", html, history, new_session, gr.update(visible=show_uploader), gr.update(visible=show_uploader)

    def handle_upload(file_obj, history: List[Dict[str, str]], session: dict):
        """
        Called when the user presses Upload CV (after selecting a file).

        The session should be in a state that expects a CV (e.g. apply_wait_cv
        or recommend_wait_cv). Returns chat HTML, history, session and the
        visibility updates for the uploader and the upload button.
        """
        history = history or []
        session = session or initial_session()

        # Button pressed without a selected file: say so and keep the uploader
        # on screen instead of pretending a file was uploaded.
        if not file_obj:
            history = append_to_history(history, "assistant", "⚠️ No file received. Please choose a file first, then press Upload CV.")
            return format_chat_html(history), history, session, gr.update(visible=True), gr.update(visible=True)

        # The File component may hand over a path string or an object with a
        # `.name` path; show only the base name in the chat.
        file_path = file_obj if isinstance(file_obj, str) else getattr(file_obj, "name", "uploaded_file")
        filename = os.path.basename(str(file_path)) or "uploaded_file"
        history = append_to_history(history, "user", f"📎 Uploaded file: {filename}")

        # route file into the state machine
        bot_reply, new_session, show_uploader = handle_user_message(session, "", uploaded_file=file_obj)

        history = append_to_history(history, "assistant", bot_reply or "…")
        html = format_chat_html(history)

        # after upload the uploader is hidden unless the bot asks for a file again
        return html, history, new_session, gr.update(visible=show_uploader), gr.update(visible=show_uploader)

    def handle_reset(history, session):
        """Clear the chat history, start a fresh session and hide the uploader."""
        new_hist = []
        new_session = initial_session()
        html = format_chat_html(new_hist)
        return html, new_hist, new_session, gr.update(visible=False), gr.update(visible=False)

    # -------------------- event wiring --------------------
    send_btn.click(
        fn=handle_send,
        inputs=[user_input, chat_history_state, session_state],
        outputs=[user_input, chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
        queue=True
    )

    # upload button is shown/hidden by the send handler; user selects a file in cv_uploader then presses Upload CV
    upload_btn.click(
        fn=handle_upload,
        inputs=[cv_uploader, chat_history_state, session_state],
        outputs=[chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
        queue=True
    )

    clear_btn.click(
        fn=handle_reset,
        inputs=[chat_history_state, session_state],
        outputs=[chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
        queue=False
    )
|
| 779 |
|
| 780 |
+
# Launch the Gradio app only when this file is run as a script
# (not when it is imported as a module).
if __name__ == "__main__":
    demo.launch(debug=True)
|