Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,225 +1,749 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
import os
|
| 3 |
import json
|
| 4 |
import time
|
|
|
|
| 5 |
import uuid
|
|
|
|
| 6 |
from dataclasses import dataclass
|
| 7 |
-
from typing import List, Dict, Any
|
| 8 |
|
| 9 |
-
# --- Gradio & UI ---
|
| 10 |
import markdown
|
| 11 |
import gradio as gr
|
| 12 |
-
|
| 13 |
-
# --- LLM & Vector DB ---
|
| 14 |
from openai import OpenAI
|
| 15 |
-
import weaviate
|
| 16 |
-
|
| 17 |
-
# --- File Processing ---
|
| 18 |
import fitz # PyMuPDF
|
| 19 |
import docx
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
-
# ---
|
|
|
|
|
|
|
|
|
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
# تأكد من وضع مفتاح API الخاص بك هنا
|
| 26 |
-
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY", "kPEm10rrnxXrCf0TuB6Xcd7Y7lp3YgKa")
|
| 27 |
-
BASE_URL = "https://api.deepinfra.com/v1/openai"
|
| 28 |
|
| 29 |
-
|
| 30 |
-
|
| 31 |
|
|
|
|
|
|
|
| 32 |
llm_client = OpenAI(api_key=DEEPINFRA_API_KEY, base_url=BASE_URL)
|
| 33 |
|
| 34 |
-
#
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
try:
|
| 40 |
-
weaviate_client = weaviate.Client(
|
| 41 |
-
url=WEAVIATE_URL,
|
| 42 |
-
auth_client_secret=weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY),
|
| 43 |
-
)
|
| 44 |
-
print("[INFO] Successfully connected to Weaviate.")
|
| 45 |
-
except Exception as e:
|
| 46 |
-
print(f"[ERROR] Failed to connect to Weaviate: {e}")
|
| 47 |
-
weaviate_client = None
|
| 48 |
-
|
| 49 |
-
# --- Language & Routing Configuration ---
|
| 50 |
-
ARABIC_RANGE = (
|
| 51 |
-
(0x0600, 0x06FF), (0x0750, 0x077F), (0x08A0, 0x08FF),
|
| 52 |
-
(0xFB50, 0xFDFF), (0xFE70, 0xFEFF), (0x1EE00, 0x1EEFF)
|
| 53 |
)
|
| 54 |
|
| 55 |
-
|
| 56 |
-
class Route:
|
| 57 |
-
audience: str
|
| 58 |
-
intent: str
|
| 59 |
-
language: str
|
| 60 |
-
|
| 61 |
-
# --- Knowledge Base (KB) for simple queries ---
|
| 62 |
KB: Dict[str, Dict[str, str]] = {
|
| 63 |
"student_registration": {
|
| 64 |
-
"en":
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
},
|
| 67 |
}
|
| 68 |
|
| 69 |
-
# --- UPDATED: Keywords for intent routing, including RAG intents ---
|
| 70 |
KEYS = {
|
| 71 |
-
|
| 72 |
-
"
|
| 73 |
-
"
|
| 74 |
-
"
|
| 75 |
-
"
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
"
|
| 79 |
-
"
|
| 80 |
-
"
|
| 81 |
}
|
| 82 |
|
| 83 |
-
# --- Mapping intents to audiences ---
|
| 84 |
AUDIENCE_MAP = {
|
| 85 |
-
"find_job": "student",
|
| 86 |
-
"find_opportunity": "student",
|
| 87 |
-
"find_project": "student",
|
| 88 |
-
"join_team": "student",
|
| 89 |
"student_registration": "student",
|
|
|
|
| 90 |
"student_mentors": "student",
|
| 91 |
"university_publish": "university",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
}
|
| 93 |
|
| 94 |
-
# --- System Prompts ---
|
| 95 |
SYSTEM_PROMPT_BASE = (
|
| 96 |
-
"You are **EduNatives Assistant**, a helpful, friendly, and precise academic/career guide
|
| 97 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
)
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
)
|
| 105 |
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
return None
|
| 117 |
file_path = file_obj.name
|
| 118 |
-
|
| 119 |
text_content = ""
|
| 120 |
try:
|
| 121 |
-
if
|
| 122 |
with fitz.open(file_path) as doc:
|
| 123 |
-
|
| 124 |
-
|
|
|
|
| 125 |
doc = docx.Document(file_path)
|
| 126 |
-
|
| 127 |
-
|
|
|
|
| 128 |
with open(file_path, "r", encoding="utf-8") as f:
|
| 129 |
text_content = f.read()
|
| 130 |
else:
|
| 131 |
-
return f"
|
| 132 |
-
|
|
|
|
| 133 |
except Exception as e:
|
| 134 |
-
print(f"[
|
| 135 |
-
return f"
|
| 136 |
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 140 |
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
if has_file and any(kw in text_l for kw in ["cv", "resume", "my skills", "سيرة ذاتية", "ملفي"]):
|
| 148 |
-
return Route(audience="student", intent="analyze_cv_for_opportunities", language=lang)
|
| 149 |
|
| 150 |
-
|
|
|
|
|
|
|
| 151 |
match_label = None
|
| 152 |
for label, kws in KEYS.items():
|
| 153 |
-
|
| 154 |
-
|
|
|
|
|
|
|
|
|
|
| 155 |
break
|
| 156 |
-
|
| 157 |
-
if
|
| 158 |
-
audience =
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
|
|
|
| 170 |
messages.append({"role": "user", "content": user_message})
|
| 171 |
-
|
| 172 |
try:
|
| 173 |
resp = llm_client.chat.completions.create(
|
| 174 |
-
model=MODEL_NAME,
|
|
|
|
|
|
|
|
|
|
| 175 |
)
|
| 176 |
return resp.choices[0].message.content or ""
|
| 177 |
except Exception as e:
|
| 178 |
-
print(f"[
|
| 179 |
-
return "
|
| 180 |
-
|
| 181 |
-
def query_weaviate(class_name: str, query_text: str, properties: List[str], limit: int = 5) -> str:
|
| 182 |
-
"""Performs vector search on a Weaviate collection and formats the output."""
|
| 183 |
-
if not weaviate_client:
|
| 184 |
-
return "Database connection is not available."
|
| 185 |
-
try:
|
| 186 |
-
response = (
|
| 187 |
-
weaviate_client.query
|
| 188 |
-
.get(class_name, properties)
|
| 189 |
-
.with_near_text({"concepts": [query_text]})
|
| 190 |
-
.with_limit(limit)
|
| 191 |
-
.do()
|
| 192 |
-
)
|
| 193 |
-
|
| 194 |
-
results = response["data"]["Get"][class_name]
|
| 195 |
-
if not results:
|
| 196 |
-
return f"No matching {class_name.lower()} found."
|
| 197 |
-
|
| 198 |
-
formatted_output = ""
|
| 199 |
-
for i, item in enumerate(results):
|
| 200 |
-
formatted_output += f"### Result {i+1}\n"
|
| 201 |
-
for prop in properties:
|
| 202 |
-
if prop in item and item[prop]:
|
| 203 |
-
formatted_output += f"- **{prop.replace('_', ' ').title()}**: {item[prop]}\n"
|
| 204 |
-
formatted_output += "\n"
|
| 205 |
-
return formatted_output.strip()
|
| 206 |
-
|
| 207 |
-
except Exception as e:
|
| 208 |
-
print(f"[ERROR] Weaviate query failed for class '{class_name}': {e}")
|
| 209 |
-
return f"An error occurred while searching for {class_name.lower()}."
|
| 210 |
-
|
| 211 |
-
def analyze_cv_with_llm(cv_text: str) -> str:
|
| 212 |
-
"""Uses LLM to extract key skills and information from a CV."""
|
| 213 |
-
prompt = (
|
| 214 |
-
"Analyze the following CV text and extract the key information. "
|
| 215 |
-
"Summarize it into a short phrase suitable for a vector search to find matching jobs or internships. "
|
| 216 |
-
"Focus on technical skills, programming languages, field of study, and key experiences.\n\n"
|
| 217 |
-
f"--- CV TEXT ---\n{cv_text}\n--- END CV TEXT ---"
|
| 218 |
-
)
|
| 219 |
-
return call_llm(prompt, [], SYSTEM_PROMPT_BASE)
|
| 220 |
-
|
| 221 |
-
# --- 3. GRADIO UI & EVENT HANDLERS ---
|
| 222 |
|
|
|
|
| 223 |
with gr.Blocks(css="""
|
| 224 |
.chatbot {height: 500px; overflow: auto;}
|
| 225 |
.user-bubble {background-color: #DCF8C6; padding: 10px; border-radius: 12px; max-width: 75%; float: right; clear: both; margin: 5px; word-wrap: break-word;}
|
|
@@ -229,116 +753,222 @@ with gr.Blocks(css="""
|
|
| 229 |
.bot-bubble th, .bot-bubble td {border: 1px solid #ddd; padding: 8px; text-align: left;}
|
| 230 |
.bot-bubble th {background-color: #e9e9e9;}
|
| 231 |
""") as demo:
|
| 232 |
-
|
|
|
|
| 233 |
|
| 234 |
with gr.Row():
|
| 235 |
-
audience_dd = gr.Dropdown(
|
| 236 |
-
label="Audience",
|
| 237 |
-
choices=["Auto", "Student", "University-Research", "Company"],
|
| 238 |
-
value="Auto",
|
| 239 |
-
interactive=True,
|
| 240 |
-
info="Select your role. 'Auto' detects it from your message."
|
| 241 |
-
)
|
| 242 |
clear_btn = gr.Button("🧹 Clear Chat")
|
| 243 |
|
| 244 |
status = gr.Markdown("Status: Ready.")
|
| 245 |
chatbot_html = gr.HTML("<div class='chatbot' id='chatbot'></div>")
|
| 246 |
chat_history_state = gr.State([])
|
|
|
|
| 247 |
|
| 248 |
with gr.Row(elem_classes="chatbox-container"):
|
| 249 |
-
msg = gr.Textbox(
|
| 250 |
-
|
| 251 |
-
lines=2, scale=4, autofocus=True,
|
| 252 |
-
)
|
| 253 |
-
file_uploader = gr.File(
|
| 254 |
-
label="Upload Document (.txt, .pdf, .docx)",
|
| 255 |
-
file_types=[".txt", ".pdf", ".docx"],
|
| 256 |
-
interactive=True,
|
| 257 |
-
)
|
| 258 |
with gr.Column(scale=1, min_width=120):
|
| 259 |
send_btn = gr.Button("➡️ Send", scale=1, variant="primary")
|
| 260 |
|
| 261 |
-
def
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
def respond(user_text: str, file_obj: Any, history: List[Dict[str, str]], audience_choice: str):
|
| 273 |
-
# 1. Process inputs
|
| 274 |
-
document_text = process_uploaded_file(file_obj)
|
| 275 |
-
if not user_text.strip() and not document_text:
|
| 276 |
-
return "", format_chat_html(history), history, "Status: Please type a message or upload a file.", None
|
| 277 |
|
| 278 |
-
user_message_for_history = user_text
|
| 279 |
-
if document_text:
|
| 280 |
-
file_name = os.path.basename(file_obj.name)
|
| 281 |
-
user_message_for_history += f"\n\n*📎 [File Attached: {file_name}]*"
|
| 282 |
-
|
| 283 |
-
# 2. Route intent
|
| 284 |
forced = {"Student": "student", "University-Research": "university", "Company": "company"}.get(audience_choice)
|
| 285 |
-
route = route_intent(
|
| 286 |
status_text = f"**Audience**: {route.audience} | **Intent**: {route.intent} | **Lang**: {route.language.upper()}"
|
| 287 |
-
|
| 288 |
-
# 3. Handle different intents
|
| 289 |
-
answer = ""
|
| 290 |
-
rag_query_text = user_text
|
| 291 |
-
|
| 292 |
-
# --- RAG Logic Branch ---
|
| 293 |
-
if weaviate_client and route.intent in ["find_job", "find_opportunity", "find_project", "analyze_cv_for_opportunities"]:
|
| 294 |
-
if route.intent == "analyze_cv_for_opportunities":
|
| 295 |
-
status_text += " | Analyzing CV..."
|
| 296 |
-
# Use LLM to get a search query from the CV
|
| 297 |
-
rag_query_text = analyze_cv_with_llm(document_text)
|
| 298 |
-
|
| 299 |
-
if "job" in route.intent or "cv" in route.intent:
|
| 300 |
-
retrieved_data = query_weaviate("Job", rag_query_text, ["title", "company", "location", "description"])
|
| 301 |
-
elif "opportunity" in route.intent:
|
| 302 |
-
retrieved_data = query_weaviate("Opportunities", rag_query_text, ["title", "type", "organization", "summary"])
|
| 303 |
-
elif "project" in route.intent:
|
| 304 |
-
retrieved_data = query_weaviate("Project", rag_query_text, ["title", "field", "university", "abstract"])
|
| 305 |
-
|
| 306 |
-
# Combine retrieved data with LLM for a natural response
|
| 307 |
-
system_prompt = RAG_PROMPT_TEMPLATE.format(retrieved_data=retrieved_data)
|
| 308 |
-
answer = call_llm(user_text, history, system_prompt)
|
| 309 |
|
| 310 |
-
#
|
| 311 |
-
else:
|
| 312 |
-
|
| 313 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
else:
|
| 315 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
|
| 317 |
-
# 4. Update history, log, and format for UI
|
| 318 |
history.append({"role": "user", "content": user_message_for_history})
|
| 319 |
-
history.append({"role": "assistant", "content":
|
| 320 |
-
|
| 321 |
-
log_interaction({
|
| 322 |
-
"timestamp": time.time(),
|
| 323 |
-
"user_message": user_text,
|
| 324 |
-
"file_uploaded": file_obj.name if file_obj else None,
|
| 325 |
-
"audience": route.audience,
|
| 326 |
-
"intent": route.intent,
|
| 327 |
-
"language": route.language,
|
| 328 |
-
"bot_response": answer
|
| 329 |
-
})
|
| 330 |
-
|
| 331 |
-
updated_html = format_chat_html(history)
|
| 332 |
-
return "", updated_html, history, status_text, None # Clear text input and file uploader
|
| 333 |
|
| 334 |
-
|
| 335 |
-
|
| 336 |
-
|
|
|
|
|
|
|
| 337 |
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
| 341 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 342 |
|
| 343 |
if __name__ == "__main__":
|
| 344 |
demo.launch(debug=True)
|
|
|
|
| 1 |
+
# from __future__ import annotations
|
| 2 |
+
# import os
|
| 3 |
+
# import json
|
| 4 |
+
# import time
|
| 5 |
+
# import uuid
|
| 6 |
+
# from dataclasses import dataclass
|
| 7 |
+
# from typing import List, Dict, Any
|
| 8 |
+
|
| 9 |
+
# # --- Gradio & UI ---
|
| 10 |
+
# import markdown
|
| 11 |
+
# import gradio as gr
|
| 12 |
+
|
| 13 |
+
# # --- LLM & Vector DB ---
|
| 14 |
+
# from openai import OpenAI
|
| 15 |
+
# import weaviate
|
| 16 |
+
|
| 17 |
+
# # --- File Processing ---
|
| 18 |
+
# import fitz # PyMuPDF
|
| 19 |
+
# import docx
|
| 20 |
+
|
| 21 |
+
# # --- 1. BACKEND LOGIC & CONFIG ---
|
| 22 |
+
|
| 23 |
+
# # --- LLM Configuration (DeepInfra) ---
|
| 24 |
+
# MODEL_NAME = "openai/gpt-oss-120b"
|
| 25 |
+
# # تأكد من وضع مفتاح API الخاص بك هنا
|
| 26 |
+
# DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY", "kPEm10rrnxXrCf0TuB6Xcd7Y7lp3YgKa")
|
| 27 |
+
# BASE_URL = "https://api.deepinfra.com/v1/openai"
|
| 28 |
+
|
| 29 |
+
# if not DEEPINFRA_API_KEY:
|
| 30 |
+
# print("[WARN] DEEPINFRA_API_KEY is not set. The chatbot will likely fail.")
|
| 31 |
+
|
| 32 |
+
# llm_client = OpenAI(api_key=DEEPINFRA_API_KEY, base_url=BASE_URL)
|
| 33 |
+
|
| 34 |
+
# # --- Weaviate RAG Configuration ---
|
| 35 |
+
# WEAVIATE_URL = os.getenv("WEAVIATE_URL", "https://org-bgpt4w63nvf1yeuw.c0.us-west3.gcp.weaviate.cloud")
|
| 36 |
+
# # تأكد من وضع مفتاح API الخاص بك هنا
|
| 37 |
+
# WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY", "ZUd6clB5WmYzVGkxeU40cl96NTY5UkViUlVzY05Md3IzQ0JKelBZQmxGZHRPeGpCeGdxS1FUNnlYUkFFPV92MjAw")
|
| 38 |
+
|
| 39 |
+
# try:
|
| 40 |
+
# weaviate_client = weaviate.Client(
|
| 41 |
+
# url=WEAVIATE_URL,
|
| 42 |
+
# auth_client_secret=weaviate.AuthApiKey(api_key=WEAVIATE_API_KEY),
|
| 43 |
+
# )
|
| 44 |
+
# print("[INFO] Successfully connected to Weaviate.")
|
| 45 |
+
# except Exception as e:
|
| 46 |
+
# print(f"[ERROR] Failed to connect to Weaviate: {e}")
|
| 47 |
+
# weaviate_client = None
|
| 48 |
+
|
| 49 |
+
# # --- Language & Routing Configuration ---
|
| 50 |
+
# ARABIC_RANGE = (
|
| 51 |
+
# (0x0600, 0x06FF), (0x0750, 0x077F), (0x08A0, 0x08FF),
|
| 52 |
+
# (0xFB50, 0xFDFF), (0xFE70, 0xFEFF), (0x1EE00, 0x1EEFF)
|
| 53 |
+
# )
|
| 54 |
+
|
| 55 |
+
# @dataclass
|
| 56 |
+
# class Route:
|
| 57 |
+
# audience: str
|
| 58 |
+
# intent: str
|
| 59 |
+
# language: str
|
| 60 |
+
|
| 61 |
+
# # --- Knowledge Base (KB) for simple queries ---
|
| 62 |
+
# KB: Dict[str, Dict[str, str]] = {
|
| 63 |
+
# "student_registration": {
|
| 64 |
+
# "en": "**How to register (Student)**\n\n1. Go to the EduNatives site and choose Sign Up.\n2. Use your university email and verify it.\n3. Complete your profile.",
|
| 65 |
+
# "ar": "**طريقة التسجيل (طلاب)**\n\n١. اذهب إلى موقع EduNatives واختر Sign Up.\n٢. استخدم إيميل الجامعة وأكده.\n٣. أكمل ملفك الشخصي.",
|
| 66 |
+
# },
|
| 67 |
+
# }
|
| 68 |
+
|
| 69 |
+
# # --- UPDATED: Keywords for intent routing, including RAG intents ---
|
| 70 |
+
# KEYS = {
|
| 71 |
+
# # RAG Intents
|
| 72 |
+
# "find_job": ["job", "jobs", "career", "hiring", "وظيفة", "وظائف", "توظيف", "شغل"],
|
| 73 |
+
# "find_opportunity": ["intern", "internship", "scholarship", "opportunity", "training", "تدريب", "منحة", "فرصة"],
|
| 74 |
+
# "find_project": ["project", "projects", "research", "مشروع", "مشاريع", "بحث", "ابحاث"],
|
| 75 |
+
# "join_team": ["team", "join team", "find team", "فريق", "انضم لفريق", "تيم"],
|
| 76 |
+
|
| 77 |
+
# # Standard Intents
|
| 78 |
+
# "student_registration": ["register", "sign up", "account", "تسجيل", "حساب"],
|
| 79 |
+
# "student_mentors": ["mentor", "advisor", "professor", "مشرف", "دكتور"],
|
| 80 |
+
# "university_publish": ["publish", "paper", "conference", "نشر", "مؤتمر"],
|
| 81 |
+
# }
|
| 82 |
+
|
| 83 |
+
# # --- Mapping intents to audiences ---
|
| 84 |
+
# AUDIENCE_MAP = {
|
| 85 |
+
# "find_job": "student",
|
| 86 |
+
# "find_opportunity": "student",
|
| 87 |
+
# "find_project": "student",
|
| 88 |
+
# "join_team": "student",
|
| 89 |
+
# "student_registration": "student",
|
| 90 |
+
# "student_mentors": "student",
|
| 91 |
+
# "university_publish": "university",
|
| 92 |
+
# }
|
| 93 |
+
|
| 94 |
+
# # --- System Prompts ---
|
| 95 |
+
# SYSTEM_PROMPT_BASE = (
|
| 96 |
+
# "You are **EduNatives Assistant**, a helpful, friendly, and precise academic/career guide for Students, Universities, and Companies. "
|
| 97 |
+
# "Reply in the user's language (Arabic/English). Be concise and action-oriented."
|
| 98 |
+
# )
|
| 99 |
+
|
| 100 |
+
# RAG_PROMPT_TEMPLATE = (
|
| 101 |
+
# "Based on the following information retrieved from our database, please answer the user's question. "
|
| 102 |
+
# "Format the results clearly (e.g., using a list or table). At the end, ask the user if they need help applying or have more questions.\n\n"
|
| 103 |
+
# "--- RETRIEVED DATA ---\n{retrieved_data}\n--- END DATA ---\n\n"
|
| 104 |
+
# )
|
| 105 |
+
|
| 106 |
+
# # --- 2. CORE FUNCTIONS ---
|
| 107 |
+
|
| 108 |
+
# def log_interaction(data: Dict[str, Any]):
|
| 109 |
+
# """Appends interaction data to a JSONL file for analytics."""
|
| 110 |
+
# with open("interaction_log.jsonl", "a", encoding="utf-8") as f:
|
| 111 |
+
# f.write(json.dumps(data, ensure_ascii=False) + "\n")
|
| 112 |
+
|
| 113 |
+
# def process_uploaded_file(file_obj: Any) -> str | None:
|
| 114 |
+
# """Extracts text from an uploaded file object."""
|
| 115 |
+
# if file_obj is None:
|
| 116 |
+
# return None
|
| 117 |
+
# file_path = file_obj.name
|
| 118 |
+
# file_name = os.path.basename(file_path)
|
| 119 |
+
# text_content = ""
|
| 120 |
+
# try:
|
| 121 |
+
# if file_name.lower().endswith(".pdf"):
|
| 122 |
+
# with fitz.open(file_path) as doc:
|
| 123 |
+
# text_content = "".join(page.get_text() for page in doc)
|
| 124 |
+
# elif file_name.lower().endswith(".docx"):
|
| 125 |
+
# doc = docx.Document(file_path)
|
| 126 |
+
# text_content = "\n".join(para.text for para in doc.paragraphs)
|
| 127 |
+
# elif file_name.lower().endswith(".txt"):
|
| 128 |
+
# with open(file_path, "r", encoding="utf-8") as f:
|
| 129 |
+
# text_content = f.read()
|
| 130 |
+
# else:
|
| 131 |
+
# return f"[Unsupported file type: {file_name}]"
|
| 132 |
+
# return text_content.strip()
|
| 133 |
+
# except Exception as e:
|
| 134 |
+
# print(f"[ERROR] Failed to process file {file_name}: {e}")
|
| 135 |
+
# return f"[Error processing file: {file_name}]"
|
| 136 |
+
|
| 137 |
+
# def is_arabic(text: str) -> bool:
|
| 138 |
+
# """Checks if a string contains Arabic characters."""
|
| 139 |
+
# return any(any(a <= ord(ch) <= b for a, b in ARABIC_RANGE) for ch in text)
|
| 140 |
+
|
| 141 |
+
# def route_intent(text: str, has_file: bool, forced_audience: str | None = None) -> Route:
|
| 142 |
+
# """Determines user intent based on keywords and context."""
|
| 143 |
+
# lang = "ar" if is_arabic(text) else "en"
|
| 144 |
+
# text_l = text.lower()
|
| 145 |
+
|
| 146 |
+
# # Special intent for CV analysis
|
| 147 |
+
# if has_file and any(kw in text_l for kw in ["cv", "resume", "my skills", "سيرة ذاتية", "ملفي"]):
|
| 148 |
+
# return Route(audience="student", intent="analyze_cv_for_opportunities", language=lang)
|
| 149 |
+
|
| 150 |
+
# # Keyword-based routing
|
| 151 |
+
# match_label = None
|
| 152 |
+
# for label, kws in KEYS.items():
|
| 153 |
+
# if any(kw in text_l for kw in kws):
|
| 154 |
+
# match_label = label
|
| 155 |
+
# break
|
| 156 |
+
|
| 157 |
+
# if match_label:
|
| 158 |
+
# audience = AUDIENCE_MAP.get(match_label, "general")
|
| 159 |
+
# if forced_audience:
|
| 160 |
+
# audience = forced_audience
|
| 161 |
+
# return Route(audience=audience, intent=match_label, language=lang)
|
| 162 |
+
|
| 163 |
+
# # Fallback to general intent
|
| 164 |
+
# return Route(audience=forced_audience or "general", intent="general", language=lang)
|
| 165 |
+
|
| 166 |
+
# def call_llm(user_message: str, history: List[Dict[str, str]], system_prompt: str) -> str:
|
| 167 |
+
# """Generic function to call the LLM."""
|
| 168 |
+
# messages: List[Dict[str, str]] = [{"role": "system", "content": system_prompt}]
|
| 169 |
+
# messages.extend(history[-6:]) # Keep last 3 turns
|
| 170 |
+
# messages.append({"role": "user", "content": user_message})
|
| 171 |
+
|
| 172 |
+
# try:
|
| 173 |
+
# resp = llm_client.chat.completions.create(
|
| 174 |
+
# model=MODEL_NAME, messages=messages, temperature=0.6, top_p=0.9, max_tokens=4096,
|
| 175 |
+
# )
|
| 176 |
+
# return resp.choices[0].message.content or ""
|
| 177 |
+
# except Exception as e:
|
| 178 |
+
# print(f"[ERROR] LLM call failed: {e}")
|
| 179 |
+
# return "Sorry, I'm having trouble connecting to my brain right now. Please try again later."
|
| 180 |
+
|
| 181 |
+
# def query_weaviate(class_name: str, query_text: str, properties: List[str], limit: int = 5) -> str:
|
| 182 |
+
# """Performs vector search on a Weaviate collection and formats the output."""
|
| 183 |
+
# if not weaviate_client:
|
| 184 |
+
# return "Database connection is not available."
|
| 185 |
+
# try:
|
| 186 |
+
# response = (
|
| 187 |
+
# weaviate_client.query
|
| 188 |
+
# .get(class_name, properties)
|
| 189 |
+
# .with_near_text({"concepts": [query_text]})
|
| 190 |
+
# .with_limit(limit)
|
| 191 |
+
# .do()
|
| 192 |
+
# )
|
| 193 |
+
|
| 194 |
+
# results = response["data"]["Get"][class_name]
|
| 195 |
+
# if not results:
|
| 196 |
+
# return f"No matching {class_name.lower()} found."
|
| 197 |
+
|
| 198 |
+
# formatted_output = ""
|
| 199 |
+
# for i, item in enumerate(results):
|
| 200 |
+
# formatted_output += f"### Result {i+1}\n"
|
| 201 |
+
# for prop in properties:
|
| 202 |
+
# if prop in item and item[prop]:
|
| 203 |
+
# formatted_output += f"- **{prop.replace('_', ' ').title()}**: {item[prop]}\n"
|
| 204 |
+
# formatted_output += "\n"
|
| 205 |
+
# return formatted_output.strip()
|
| 206 |
+
|
| 207 |
+
# except Exception as e:
|
| 208 |
+
# print(f"[ERROR] Weaviate query failed for class '{class_name}': {e}")
|
| 209 |
+
# return f"An error occurred while searching for {class_name.lower()}."
|
| 210 |
+
|
| 211 |
+
# def analyze_cv_with_llm(cv_text: str) -> str:
|
| 212 |
+
# """Uses LLM to extract key skills and information from a CV."""
|
| 213 |
+
# prompt = (
|
| 214 |
+
# "Analyze the following CV text and extract the key information. "
|
| 215 |
+
# "Summarize it into a short phrase suitable for a vector search to find matching jobs or internships. "
|
| 216 |
+
# "Focus on technical skills, programming languages, field of study, and key experiences.\n\n"
|
| 217 |
+
# f"--- CV TEXT ---\n{cv_text}\n--- END CV TEXT ---"
|
| 218 |
+
# )
|
| 219 |
+
# return call_llm(prompt, [], SYSTEM_PROMPT_BASE)
|
| 220 |
+
|
| 221 |
+
# # --- 3. GRADIO UI & EVENT HANDLERS ---
|
| 222 |
+
|
| 223 |
+
# with gr.Blocks(css="""
|
| 224 |
+
# .chatbot {height: 500px; overflow: auto;}
|
| 225 |
+
# .user-bubble {background-color: #DCF8C6; padding: 10px; border-radius: 12px; max-width: 75%; float: right; clear: both; margin: 5px; word-wrap: break-word;}
|
| 226 |
+
# .bot-bubble {background-color: #F1F0F0; padding: 10px; border-radius: 12px; max-width: 75%; float: left; clear: both; margin: 5px; word-wrap: break-word;}
|
| 227 |
+
# .chatbox-container {display: flex; gap: 8px; margin-top: 10px;}
|
| 228 |
+
# .bot-bubble table {border-collapse: collapse; width: 100%;}
|
| 229 |
+
# .bot-bubble th, .bot-bubble td {border: 1px solid #ddd; padding: 8px; text-align: left;}
|
| 230 |
+
# .bot-bubble th {background-color: #e9e9e9;}
|
| 231 |
+
# """) as demo:
|
| 232 |
+
# gr.Markdown("# 🤖 EduNatives Assistant\nYour smart, bilingual guide for academic and career opportunities.")
|
| 233 |
+
|
| 234 |
+
# with gr.Row():
|
| 235 |
+
# audience_dd = gr.Dropdown(
|
| 236 |
+
# label="Audience",
|
| 237 |
+
# choices=["Auto", "Student", "University-Research", "Company"],
|
| 238 |
+
# value="Auto",
|
| 239 |
+
# interactive=True,
|
| 240 |
+
# info="Select your role. 'Auto' detects it from your message."
|
| 241 |
+
# )
|
| 242 |
+
# clear_btn = gr.Button("🧹 Clear Chat")
|
| 243 |
+
|
| 244 |
+
# status = gr.Markdown("Status: Ready.")
|
| 245 |
+
# chatbot_html = gr.HTML("<div class='chatbot' id='chatbot'></div>")
|
| 246 |
+
# chat_history_state = gr.State([])
|
| 247 |
+
|
| 248 |
+
# with gr.Row(elem_classes="chatbox-container"):
|
| 249 |
+
# msg = gr.Textbox(
|
| 250 |
+
# placeholder="اكتب سؤالك هنا... / Ask your question here...",
|
| 251 |
+
# lines=2, scale=4, autofocus=True,
|
| 252 |
+
# )
|
| 253 |
+
# file_uploader = gr.File(
|
| 254 |
+
# label="Upload Document (.txt, .pdf, .docx)",
|
| 255 |
+
# file_types=[".txt", ".pdf", ".docx"],
|
| 256 |
+
# interactive=True,
|
| 257 |
+
# )
|
| 258 |
+
# with gr.Column(scale=1, min_width=120):
|
| 259 |
+
# send_btn = gr.Button("➡️ Send", scale=1, variant="primary")
|
| 260 |
+
|
| 261 |
+
# def format_chat_html(history: List[Dict[str, str]]) -> str:
|
| 262 |
+
# """Converts chat history to styled HTML."""
|
| 263 |
+
# html = "<div class='chatbot'>"
|
| 264 |
+
# for message in history:
|
| 265 |
+
# role, content = message["role"], message["content"]
|
| 266 |
+
# bubble_class = "user-bubble" if role == "user" else "bot-bubble"
|
| 267 |
+
# html_content = markdown.markdown(content, extensions=['tables']) if role == "assistant" else content
|
| 268 |
+
# html += f"<div class='{bubble_class}'>{html_content}</div>"
|
| 269 |
+
# html += "</div>"
|
| 270 |
+
# return html
|
| 271 |
+
|
| 272 |
+
# def respond(user_text: str, file_obj: Any, history: List[Dict[str, str]], audience_choice: str):
|
| 273 |
+
# # 1. Process inputs
|
| 274 |
+
# document_text = process_uploaded_file(file_obj)
|
| 275 |
+
# if not user_text.strip() and not document_text:
|
| 276 |
+
# return "", format_chat_html(history), history, "Status: Please type a message or upload a file.", None
|
| 277 |
+
|
| 278 |
+
# user_message_for_history = user_text
|
| 279 |
+
# if document_text:
|
| 280 |
+
# file_name = os.path.basename(file_obj.name)
|
| 281 |
+
# user_message_for_history += f"\n\n*📎 [File Attached: {file_name}]*"
|
| 282 |
+
|
| 283 |
+
# # 2. Route intent
|
| 284 |
+
# forced = {"Student": "student", "University-Research": "university", "Company": "company"}.get(audience_choice)
|
| 285 |
+
# route = route_intent(user_text, has_file=bool(document_text), forced_audience=forced)
|
| 286 |
+
# status_text = f"**Audience**: {route.audience} | **Intent**: {route.intent} | **Lang**: {route.language.upper()}"
|
| 287 |
+
|
| 288 |
+
# # 3. Handle different intents
|
| 289 |
+
# answer = ""
|
| 290 |
+
# rag_query_text = user_text
|
| 291 |
+
|
| 292 |
+
# # --- RAG Logic Branch ---
|
| 293 |
+
# if weaviate_client and route.intent in ["find_job", "find_opportunity", "find_project", "analyze_cv_for_opportunities"]:
|
| 294 |
+
# if route.intent == "analyze_cv_for_opportunities":
|
| 295 |
+
# status_text += " | Analyzing CV..."
|
| 296 |
+
# # Use LLM to get a search query from the CV
|
| 297 |
+
# rag_query_text = analyze_cv_with_llm(document_text)
|
| 298 |
+
|
| 299 |
+
# if "job" in route.intent or "cv" in route.intent:
|
| 300 |
+
# retrieved_data = query_weaviate("Job", rag_query_text, ["title", "company", "location", "description"])
|
| 301 |
+
# elif "opportunity" in route.intent:
|
| 302 |
+
# retrieved_data = query_weaviate("Opportunities", rag_query_text, ["title", "type", "organization", "summary"])
|
| 303 |
+
# elif "project" in route.intent:
|
| 304 |
+
# retrieved_data = query_weaviate("Project", rag_query_text, ["title", "field", "university", "abstract"])
|
| 305 |
+
|
| 306 |
+
# # Combine retrieved data with LLM for a natural response
|
| 307 |
+
# system_prompt = RAG_PROMPT_TEMPLATE.format(retrieved_data=retrieved_data)
|
| 308 |
+
# answer = call_llm(user_text, history, system_prompt)
|
| 309 |
+
|
| 310 |
+
# # --- KB/General LLM Logic Branch ---
|
| 311 |
+
# else:
|
| 312 |
+
# if route.intent in KB:
|
| 313 |
+
# answer = KB[route.intent].get(route.language, KB[route.intent]["en"])
|
| 314 |
+
# else:
|
| 315 |
+
# answer = call_llm(user_text, history, SYSTEM_PROMPT_BASE)
|
| 316 |
+
|
| 317 |
+
# # 4. Update history, log, and format for UI
|
| 318 |
+
# history.append({"role": "user", "content": user_message_for_history})
|
| 319 |
+
# history.append({"role": "assistant", "content": answer})
|
| 320 |
+
|
| 321 |
+
# log_interaction({
|
| 322 |
+
# "timestamp": time.time(),
|
| 323 |
+
# "user_message": user_text,
|
| 324 |
+
# "file_uploaded": file_obj.name if file_obj else None,
|
| 325 |
+
# "audience": route.audience,
|
| 326 |
+
# "intent": route.intent,
|
| 327 |
+
# "language": route.language,
|
| 328 |
+
# "bot_response": answer
|
| 329 |
+
# })
|
| 330 |
+
|
| 331 |
+
# updated_html = format_chat_html(history)
|
| 332 |
+
# return "", updated_html, history, status_text, None # Clear text input and file uploader
|
| 333 |
+
|
| 334 |
+
# def clear_chat():
|
| 335 |
+
# """Clears the chat history and UI components."""
|
| 336 |
+
# return "", [], "Status: Ready.", None
|
| 337 |
+
|
| 338 |
+
# # Event Handlers
|
| 339 |
+
# send_btn.click(respond, inputs=[msg, file_uploader, chat_history_state, audience_dd], outputs=[msg, chatbot_html, chat_history_state, status, file_uploader], queue=True)
|
| 340 |
+
# msg.submit(respond, inputs=[msg, file_uploader, chat_history_state, audience_dd], outputs=[msg, chatbot_html, chat_history_state, status, file_uploader], queue=True)
|
| 341 |
+
# clear_btn.click(clear_chat, outputs=[chatbot_html, chat_history_state, status, file_uploader], queue=False)
|
| 342 |
+
|
| 343 |
+
# if __name__ == "__main__":
|
| 344 |
+
# demo.launch(debug=True)
|
| 345 |
+
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
|
| 349 |
+
# app.py -- Full EduNatives chatbot with RAG + Application + Team flows
|
| 350 |
from __future__ import annotations

import datetime
import json
import os
import re
import time
import uuid
from dataclasses import dataclass
from typing import Any, Dict, List, Optional, Tuple

import docx
import fitz  # PyMuPDF
import gradio as gr
import markdown
import weaviate
from openai import OpenAI
from weaviate.classes.config import Configure, DataType, Property
from weaviate.classes.init import Auth
|
| 368 |
|
| 369 |
+
# -------------------- Configuration --------------------
# All settings are overridable via environment variables.
MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b")
# SECURITY NOTE(review): the literal fallback values below are committed
# secrets (DeepInfra + Weaviate API keys). They should be rotated and the
# hard-coded defaults removed so the env vars become mandatory.
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY", "kPEm10rrnxXrCf0TuB6Xcd7Y7lp3YgKa")
BASE_URL = os.getenv("BASE_URL", "https://api.deepinfra.com/v1/openai")

WEAVIATE_URL = os.getenv("WEAVIATE_URL", "htorgbgpt4w63nvf1yeuw.c0.us-west3.gcp.weaviate.cloud")
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY", "ZUd6clB5WmYzVGkxeU40cl96NTY5UkViUlVzY05Md3IzQ0JKelBZQmxGZHRPeGpCeGdxS1FUNnlYUkFFPV92MjAw")

# Local JSON files used for persistence (MEMORY_FILE) and analytics (LOG_FILE).
MEMORY_FILE = os.getenv("MEMORY_FILE", "chat_memory.json")
LOG_FILE = os.getenv("LOG_FILE", "chat_analytics.json")

# -------------------- Clients --------------------
# LLM client (OpenAI-compatible API hosted on DeepInfra)
llm_client = OpenAI(api_key=DEEPINFRA_API_KEY, base_url=BASE_URL)

# Weaviate client — connects at import time; a failure here aborts startup.
weaviate_client = weaviate.connect_to_weaviate_cloud(
    cluster_url=WEAVIATE_URL,
    auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
)
|
| 389 |
|
| 390 |
+
# -------------------- KB, Keys, prompts --------------------
# Canned bilingual answers (English "en" / Arabic "ar") for the most common
# intents, served verbatim before falling back to the LLM.
# Keyed by intent label, then by language code.
KB: Dict[str, Dict[str, str]] = {
    "student_registration": {
        "en": (
            "**How to register / create an account (Student)**\n\n"
            "1. Go to the EduNatives site and choose Sign Up.\n"
            "2. Use your university email if possible and verify it.\n"
            "3. Complete your profile (major, skills, interests).\n"
            "4. Enable notifications for internships/scholarships."
        ),
        "ar": (
            "**طريقة التسجيل وإنشاء حساب (طلاب)**\n\n"
            "١. اذهب إلى موقع EduNatives واختر Sign Up.\n"
            "٢. يفضل استخدام إيميل الجامعة وتأكيده.\n"
            "٣. أكمل ملفك الشخصي (التخصص، المهارات، الاهتمامات).\n"
            "٤. فعّل التنبيهات لفرص التدريب والمنح."
        ),
    },
    "student_internships": {
        "en": (
            "**Finding internships & scholarships**\n\n"
            "- Use the search filters: field, location, duration, paid/unpaid.\n"
            "- Follow companies and set up alerts for new opportunities.\n"
            "- Keep your profile and resume updated."
        ),
        "ar": (
            "**كيفية العثور على تدريب أو منحة**\n\n"
            "- استخدم فلاتر البحث: التخصص، المكان، المدة، مدفوع/غير مدفوع.\n"
            "- تابع الشركات وفعّل التنبيهات للفرص الجديدة.\n"
            "- حافظ على تحديث ملفك الشخصي وسيرتك الذاتية."
        ),
    },
}
|
| 423 |
|
|
|
|
| 424 |
# Keyword lists (English + Arabic) that route_intent() substring-matches
# against the lowercased user message; keyed by intent label.
KEYS = {
    "student_registration": ["register", "sign up", "signup", "create account", "account", "تسجيل", "انشاء", "إنشاء", "حساب", "اعمل حساب", "سجل"],
    "student_internships": ["intern", "internship", "training", "scholar", "scholarship", "grant", "opportunity", "تدريب", "تدريبي", "منحة", "منح", "فرصة", "فرص", "انترنشيب"],
    "student_mentors": ["mentor", "advisor", "professor", "supervisor", "faculty", "connect", "منتور", "مشرف", "دكتور", "أستاذ", "استاذ", "التواصل", "اكلم"],
    "university_publish": ["publish", "paper", "research", "preprint", "conference", "event", "seminar", "webinar", "نشر", "أبحاث", "ابحاث", "بحث", "مؤتمر", "فعالية", "فعاليات", "ندوة", "ورشة"],
    "university_connect": ["students", "connect with students", "reach students", "collaborate", "طلاب", "تواصل مع الطلاب", "التواصل مع الطلاب", "تعاون"],
    "company_post_jobs": ["job", "jobs", "post job", "hiring", "hire", "internships", "graduate", "وظيفة", "وظائف", "اعلان", "إعلان", "نشر وظيفة", "توظيف", "فرص تدريب", "خريجين"],
    "company_find_talent": ["talent", "candidate", "recruit", "search", "find", "pipeline", "موهبة", "مواهب", "مرشحين", "تعيين", "تجنيد", "ابحث", "دور على"],
    "project_query": ["project", "projects", "مشروع", "مشاريع", "هدف", "أهداف"],
    "apply_job_opportunity": ["apply", "application", "تقديم", "طلب", "عايز اقدم", "اريد التقديم", "اريد اتقدم"],
    "join_team": ["team", "join team", "فريق", "انضمام لفريق", "انضمام", "انضم"],
}
|
| 436 |
|
|
|
|
| 437 |
# Maps each intent label to its primary audience; route_intent() falls back
# to "general" for labels not present here (or when no keyword matched).
AUDIENCE_MAP = {
    "student_registration": "student",
    "student_internships": "student",
    "student_mentors": "student",
    "university_publish": "university",
    "university_connect": "university",
    "company_post_jobs": "company",
    "company_find_talent": "company",
    "project_query": "student",
    "apply_job_opportunity": "student",
    "join_team": "student",
}
|
| 449 |
|
|
|
|
| 450 |
# Base system prompt sent on every LLM call: persona, the three audiences,
# and the reply rules (language mirroring, Weaviate grounding, conciseness).
SYSTEM_PROMPT_BASE = (
    "You are **EduNatives Assistant**, a helpful, friendly, and precise academic/career guide. "
    "You serve three primary audiences: Students, Universities/Researchers, and Companies.\n\n"
    "Goals by audience:\n"
    "- Students: registration/account help; finding internships/scholarships; connecting with mentors or professors; querying projects; applying for jobs/opportunities; joining project teams.\n"
    "- Universities/Researchers: publish research or announce events; connect/collaborate with students.\n"
    "- Companies: post jobs/internships/graduate roles; discover student talent.\n\n"
    "General rules:\n"
    "- Reply in the user's language (Arabic if the user writes Arabic; otherwise English).\n"
    "- Use data from Weaviate collections (Job, Opportunities, Project) when relevant.\n"
    "- Be concise, step-by-step, and action-oriented (lists, bullets, checklists).\n"
    "- If information is unavailable, state that clearly and suggest the next best step.\n"
    "- For CV analysis, extract skills/experience and recommend matching opportunities.\n"
    "- Assist with applications and team matching.\n"
    "- Ensure that all generated prompts are phrased using positive reinforcement."
)

# Secondary system message filled per request with route_intent() results
# (see call_llm); placeholders: {audience}, {intent}.
CONTEXT_INJECT_TEMPLATE = (
    "Context to guide your answer (do not repeat verbatim):\n"
    "- Audience: {audience}\n- Intent: {intent}\n- Extra hints: Keep it practical for this audience."
)
|
| 471 |
|
| 472 |
+
# -------------------- Utility helpers --------------------
# Unicode codepoint intervals covering Arabic script: the base block, two
# supplement blocks, both presentation-forms blocks, and the Arabic
# mathematical alphabetic symbols plane.
ARABIC_RANGE = (
    (0x0600, 0x06FF), (0x0750, 0x077F), (0x08A0, 0x08FF),
    (0xFB50, 0xFDFF), (0xFE70, 0xFEFF), (0x1EE00, 0x1EEFF)
)

def is_arabic(text: str) -> bool:
    """Return True when *text* contains at least one Arabic-script character."""
    return any(
        lo <= ord(ch) <= hi
        for ch in text
        for lo, hi in ARABIC_RANGE
    )
|
| 485 |
+
|
| 486 |
+
def format_chat_html(history: List[Dict[str, str]]) -> str:
    """Render the chat history as HTML bubbles.

    User messages are HTML-escaped before interpolation (they are untrusted
    input rendered straight into the page — the original injected them raw,
    an HTML/script-injection hole). Assistant messages are trusted bot
    output and are converted from Markdown to HTML with tables enabled.
    """
    import html as _html  # local import keeps this fix self-contained

    parts = ["<div class='chatbot'>"]
    for message in history:
        role = message["role"]
        content = message["content"]
        if role == "user":
            # Escape to prevent markup/script injection from user input.
            parts.append(f"<div class='user-bubble'>{_html.escape(content)}</div>")
        elif role == "assistant":
            rendered = markdown.markdown(content, extensions=['tables'])
            parts.append(f"<div class='bot-bubble'>{rendered}</div>")
    parts.append("</div>")
    return "".join(parts)
|
| 498 |
+
|
| 499 |
+
# Simple keyword-based CV skills/experience extractor (improvable)
_SKILL_REGEX = re.compile(r"\b(python|java|c\+\+|c#|javascript|nlp|machine learning|deep learning|data science|sql|aws|azure|docker|kubernetes|react|node\.js)\b", re.IGNORECASE)
_EXP_REGEX = re.compile(r"(\d+)\s*(?:years|year|months|month)\s*(?:of)?\s*(?:experience|exp|worked)", re.IGNORECASE)

def extract_skills_experience(text: str) -> Dict[str, List[str]]:
    """Extract known skill keywords and experience phrases from free text.

    Returns a dict with:
      - "skills": de-duplicated, lower-cased skill names, sorted so the
        output is deterministic (the original iterated a set, yielding an
        arbitrary order on every call).
      - "experience": raw matched phrases such as "3 years of experience".
    """
    skills = sorted({m.group(0).lower() for m in _SKILL_REGEX.finditer(text)})
    experiences = [m.group(0) for m in _EXP_REGEX.finditer(text)]
    return {"skills": skills, "experience": experiences}
|
| 507 |
+
|
| 508 |
+
# -------------------- Ensure auxiliary collections --------------------
def ensure_aux_collections():
    """Create the Team / Application / Memory collections when absent.

    All three are plain object stores (no vectorizer). Creation is
    idempotent: each collection is guarded by an existence check.
    """
    schemas = {
        "Team": [
            Property(name="teamId", data_type=DataType.TEXT),
            Property(name="name", data_type=DataType.TEXT),
            Property(name="projectId", data_type=DataType.TEXT),
            Property(name="members", data_type=DataType.TEXT_ARRAY),
            Property(name="createdAt", data_type=DataType.DATE),
            Property(name="creatorId", data_type=DataType.TEXT),
        ],
        "Application": [
            Property(name="applicationId", data_type=DataType.TEXT),
            Property(name="jobId", data_type=DataType.TEXT),
            Property(name="opportunityId", data_type=DataType.TEXT),
            Property(name="applicantName", data_type=DataType.TEXT),
            Property(name="applicantEmail", data_type=DataType.TEXT),
            Property(name="coverLetter", data_type=DataType.TEXT),
            Property(name="cvText", data_type=DataType.TEXT),
            Property(name="createdAt", data_type=DataType.DATE),
        ],
        "Memory": [
            Property(name="memoryId", data_type=DataType.TEXT),
            Property(name="sessionId", data_type=DataType.TEXT),
            Property(name="text", data_type=DataType.TEXT),
            Property(name="createdAt", data_type=DataType.DATE),
        ],
    }
    for coll_name, props in schemas.items():
        if not weaviate_client.collections.exists(coll_name):
            weaviate_client.collections.create(
                name=coll_name,
                properties=props,
                vectorizer_config=Configure.Vectorizer.none(),
            )

ensure_aux_collections()
|
| 556 |
+
|
| 557 |
+
# -------------------- Weaviate query helpers (RAG) --------------------
def query_weaviate_collection(class_name: str, query_text: str, limit: int = 5) -> List[dict]:
    """Run a hybrid search against one collection (weaviate v4 client).

    When the hybrid search yields nothing, falls back to a plain
    fetch_objects of *limit* records. Any failure is logged and yields [].
    """
    try:
        collection = weaviate_client.collections.get(class_name)

        # Hybrid (vector + keyword) search first.
        result = collection.query.hybrid(query=query_text, limit=limit)
        records = [obj.properties for obj in result.objects]

        # Fallback when hybrid returned no hits: fetch a few objects blindly.
        if not records:
            print(f"[Hybrid returned 0 → fallback filter on {class_name}]")
            fallback = collection.query.fetch_objects(limit=limit)
            records = [obj.properties for obj in fallback.objects]

        return records
    except Exception as e:
        print(f"[Weaviate Query Error] class={class_name} error={e}")
        return []
|
| 580 |
+
|
| 581 |
+
|
| 582 |
+
def build_rag_prompt(user_question: str, retrieved_items: List[dict], class_name: str) -> str:
    """Assemble a grounding prompt: intro + one line per record + the question.

    The per-record format depends on *class_name* (Job / Opportunities /
    Project); any other collection is dumped as a truncated repr.
    """
    intro = f"Use the following {len(retrieved_items)} records from {class_name} to answer the question succinctly.\n\n"
    lines = []
    for idx, rec in enumerate(retrieved_items, 1):
        if class_name == "Job":
            line = f"{idx}. Title: {rec.get('title','N/A')} | Company: {rec.get('companyName','N/A')} | Skills: {', '.join(rec.get('skills',[]))} | Desc: {rec.get('description','')[:200]}"
        elif class_name == "Opportunities":
            line = f"{idx}. Title: {rec.get('title','N/A')} | Topic: {rec.get('topic','N/A')} | Skills: {', '.join(rec.get('skills',[]))} | Overview: {rec.get('overview','')[:200]}"
        elif class_name == "Project":
            line = f"{idx}. Title: {rec.get('title','N/A')} | ShortDesc: {rec.get('shortDescription','')[:200]} | Fields: {', '.join(rec.get('fields',[]))}"
        else:
            line = f"{idx}. {str(rec)[:200]}"
        lines.append(line)
    closing = f"\n\nQuestion: {user_question}\nAnswer concisely and, if applicable, include next steps (how to apply / contact / form a team)."
    return intro + "\n".join(lines) + closing
|
| 597 |
+
|
| 598 |
+
def rag_answer(user_question: str, class_name: str, top_k: int = 5) -> Tuple[str, List[dict]]:
    """Retrieve records for *user_question* and answer grounded on them.

    FIX: the original return annotation was the tuple literal
    ``(str, List[dict])``, which is not a valid type hint; it is now
    ``Tuple[str, List[dict]]``.

    Returns (answer, retrieved): the LLM answer — "" when retrieval was
    empty or the LLM call failed — plus the raw retrieved records.
    """
    retrieved = query_weaviate_collection(class_name, user_question, limit=top_k)
    if not retrieved:
        return "", []
    prompt = build_rag_prompt(user_question, retrieved, class_name)
    try:
        resp = llm_client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT_BASE},
                {"role": "user", "content": prompt}
            ],
            temperature=0.2,  # low temperature: grounded, factual summaries
            max_tokens=512
        )
        answer = resp.choices[0].message.content or ""
    except Exception as e:
        print(f"[RAG LLM Error] {e}")
        answer = ""
    return answer, retrieved
|
| 618 |
+
|
| 619 |
+
# -------------------- Save helpers --------------------
def save_application_to_weaviate(application: dict) -> bool:
    """Persist one application record; True on success, False on any error."""
    try:
        apps = weaviate_client.collections.get("Application")
        # Stamp a creation time if the caller did not supply one.
        application.setdefault("createdAt", datetime.datetime.utcnow().isoformat() + "Z")
        # Deterministic UUID derived from applicationId → stable re-insert key.
        uid = str(uuid.uuid5(uuid.NAMESPACE_DNS, application.get("applicationId", str(uuid.uuid4()))))
        apps.data.insert(properties=application, uuid=uid)
        return True
    except Exception as e:
        print(f"[Save Application Error] {e}")
        return False
|
| 632 |
+
|
| 633 |
+
def save_team_to_weaviate(team_props: dict) -> Optional[dict]:
    """Persist one team record; returns the (stamped) props, or None on error."""
    try:
        teams = weaviate_client.collections.get("Team")
        team_props.setdefault("createdAt", datetime.datetime.utcnow().isoformat() + "Z")
        # Deterministic UUID derived from teamId → stable re-insert key.
        uid = str(uuid.uuid5(uuid.NAMESPACE_DNS, team_props.get("teamId", str(uuid.uuid4()))))
        teams.data.insert(properties=team_props, uuid=uid)
        return team_props
    except Exception as e:
        print(f"[Save Team Error] {e}")
        return None
|
| 643 |
+
|
| 644 |
+
def save_memory_to_weaviate(session_id: str, text: str) -> bool:
    """Store one short conversational memory snippet; returns a success flag."""
    try:
        memories = weaviate_client.collections.get("Memory")
        record = {
            "memoryId": str(uuid.uuid4()),
            "sessionId": session_id,
            "text": text,
            "createdAt": datetime.datetime.utcnow().isoformat() + "Z",
        }
        memories.data.insert(properties=record, uuid=str(uuid.uuid5(uuid.NAMESPACE_DNS, record["memoryId"])))
        return True
    except Exception as e:
        print(f"[Save Memory Error] {e}")
        return False
|
| 653 |
+
|
| 654 |
+
# -------------------- File processing --------------------
def process_uploaded_file(file_obj: Any) -> Optional[dict]:
    """Extract plain text and a skills/experience profile from an upload.

    *file_obj* is a Gradio file object exposing a ``.name`` path attribute.
    Supports .pdf (PyMuPDF), .docx (python-docx) and .txt. Returns
    {"content", "profile", "filename"} on success, {"error": ...} on
    failure or an unsupported extension, or None when no file was given.

    FIX: both error f-strings previously contained the literal text
    "(unknown)" instead of the actual filename placeholder.
    """
    if not file_obj:
        return None
    file_path = file_obj.name
    filename = os.path.basename(file_path)
    text_content = ""
    try:
        lower = filename.lower()
        if lower.endswith(".pdf"):
            with fitz.open(file_path) as doc:
                for page in doc:
                    text_content += page.get_text()
        elif lower.endswith(".docx"):
            doc = docx.Document(file_path)
            for p in doc.paragraphs:
                text_content += p.text + "\n"
        elif lower.endswith(".txt"):
            with open(file_path, "r", encoding="utf-8") as f:
                text_content = f.read()
        else:
            return {"error": f"Unsupported file type: {filename}"}
        profile = extract_skills_experience(text_content)
        return {"content": text_content.strip(), "profile": profile, "filename": filename}
    except Exception as e:
        print(f"[File process error] {e}")
        return {"error": f"Error processing file {filename}: {e}"}
|
| 684 |
|
| 685 |
+
# -------------------- Logging --------------------
def log_interaction(user_message: str, route: 'Route', response: str):
    """Append one interaction record to the JSON analytics log (best effort).

    Reads the whole LOG_FILE, appends, rewrites. Failures are printed and
    swallowed so logging can never break the chat flow.
    """
    entry = {
        "timestamp": time.time(),
        "user_message": user_message,
        "audience": route.audience,
        "intent": route.intent,
        "language": route.language,
        "response": response,
    }
    try:
        logs = []
        if os.path.exists(LOG_FILE):
            with open(LOG_FILE, "r", encoding="utf-8") as f:
                logs = json.load(f)
        logs.append(entry)
        with open(LOG_FILE, "w", encoding="utf-8") as f:
            json.dump(logs, f, ensure_ascii=False, indent=2)
    except Exception as e:
        print(f"[Log error] {e}")
|
| 699 |
|
| 700 |
+
# -------------------- Intent routing --------------------
@dataclass
class Route:
    """Routing result for one message: who it's for, what it asks, in which language."""
    audience: str  # "student" | "university" | "company" | "general"
    intent: str    # a KEYS label, or "general" when no keyword matched
    language: str  # "ar" when Arabic script detected, otherwise "en"
|
|
|
|
|
|
|
| 706 |
|
| 707 |
+
def route_intent(text: str, forced_audience: Optional[str] = None) -> Route:
    """Classify a message into (audience, intent, language) via keyword matching.

    ROBUSTNESS FIX: the original called is_arabic(text) before its empty-text
    guard, so passing None raised TypeError; *text* is coerced to "" first.

    *forced_audience* (one of "student"/"university"/"company") overrides the
    audience inferred from the matched intent.
    """
    text = text or ""
    lang = "ar" if is_arabic(text) else "en"
    text_l = text.lower()
    # First intent whose keyword list hits the lowercased message wins,
    # in KEYS insertion order.
    match_label = None
    for label, kws in KEYS.items():
        if any(kw in text_l for kw in kws):
            match_label = label
            break
    audience = AUDIENCE_MAP.get(match_label, "general")
    if forced_audience and forced_audience in {"student", "university", "company"}:
        audience = forced_audience
    return Route(audience=audience, intent=match_label or "general", language=lang)
|
| 722 |
+
|
| 723 |
+
# -------------------- call_llm --------------------
def call_llm(user_message: str, history: List[Dict[str, str]], route: Route, temperature: float = 0.6, max_tokens: int = 512) -> str:
    """Ask the chat model with base + audience/intent context system prompts.

    Only the last three turns (user+assistant pairs) of *history* are
    forwarded to keep the prompt small. Returns "" on any API failure.
    """
    MAX_TURNS = 3  # number of recent history pairs to forward
    context = CONTEXT_INJECT_TEMPLATE.format(audience=route.audience, intent=route.intent)
    messages = [
        {"role": "system", "content": SYSTEM_PROMPT_BASE},
        {"role": "system", "content": context},
    ]
    if history:
        messages.extend(history[-MAX_TURNS * 2:])
    messages.append({"role": "user", "content": user_message})
    try:
        resp = llm_client.chat.completions.create(
            model=MODEL_NAME,
            messages=messages,
            temperature=temperature,
            max_tokens=max_tokens
        )
        return resp.choices[0].message.content or ""
    except Exception as e:
        print(f"[LLM Error] {e}")
        return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 745 |
|
| 746 |
+
# -------------------- Main respond flow (Gradio) --------------------
|
| 747 |
with gr.Blocks(css="""
|
| 748 |
.chatbot {height: 500px; overflow: auto;}
|
| 749 |
.user-bubble {background-color: #DCF8C6; padding: 10px; border-radius: 12px; max-width: 75%; float: right; clear: both; margin: 5px; word-wrap: break-word;}
|
|
|
|
| 753 |
.bot-bubble th, .bot-bubble td {border: 1px solid #ddd; padding: 8px; text-align: left;}
|
| 754 |
.bot-bubble th {background-color: #e9e9e9;}
|
| 755 |
""") as demo:
|
| 756 |
+
|
| 757 |
+
gr.Markdown("# 🤖 EduNatives Assistant")

with gr.Row():
    # Manual audience override; "Auto" lets route_intent() decide.
    audience_dd = gr.Dropdown(label="Audience", choices=["Auto", "Student", "University-Research", "Company"], value="Auto", interactive=True)
    clear_btn = gr.Button("🧹 Clear Chat")

status = gr.Markdown("Status: Ready.")
chatbot_html = gr.HTML("<div class='chatbot' id='chatbot'></div>")
chat_history_state = gr.State([])          # list of {"role", "content"} dicts
user_id_state = gr.State("default_user")   # per-session user identifier

with gr.Row(elem_classes="chatbox-container"):
    msg = gr.Textbox(placeholder="اكتب سؤالك هنا... / Ask your question here...", lines=2, scale=4, autofocus=True)
    file_uploader = gr.File(label="Upload Document (.txt, .pdf, .docx)", file_types=[".txt", ".pdf", ".docx"], file_count="single", interactive=True)
    with gr.Column(scale=1, min_width=120):
        send_btn = gr.Button("➡️ Send", scale=1, variant="primary")
|
| 773 |
|
| 774 |
+
def respond(user_text: str, file_obj: Any, history: List[Dict[str, str]], audience_choice: str, user_id: str):
    """Main chat handler bound to Send / Enter.

    Routes the message, optionally runs RAG over the Project / Job /
    Opportunities collections, then handles two side flows (saving an
    application with a generated cover letter; creating a project team)
    before assembling the reply. Returns the six values bound as outputs:
    (cleared msg, chat HTML, history, status markdown, cleared file, user_id).
    """
    user_text = (user_text or "").strip()
    # process file if exists (None when no upload; may be an {"error": ...} dict)
    doc_info = process_uploaded_file(file_obj) if file_obj else None
    if not user_text and not doc_info:
        return "", format_chat_html(history), history, "Status: Please type a message or upload a file.", None, user_id

    # build combined input: prepend (truncated) document text when a file parsed
    llm_input = user_text
    if doc_info and "content" in doc_info:
        llm_input = f"Based on the document content below, answer the question.\n\n---DOCUMENT---\n{doc_info['content'][:6000]}\n---END DOCUMENT---\n\nQuestion: {user_text}"

    # Dropdown override wins over the keyword-detected audience.
    forced = {"Student": "student", "University-Research": "university", "Company": "company"}.get(audience_choice)
    route = route_intent(llm_input, forced_audience=forced)
    status_text = f"**Audience**: {route.audience} | **Intent**: {route.intent} | **Lang**: {route.language.upper()}"

    # quick CV skills if provided
    cv_profile = doc_info.get("profile") if doc_info and "profile" in doc_info else {"skills": [], "experience": []}

    # Decide RAG target based on intent / keywords
    text_lower = (user_text or "").lower()
    wants_project = any(k in text_lower for k in KEYS.get("project_query", []))
    wants_job = any(k in text_lower for k in KEYS.get("company_post_jobs", [])) or any(k in text_lower for k in KEYS.get("apply_job_opportunity", []))
    wants_opp = any(k in text_lower for k in KEYS.get("student_internships", []))

    final_answer = ""
    retrieved = []

    try:
        if wants_project or route.intent == "project_query":
            final_answer, retrieved = rag_answer(user_text or (doc_info or {}).get("content",""), "Project", top_k=5)
            if not final_answer:
                final_answer = KB.get("student_internships", {}).get(route.language, "") if route.intent == "student_internships" else ""
        elif wants_job or route.intent == "apply_job_opportunity" or route.intent == "company_post_jobs":
            # try jobs first
            final_answer, retrieved = rag_answer(user_text or (doc_info or {}).get("content",""), "Job", top_k=5)
            if not final_answer:
                # fallback to ops
                final_answer, retrieved = rag_answer(user_text or (doc_info or {}).get("content",""), "Opportunities", top_k=5)
        elif wants_opp or route.intent == "student_internships":
            final_answer, retrieved = rag_answer(user_text or (doc_info or {}).get("content",""), "Opportunities", top_k=5)
        else:
            # default LLM response with context
            final_answer = call_llm(llm_input, history, route) or KB.get("student_registration", {}).get(route.language, "Sorry, I don't have info.")
    except Exception as e:
        print(f"[Respond Error] {e}")
        final_answer = call_llm(llm_input, history, route) or KB.get("student_registration", {}).get(route.language, "")

    # --- Application flow: if user indicated they want to apply (keywords) ---
    wants_apply = any(kw in text_lower for kw in KEYS.get("apply_job_opportunity", []))
    app_summary = ""
    generated_cover = ""
    if wants_apply:
        # NOTE(review): targets the first retrieved record — assumes the top
        # RAG hit is the role the user means; confirm this heuristic.
        target = retrieved[0] if retrieved else None
        cover_text = ""
        if target:
            # build cover letter prompt using target and CV text if present
            cover_prompt = f"Write a concise 3-paragraph cover letter applying for this role:\nRole details: {json.dumps(target, ensure_ascii=False)[:1500]}\n"
            if doc_info and "content" in doc_info:
                cover_prompt += f"\nApplicant CV summary: {doc_info['content'][:2000]}\n"
            cover_prompt += "\nWrite the letter in the same language as the user."
            try:
                resp = llm_client.chat.completions.create(
                    model=MODEL_NAME,
                    messages=[{"role":"system","content":SYSTEM_PROMPT_BASE},{"role":"user","content":cover_prompt}],
                    temperature=0.3,
                    max_tokens=512
                )
                cover_text = resp.choices[0].message.content or ""
            except Exception as e:
                print(f"[Cover generation error] {e}")
                cover_text = "I can help craft a cover letter, but an error occurred while generating it."

            application = {
                "applicationId": str(uuid.uuid4()),
                "jobId": target.get("id") or target.get("jobId"),
                "opportunityId": target.get("id") or target.get("opportunityId"),
                "applicantName": "Unknown",
                "applicantEmail": "Unknown",
                "coverLetter": cover_text,
                "cvText": doc_info.get("content","")[:4000] if doc_info else "",
                "createdAt": datetime.datetime.now().isoformat()
            }
            saved = save_application_to_weaviate(application)
            app_summary = "✅ Application prepared and saved." if saved else "⚠️ Application could not be saved."
            generated_cover = cover_text
        else:
            app_summary = "لم أجد وظيفة/فرصة مناسبة تلقائياً من النتائج. أرسل عنوان الوظيفة أو اختر من النتيجة."

    # --- Team creation flow (join_team) ---
    team_created_msg = ""
    wants_team = any(k in text_lower for k in KEYS.get("join_team", []))
    if wants_team and (wants_project or route.intent == "join_team"):
        # attempt to suggest members using cv skills or create empty team entry
        suggested_members = []
        if cv_profile and cv_profile.get("skills"):
            # use skills to search opportunities maybe get studentName fields
            matches = query_weaviate_collection("Opportunities", " ".join(cv_profile.get("skills", [])), limit=5)
            for m in matches:
                name = m.get("studentName") or m.get("student") or m.get("name")
                if name:
                    suggested_members.append(name)
        if not suggested_members:
            # No candidates found: create an empty team owned by the user.
            team_props = {
                "teamId": str(uuid.uuid4()),
                "name": f"Team for project - {uuid.uuid4().hex[:6]}",
                "projectId": retrieved[0].get("globalId") if retrieved and retrieved[0].get("globalId") else None,
                "members": [],
                "createdAt": datetime.datetime.utcnow().isoformat()+"Z",
                "creatorId": user_id
            }
            saved_team = save_team_to_weaviate(team_props)
            if saved_team:
                team_created_msg = f"✅ Team created with id {team_props['teamId']}. يمكنك إضافة أعضاء لاحقًا."
            else:
                team_created_msg = "⚠️ لم أتمكن من إنشاء الفريق الآن."
        else:
            # Candidates found: create the team pre-populated with them.
            team_props = {
                "teamId": str(uuid.uuid4()),
                "name": f"Team for project - {uuid.uuid4().hex[:6]}",
                "projectId": retrieved[0].get("globalId") if retrieved and retrieved[0].get("globalId") else None,
                "members": suggested_members,
                "createdAt": datetime.datetime.utcnow().isoformat()+"Z",
                "creatorId": user_id
            }
            saved_team = save_team_to_weaviate(team_props)
            if saved_team:
                team_created_msg = f"✅ Team created with members: {', '.join(suggested_members)}"
            else:
                team_created_msg = "⚠️ لم أتمكن من إنشاء الفريق الآن."

    # Save a short memory entry
    try:
        sess = str(uuid.uuid5(uuid.NAMESPACE_DNS, (user_id or "anon") + (user_text or "")[:50]))
        mem_text = f"User: {user_text[:300]} | Action: RAG on { 'Project' if wants_project else 'Job' if wants_job else 'Opportunities' if wants_opp else 'LLM' }"
        save_memory_to_weaviate(sess, mem_text)
    except Exception as e:
        print(f"[Memory Save Error] {e}")

    # Prepare final message (answer + top results + app/team status)
    message_parts = []
    if final_answer:
        message_parts.append(final_answer)
    if retrieved:
        list_lines = []
        for item in retrieved[:5]:
            title = item.get("title") or item.get("jobTitle") or item.get("globalId") or "No Title"
            meta = item.get("companyName") or item.get("topic") or item.get("shortDescription","")
            list_lines.append(f"- **{title}** | {meta}")
        if list_lines:
            message_parts.append("\n\n**Top results:**\n" + "\n".join(list_lines))
    if wants_apply:
        message_parts.append("\n\n**Application status:** " + app_summary)
        if generated_cover:
            message_parts.append("\n\n**Generated Cover Letter:**\n" + generated_cover)
    if team_created_msg:
        message_parts.append("\n\n" + team_created_msg)

    final_message_to_user = "\n\n".join([p for p in message_parts if p])

    # Update history
    user_message_for_history = user_text
    if doc_info and doc_info.get("filename"):
        user_message_for_history += f"\n\n*📎 [File Attached: {doc_info.get('filename')}] *"

    history.append({"role": "user", "content": user_message_for_history})
    history.append({"role": "assistant", "content": final_message_to_user or "عذراً، لم أجد إجابة مناسبة الآن."})

    # log interaction
    try:
        log_interaction(user_text, route, final_message_to_user)
    except Exception:
        pass

    # Return values: clear input field, updated html, updated history, status, clear file uploader, keep user_id
    return "", format_chat_html(history), history, status_text, None, user_id
|
| 950 |
+
|
| 951 |
+
def clear_chat():
    """Reset all six bound outputs: msg, chat HTML, history, status, file, user id.

    BUG FIX: the original returned only five values while clear_btn.click
    binds six outputs ([msg, chatbot_html, chat_history_state, status,
    file_uploader, user_id_state]); the count mismatch made Gradio raise at
    runtime and mis-assigned every component after the first.
    """
    return "", "", [], "Status: Ready.", None, "default_user"
|
| 953 |
+
|
| 954 |
+
# Bind events
# NOTE: respond returns six values, matching `outputs` positionally:
# (msg, chatbot_html, chat_history_state, status, file_uploader, user_id_state).
send_btn.click(
    respond,
    inputs=[msg, file_uploader, chat_history_state, audience_dd, user_id_state],
    outputs=[msg, chatbot_html, chat_history_state, status, file_uploader, user_id_state],
    queue=True
)
# Pressing Enter in the textbox behaves exactly like the Send button.
msg.submit(
    respond,
    inputs=[msg, file_uploader, chat_history_state, audience_dd, user_id_state],
    outputs=[msg, chatbot_html, chat_history_state, status, file_uploader, user_id_state],
    queue=True
)
# Clear bypasses the queue so the reset is immediate.
clear_btn.click(
    clear_chat,
    outputs=[msg, chatbot_html, chat_history_state, status, file_uploader, user_id_state],
    queue=False
)
|
| 972 |
|
| 973 |
# Launch the Gradio app when run as a script (debug=True → verbose tracebacks).
if __name__ == "__main__":
    demo.launch(debug=True)
|