afouda commited on
Commit
db05ab0
·
verified ·
1 Parent(s): d952e91

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +725 -219
app.py CHANGED
@@ -1,129 +1,105 @@
1
  from __future__ import annotations
2
  import os
 
 
3
  import json
4
  import time
5
- from dataclasses import dataclass
6
- from typing import List, Dict, Any
7
  import markdown
 
 
 
 
8
  import gradio as gr
9
  from openai import OpenAI
 
 
 
 
10
 
11
- # --- 1. BACKEND LOGIC & CONFIG (from your first script) ---
12
-
13
- # --- LLM Configuration ---
14
- MODEL_NAME = "openai/gpt-oss-120b"
15
- DEEPINFRA_API_KEY = "kPEm10rrnxXrCf0TuB6Xcd7Y7lp3YgKa" # Replace with your key if needed
16
- BASE_URL = "https://api.deepinfra.com/v1/openai"
17
 
18
- if not DEEPINFRA_API_KEY:
19
- print("[WARN] DEEPINFRA_API_KEY is not set. The chatbot will likely fail.")
20
 
21
- client = OpenAI(api_key=DEEPINFRA_API_KEY, base_url=BASE_URL)
 
 
 
22
 
23
- # --- Language & Routing Configuration ---
24
- ARABIC_RANGE = (
25
- (0x0600, 0x06FF), (0x0750, 0x077F), (0x08A0, 0x08FF),
26
- (0xFB50, 0xFDFF), (0xFE70, 0xFEFF), (0x1EE00, 0x1EEFF)
27
  )
28
 
29
- @dataclass
30
- class Route:
31
- audience: str
32
- intent: str
33
- language: str
34
-
35
- KB: Dict[str, Dict[str, str]] = {
36
- "student_registration": {
37
- "en": (
38
- "**How to register / create an account (Student)**\n\n"
39
- "1. Go to the EduNatives site and choose Sign Up.\n"
40
- "2. Use your university email if possible and verify it.\n"
41
- "3. Complete your profile (major, skills, interests).\n"
42
- "4. Enable notifications for internships/scholarships."
43
- ),
44
- "ar": (
45
- "**طريقة التسجيل وإنشاء حساب (طلاب)**\n\n"
46
- "١. اذهب إلى موقع EduNatives واختر Sign Up.\n"
47
- "٢. يفضل استخدام إيميل الجامعة وتأكيده.\n"
48
- "٣. أكمل ملفك الشخصي (التخصص، المهارات، الاهتمامات).\n"
49
- "٤. فعّل التنبيهات لفرص التدريب والمنح."
50
- ),
51
- },
52
- "student_internships": {
53
- "en": (
54
- "**Finding internships & scholarships**\n\n"
55
- "- Use the search filters: field, location, duration, paid/unpaid.\n"
56
- "- Follow companies and set up alerts for new opportunities.\n"
57
- "- Keep your profile and resume updated."
58
- ),
59
- "ar": (
60
- "**كيفية العثور على تدريب أو منحة**\n\n"
61
- "- استخدم فلاتر البحث: التخصص، المكان، المدة، مدفوع/غير مدفوع.\n"
62
- "- تابع الشركات وفعّل التنبيهات للفرص الجديدة.\n"
63
- "- حافظ على تحديث ملفك الشخصي وسيرتك الذاتية."
64
- ),
65
- },
66
- }
67
-
68
- KEYS = {
69
- "student_registration": [
70
- "register", "sign up", "signup", "create account", "account",
71
- "تسجيل", "انشاء", "إنشاء", "حساب", "اعمل حساب", "سجل"
72
- ],
73
- "student_internships": [
74
- "intern", "internship", "training", "scholar", "scholarship", "grant", "opportunity",
75
- "تدريب", "تدريبي", "منحة", "منح", "فرصة", "فرص", "انترنشيب"
76
- ],
77
- "student_mentors": [
78
- "mentor", "advisor", "professor", "supervisor", "faculty", "connect",
79
- "منتور", "مشرف", "دكتور", "أستاذ", "استاذ", "التواصل", "اكلم"
80
- ],
81
- "university_publish": [
82
- "publish", "paper", "research", "preprint", "conference", "event", "seminar", "webinar",
83
- "نشر", "أبحاث", "ابحاث", "بحث", "مؤتمر", "فعالية", "فعاليات", "ندوة", "ورشة"
84
- ],
85
- "university_connect": [
86
- "students", "connect with students", "reach students", "collaborate",
87
- "طلاب", "تواصل مع الطلاب", "التواصل مع الطلاب", "تعاون"
88
- ],
89
- "company_post_jobs": [
90
- "job", "jobs", "post job", "hiring", "hire", "internships", "graduate",
91
- "وظيفة", "وظائف", "اعلان", "إعلان", "نشر وظيفة", "توظيف", "فرص تدريب", "خريجين"
92
- ],
93
- "company_find_talent": [
94
- "talent", "candidate", "recruit", "search", "find", "pipeline",
95
- "موهبة", "مواهب", "مرشحين", "تعيين", "تجنيد", "ابحث", "دور على"
96
- ],
97
- }
98
-
99
- AUDIENCE_MAP = {
100
- "student_registration": "student",
101
- "student_internships": "student",
102
- "student_mentors": "student",
103
- "university_publish": "university",
104
- "university_connect": "university",
105
- "company_post_jobs": "company",
106
- "company_find_talent": "company",
107
- }
108
 
109
  SYSTEM_PROMPT_BASE = (
110
  "You are **EduNatives Assistant**, a helpful, friendly, and precise academic/career guide. "
111
  "You serve three primary audiences: Students, Universities/Researchers, and Companies.\n\n"
112
  "Goals by audience:\n"
113
- "- Students: registration/account help; finding internships/scholarships; connecting with mentors or professors.\n"
114
  "- Universities/Researchers: publish research or announce events; connect/collaborate with students.\n"
115
  "- Companies: post jobs/internships/graduate roles; discover student talent.\n\n"
116
  "General rules:\n"
117
  "- Reply in the user's language (Arabic if the user writes Arabic; otherwise English).\n"
 
118
  "- Be concise, step-by-step, and action-oriented (lists, bullets, checklists).\n"
119
  "- If information is unavailable, state that clearly and suggest the next best step.\n"
 
 
 
120
  )
121
- CONTEXT_INJECT_TEMPLATE = (
122
- "Context to guide your answer (do not repeat verbatim):\n"
123
- "- Audience: {audience}\n- Intent: {intent}\n- Extra hints: Keep it practical for this audience."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  )
125
 
126
- # --- Core Functions ---
127
  def is_arabic(text: str) -> bool:
128
  for ch in text:
129
  code = ord(ch)
@@ -132,145 +108,675 @@ def is_arabic(text: str) -> bool:
132
  return True
133
  return False
134
 
135
- def route_intent(text: str, forced_audience: str | None = None) -> Route:
136
- lang = "ar" if is_arabic(text) else "en"
137
- match_label = None
138
- text_l = text.lower()
139
- for label, kws in KEYS.items():
140
- for kw in kws:
141
- if kw in text_l:
142
- match_label = label
143
- break
144
- if match_label:
145
- break
146
-
147
- if match_label is None:
148
- audience = forced_audience if forced_audience else "general"
149
- return Route(audience=audience, intent="general", language=lang)
150
-
151
- audience = AUDIENCE_MAP.get(match_label, "general")
152
- if forced_audience and forced_audience in {"student", "university", "company"}:
153
- audience = forced_audience
154
-
155
- return Route(audience=audience, intent=match_label, language=lang)
156
-
157
- def call_llm(user_message: str, history: List[Dict[str, str]], route: Route) -> str:
158
- messages: List[Dict[str, str]] = [
159
- {"role": "system", "content": SYSTEM_PROMPT_BASE},
160
- {"role": "system", "content": CONTEXT_INJECT_TEMPLATE.format(audience=route.audience, intent=route.intent)},
161
- ]
162
- MAX_TURNS = 3
163
- trimmed_history = history[-MAX_TURNS*2:]
164
- messages.extend(trimmed_history)
165
- messages.append({"role": "user", "content": user_message})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
166
 
167
  try:
168
- resp = client.chat.completions.create(
169
  model=MODEL_NAME,
170
- messages=messages,
171
- temperature=0.6,
172
- top_p=0.9,
173
- max_tokens=800,
 
 
174
  )
175
- return resp.choices[0].message.content or ""
176
  except Exception as e:
177
- print(f"[ERROR] LLM call failed: {e}")
178
- return ""
179
-
180
- def kb_fallback(route: Route) -> str:
181
- if route.intent in KB:
182
- block = KB[route.intent]
183
- return block.get(route.language, block.get("en", ""))
184
- if route.language == "ar":
185
- return (
186
- "عذراً، لم أجد معلومات كافية. يرجى توضيح طلبك أو السؤال عن أحد المواضيع التالية: "
187
- "(تسجيل، تدريب/منح، مشرفين، نشر أبحاث، وظائف)."
 
 
 
 
 
 
 
 
 
 
 
188
  )
189
- else:
190
- return (
191
- "I couldn't find enough info. Please clarify your request or ask about "
192
- "registration, internships/scholarships, mentors, publishing research, or jobs."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
193
  )
194
 
 
195
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  with gr.Blocks(css="""
198
- .chatbot {height: 500px; overflow: auto;}
199
  .user-bubble {background-color: #DCF8C6; padding: 10px; border-radius: 12px; max-width: 75%; float: right; clear: both; margin: 5px; word-wrap: break-word;}
200
  .bot-bubble {background-color: #F1F0F0; padding: 10px; border-radius: 12px; max-width: 75%; float: left; clear: both; margin: 5px; word-wrap: break-word;}
201
  .chatbox-container {display: flex; gap: 8px; margin-top: 10px;}
202
- /* Basic styling for tables inside the bot bubble */
203
- .bot-bubble table {border-collapse: collapse; width: 100%;}
204
- .bot-bubble th, .bot-bubble td {border: 1px solid #ddd; padding: 8px; text-align: left;}
205
- .bot-bubble th {background-color: #e9e9e9;}
206
  """) as demo:
207
 
208
- gr.Markdown("# 🤖 EduNatives Assistant\nYour smart, bilingual guide for academic and career opportunities.")
 
 
 
 
 
 
 
 
209
 
 
 
 
 
 
 
210
  with gr.Row():
211
- audience_dd = gr.Dropdown(
212
- label="Audience",
213
- choices=["Auto", "Student", "University-Research", "Company"],
214
- value="Auto",
215
- interactive=True,
216
- info="Select your role. 'Auto' detects it from your message."
217
- )
218
- clear_btn = gr.Button("🧹 Clear Chat")
219
 
220
- status = gr.Markdown("Status: Ready.")
221
- chatbot_html = gr.HTML("<div class='chatbot' id='chatbot'></div>")
222
-
223
- chat_history_state = gr.State([])
224
 
225
- with gr.Row(elem_classes="chatbox-container"):
226
- msg = gr.Textbox(
227
- placeholder="اكتب سؤالك هنا... / Ask your question here...",
228
- lines=2,
229
- scale=4,
230
- autofocus=True,
231
- )
232
- send_btn = gr.Button("➡️ Send", scale=1, variant="primary")
233
-
234
- def format_chat_html(history: List[Dict[str, str]]) -> str:
235
- html = "<div class='chatbot'>"
236
- for message in history:
237
- role = message["role"]
238
- content = message["content"]
239
- if role == "user":
240
- # User messages are plain text, no need to render markdown
241
- html += f"<div class='user-bubble'>{content}</div>"
242
- elif role == "assistant":
243
- # Convert bot's markdown response to HTML
244
- html_content = markdown.markdown(content, extensions=['tables'])
245
- html += f"<div class='bot-bubble'>{html_content}</div>"
246
- html += "</div>"
247
- return html
248
-
249
- # The 'respond' and 'clear_chat' functions remain exactly the same
250
- def respond(user_text: str, history: List[Dict[str, str]], audience_choice: str):
251
- if not user_text.strip():
252
- return "", format_chat_html(history), history, "Status: Please type a message."
253
- forced = {
254
- "Student": "student",
255
- "University-Research": "university",
256
- "Company": "company"
257
- }.get(audience_choice)
258
- route = route_intent(user_text, forced_audience=forced)
259
- status_text = f"**Audience**: {route.audience} | **Intent**: {route.intent} | **Lang**: {route.language.upper()}"
260
- answer = call_llm(user_text, history, route)
261
- if not answer:
262
- answer = kb_fallback(route)
263
- history.append({"role": "user", "content": user_text})
264
- history.append({"role": "assistant", "content": answer})
265
- updated_html = format_chat_html(history)
266
- return "", updated_html, history, status_text
267
-
268
- def clear_chat():
269
- return "", [], "Status: Ready."
270
-
271
- send_btn.click(respond, [msg, chat_history_state, audience_dd], [msg, chatbot_html, chat_history_state, status], queue=True)
272
- msg.submit(respond, [msg, chat_history_state, audience_dd], [msg, chatbot_html, chat_history_state, status], queue=True)
273
- clear_btn.click(clear_chat, outputs=[chatbot_html, chat_history_state, status], queue=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
 
 
275
  if __name__ == "__main__":
276
- demo.launch()
 
1
  from __future__ import annotations
2
  import os
3
+ import re
4
+ import uuid
5
  import json
6
  import time
7
+ import fitz # PyMuPDF
8
+ import docx
9
  import markdown
10
+ from datetime import datetime, timezone
11
+ from typing import List, Dict, Any, Optional
12
+ from dataclasses import dataclass
13
+
14
  import gradio as gr
15
  from openai import OpenAI
16
+ import weaviate
17
+ from weaviate.classes.init import Auth
18
+ from weaviate.classes.config import Configure, Property, DataType
19
+ from weaviate.classes.query import Filter
20
 
21
# -------------------- Configuration --------------------
# Non-secret settings keep their defaults and can be overridden via the environment.
MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b")
BASE_URL = os.getenv("BASE_URL", "https://api.deepinfra.com/v1/openai")
WEAVIATE_URL = os.getenv("WEAVIATE_URL", "htorgbgpt4w63nvf1yeuw.c0.us-west3.gcp.weaviate.cloud")

MEMORY_FILE = os.getenv("MEMORY_FILE", "chat_memory.json")
LOG_FILE = os.getenv("LOG_FILE", "interaction_logs.json")

# SECURITY: API keys must never be committed as fallback values. They are read
# from the environment only (e.g. Space/host secrets). Any key that was
# previously hard-coded in this file should be treated as leaked and rotated.
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY", "")
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY", "")

_missing_secrets = [
    name
    for name, value in (
        ("DEEPINFRA_API_KEY", DEEPINFRA_API_KEY),
        ("WEAVIATE_API_KEY", WEAVIATE_API_KEY),
    )
    if not value
]
if _missing_secrets:
    # Fail fast with an actionable message instead of an opaque auth error later.
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_secrets)
    )

# -------------------- Clients --------------------
# OpenAI-compatible client pointed at BASE_URL (used for chat and embeddings).
llm_client = OpenAI(api_key=DEEPINFRA_API_KEY, base_url=BASE_URL)

# Connection to the Weaviate Cloud cluster, opened once at import time.
weaviate_client = weaviate.connect_to_weaviate_cloud(
    cluster_url=WEAVIATE_URL,
    auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
)
38
 
39
+ # -------------------- Helpers --------------------
40
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  SYSTEM_PROMPT_BASE = (
43
  "You are **EduNatives Assistant**, a helpful, friendly, and precise academic/career guide. "
44
  "You serve three primary audiences: Students, Universities/Researchers, and Companies.\n\n"
45
  "Goals by audience:\n"
46
+ "- Students: registration/account help; finding internships/scholarships; connecting with mentors or professors; querying projects; applying for jobs/opportunities; joining project teams.\n"
47
  "- Universities/Researchers: publish research or announce events; connect/collaborate with students.\n"
48
  "- Companies: post jobs/internships/graduate roles; discover student talent.\n\n"
49
  "General rules:\n"
50
  "- Reply in the user's language (Arabic if the user writes Arabic; otherwise English).\n"
51
+ "- Use data from Weaviate collections (Job, Opportunities, Project) when relevant.\n"
52
  "- Be concise, step-by-step, and action-oriented (lists, bullets, checklists).\n"
53
  "- If information is unavailable, state that clearly and suggest the next best step.\n"
54
+ "- For CV analysis, extract skills/experience and recommend matching opportunities.\n"
55
+ "- Assist with applications and team matching.\n"
56
+ "- Ensure that all generated prompts are phrased using positive reinforcement."
57
  )
58
+
59
+
60
def get_rfc3339_time() -> str:
    """Return the current UTC time as an RFC 3339 timestamp with a ``Z`` suffix."""
    # The module imports only `datetime` and `timezone`; a bare `UTC` name is
    # undefined here, so use `timezone.utc`.
    return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
63
+
64
+ # --- CV Skills Extraction (Regex baseline, can replace with NLP model later) ---
65
+ _SKILL_REGEX = re.compile(r"\b(Natural Language Processing|Building Information Modeling|Search Engine Optimization|Search Engine Marketing|Aerospace Engineering & Management|Computational Fluid Dynamics|Kotlin Multiplatform|Google Cloud Platform|Social Media Marketing|Aerospace Engineering|Microsoft SQL Server|Amazon Web Services|Finite Element Analysis|Technology-based Management|Autodesk Inventor|Emotional Intelligence|Aerospace Engineering & Operations Management|Content Marketing|Presentation Skills|Interpersonal Skills|Critical Thinking|Financial Modeling|Decision Making|Process Improvement|Time Management|Lean Manufacturing|Project Management|Microsoft Excel|Data Visualization|Computer Vision|Machine Learning|Deep Learning|Attention to Detail|Six Sigma|Risk Analysis|Data Analysis|Data Science|Communication|Collaboration|Teamwork|Leadership|Management|Adaptability|Creativity|Innovation|Negotiation|Android|Angular|Ansible|Apache|ArcGIS|Arduino|Asana|ASP\.NET|AutoCAD|Azure|Bash|BIM|Business Analysis|C\+\+|C#|CAM|Cassandra|CATIA|CentOS|Chef|CI/CD|Civil 3D|Content Marketing|CRM|CSS|Data Mining|Django|Docker|Elasticsearch|Email Marketing|ERP|ETABS|ETL|Express\.js|Facebook Ads|Firebase|Flask|Flutter|FPGA|Fusion 360|GCP|Git|GitHub|GitLab|Go|Google Ads|Google Analytics|GraphQL|Hadoop|HTML|HubSpot|iOS|Java|JavaScript|Jenkins|Jira|Jupyter Notebook|Kanban|Keras|Kotlin|Kubernetes|LabVIEW|Laravel|LESS|Linux|Lua|macOS|Marketo|MATLAB|Matplotlib|MongoDB|Multisim|MySQL|Nginx|NLP|Node\.js|NoSQL|Nuxt\.js|NumPy|Next\.js|Objective-C|Oracle Database|Oracle|OrCAD|Pandas|Perl|PHP|PLC|Plotly|PostgreSQL|Power BI|PowerShell|Problem Solving|Puppet|PSpice|Python|PyTorch|Raspberry Pi|React Native|React|Red Hat|Redis|Revit|Ruby on Rails|Ruby|Rust|Salesforce|SAP2000|SAP|Sass|SCADA|Scala|Scikit-learn|Scrum|Seaborn|SEM|SEO|Simulink|SketchUp|Slack|SolidWorks|Spring 
Boot|SQL|SQLAlchemy|SwiftUI|Swift|Tableau|Terraform|TensorFlow|Trello|TypeScript|Ubuntu|Verilog|VHDL|Vue\.js|Waterfall|Windows|WordPress|Xamarin|Analytical Skills)\b", re.IGNORECASE)
66
+
67
def extract_skills_from_text(cv_text: str) -> List[str]:
    """Return the distinct skills that ``_SKILL_REGEX`` finds in ``cv_text``.

    Matches are de-duplicated case-insensitively and returned in order of
    first appearance, each formatted with ``str.capitalize()``. A ``None`` or
    empty input yields an empty list.
    """
    # dict.fromkeys de-duplicates while keeping first-seen order, so the result
    # is stable across runs (iterating a set of strings is not).
    unique_lower = dict.fromkeys(
        match.group(0).lower() for match in _SKILL_REGEX.finditer(cv_text or "")
    )
    return [skill.capitalize() for skill in unique_lower]
70
+
71
+ # --- Process uploaded file (PDF, DOCX, TXT) ---
72
def process_uploaded_file(file_obj: Any) -> dict | None:
    """Extract text and skills from an uploaded PDF, DOCX or TXT file.

    Args:
        file_obj: Either an object exposing the file path as ``.name`` or a
            plain path (``str`` / ``os.PathLike``).

    Returns:
        ``None`` when ``file_obj`` is falsy; otherwise a dict with keys
        ``content``, ``skills`` and ``filename`` on success, or a dict with a
        single ``error`` key when the type is unsupported or reading fails.
    """
    if not file_obj:
        return None

    # A pathlib.Path also has `.name` (just the last component), so path-like
    # inputs must be checked before falling back to the `.name` attribute.
    if isinstance(file_obj, (str, os.PathLike)):
        file_path = os.fspath(file_obj)
    else:
        file_path = file_obj.name
    filename = os.path.basename(file_path)
    lowered = filename.lower()

    try:
        if lowered.endswith(".pdf"):
            with fitz.open(file_path) as doc:
                text_content = "".join(page.get_text() for page in doc)
        elif lowered.endswith(".docx"):
            paragraphs = docx.Document(file_path).paragraphs
            text_content = "".join(p.text + "\n" for p in paragraphs)
        elif lowered.endswith(".txt"):
            with open(file_path, "r", encoding="utf-8") as f:
                text_content = f.read()
        else:
            return {"error": f"Unsupported file type: {filename}"}

        skills = extract_skills_from_text(text_content)
        return {"content": text_content.strip(), "skills": skills, "filename": filename}

    except Exception as e:
        # Best-effort boundary: any parser or decoding failure is reported to
        # the caller as an error dict rather than raised.
        return {"error": f"Error processing file {filename}: {e}"}
98
+ ARABIC_RANGE = (
99
+ (0x0600, 0x06FF), (0x0750, 0x077F), (0x08A0, 0x08FF),
100
+ (0xFB50, 0xFDFF), (0xFE70, 0xFEFF), (0x1EE00, 0x1EEFF)
101
  )
102
 
 
103
  def is_arabic(text: str) -> bool:
104
  for ch in text:
105
  code = ord(ch)
 
108
  return True
109
  return False
110
 
111
+ # --- Chat history HTML formatter (for Gradio) ---
112
def format_chat_html(history: List[Dict[str, str]]) -> str:
    """Render the chat history as HTML bubbles for the Gradio HTML component.

    Args:
        history: Messages as dicts with ``role`` and ``content`` keys.

    Returns:
        A ``<div class='chatbot'>`` wrapper containing one ``user-bubble`` div
        per user message and one ``bot-bubble`` div per other message.
    """
    from html import escape  # local import so the block stays self-contained

    parts = ["<div class='chatbot'>"]
    for msg in history:
        content = msg["content"]
        if msg["role"] == "user":
            # User text is untrusted: escape it so typed markup is displayed
            # literally instead of being injected into the page.
            parts.append(f"<div class='user-bubble'>{escape(content)}</div>")
        else:
            # NOTE(review): Python-Markdown passes raw HTML through by default;
            # consider sanitizing model output as well.
            rendered = markdown.markdown(content, extensions=['tables'])
            parts.append(f"<div class='bot-bubble'>{rendered}</div>")
    parts.append("</div>")
    return "".join(parts)
124
+ # ================================
125
+ # Part 2 — Weaviate Collections + Query + RAG
126
+ # ================================
127
+
128
+ # -------------------- Ensure collections --------------------
129
def ensure_collections():
    """Create the Application, Team and Memory collections when they are absent.

    Each collection is checked with ``collections.exists`` and, if missing,
    created with the listed properties and no vectorizer.
    """
    text = DataType.TEXT
    text_list = DataType.TEXT_ARRAY
    date = DataType.DATE

    # Collection name -> ordered (property name, data type) pairs.
    schemas = {
        "Application": [
            ("applicationId", text),
            ("jobId", text),
            ("applicantName", text),
            ("applicantEmail", text),
            ("coverLetter", text),
            ("cvText", text),
            ("skills", text_list),
            ("createdAt", date),
        ],
        "Team": [
            ("teamId", text),
            ("name", text),
            ("projectId", text),
            ("members", text_list),
            ("skills", text_list),
            ("createdAt", date),
            ("creatorId", text),
        ],
        "Memory": [
            ("memoryId", text),
            ("sessionId", text),
            ("text", text),
            ("createdAt", date),
        ],
    }

    for collection_name, fields in schemas.items():
        if weaviate_client.collections.exists(collection_name):
            continue
        weaviate_client.collections.create(
            name=collection_name,
            properties=[
                Property(name=field_name, data_type=field_type)
                for field_name, field_type in fields
            ],
            vectorizer_config=Configure.Vectorizer.none()
        )
175
+
176
+ ensure_collections()
177
+
178
+ # -------------------- Query Weaviate --------------------
179
def query_weaviate_collection(class_name: str, query_text: str, limit: int = 5) -> List[dict]:
    """Search a Weaviate collection and return the matching objects' properties.

    A BM25 keyword search runs first. If it returns nothing, a wildcard
    ``like`` filter on the ``title`` and ``skills`` properties is tried.
    Any exception is printed and an empty list is returned.
    """
    try:
        target = weaviate_client.collections.get(class_name)

        # First pass: BM25 keyword search
        keyword_hits = target.query.bm25(query=query_text, limit=limit)
        records = [hit.properties for hit in keyword_hits.objects]
        if records:
            return records

        # Second pass: substring-style match on title or skills
        wildcard = Filter.any_of([
            Filter.by_property("title").like(f"*{query_text}*"),
            Filter.by_property("skills").like(f"*{query_text}*")
        ])
        loose_hits = target.query.fetch_objects(limit=limit, filters=wildcard)
        return [hit.properties for hit in loose_hits.objects]
    except Exception as e:
        print(f"[Weaviate Query Error] {e}")
        return []
200
+
201
+ # -------------------- RAG Prompt Builder --------------------
202
def build_rag_prompt(user_question: str, retrieved_items: List[dict], class_name: str) -> str:
    """Build the RAG prompt: the user question, instructions, and retrieved records.

    Args:
        user_question: The question as typed by the user.
        retrieved_items: Property dicts of the retrieved Weaviate objects.
        class_name: Source collection name; ``"Job"`` records are mapped to a
            fixed set of labelled fields, other records are dumped key by key.

    Returns:
        The prompt text, ending with a "Retrieved Data" section that holds one
        JSON block per record (empty when nothing was retrieved).
    """
    context_parts = []
    for i, item in enumerate(retrieved_items, 1):
        if class_name == "Job":
            # The key may be present with a null value, or hold a single string.
            employment = item.get("employmentType") or []
            if isinstance(employment, str):
                employment = [employment]
            details = {
                "Title": item.get("title"),
                "Company": item.get("companyName"),
                "Job Type": item.get("jobType"),
                "Employment": ", ".join(str(e) for e in employment),
                "Location": item.get("workplaceType"),
                "Description": item.get("description"),
                "Skills": item.get("skills", []),
                "Requirements": item.get("requirements"),
                "Salary": str(item.get("salaryDetails", {})),
            }
        else:
            details = {k: str(v) for k, v in item.items()}

        # default=str is deliberate: this JSON is prompt text only, so values
        # json cannot encode natively are rendered as their string form.
        record_json = json.dumps(details, indent=2, ensure_ascii=False, default=str)
        context_parts.append(f"--- Record {i} ---\n{record_json}")

    context_block = "\n\n".join(context_parts)

    # The instructions refer to a "Retrieved Data" section, so the records must
    # actually be embedded in the prompt for the model to see them.
    return f"""
User Question: "{user_question}"
You are an expert AI assistant and a skilled data analyst. Your primary mission is to take structured data (in JSON format), analyze it completely, and present all its information to the user in a clear, comprehensive, and conversational summary.

**Primary Directive:** Your ONLY source of information for this task is the structured JSON data provided below under "Retrieved Data". If the data section is empty, you must state that no results were found that match the search and stop. Do not use your general knowledge under any circumstances.

**Your Core Instructions:**
1. **Analyze the Entire Object:** When you receive a JSON object, your first step is to read and understand every single key and value, including nested objects and arrays. Do not ignore any piece of information.
2. **Group Related Information:** Organize your output logically. For example, group company details together, role requirements together, dates and deadlines together, etc. Use clear Markdown headings (`###`) for these logical groups to improve readability.
3. **Convert Data into Natural Language:** Do not just list the data. Convert it into readable, engaging sentences. For example, instead of `workplaceType: "HYBRID"`, say "This is a hybrid role, which offers the flexibility of working both remotely and from the office."
4. **Handle All Data Types Intelligently:**
    * For **arrays** (like `skills` or `categories`), list them as clear bullet points or integrate them into a sentence.
    * For **nested objects** (like `salaryDetails` or `careerLevel`), explain the contents of the object clearly.
    * For **booleans** (true/false), explain their meaning in context (e.g., `published: true` should be "This position is currently published and accepting applications.").
5. **Add a Concluding Call to Action:** After presenting all the details, conclude with a helpful "Next Steps" or "How to Apply" section. For a job, this should be a practical guide. For a project, it might be "How to Get Involved."

**Retrieved Data:**
{context_block}
"""
241
+
242
+ # -------------------- RAG Answer --------------------
243
def rag_answer(user_question: str, class_name: str, top_k: int = 5) -> tuple[str, List[dict]]:
    """Answer a question from records retrieved out of one Weaviate collection.

    Args:
        user_question: The question as typed by the user.
        class_name: Name of the collection to search.
        top_k: Maximum number of records to retrieve.

    Returns:
        ``(answer, retrieved)``. When nothing is retrieved the result is
        ``("", [])`` and the LLM is not called. When the LLM call fails the
        error is printed and the answer is ``""``, but the retrieved records
        are still returned.
    """
    retrieved = query_weaviate_collection(class_name, user_question, limit=top_k)
    if not retrieved:
        return "", []

    prompt = build_rag_prompt(user_question, retrieved, class_name)

    try:
        resp = llm_client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT_BASE},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            max_tokens=4096
        )
        answer = resp.choices[0].message.content or ""
    except Exception as e:
        print(f"[RAG LLM Error] {e}")
        answer = ""

    return answer, retrieved
266
+ # ================================
267
+ # ================================
268
+ # Part 3 — Conversation State Machine + Embedding Recommendations
269
+ # ================================
270
+
271
+ import numpy as np # used for cosine similarity in recommendations
272
+
273
+ # -------------------- Embedding helpers --------------------
274
def compute_embedding(text: str) -> List[float]:
    """Request an embedding for ``text`` from the OpenAI-compatible client.

    Handles both a dict-shaped response and an attribute-style response.
    Returns an empty list when no embedding is found or the call raises
    (the error is printed).
    """
    try:
        result = llm_client.embeddings.create(
            model="Qwen/Qwen3-Embedding-8B",
            input=text,
            encoding_format="float"
        )
        # dict-shaped response
        if isinstance(result, dict):
            rows = result.get("data", [])
            if rows and isinstance(rows[0], dict):
                return rows[0].get("embedding", [])
        # attribute-style response object
        if hasattr(result, "data") and result.data:
            first_row = result.data[0]
            return first_row.embedding
    except Exception as e:
        print("[compute_embedding] error:", e)
    return []
296
+
297
def cosine_similarity(a: List[float], b: List[float]) -> float:
    """Return the cosine similarity of two vectors.

    Yields 0.0 when either vector is empty, when either has zero norm, or
    when the computation raises (the error is printed).
    """
    try:
        left = np.array(a, dtype=float)
        right = np.array(b, dtype=float)
        if 0 in (left.size, right.size):
            return 0.0
        scale = np.linalg.norm(left) * np.linalg.norm(right)
        if scale == 0:
            return 0.0
        dot_product = np.dot(left, right)
        return float(dot_product / scale)
    except Exception as e:
        print("[cosine_similarity] error:", e)
        return 0.0
310
+
311
+ # -------------------- Recommendations by embedding --------------------
312
def recommend_jobs_by_embedding(cv_text: str, top_k: int = 5, jobs_fetch_limit: int = 200) -> str:
    """Rank jobs from the Weaviate ``Job`` collection against a CV by embedding similarity.

    Steps:
        1. Represent the user by the skills extracted from the CV, or by the
           first 500 characters of the CV when no skill is detected.
        2. Embed that text.
        3. Fetch up to ``jobs_fetch_limit`` Job objects.
        4. For each job, embed skills + title + description (description cut
           to 2000 characters) and score it by cosine similarity.
        5. Format the ``top_k`` best jobs as Markdown.

    Job embeddings are computed one by one on every call; for large job sets
    they should be precomputed and stored.

    Args:
        cv_text: Plain text of the CV (``None`` is treated as empty).
        top_k: Number of jobs to include in the result.
        jobs_fetch_limit: Maximum number of jobs fetched for scoring.

    Returns:
        Markdown text listing the best matches, or a warning message starting
        with "⚠️" when embedding or fetching fails or nothing can be scored.
    """
    # prepare user text
    cv_text = cv_text or ""
    skills = extract_skills_from_text(cv_text)
    user_text = " ".join(skills) if skills else cv_text[:500]
    user_emb = compute_embedding(user_text)
    if not user_emb:
        return "⚠️ Unable to compute embedding for your CV. Try again or check API keys."

    # fetch jobs from weaviate
    try:
        jobs_col = weaviate_client.collections.get("Job")
        fetched = jobs_col.query.fetch_objects(limit=jobs_fetch_limit)
        if not fetched.objects:
            return "⚠️ No jobs found in the database."
    except Exception as e:
        print("[recommend_jobs_by_embedding] Weaviate fetch error:", e)
        return "⚠️ Could not fetch jobs from the database."

    scored_jobs = []
    for obj in fetched.objects:
        props = obj.properties
        # build job text for embedding: skills + title + description
        job_text_parts = []
        if props.get("skills"):
            # str() guards against non-string entries, which would break join
            job_text_parts.append(" ".join(str(s) for s in props.get("skills")))
        if props.get("title"):
            job_text_parts.append(str(props.get("title")))
        if props.get("description"):
            job_text_parts.append(str(props.get("description"))[:2000])
        job_text = " ".join(job_text_parts).strip() or str(props.get("title") or "")

        job_emb = compute_embedding(job_text)
        if not job_emb:
            # skip if embedding failed
            continue
        scored_jobs.append((cosine_similarity(user_emb, job_emb), props))

    if not scored_jobs:
        return "⚠️ No jobs could be embedded / compared."

    # sort by score only (stable for ties; the props dicts are never compared)
    scored_jobs.sort(key=lambda pair: pair[0], reverse=True)

    # format rich output: truncated description plus the full skills list
    lines = []
    for score, props in scored_jobs[:top_k]:
        title = props.get("title", "No title")
        company = props.get("companyName", "Unknown company")
        job_id = props.get("jobId", "")
        salary = props.get("salary") or props.get("salaryDetails") or "Not specified"
        skills_list = props.get("skills") or []
        description = (props.get("description") or "").strip()
        lines.append(
            f"**{title}** at *{company}* \n"
            f"- Job ID: `{job_id}` \n"
            f"- Score: {score:.3f} \n"
            f"- Salary: {salary} \n"
            f"- Skills: {skills_list} \n"
            f"- Description: {description[:600]}{'...' if len(description)>600 else ''} \n"
            f"---"
        )

    return "\n\n".join(lines)
386
 
387
+ # -------------------- Conversation Session helpers --------------------
388
def initial_session() -> dict:
    """
    Build a fresh conversation session.

    Shape of the returned dict:
      "state": one of
          "idle",
          "apply_name", "apply_email", "apply_cover", "apply_wait_cv",
          "apply_jobtitle", "apply_confirm",
          "team_action", "team_create_name", "team_create_owner",
          "team_create_skills", "team_create_course", "team_create_idea",
          "team_join_name", "team_join_member", "team_join_skills",
          "recommend_wait_cv"
      "data": fields collected so far in the current flow

    A new session starts in the "idle" state with no collected data.
    """
    return dict(state="idle", data={})
400
 
401
def handle_uploaded_cv_for_session(session: dict, uploaded_file: Any) -> (str, dict):
    """
    Process a CV uploaded in the middle of a conversation flow.

    The file is parsed with ``process_uploaded_file``; its text and detected
    skills are stored in ``session["data"]`` (``cvText`` / ``cvSkills``), and
    the reply depends on the state the session was in:

    * ``apply_wait_cv``     - advance to ``apply_jobtitle`` and ask for a job title.
    * ``recommend_wait_cv`` - compute recommendations and start a fresh session.
    * anything else         - acknowledge the upload, leave the state untouched.

    Returns ``(bot_message, updated_session)``. On a missing file or a
    processing error the session is returned unchanged.
    """
    if not uploaded_file:
        return "⚠️ No file received.", session

    parsed = process_uploaded_file(uploaded_file)
    failed = (not parsed) or ("error" in parsed)
    if failed:
        reason = parsed.get('error') if parsed else 'unknown error'
        return f"⚠️ Error processing uploaded CV: {reason}", session

    # keep the extracted text & skills for the later steps of the flow
    collected = session["data"]
    collected["cvText"] = parsed.get("content", "")
    collected["cvSkills"] = parsed.get("profile", {}).get("skills", [])

    current_state = session.get("state")

    if current_state == "recommend_wait_cv":
        # recommendations end the flow, so the caller gets a clean session back
        rec_text = recommend_jobs_by_embedding(collected["cvText"], top_k=5)
        session = initial_session()
        return f"Here are recommended jobs based on your CV:\n\n{rec_text}", session

    if current_state == "apply_wait_cv":
        session["state"] = "apply_jobtitle"
        detected = collected["cvSkills"]
        return f"CV received. Detected skills: {detected}. Which job title do you want to apply for? (type job title or 'any')", session

    # upload outside of a CV-expecting flow
    return "CV uploaded and processed. What would you like to do next?", session
430
+
431
+ # -------------------- Main message handler (state machine) --------------------
432
# States in which the conversation cannot continue until a CV is uploaded.
_CV_WAIT_STATES = ("apply_wait_cv", "recommend_wait_cv")


def handle_user_message(session: dict, user_text: str, uploaded_file: Any = None) -> tuple[str, dict, bool]:
    """
    Main conversation handler (state machine).

    Args:
        session: current session dict (``{"state": ..., "data": {...}}``);
            a falsy value starts a fresh session.
        user_text: raw text typed by the user (may be empty or None).
        uploaded_file: file object handed over by the UI, if any.

    Returns:
        ``(bot_reply, new_session, show_file_uploader)``.

    Order of evaluation:
        1. reset keywords (``cancel``, ``reset``, ...) always win;
        2. an uploaded file is routed to ``handle_uploaded_cv_for_session``;
        3. otherwise the reply depends on ``session["state"]``.

    In the ``idle`` state the text is matched against, in order: greetings,
    flow keywords (apply / team / recommend), the knowledge base, RAG job
    search, and finally a free-form LLM answer.
    """
    session = session or initial_session()
    st = session.get("state", "idle")
    text = (user_text or "").strip()
    low = text.lower()

    # quick reset
    if low in ("cancel", "exit", "quit", "restart", "reset"):
        return "Conversation reset. How can I help you now?", initial_session(), False

    # file upload
    if uploaded_file:
        bot_msg, new_session = handle_uploaded_cv_for_session(session, uploaded_file)
        # If processing failed the session is still waiting for a CV, so the
        # uploader must stay visible or the user has no way to retry.
        still_waiting = new_session.get("state") in _CV_WAIT_STATES
        return bot_msg, new_session, still_waiting

    # ========== IDLE STATE ==========
    if st == "idle":
        # 1) greetings
        if low in ("hi", "hello", "hey", "مرحبا", "ازيك", "السلام عليكم"):
            return "👋 Hello! How can I support you today? You can ask about jobs, teams, or recommendations.", session, False
        if low in ("who are you?", "who are you", "انت مين", "من انت"):
            return (
                "👋 I am EduNatives Assistant — your friendly academic and career guide. "
                "I help students, universities, and companies connect through opportunities, projects, and mentoring.",
                session,
                False,
            )

        # 2) flows -- checked before KB/RAG so that an intent keyword always
        #    starts its flow instead of being answered by a search result
        if any(k in low for k in ["apply", "i want to apply", "i'd like to apply", "أريد التقديم", "عايز اقدم", "اريد التقديم"]):
            session["state"] = "apply_name"
            session["data"] = {}
            return "Okay — let's start your application. What's your full name?", session, False

        if any(k in low for k in ["team", "create team", "join team", "create", "join", "انضم", "انشاء فريق"]):
            session["state"] = "team_action"
            session["data"] = {}
            return "Do you want to create a team or join an existing team? (reply 'create' or 'join')", session, False

        if any(k in low for k in ["recommend", "recommendation", "jobs for me", "رشح", "ترشيح", "recommend me jobs"]):
            session["state"] = "recommend_wait_cv"
            session["data"] = {}
            return "Please upload your CV to get job recommendations (use the Upload button).", session, True

        # 3) Knowledge Base fallback
        route = route_intent(text)
        kb_answer = kb_fallback(route)
        if kb_answer:
            return kb_answer, session, False

        # 4) RAG (jobs search)
        try:
            rag_ans, _ = rag_answer(text, "Job", top_k=5)
            if rag_ans:
                return rag_ans, session, False
        except Exception as e:
            print("[handle_user_message] rag error:", e)

        # 5) free-form LLM answer (skipped for empty input: nothing to ask)
        if text:
            try:
                resp = llm_client.chat.completions.create(
                    model=MODEL_NAME,
                    messages=[
                        {"role": "system", "content": SYSTEM_PROMPT_BASE},
                        {"role": "user", "content": text}
                    ],
                    temperature=0.3,
                    max_tokens=4096
                )
                llm_answer = (resp.choices[0].message.content or "").strip()
            except Exception as e:
                print("[handle_user_message] free LLM error:", e)
                return "⚠️ Sorry, I couldn't process that. Try again later.", session, False
            if llm_answer:
                return llm_answer, session, False

        # 6) ultimate fallback
        return "Sorry — I didn't understand that. You can say 'apply', 'create team', 'join team' or 'recommend'.", session, False

    # ========== APPLY FLOW ==========
    if st == "apply_name":
        session["data"]["applicantName"] = text or "Applicant"
        session["state"] = "apply_email"
        return "Thanks. What's your email address?", session, False

    if st == "apply_email":
        # pull the address out of free text; fall back to the raw text if none is found
        m = re.search(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)", text)
        session["data"]["applicantEmail"] = m.group(1) if m else text
        session["state"] = "apply_cover"
        return "Got it. Please type a short cover letter (or type 'skip' to skip).", session, False

    if st == "apply_cover":
        session["data"]["coverLetter"] = "" if low == "skip" else text
        session["state"] = "apply_wait_cv"
        return "Please upload your CV now (use the Upload button).", session, True

    if st == "apply_wait_cv":
        # Text typed while a CV is expected: keep the state and keep the
        # uploader visible instead of falling through to the generic fallback.
        return "Please upload your CV now (use the Upload button).", session, True

    if st == "apply_jobtitle":
        session["data"]["targetJobTitle"] = text
        found = query_weaviate_collection("Job", text, limit=3)
        cv_skills = [s.lower() for s in session["data"].get("cvSkills", [])]
        if found:
            job = found[0]
            job_skills = [s.lower() for s in (job.get("skills") or [])]
            overlap = len([s for s in cv_skills if s in job_skills])
            session["data"]["targetJobId"] = job.get("jobId")
            session["state"] = "apply_confirm"
            if overlap > 0:
                return (f"I found a job: {job.get('title')} at {job.get('companyName')}. "
                        f"Detected {overlap} overlapping skills. Do you want to confirm application? (yes/no)"), session, False
            return (f"I found {job.get('title')} at {job.get('companyName')}, but your CV skills do not overlap. "
                    "Do you still want to proceed? (yes/no)"), session, False
        session["data"]["targetJobId"] = None
        session["state"] = "apply_confirm"
        return f"I couldn't find a job with that title. Do you want to apply for '{text}' anyway? (yes/no)", session, False

    if st == "apply_confirm":
        if low in ("yes", "y", "نعم"):
            app = {
                "applicationId": str(uuid.uuid4()),
                "jobId": session["data"].get("targetJobId"),
                "applicantName": session["data"].get("applicantName"),
                "applicantEmail": session["data"].get("applicantEmail"),
                "coverLetter": session["data"].get("coverLetter", ""),
                "cvText": session["data"].get("cvText", ""),
                "skills": session["data"].get("cvSkills", []),
                "createdAt": get_rfc3339_time()
            }
            ok = save_application_to_weaviate(app)
            session = initial_session()
            return ("🎉 Your application has been submitted successfully. Good luck!" if ok
                    else "⚠️ Failed to save application. Please try again later."), session, False
        # anything other than an explicit yes cancels the application
        session = initial_session()
        return "Application cancelled. If you want to do something else, tell me.", session, False

    # ========== TEAM FLOW ==========
    if st == "team_action":
        if "create" in low or "إنشاء" in low:
            session["state"] = "team_create_name"
            session["data"] = {}
            return "Great — what's the team name?", session, False
        if "join" in low or "انضم" in low:
            session["state"] = "team_join_name"
            session["data"] = {}
            return "Okay — what's the name of the team you want to join?", session, False
        return "Please say 'create' to create a team or 'join' to join a team.", session, False

    if st == "team_create_name":
        session["data"]["team_name"] = text
        session["state"] = "team_create_owner"
        return "Team name saved. Who is the team owner (your name)?", session, False

    if st == "team_create_owner":
        session["data"]["owner"] = text
        session["state"] = "team_create_skills"
        return "Owner saved. Please list the team's skills (comma-separated).", session, False

    if st == "team_create_skills":
        session["data"]["skills"] = [s.strip() for s in text.split(",") if s.strip()]
        session["state"] = "team_create_course"
        return "Skills saved. (Optional) Enter course/subject name or type 'skip'.", session, False

    if st == "team_create_course":
        session["data"]["course"] = "" if low == "skip" else text
        session["state"] = "team_create_idea"
        return "Please write a short idea/description for the project.", session, False

    if st == "team_create_idea":
        session["data"]["idea"] = text
        team_props = {
            "teamId": str(uuid.uuid4()),
            "name": session["data"].get("team_name"),
            "projectId": None,
            "members": [session["data"].get("owner")],
            "skills": session["data"].get("skills", []),
            "creatorId": session["data"].get("owner"),
            "createdAt": get_rfc3339_time(),
            "idea": session["data"].get("idea", "")
        }
        saved = save_team_to_weaviate(team_props)
        session = initial_session()
        return (f"🎉 Team '{team_props['name']}' created! Members: {team_props['members']}" if saved
                else "⚠️ Failed to create team. Try again later."), session, False

    if st == "team_join_name":
        session["data"]["team_name"] = text
        session["state"] = "team_join_member"
        return "What's your name (to add you to the team)?", session, False

    if st == "team_join_member":
        session["data"]["member_name"] = text
        session["state"] = "team_join_skills"
        return "Enter your skills (comma-separated).", session, False

    if st == "team_join_skills":
        skills = [s.strip() for s in text.split(",") if s.strip()]
        resp = update_team_add_member(session["data"].get("team_name"), session["data"].get("member_name"), skills)
        session = initial_session()
        return resp, session, False

    # ========== RECOMMEND FLOW ==========
    if st == "recommend_wait_cv":
        return "Please upload your CV (use the Upload button).", session, True

    # fallback (unknown state)
    return "Sorry — I didn't understand that. You can say 'apply', 'create team', 'join team' or 'recommend'.", session, False
654
+
655
+
656
+ # ================================
657
+ # Part 4 — Gradio Chat UI wiring
658
+ # ================================
659
+
660
import atexit
# ensure Weaviate connection closes when the app exits
# (the lambda looks up `weaviate_client` when it runs at interpreter exit,
#  not at registration time, so it closes whichever client is bound then)
atexit.register(lambda: weaviate_client.close())
663
+
664
+ # initial session state per user
665
def create_initial_session_for_state():
    """Return a fresh session dict (via ``initial_session``) used to seed the UI's session state."""
    seed_session = initial_session()
    return seed_session
667
+
668
+ # helper to append to chat history (list of dicts)
669
def append_to_history(history: List[Dict[str, str]], role: str, content: str) -> List[Dict[str, str]]:
    """
    Add one chat message to the history and return the history.

    A non-empty ``history`` is extended in place and returned as-is; a missing
    or empty one is replaced by a new list holding just this message.
    """
    message = {"role": role, "content": content}
    if not history:
        # None or empty: start a new list rather than touching the argument
        return [message]
    history.append(message)
    return history
673
+
674
+ # UI
675
with gr.Blocks(css="""
.chatbot {height: 520px; overflow: auto;}
.user-bubble {background-color: #DCF8C6; padding: 10px; border-radius: 12px; max-width: 75%; float: right; clear: both; margin: 5px; word-wrap: break-word;}
.bot-bubble {background-color: #F1F0F0; padding: 10px; border-radius: 12px; max-width: 75%; float: left; clear: both; margin: 5px; word-wrap: break-word;}
.chatbox-container {display: flex; gap: 8px; margin-top: 10px;}
""") as demo:

    gr.Markdown("# 💬 EduNatives Conversational Job Portal")

    # chat HTML (we use custom formatted HTML)
    chat_html = gr.HTML(format_chat_html([]))

    # input row
    with gr.Row(elem_classes="chatbox-container"):
        user_input = gr.Textbox(placeholder="Type your message here (e.g. 'apply', 'create team', 'recommend')", lines=2)
        send_btn = gr.Button("Send", variant="primary")

    # file upload row. The row itself stays visible; only its two children are
    # hidden initially and toggled by the handlers below. (A hidden row would
    # keep the children hidden no matter how their own visibility is updated,
    # and the row is not an output of any handler.)
    with gr.Row() as file_row:
        cv_uploader = gr.File(label="Upload CV (.pdf/.docx/.txt)", file_count="single", file_types=[".pdf", ".docx", ".txt"], visible=False)
        upload_btn = gr.Button("Upload CV", visible=False)

    # control buttons
    with gr.Row():
        clear_btn = gr.Button("Reset Conversation")
        instructions = gr.Markdown("Commands: `apply`, `create team`, `join team`, `recommend` — the bot will guide you step-by-step.")

    # persistent state across turns
    chat_history_state = gr.State([])
    session_state = gr.State(create_initial_session_for_state())

    # -------------------- handlers --------------------
    def handle_send(message: str, history: List[Dict[str, str]], session: dict):
        """
        Called when the user presses Send.

        Passes the text to handle_user_message (no file), appends both sides
        of the exchange to the history and re-renders the chat.

        Returns, in output order: cleared textbox value, chat HTML, history,
        session, and visibility updates for the uploader and the upload button.
        """
        history = history or []
        session = session or initial_session()

        # append user message
        if message and message.strip():
            history = append_to_history(history, "user", message.strip())

        bot_reply, new_session, show_uploader = handle_user_message(session, message or "", uploaded_file=None)

        history = append_to_history(history, "assistant", bot_reply or "…")
        html = format_chat_html(history)

        return "", html, history, new_session, gr.update(visible=show_uploader), gr.update(visible=show_uploader)

    def handle_upload(file_obj, history: List[Dict[str, str]], session: dict):
        """
        Called when the user presses Upload CV (after selecting a file).
        The session should be in a state that expects a CV (e.g. apply_wait_cv or recommend_wait_cv).

        Returns, in output order: chat HTML, history, session, and visibility
        updates for the uploader and the upload button.
        """
        history = history or []
        session = session or initial_session()

        # Button pressed without a selected file: say so and keep the uploader
        # open, instead of logging a fake upload and advancing the conversation.
        if file_obj is None:
            history = append_to_history(history, "assistant", "⚠️ Please select a file first, then press Upload CV.")
            return format_chat_html(history), history, session, gr.update(visible=True), gr.update(visible=True)

        # Show only the file's base name. Depending on the Gradio version the
        # value is either a path string or an object with a `.name` path.
        raw_name = file_obj if isinstance(file_obj, str) else getattr(file_obj, "name", "uploaded_file")
        filename = os.path.basename(str(raw_name)) or "uploaded_file"
        history = append_to_history(history, "user", f"📎 Uploaded file: {filename}")

        # route file into the handler: pass file to handle_user_message
        bot_reply, new_session, show_uploader = handle_user_message(session, "", uploaded_file=file_obj)

        history = append_to_history(history, "assistant", bot_reply or "")
        html = format_chat_html(history)

        # after upload we usually hide uploader (unless the bot again asks for more files)
        return html, history, new_session, gr.update(visible=show_uploader), gr.update(visible=show_uploader)

    def handle_reset(history, session):
        """Clear the chat history, start a fresh session and hide the uploader."""
        # the incoming values are ignored on purpose: everything is rebuilt
        new_hist = []
        new_session = initial_session()
        html = format_chat_html(new_hist)
        return html, new_hist, new_session, gr.update(visible=False), gr.update(visible=False)

    # -------------------- event wiring --------------------
    send_btn.click(
        fn=handle_send,
        inputs=[user_input, chat_history_state, session_state],
        outputs=[user_input, chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
        queue=True
    )

    # upload button is shown/hidden by the send handler; user selects a file in cv_uploader then presses Upload CV
    upload_btn.click(
        fn=handle_upload,
        inputs=[cv_uploader, chat_history_state, session_state],
        outputs=[chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
        queue=True
    )

    clear_btn.click(
        fn=handle_reset,
        inputs=[chat_history_state, session_state],
        outputs=[chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
        queue=False
    )
779
 
780
+ # launch
781
if __name__ == "__main__":
    # start the Gradio app only when this file is run as a script, not on import
    demo.launch(debug=True)