afouda committed on
Commit
bca2990
·
verified ·
1 Parent(s): 00dca00

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +236 -1263
app.py CHANGED
@@ -1,1292 +1,265 @@
1
- # from __future__ import annotations
2
- # import os
3
- # import re
4
- # import uuid
5
- # import json
6
- # import time
7
- # import atexit
8
- # import fitz # PyMuPDF
9
- # import docx
10
- # import markdown
11
- # from datetime import datetime, timezone
12
- # from typing import List, Dict, Any, Optional
13
-
14
- # import gradio as gr
15
- # import numpy as np
16
- # from openai import OpenAI
17
- # import weaviate
18
- # from weaviate.classes.init import Auth
19
- # from weaviate.classes.config import Configure, Property, DataType
20
- # from weaviate.classes.query import Filter, MetadataQuery
21
-
22
- # # -------------------- Configuration --------------------
23
- # MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b")
24
- # DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY", "kPEm10rrnxXrCf0TuB6Xcd7Y7lp3YgKa")
25
- # BASE_URL = os.getenv("BASE_URL", "https://api.deepinfra.com/v1/openai")
26
-
27
- # WEAVIATE_URL = os.getenv("WEAVIATE_URL", "htorgbgpt4w63nvf1yeuw.c0.us-west3.gcp.weaviate.cloud")
28
- # WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY", "ZUd6clB5WmYzVGkxeU40cl96NTY5UkViUlVzY05Md3IzQ0JKelBZQmxGZHRPeGpCeGdxS1FUNnlYUkFFPV92MjAw")
29
-
30
- # MEMORY_FILE = os.getenv("MEMORY_FILE", "chat_memory.json")
31
- # LOG_FILE = os.getenv("LOG_FILE", "interaction_logs.json")
32
-
33
- # # Check for missing environment variables
34
- # if not DEEPINFRA_API_KEY or not WEAVIATE_URL or not WEAVIATE_API_KEY:
35
- # raise ValueError("One or more required environment variables are not set: DEEPINFRA_API_KEY, WEAVIATE_URL, WEAVIATE_API_KEY")
36
-
37
- # # -------------------- Clients --------------------
38
- # llm_client = OpenAI(api_key=DEEPINFRA_API_KEY, base_url=BASE_URL)
39
-
40
- # weaviate_client = weaviate.connect_to_weaviate_cloud(
41
- # cluster_url=WEAVIATE_URL,
42
- # auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
43
- # )
44
-
45
- # # -------------------- Helpers --------------------
46
- # UTC = timezone.utc
47
-
48
- # SYSTEM_PROMPT_BASE = (
49
- # "You are **EduNatives Assistant**, a helpful, friendly, and precise academic/career guide. "
50
- # "You serve three primary audiences: Students, Universities/Researchers, and Companies.\n\n"
51
- # "Goals by audience:\n"
52
- # "- Students: registration/account help; finding internships/scholarships; connecting with mentors or professors; querying projects; applying for jobs/opportunities; joining project teams.\n"
53
- # "- Universities/Researchers: publish research or announce events; connect/collaborate with students.\n"
54
- # "- Companies: post jobs/internships/graduate roles; discover student talent.\n\n"
55
- # "General rules:\n"
56
- # "- Reply in the user's language (Arabic if the user writes Arabic; otherwise English).\n"
57
- # "- Use data from Weaviate collections (Job, Opportunities, Project) when relevant.\n"
58
- # "- Be concise, step-by-step, and action-oriented (lists, bullets, checklists).\n"
59
- # "- If information is unavailable, state that clearly and suggest the next best step.\n"
60
- # "- For CV analysis, extract skills/experience and recommend matching opportunities.\n"
61
- # "- Assist with applications and team matching.\n"
62
- # "- Ensure that all generated prompts are phrased using positive reinforcement."
63
- # )
64
-
65
-
66
- # def get_rfc3339_time() -> str:
67
- # """Return current UTC time in RFC3339 format"""
68
- # return datetime.now(UTC).isoformat().replace('+00:00', 'Z')
69
-
70
- # # --- CV Skills Extraction (Regex baseline, can replace with NLP model later) ---
71
- # _SKILL_REGEX = re.compile(r"\b(Natural Language Processing|Building Information Modeling|Search Engine Optimization|Search Engine Marketing|Aerospace Engineering & Management|Computational Fluid Dynamics|Kotlin Multiplatform|Google Cloud Platform|Social Media Marketing|Aerospace Engineering|Microsoft SQL Server|Amazon Web Services|Finite Element Analysis|Technology-based Management|Autodesk Inventor|Emotional Intelligence|Aerospace Engineering & Operations Management|Content Marketing|Presentation Skills|Interpersonal Skills|Critical Thinking|Financial Modeling|Decision Making|Process Improvement|Time Management|Lean Manufacturing|Project Management|Microsoft Excel|Data Visualization|Computer Vision|Machine Learning|Deep Learning|Attention to Detail|Six Sigma|Risk Analysis|Data Analysis|Data Science|Communication|Collaboration|Teamwork|Leadership|Management|Adaptability|Creativity|Innovation|Negotiation|Android|Angular|Ansible|Apache|ArcGIS|Arduino|Asana|ASP\.NET|AutoCAD|Azure|Bash|BIM|Business Analysis|C\+\+|C#|CAM|Cassandra|CATIA|CentOS|Chef|CI/CD|Civil 3D|Content Marketing|CRM|CSS|Data Mining|Django|Docker|Elasticsearch|Email Marketing|ERP|ETABS|ETL|Express\.js|Facebook Ads|Firebase|Flask|Flutter|FPGA|Fusion 360|GCP|Git|GitHub|GitLab|Go|Google Ads|Google Analytics|GraphQL|Hadoop|HTML|HubSpot|iOS|Java|JavaScript|Jenkins|Jira|Jupyter Notebook|Kanban|Keras|Kotlin|Kubernetes|LabVIEW|Laravel|LESS|Linux|Lua|macOS|Marketo|MATLAB|Matplotlib|MongoDB|Multisim|MySQL|Nginx|NLP|Node\.js|NoSQL|Nuxt\.js|NumPy|Next\.js|Objective-C|Oracle Database|Oracle|OrCAD|Pandas|Perl|PHP|PLC|Plotly|PostgreSQL|Power BI|PowerShell|Problem Solving|Puppet|PSpice|Python|PyTorch|Raspberry Pi|React Native|React|Red Hat|Redis|Revit|Ruby on Rails|Ruby|Rust|Salesforce|SAP2000|SAP|Sass|SCADA|Scala|Scikit-learn|Scrum|Seaborn|SEM|SEO|Simulink|SketchUp|Slack|SolidWorks|Spring 
Boot|SQL|SQLAlchemy|SwiftUI|Swift|Tableau|Terraform|TensorFlow|Trello|TypeScript|Ubuntu|Verilog|VHDL|Vue\.js|Waterfall|Windows|WordPress|Xamarin|Analytical Skills)\b", re.IGNORECASE)
72
-
73
- # def extract_skills_from_text(cv_text: str) -> List[str]:
74
- # skills = list({m.group(0).lower() for m in _SKILL_REGEX.finditer(cv_text)})
75
- # return [s.capitalize() for s in skills]
76
-
77
-
78
- # # --- Process uploaded file (PDF, DOCX, TXT) ---
79
- # def process_uploaded_file(file_obj: Any) -> dict | None:
80
- # if not file_obj:
81
- # return None
82
- # file_path = file_obj.name
83
- # filename = os.path.basename(file_path)
84
- # text_content = ""
85
- # try:
86
- # if filename.lower().endswith(".pdf"):
87
- # with fitz.open(file_path) as doc:
88
- # for page in doc:
89
- # text_content += page.get_text()
90
- # elif filename.lower().endswith(".docx"):
91
- # docp = docx.Document(file_path)
92
- # for p in docp.paragraphs:
93
- # text_content += p.text + "\n"
94
- # elif filename.lower().endswith(".txt"):
95
- # with open(file_path, "r", encoding="utf-8") as f:
96
- # text_content = f.read()
97
- # else:
98
- # return {"error": f"Unsupported file type: {filename}"}
99
-
100
- # skills = extract_skills_from_text(text_content)
101
- # return {"content": text_content.strip(), "skills": skills, "filename": filename}
102
-
103
- # except Exception as e:
104
- # return {"error": f"Error processing file {filename}: {e}"}
105
-
106
- # # --- Chat history HTML formatter (for Gradio) ---
107
- # def format_chat_html(history: List[Dict[str, str]]) -> str:
108
- # html = "<div class='chatbot'>"
109
- # for msg in history:
110
- # role = msg["role"]
111
- # content = msg["content"]
112
- # if role == "user":
113
- # html += f"<div class='user-bubble'>{content}</div>"
114
- # else:
115
- # # Render bot response as Markdown
116
- # html_content = markdown.markdown(content, extensions=['tables'])
117
- # html += f"<div class='bot-bubble'>{html_content}</div>"
118
- # html += "</div>"
119
- # return html
120
-
121
- # # ================================
122
- # # Part 2 — Weaviate Collections + Query + RAG
123
- # # ================================
124
-
125
- # # -------------------- Ensure collections --------------------
126
- # def ensure_collections():
127
- # # Application collection (no vectorizer)
128
- # if not weaviate_client.collections.exists("Application"):
129
- # weaviate_client.collections.create(
130
- # name="Application",
131
- # properties=[
132
- # Property(name="applicationId", data_type=DataType.TEXT),
133
- # Property(name="jobId", data_type=DataType.TEXT),
134
- # Property(name="applicantName", data_type=DataType.TEXT),
135
- # Property(name="applicantEmail", data_type=DataType.TEXT),
136
- # Property(name="coverLetter", data_type=DataType.TEXT),
137
- # Property(name="cvText", data_type=DataType.TEXT),
138
- # Property(name="skills", data_type=DataType.TEXT_ARRAY),
139
- # Property(name="createdAt", data_type=DataType.DATE),
140
- # ],
141
- # vectorizer_config=Configure.Vectorizer.none()
142
- # )
143
-
144
- # # Team collection (no vectorizer)
145
- # if not weaviate_client.collections.exists("Team"):
146
- # weaviate_client.collections.create(
147
- # name="Team",
148
- # properties=[
149
- # Property(name="teamId", data_type=DataType.TEXT),
150
- # Property(name="name", data_type=DataType.TEXT),
151
- # Property(name="projectId", data_type=DataType.TEXT, skip_vectorization=True),
152
- # Property(name="members", data_type=DataType.TEXT_ARRAY),
153
- # Property(name="skills", data_type=DataType.TEXT_ARRAY),
154
- # Property(name="createdAt", data_type=DataType.DATE),
155
- # Property(name="creatorId", data_type=DataType.TEXT),
156
- # Property(name="idea", data_type=DataType.TEXT),
157
- # ],
158
- # vectorizer_config=Configure.Vectorizer.none()
159
- # )
160
-
161
- # # RAG Collections (with vectorizer)
162
- # rag_collections = {
163
- # "Job": [
164
- # Property(name="jobId", data_type=DataType.TEXT), Property(name="title", data_type=DataType.TEXT),
165
- # Property(name="companyName", data_type=DataType.TEXT), Property(name="description", data_type=DataType.TEXT),
166
- # Property(name="skills", data_type=DataType.TEXT_ARRAY), Property(name="workplaceType", data_type=DataType.TEXT),
167
- # Property(name="employmentType", data_type=DataType.TEXT_ARRAY), Property(name="requirements", data_type=DataType.TEXT),
168
- # ],
169
- # "Opportunities": [
170
- # Property(name="opportunityId", data_type=DataType.TEXT), Property(name="title", data_type=DataType.TEXT),
171
- # Property(name="organization", data_type=DataType.TEXT), Property(name="description", data_type=DataType.TEXT),
172
- # Property(name="type", data_type=DataType.TEXT), Property(name="skills", data_type=DataType.TEXT_ARRAY),
173
- # ],
174
- # "Project": [
175
- # Property(name="projectId", data_type=DataType.TEXT), Property(name="title", data_type=DataType.TEXT),
176
- # Property(name="description", data_type=DataType.TEXT), Property(name="skills", data_type=DataType.TEXT_ARRAY),
177
- # Property(name="creatorName", data_type=DataType.TEXT),
178
- # ]
179
- # }
180
-
181
- # for name, properties in rag_collections.items():
182
- # if not weaviate_client.collections.exists(name):
183
- # weaviate_client.collections.create(
184
- # name=name,
185
- # properties=properties,
186
- # vectorizer_config=Configure.Vectorizer.text2vec_openai()
187
- # )
188
-
189
- # ensure_collections()
190
-
191
- # # -------------------- Query Weaviate --------------------
192
- # def query_weaviate_collection(class_name: str, query_text: str, limit: int = 5) -> List[dict]:
193
- # """Performs a hybrid search on a Weaviate collection."""
194
- # try:
195
- # collection = weaviate_client.collections.get(class_name)
196
- # response = collection.query.hybrid(
197
- # query=query_text,
198
- # limit=limit,
199
- # query_properties=["title^2", "description", "skills"] if class_name == "Job" else None
200
- # )
201
- # return [obj.properties for obj in response.objects]
202
- # except Exception as e:
203
- # print(f"[Weaviate Query Error] {e}")
204
- # return []
205
-
206
- # # -------------------- RAG Prompt Builder --------------------
207
- # def build_rag_prompt(user_question: str, retrieved_items: List[dict], class_name: str) -> str:
208
- # if not retrieved_items:
209
- # return ""
210
- # context_parts = [json.dumps(item, indent=2, ensure_ascii=False) for item in retrieved_items]
211
- # context_block = "\n\n---\n\n".join(context_parts)
212
-
213
- # return f"""
214
- # User Question: "{user_question}"
215
-
216
- # You are an expert AI assistant. Your ONLY source of information is the structured JSON data provided below under "Retrieved Data". If the data section is empty, you must state that no results were found.
217
-
218
- # **Instructions:**
219
- # 1. Analyze all JSON objects provided.
220
- # 2. Convert the data for each item into a clear, readable summary. Use Markdown for formatting (headings, lists).
221
- # 3. Present all important details like title, company/creator, description, and skills.
222
-
223
- # Retrieved Data from '{class_name}' collection:
224
- # {context_block}
225
- # """
226
-
227
- # # -------------------- RAG Answer --------------------
228
- # def rag_answer(user_question: str, class_name: str, top_k: int = 3) -> (str, List[dict]):
229
- # retrieved = query_weaviate_collection(class_name, user_question, limit=top_k)
230
- # if not retrieved:
231
- # return f"Sorry, I couldn't find any results related to '{user_question}' in the {class_name} database.", []
232
-
233
- # prompt = build_rag_prompt(user_question, retrieved, class_name)
234
-
235
- # try:
236
- # resp = llm_client.chat.completions.create(
237
- # model=MODEL_NAME,
238
- # messages=[
239
- # {"role": "system", "content": SYSTEM_PROMPT_BASE},
240
- # {"role": "user", "content": prompt}
241
- # ],
242
- # temperature=0.3, max_tokens=4096
243
- # )
244
- # answer = resp.choices[0].message.content or ""
245
- # except Exception as e:
246
- # print(f"[RAG LLM Error] {e}")
247
- # answer = "⚠️ Sorry, I couldn't process that. Try again later."
248
-
249
- # return answer, retrieved
250
- # # ================================
251
- # # Part 3 — Conversation State Machine + Embedding Recommendations
252
- # # ================================
253
-
254
- # # -------------------- Embedding helpers --------------------
255
- # def compute_embedding(text: str) -> List[float]:
256
- # """Compute embedding using the OpenAI-compatible client."""
257
- # try:
258
- # resp = llm_client.embeddings.create(model="Qwen/Qwen3-Embedding-8B", input=text, encoding_format="float")
259
- # if hasattr(resp, "data") and resp.data:
260
- # return resp.data[0].embedding
261
- # except Exception as e:
262
- # print("[compute_embedding] error:", e)
263
- # return []
264
-
265
- # def cosine_similarity(a: List[float], b: List[float]) -> float:
266
- # if not a or not b: return 0.0
267
- # va = np.array(a, dtype=float)
268
- # vb = np.array(b, dtype=float)
269
- # denom = (np.linalg.norm(va) * np.linalg.norm(vb))
270
- # return float(np.dot(va, vb) / denom) if denom != 0 else 0.0
271
-
272
- # # -------------------- Recommendations by embedding --------------------
273
- # def recommend_jobs_by_embedding(cv_text: str, top_k: int = 5, jobs_fetch_limit: int = 200) -> str:
274
- # """
275
- # NOTE: This function computes embeddings on-the-fly for jobs, which is slow for large datasets.
276
- # For production, pre-calculate and store job embeddings in Weaviate.
277
- # """
278
- # skills = extract_skills_from_text(cv_text or "")
279
- # user_text = " ".join(skills) if skills else (cv_text or "")[:500]
280
- # user_emb = compute_embedding(user_text)
281
- # if not user_emb:
282
- # return "⚠️ Unable to compute embedding for your CV. Try again or check API keys."
283
-
284
- # try:
285
- # jobs_col = weaviate_client.collections.get("Job")
286
- # fetched = jobs_col.query.fetch_objects(limit=jobs_fetch_limit)
287
- # if not fetched.objects: return "⚠️ No jobs found in the database."
288
- # except Exception as e:
289
- # print("[recommend_jobs_by_embedding] Weaviate fetch error:", e)
290
- # return "⚠️ Could not fetch jobs from the database."
291
-
292
- # scored_jobs = []
293
- # for obj in fetched.objects:
294
- # props = obj.properties
295
- # job_text_parts = [*(props.get("skills") or []), props.get("title", ""), (props.get("description") or "")[:1000]]
296
- # job_text = " ".join(filter(None, job_text_parts)).strip()
297
- # if not job_text: continue
298
-
299
- # job_emb = compute_embedding(job_text)
300
- # if job_emb:
301
- # score = cosine_similarity(user_emb, job_emb)
302
- # scored_jobs.append((score, props))
303
-
304
- # if not scored_jobs:
305
- # return "⚠️ No jobs could be compared. The database might be empty or embeddings could not be generated."
306
-
307
- # scored_jobs.sort(key=lambda x: x[0], reverse=True)
308
- # top = scored_jobs[:top_k]
309
-
310
- # lines = [f"### Here are the top {len(top)} recommended jobs for you:"]
311
- # for score, props in top:
312
- # lines.append(
313
- # f"**{props.get('title', 'N/A')}** at *{props.get('companyName', 'N/A')}*\n"
314
- # f"- **Match Score**: {score:.2%}\n"
315
- # f"- **Skills**: {', '.join(props.get('skills', []))}\n"
316
- # f"---"
317
- # )
318
- # return "\n".join(lines)
319
-
320
- # # -------------------- INTENT ROUTING & KNOWLEDGE BASE --------------------
321
- # def route_intent(text: str) -> Optional[str]:
322
- # low = text.lower()
323
- # # KB intents
324
- # if any(k in low for k in ["register", "تسجيل"]): return "kb_registration"
325
- # if ("who" in low and "edunatives" in low) or ("مين" in low and "edunatives" in low): return "kb_about"
326
- # if ("how" in low and "apply" in low) or ("كيف" in low and "وظيفة" in low): return "kb_apply_job"
327
- # if ("contact" in low and "support" in low) or ("اتواصل" in low and "دعم" in low): return "kb_contact_support"
328
- # # RAG intents
329
- # if any(k in low for k in ["job", "وظيفة", "internship"]): return "rag_job"
330
- # if any(k in low for k in ["project", "مشروع"]): return "rag_project"
331
- # if any(k in low for k in ["opportunity", "فرصة", "scholarship"]): return "rag_opportunities"
332
- # return None
333
-
334
- # def kb_fallback(intent: str) -> Optional[str]:
335
- # kb = {
336
- # "kb_registration": "To register as a student, please visit our website, click on 'Register', and fill out the form. [Arabic: ู„ู„ุชุณุฌูŠู„ ูƒุทุงู„ุจุŒ ู‚ู… ุจุฒูŠุงุฑุฉ ู…ูˆู‚ุนู†ุงุŒ ุงุถุบุท ุนู„ู‰ 'ุชุณุฌูŠู„'ุŒ ูˆุงู…ู„ุฃ ุงู„ุจูŠุงู†ุงุช ุงู„ู…ุทู„ูˆุจุฉ.]",
337
- # "kb_about": "EduNatives is an academic and career platform connecting students with opportunities, projects, and mentoring from universities and companies. [Arabic: EduNatives ู‡ูŠ ู…ู†ุตุฉ ุฃูƒุงุฏูŠู…ูŠุฉ ูˆู…ู‡ู†ูŠุฉ ุชุฑุจุท ุงู„ุทู„ุงุจ ุจุงู„ูุฑุต ูˆุงู„ู…ุดุงุฑูŠุน ูˆุงู„ุฅุฑุดุงุฏ ู…ู† ุงู„ุฌุงู…ุนุงุช ูˆุงู„ุดุฑูƒุงุช.]",
338
- # "kb_apply_job": "You can apply for a job by finding it on the platform, clicking 'Apply', and following the steps, which include uploading your CV. [Arabic: ูŠู…ูƒู†ูƒ ุงู„ุชู‚ุฏูŠู… ุนู„ู‰ ูˆุธูŠูุฉ ุจุงู„ุจุญุซ ุนู†ู‡ุง ููŠ ุงู„ู…ู†ุตุฉุŒ ุงู„ุถุบุท ุนู„ู‰ 'ู‚ุฏู… ุงู„ุขู†'ุŒ ูˆุงุชุจุงุน ุงู„ุฎุทูˆุงุช ุงู„ุชูŠ ุชุชุถู…ู† ุชุญู…ูŠู„ ุณูŠุฑุชูƒ ุงู„ุฐุงุชูŠุฉ.]",
339
- # "kb_contact_support": "You can contact support via email at support@edunatives.com. [Arabic: ูŠู…ูƒู†ูƒ ุงู„ุชูˆุงุตู„ ู…ุน ุงู„ุฏุนู… ุนุจุฑ ุงู„ุจุฑูŠุฏ ุงู„ุฅู„ูƒุชุฑูˆู†ูŠ support@edunatives.com.]"
340
- # }
341
- # return kb.get(intent)
342
-
343
- # # -------------------- Conversation Session helpers --------------------
344
- # def initial_session() -> dict:
345
- # return {"state": "idle", "data": {}}
346
-
347
- # def handle_uploaded_cv_for_session(session: dict, uploaded_file: Any) -> (str, dict):
348
- # if not uploaded_file: return "⚠️ No file received.", session
349
- # doc_info = process_uploaded_file(uploaded_file)
350
- # if not doc_info or "error" in doc_info:
351
- # return f"⚠️ Error processing CV: {doc_info.get('error', 'unknown error')}", session
352
-
353
- # session["data"]["cvText"] = doc_info.get("content", "")
354
- # session["data"]["cvSkills"] = doc_info.get("skills", [])
355
-
356
- # st = session.get("state")
357
- # if st == "apply_wait_cv":
358
- # session["state"] = "apply_jobtitle"
359
- # detected = ", ".join(session["data"]["cvSkills"]) if session["data"]["cvSkills"] else "none"
360
- # return f"CV received. Detected skills: {detected}. Which job title are you applying for?", session
361
- # if st == "recommend_wait_cv":
362
- # rec_text = recommend_jobs_by_embedding(session["data"]["cvText"])
363
- # return f"Here are job recommendations based on your CV:\n\n{rec_text}", initial_session()
364
-
365
- # return "CV uploaded. What's next?", session
366
-
367
- # # =================================================================
368
- # # FIXED: Functions to save data to Weaviate
369
- # # =================================================================
370
-
371
- # def save_application_to_weaviate(app_data: dict) -> bool:
372
- # """Saves a new application object to the Weaviate 'Application' collection."""
373
- # try:
374
- # applications = weaviate_client.collections.get("Application")
375
- # applications.data.insert(properties=app_data)
376
- # print(f"✅ Successfully saved application {app_data.get('applicationId')}")
377
- # return True
378
- # except Exception as e:
379
- # print(f"❌ Failed to save application to Weaviate: {e}")
380
- # return False
381
-
382
- # def save_team_to_weaviate(team_data: dict) -> bool:
383
- # """Saves a new team object to the Weaviate 'Team' collection."""
384
- # try:
385
- # teams = weaviate_client.collections.get("Team")
386
- # teams.data.insert(properties=team_data)
387
- # print(f"✅ Successfully saved team {team_data.get('teamId')}")
388
- # return True
389
- # except Exception as e:
390
- # print(f"❌ Failed to save team to Weaviate: {e}")
391
- # return False
392
-
393
- # def update_team_add_member(team_name: str, member_name: str, skills: list) -> str:
394
- # """Finds a team by name, adds a member and their skills, then updates it."""
395
- # try:
396
- # teams = weaviate_client.collections.get("Team")
397
- # # Step 1: Find the team by its name
398
- # response = teams.query.fetch_objects(
399
- # filters=Filter.by_property("name").equal(team_name),
400
- # limit=1
401
- # )
402
- # if not response.objects:
403
- # return f"⚠️ Team '{team_name}' not found."
404
-
405
- # team_obj = response.objects[0]
406
-
407
- # # Step 2: Update the properties
408
- # updated_members = list(set((team_obj.properties.get("members") or []) + [member_name]))
409
- # updated_skills = list(set((team_obj.properties.get("skills") or []) + skills))
410
-
411
- # # Step 3: Replace the object with the updated data
412
- # teams.data.replace(
413
- # uuid=team_obj.uuid,
414
- # properties={
415
- # **team_obj.properties,
416
- # "members": updated_members,
417
- # "skills": updated_skills
418
- # }
419
- # )
420
- # return f"🎉 You have been added to the team '{team_name}'."
421
- # except Exception as e:
422
- # print(f"❌ Failed to update team in Weaviate: {e}")
423
- # return "⚠️ An error occurred while trying to join the team."
424
-
425
- # # -------------------- Main message handler (state machine) --------------------
426
- # def handle_user_message(session: dict, user_text: str, uploaded_file: Any = None) -> (str, dict, bool):
427
- # session = session or initial_session()
428
- # st = session.get("state", "idle")
429
- # text = (user_text or "").strip()
430
-
431
- # if text.lower() in ("cancel", "exit", "quit", "restart", "reset"):
432
- # return "Conversation reset. How can I help?", initial_session(), False
433
-
434
- # if uploaded_file:
435
- # bot_msg, new_session = handle_uploaded_cv_for_session(session, uploaded_file)
436
- # return bot_msg, new_session, False
437
-
438
- # # ========== IDLE STATE ==========
439
- # if st == "idle":
440
- # low = text.lower()
441
- # if any(k in low for k in ["hi", "hello", "hey", "مرحبا"]):
442
- # return "👋 Hello! How can I support you today? You can ask about jobs, teams, or recommendations.", session, False
443
-
444
- # # Flow starters
445
- # if any(k in low for k in ["apply", "أريد التقديم"]):
446
- # session["state"] = "apply_name"
447
- # return "Okay, let's start your application. What's your full name?", initial_session(), False
448
- # if any(k in low for k in ["team", "فريق"]):
449
- # session["state"] = "team_action"
450
- # return "Do you want to 'create' a team or 'join' an existing one?", initial_session(), False
451
- # if any(k in low for k in ["recommend", "رشح"]):
452
- # session["state"] = "recommend_wait_cv"
453
- # return "Please upload your CV to get job recommendations.", initial_session(), True
454
-
455
- # # Intent Routing for RAG/KB
456
- # intent = route_intent(text)
457
- # if intent:
458
- # if intent.startswith("kb_"):
459
- # return kb_fallback(intent), session, False
460
- # if intent.startswith("rag_"):
461
- # class_map = {"rag_job": "Job", "rag_project": "Project", "rag_opportunities": "Opportunities"}
462
- # rag_ans, _ = rag_answer(text, class_map[intent])
463
- # return rag_ans, session, False
464
-
465
- # # Fallback to general LLM
466
- # try:
467
- # resp = llm_client.chat.completions.create(model=MODEL_NAME, messages=[{"role": "system", "content": SYSTEM_PROMPT_BASE}, {"role": "user", "content": text}], temperature=0.3, max_tokens=4096)
468
- # return resp.choices[0].message.content or "...", session, False
469
- # except Exception as e:
470
- # return f"⚠️ Sorry, an error occurred: {e}", session, False
471
-
472
- # # ========== APPLY FLOW ==========
473
- # if st == "apply_name":
474
- # session["data"]["applicantName"] = text
475
- # session["state"] = "apply_email"
476
- # return "Thanks. What's your email address?", session, False
477
- # if st == "apply_email":
478
- # session["data"]["applicantEmail"] = text
479
- # session["state"] = "apply_cover"
480
- # return "Got it. Please type a short cover letter (or 'skip').", session, False
481
- # if st == "apply_cover":
482
- # session["data"]["coverLetter"] = "" if text.lower() == "skip" else text
483
- # session["state"] = "apply_wait_cv"
484
- # return "Please upload your CV now.", session, True
485
- # if st == "apply_jobtitle":
486
- # session["data"]["targetJobTitle"] = text
487
- # session["state"] = "apply_confirm"
488
- # return f"You are applying for '{text}'. Is this correct? (yes/no)", session, False
489
- # if st == "apply_confirm":
490
- # if text.lower() in ("yes", "y", "نعم"):
491
- # app_data = {
492
- # "applicationId": str(uuid.uuid4()),
493
- # "jobId": session["data"].get("targetJobTitle"), # Simplified to title for now
494
- # "applicantName": session["data"].get("applicantName"),
495
- # "applicantEmail": session["data"].get("applicantEmail"),
496
- # "coverLetter": session["data"].get("coverLetter", ""),
497
- # "cvText": session["data"].get("cvText", ""),
498
- # "skills": session["data"].get("cvSkills", []),
499
- # "createdAt": get_rfc3339_time()
500
- # }
501
- # ok = save_application_to_weaviate(app_data)
502
- # msg = "🎉 Your application has been submitted successfully!" if ok else "⚠️ Failed to save application."
503
- # return msg, initial_session(), False
504
- # else:
505
- # return "Application cancelled. How else can I help?", initial_session(), False
506
-
507
- # # ========== TEAM FLOW ==========
508
- # if st == "team_action":
509
- # if "create" in text.lower(): session["state"] = "team_create_name"; return "Great! What's the team name?", session, False
510
- # if "join" in text.lower(): session["state"] = "team_join_name"; return "Okay, what's the name of the team to join?", session, False
511
- # return "Please say 'create' or 'join'.", session, False
512
- # if st == "team_create_name":
513
- # session["data"]["team_name"] = text; session["state"] = "team_create_owner"; return "Team name saved. Who is the owner?", session, False
514
- # if st == "team_create_owner":
515
- # session["data"]["owner"] = text; session["state"] = "team_create_skills"; return "Owner saved. List the team's skills (comma-separated).", session, False
516
- # if st == "team_create_skills":
517
- # session["data"]["skills"] = [s.strip() for s in text.split(",")]; session["state"] = "team_create_idea"; return "Skills saved. Briefly describe the project idea.", session, False
518
- # if st == "team_create_idea":
519
- # team_data = {
520
- # "teamId": str(uuid.uuid4()), "name": session["data"].get("team_name"),
521
- # "members": [session["data"].get("owner")], "skills": session["data"].get("skills", []),
522
- # "creatorId": session["data"].get("owner"), "idea": text, "createdAt": get_rfc3339_time()
523
- # }
524
- # saved = save_team_to_weaviate(team_data)
525
- # msg = f"🎉 Team '{team_data['name']}' created!" if saved else "⚠️ Failed to create team."
526
- # return msg, initial_session(), False
527
- # if st == "team_join_name":
528
- # session["data"]["team_name"] = text; session["state"] = "team_join_member"; return "What's your name (to add to the team)?", session, False
529
- # if st == "team_join_member":
530
- # session["data"]["member_name"] = text; session["state"] = "team_join_skills"; return "Enter your skills (comma-separated).", session, False
531
- # if st == "team_join_skills":
532
- # skills = [s.strip() for s in text.split(",") if s.strip()]
533
- # resp = update_team_add_member(session["data"].get("team_name"), session["data"].get("member_name"), skills)
534
- # return resp, initial_session(), False
535
-
536
- # # Fallback
537
- # return "Sorry, I'm not sure how to handle that. Let's start over.", initial_session(), False
538
-
539
-
540
- # # ================================
541
- # # Part 4 — Gradio Chat UI wiring
542
- # # ================================
543
- # atexit.register(lambda: weaviate_client.close())
544
-
545
- # with gr.Blocks(css="""
546
- # .chatbot {height: 520px; overflow: auto;}
547
- # .user-bubble {background-color: #DCF8C6; padding: 10px; border-radius: 12px; max-width: 75%; float: right; clear: both; margin: 5px; word-wrap: break-word;}
548
- # .bot-bubble {background-color: #F1F0F0; padding: 10px; border-radius: 12px; max-width: 75%; float: left; clear: both; margin: 5px; word-wrap: break-word;}
549
- # .chatbox-container {display: flex; gap: 8px; margin-top: 10px;}
550
- # """) as demo:
551
- # gr.Markdown("# 💬 EduNatives — Conversational Job Portal")
552
- # chat_html = gr.HTML(format_chat_html([]))
553
- # with gr.Row(elem_classes="chatbox-container"):
554
- # user_input = gr.Textbox(placeholder="Type your message (e.g., 'apply', 'create team', 'recommend')", scale=4)
555
- # send_btn = gr.Button("Send", variant="primary", scale=1)
556
- # with gr.Row(visible=False) as file_row:
557
- # cv_uploader = gr.File(label="Upload CV (.pdf/.docx/.txt)", file_count="single", scale=4)
558
- # upload_btn = gr.Button("Upload CV", scale=1, visible=True) # Button is visible within the row
559
- # with gr.Row():
560
- # clear_btn = gr.Button("Reset Conversation")
561
-
562
- # chat_history_state = gr.State([])
563
- # session_state = gr.State(initial_session)
564
-
565
- # def handle_send(message: str, history: List[Dict[str, str]], session: dict):
566
- # history.append({"role": "user", "content": message})
567
- # bot_reply, new_session, show_uploader = handle_user_message(session, message, None)
568
- # history.append({"role": "assistant", "content": bot_reply})
569
- # return "", format_chat_html(history), history, new_session, gr.update(visible=show_uploader), gr.update(visible=show_uploader)
570
-
571
- # def handle_upload(file_obj, history: List[Dict[str, str]], session: dict):
572
- # if not file_obj:
573
- # history.append({"role": "assistant", "content": "Please select a file first."})
574
- # return format_chat_html(history), history, session, gr.update(visible=True), gr.update(visible=True)
575
-
576
- # history.append({"role": "user", "content": f"📎 Uploaded: {os.path.basename(file_obj.name)}"})
577
- # bot_reply, new_session, show_uploader = handle_user_message(session, "", file_obj)
578
- # history.append({"role": "assistant", "content": bot_reply})
579
- # return format_chat_html(history), history, new_session, gr.update(visible=show_uploader), gr.update(visible=show_uploader)
580
-
581
- # def handle_reset():
582
- # return format_chat_html([]), [], initial_session(), gr.update(visible=False), gr.update(visible=False)
583
-
584
- # send_btn.click(
585
- # fn=handle_send,
586
- # inputs=[user_input, chat_history_state, session_state],
587
- # outputs=[user_input, chat_html, chat_history_state, session_state, file_row, cv_uploader]
588
- # )
589
- # user_input.submit(
590
- # fn=handle_send,
591
- # inputs=[user_input, chat_history_state, session_state],
592
- # outputs=[user_input, chat_html, chat_history_state, session_state, file_row, cv_uploader]
593
- # )
594
- # upload_btn.click(
595
- # fn=handle_upload,
596
- # inputs=[cv_uploader, chat_history_state, session_state],
597
- # outputs=[chat_html, chat_history_state, session_state, file_row, cv_uploader]
598
- # )
599
- # clear_btn.click(
600
- # fn=handle_reset,
601
- # inputs=[],
602
- # outputs=[chat_html, chat_history_state, session_state, file_row, cv_uploader]
603
- # )
604
-
605
- # if __name__ == "__main__":
606
- # demo.launch(debug=True)
607
-
608
-
609
-
610
-
611
- from __future__ import annotations
612
  import os
613
- import re
614
- import uuid
 
615
  import json
616
- import time
617
- import fitz # PyMuPDF
618
- import docx
619
- import markdown
620
- from datetime import datetime, timezone
621
- from typing import List, Dict, Any, Optional
622
- from dataclasses import dataclass
623
-
624
  import gradio as gr
625
- import numpy as np
626
- from openai import OpenAI
627
- import weaviate
628
- from weaviate.classes.init import Auth
629
- from weaviate.classes.config import Configure, Property, DataType
630
- from weaviate.classes.query import Filter
631
-
632
- # -------------------- Configuration --------------------
633
- MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b")
634
- DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY", "285LUJulGIprqT6hcPhiXtcrphU04FG4")
635
- BASE_URL = os.getenv("BASE_URL", "https://api.deepinfra.com/v1/openai")
636
-
637
- WEAVIATE_URL = os.getenv("WEAVIATE_URL", "htorgbgpt4w63nvf1yeuw.c0.us-west3.gcp.weaviate.cloud")
638
- WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY", "ZUd6clB5WmYzVGkxeU40cl96NTY5UkViUlVzY05Md3IzQ0JKelBZQmxGZHRPeGpCeGdxS1FUNnlYUkFFPV92MjAw")
639
-
640
- MEMORY_FILE = os.getenv("MEMORY_FILE", "chat_memory.json")
641
- LOG_FILE = os.getenv("LOG_FILE", "interaction_logs.json")
642
-
643
- # -------------------- Clients --------------------
644
- llm_client = OpenAI(api_key=DEEPINFRA_API_KEY, base_url=BASE_URL)
645
-
646
- weaviate_client = weaviate.connect_to_weaviate_cloud(
647
- cluster_url=WEAVIATE_URL,
648
- auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
649
- skip_init_checks=True
650
- )
651
-
652
- # -------------------- Helpers --------------------
653
- UTC = timezone.utc
654
-
655
- SYSTEM_PROMPT_BASE = (
656
- "You are *EduNatives Assistant*, a helpful, friendly, and precise academic/career guide. "
657
- "You serve three primary audiences: Students, Universities/Researchers, and Companies.\n\n"
658
- "Goals by audience:\n"
659
- "- Students: registration/account help; finding internships/scholarships; connecting with mentors or professors; querying projects; applying for jobs/opportunities; joining project teams.\n"
660
- "- Universities/Researchers: publish research or announce events; connect/collaborate with students.\n"
661
- "- Companies: post jobs/internships/graduate roles; discover student talent.\n\n"
662
- "General rules:\n"
663
- "- Reply in the user's language (Arabic if the user writes Arabic; otherwise English).\n"
664
- "- Use data from Weaviate collections (Job, Opportunities, Project) when relevant.\n"
665
- "- Be concise, step-by-step, and action-oriented (lists, bullets, checklists).\n"
666
- "- If information is unavailable, state that clearly and suggest the next best step.\n"
667
- "- For CV analysis, extract skills/experience and recommend matching opportunities.\n"
668
- "- Assist with applications and team matching.\n"
669
- "- Ensure that all generated prompts are phrased using positive reinforcement."
670
- )
671
-
672
- def get_rfc3339_time() -> str:
673
- """Return current UTC time in RFC3339 format"""
674
- return datetime.now(UTC).isoformat().replace('+00:00', 'Z')
675
-
676
- # --- CV Skills Extraction (Regex baseline, can replace with NLP model later) ---
677
- _SKILL_REGEX = re.compile(r"\b(Natural Language Processing|Building Information Modeling|Search Engine Optimization|Search Engine Marketing|Aerospace Engineering & Management|Computational Fluid Dynamics|Kotlin Multiplatform|Google Cloud Platform|Social Media Marketing|Aerospace Engineering|Microsoft SQL Server|Amazon Web Services|Finite Element Analysis|Technology-based Management|Autodesk Inventor|Emotional Intelligence|Aerospace Engineering & Operations Management|Content Marketing|Presentation Skills|Interpersonal Skills|Critical Thinking|Financial Modeling|Decision Making|Process Improvement|Time Management|Lean Manufacturing|Project Management|Microsoft Excel|Data Visualization|Computer Vision|Machine Learning|Deep Learning|Attention to Detail|Six Sigma|Risk Analysis|Data Analysis|Data Science|Communication|Collaboration|Teamwork|Leadership|Management|Adaptability|Creativity|Innovation|Negotiation|Android|Angular|Ansible|Apache|ArcGIS|Arduino|Asana|ASP\.NET|AutoCAD|Azure|Bash|BIM|Business Analysis|C\+\+|C#|CAM|Cassandra|CATIA|CentOS|Chef|CI/CD|Civil 3D|Content Marketing|CRM|CSS|Data Mining|Django|Docker|Elasticsearch|Email Marketing|ERP|ETABS|ETL|Express\.js|Facebook Ads|Firebase|Flask|Flutter|FPGA|Fusion 360|GCP|Git|GitHub|GitLab|Go|Google Ads|Google Analytics|GraphQL|Hadoop|HTML|HubSpot|iOS|Java|JavaScript|Jenkins|Jira|Jupyter Notebook|Kanban|Keras|Kotlin|Kubernetes|LabVIEW|Laravel|LESS|Linux|Lua|macOS|Marketo|MATLAB|Matplotlib|MongoDB|Multisim|MySQL|Nginx|NLP|Node\.js|NoSQL|Nuxt\.js|NumPy|Next\.js|Objective-C|Oracle Database|Oracle|OrCAD|Pandas|Perl|PHP|PLC|Plotly|PostgreSQL|Power BI|PowerShell|Problem Solving|Puppet|PSpice|Python|PyTorch|Raspberry Pi|React Native|React|Red Hat|Redis|Revit|Ruby on Rails|Ruby|Rust|Salesforce|SAP2000|SAP|Sass|SCADA|Scala|Scikit-learn|Scrum|Seaborn|SEM|SEO|Simulink|SketchUp|Slack|SolidWorks|Spring 
Boot|SQL|SQLAlchemy|SwiftUI|Swift|Tableau|Terraform|TensorFlow|Trello|TypeScript|Ubuntu|Verilog|VHDL|Vue\.js|Waterfall|Windows|WordPress|Xamarin|Analytical Skills)\b", re.IGNORECASE)
678
-
679
- def extract_skills_from_text(cv_text: str) -> List[str]:
680
- skills = list({m.group(0).lower() for m in _SKILL_REGEX.finditer(cv_text)})
681
- return [s.capitalize() for s in skills]
682
-
683
- # --- Process uploaded file (PDF, DOCX, TXT) ---
684
- def process_uploaded_file(file_obj: Any) -> dict | None:
685
- if not file_obj:
686
- return None
687
- file_path = file_obj.name
688
- filename = os.path.basename(file_path)
689
- text_content = ""
690
- try:
691
- if filename.lower().endswith(".pdf"):
692
- with fitz.open(file_path) as doc:
693
- for page in doc:
694
- text_content += page.get_text()
695
- elif filename.lower().endswith(".docx"):
696
- docp = docx.Document(file_path)
697
- for p in docp.paragraphs:
698
- text_content += p.text + "\n"
699
- elif filename.lower().endswith(".txt"):
700
- with open(file_path, "r", encoding="utf-8") as f:
701
- text_content = f.read()
702
- else:
703
- return {"error": f"Unsupported file type: {filename}"}
704
-
705
- skills = extract_skills_from_text(text_content)
706
- return {"content": text_content.strip(), "skills": skills, "filename": filename}
707
-
708
- except Exception as e:
709
- return {"error": f"Error processing file {filename}: {e}"}
710
-
711
- ARABIC_RANGE = (
712
- (0x0600, 0x06FF), (0x0750, 0x077F), (0x08A0, 0x08FF),
713
- (0xFB50, 0xFDFF), (0xFE70, 0xFEFF), (0x1EE00, 0x1EEFF)
714
- )
715
-
716
- def is_arabic(text: str) -> bool:
717
- for ch in text:
718
- code = ord(ch)
719
- for a, b in ARABIC_RANGE:
720
- if a <= code <= b:
721
- return True
722
- return False
723
-
724
- # --- Chat history HTML formatter (for Gradio) ---
725
- def format_chat_html(history: List[Dict[str, str]]) -> str:
726
- html = "<div class='chatbot'>"
727
- for msg in history:
728
- role = msg["role"]
729
- content = msg["content"]
730
- if role == "user":
731
- html += f"<div class='user-bubble'>{content}</div>"
732
- else:
733
- html_content = markdown.markdown(content, extensions=['tables'])
734
- html += f"<div class='bot-bubble'>{html_content}</div>"
735
- html += "</div>"
736
- return html
737
-
738
- # ================================
739
- # Part 2 — Weaviate Collections + Query + RAG
740
- # ================================
741
-
742
- # -------------------- Clean Query Text --------------------
743
- def clean_query_text(query_text: str) -> str:
744
- """Clean the query text to extract key terms for better search results."""
745
- stop_words = ["show", "me", "available", "jobs", "job", "please", "find", "search", "for"]
746
- query_text = query_text.lower().strip()
747
- for word in stop_words:
748
- query_text = query_text.replace(f" {word} ", " ")
749
- query_text = re.sub(r'[^\w\s]', '', query_text)
750
- query_text = " ".join(query_text.split())
751
- return query_text
752
-
753
- # -------------------- Ensure collections --------------------
754
- def ensure_collections():
755
- if not weaviate_client.collections.exists("Application"):
756
- weaviate_client.collections.create(
757
- name="Application",
758
- properties=[
759
- Property(name="applicationId", data_type=DataType.TEXT),
760
- Property(name="jobId", data_type=DataType.TEXT),
761
- Property(name="applicantName", data_type=DataType.TEXT),
762
- Property(name="applicantEmail", data_type=DataType.TEXT),
763
- Property(name="coverLetter", data_type=DataType.TEXT),
764
- Property(name="cvText", data_type=DataType.TEXT),
765
- Property(name="skills", data_type=DataType.TEXT_ARRAY),
766
- Property(name="createdAt", data_type=DataType.DATE),
767
- ],
768
- vectorizer_config=Configure.Vectorizer.none()
769
- )
770
 
771
- if not weaviate_client.collections.exists("Team"):
772
- weaviate_client.collections.create(
773
- name="Team",
774
- properties=[
775
- Property(name="teamId", data_type=DataType.TEXT),
776
- Property(name="name", data_type=DataType.TEXT),
777
- Property(name="projectId", data_type=DataType.TEXT),
778
- Property(name="members", data_type=DataType.TEXT_ARRAY),
779
- Property(name="skills", data_type=DataType.TEXT_ARRAY),
780
- Property(name="createdAt", data_type=DataType.DATE),
781
- Property(name="creatorId", data_type=DataType.TEXT),
782
- ],
783
- vectorizer_config=Configure.Vectorizer.none()
784
- )
785
 
786
- if not weaviate_client.collections.exists("Memory"):
787
- weaviate_client.collections.create(
788
- name="Memory",
 
 
 
 
 
 
 
 
 
 
789
  properties=[
790
- Property(name="memoryId", data_type=DataType.TEXT),
791
- Property(name="sessionId", data_type=DataType.TEXT),
792
- Property(name="text", data_type=DataType.TEXT),
793
- Property(name="createdAt", data_type=DataType.DATE),
794
- ],
795
- vectorizer_config=Configure.Vectorizer.none()
796
  )
797
-
798
- if not weaviate_client.collections.exists("Job"):
799
- weaviate_client.collections.create(
800
- name="Job",
801
- properties=[
802
- Property(name="jobId", data_type=DataType.TEXT),
803
- Property(name="title", data_type=DataType.TEXT),
804
- Property(name="companyName", data_type=DataType.TEXT),
805
- Property(name="description", data_type=DataType.TEXT),
806
- Property(name="skills", data_type=DataType.TEXT_ARRAY),
807
- Property(name="workplaceType", data_type=DataType.TEXT),
808
- Property(name="employmentType", data_type=DataType.TEXT_ARRAY),
809
- Property(name="requirements", data_type=DataType.TEXT),
810
- ],
811
- vectorizer_config=Configure.Vectorizer.text2vec_openai()
812
- )
813
-
814
- if not weaviate_client.collections.exists("Opportunities"):
815
- weaviate_client.collections.create(
816
- name="Opportunities",
817
- properties=[
818
- Property(name="opportunityId", data_type=DataType.TEXT),
819
- Property(name="title", data_type=DataType.TEXT),
820
- Property(name="organization", data_type=DataType.TEXT),
821
- Property(name="description", data_type=DataType.TEXT),
822
- Property(name="type", data_type=DataType.TEXT),
823
- Property(name="skills", data_type=DataType.TEXT_ARRAY),
824
- ],
825
- vectorizer_config=Configure.Vectorizer.text2vec_openai()
826
  )
 
 
 
 
827
 
828
- if not weaviate_client.collections.exists("Project"):
829
- weaviate_client.collections.create(
830
- name="Project",
831
- properties=[
832
- Property(name="projectId", data_type=DataType.TEXT),
833
- Property(name="title", data_type=DataType.TEXT),
834
- Property(name="description", data_type=DataType.TEXT),
835
- Property(name="skills", data_type=DataType.TEXT_ARRAY),
836
- Property(name="creatorName", data_type=DataType.TEXT),
837
- ],
838
- vectorizer_config=Configure.Vectorizer.text2vec_openai()
839
- )
840
 
841
- ensure_collections()
842
 
843
- # -------------------- Query Weaviate (Hybrid Search) --------------------
844
- def query_weaviate_collection(class_name: str, query_text: str, limit: int = 10) -> List[dict]:
845
- """
846
- Performs a hybrid search on a Weaviate collection to get more relevant results
847
- for conversational queries.
848
- """
849
- try:
850
- print(f"[DEBUG] Querying collection {class_name} with query: {query_text}")
851
- collection = weaviate_client.collections.get(class_name)
852
- cleaned_query = clean_query_text(query_text)
853
- print(f"[DEBUG] Cleaned query: {cleaned_query}")
854
- response = collection.query.hybrid(
855
- query=cleaned_query,
856
- limit=limit,
857
- query_properties=["title^2", "description", "skills"] if class_name in ["Job", "Project", "Opportunities"] else None
858
- )
859
- items = [obj.properties for obj in response.objects]
860
- print(f"[DEBUG] Found {len(items)} results for {class_name}: {items}")
861
- return items
862
- except Exception as e:
863
- print(f"[Weaviate Query Error] {class_name} - {e}")
864
- return []
865
 
866
- # -------------------- Search All Collections --------------------
867
- def search_all_collections(query_text: str, limit_per_class: int = 3) -> list[dict]:
868
- """
869
- Searches across Job, Project, and Opportunities collections and returns
870
- a combined list of results, with each result tagged by its class name.
871
- """
872
- collections_to_search = ["Job", "Project", "Opportunities"]
873
- all_results = []
874
- for class_name in collections_to_search:
875
- items = query_weaviate_collection(class_name, query_text, limit=limit_per_class)
876
- for item_properties in items:
877
- all_results.append({
878
- "class_name": class_name,
879
- "properties": item_properties
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
880
  })
881
- return all_results
882
-
883
- # -------------------- RAG Answer (Multi-Class Search) --------------------
884
- def rag_answer_all(user_question: str, top_k: int = 3) -> (str, list[dict]):
885
- retrieved_items = search_all_collections(user_question, limit_per_class=top_k)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
886
 
887
- if not retrieved_items:
888
- return f"Sorry, I couldn't find any results related to '{user_question}' in our Jobs, Projects, or Opportunities databases.", []
889
-
890
- context_parts = []
891
- grouped_results = {}
892
- for item in retrieved_items:
893
- class_name = item["class_name"]
894
- if class_name not in grouped_results:
895
- grouped_results[class_name] = []
896
- grouped_results[class_name].append(item["properties"])
897
-
898
- for class_name, items in grouped_results.items():
899
- context_parts.append(f"\n--- Results from '{class_name}' collection ---")
900
- for i, properties in enumerate(items, 1):
901
- details = {k: str(v) for k, v in properties.items()}
902
- item_str = f"Record {i}:\n{json.dumps(details, indent=2, ensure_ascii=False)}"
903
- context_parts.append(item_str)
904
 
905
- context_block = "\n".join(context_parts)
906
- prompt = f"""
907
- User Question: "{user_question}"
908
- You are an expert AI assistant. Your mission is to analyze structured data from different categories (Jobs, Projects, Opportunities) and present a comprehensive, clear summary to the user.
909
- *Primary Directive:* Your ONLY source of information is the structured JSON data provided below under "Retrieved Data". If the data section is empty, state that no results were found.
910
- *Your Core Instructions:*
911
- 1. *Acknowledge the Categories:* Analyze all the data provided from each collection (Job, Project, Opportunities).
912
- 2. *Summarize Logically:* For each result, *you must clearly state which category it belongs to. For example, start with "I found a **Job* opportunity:" or "Here is a *Project* you might be interested in:".
913
- 3. *Present All Details:* Convert the data for each item into natural, readable language, covering all important details like title, company/creator, description, and skills.
914
- 4. *Use Clear Formatting:* Use Markdown headings (e.g., ### Job: [Title]) and bullet points to make the response easy to read.
915
- Retrieved Data:
916
- {context_block}
917
- """
918
-
919
- try:
920
- resp = llm_client.chat.completions.create(
921
- model=MODEL_NAME,
922
- messages=[
923
- {"role": "system", "content": SYSTEM_PROMPT_BASE},
924
- {"role": "user", "content": prompt}
925
- ],
926
- temperature=0.3,
927
- max_tokens=4096
928
- )
929
- answer = resp.choices[0].message.content or ""
930
- except Exception as e:
931
- print(f"[RAG LLM Error] {e}")
932
- answer = "⚠️ Sorry, I couldn't process that. Try again later."
933
-
934
- return answer, retrieved_items
935
-
936
- # ================================
937
- # Part 3 — Conversation State Machine + Embedding Recommendations
938
- # ================================
939
-
940
- # -------------------- Embedding helpers --------------------
941
- def compute_embedding(text: str) -> List[float]:
942
- try:
943
- resp = llm_client.embeddings.create(
944
- model="Qwen/Qwen3-Embedding-8B",
945
- input=text,
946
- encoding_format="float"
947
- )
948
- if hasattr(resp, "data") and resp.data:
949
- return resp.data[0].embedding
950
- except Exception as e:
951
- print("[compute_embedding] error:", e)
952
- return []
953
-
954
- def cosine_similarity(a: List[float], b: List[float]) -> float:
955
- try:
956
- va = np.array(a, dtype=float)
957
- vb = np.array(b, dtype=float)
958
- if va.size == 0 or vb.size == 0: return 0.0
959
- denom = (np.linalg.norm(va) * np.linalg.norm(vb))
960
- if denom == 0: return 0.0
961
- return float(np.dot(va, vb) / denom)
962
- except Exception as e:
963
- print("[cosine_similarity] error:", e)
964
- return 0.0
965
-
966
- # -------------------- Recommendations by embedding --------------------
967
- def recommend_jobs_by_embedding(cv_text: str, top_k: int = 5, jobs_fetch_limit: int = 200) -> str:
968
- skills = extract_skills_from_text(cv_text or "")
969
- user_text = " ".join(skills) if skills else (cv_text or "")[:500]
970
- user_emb = compute_embedding(user_text)
971
- if not user_emb:
972
- return "⚠️ Unable to compute embedding for your CV. Try again or check API keys."
973
-
974
- try:
975
- jobs_col = weaviate_client.collections.get("Job")
976
- fetched = jobs_col.query.fetch_objects(limit=jobs_fetch_limit)
977
- if not fetched.objects:
978
- return "⚠️ No jobs found in the database."
979
- except Exception as e:
980
- print("[recommend_jobs_by_embedding] Weaviate fetch error:", e)
981
- return "⚠️ Could not fetch jobs from the database."
982
-
983
- scored_jobs = []
984
- for obj in fetched.objects:
985
- props = obj.properties
986
- job_text_parts = []
987
- if props.get("skills"): job_text_parts.append(" ".join(props.get("skills")))
988
- if props.get("title"): job_text_parts.append(props.get("title"))
989
- if props.get("description"): job_text_parts.append((props.get("description") or "")[:2000])
990
- job_text = " ".join(job_text_parts).strip() or (props.get("title") or "")
991
- job_emb = compute_embedding(job_text)
992
- if not job_emb: continue
993
- score = cosine_similarity(user_emb, job_emb)
994
- scored_jobs.append((score, props))
995
-
996
- if not scored_jobs:
997
- return "⚠️ No jobs could be embedded / compared."
998
-
999
- scored_jobs.sort(key=lambda x: x[0], reverse=True)
1000
- top = scored_jobs[:top_k]
1001
- lines = []
1002
- for score, props in top:
1003
- title = props.get("title", "No title")
1004
- company = props.get("companyName", "Unknown company")
1005
- job_id = props.get("jobId", "")
1006
- salary = props.get("salary") or props.get("salaryDetails") or "Not specified"
1007
- skills_list = props.get("skills") or []
1008
- description = (props.get("description") or "").strip()
1009
- lines.append(
1010
- f"*{title}* at {company} \n"
1011
- f"- Job ID: {job_id} \n"
1012
- f"- Score: {score:.3f} \n"
1013
- f"- Salary: {salary} \n"
1014
- f"- Skills: {skills_list} \n"
1015
- f"- Description: {description[:600]}{'...' if len(description)>600 else ''} \n"
1016
- f"---"
1017
- )
1018
- return "\n\n".join(lines)
1019
-
1020
- # -------------------- INTENT ROUTING & KNOWLEDGE BASE --------------------
1021
- def route_intent(text: str) -> Optional[str]:
1022
- low = text.lower()
1023
- if "تسجيل" in low or "register" in low: return "kb_registration"
1024
- if ("مين" in low and "edunatives" in low) or ("who is" in low and "edunatives" in low): return "kb_about"
1025
- if ("كيف" in low and "وظيفة" in low) or ("how to apply" in low and "job" in low): return "kb_apply_job"
1026
- if "اتواصل" in low and ("دعم" in low or "contact support" in low): return "kb_contact_support"
1027
- return None
1028
-
1029
- def kb_fallback(intent: str) -> Optional[str]:
1030
- kb = {
1031
- "kb_registration": "To register as a student, you need to create an account on our platform. You can do this by visiting our website, clicking on 'Register', and filling out the required information. [Arabic: للتسجيل كطالب، تحتاج إلى إنشاء حساب على منصتنا. يمكنك فعل ذلك بزيارة موقعنا، والنقر على 'تسجيل'، ثم ملء البيانات المطلوبة.]",
1032
- "kb_about": "EduNatives is an academic and career platform that connects students with opportunities, projects, and mentoring from universities and companies. [Arabic: EduNatives هي منصة أكاديمية ومهنية تربط الطلاب بالفرص والمشاريع والإرشاد من الجامعات والشركات.]",
1033
- "kb_apply_job": "You can apply for a job directly through the platform. Simply find the job you're interested in, click on 'Apply', and follow the step-by-step process, which includes uploading your CV and writing a cover letter. [Arabic: يمكنك التقديم على وظيفة مباشرة من خلال المنصة. ابحث عن الوظيفة التي تهمك، اضغط على 'قدم الآن'، واتبع الخطوات التي تتضمن تحميل سيرتك الذاتية وكتابة خطاب تقديمي.]",
1034
- "kb_contact_support": "You can contact our support team via email at support@edunatives.com or by phone at +966 123 4567. [Arabic: يمكنك التواصل مع فريق الدعم عبر البريد الإلكتروني support@edunatives.com أو عبر الهاتف على الرقم +966 123 4567.]"
1035
- }
1036
- return kb.get(intent, None)
1037
-
1038
- # -------------------- Conversation Session helpers --------------------
1039
- def initial_session() -> dict:
1040
- return {"state": "idle", "data": {}}
1041
-
1042
- def handle_uploaded_cv_for_session(session: dict, uploaded_file: Any) -> (str, dict):
1043
- if not uploaded_file: return "⚠️ No file received.", session
1044
- doc_info = process_uploaded_file(uploaded_file)
1045
- if not doc_info or "error" in (doc_info or {}):
1046
- return f"⚠️ Error processing uploaded CV: {doc_info.get('error') if doc_info else 'unknown error'}", session
1047
-
1048
- session["data"]["cvText"] = doc_info.get("content", "")
1049
- session["data"]["cvSkills"] = doc_info.get("skills", [])
1050
- st = session.get("state")
1051
- if st == "apply_wait_cv":
1052
- session["state"] = "apply_jobtitle"
1053
- detected = session["data"]["cvSkills"]
1054
- return f"CV received. Detected skills: {detected}. Which job title do you want to apply for? (type job title or 'any')", session
1055
- if st == "recommend_wait_cv":
1056
- rec_text = recommend_jobs_by_embedding(session["data"]["cvText"], top_k=5)
1057
- session = initial_session()
1058
- return f"Here are recommended jobs based on your CV:\n\n{rec_text}", session
1059
- return "CV uploaded and processed. What would you like to do next?", session
1060
-
1061
- # -------------------- Main message handler (state machine) --------------------
1062
- def handle_user_message(session: dict, user_text: str, uploaded_file: Any = None) -> (str, dict, bool):
1063
- session = session or initial_session()
1064
- st = session.get("state", "idle")
1065
- text = (user_text or "").strip()
1066
-
1067
- if text.lower() in ("cancel", "exit", "quit", "restart", "reset"):
1068
- return "Conversation reset. How can I help you now?", initial_session(), False
1069
-
1070
- if uploaded_file:
1071
- bot_msg, new_session = handle_uploaded_cv_for_session(session, uploaded_file)
1072
- return bot_msg, new_session, False
1073
-
1074
- # ========== IDLE STATE ==========
1075
- if st == "idle":
1076
- low = text.lower()
1077
-
1078
- if low in ("hi", "hello", "hey", "مرحبا", "ازيك", "السلام عليكم"):
1079
- return "👋 Hello! How can I support you today? You can ask about jobs, teams, or recommendations.", session, False
1080
- if low in ["who are you?", "who are you", "انت مين", "من انت"]:
1081
- return ("👋 I am EduNatives Assistant — your friendly academic and career guide.", session, False)
1082
-
1083
- if any(k in low for k in ["apply", "أريد التقديم", "عايز اقدم"]):
1084
- session["state"] = "apply_name"; session["data"] = {}
1085
- return "Okay — let's start your application. What's your full name?", session, False
1086
- if any(k in low for k in ["team", "create team", "join team", "انضم", "انشاء فريق"]):
1087
- session["state"] = "team_action"; session["data"] = {}
1088
- return "Do you want to create a team or join an existing team? (reply 'create' or 'join')", session, False
1089
- if any(k in low for k in ["recommend", "recommendation", "jobs for me", "رشح"]):
1090
- session["state"] = "recommend_wait_cv"; session["data"] = {}
1091
- return "Please upload your CV to get job recommendations (use the Upload button).", session, True
1092
 
1093
- intent = route_intent(text)
1094
- if intent and intent.startswith("kb_"):
1095
- kb_ans = kb_fallback(intent)
1096
- if kb_ans: return kb_ans, session, False
1097
 
1098
- # Default action: perform a global RAG search
1099
- rag_ans, _ = rag_answer_all(text)
1100
- return rag_ans, session, False
1101
 
1102
- # ========== APPLY FLOW ==========
1103
- if st == "apply_name":
1104
- session["data"]["applicantName"] = text or "Applicant"; session["state"] = "apply_email"
1105
- return "Thanks. What's your email address?", session, False
1106
- if st == "apply_email":
1107
- m = re.search(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)", text)
1108
- session["data"]["applicantEmail"] = m.group(1) if m else text; session["state"] = "apply_cover"
1109
- return "Got it. Please type a short cover letter (or type 'skip' to skip).", session, False
1110
- if st == "apply_cover":
1111
- session["data"]["coverLetter"] = "" if text.lower() == "skip" else text; session["state"] = "apply_wait_cv"
1112
- return "Please upload your CV now (use the Upload button).", session, True
1113
- if st == "apply_jobtitle":
1114
- session["data"]["targetJobTitle"] = text; found = query_weaviate_collection("Job", text, limit=3)
1115
- cv_skills = [s.lower() for s in session["data"].get("cvSkills", [])]
1116
- if found:
1117
- job = found[0]; job_skills = [s.lower() for s in (job.get("skills") or [])]
1118
- overlap = len([s for s in cv_skills if s in job_skills])
1119
- session["data"]["targetJobId"] = job.get("jobId"); session["state"] = "apply_confirm"
1120
- if overlap > 0:
1121
- return (f"I found a job: {job.get('title')} at {job.get('companyName')}. "
1122
- f"Detected {overlap} overlapping skills. Do you want to confirm application? (yes/no)"), session, False
1123
- else:
1124
- return (f"I found {job.get('title')} at {job.get('companyName')}, but no skill overlap. "
1125
- "Proceed anyway? (yes/no)"), session, False
1126
- else:
1127
- session["data"]["targetJobId"] = None; session["state"] = "apply_confirm"
1128
- return f"I couldn't find a job with that title. Apply for '{text}' anyway? (yes/no)", session, False
1129
- if st == "apply_confirm":
1130
- if text.lower() in ("yes", "y", "نعم"):
1131
- def save_application_to_weaviate(app):
1132
- print(f"Saving application to Weaviate: {app}")
1133
- try:
1134
- apps = weaviate_client.collections.get("Application")
1135
- apps.data.insert(app)
1136
- return True
1137
- except Exception as e:
1138
- print(f"Error saving application: {e}")
1139
- return False
1140
- app = {
1141
- "applicationId": str(uuid.uuid4()), "jobId": session["data"].get("targetJobId"),
1142
- "applicantName": session["data"].get("applicantName"), "applicantEmail": session["data"].get("applicantEmail"),
1143
- "coverLetter": session["data"].get("coverLetter", ""), "cvText": session["data"].get("cvText", ""),
1144
- "skills": session["data"].get("cvSkills", []), "createdAt": get_rfc3339_time()
1145
- }
1146
- ok = save_application_to_weaviate(app)
1147
- session = initial_session()
1148
- return ("🎉 Your application has been submitted successfully." if ok
1149
- else "⚠️ Failed to save application."), session, False
1150
  else:
1151
- session = initial_session()
1152
- return "Application cancelled.", session, False
1153
-
1154
- # ========== TEAM FLOW ==========
1155
- if st == "team_action":
1156
- low = text.lower()
1157
- if "create" in low or "إنشاء" in low: session["state"] = "team_create_name"; session["data"] = {}; return "Great — what's the team name?", session, False
1158
- if "join" in low or "انضم" in low: session["state"] = "team_join_name"; session["data"] = {}; return "Okay — what's the name of the team you want to join?", session, False
1159
- return "Please say 'create' or 'join'.", session, False
1160
- if st == "team_create_name":
1161
- session["data"]["team_name"] = text; session["state"] = "team_create_owner"
1162
- return "Team name saved. Who is the team owner (your name)?", session, False
1163
- if st == "team_create_owner":
1164
- session["data"]["owner"] = text; session["state"] = "team_create_skills"
1165
- return "Owner saved. Please list the team's skills (comma-separated).", session, False
1166
- if st == "team_create_skills":
1167
- session["data"]["skills"] = [s.strip() for s in text.split(",") if s.strip()]; session["state"] = "team_create_idea"
1168
- return "Skills saved. Please write a short idea/description for the project.", session, False
1169
- if st == "team_create_idea":
1170
- session["data"]["idea"] = text
1171
- def save_team_to_weaviate(props):
1172
- print(f"Saving team to Weaviate: {props}")
1173
- try:
1174
- teams = weaviate_client.collections.get("Team")
1175
- teams.data.insert(props)
1176
- return True
1177
- except Exception as e:
1178
- print(f"Error saving team: {e}")
1179
- return False
1180
- team_props = {
1181
- "teamId": str(uuid.uuid4()), "name": session["data"].get("team_name"),
1182
- "projectId": None, "members": [session["data"].get("owner")],
1183
- "skills": session["data"].get("skills", []), "creatorId": session["data"].get("owner"),
1184
- "createdAt": get_rfc3339_time(), "idea": session["data"].get("idea", "")
1185
- }
1186
- saved = save_team_to_weaviate(team_props)
1187
- session = initial_session()
1188
- return (f"๐ŸŽ‰ Team '{team_props['name']}' created!" if saved else "โš ๏ธ Failed to create team."), session, False
1189
- if st == "team_join_name":
1190
- session["data"]["team_name"] = text; session["state"] = "team_join_member"
1191
- return "What's your name (to add you to the team)?", session, False
1192
- if st == "team_join_member":
1193
- session["data"]["member_name"] = text; session["state"] = "team_join_skills"
1194
- return "Enter your skills (comma-separated).", session, False
1195
- if st == "team_join_skills":
1196
- skills = [s.strip() for s in text.split(",") if s.strip()]
1197
- def update_team_add_member(team_name, member_name, skills):
1198
- print(f"Adding member {member_name} to team {team_name}")
1199
- return f"๐ŸŽ‰ You have been added to the team '{team_name}'."
1200
- resp = update_team_add_member(session["data"].get("team_name"), session["data"].get("member_name"), skills)
1201
- session = initial_session()
1202
- return resp, session, False
1203
-
1204
- # ========== RECOMMEND FLOW ==========
1205
- if st == "recommend_wait_cv":
1206
- return "Please upload your CV (use the Upload button).", session, True
1207
-
1208
- return "Sorry โ€” I didn't understand that.", session, False
1209
-
1210
- # ================================
1211
- # Part 4 โ€” Gradio Chat UI wiring
1212
- # ================================
1213
-
1214
- import atexit
1215
- atexit.register(lambda: weaviate_client.close())
1216
-
1217
- def create_initial_session_for_state():
1218
- return initial_session()
1219
-
1220
- def append_to_history(history: List[Dict[str, str]], role: str, content: str) -> List[Dict[str, str]]:
1221
- history = history or []
1222
- history.append({"role": role, "content": content})
1223
- return history
1224
-
1225
- with gr.Blocks(css="""
1226
- .chatbot {height: 520px; overflow: auto;}
1227
- .user-bubble {background-color: #DCF8C6; padding: 10px; border-radius: 12px; max-width: 75%; float: right; clear: both; margin: 5px; word-wrap: break-word;}
1228
- .bot-bubble {background-color: #F1F0F0; padding: 10px; border-radius: 12px; max-width: 75%; float: left; clear: both; margin: 5px; word-wrap: break-word;}
1229
- .chatbox-container {display: flex; gap: 8px; margin-top: 10px;}
1230
- """) as demo:
1231
- gr.Markdown("# ๐Ÿ’ฌ EduNatives โ€” Conversational Job Portal")
1232
- chat_html = gr.HTML(format_chat_html([]))
1233
- with gr.Row(elem_classes="chatbox-container"):
1234
- user_input = gr.Textbox(placeholder="Type your message here (e.g. 'apply', 'create team', 'recommend')", lines=2)
1235
- send_btn = gr.Button("Send", variant="primary")
1236
- with gr.Row(visible=False) as file_row:
1237
- cv_uploader = gr.File(label="Upload CV (.pdf/.docx/.txt)", file_count="single", file_types=[".pdf", ".docx", ".txt"], visible=False)
1238
- upload_btn = gr.Button("Upload CV", visible=False)
1239
- with gr.Row():
1240
- clear_btn = gr.Button("Reset Conversation")
1241
- instructions = gr.Markdown("Commands: apply, create team, join team, recommend โ€” or just ask a question!")
1242
-
1243
- chat_history_state = gr.State([])
1244
- session_state = gr.State(create_initial_session_for_state())
1245
 
1246
- def handle_send(message: str, history: List[Dict[str, str]], session: dict):
1247
- history = history or []
1248
- session = session or initial_session()
1249
- if message and message.strip():
1250
- history = append_to_history(history, "user", message.strip())
1251
- bot_reply, new_session, show_uploader = handle_user_message(session, message or "", uploaded_file=None)
1252
- history = append_to_history(history, "assistant", bot_reply or "โ€ฆ")
1253
- html = format_chat_html(history)
1254
- return "", html, history, new_session, gr.update(visible=show_uploader), gr.update(visible=show_uploader)
1255
 
1256
- def handle_upload(file_obj, history: List[Dict[str, str]], session: dict):
1257
- history = history or []
1258
- session = session or initial_session()
1259
- filename = getattr(file_obj, "name", "uploaded_file")
1260
- history = append_to_history(history, "user", f"๐Ÿ“Ž Uploaded file: {os.path.basename(filename)}")
1261
- bot_reply, new_session, show_uploader = handle_user_message(session, "", uploaded_file=file_obj)
1262
- history = append_to_history(history, "assistant", bot_reply or "โ€ฆ")
1263
- html = format_chat_html(history)
1264
- return html, history, new_session, gr.update(visible=show_uploader), gr.update(visible=show_uploader)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1265
 
1266
- def handle_reset():
1267
- new_hist = []
1268
- new_session = initial_session()
1269
- html = format_chat_html(new_hist)
1270
- return html, new_hist, new_session, gr.update(visible=False), gr.update(visible=False)
1271
 
1272
- send_btn.click(
1273
- fn=handle_send,
1274
- inputs=[user_input, chat_history_state, session_state],
1275
- outputs=[user_input, chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
1276
- queue=True
 
1277
  )
1278
- upload_btn.click(
1279
- fn=handle_upload,
1280
- inputs=[cv_uploader, chat_history_state, session_state],
1281
- outputs=[chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
1282
- queue=True
1283
  )
1284
- clear_btn.click(
1285
- fn=handle_reset,
1286
- inputs=[],
1287
- outputs=[chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
1288
- queue=False
 
 
 
 
 
 
 
1289
  )
1290
 
 
 
 
 
1291
  if __name__ == "__main__":
1292
  demo.launch(debug=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import weaviate
3
+ from weaviate.auth import Auth
4
+ from openai import OpenAI
5
  import json
 
 
 
 
 
 
 
 
6
  import gradio as gr
7
+ import atexit
8
+ import datetime
9
+ import re
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
+ import pypdf
12
+ import docx
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
# --- Configuration -----------------------------------------------------------
# Every setting can be overridden with an environment variable of the same
# name. Non-secret settings keep their previous values as defaults.
MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b")
EMBEDDING_MODEL_NAME = os.getenv("EMBEDDING_MODEL_NAME", "Qwen/Qwen3-Embedding-8B")
BASE_URL = os.getenv("BASE_URL", "https://api.deepinfra.com/v1/openai")
WEAVIATE_URL = os.getenv("WEAVIATE_URL", "https://maf5cvz1saelnti3k34a.c.europe-west3.gcp.weaviate.cloud")

# Credentials are deliberately NOT given in-source defaults: API keys committed
# to a repository are readable by anyone with access to it and must be treated
# as leaked. Supply them through the environment (e.g. Space/CI secrets).
DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY", "")
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY", "")

# Surface a missing credential early; otherwise the first sign of trouble is an
# opaque authentication failure when the clients are created.
for _name, _value in (
    ("DEEPINFRA_API_KEY", DEEPINFRA_API_KEY),
    ("WEAVIATE_API_KEY", WEAVIATE_API_KEY),
):
    if not _value:
        print(f"WARNING: environment variable {_name} is not set; authentication will fail.")
20
+
21
def create_application_schema(client: weaviate.WeaviateClient):
    """Create the "Application" collection on *client* unless it already exists.

    The collection stores one object per submitted job application with the
    properties ``job_id``, ``cv_content`` and ``cover_letter_content`` (text)
    and ``submission_date`` (date). Progress is reported with ``print``.
    """
    collection_name = "Application"

    # Nothing to do when the collection is already present.
    if client.collections.exists(collection_name):
        print(f"โœ… Collection '{collection_name}' already exists.")
        return

    schema = weaviate.classes.config
    text_type = schema.DataType.TEXT

    print(f"Creating collection: {collection_name}")
    client.collections.create(
        name=collection_name,
        properties=[
            schema.Property(name="job_id", data_type=text_type),
            schema.Property(name="cv_content", data_type=text_type),
            schema.Property(name="cover_letter_content", data_type=text_type),
            schema.Property(name="submission_date", data_type=schema.DataType.DATE),
        ]
    )
    print(f"โœ… Collection '{collection_name}' created successfully.")
37
+
38
class WeaviateChatbot:
    """Retrieval-augmented assistant backed by Weaviate and an OpenAI-compatible API.

    A query is embedded, used for a vector search over the collections listed
    in ``collection_names``, and the retrieved objects are passed to the chat
    model as context. The class also persists job applications into the
    "Application" collection.
    """

    def __init__(self, weaviate_url, weaviate_api_key, llm_api_key, llm_base_url):
        """Connect to Weaviate, ensure the Application schema exists, build the LLM client."""
        print("Connecting to clients...")
        self.weaviate_client = weaviate.connect_to_weaviate_cloud(
            cluster_url=weaviate_url,
            auth_credentials=Auth.api_key(weaviate_api_key),
            skip_init_checks=True
        )
        # The connect helper normally hands back a connected client; only
        # connect explicitly when that is not the case.
        if not self.weaviate_client.is_connected():
            self.weaviate_client.connect()
        print("โœ… Successfully connected to Weaviate.")

        create_application_schema(self.weaviate_client)

        self.llm_client = OpenAI(api_key=llm_api_key, base_url=llm_base_url)
        print("โœ… Successfully connected to LLM client (DeepInfra).")

        # Collections searched for context on every query.
        self.collection_names = ["Job", "Opportunities", "Project"]

    def _embed_text(self, text: str) -> list[float]:
        """Return the embedding vector of *text* from the configured embedding model."""
        resp = self.llm_client.embeddings.create(model=EMBEDDING_MODEL_NAME, input=text, encoding_format="float")
        return resp.data[0].embedding

    def _search_database(self, query_vector: list[float], limit: int = 5) -> str:
        """Run a near-vector search in every configured collection.

        Returns the hits rendered as text blocks joined by ``---`` separators,
        or a fixed "nothing found" sentence when there are no hits. A failing
        collection is logged and skipped so the others can still contribute.
        """
        all_results = []
        for name in self.collection_names:
            try:
                collection = self.weaviate_client.collections.get(name)
                response = collection.query.near_vector(near_vector=query_vector, limit=limit)
                for item in response.objects:
                    # default=str keeps non-JSON values (dates, UUIDs) printable.
                    all_results.append(f"Type: {name}\nContent: {json.dumps(item.properties, indent=2, default=str)}\n")
            except Exception as e:
                # Best effort: one unavailable collection must not fail the query.
                print(f"Could not query collection '{name}'. Error: {e}")
        return "\n---\n".join(all_results) if all_results else "No relevant information found in the database."

    def _generate_response(self, query: str, context: str) -> str:
        """Ask the chat model to answer *query* using *context*.

        Returns the stripped reply text, or an empty string when the model
        returns no text content. The prompt instructs the model to reply with
        ``STARTING_APPLICATION_PROCESS:<job_id>`` when the user wants to apply.
        """
        prompt = f"""
        You are *EduNatives Assistant*. Your main goal is to help users find opportunities and apply for them.
        - First, answer the user's question based on the CONTEXT from the database.
        - **IMPORTANT**: If you list jobs, make sure each job has a clear identifier like (job_021).
        - If the user says they want to apply for a job, for example "I want to apply for job_021", you MUST respond ONLY with the exact phrase: `STARTING_APPLICATION_PROCESS:job_021`. Do not add any other text.

        --- CONTEXT FROM DATABASE START ---
        {context}
        --- CONTEXT FROM DATABASE END ---

        User Question: {query}

        Answer:
        """
        response = self.llm_client.chat.completions.create(model=MODEL_NAME, messages=[{"role": "user", "content": prompt}], max_tokens=4096)
        # message.content is optional in the API response; guard against None
        # instead of crashing on .strip().
        content = response.choices[0].message.content
        return (content or "").strip()

    def ask(self, query: str):
        """Answer *query*: embed it, retrieve context, then generate the reply."""
        print(f"\nProcessing query: '{query}'")
        query_vector = self._embed_text(query)
        context = self._search_database(query_vector)
        answer = self._generate_response(query, context)
        return answer

    def save_application(self, application_data: dict):
        """Insert one application into the "Application" collection.

        Reads ``job_id``, ``cv_content`` and ``cover_letter_content`` from
        *application_data* and stamps the current UTC time. Returns ``True``
        on success and ``False`` (after logging) on any failure.
        """
        print("Saving application to Weaviate...")
        try:
            applications = self.weaviate_client.collections.get("Application")
            app_uuid = applications.data.insert({
                "job_id": application_data.get("job_id"),
                "cv_content": application_data.get("cv_content"),
                "cover_letter_content": application_data.get("cover_letter_content"),
                "submission_date": datetime.datetime.now(datetime.timezone.utc)
            })
            print(f"โœ… Application saved with UUID: {app_uuid}")
            return True
        except Exception as e:
            print(f"โŒ Failed to save application: {e}")
            return False

    def close_connections(self):
        """Close the Weaviate connection if it is still open."""
        if self.weaviate_client.is_connected():
            self.weaviate_client.close()
            print("\nWeaviate connection closed.")
117
+
118
+
119
+ # --- Helper to extract text from uploaded files ---
120
+ def _extract_text_from_file(file_path):
121
+ # ... (No changes needed in this function, code is omitted for brevity)
122
+ print(f"Extracting text from: {file_path}")
123
+ if file_path.endswith('.pdf'):
124
+ try:
125
+ reader = pypdf.PdfReader(file_path)
126
+ text = "".join(page.extract_text() for page in reader.pages)
127
+ return text
128
+ except Exception as e:
129
+ return f"Error reading PDF: {e}"
130
+ elif file_path.endswith('.docx'):
131
+ try:
132
+ doc = docx.Document(file_path)
133
+ return "\n".join([para.text for para in doc.paragraphs])
134
+ except Exception as e:
135
+ return f"Error reading DOCX: {e}"
136
+ elif file_path.endswith('.txt'):
137
+ try:
138
+ with open(file_path, 'r', encoding='utf-8') as f:
139
+ return f.read()
140
+ except Exception as e:
141
+ return f"Error reading TXT: {e}"
142
+ return "Unsupported file type."
143
+
144
+
145
# One shared chatbot for all UI callbacks; constructing it opens the Weaviate
# and LLM clients at import time.
chatbot_instance = WeaviateChatbot(
    weaviate_url=WEAVIATE_URL,
    weaviate_api_key=WEAVIATE_API_KEY,
    llm_api_key=DEEPINFRA_API_KEY,
    llm_base_url=BASE_URL,
)
# Make sure the Weaviate connection is released when the interpreter exits.
atexit.register(chatbot_instance.close_connections)
147
+
148
def chat_interface_func(message: str, history: list, app_state: dict, file_obj: object):
    """Gradio callback driving both normal chat and the job-application flow.

    ``app_state["mode"]`` is one of ``"GENERAL"``, ``"APPLYING_CV"`` or
    ``"APPLYING_COVER_LETTER"``. While applying, an uploaded file is read and
    stored as the CV, then as the cover letter, after which the application
    is saved. In general mode the text message is answered by the chatbot; a
    reply starting with ``STARTING_APPLICATION_PROCESS:`` switches to the
    application flow.

    Returns ``(history, app_state, file_uploader_update)``; history entries
    are ``(user_text, bot_text)`` tuples where either side may be ``None``.
    """
    history = history or []
    app_state = app_state or {"mode": "GENERAL"}
    mode = app_state.get("mode", "GENERAL")

    if file_obj is not None and mode in ("APPLYING_CV", "APPLYING_COVER_LETTER"):
        # Depending on the Gradio version/config the upload arrives either as
        # an object exposing .name or directly as a path string.
        file_path = getattr(file_obj, "name", file_obj)
        file_name = os.path.basename(file_path)
        text = _extract_text_from_file(file_path)

        # The extractor reports failures as text; never store those (or an
        # empty document) as application content.
        unreadable = (
            not text
            or not text.strip()
            or text.startswith(("Error reading", "Unsupported file type."))
        )
        if unreadable:
            history.append((
                f"๐Ÿ“„ '{file_name}' uploaded.",
                "Sorry, I could not read any text from that file. Please upload a readable PDF, DOCX or TXT document.",
            ))
            return history, app_state, gr.update(visible=True, value=None)

        if mode == "APPLYING_CV":
            app_state["cv_content"] = text
            history.append((f"๐Ÿ“„ CV '{file_name}' uploaded.", "Great! Now, please upload your Cover Letter."))
            app_state["mode"] = "APPLYING_COVER_LETTER"
            return history, app_state, gr.update(visible=True, value=None)

        # mode == "APPLYING_COVER_LETTER"
        app_state["cover_letter_content"] = text
        history.append((f"๐Ÿ“„ Cover Letter '{file_name}' uploaded.", "Thank you! Submitting your application now..."))

        success = chatbot_instance.save_application(app_state)
        if success:
            final_message = f"โœ… Your application for job **{app_state.get('job_id')}** has been submitted successfully! What else can I help you with?"
        else:
            final_message = "โŒ Sorry, there was an error submitting your application. Please try again later."

        history.append((None, final_message))
        app_state = {"mode": "GENERAL"}
        return history, app_state, gr.update(visible=False, value=None)

    if message:
        history.append((message, None))

        # Text typed in the middle of an application: keep asking for the file.
        if "APPLYING" in mode:
            history.append((None, "Please upload the requested document to continue."))
            return history, app_state, gr.update(visible=True)

        try:
            response = chatbot_instance.ask(message)
        except Exception as e:
            # UI boundary: report backend failures in the chat instead of
            # letting the callback crash.
            print(f"Error while answering query: {e}")
            history.append((None, "Sorry, something went wrong while processing your request. Please try again."))
            return history, app_state, gr.update(visible=False)

        marker = "STARTING_APPLICATION_PROCESS:"
        if response.startswith(marker):
            # Take the first token after the marker so stray whitespace or
            # trailing text from the model does not end up in the job id, and
            # ids containing ':' are not cut short.
            tokens = response[len(marker):].split()
            job_id = tokens[0] if tokens else ""
            if not job_id:
                history.append((None, "Which job would you like to apply for? Please include its identifier (for example job_021)."))
                return history, app_state, gr.update(visible=False)

            app_state["mode"] = "APPLYING_CV"
            app_state["job_id"] = job_id
            bot_message = f"Starting application for job **{job_id}**. Please upload your CV."
            history.append((None, bot_message))
            return history, app_state, gr.update(visible=True)

        history.append((None, response))
        return history, app_state, gr.update(visible=False)

    return history, app_state, gr.update(visible=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
196
 
 
 
 
 
 
 
 
 
 
197
 
198
# Gradio UI: a chat window, five example-question buttons, a text box with a
# Send button, and a file uploader that is shown only during an application.
with gr.Blocks(theme=gr.themes.Soft(), title="EduNatives Assistant") as demo:

    # Per-session application progress consumed by chat_interface_func.
    application_state = gr.State({"mode": "GENERAL", "job_id": None, "cv_content": None, "cover_letter_content": None})
    file_uploader = gr.File(label="Upload Document", file_types=['.pdf', '.docx', '.txt'], visible=False)

    gr.Markdown(
        """
        # EduNatives Assistant
        Ask me anything about jobs, projects, or student availability. I can also help you navigate the EduNatives app.
        """
    )

    chatbot_window = gr.Chatbot(height=450, label="Chat Window", bubble_full_width=False)

    with gr.Column() as examples_container:
        examples_list = [
            "What jobs are available at Google?",
            "Find students with experience in Python and Machine Learning.",
            "Tell me about the 'AI-Powered Medical Imaging Analysis' project.",
            "ูƒูŠู ูŠู…ูƒู†ู†ูŠ ูƒุทุงู„ุจ ุงู„ุชุณุฌูŠู„ ููŠ ุงู„ุชุทุจูŠู‚ุŸ",
            "I'm a company, how can I post an internship?"
        ]
        # Three examples on the first row, the remaining two on the second.
        with gr.Row():
            top_row_buttons = [gr.Button(label, variant='secondary') for label in examples_list[:3]]
        with gr.Row():
            bottom_row_buttons = [gr.Button(label, variant='secondary') for label in examples_list[3:5]]

        example_buttons = top_row_buttons + bottom_row_buttons

    with gr.Row() as main_input_row:
        text_input = gr.Textbox(placeholder="Ask your question here...", container=False, scale=7)
        submit_btn = gr.Button("Send", variant="primary", scale=1)

    outputs_list = [chatbot_window, application_state, file_uploader]
    # Every trigger passes the same trailing inputs; only the message source differs.
    shared_inputs = [chatbot_window, application_state, file_uploader]

    submit_btn.click(
        fn=chat_interface_func,
        inputs=[text_input] + shared_inputs,
        outputs=outputs_list
    )
    text_input.submit(
        fn=chat_interface_func,
        inputs=[text_input] + shared_inputs,
        outputs=outputs_list
    )

    # An example button sends its own label as the message.
    for btn in example_buttons:
        btn.click(
            fn=chat_interface_func,
            inputs=[btn] + shared_inputs,
            outputs=outputs_list
        )

    # Uploads carry no text, so an invisible empty textbox stands in for the message.
    file_uploader.upload(
        fn=chat_interface_func,
        inputs=[gr.Textbox(value="", visible=False)] + shared_inputs,
        outputs=outputs_list
    )

    # Clear the text box after a message has been sent.
    submit_btn.click(lambda: "", outputs=text_input)
    text_input.submit(lambda: "", outputs=text_input)


if __name__ == "__main__":
    demo.launch(debug=True)