afouda committed on
Commit
a9a9fa8
·
verified ·
1 Parent(s): ec184cf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +159 -748
app.py CHANGED
@@ -1,766 +1,177 @@
1
- # edunatives_full.py
2
- from __future__ import annotations
3
- import os
4
- import re
5
- import uuid
6
  import json
7
- import time
8
- import fitz # PyMuPDF
9
  import docx
10
- import markdown
11
- from datetime import datetime, timezone
12
- from typing import List, Dict, Any, Optional, Tuple
13
- from dataclasses import dataclass
14
-
15
  import gradio as gr
16
  from openai import OpenAI
17
- import weaviate
18
- from weaviate.auth import AuthApiKey
19
- import numpy as np
20
-
21
- # -------------------- Configuration (edit these or set env vars) --------------------
22
- MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b")
23
- DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY", "kPEm10rrnxXrCf0TuB6Xcd7Y7lp3YgKa")
24
- BASE_URL = os.getenv("BASE_URL", "https://api.deepinfra.com/v1/openai")
25
 
26
- WEAVIATE_URL = os.getenv("WEAVIATE_URL", "htorgbgpt4w63nvf1yeuw.c0.us-west3.gcp.weaviate.cloud")
27
- WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY", "ZUd6clB5WmYzVGkxeU40cl96NTY5UkViUlVzY05Md3IzQ0JKelBZQmxGZHRPeGpCeGdxS1FUNnlYUkFFPV92MjAw")
 
 
28
 
29
- MEMORY_FILE = os.getenv("MEMORY_FILE", "chat_memory.json")
30
- LOG_FILE = os.getenv("LOG_FILE", "interaction_logs.json")
31
-
32
- # -------------------- Clients --------------------
33
- llm_client = OpenAI(api_key=DEEPINFRA_API_KEY, base_url=BASE_URL)
34
-
35
- weaviate_client = weaviate.Client(
36
  url=WEAVIATE_URL,
37
- auth_client_secret=AuthApiKey(api_key=WEAVIATE_API_KEY),
38
  )
 
39
 
40
- # -------------------- Helpers & constants --------------------
41
- ARABIC_RANGE = (
42
- (0x0600, 0x06FF), (0x0750, 0x077F), (0x08A0, 0x08FF),
43
- (0xFB50, 0xFDFF), (0xFE70, 0xFEFF), (0x1EE00, 0x1EEFF)
44
- )
45
- def is_arabic(text: str) -> bool:
46
- for ch in text or "":
47
- code = ord(ch)
48
- for a, b in ARABIC_RANGE:
49
- if a <= code <= b:
50
- return True
51
- return False
52
-
53
- def get_rfc3339_time() -> str:
54
- return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
55
-
56
- # -------------------- Simple KB --------------------
57
- KB: Dict[str, Dict[str, str]] = {
58
- "student_registration": {
59
- "en": (
60
- "**How to register / create an account (Student)**\n\n"
61
- "1. Go to the EduNatives site and choose Sign Up.\n"
62
- "2. Use your university email if possible and verify it.\n"
63
- "3. Complete your profile (major, skills, interests).\n"
64
- "4. Enable notifications for internships/scholarships."
65
- ),
66
- "ar": (
67
- "**طريقة التسجيل وإنشاء حساب (طلاب)**\n\n"
68
- "١. اذهب إلى موقع EduNatives واختر Sign Up.\n"
69
- "٢. يفضل استخدام إيميل الجامعة وتأكيده.\n"
70
- "٣. أكمل ملفك الشخصي (التخصص، المهارات، الاهتمامات).\n"
71
- "٤. فعّل التنبيهات لفرص التدريب والمنح."
72
- ),
73
- },
74
- "student_internships": {
75
- "en": (
76
- "**Finding internships & scholarships**\n\n"
77
- "- Use the search filters: field, location, duration, paid/unpaid.\n"
78
- "- Follow companies and set up alerts for new opportunities.\n"
79
- "- Keep your profile and resume updated."
80
- ),
81
- "ar": (
82
- "**كيفية العثور على تدريب أو منحة**\n\n"
83
- "- استخدم فلاتر البحث: التخصص، المكان، المدة، مدفوع/غير مدفوع.\n"
84
- "- تابع الشركات وفعّل التنبيهات للفرص الجديدة.\n"
85
- "- حافظ على تحديث ملفك الشخصي وسيرتك الذاتية."
86
- ),
87
- },
88
- }
89
-
90
- # keys to detect intents (simple)
91
- KEYS = {
92
- "student_registration": ["register", "sign up", "signup", "create account", "account", "تسجيل", "انشاء", "إنشاء", "حساب"],
93
- "student_internships": ["intern", "internship", "training", "scholar", "scholarship", "grant", "opportunity", "تدريب", "منحة", "فرصة"],
94
- "Job": ["job", "وظيفة", "وظائف", "وظايف"],
95
- "Application": ["apply", "application", "cover letter", "تقديم", "طلب"],
96
- "Memory": ["memory", "conversation history", "ذاكرة"],
97
- "Opportunities": ["opportunity", "فرص", "opportunities"],
98
- "Project": ["project", "مشروع"],
99
- "Team": ["team", "فريق"]
100
- }
101
-
102
- @dataclass
103
- class Route:
104
- audience: str
105
- intent: str
106
- language: str
107
-
108
- def route_intent(text: str, forced_audience: Optional[str]=None) -> Route:
109
- lang = "ar" if is_arabic(text) else "en"
110
- match_label = None
111
- text_l = (text or "").lower()
112
- for label, kws in KEYS.items():
113
- for kw in kws:
114
- if kw in text_l:
115
- match_label = label
116
- break
117
- if match_label:
118
- break
119
- audience = forced_audience if forced_audience else "general"
120
- intent = match_label if match_label else "general"
121
- return Route(audience=audience, intent=intent, language=lang)
122
-
123
- # -------------------- Skill extraction (simple regex baseline) --------------------
124
- _SKILL_REGEX = re.compile(
125
- r"\b(Python|Machine Learning|Deep Learning|NLP|Data Science|SQL|Docker|Kubernetes|React|JavaScript|Java|C\+\+|C#|TensorFlow|PyTorch|Pandas|NumPy|Tableau|Excel)\b",
126
- re.IGNORECASE
127
- )
128
-
129
- def extract_skills_from_text(cv_text: str) -> List[str]:
130
- skills = list({m.group(0).lower() for m in _SKILL_REGEX.finditer(cv_text or "")})
131
- return [s.capitalize() for s in skills]
132
-
133
- # -------------------- File processing --------------------
134
- def process_uploaded_file(file_obj: Any) -> dict | None:
135
- """
136
- Accepts a Gradio file-like object (file_obj). Returns a dict:
137
- {"content": str, "skills": [...], "filename": "..."} or {"error": "..."}
138
- """
139
- if not file_obj:
140
- return None
141
- # Gradio file object typically has a 'name' attribute with path
142
- file_path = getattr(file_obj, "name", None)
143
- if not file_path or not os.path.exists(file_path):
144
- # sometimes file_obj is a dict with 'name' key
145
- try:
146
- file_path = file_obj["name"]
147
- except Exception:
148
- return {"error": "Uploaded file not accessible."}
149
-
150
- filename = os.path.basename(file_path)
151
- text_content = ""
152
- try:
153
- if filename.lower().endswith(".pdf"):
154
- with fitz.open(file_path) as doc:
155
- for page in doc:
156
- text_content += page.get_text()
157
- elif filename.lower().endswith(".docx"):
158
- docp = docx.Document(file_path)
159
- for p in docp.paragraphs:
160
- text_content += p.text + "\n"
161
- elif filename.lower().endswith(".txt"):
162
- with open(file_path, "r", encoding="utf-8") as f:
163
- text_content = f.read()
164
- else:
165
- return {"error": f"Unsupported file type: {filename}"}
166
-
167
- skills = extract_skills_from_text(text_content)
168
- return {"content": text_content.strip(), "skills": skills, "filename": filename}
169
- except Exception as e:
170
- return {"error": f"Error processing file {filename}: {e}"}
171
-
172
- # -------------------- Weaviate schema helpers --------------------
173
- def class_exists(class_name: str) -> bool:
174
- schema = weaviate_client.schema.get()
175
- classes = schema.get("classes", []) if isinstance(schema, dict) else []
176
- for c in classes:
177
- if c.get("class") == class_name:
178
- return True
179
- return False
180
-
181
  def ensure_collections():
182
- """
183
- Creates minimal schema classes for Job, Application, Memory, Opportunities, Project, Team
184
- if they do not exist already.
185
- """
186
- if not class_exists("Job"):
187
- job_class = {
188
- "class": "Job",
189
- "properties": [
190
- {"name": "jobId", "dataType": ["string"]},
191
- {"name": "title", "dataType": ["text"]},
192
- {"name": "companyName", "dataType": ["text"]},
193
- {"name": "description", "dataType": ["text"]},
194
- {"name": "skills", "dataType": ["string[]"]},
195
- {"name": "salaryDetails", "dataType": ["text"]},
196
- {"name": "workplaceType", "dataType": ["text"]},
197
- ],
198
- }
199
- weaviate_client.schema.create_class(job_class)
200
-
201
- if not class_exists("Application"):
202
- app_class = {
203
- "class": "Application",
204
- "properties": [
205
- {"name": "applicationId", "dataType": ["string"]},
206
- {"name": "jobId", "dataType": ["string"]},
207
- {"name": "applicantName", "dataType": ["text"]},
208
- {"name": "applicantEmail", "dataType": ["text"]},
209
- {"name": "coverLetter", "dataType": ["text"]},
210
- {"name": "cvText", "dataType": ["text"]},
211
- {"name": "skills", "dataType": ["string[]"]},
212
- {"name": "createdAt", "dataType": ["date"]},
213
- ],
214
- }
215
- weaviate_client.schema.create_class(app_class)
216
-
217
- if not class_exists("Memory"):
218
- mem_class = {
219
- "class": "Memory",
220
- "properties": [
221
- {"name": "memoryId", "dataType": ["string"]},
222
- {"name": "sessionId", "dataType": ["string"]},
223
- {"name": "text", "dataType": ["text"]},
224
- {"name": "createdAt", "dataType": ["date"]},
225
- ],
226
- }
227
- weaviate_client.schema.create_class(mem_class)
228
 
229
- if not class_exists("Opportunities"):
230
- opp_class = {
231
- "class": "Opportunities",
232
- "properties": [
233
- {"name": "oppId", "dataType": ["string"]},
234
- {"name": "title", "dataType": ["text"]},
235
- {"name": "description", "dataType": ["text"]},
236
- {"name": "skills", "dataType": ["string[]"]},
237
- ],
238
- }
239
- weaviate_client.schema.create_class(opp_class)
240
-
241
- if not class_exists("Project"):
242
- proj_class = {
243
- "class": "Project",
244
- "properties": [
245
- {"name": "projectId", "dataType": ["string"]},
246
- {"name": "title", "dataType": ["text"]},
247
- {"name": "description", "dataType": ["text"]},
248
- {"name": "skills", "dataType": ["string[]"]},
249
- ],
250
- }
251
- weaviate_client.schema.create_class(proj_class)
252
-
253
- if not class_exists("Team"):
254
- team_class = {
255
- "class": "Team",
256
- "properties": [
257
- {"name": "teamId", "dataType": ["string"]},
258
- {"name": "name", "dataType": ["text"]},
259
- {"name": "projectId", "dataType": ["string"]},
260
- {"name": "members", "dataType": ["string[]"]},
261
- {"name": "skills", "dataType": ["string[]"]},
262
- {"name": "creatorId", "dataType": ["string"]},
263
- {"name": "createdAt", "dataType": ["date"]},
264
- {"name": "idea", "dataType": ["text"]},
265
- ],
266
- }
267
- weaviate_client.schema.create_class(team_class)
268
-
269
- # ensure schema exists
270
  ensure_collections()
271
 
272
- # -------------------- Query helpers --------------------
273
- def query_weaviate_collection(class_name: str, query_text: str, limit: int = 5) -> List[dict]:
274
- """
275
- Simple retrieval: fetch objects where text fields match the query_text (basic).
276
- Uses a basic search via .get (no BM25 module required).
277
- """
278
- try:
279
- q = weaviate_client.query.get(class_name, ["*"]).with_limit(limit)
280
- res = q.do()
281
- hits = res.get("data", {}).get("Get", {}).get(class_name, [])
282
- # naive filter: return items that contain query_text in title/description/skills
283
- low = (query_text or "").lower()
284
- items = []
285
- for h in hits:
286
- props = h.get("properties", {})
287
- text_blob = " ".join(
288
- [str(props.get(k, "")) for k in ("title", "description", "companyName", "skills")]
289
- ).lower()
290
- if not low or low in text_blob:
291
- items.append(props)
292
- return items[:limit]
293
- except Exception as e:
294
- print("[Weaviate Query Error]", e)
295
- return []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
296
 
297
- # -------------------- RAG prompt builder --------------------
298
- def build_rag_prompt(user_question: str, retrieved_items: List[dict], class_name: str) -> str:
299
- context_parts = []
300
- for i, item in enumerate(retrieved_items, 1):
301
- details = {k: item.get(k) for k in item.keys()}
302
- item_str = f"--- Record {i} ---\n{json.dumps(details, indent=2, ensure_ascii=False)}"
303
- context_parts.append(item_str)
304
- context_block = "\n\n".join(context_parts)
305
- return (
306
- f'User Question: "{user_question}"\n\n'
307
- f"Retrieved Data:\n{context_block}\n\n"
308
- "You are an expert assistant. Use ONLY the Retrieved Data above to answer the question, "
309
- "summarize, and include 'Next Steps' for the user."
310
  )
 
311
 
312
- def rag_answer(user_question: str, class_name: str, top_k: int = 5) -> Tuple[str, List[dict]]:
313
- retrieved = query_weaviate_collection(class_name, user_question, limit=top_k)
314
- if not retrieved:
315
- return "", []
316
- prompt = build_rag_prompt(user_question, retrieved, class_name)
317
- try:
318
- resp = llm_client.chat.completions.create(
319
- model=MODEL_NAME,
320
- messages=[
321
- {"role": "system", "content": "You are EduNatives Assistant. Be concise and practical."},
322
- {"role": "user", "content": prompt}
323
- ],
324
- temperature=0.2,
325
- max_tokens=1200,
326
- )
327
- answer = resp.choices[0].message.content or ""
328
- except Exception as e:
329
- print("[RAG LLM Error]", e)
330
- answer = ""
331
- return answer, retrieved
332
-
333
- # -------------------- Embeddings & Recommendations --------------------
334
- def compute_embedding(text: str) -> List[float]:
335
- try:
336
- resp = llm_client.embeddings.create(
337
- model="Qwen/Qwen3-Embedding-8B",
338
- input=text,
339
- encoding_format="float"
340
- )
341
- if isinstance(resp, dict):
342
- data = resp.get("data", [])
343
- if data and isinstance(data[0], dict):
344
- return data[0].get("embedding", [])
345
- if hasattr(resp, "data") and resp.data:
346
- return resp.data[0].embedding
347
- except Exception as e:
348
- print("[compute_embedding] error:", e)
349
- return []
350
-
351
- def cosine_similarity(a: List[float], b: List[float]) -> float:
352
- try:
353
- va = np.array(a, dtype=float)
354
- vb = np.array(b, dtype=float)
355
- if va.size == 0 or vb.size == 0:
356
- return 0.0
357
- denom = (np.linalg.norm(va) * np.linalg.norm(vb))
358
- if denom == 0:
359
- return 0.0
360
- return float(np.dot(va, vb) / denom)
361
- except Exception as e:
362
- print("[cosine_similarity] error:", e)
363
- return 0.0
364
-
365
- def recommend_jobs_by_embedding(cv_text: str, top_k: int = 5, jobs_fetch_limit: int = 200) -> str:
366
- skills = extract_skills_from_text(cv_text or "")
367
- user_text = " ".join(skills) if skills else (cv_text or "")[:500]
368
- user_emb = compute_embedding(user_text)
369
- if not user_emb:
370
- return "⚠️ Unable to compute embedding for your CV. Check API keys."
371
-
372
- # fetch jobs
373
- try:
374
- res = weaviate_client.query.get("Job", ["*"]).with_limit(jobs_fetch_limit).do()
375
- hits = res.get("data", {}).get("Get", {}).get("Job", [])
376
- if not hits:
377
- return "⚠️ No jobs found in the database."
378
- except Exception as e:
379
- print("[recommend_jobs] Weaviate fetch error:", e)
380
- return "⚠️ Could not fetch jobs from the database."
381
-
382
- scored_jobs = []
383
- for h in hits:
384
- props = h.get("properties", {})
385
- job_text_parts = []
386
- if props.get("skills"):
387
- job_text_parts.append(" ".join(props.get("skills")))
388
- if props.get("title"):
389
- job_text_parts.append(props.get("title"))
390
- if props.get("description"):
391
- job_text_parts.append((props.get("description") or "")[:2000])
392
- job_text = " ".join(job_text_parts).strip() or (props.get("title") or "")
393
- job_emb = compute_embedding(job_text)
394
- if not job_emb:
395
- continue
396
- score = cosine_similarity(user_emb, job_emb)
397
- scored_jobs.append((score, props))
398
-
399
- if not scored_jobs:
400
- return "⚠️ No jobs could be embedded / compared."
401
-
402
- scored_jobs.sort(key=lambda x: x[0], reverse=True)
403
- top = scored_jobs[:top_k]
404
- lines = []
405
- for score, props in top:
406
- title = props.get("title", "No title")
407
- company = props.get("companyName", "Unknown company")
408
- job_id = props.get("jobId", "")
409
- salary = props.get("salaryDetails") or "Not specified"
410
- skills_list = props.get("skills") or []
411
- description = (props.get("description") or "").strip()
412
- lines.append(
413
- f"**{title}** at *{company}* \n"
414
- f"- Job ID: `{job_id}` \n"
415
- f"- Score: {score:.3f} \n"
416
- f"- Salary: {salary} \n"
417
- f"- Skills: {skills_list} \n"
418
- f"- Description: {description[:600]}{'...' if len(description) > 600 else ''} \n"
419
- f"---"
420
- )
421
- return "\n\n".join(lines)
422
-
423
- # -------------------- Weaviate save/update helpers --------------------
424
- def save_application_to_weaviate(app: dict) -> bool:
425
- try:
426
- weaviate_client.data_object.create(app, "Application", uuid=app.get("applicationId"))
427
- return True
428
- except Exception as e:
429
- print("[save_application] error:", e)
430
- return False
431
-
432
- def save_team_to_weaviate(team_props: dict) -> bool:
433
- try:
434
- weaviate_client.data_object.create(team_props, "Team", uuid=team_props.get("teamId"))
435
- return True
436
- except Exception as e:
437
- print("[save_team] error:", e)
438
- return False
439
-
440
- def update_team_add_member(team_name: str, member_name: str, skills: List[str]) -> str:
441
- # naive: find team by name, append member, update object
442
- try:
443
- q = weaviate_client.query.get("Team", ["teamId", "name", "members", "skills"]).with_where({
444
- "path": ["name"],
445
- "operator": "Equal",
446
- "valueString": team_name
447
- }).with_limit(1)
448
- res = q.do()
449
- hits = res.get("data", {}).get("Get", {}).get("Team", [])
450
- if not hits:
451
- return "⚠️ Team not found."
452
- obj = hits[0]
453
- props = obj.get("properties", {})
454
- team_id = props.get("teamId")
455
- members = props.get("members") or []
456
- members.append(member_name)
457
- skills_list = list(set((props.get("skills") or []) + skills))
458
- weaviate_client.data_object.update({"members": members, "skills": skills_list}, "Team", uuid=team_id)
459
- return f"✅ {member_name} added to team '{team_name}'."
460
- except Exception as e:
461
- print("[update_team_add_member] error:", e)
462
- return "⚠️ Failed to add member to team."
463
-
464
- # -------------------- Session / State machine --------------------
465
- def initial_session() -> dict:
466
- return {"state": "idle", "data": {}}
467
-
468
- def handle_uploaded_cv_for_session(session: dict, uploaded_file: Any) -> Tuple[str, dict]:
469
- if not uploaded_file:
470
- return "⚠️ No file received.", session
471
- doc_info = process_uploaded_file(uploaded_file)
472
- if not doc_info or "error" in (doc_info or {}):
473
- return f"⚠️ Error processing uploaded CV: {doc_info.get('error') if doc_info else 'unknown error'}", session
474
- session["data"]["cvText"] = doc_info.get("content", "")
475
- session["data"]["cvSkills"] = doc_info.get("skills", [])
476
- st = session.get("state")
477
- if st == "apply_wait_cv":
478
- session["state"] = "apply_jobtitle"
479
- detected = session["data"]["cvSkills"]
480
- return f"CV received. Detected skills: {detected}. Which job title do you want to apply for? (type job title or 'any')", session
481
- if st == "recommend_wait_cv":
482
- rec_text = recommend_jobs_by_embedding(session["data"]["cvText"], top_k=5)
483
- session = initial_session()
484
- return f"Here are recommended jobs based on your CV:\n\n{rec_text}", session
485
- return "CV uploaded and processed. What would you like to do next?", session
486
-
487
- def handle_user_message(session: dict, user_text: str, uploaded_file: Any = None) -> Tuple[str, dict, bool]:
488
- session = session or initial_session()
489
- st = session.get("state", "idle")
490
- text = (user_text or "").strip()
491
-
492
- # quick reset
493
- if text.lower() in ("cancel", "exit", "quit", "restart", "reset"):
494
- return "Conversation reset. How can I help you now?", initial_session(), False
495
-
496
- # file upload route
497
- if uploaded_file:
498
- bot_msg, new_session = handle_uploaded_cv_for_session(session, uploaded_file)
499
- return bot_msg, new_session, False
500
-
501
- # IDLE
502
- if st == "idle":
503
- low = text.lower()
504
- if low in ("hi", "hello", "hey", "مرحبا", "ازيك", "السلام عليكم"):
505
- return "👋 Hello! How can I support you today? You can ask about jobs, teams, or recommendations.", session, False
506
- if low in ("who are you?", "who are you", "انت مين", "من انت"):
507
- return ("👋 I am EduNatives Assistant — your friendly academic and career guide."), session, False
508
-
509
- route = route_intent(text)
510
- # 1) KB first
511
- if route.intent in KB:
512
- return KB[route.intent].get(route.language, KB[route.intent].get("en", "")), session, False
513
-
514
- # 2) If intent is a RAG-related class -> call rag
515
- if route.intent in {"Job", "Application", "Memory", "Opportunities", "Project", "Team"}:
516
- try:
517
- rag_ans, items = rag_answer(text, route.intent, top_k=5)
518
- if rag_ans:
519
- return rag_ans, session, False
520
- except Exception as e:
521
- print("[handle_user_message] rag error:", e)
522
-
523
- # 3) fallback to LLM normal chat
524
- try:
525
- resp = llm_client.chat.completions.create(
526
- model=MODEL_NAME,
527
- messages=[
528
- {"role": "system", "content": "You are EduNatives Assistant. Be concise and helpful."},
529
- {"role": "user", "content": text},
530
- ],
531
- temperature=0.5,
532
- max_tokens=800
533
- )
534
- answer = resp.choices[0].message.content or ""
535
- return answer, session, False
536
- except Exception as e:
537
- print("[handle_user_message] LLM error:", e)
538
- return "⚠️ Sorry, I couldn't process that right now. Try again later.", session, False
539
-
540
- # ---------- APPLY FLOW ----------
541
- if st == "apply_name":
542
- session["data"]["applicantName"] = text or "Applicant"
543
  session["state"] = "apply_email"
544
- return "Thanks. What's your email address?", session, False
545
-
546
- if st == "apply_email":
547
- m = re.search(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)", text)
548
- session["data"]["applicantEmail"] = m.group(1) if m else text
549
  session["state"] = "apply_cover"
550
- return "Got it. Please type a short cover letter (or type 'skip' to skip).", session, False
551
-
552
- if st == "apply_cover":
553
- if text.lower() != "skip":
554
- session["data"]["coverLetter"] = text
555
- else:
556
- session["data"]["coverLetter"] = ""
557
- session["state"] = "apply_wait_cv"
558
- return "Please upload your CV now (use the Upload button).", session, True
559
-
560
- if st == "apply_jobtitle":
561
- session["data"]["targetJobTitle"] = text
562
- found = query_weaviate_collection("Job", text, limit=3)
563
- cv_skills = [s.lower() for s in session["data"].get("cvSkills", [])]
564
- if found:
565
- job = found[0]
566
- job_skills = [s.lower() for s in (job.get("skills") or [])]
567
- overlap = len([s for s in cv_skills if s in job_skills])
568
- session["data"]["targetJobId"] = job.get("jobId")
569
- session["state"] = "apply_confirm"
570
- if overlap > 0:
571
- return (f"I found a job: {job.get('title')} at {job.get('companyName')}. "
572
- f"Detected {overlap} overlapping skills. Do you want to confirm application? (yes/no)"), session, False
573
- else:
574
- return (f"I found {job.get('title')} at {job.get('companyName')}, but your CV skills do not overlap. "
575
- "Do you still want to proceed? (yes/no)"), session, False
576
- else:
577
- session["data"]["targetJobId"] = None
578
- session["state"] = "apply_confirm"
579
- return f"I couldn't find a job with that title. Do you want to apply for '{text}' anyway? (yes/no)", session, False
580
-
581
- if st == "apply_confirm":
582
- if text.lower() in ("yes", "y", "نعم"):
583
- app = {
584
- "applicationId": str(uuid.uuid4()),
585
- "jobId": session["data"].get("targetJobId"),
586
- "applicantName": session["data"].get("applicantName"),
587
- "applicantEmail": session["data"].get("applicantEmail"),
588
- "coverLetter": session["data"].get("coverLetter", ""),
589
- "cvText": session["data"].get("cvText", ""),
590
- "skills": session["data"].get("cvSkills", []),
591
- "createdAt": get_rfc3339_time()
592
- }
593
- ok = save_application_to_weaviate(app)
594
- session = initial_session()
595
- return ("🎉 Your application has been submitted successfully. Good luck!" if ok
596
- else "⚠️ Failed to save application. Please try again later."), session, False
597
- else:
598
- session = initial_session()
599
- return "Application cancelled. If you want to do something else, tell me.", session, False
600
-
601
- # ---------- TEAM FLOW ----------
602
- if st == "team_action":
603
- low = text.lower()
604
- if "create" in low or "إنشاء" in low:
605
- session["state"] = "team_create_name"
606
- session["data"] = {}
607
- return "Great what's the team name?", session, False
608
- if "join" in low or "انضم" in low:
609
- session["state"] = "team_join_name"
610
- session["data"] = {}
611
- return "Okay — what's the name of the team you want to join?", session, False
612
- return "Please say 'create' to create a team or 'join' to join a team.", session, False
613
-
614
- if st == "team_create_name":
615
- session["data"]["team_name"] = text
616
- session["state"] = "team_create_owner"
617
- return "Team name saved. Who is the team owner (your name)?", session, False
618
-
619
- if st == "team_create_owner":
620
- session["data"]["owner"] = text
621
- session["state"] = "team_create_skills"
622
- return "Owner saved. Please list the team's skills (comma-separated).", session, False
623
-
624
- if st == "team_create_skills":
625
- session["data"]["skills"] = [s.strip() for s in text.split(",") if s.strip()]
626
- session["state"] = "team_create_course"
627
- return "Skills saved. (Optional) Enter course/subject name or type 'skip'.", session, False
628
-
629
- if st == "team_create_course":
630
- session["data"]["course"] = "" if text.lower() == "skip" else text
631
- session["state"] = "team_create_idea"
632
- return "Please write a short idea/description for the project.", session, False
633
-
634
- if st == "team_create_idea":
635
- session["data"]["idea"] = text
636
- team_props = {
637
- "teamId": str(uuid.uuid4()),
638
- "name": session["data"].get("team_name"),
639
- "projectId": None,
640
- "members": [session["data"].get("owner")],
641
- "skills": session["data"].get("skills", []),
642
- "creatorId": session["data"].get("owner"),
643
- "createdAt": get_rfc3339_time(),
644
- "idea": session["data"].get("idea", "")
645
- }
646
- saved = save_team_to_weaviate(team_props)
647
- session = initial_session()
648
- return (f"🎉 Team '{team_props['name']}' created! Members: {team_props['members']}" if saved
649
- else "⚠️ Failed to create team. Try again later."), session, False
650
-
651
- if st == "team_join_name":
652
- session["data"]["team_name"] = text
653
- session["state"] = "team_join_member"
654
- return "What's your name (to add you to the team)?", session, False
655
-
656
- if st == "team_join_member":
657
- session["data"]["member_name"] = text
658
- session["state"] = "team_join_skills"
659
- return "Enter your skills (comma-separated).", session, False
660
-
661
- if st == "team_join_skills":
662
- skills = [s.strip() for s in text.split(",") if s.strip()]
663
- resp = update_team_add_member(session["data"].get("team_name"), session["data"].get("member_name"), skills)
664
- session = initial_session()
665
- return resp, session, False
666
-
667
- # ---------- RECOMMEND FLOW ----------
668
- if st == "recommend_wait_cv":
669
- return "Please upload your CV (use the Upload button).", session, True
670
-
671
- # default fallback
672
- return "Sorry — I didn't understand that. You can say 'apply', 'create team', 'join team' or 'recommend'.", session, False
673
-
674
- # -------------------- UI wiring (Gradio) --------------------
675
- def format_chat_html(history: List[Dict[str, str]]) -> str:
676
- html = "<div class='chatbot'>"
677
- for msg in history:
678
- role = msg["role"]
679
- content = msg["content"]
680
- if role == "user":
681
- html += f"<div class='user-bubble'>{content}</div>"
682
- else:
683
- html_content = markdown.markdown(content, extensions=['tables'])
684
- html += f"<div class='bot-bubble'>{html_content}</div>"
685
- html += "</div>"
686
- return html
687
-
688
- # minimal CSS + UI
689
- with gr.Blocks(css="""
690
- .chatbot {height: 520px; overflow: auto;}
691
- .user-bubble {background-color: #DCF8C6; padding: 10px; border-radius: 12px; max-width: 75%; float: right; clear: both; margin: 5px; word-wrap: break-word;}
692
- .bot-bubble {background-color: #F1F0F0; padding: 10px; border-radius: 12px; max-width: 75%; float: left; clear: both; margin: 5px; word-wrap: break-word;}
693
- .chatbox-container {display: flex; gap: 8px; margin-top: 10px;}
694
- """) as demo:
695
-
696
- gr.Markdown("# 💬 EduNatives — Conversational Job Portal")
697
-
698
- chat_html = gr.HTML(format_chat_html([]))
699
-
700
- with gr.Row(elem_classes="chatbox-container"):
701
- user_input = gr.Textbox(placeholder="Type your message here (e.g. 'apply', 'create team', 'recommend')", lines=2)
702
- send_btn = gr.Button("Send", variant="primary")
703
-
704
- with gr.Row(visible=False) as file_row:
705
- cv_uploader = gr.File(label="Upload CV (.pdf/.docx/.txt)", file_count="single", file_types=[".pdf", ".docx", ".txt"], visible=False)
706
- upload_btn = gr.Button("Upload CV", visible=False)
707
-
708
- with gr.Row():
709
- clear_btn = gr.Button("Reset Conversation")
710
- instructions = gr.Markdown("Commands: `apply`, `create team`, `join team`, `recommend` — the bot will guide you step-by-step.")
711
-
712
- chat_history_state = gr.State([])
713
- session_state = gr.State(initial_session())
714
-
715
- def append_to_history(history: List[Dict[str, str]], role: str, content: str) -> List[Dict[str, str]]:
716
- history = history or []
717
- history.append({"role": role, "content": content})
718
- return history
719
-
720
- def handle_send(message: str, history: List[Dict[str, str]], session: dict):
721
- history = history or []
722
- session = session or initial_session()
723
- if message and message.strip():
724
- history = append_to_history(history, "user", message.strip())
725
- bot_reply, new_session, show_uploader = handle_user_message(session, message or "", uploaded_file=None)
726
- history = append_to_history(history, "assistant", bot_reply or "…")
727
- html = format_chat_html(history)
728
- return "", html, history, new_session, gr.update(visible=show_uploader), gr.update(visible=show_uploader)
729
-
730
- def handle_upload(file_obj, history: List[Dict[str, str]], session: dict):
731
- history = history or []
732
- session = session or initial_session()
733
- filename = getattr(file_obj, "name", "uploaded_file")
734
- history = append_to_history(history, "user", f"📎 Uploaded file: {filename}")
735
- bot_reply, new_session, show_uploader = handle_user_message(session, "", uploaded_file=file_obj)
736
- history = append_to_history(history, "assistant", bot_reply or "…")
737
- html = format_chat_html(history)
738
- return html, history, new_session, gr.update(visible=show_uploader), gr.update(visible=show_uploader)
739
-
740
- def handle_reset(history, session):
741
- new_hist = []
742
- new_session = initial_session()
743
- html = format_chat_html(new_hist)
744
- return html, new_hist, new_session, gr.update(visible=False), gr.update(visible=False)
745
-
746
- send_btn.click(
747
- fn=handle_send,
748
- inputs=[user_input, chat_history_state, session_state],
749
- outputs=[user_input, chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
750
- queue=True
751
- )
752
- upload_btn.click(
753
- fn=handle_upload,
754
- inputs=[cv_uploader, chat_history_state, session_state],
755
- outputs=[chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
756
- queue=True
757
- )
758
- clear_btn.click(
759
- fn=handle_reset,
760
- inputs=[chat_history_state, session_state],
761
- outputs=[chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
762
- queue=False
763
- )
764
-
765
- if __name__ == "__main__":
766
- demo.launch(debug=True)
 
 
 
 
 
 
1
  import json
2
+ import weaviate
3
+ import fitz
4
  import docx
5
+ import os
 
 
 
 
6
  import gradio as gr
7
  from openai import OpenAI
8
+ from weaviate.classes.init import Auth
9
+ from weaviate.classes.config import Property, DataType
10
+ from sklearn.metrics.pairwise import cosine_similarity
 
 
 
 
 
11
 
12
# --- Config ---
# Connection settings come from the environment. The fallbacks are
# placeholders for local development and are not usable credentials.
WEAVIATE_URL = os.getenv("WEAVIATE_URL", "http://localhost:8080")
WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY", "YOUR_KEY")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "YOUR_KEY")


# --- Clients ---
def _connect_weaviate(url, api_key):
    """Return a connected Weaviate v4 client for ``url``.

    The v4 ``WeaviateClient`` constructor has no ``url`` parameter and does
    not connect on construction, so the connect helpers are used instead.

    Args:
        url: Weaviate endpoint, with or without a scheme.
        api_key: API key used for non-local (cloud) endpoints.

    Returns:
        A connected ``weaviate.WeaviateClient``.
    """
    from urllib.parse import urlparse

    # Prefix "//" so a bare "host:port" value is parsed as a network location.
    parsed = urlparse(url if "://" in url else f"//{url}")
    host = parsed.hostname or ""
    if host in ("localhost", "127.0.0.1"):
        # NOTE(review): local instances are assumed to allow anonymous
        # access, so no API key is sent here -- confirm for your deployment.
        return weaviate.connect_to_local(host=host, port=parsed.port or 8080)
    return weaviate.connect_to_weaviate_cloud(
        cluster_url=url,
        auth_credentials=Auth.api_key(api_key),
    )


client = _connect_weaviate(WEAVIATE_URL, WEAVIATE_API_KEY)
openai_client = OpenAI(api_key=OPENAI_API_KEY)
23
 
24
+ # --- Ensure Collections ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
def ensure_collections():
    """Create every collection the app relies on that does not exist yet.

    Each collection is declared as a tuple of TEXT property names. Existing
    collections are left untouched.
    """
    text_fields_by_collection = {
        "Job": ("title", "description"),
        "Application": ("name", "email"),
        "Memory": ("content",),
        "Opportunities": ("details",),
        "Project": ("name", "description"),
        "Team": ("member", "role"),
    }
    for collection_name, field_names in text_fields_by_collection.items():
        if client.collections.exists(collection_name):
            continue
        text_properties = [
            Property(name=field_name, data_type=DataType.TEXT)
            for field_name in field_names
        ]
        client.collections.create(name=collection_name, properties=text_properties)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  ensure_collections()
39
 
40
+ # --- Embeddings ---
41
def get_embedding(text):
    """Return the ``text-embedding-3-small`` embedding of ``text``.

    Sends ``text`` to the OpenAI embeddings endpoint and returns the
    embedding of the first item in the response.
    """
    first_item = openai_client.embeddings.create(
        model="text-embedding-3-small",
        input=text,
    ).data[0]
    return first_item.embedding
44
+
45
def recommend_jobs_by_embedding(cv_text, jobs, top_n=3):
    """Rank ``jobs`` by embedding similarity to a CV and return the best ones.

    Args:
        cv_text: Plain text of the candidate's CV.
        jobs: Iterable of dicts, each with a ``"description"`` key.
        top_n: Maximum number of jobs to return.

    Returns:
        Up to ``top_n`` job dicts, most similar first. An empty list when
        there are no jobs or ``top_n`` is not positive.
    """
    jobs = list(jobs)
    # Bail out before any API call: cosine_similarity rejects an empty
    # matrix, and a non-positive top_n can never select anything useful.
    if not jobs or top_n <= 0:
        return []

    cv_embedding = get_embedding(cv_text)
    job_embeddings = [get_embedding(job["description"]) for job in jobs]
    sims = cosine_similarity([cv_embedding], job_embeddings)[0]
    ranked = sorted(zip(jobs, sims), key=lambda pair: pair[1], reverse=True)
    return [job for job, _ in ranked[:top_n]]
51
+
52
+ # --- File Upload Handling ---
53
def process_uploaded_file(file_path):
    """Extract plain text from an uploaded PDF, DOCX or TXT file.

    Args:
        file_path: Path to the file; its extension (case-insensitive)
            selects the extractor.

    Returns:
        The extracted text with surrounding whitespace stripped, or ``""``
        for an unsupported extension.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        with fitz.open(file_path) as pdf:
            text = "".join(page.get_text() for page in pdf)
    elif ext == ".docx":
        paragraphs = docx.Document(file_path).paragraphs
        text = "".join(f"{para.text}\n" for para in paragraphs)
    elif ext == ".txt":
        # CVs arrive in arbitrary encodings; substitute undecodable bytes
        # instead of failing the whole upload with UnicodeDecodeError.
        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
            text = f.read()
    else:
        # Unsupported type: callers expect a string, so return empty text.
        text = ""
    return text.strip()
68
+
69
+ # --- Session Management ---
70
def initial_session():
    """Build a fresh session: idle state, empty data and empty history."""
    fresh_session = dict(state="idle", data={}, history=[])
    return fresh_session
72
+
73
def handle_uploaded_cv_for_session(session, file_path):
    """Extract the text of the uploaded CV and store it on the session.

    The text is saved under ``session["data"]["cv_text"]`` and the same
    session object is returned.
    """
    extracted = process_uploaded_file(file_path)
    session["data"].update(cv_text=extracted)
    return session
77
+
78
+ # --- KB ---
79
# Canned knowledge-base answers, keyed by topic identifier.
KB_RESPONSES = dict(
    student_registration="You can register as a student on the portal...",
    student_internships="Internships are listed under opportunities section...",
)
83
 
84
+ # --- RAG Query ---
85
def rag_query(collection, query_text):
    """Return the properties of the objects nearest to ``query_text``.

    The query is embedded with ``get_embedding`` and used for a
    vector search limited to three results.

    Args:
        collection: Name of the Weaviate collection to search.
        query_text: Free-text query.

    Returns:
        A list of property dicts, one per matching object (at most three).
    """
    query_embedding = get_embedding(query_text)
    # The v4 client has no `client.query.get(...).do()` builder; queries go
    # through the collection handle instead.
    response = client.collections.get(collection).query.near_vector(
        near_vector=query_embedding,
        limit=3,
    )
    # Return plain dicts so the caller can serialise the result with json.
    return [obj.properties for obj in response.objects]
89
+
90
+ # --- LLM Chat ---
91
def llm_chat(prompt):
    """Send ``prompt`` to ``gpt-4o-mini`` and return the reply text.

    The conversation is a fixed system message followed by the prompt as
    the single user message.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful chatbot."},
        {"role": "user", "content": prompt},
    ]
    completion = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
97
 
98
+ # --- Flows ---
99
def apply_flow(session, message):
    """Advance the job-application dialogue by one step.

    The flow collects name, email and cover letter in that order, tracked
    in ``session["state"]`` as ``apply_name`` -> ``apply_email`` ->
    ``apply_cover`` -> ``idle``. Any other state starts a new application.

    Args:
        session: Mutable session dict; updated in place.
        message: The user's latest message, stored verbatim as the answer
            to the current step.

    Returns:
        A ``(reply, session)`` tuple.
    """
    state = session.get("state")
    data = session.setdefault("data", {})

    if state == "apply_name":
        data["name"] = message
        session["state"] = "apply_email"
        return "Please provide your email.", session
    if state == "apply_email":
        data["email"] = message
        session["state"] = "apply_cover"
        return "Please provide your cover letter.", session
    if state == "apply_cover":
        data["cover"] = message
        session["state"] = "idle"
        # NOTE(review): the answers are only kept on the session here;
        # nothing in this function persists them anywhere else.
        return "Your application has been recorded.", session

    # Start a new application on the existing session. Replacing the session
    # would drop "history" and the uploaded "cv_text", so only the
    # application fields are cleared.
    for field in ("name", "email", "cover"):
        data.pop(field, None)
    session["state"] = "apply_name"
    return "Let's start your application. What's your name?", session
114
+
115
def team_flow(session, message):
    """Return the fixed team-flow prompt and the unchanged session.

    ``message`` is accepted for signature parity with the other flows and
    is not used.
    """
    reply = "Team flow triggered. Add member info."
    return reply, session
117
+
118
def recommend_flow(session, message):
    """Recommend jobs for the CV stored on the session.

    Args:
        session: Session dict; the CV text is read from
            ``session["data"]["cv_text"]``.
        message: The user's message (unused).

    Returns:
        A ``(reply, session)`` tuple. The reply lists the recommended job
        titles, or asks for a CV when none (or an empty one) is stored.
    """
    cv_text = session["data"].get("cv_text")
    # An empty string means the upload yielded no text (e.g. an unsupported
    # file type); treat it as "no CV" rather than embedding nothing.
    if not cv_text:
        return "Please upload your CV first.", session

    jobs = [
        {"title": "AI Intern", "description": "Work on NLP"},
        {"title": "ML Engineer", "description": "Build models"},
    ]
    recs = recommend_jobs_by_embedding(cv_text, jobs)
    return f"Recommended jobs: {[j['title'] for j in recs]}", session
124
+
125
+ # --- Main Handler ---
126
def handle_user_message(session, message):
    """Route one user message to the matching handler.

    Order of precedence: an in-progress application, canned knowledge-base
    answers, a vector lookup when a collection name is mentioned, the
    keyword-triggered flows, and finally the LLM.

    Args:
        session: Current session dict.
        message: The user's message; ``None`` is treated as empty.

    Returns:
        A ``(reply, session)`` tuple.
    """
    message = message or ""
    lower = message.lower()

    # Continue an application already in progress. The answers (a name, an
    # email, ...) do not contain the word "apply", so without this check
    # they would fall through to the LLM and the flow would never advance.
    if session.get("state") in ("apply_name", "apply_email", "apply_cover"):
        return apply_flow(session, message)

    # KB check: keys use underscores, which users do not type, so also
    # match the space-separated form ("student registration").
    for key, answer in KB_RESPONSES.items():
        if key in lower or key.replace("_", " ") in lower:
            return answer, session

    # RAG check
    # NOTE(review): "team" matches the Team collection here, so the
    # team_flow trigger below cannot be reached; likewise a message such as
    # "apply for a job" is answered from the Job collection. Confirm the
    # intended precedence.
    for collection in ["Job", "Application", "Memory", "Opportunities", "Project", "Team"]:
        if collection.lower() in lower:
            results = rag_query(collection, message)
            return f"RAG Results from {collection}: {json.dumps(results, indent=2)}", session

    # Flow triggers
    if "apply" in lower:
        return apply_flow(session, message)
    if "team" in lower:
        return team_flow(session, message)
    if "recommend" in lower:
        return recommend_flow(session, message)

    # Default LLM
    return llm_chat(message), session
150
+
151
+ # --- Gradio App ---
152
# NOTE(review): this session is a single module-level object, so it is
# shared by everyone talking to the app; per-user state would need gr.State.
session = initial_session()

# Path of the upload that has already been ingested. The File component
# keeps its value between messages, so this tells a new upload apart from a
# file that is merely still attached.
_processed_cv_path = None


def chat_with_bot(message, file=None):
    """Handle one chat turn, ingesting a newly attached CV if there is one.

    Args:
        message: The user's text message.
        file: Value of the upload component, or ``None``. Either an object
            with a ``name`` attribute holding the path, or the path itself.

    Returns:
        The bot's reply text.
    """
    global session, _processed_cv_path

    file_path = getattr(file, "name", file) if file is not None else None
    if file_path:
        is_new_upload = file_path != _processed_cv_path
        # Re-ingest when the session no longer holds the CV text.
        if is_new_upload or "cv_text" not in session.get("data", {}):
            session = handle_uploaded_cv_for_session(session, file_path)
            _processed_cv_path = file_path
        if is_new_upload:
            return "CV uploaded successfully!"

    # A file that is still attached from an earlier turn must not swallow
    # the message, so fall through to normal handling.
    reply, session = handle_user_message(session, message)
    return reply
161
+
162
with gr.Blocks(title="Edunatives Chatbot") as demo:
    gr.Markdown("# 🎓 Edunatives Chatbot")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type your message here...")
    file_upload = gr.File(label="Upload CV (PDF/DOCX/TXT)")
    clear = gr.Button("Clear Chat")

    def respond(message, history, file):
        """Append the bot's reply to the chat and clear the textbox."""
        history = history or []
        response = chat_with_bot(message, file)
        history.append((message, response))
        return history, ""

    def reset_chat():
        """Clear the chat, the textbox, the attached file and the session.

        Resetting only the visible chat would leave a half-finished
        application or an old CV active behind an empty window.
        """
        global session
        session = initial_session()
        return [], "", None

    msg.submit(respond, [msg, chatbot, file_upload], [chatbot, msg])
    clear.click(reset_chat, None, [chatbot, msg, file_upload])

# Launch only when run as a script so importing this module has no
# server-starting side effect.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)