afouda commited on
Commit
bc7377c
·
verified ·
1 Parent(s): e01009a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +615 -460
app.py CHANGED
@@ -343,28 +343,27 @@
343
  # if __name__ == "__main__":
344
  # demo.launch(debug=True)
345
 
 
346
 
347
-
348
-
349
- # app.py -- Full EduNatives chatbot with RAG + Application + Team flows
350
  from __future__ import annotations
351
  import os
352
- import json
353
- import time
354
  import re
355
  import uuid
356
- import datetime
357
- from dataclasses import dataclass
 
 
 
 
358
  from typing import List, Dict, Any, Optional
 
359
 
360
- import markdown
361
  import gradio as gr
362
  from openai import OpenAI
363
- import fitz # PyMuPDF
364
- import docx
365
  import weaviate
366
  from weaviate.classes.init import Auth
367
  from weaviate.classes.config import Configure, Property, DataType
 
368
 
369
  # -------------------- Configuration --------------------
370
  MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b")
@@ -375,77 +374,17 @@ WEAVIATE_URL = os.getenv("WEAVIATE_URL", "htorgbgpt4w63nvf1yeuw.c0.us-west3.gcp.
375
  # SECURITY(review): the fallback value below is a credential-looking literal
  # committed to source control. Presumably this is a live Weaviate API key --
  # confirm, rotate it, and supply the key only through the environment.
  WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY", "ZUd6clB5WmYzVGkxeU40cl96NTY5UkViUlVzY05Md3IzQ0JKelBZQmxGZHRPeGpCeGdxS1FUNnlYUkFFPV92MjAw")
376
 
377
  MEMORY_FILE = os.getenv("MEMORY_FILE", "chat_memory.json")
378
- LOG_FILE = os.getenv("LOG_FILE", "chat_analytics.json")
379
-
380
  # -------------------- Clients --------------------
381
- # LLM client
382
  llm_client = OpenAI(api_key=DEEPINFRA_API_KEY, base_url=BASE_URL)
383
 
384
- # Weaviate client
385
  weaviate_client = weaviate.connect_to_weaviate_cloud(
386
  cluster_url=WEAVIATE_URL,
387
  auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
388
  )
389
 
390
- # -------------------- KB, Keys, prompts --------------------
391
- KB: Dict[str, Dict[str, str]] = {
392
- "student_registration": {
393
- "en": (
394
- "**How to register / create an account (Student)**\n\n"
395
- "1. Go to the EduNatives site and choose Sign Up.\n"
396
- "2. Use your university email if possible and verify it.\n"
397
- "3. Complete your profile (major, skills, interests).\n"
398
- "4. Enable notifications for internships/scholarships."
399
- ),
400
- "ar": (
401
- "**طريقة التسجيل وإنشاء حساب (طلاب)**\n\n"
402
- "١. اذهب إلى موقع EduNatives واختر Sign Up.\n"
403
- "٢. يفضل استخدام إيميل الجامعة وتأكيده.\n"
404
- "٣. أكمل ملفك الشخصي (التخصص، المهارات، الاهتمامات).\n"
405
- "٤. فعّل التنبيهات لفرص التدريب والمنح."
406
- ),
407
- },
408
- "student_internships": {
409
- "en": (
410
- "**Finding internships & scholarships**\n\n"
411
- "- Use the search filters: field, location, duration, paid/unpaid.\n"
412
- "- Follow companies and set up alerts for new opportunities.\n"
413
- "- Keep your profile and resume updated."
414
- ),
415
- "ar": (
416
- "**كيفية العثور على تدريب أو منحة**\n\n"
417
- "- استخدم فلاتر البحث: التخصص، المكان، المدة، مدفوع/غير مدفوع.\n"
418
- "- تابع الشركات وفعّل التنبيهات للفرص الجديدة.\n"
419
- "- حافظ على تحديث ملفك الشخصي وسيرتك الذاتية."
420
- ),
421
- },
422
- }
423
-
424
- KEYS = {
425
- "student_registration": ["register", "sign up", "signup", "create account", "account", "تسجيل", "انشاء", "إنشاء", "حساب", "اعمل حساب", "سجل"],
426
- "student_internships": ["intern", "internship", "training", "scholar", "scholarship", "grant", "opportunity", "تدريب", "تدريبي", "منحة", "منح", "فرصة", "فرص", "انترنشيب"],
427
- "student_mentors": ["mentor", "advisor", "professor", "supervisor", "faculty", "connect", "منتور", "مشرف", "دكتور", "أستاذ", "استاذ", "التواصل", "اكلم"],
428
- "university_publish": ["publish", "paper", "research", "preprint", "conference", "event", "seminar", "webinar", "نشر", "أبحاث", "ابحاث", "بحث", "مؤتمر", "فعالية", "فعاليات", "ندوة", "ورشة"],
429
- "university_connect": ["students", "connect with students", "reach students", "collaborate", "طلاب", "تواصل مع الطلاب", "التواصل مع الطلاب", "تعاون"],
430
- "company_post_jobs": ["job", "jobs", "post job", "hiring", "hire", "internships", "graduate", "وظيفة", "وظائف", "اعلان", "إعلان", "نشر وظيفة", "توظيف", "فرص تدريب", "خريجين"],
431
- "company_find_talent": ["talent", "candidate", "recruit", "search", "find", "pipeline", "موهبة", "مواهب", "مرشحين", "تعيين", "تجنيد", "ابحث", "دور على"],
432
- "project_query": ["project", "projects", "مشروع", "مشاريع", "هدف", "أهداف"],
433
- "apply_job_opportunity": ["apply", "application", "تقديم", "طلب", "عايز اقدم", "اريد التقديم", "اريد اتقدم"],
434
- "join_team": ["team", "join team", "فريق", "انضمام لفريق", "انضمام", "انضم"],
435
- }
436
-
437
- AUDIENCE_MAP = {
438
- "student_registration": "student",
439
- "student_internships": "student",
440
- "student_mentors": "student",
441
- "university_publish": "university",
442
- "university_connect": "university",
443
- "company_post_jobs": "company",
444
- "company_find_talent": "company",
445
- "project_query": "student",
446
- "apply_job_opportunity": "student",
447
- "join_team": "student",
448
- }
449
 
450
  SYSTEM_PROMPT_BASE = (
451
  "You are **EduNatives Assistant**, a helpful, friendly, and precise academic/career guide. "
@@ -464,12 +403,45 @@ SYSTEM_PROMPT_BASE = (
464
  "- Ensure that all generated prompts are phrased using positive reinforcement."
465
  )
466
 
467
- CONTEXT_INJECT_TEMPLATE = (
468
- "Context to guide your answer (do not repeat verbatim):\n"
469
- "- Audience: {audience}\n- Intent: {intent}\n- Extra hints: Keep it practical for this audience."
470
- )
471
 
472
- # -------------------- Utility helpers --------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
473
  ARABIC_RANGE = (
474
  (0x0600, 0x06FF), (0x0750, 0x077F), (0x08A0, 0x08FF),
475
  (0xFB50, 0xFDFF), (0xFE70, 0xFEFF), (0x1EE00, 0x1EEFF)
@@ -483,63 +455,59 @@ def is_arabic(text: str) -> bool:
483
  return True
484
  return False
485
 
 
def format_chat_html(history: List[Dict[str, str]]) -> str:
    """Render the chat history as the HTML fragment shown in the chat panel.

    Args:
        history: Chat turns in display order. Each turn is a dict with a
            "role" key ("user" or "assistant") and a "content" key.

    Returns:
        A ``<div class='chatbot'>`` wrapper containing one bubble per turn.
        Turns whose role is neither "user" nor "assistant" are skipped.
    """
    # Imported locally (and by name) so the block does not depend on a
    # module-level ``html`` import.
    from html import escape

    pieces = ["<div class='chatbot'>"]
    for message in history:
        role = message["role"]
        content = message["content"]
        if role == "user":
            # User text is untrusted input: escape it so typed markup is
            # displayed literally instead of being injected into the page.
            pieces.append(f"<div class='user-bubble'>{escape(str(content))}</div>")
        elif role == "assistant":
            # Assistant replies are Markdown (tables enabled) rendered to HTML.
            html_content = markdown.markdown(content, extensions=['tables'])
            pieces.append(f"<div class='bot-bubble'>{html_content}</div>")
    pieces.append("</div>")
    return "".join(pieces)
 
 
 
498
 
# Simple keyword-based CV skills/experience extractor (improvable).
# The trailing guard is ``(?!\w)`` instead of ``\b``: a ``\b`` placed after a
# symbol-terminated keyword such as "c++" or "c#" only matches when a word
# character follows, so those skills were never detected in normal text.
# For keywords that end in a letter the two guards are equivalent.
_SKILL_REGEX = re.compile(
    r"\b(python|java|c\+\+|c#|javascript|nlp|machine learning|deep learning|data science|sql|aws|azure|docker|kubernetes|react|node\.js)(?!\w)",
    re.IGNORECASE,
)
_EXP_REGEX = re.compile(r"(\d+)\s*(?:years|year|months|month)\s*(?:of)?\s*(?:experience|exp|worked)", re.IGNORECASE)


def extract_skills_experience(text: str) -> Dict[str, List[str]]:
    """Extract known skill keywords and experience phrases from free text.

    Args:
        text: Raw text to scan (for example the text of an uploaded CV).

    Returns:
        A dict with two keys:
        "skills" -- the distinct matched keywords, lower-cased and sorted so
        the result is stable between runs;
        "experience" -- every matched experience phrase (for example
        "3 years of experience") in order of appearance.
    """
    skills = sorted({m.group(0).lower() for m in _SKILL_REGEX.finditer(text)})
    experiences = [m.group(0) for m in _EXP_REGEX.finditer(text)]
    return {"skills": skills, "experience": experiences}
507
-
508
- # -------------------- Ensure auxiliary collections --------------------
509
- def ensure_aux_collections():
510
- # Team
511
- if not weaviate_client.collections.exists("Team"):
512
- weaviate_client.collections.create(
513
- name="Team",
514
- properties=[
515
- Property(name="teamId", data_type=DataType.TEXT),
516
- Property(name="name", data_type=DataType.TEXT),
517
- Property(name="projectId", data_type=DataType.TEXT),
518
- Property(name="members", data_type=DataType.TEXT_ARRAY),
519
- Property(name="createdAt", data_type=DataType.DATE),
520
- Property(name="creatorId", data_type=DataType.TEXT),
521
- ],
522
- vectorizer_config=Configure.Vectorizer.none()
523
- )
524
-
525
- # Application
526
  if not weaviate_client.collections.exists("Application"):
527
  weaviate_client.collections.create(
528
  name="Application",
529
  properties=[
530
  Property(name="applicationId", data_type=DataType.TEXT),
531
  Property(name="jobId", data_type=DataType.TEXT),
532
- Property(name="opportunityId", data_type=DataType.TEXT),
533
  Property(name="applicantName", data_type=DataType.TEXT),
534
  Property(name="applicantEmail", data_type=DataType.TEXT),
535
  Property(name="coverLetter", data_type=DataType.TEXT),
536
  Property(name="cvText", data_type=DataType.TEXT),
 
537
  Property(name="createdAt", data_type=DataType.DATE),
538
  ],
539
  vectorizer_config=Configure.Vectorizer.none()
540
  )
541
 
542
- # Memory
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
543
  if not weaviate_client.collections.exists("Memory"):
544
  weaviate_client.collections.create(
545
  name="Memory",
@@ -552,54 +520,80 @@ def ensure_aux_collections():
552
  vectorizer_config=Configure.Vectorizer.none()
553
  )
554
 
555
- ensure_aux_collections()
556
 
557
- # -------------------- Weaviate query helpers (RAG) --------------------
558
  def query_weaviate_collection(class_name: str, query_text: str, limit: int = 5) -> List[dict]:
559
- """
560
- Query using v4 weaviate client (hybrid search).
561
- """
562
  try:
563
  collection = weaviate_client.collections.get(class_name)
564
 
565
- # Hybrid search
566
- res = collection.query.hybrid(query=query_text, limit=limit)
 
567
 
568
- items = [o.properties for o in res.objects]
569
-
570
- # --- fallback لو مفيش نتيجة
571
  if not items:
572
- print(f"[Hybrid returned 0 → fallback filter on {class_name}]")
573
- res2 = collection.query.fetch_objects(limit=limit) # مجرد fallback بسيط
574
- items = [o.properties for o in res2.objects]
 
 
 
575
 
576
  return items
577
  except Exception as e:
578
- print(f"[Weaviate Query Error] class={class_name} error={e}")
579
  return []
580
 
581
-
def build_rag_prompt(user_question: str, retrieved_items: List[dict], class_name: str) -> str:
    """Build the LLM prompt that embeds retrieved records plus the question.

    Args:
        user_question: The question to answer.
        retrieved_items: Property dicts of the retrieved records.
        class_name: Collection the records came from. "Job", "Opportunities"
            and "Project" get a field-specific summary line; any other value
            falls back to a truncated ``str(item)``.

    Returns:
        An intro line, one numbered line per record, and a closing block
        containing the question and answering instructions.
    """

    def text(item: dict, key: str, default: str = "") -> str:
        # A key that is present but holds None must behave like a missing key;
        # slicing or formatting None would otherwise raise or print "None".
        value = item.get(key)
        return default if value is None else str(value)

    def joined(item: dict, key: str) -> str:
        # Tolerate None / missing lists and non-string members.
        values = item.get(key) or []
        if isinstance(values, str):
            values = [values]
        return ", ".join(str(v) for v in values)

    intro = f"Use the following {len(retrieved_items)} records from {class_name} to answer the question succinctly.\n\n"
    parts = []
    for i, item in enumerate(retrieved_items, 1):
        if class_name == "Job":
            parts.append(
                f"{i}. Title: {text(item, 'title', 'N/A')} | Company: {text(item, 'companyName', 'N/A')}"
                f" | Skills: {joined(item, 'skills')} | Desc: {text(item, 'description')[:200]}"
            )
        elif class_name == "Opportunities":
            parts.append(
                f"{i}. Title: {text(item, 'title', 'N/A')} | Topic: {text(item, 'topic', 'N/A')}"
                f" | Skills: {joined(item, 'skills')} | Overview: {text(item, 'overview')[:200]}"
            )
        elif class_name == "Project":
            parts.append(
                f"{i}. Title: {text(item, 'title', 'N/A')} | ShortDesc: {text(item, 'shortDescription')[:200]}"
                f" | Fields: {joined(item, 'fields')}"
            )
        else:
            parts.append(f"{i}. {str(item)[:200]}")
    context_block = "\n".join(parts)
    closing = f"\n\nQuestion: {user_question}\nAnswer concisely and, if applicable, include next steps (how to apply / contact / form a team)."
    return intro + context_block + closing
 
 
597
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
598
  def rag_answer(user_question: str, class_name: str, top_k: int = 5) -> (str, List[dict]):
599
  retrieved = query_weaviate_collection(class_name, user_question, limit=top_k)
600
  if not retrieved:
601
  return "", []
 
602
  prompt = build_rag_prompt(user_question, retrieved, class_name)
 
603
  try:
604
  resp = llm_client.chat.completions.create(
605
  model=MODEL_NAME,
@@ -607,368 +601,529 @@ def rag_answer(user_question: str, class_name: str, top_k: int = 5) -> (str, Lis
607
  {"role": "system", "content": SYSTEM_PROMPT_BASE},
608
  {"role": "user", "content": prompt}
609
  ],
610
- temperature=0.2,
611
- max_tokens=512
612
  )
613
  answer = resp.choices[0].message.content or ""
614
  except Exception as e:
615
  print(f"[RAG LLM Error] {e}")
616
  answer = ""
 
617
  return answer, retrieved
 
 
 
 
618
 
619
- # -------------------- Save helpers --------------------
620
- def save_application_to_weaviate(application: dict) -> bool:
621
- try:
622
- collection = weaviate_client.collections.get("Application")
623
- # ensure createdAt exists
624
- application.setdefault("createdAt", datetime.datetime.utcnow().isoformat() + "Z")
625
- # add stable uuid
626
- uid = str(uuid.uuid5(uuid.NAMESPACE_DNS, application.get("applicationId", str(uuid.uuid4()))))
627
- collection.data.insert(properties=application, uuid=uid)
628
- return True
629
- except Exception as e:
630
- print(f"[Save Application Error] {e}")
631
- return False
632
 
633
- def save_team_to_weaviate(team_props: dict) -> Optional[dict]:
 
 
 
 
 
634
  try:
635
- collection = weaviate_client.collections.get("Team")
636
- team_props.setdefault("createdAt", datetime.datetime.utcnow().isoformat() + "Z")
637
- uid = str(uuid.uuid5(uuid.NAMESPACE_DNS, team_props.get("teamId", str(uuid.uuid4()))))
638
- collection.data.insert(properties=team_props, uuid=uid)
639
- return team_props
 
 
 
 
 
 
 
 
640
  except Exception as e:
641
- print(f"[Save Team Error] {e}")
642
- return None
643
 
644
- def save_memory_to_weaviate(session_id: str, text: str) -> bool:
645
  try:
646
- collection = weaviate_client.collections.get("Memory")
647
- mem = {"memoryId": str(uuid.uuid4()), "sessionId": session_id, "text": text, "createdAt": datetime.datetime.utcnow().isoformat() + "Z"}
648
- collection.data.insert(properties=mem, uuid=str(uuid.uuid5(uuid.NAMESPACE_DNS, mem["memoryId"])))
649
- return True
 
 
 
 
650
  except Exception as e:
651
- print(f"[Save Memory Error] {e}")
652
- return False
653
 
654
- # -------------------- File processing --------------------
655
- def process_uploaded_file(file_obj: Any) -> dict | None:
656
  """
657
- file_obj is a Gradio file (file_obj.name present)
658
- returns dict with 'content' and 'profile' keys or None
 
 
 
 
659
  """
660
- if not file_obj:
661
- return None
662
- file_path = file_obj.name
663
- filename = os.path.basename(file_path)
664
- text_content = ""
665
- try:
666
- if filename.lower().endswith(".pdf"):
667
- with fitz.open(file_path) as doc:
668
- for page in doc:
669
- text_content += page.get_text()
670
- elif filename.lower().endswith(".docx"):
671
- doc = docx.Document(file_path)
672
- for p in doc.paragraphs:
673
- text_content += p.text + "\n"
674
- elif filename.lower().endswith(".txt"):
675
- with open(file_path, "r", encoding="utf-8") as f:
676
- text_content = f.read()
677
- else:
678
- return {"error": f"Unsupported file type: {filename}"}
679
- profile = extract_skills_experience(text_content)
680
- return {"content": text_content.strip(), "profile": profile, "filename": filename}
681
- except Exception as e:
682
- print(f"[File process error] {e}")
683
- return {"error": f"Error processing file {filename}: {e}"}
684
-
685
- # -------------------- Logging --------------------
686
- def log_interaction(user_message: str, route: 'Route', response: str):
687
- entry = {"timestamp": time.time(), "user_message": user_message, "audience": route.audience, "intent": route.intent, "language": route.language, "response": response}
688
  try:
689
- if os.path.exists(LOG_FILE):
690
- with open(LOG_FILE, "r", encoding="utf-8") as f:
691
- logs = json.load(f)
692
- else:
693
- logs = []
694
- logs.append(entry)
695
- with open(LOG_FILE, "w", encoding="utf-8") as f:
696
- json.dump(logs, f, ensure_ascii=False, indent=2)
697
  except Exception as e:
698
- print(f"[Log error] {e}")
699
-
700
- # -------------------- Intent routing --------------------
701
- @dataclass
702
- class Route:
703
- audience: str
704
- intent: str
705
- language: str
706
-
707
- def route_intent(text: str, forced_audience: str | None = None) -> Route:
708
- lang = "ar" if is_arabic(text) else "en"
709
- text_l = text.lower() if text else ""
710
- match_label = None
711
- for label, kws in KEYS.items():
712
- for kw in kws:
713
- if kw in text_l:
714
- match_label = label
715
- break
716
- if match_label:
717
- break
718
- audience = AUDIENCE_MAP.get(match_label, "general")
719
- if forced_audience and forced_audience in {"student", "university", "company"}:
720
- audience = forced_audience
721
- return Route(audience=audience, intent=match_label or "general", language=lang)
722
-
723
- # -------------------- call_llm --------------------
724
- def call_llm(user_message: str, history: List[Dict[str, str]], route: Route, temperature: float = 0.6, max_tokens: int = 512) -> str:
725
- messages = [
726
- {"role": "system", "content": SYSTEM_PROMPT_BASE},
727
- {"role": "system", "content": CONTEXT_INJECT_TEMPLATE.format(audience=route.audience, intent=route.intent)}
728
- ]
729
- # include last few turns
730
- MAX_TURNS = 3
731
- trimmed = history[-MAX_TURNS*2:] if history else []
732
- messages.extend(trimmed)
733
- messages.append({"role": "user", "content": user_message})
734
- try:
735
- resp = llm_client.chat.completions.create(
736
- model=MODEL_NAME,
737
- messages=messages,
738
- temperature=temperature,
739
- max_tokens=max_tokens
 
 
 
 
 
 
740
  )
741
- return resp.choices[0].message.content or ""
742
- except Exception as e:
743
- print(f"[LLM Error] {e}")
744
- return ""
745
 
746
- # -------------------- Main respond flow (Gradio) --------------------
747
- with gr.Blocks(css="""
748
- .chatbot {height: 500px; overflow: auto;}
749
- .user-bubble {background-color: #DCF8C6; padding: 10px; border-radius: 12px; max-width: 75%; float: right; clear: both; margin: 5px; word-wrap: break-word;}
750
- .bot-bubble {background-color: #F1F0F0; padding: 10px; border-radius: 12px; max-width: 75%; float: left; clear: both; margin: 5px; word-wrap: break-word;}
751
- .chatbox-container {display: flex; gap: 8px; margin-top: 10px;}
752
- .bot-bubble table {border-collapse: collapse; width: 100%;}
753
- .bot-bubble th, .bot-bubble td {border: 1px solid #ddd; padding: 8px; text-align: left;}
754
- .bot-bubble th {background-color: #e9e9e9;}
755
- """) as demo:
756
 
757
- gr.Markdown("# 🤖 EduNatives Assistant")
 
 
 
 
 
 
 
 
 
 
 
 
758
 
759
- with gr.Row():
760
- audience_dd = gr.Dropdown(label="Audience", choices=["Auto", "Student", "University-Research", "Company"], value="Auto", interactive=True)
761
- clear_btn = gr.Button("🧹 Clear Chat")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
762
 
763
- status = gr.Markdown("Status: Ready.")
764
- chatbot_html = gr.HTML("<div class='chatbot' id='chatbot'></div>")
765
- chat_history_state = gr.State([])
766
- user_id_state = gr.State("default_user")
767
 
768
- with gr.Row(elem_classes="chatbox-container"):
769
- msg = gr.Textbox(placeholder="اكتب سؤالك هنا... / Ask your question here...", lines=2, scale=4, autofocus=True)
770
- file_uploader = gr.File(label="Upload Document (.txt, .pdf, .docx)", file_types=[".txt", ".pdf", ".docx"], file_count="single", interactive=True)
771
- with gr.Column(scale=1, min_width=120):
772
- send_btn = gr.Button("➡️ Send", scale=1, variant="primary")
773
-
774
- def respond(user_text: str, file_obj: Any, history: List[Dict[str, str]], audience_choice: str, user_id: str):
775
- user_text = (user_text or "").strip()
776
- # process file if exists
777
- doc_info = process_uploaded_file(file_obj) if file_obj else None
778
- if not user_text and not doc_info:
779
- return "", format_chat_html(history), history, "Status: Please type a message or upload a file.", None, user_id
780
-
781
- # build combined input
782
- llm_input = user_text
783
- if doc_info and "content" in doc_info:
784
- llm_input = f"Based on the document content below, answer the question.\n\n---DOCUMENT---\n{doc_info['content'][:6000]}\n---END DOCUMENT---\n\nQuestion: {user_text}"
785
-
786
- forced = {"Student": "student", "University-Research": "university", "Company": "company"}.get(audience_choice)
787
- route = route_intent(llm_input, forced_audience=forced)
788
- status_text = f"**Audience**: {route.audience} | **Intent**: {route.intent} | **Lang**: {route.language.upper()}"
789
-
790
- # quick CV skills if provided
791
- cv_profile = doc_info.get("profile") if doc_info and "profile" in doc_info else {"skills": [], "experience": []}
792
-
793
- # Decide RAG target based on intent / keywords
794
- text_lower = (user_text or "").lower()
795
- wants_project = any(k in text_lower for k in KEYS.get("project_query", []))
796
- wants_job = any(k in text_lower for k in KEYS.get("company_post_jobs", [])) or any(k in text_lower for k in KEYS.get("apply_job_opportunity", []))
797
- wants_opp = any(k in text_lower for k in KEYS.get("student_internships", []))
798
-
799
- final_answer = ""
800
- retrieved = []
801
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
802
  try:
803
- if wants_project or route.intent == "project_query":
804
- final_answer, retrieved = rag_answer(user_text or (doc_info or {}).get("content",""), "Project", top_k=5)
805
- if not final_answer:
806
- final_answer = KB.get("student_internships", {}).get(route.language, "") if route.intent == "student_internships" else ""
807
- elif wants_job or route.intent == "apply_job_opportunity" or route.intent == "company_post_jobs":
808
- # try jobs first
809
- final_answer, retrieved = rag_answer(user_text or (doc_info or {}).get("content",""), "Job", top_k=5)
810
- if not final_answer:
811
- # fallback to ops
812
- final_answer, retrieved = rag_answer(user_text or (doc_info or {}).get("content",""), "Opportunities", top_k=5)
813
- elif wants_opp or route.intent == "student_internships":
814
- final_answer, retrieved = rag_answer(user_text or (doc_info or {}).get("content",""), "Opportunities", top_k=5)
815
- else:
816
- # default LLM response with context
817
- final_answer = call_llm(llm_input, history, route) or KB.get("student_registration", {}).get(route.language, "Sorry, I don't have info.")
818
  except Exception as e:
819
- print(f"[Respond Error] {e}")
820
- final_answer = call_llm(llm_input, history, route) or KB.get("student_registration", {}).get(route.language, "")
821
-
822
- # --- Application flow: if user indicated they want to apply (keywords) ---
823
- wants_apply = any(kw in text_lower for kw in KEYS.get("apply_job_opportunity", []))
824
- app_summary = ""
825
- generated_cover = ""
826
- if wants_apply:
827
- target = retrieved[0] if retrieved else None
828
- cover_text = ""
829
- if target:
830
- # build cover letter prompt using target and CV text if present
831
- cover_prompt = f"Write a concise 3-paragraph cover letter applying for this role:\nRole details: {json.dumps(target, ensure_ascii=False)[:1500]}\n"
832
- if doc_info and "content" in doc_info:
833
- cover_prompt += f"\nApplicant CV summary: {doc_info['content'][:2000]}\n"
834
- cover_prompt += "\nWrite the letter in the same language as the user."
835
- try:
836
- resp = llm_client.chat.completions.create(
837
- model=MODEL_NAME,
838
- messages=[{"role":"system","content":SYSTEM_PROMPT_BASE},{"role":"user","content":cover_prompt}],
839
- temperature=0.3,
840
- max_tokens=512
841
- )
842
- cover_text = resp.choices[0].message.content or ""
843
- except Exception as e:
844
- print(f"[Cover generation error] {e}")
845
- cover_text = "I can help craft a cover letter, but an error occurred while generating it."
846
-
847
- application = {
848
- "applicationId": str(uuid.uuid4()),
849
- "jobId": target.get("id") or target.get("jobId"),
850
- "opportunityId": target.get("id") or target.get("opportunityId"),
851
- "applicantName": "Unknown",
852
- "applicantEmail": "Unknown",
853
- "coverLetter": cover_text,
854
- "cvText": doc_info.get("content","")[:4000] if doc_info else "",
855
- "createdAt": datetime.datetime.now().isoformat()
856
- }
857
- saved = save_application_to_weaviate(application)
858
- app_summary = "✅ Application prepared and saved." if saved else "⚠️ Application could not be saved."
859
- generated_cover = cover_text
860
- else:
861
- app_summary = "لم أجد وظيفة/فرصة مناسبة تلقائياً من النتائج. أرسل عنوان الوظيفة أو اختر من النتيجة."
862
-
863
- # --- Team creation flow (join_team) ---
864
- team_created_msg = ""
865
- wants_team = any(k in text_lower for k in KEYS.get("join_team", []))
866
- if wants_team and (wants_project or route.intent == "join_team"):
867
- # attempt to suggest members using cv skills or create empty team entry
868
- suggested_members = []
869
- if cv_profile and cv_profile.get("skills"):
870
- # use skills to search opportunities maybe get studentName fields
871
- matches = query_weaviate_collection("Opportunities", " ".join(cv_profile.get("skills", [])), limit=5)
872
- for m in matches:
873
- name = m.get("studentName") or m.get("student") or m.get("name")
874
- if name:
875
- suggested_members.append(name)
876
- if not suggested_members:
877
- team_props = {
878
- "teamId": str(uuid.uuid4()),
879
- "name": f"Team for project - {uuid.uuid4().hex[:6]}",
880
- "projectId": retrieved[0].get("globalId") if retrieved and retrieved[0].get("globalId") else None,
881
- "members": [],
882
- "createdAt": datetime.datetime.utcnow().isoformat()+"Z",
883
- "creatorId": user_id
884
- }
885
- saved_team = save_team_to_weaviate(team_props)
886
- if saved_team:
887
- team_created_msg = f"✅ Team created with id {team_props['teamId']}. يمكنك إضافة أعضاء لاحقًا."
888
- else:
889
- team_created_msg = "⚠️ لم أتمكن من إنشاء الفريق الآن."
890
  else:
891
- team_props = {
892
- "teamId": str(uuid.uuid4()),
893
- "name": f"Team for project - {uuid.uuid4().hex[:6]}",
894
- "projectId": retrieved[0].get("globalId") if retrieved and retrieved[0].get("globalId") else None,
895
- "members": suggested_members,
896
- "createdAt": datetime.datetime.utcnow().isoformat()+"Z",
897
- "creatorId": user_id
898
- }
899
- saved_team = save_team_to_weaviate(team_props)
900
- if saved_team:
901
- team_created_msg = f"✅ Team created with members: {', '.join(suggested_members)}"
902
- else:
903
- team_created_msg = "⚠️ لم أتمكن من إنشاء الفريق الآن."
904
-
905
- # Save a short memory entry
906
- try:
907
- sess = str(uuid.uuid5(uuid.NAMESPACE_DNS, (user_id or "anon") + (user_text or "")[:50]))
908
- mem_text = f"User: {user_text[:300]} | Action: RAG on { 'Project' if wants_project else 'Job' if wants_job else 'Opportunities' if wants_opp else 'LLM' }"
909
- save_memory_to_weaviate(sess, mem_text)
910
- except Exception as e:
911
- print(f"[Memory Save Error] {e}")
912
-
913
- # Prepare final message (answer + top results + app/team status)
914
- message_parts = []
915
- if final_answer:
916
- message_parts.append(final_answer)
917
- if retrieved:
918
- list_lines = []
919
- for item in retrieved[:5]:
920
- title = item.get("title") or item.get("jobTitle") or item.get("globalId") or "No Title"
921
- meta = item.get("companyName") or item.get("topic") or item.get("shortDescription","")
922
- list_lines.append(f"- **{title}** | {meta}")
923
- if list_lines:
924
- message_parts.append("\n\n**Top results:**\n" + "\n".join(list_lines))
925
- if wants_apply:
926
- message_parts.append("\n\n**Application status:** " + app_summary)
927
- if generated_cover:
928
- message_parts.append("\n\n**Generated Cover Letter:**\n" + generated_cover)
929
- if team_created_msg:
930
- message_parts.append("\n\n" + team_created_msg)
931
-
932
- final_message_to_user = "\n\n".join([p for p in message_parts if p])
933
-
934
- # Update history
935
- user_message_for_history = user_text
936
- if doc_info and doc_info.get("filename"):
937
- user_message_for_history += f"\n\n*📎 [File Attached: {doc_info.get('filename')}] *"
938
-
939
- history.append({"role": "user", "content": user_message_for_history})
940
- history.append({"role": "assistant", "content": final_message_to_user or "عذراً، لم أجد إجابة مناسبة الآن."})
941
-
942
- # log interaction
943
- try:
944
- log_interaction(user_text, route, final_message_to_user)
945
- except Exception:
946
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
947
 
948
- # Return values: clear input field, updated html, updated history, status, clear file uploader, keep user_id
949
- return "", format_chat_html(history), history, status_text, None, user_id
950
 
951
- def clear_chat():
952
- return "", [], "Status: Ready.", None, "default_user"
 
 
 
 
 
953
 
954
- # Bind events
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
955
  send_btn.click(
956
- respond,
957
- inputs=[msg, file_uploader, chat_history_state, audience_dd, user_id_state],
958
- outputs=[msg, chatbot_html, chat_history_state, status, file_uploader, user_id_state],
959
  queue=True
960
  )
961
- msg.submit(
962
- respond,
963
- inputs=[msg, file_uploader, chat_history_state, audience_dd, user_id_state],
964
- outputs=[msg, chatbot_html, chat_history_state, status, file_uploader, user_id_state],
 
 
965
  queue=True
966
  )
 
967
  clear_btn.click(
968
- clear_chat,
969
- outputs=[msg, chatbot_html, chat_history_state, status, file_uploader, user_id_state],
 
970
  queue=False
971
  )
972
 
 
973
  if __name__ == "__main__":
974
  demo.launch(debug=True)
 
343
  # if __name__ == "__main__":
344
  # demo.launch(debug=True)
345
 
346
+ # NOTE: read the second version of the code (below).
347
 
 
 
 
348
  from __future__ import annotations
349
  import os
 
 
350
  import re
351
  import uuid
352
+ import json
353
+ import time
354
+ import fitz # PyMuPDF
355
+ import docx
356
+ import markdown
357
+ from datetime import datetime, timezone
358
  from typing import List, Dict, Any, Optional
359
+ from dataclasses import dataclass
360
 
 
361
  import gradio as gr
362
  from openai import OpenAI
 
 
363
  import weaviate
364
  from weaviate.classes.init import Auth
365
  from weaviate.classes.config import Configure, Property, DataType
366
+ from weaviate.classes.query import Filter
367
 
368
  # -------------------- Configuration --------------------
369
  MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b")
 
374
  # SECURITY(review): the fallback value below is a credential-looking literal
  # committed to source control. Presumably this is a live Weaviate API key --
  # confirm, rotate it, and supply the key only through the environment.
  WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY", "ZUd6clB5WmYzVGkxeU40cl96NTY5UkViUlVzY05Md3IzQ0JKelBZQmxGZHRPeGpCeGdxS1FUNnlYUkFFPV92MjAw")
375
 
376
  MEMORY_FILE = os.getenv("MEMORY_FILE", "chat_memory.json")
377
+ LOG_FILE = os.getenv("LOG_FILE", "interaction_logs.json")
 
378
  # -------------------- Clients --------------------
 
379
  llm_client = OpenAI(api_key=DEEPINFRA_API_KEY, base_url=BASE_URL)
380
 
 
381
  weaviate_client = weaviate.connect_to_weaviate_cloud(
382
  cluster_url=WEAVIATE_URL,
383
  auth_credentials=Auth.api_key(WEAVIATE_API_KEY),
384
  )
385
 
386
+ # -------------------- Helpers --------------------
387
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
388
 
389
  SYSTEM_PROMPT_BASE = (
390
  "You are **EduNatives Assistant**, a helpful, friendly, and precise academic/career guide. "
 
403
  "- Ensure that all generated prompts are phrased using positive reinforcement."
404
  )
405
 
 
 
 
 
406
 
def get_rfc3339_time() -> str:
    """Return the current UTC time as an RFC 3339 timestamp ending in ``Z``.

    Returns:
        The ISO-8601 rendering of the current UTC time with the ``+00:00``
        offset replaced by ``Z``.
    """
    # ``timezone.utc`` is used because the file imports only ``datetime`` and
    # ``timezone`` from the datetime module; a bare ``UTC`` name is not in
    # scope and raised NameError on the first call.
    return datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z')
410
+
411
+ # --- CV Skills Extraction (Regex baseline, can replace with NLP model later) ---
412
# Canonical spellings of every skill the baseline extractor recognises.
# The regex below is generated from this tuple, so adding a skill is a
# one-line change and the casing returned to callers is always the casing
# written here.
_SKILL_NAMES = (
    "Natural Language Processing", "Building Information Modeling",
    "Search Engine Optimization", "Search Engine Marketing",
    "Aerospace Engineering & Management", "Computational Fluid Dynamics",
    "Kotlin Multiplatform", "Google Cloud Platform", "Social Media Marketing",
    "Aerospace Engineering", "Microsoft SQL Server", "Amazon Web Services",
    "Finite Element Analysis", "Technology-based Management",
    "Autodesk Inventor", "Emotional Intelligence",
    "Aerospace Engineering & Operations Management", "Content Marketing",
    "Presentation Skills", "Interpersonal Skills", "Critical Thinking",
    "Financial Modeling", "Decision Making", "Process Improvement",
    "Time Management", "Lean Manufacturing", "Project Management",
    "Microsoft Excel", "Data Visualization", "Computer Vision",
    "Machine Learning", "Deep Learning", "Attention to Detail", "Six Sigma",
    "Risk Analysis", "Data Analysis", "Data Science", "Communication",
    "Collaboration", "Teamwork", "Leadership", "Management", "Adaptability",
    "Creativity", "Innovation", "Negotiation", "Android", "Angular",
    "Ansible", "Apache", "ArcGIS", "Arduino", "Asana", "ASP.NET", "AutoCAD",
    "Azure", "Bash", "BIM", "Business Analysis", "C++", "C#", "CAM",
    "Cassandra", "CATIA", "CentOS", "Chef", "CI/CD", "Civil 3D", "CRM", "CSS",
    "Data Mining", "Django", "Docker", "Elasticsearch", "Email Marketing",
    "ERP", "ETABS", "ETL", "Express.js", "Facebook Ads", "Firebase", "Flask",
    "Flutter", "FPGA", "Fusion 360", "GCP", "Git", "GitHub", "GitLab", "Go",
    "Google Ads", "Google Analytics", "GraphQL", "Hadoop", "HTML", "HubSpot",
    "iOS", "Java", "JavaScript", "Jenkins", "Jira", "Jupyter Notebook",
    "Kanban", "Keras", "Kotlin", "Kubernetes", "LabVIEW", "Laravel", "LESS",
    "Linux", "Lua", "macOS", "Marketo", "MATLAB", "Matplotlib", "MongoDB",
    "Multisim", "MySQL", "Nginx", "NLP", "Node.js", "NoSQL", "Nuxt.js",
    "NumPy", "Next.js", "Objective-C", "Oracle Database", "Oracle", "OrCAD",
    "Pandas", "Perl", "PHP", "PLC", "Plotly", "PostgreSQL", "Power BI",
    "PowerShell", "Problem Solving", "Puppet", "PSpice", "Python", "PyTorch",
    "Raspberry Pi", "React Native", "React", "Red Hat", "Redis", "Revit",
    "Ruby on Rails", "Ruby", "Rust", "Salesforce", "SAP2000", "SAP", "Sass",
    "SCADA", "Scala", "Scikit-learn", "Scrum", "Seaborn", "SEM", "SEO",
    "Simulink", "SketchUp", "Slack", "SolidWorks", "Spring Boot", "SQL",
    "SQLAlchemy", "SwiftUI", "Swift", "Tableau", "Terraform", "TensorFlow",
    "Trello", "TypeScript", "Ubuntu", "Verilog", "VHDL", "Vue.js",
    "Waterfall", "Windows", "WordPress", "Xamarin", "Analytical Skills",
)

# Skill names that are also everyday English words. An all-lowercase hit
# ("go to market", "less than") is treated as prose, not as a skill.
_AMBIGUOUS_SKILLS = frozenset({"Go", "LESS"})

# Lower-cased name -> canonical spelling, used to normalise regex hits.
_CANONICAL_SKILL = {name.lower(): name for name in _SKILL_NAMES}


def _compile_skill_regex(names) -> "re.Pattern":
    """Build one case-insensitive pattern that matches any name in *names*."""
    # Longest names first so "Aerospace Engineering & Operations Management"
    # is not cut short by the shorter "Aerospace Engineering" alternative.
    ordered = sorted(set(names), key=lambda s: (-len(s), s))
    # Words inside a name may be separated by any whitespace run, because
    # PDF/DOCX extraction frequently turns a space into a line break.
    alternatives = (
        r"\s+".join(re.escape(word) for word in name.split())
        for name in ordered
    )
    # Lookarounds instead of \b: \b needs a word character on one side, so it
    # can never match after the "+" of "C++" or the "#" of "C#".
    return re.compile(
        r"(?<!\w)(?:" + "|".join(alternatives) + r")(?!\w)",
        re.IGNORECASE,
    )


# --- CV Skills Extraction (Regex baseline, can replace with NLP model later) ---
_SKILL_REGEX = _compile_skill_regex(_SKILL_NAMES)


def extract_skills_from_text(cv_text: str) -> List[str]:
    """Return the known skills mentioned in *cv_text*.

    Each skill appears once, in its canonical spelling (e.g. "SQL",
    "Machine Learning"), ordered by first appearance in the text so the
    result is deterministic. Empty or ``None`` input yields ``[]``.
    """
    found = {}  # dict used as an insertion-ordered set
    for match in _SKILL_REGEX.finditer(cv_text or ""):
        raw = match.group(0)
        canonical = _CANONICAL_SKILL.get(" ".join(raw.split()).lower())
        if canonical is None:
            continue
        if canonical in _AMBIGUOUS_SKILLS and raw.islower():
            continue
        found.setdefault(canonical, None)
    return list(found)
417
+
418
+ # --- Process uploaded file (PDF, DOCX, TXT) ---
419
def process_uploaded_file(file_obj: Any) -> Optional[dict]:
    """Extract text and skills from an uploaded CV (.pdf, .docx or .txt).

    Args:
        file_obj: Either a filesystem path (``str`` / ``os.PathLike``) or an
            object exposing the path as ``.name``; both shapes are accepted
            because upload widgets differ in which one they hand over.

    Returns:
        ``None`` when nothing was uploaded; ``{"error": message}`` when the
        file type is unsupported or reading fails; otherwise
        ``{"content": text, "skills": [...], "filename": basename}``.
    """
    if not file_obj:
        return None

    if isinstance(file_obj, (str, os.PathLike)):
        file_path = os.fspath(file_obj)
    else:
        file_path = getattr(file_obj, "name", None)
    if not file_path:
        return {"error": "Error processing file: uploaded object has no file path"}

    filename = os.path.basename(file_path)
    lowered = filename.lower()
    try:
        if lowered.endswith(".pdf"):
            with fitz.open(file_path) as doc:
                text_content = "".join(page.get_text() for page in doc)
        elif lowered.endswith(".docx"):
            paragraphs = docx.Document(file_path).paragraphs
            text_content = "".join(p.text + "\n" for p in paragraphs)
        elif lowered.endswith(".txt"):
            # errors="replace": one badly encoded byte should not reject the
            # whole CV; the skill scan only needs approximately correct text.
            with open(file_path, "r", encoding="utf-8", errors="replace") as f:
                text_content = f.read()
        else:
            return {"error": f"Unsupported file type: {filename}"}

        skills = extract_skills_from_text(text_content)
        return {"content": text_content.strip(), "skills": skills, "filename": filename}

    except Exception as e:
        # Deliberate catch-all: any parser failure is reported to the caller
        # as an error dict rather than propagating.
        return {"error": f"Error processing file {filename}: {e}"}
445
  ARABIC_RANGE = (
446
  (0x0600, 0x06FF), (0x0750, 0x077F), (0x08A0, 0x08FF),
447
  (0xFB50, 0xFDFF), (0xFE70, 0xFEFF), (0x1EE00, 0x1EEFF)
 
455
  return True
456
  return False
457
 
458
+ # --- Chat history HTML formatter (for Gradio) ---
459
def format_chat_html(history: List[Dict[str, str]]) -> str:
    """Render the chat history as the HTML shown in the chat panel.

    Args:
        history: Messages as ``{"role": ..., "content": ...}`` dicts.
            ``None`` is treated as an empty conversation.

    Returns:
        A ``<div class='chatbot'>`` wrapper containing one bubble per
        message. User messages are HTML-escaped; every other role is
        rendered from Markdown (with table support).
    """
    from html import escape  # local import: keeps this block self-contained

    parts = ["<div class='chatbot'>"]
    for msg in history or []:
        role = msg["role"]
        content = msg["content"]
        if role == "user":
            # User text is untrusted: escape it so typed markup cannot inject
            # tags/scripts or break the surrounding layout.
            parts.append(f"<div class='user-bubble'>{escape(content)}</div>")
        else:
            # NOTE(review): Markdown passes raw HTML through, and some bot
            # replies echo user text — consider sanitising here as well.
            html_content = markdown.markdown(content, extensions=['tables'])
            parts.append(f"<div class='bot-bubble'>{html_content}</div>")
    parts.append("</div>")
    return "".join(parts)
471
+ # ================================
472
+ # Part 2 — Weaviate Collections + Query + RAG
473
+ # ================================
474
 
475
+ # -------------------- Ensure collections --------------------
476
+ def ensure_collections():
477
+ # Application collection
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
  if not weaviate_client.collections.exists("Application"):
479
  weaviate_client.collections.create(
480
  name="Application",
481
  properties=[
482
  Property(name="applicationId", data_type=DataType.TEXT),
483
  Property(name="jobId", data_type=DataType.TEXT),
 
484
  Property(name="applicantName", data_type=DataType.TEXT),
485
  Property(name="applicantEmail", data_type=DataType.TEXT),
486
  Property(name="coverLetter", data_type=DataType.TEXT),
487
  Property(name="cvText", data_type=DataType.TEXT),
488
+ Property(name="skills", data_type=DataType.TEXT_ARRAY),
489
  Property(name="createdAt", data_type=DataType.DATE),
490
  ],
491
  vectorizer_config=Configure.Vectorizer.none()
492
  )
493
 
494
+ # Team collection
495
+ if not weaviate_client.collections.exists("Team"):
496
+ weaviate_client.collections.create(
497
+ name="Team",
498
+ properties=[
499
+ Property(name="teamId", data_type=DataType.TEXT),
500
+ Property(name="name", data_type=DataType.TEXT),
501
+ Property(name="projectId", data_type=DataType.TEXT),
502
+ Property(name="members", data_type=DataType.TEXT_ARRAY),
503
+ Property(name="skills", data_type=DataType.TEXT_ARRAY),
504
+ Property(name="createdAt", data_type=DataType.DATE),
505
+ Property(name="creatorId", data_type=DataType.TEXT),
506
+ ],
507
+ vectorizer_config=Configure.Vectorizer.none()
508
+ )
509
+
510
+ # Memory collection
511
  if not weaviate_client.collections.exists("Memory"):
512
  weaviate_client.collections.create(
513
  name="Memory",
 
520
  vectorizer_config=Configure.Vectorizer.none()
521
  )
522
 
523
+ ensure_collections()
524
 
525
+ # -------------------- Query Weaviate --------------------
526
def query_weaviate_collection(class_name: str, query_text: str, limit: int = 5) -> List[dict]:
    """Search one Weaviate collection and return the hits' property dicts.

    A BM25 keyword query runs first. Only when it returns nothing, a wildcard
    ``like`` filter over the ``title`` and ``skills`` properties is tried.
    Any exception is printed and turned into an empty list.
    """
    try:
        target = weaviate_client.collections.get(class_name)

        # Pass 1: BM25 keyword search.
        keyword_hits = target.query.bm25(query=query_text, limit=limit)
        results = [hit.properties for hit in keyword_hits.objects]
        if results:
            return results

        # Pass 2: substring match on either field.
        wildcard = f"*{query_text}*"
        either_field = Filter.any_of([
            Filter.by_property("title").like(wildcard),
            Filter.by_property("skills").like(wildcard),
        ])
        loose_hits = target.query.fetch_objects(limit=limit, filters=either_field)
        return [hit.properties for hit in loose_hits.objects]
    except Exception as e:
        print(f"[Weaviate Query Error] {e}")
        return []
547
 
548
+ # -------------------- RAG Prompt Builder --------------------
549
def build_rag_prompt(user_question: str, retrieved_items: List[dict], class_name: str) -> str:
    """Build the user-turn prompt for a retrieval-augmented answer.

    Args:
        user_question: The question exactly as the user asked it.
        retrieved_items: Property dicts of the retrieved records.
        class_name: Source collection. ``"Job"`` records are reduced to a
            curated set of labelled fields; records from any other collection
            are included whole with every value stringified.

    Returns:
        The instruction text followed by a "Retrieved Data" section holding
        one JSON block per record. The instructions tell the model to answer
        only from that section, so the section must always be present.
    """
    context_parts = []
    for i, item in enumerate(retrieved_items or [], 1):
        if class_name == "Job":
            employment = item.get("employmentType") or []
            if not isinstance(employment, str):
                # Stored as an array; a null value must not crash the join.
                employment = ", ".join(str(e) for e in employment)
            details = {
                "Title": item.get("title"),
                "Company": item.get("companyName"),
                "Job Type": item.get("jobType"),
                "Employment": employment,
                "Location": item.get("workplaceType"),
                "Description": item.get("description"),
                "Skills": item.get("skills", []),
                "Requirements": item.get("requirements"),
                "Salary": str(item.get("salaryDetails", {})),
            }
        else:
            details = {k: str(v) for k, v in item.items()}

        # default=str: this JSON is prompt text, so values json cannot encode
        # natively are rendered via str() instead of raising.
        record_json = json.dumps(details, indent=2, ensure_ascii=False, default=str)
        context_parts.append(f"--- Record {i} ---\n{record_json}")

    context_block = "\n\n".join(context_parts)

    return f"""
User Question: "{user_question}"
You are an expert AI assistant and a skilled data analyst. Your primary mission is to take structured data (in JSON format), analyze it completely, and present all its information to the user in a clear, comprehensive, and conversational summary.

**Primary Directive:** Your ONLY source of information for this task is the structured JSON data provided below under "Retrieved Data". If the data section is empty, you must state that no results were found that match the search and stop. Do not use your general knowledge under any circumstances.

**Your Core Instructions:**
1. **Analyze the Entire Object:** When you receive a JSON object, your first step is to read and understand every single key and value, including nested objects and arrays. Do not ignore any piece of information.
2. **Group Related Information:** Organize your output logically. For example, group company details together, role requirements together, dates and deadlines together, etc. Use clear Markdown headings (`###`) for these logical groups to improve readability.
3. **Convert Data into Natural Language:** Do not just list the data. Convert it into readable, engaging sentences. For example, instead of `workplaceType: "HYBRID"`, say "This is a hybrid role, which offers the flexibility of working both remotely and from the office."
4. **Handle All Data Types Intelligently:**
    * For **arrays** (like `skills` or `categories`), list them as clear bullet points or integrate them into a sentence.
    * For **nested objects** (like `salaryDetails` or `careerLevel`), explain the contents of the object clearly.
    * For **booleans** (true/false), explain their meaning in context (e.g., `published: true` should be "This position is currently published and accepting applications.").
5. **Add a Concluding Call to Action:** After presenting all the details, conclude with a helpful "Next Steps" or "How to Apply" section. For a job, this should be a practical guide. For a project, it might be "How to Get Involved."

**Retrieved Data:**
{context_block}
"""
588
+
589
+ # -------------------- RAG Answer --------------------
590
  def rag_answer(user_question: str, class_name: str, top_k: int = 5) -> (str, List[dict]):
591
  retrieved = query_weaviate_collection(class_name, user_question, limit=top_k)
592
  if not retrieved:
593
  return "", []
594
+
595
  prompt = build_rag_prompt(user_question, retrieved, class_name)
596
+
597
  try:
598
  resp = llm_client.chat.completions.create(
599
  model=MODEL_NAME,
 
601
  {"role": "system", "content": SYSTEM_PROMPT_BASE},
602
  {"role": "user", "content": prompt}
603
  ],
604
+ temperature=0.3,
605
+ max_tokens=4096
606
  )
607
  answer = resp.choices[0].message.content or ""
608
  except Exception as e:
609
  print(f"[RAG LLM Error] {e}")
610
  answer = ""
611
+
612
  return answer, retrieved
613
+ # ================================
614
+ # ================================
615
+ # Part 3 — Conversation State Machine + Embedding Recommendations
616
+ # ================================
617
 
618
+ import numpy as np # used for cosine similarity in recommendations
 
 
 
 
 
 
 
 
 
 
 
 
619
 
620
+ # -------------------- Embedding helpers --------------------
621
def compute_embedding(text: str) -> List[float]:
    """Embed *text* through the shared OpenAI-compatible client.

    Handles both a dict-shaped response and an attribute-style response
    object. Returns the first embedding vector, or ``[]`` when the call
    fails (the error is printed) or the response carries no data.
    """
    try:
        response = llm_client.embeddings.create(
            model="Qwen/Qwen3-Embedding-8B",
            input=text,
            encoding_format="float",
        )

        # Dict-shaped response: {"data": [{"embedding": [...]}, ...]}
        if isinstance(response, dict):
            rows = response.get("data", [])
            if rows and isinstance(rows[0], dict):
                return rows[0].get("embedding", [])

        # Attribute-style response: response.data[0].embedding
        if hasattr(response, "data") and response.data:
            return response.data[0].embedding
    except Exception as e:
        print("[compute_embedding] error:", e)
    return []
643
 
644
def cosine_similarity(a: List[float], b: List[float]) -> float:
    """Return the cosine of the angle between vectors *a* and *b*.

    Yields ``0.0`` when either vector is empty or has zero norm, and also
    when the computation raises (for example on mismatched lengths); in the
    latter case the error is printed.
    """
    try:
        left = np.array(a, dtype=float)
        right = np.array(b, dtype=float)

        if left.size == 0 or right.size == 0:
            return 0.0

        scale = np.linalg.norm(left) * np.linalg.norm(right)
        if scale == 0:
            return 0.0

        return float(np.dot(left, right) / scale)
    except Exception as e:
        print("[cosine_similarity] error:", e)
        return 0.0
657
 
658
+ # -------------------- Recommendations by embedding --------------------
659
def recommend_jobs_by_embedding(cv_text: str, top_k: int = 5, jobs_fetch_limit: int = 200) -> str:
    """Rank stored jobs against a CV by embedding similarity.

    Steps:
      1. Represent the user by the skills found in the CV, or by the first
         500 characters of the CV when no skill is detected.
      2. Embed that representation.
      3. Fetch up to *jobs_fetch_limit* objects from the "Job" collection.
      4. Embed each job (skills + title + first 2000 chars of description)
         and score it with cosine similarity; jobs whose embedding fails are
         skipped.
      5. Return the *top_k* jobs as Markdown, or a warning string when a
         step cannot be completed.

    Job embeddings are computed on the fly, one call per job; for large
    collections they should be precomputed and stored.
    """
    # --- user side ---
    detected = extract_skills_from_text(cv_text or "")
    if detected:
        user_text = " ".join(detected)
    else:
        user_text = (cv_text or "")[:500]
    user_emb = compute_embedding(user_text)
    if not user_emb:
        return "⚠️ Unable to compute embedding for your CV. Try again or check API keys."

    # --- job side ---
    try:
        jobs_col = weaviate_client.collections.get("Job")
        fetched = jobs_col.query.fetch_objects(limit=jobs_fetch_limit)
        if not fetched.objects:
            return "⚠️ No jobs found in the database."
    except Exception as e:
        print("[recommend_jobs_by_embedding] Weaviate fetch error:", e)
        return "⚠️ Could not fetch jobs from the database."

    ranked = []
    for job_obj in fetched.objects:
        job = job_obj.properties

        pieces = []
        if job.get("skills"):
            pieces.append(" ".join(job.get("skills")))
        if job.get("title"):
            pieces.append(job.get("title"))
        if job.get("description"):
            pieces.append((job.get("description") or "")[:2000])
        job_text = " ".join(pieces).strip() or (job.get("title") or "")

        job_emb = compute_embedding(job_text)
        if job_emb:
            ranked.append((cosine_similarity(user_emb, job_emb), job))

    if not ranked:
        return "⚠️ No jobs could be embedded / compared."

    # Highest score first; keep only the best top_k.
    best = sorted(ranked, key=lambda pair: pair[0], reverse=True)[:top_k]

    # --- formatting ---
    cards = []
    for score, job in best:
        title = job.get("title", "No title")
        company = job.get("companyName", "Unknown company")
        job_id = job.get("jobId", "")
        salary = job.get("salary") or job.get("salaryDetails") or "Not specified"
        skills_list = job.get("skills") or []
        description = (job.get("description") or "").strip()
        ellipsis = '...' if len(description) > 600 else ''
        cards.append(
            f"**{title}** at *{company}* \n"
            f"- Job ID: `{job_id}` \n"
            f"- Score: {score:.3f} \n"
            f"- Salary: {salary} \n"
            f"- Skills: {skills_list} \n"
            f"- Description: {description[:600]}{ellipsis} \n"
            f"---"
        )

    return "\n\n".join(cards)
 
 
 
 
 
 
 
 
 
733
 
734
+ # -------------------- Conversation Session helpers --------------------
735
def initial_session() -> dict:
    """Create a fresh conversation session.

    Shape::

        {
            "state": <current step of the state machine>,
            "data":  {<fields collected so far>},
        }

    ``state`` is one of:
      * ``"idle"``
      * apply flow: ``"apply_name"``, ``"apply_email"``, ``"apply_cover"``,
        ``"apply_wait_cv"``, ``"apply_jobtitle"``, ``"apply_confirm"``
      * team flow: ``"team_action"``, ``"team_create_name"``,
        ``"team_create_owner"``, ``"team_create_skills"``,
        ``"team_create_course"``, ``"team_create_idea"``,
        ``"team_join_name"``, ``"team_join_member"``, ``"team_join_skills"``
      * recommend flow: ``"recommend_wait_cv"``

    A new dict (with a new, empty ``data`` dict) is built on every call.
    """
    return dict(state="idle", data={})
747
 
748
def handle_uploaded_cv_for_session(session: dict, uploaded_file: Any) -> (str, dict):
    """Process a CV uploaded during a conversation and advance the flow.

    Args:
        session: Current session dict (``{"state": ..., "data": {...}}``).
        uploaded_file: The uploaded file, passed through to
            ``process_uploaded_file``.

    Returns:
        ``(bot_message, updated_session)``. In state ``apply_wait_cv`` the
        session moves on to ``apply_jobtitle``; in ``recommend_wait_cv``
        recommendations are produced and the session is reset; in any other
        state the CV is stored and a neutral acknowledgement is returned.
    """
    if not uploaded_file:
        return "⚠️ No file received.", session

    doc_info = process_uploaded_file(uploaded_file)
    if not doc_info or "error" in doc_info:
        reason = doc_info.get("error") if doc_info else "unknown error"
        return f"⚠️ Error processing uploaded CV: {reason}", session

    # Store the extracted text and skills. process_uploaded_file returns the
    # skills under the top-level "skills" key, so read them from there.
    data = session.setdefault("data", {})
    data["cvText"] = doc_info.get("content", "")
    data["cvSkills"] = doc_info.get("skills", [])

    st = session.get("state")
    if st == "apply_wait_cv":
        session["state"] = "apply_jobtitle"
        detected = data["cvSkills"]
        return f"CV received. Detected skills: {detected}. Which job title do you want to apply for? (type job title or 'any')", session
    if st == "recommend_wait_cv":
        # Compute recommendations and finish the flow.
        rec_text = recommend_jobs_by_embedding(data["cvText"], top_k=5)
        session = initial_session()
        return f"Here are recommended jobs based on your CV:\n\n{rec_text}", session

    # No flow is waiting for a CV: keep it for later use.
    return "CV uploaded and processed. What would you like to do next?", session
777
+
778
+ # -------------------- Main message handler (state machine) --------------------
779
def _handle_idle_state(session: dict, text: str) -> (str, dict, bool):
    """Answer a message received while no guided flow is active.

    Order: greetings -> guided-flow triggers -> knowledge base -> job RAG ->
    free-form LLM answer -> canned fallback. Flow triggers are checked before
    any retrieval or LLM call so that "apply" / "team" / "recommend" always
    start their flow and never cost a model call.
    """
    low = text.lower()

    # 1) greetings / identity
    if low in ("hi", "hello", "hey", "مرحبا", "ازيك", "السلام عليكم"):
        return "👋 Hello! How can I support you today? You can ask about jobs, teams, or recommendations.", session, False
    if low in ("who are you?", "who are you", "انت مين", "من انت"):
        return (
            "👋 I am EduNatives Assistant — your friendly academic and career guide. "
            "I help students, universities, and companies connect through opportunities, projects, and mentoring.",
            session,
            False,
        )

    # 2) guided flows
    if any(k in low for k in ["apply", "i want to apply", "i'd like to apply", "أريد التقديم", "عايز اقدم", "اريد التقديم"]):
        session["state"] = "apply_name"
        session["data"] = {}
        return "Okay — let's start your application. What's your full name?", session, False

    if any(k in low for k in ["team", "create team", "join team", "create", "join", "انضم", "انشاء فريق"]):
        session["state"] = "team_action"
        session["data"] = {}
        return "Do you want to create a team or join an existing team? (reply 'create' or 'join')", session, False

    if any(k in low for k in ["recommend", "recommendation", "jobs for me", "رشح", "ترشيح", "recommend me jobs"]):
        session["state"] = "recommend_wait_cv"
        session["data"] = {}
        return "Please upload your CV to get job recommendations (use the Upload button).", session, True

    # 3) knowledge-base answer for the routed intent
    route = route_intent(text)
    kb_answer = kb_fallback(route)
    if kb_answer:
        return kb_answer, session, False

    # 4) RAG over the Job collection
    try:
        rag_ans, _ = rag_answer(text, "Job", top_k=5)
        if rag_ans:
            return rag_ans, session, False
    except Exception as e:
        print("[handle_user_message] rag error:", e)

    # 5) free-form LLM answer when nothing above produced a reply
    try:
        resp = llm_client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {"role": "system", "content": SYSTEM_PROMPT_BASE},
                {"role": "user", "content": text}
            ],
            temperature=0.3,
            max_tokens=4096
        )
        answer = resp.choices[0].message.content or ""
    except Exception as e:
        print("[handle_user_message] free LLM error:", e)
        return "⚠️ Sorry, I couldn't process that. Try again later.", session, False
    if answer.strip():
        return answer, session, False

    # 6) ultimate fallback
    return "Sorry I didn't understand that. You can say 'apply', 'create team', 'join team' or 'recommend'.", session, False


# -------------------- Main message handler (state machine) --------------------
def handle_user_message(session: dict, user_text: str, uploaded_file: Any = None) -> (str, dict, bool):
    """Advance the conversation by one user turn.

    Args:
        session: Current session dict; a falsy value starts a new session.
        user_text: Text typed by the user (may be empty for an upload).
        uploaded_file: File uploaded this turn, if any. When present it is
            handled by the CV-upload handler and *user_text* is not
            interpreted.

    Returns:
        ``(bot_reply, new_session, show_file_uploader)``.
    """
    session = session or initial_session()
    st = session.get("state", "idle")
    text = (user_text or "").strip()

    # quick reset, available from every state
    if text.lower() in ("cancel", "exit", "quit", "restart", "reset"):
        return "Conversation reset. How can I help you now?", initial_session(), False

    # file upload
    if uploaded_file:
        bot_msg, new_session = handle_uploaded_cv_for_session(session, uploaded_file)
        return bot_msg, new_session, False

    # ========== IDLE STATE ==========
    if st == "idle":
        return _handle_idle_state(session, text)

    # ========== APPLY FLOW ==========
    if st == "apply_name":
        session["data"]["applicantName"] = text or "Applicant"
        session["state"] = "apply_email"
        return "Thanks. What's your email address?", session, False

    if st == "apply_email":
        # Take the address out of a longer sentence when possible; otherwise
        # keep whatever the user typed.
        m = re.search(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)", text)
        session["data"]["applicantEmail"] = m.group(1) if m else text
        session["state"] = "apply_cover"
        return "Got it. Please type a short cover letter (or type 'skip' to skip).", session, False

    if st == "apply_cover":
        session["data"]["coverLetter"] = "" if text.lower() == "skip" else text
        session["state"] = "apply_wait_cv"
        return "Please upload your CV now (use the Upload button).", session, True

    if st == "apply_wait_cv":
        # A typed message while a CV is expected: keep the uploader visible
        # instead of falling through to the generic fallback, which hides it.
        return "Please upload your CV now (use the Upload button).", session, True

    if st == "apply_jobtitle":
        session["data"]["targetJobTitle"] = text
        found = query_weaviate_collection("Job", text, limit=3)
        cv_skills = [s.lower() for s in session["data"].get("cvSkills", [])]
        session["state"] = "apply_confirm"
        if not found:
            session["data"]["targetJobId"] = None
            return f"I couldn't find a job with that title. Do you want to apply for '{text}' anyway? (yes/no)", session, False

        job = found[0]
        job_skills = {s.lower() for s in (job.get("skills") or [])}
        overlap = len([s for s in cv_skills if s in job_skills])
        session["data"]["targetJobId"] = job.get("jobId")
        if overlap > 0:
            return (f"I found a job: {job.get('title')} at {job.get('companyName')}. "
                    f"Detected {overlap} overlapping skills. Do you want to confirm application? (yes/no)"), session, False
        return (f"I found {job.get('title')} at {job.get('companyName')}, but your CV skills do not overlap. "
                "Do you still want to proceed? (yes/no)"), session, False

    if st == "apply_confirm":
        if text.lower() in ("yes", "y", "نعم"):
            app = {
                "applicationId": str(uuid.uuid4()),
                "jobId": session["data"].get("targetJobId"),
                "applicantName": session["data"].get("applicantName"),
                "applicantEmail": session["data"].get("applicantEmail"),
                "coverLetter": session["data"].get("coverLetter", ""),
                "cvText": session["data"].get("cvText", ""),
                "skills": session["data"].get("cvSkills", []),
                "createdAt": get_rfc3339_time()
            }
            ok = save_application_to_weaviate(app)
            session = initial_session()
            return ("🎉 Your application has been submitted successfully. Good luck!" if ok
                    else "⚠️ Failed to save application. Please try again later."), session, False
        session = initial_session()
        return "Application cancelled. If you want to do something else, tell me.", session, False

    # ========== TEAM FLOW ==========
    if st == "team_action":
        low = text.lower()
        if "create" in low or "إنشاء" in low:
            session["state"] = "team_create_name"
            session["data"] = {}
            return "Great — what's the team name?", session, False
        if "join" in low or "انضم" in low:
            session["state"] = "team_join_name"
            session["data"] = {}
            return "Okay what's the name of the team you want to join?", session, False
        return "Please say 'create' to create a team or 'join' to join a team.", session, False

    if st == "team_create_name":
        session["data"]["team_name"] = text
        session["state"] = "team_create_owner"
        return "Team name saved. Who is the team owner (your name)?", session, False

    if st == "team_create_owner":
        session["data"]["owner"] = text
        session["state"] = "team_create_skills"
        return "Owner saved. Please list the team's skills (comma-separated).", session, False

    if st == "team_create_skills":
        session["data"]["skills"] = [s.strip() for s in text.split(",") if s.strip()]
        session["state"] = "team_create_course"
        return "Skills saved. (Optional) Enter course/subject name or type 'skip'.", session, False

    if st == "team_create_course":
        session["data"]["course"] = "" if text.lower() == "skip" else text
        session["state"] = "team_create_idea"
        return "Please write a short idea/description for the project.", session, False

    if st == "team_create_idea":
        session["data"]["idea"] = text
        team_props = {
            "teamId": str(uuid.uuid4()),
            "name": session["data"].get("team_name"),
            "projectId": None,
            "members": [session["data"].get("owner")],
            "skills": session["data"].get("skills", []),
            "creatorId": session["data"].get("owner"),
            "createdAt": get_rfc3339_time(),
            "idea": session["data"].get("idea", "")
        }
        saved = save_team_to_weaviate(team_props)
        session = initial_session()
        return (f"🎉 Team '{team_props['name']}' created! Members: {team_props['members']}" if saved
                else "⚠️ Failed to create team. Try again later."), session, False

    if st == "team_join_name":
        session["data"]["team_name"] = text
        session["state"] = "team_join_member"
        return "What's your name (to add you to the team)?", session, False

    if st == "team_join_member":
        session["data"]["member_name"] = text
        session["state"] = "team_join_skills"
        return "Enter your skills (comma-separated).", session, False

    if st == "team_join_skills":
        skills = [s.strip() for s in text.split(",") if s.strip()]
        resp = update_team_add_member(session["data"].get("team_name"), session["data"].get("member_name"), skills)
        session = initial_session()
        return resp, session, False

    # ========== RECOMMEND FLOW ==========
    if st == "recommend_wait_cv":
        return "Please upload your CV (use the Upload button).", session, True

    # fallback for an unknown state
    return "Sorry — I didn't understand that. You can say 'apply', 'create team', 'join team' or 'recommend'.", session, False
1001
+
1002
+
1003
+ # ================================
1004
+ # Part 4 — Gradio Chat UI wiring
1005
+ # ================================
1006
+
1007
+ import atexit
1008
+ # ensure Weaviate connection closes when the app exits
1009
+ atexit.register(lambda: weaviate_client.close())
1010
+
1011
+ # initial session state per user
1012
def create_initial_session_for_state():
    """Return a fresh session dict to seed the per-user ``gr.State``."""
    fresh_session = initial_session()
    return fresh_session
1014
+
1015
+ # helper to append to chat history (list of dicts)
1016
def append_to_history(history: List[Dict[str, str]], role: str, content: str) -> List[Dict[str, str]]:
    """Add one ``{"role", "content"}`` message to the chat history.

    A non-empty *history* is extended in place and returned; a ``None`` or
    empty *history* is replaced by a new single-message list.
    """
    entries = history if history else []
    entries.append(dict(role=role, content=content))
    return entries
1020
+
1021
+ # UI
1022
with gr.Blocks(css="""
.chatbot {height: 520px; overflow: auto;}
.user-bubble {background-color: #DCF8C6; padding: 10px; border-radius: 12px; max-width: 75%; float: right; clear: both; margin: 5px; word-wrap: break-word;}
.bot-bubble {background-color: #F1F0F0; padding: 10px; border-radius: 12px; max-width: 75%; float: left; clear: both; margin: 5px; word-wrap: break-word;}
.chatbox-container {display: flex; gap: 8px; margin-top: 10px;}
""") as demo:

    gr.Markdown("# 💬 EduNatives Conversational Job Portal")

    # Chat transcript, rendered as custom HTML by format_chat_html.
    chat_html = gr.HTML(format_chat_html([]))

    # Input row: message box + send button.
    with gr.Row(elem_classes="chatbox-container"):
        user_input = gr.Textbox(placeholder="Type your message here (e.g. 'apply', 'create team', 'recommend')", lines=2)
        send_btn = gr.Button("Send", variant="primary")

    # File upload row. The row itself stays visible; only its two children
    # are hidden and shown by the handlers. Hiding the row as well would keep
    # the uploader hidden, because no handler ever updates the row.
    with gr.Row() as file_row:
        cv_uploader = gr.File(label="Upload CV (.pdf/.docx/.txt)", file_count="single", file_types=[".pdf", ".docx", ".txt"], visible=False)
        upload_btn = gr.Button("Upload CV", visible=False)

    # Control buttons.
    with gr.Row():
        clear_btn = gr.Button("Reset Conversation")
    instructions = gr.Markdown("Commands: `apply`, `create team`, `join team`, `recommend` — the bot will guide you step-by-step.")

    # State that persists across turns.
    chat_history_state = gr.State([])
    session_state = gr.State(create_initial_session_for_state())

    # -------------------- handlers --------------------
    def handle_send(message: str, history: List[Dict[str, str]], session: dict):
        """Handle a click on Send.

        Records the user's message, asks ``handle_user_message`` for the
        reply, and returns, in output order: cleared textbox value, chat
        HTML, history, session, and visibility updates for the uploader and
        its button.
        """
        history = history or []
        session = session or initial_session()

        # Only non-blank messages are added to the transcript.
        if message and message.strip():
            history = append_to_history(history, "user", message.strip())

        bot_reply, new_session, show_uploader = handle_user_message(session, message or "", uploaded_file=None)

        history = append_to_history(history, "assistant", bot_reply or "…")
        html = format_chat_html(history)

        return "", html, history, new_session, gr.update(visible=show_uploader), gr.update(visible=show_uploader)

    def handle_upload(file_obj, history: List[Dict[str, str]], session: dict):
        """Handle a click on Upload CV.

        Passes the selected file to ``handle_user_message`` and returns chat
        HTML, history, session, and visibility updates for the uploader and
        its button. When no file is selected the user is asked to pick one
        and the uploader stays visible.
        """
        history = history or []
        session = session or initial_session()

        if not file_obj:
            history = append_to_history(history, "assistant", "⚠️ Please choose a file before pressing Upload CV.")
            return format_chat_html(history), history, session, gr.update(visible=True), gr.update(visible=True)

        # The upload may arrive as a path string or as an object with .name;
        # show only the base name, not the temporary directory.
        path = file_obj if isinstance(file_obj, str) else getattr(file_obj, "name", "uploaded_file")
        filename = os.path.basename(path) or "uploaded_file"
        history = append_to_history(history, "user", f"📎 Uploaded file: {filename}")

        bot_reply, new_session, show_uploader = handle_user_message(session, "", uploaded_file=file_obj)

        history = append_to_history(history, "assistant", bot_reply or "…")
        html = format_chat_html(history)

        # The uploader is hidden after an upload unless the bot asks for
        # another file.
        return html, history, new_session, gr.update(visible=show_uploader), gr.update(visible=show_uploader)

    def handle_reset(history, session):
        """Clear the transcript, reset the session and hide the uploader."""
        new_hist = []
        new_session = initial_session()
        html = format_chat_html(new_hist)
        return html, new_hist, new_session, gr.update(visible=False), gr.update(visible=False)

    # -------------------- event wiring --------------------
    send_btn.click(
        fn=handle_send,
        inputs=[user_input, chat_history_state, session_state],
        outputs=[user_input, chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
        queue=True
    )

    # The send handler shows/hides the uploader; the user selects a file in
    # cv_uploader and then presses Upload CV.
    upload_btn.click(
        fn=handle_upload,
        inputs=[cv_uploader, chat_history_state, session_state],
        outputs=[chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
        queue=True
    )

    clear_btn.click(
        fn=handle_reset,
        inputs=[chat_history_state, session_state],
        outputs=[chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
        queue=False
    )
1126
 
1127
+ # launch
1128
  if __name__ == "__main__":
1129
  demo.launch(debug=True)