Spaces:

afouda
/

EduNativesChatbot

Runtime error

App Files Community

afouda committed on Sep 15, 2025

Commit

a9a9fa8

verified ·

1 Parent(s): ec184cf

Update app.py

Browse files

Files changed (1) hide show

app.py +159 -748

app.py CHANGED Viewed

@@ -1,766 +1,177 @@
-# edunatives_full.py
-from __future__ import annotations
-import os
-import re
-import uuid
 import json
-import time
-import fitz  # PyMuPDF
 import docx
-import markdown
-from datetime import datetime, timezone
-from typing import List, Dict, Any, Optional, Tuple
-from dataclasses import dataclass
 import gradio as gr
 from openai import OpenAI
-import weaviate
-from weaviate.auth import AuthApiKey
-import numpy as np
-# -------------------- Configuration (edit these or set env vars) --------------------
-MODEL_NAME = os.getenv("MODEL_NAME", "openai/gpt-oss-120b")
-DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY", "kPEm10rrnxXrCf0TuB6Xcd7Y7lp3YgKa")
-BASE_URL = os.getenv("BASE_URL", "https://api.deepinfra.com/v1/openai")
-WEAVIATE_URL = os.getenv("WEAVIATE_URL", "htorgbgpt4w63nvf1yeuw.c0.us-west3.gcp.weaviate.cloud")
-WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY", "ZUd6clB5WmYzVGkxeU40cl96NTY5UkViUlVzY05Md3IzQ0JKelBZQmxGZHRPeGpCeGdxS1FUNnlYUkFFPV92MjAw")
-MEMORY_FILE = os.getenv("MEMORY_FILE", "chat_memory.json")
-LOG_FILE = os.getenv("LOG_FILE", "interaction_logs.json")
-# -------------------- Clients --------------------
-llm_client = OpenAI(api_key=DEEPINFRA_API_KEY, base_url=BASE_URL)
-weaviate_client = weaviate.Client(
     url=WEAVIATE_URL,
-    auth_client_secret=AuthApiKey(api_key=WEAVIATE_API_KEY),
 )
-# -------------------- Helpers & constants --------------------
-ARABIC_RANGE = (
-    (0x0600, 0x06FF), (0x0750, 0x077F), (0x08A0, 0x08FF),
-    (0xFB50, 0xFDFF), (0xFE70, 0xFEFF), (0x1EE00, 0x1EEFF)
-)
-def is_arabic(text: str) -> bool:
-    for ch in text or "":
-        code = ord(ch)
-        for a, b in ARABIC_RANGE:
-            if a <= code <= b:
-                return True
-    return False
-def get_rfc3339_time() -> str:
-    return datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
-# -------------------- Simple KB --------------------
-KB: Dict[str, Dict[str, str]] = {
-    "student_registration": {
-        "en": (
-            "**How to register / create an account (Student)**\n\n"
-            "1. Go to the EduNatives site and choose Sign Up.\n"
-            "2. Use your university email if possible and verify it.\n"
-            "3. Complete your profile (major, skills, interests).\n"
-            "4. Enable notifications for internships/scholarships."
-        ),
-        "ar": (
-            "**طريقة التسجيل وإنشاء حساب (طلاب)**\n\n"
-            "١. اذهب إلى موقع EduNatives واختر Sign Up.\n"
-            "٢. يفضل استخدام إيميل الجامعة وتأكيده.\n"
-            "٣. أكمل ملفك الشخصي (التخصص، المهارات، الاهتمامات).\n"
-            "٤. فعّل التنبيهات لفرص التدريب والمنح."
-        ),
-    },
-    "student_internships": {
-        "en": (
-            "**Finding internships & scholarships**\n\n"
-            "- Use the search filters: field, location, duration, paid/unpaid.\n"
-            "- Follow companies and set up alerts for new opportunities.\n"
-            "- Keep your profile and resume updated."
-        ),
-        "ar": (
-            "**كيفية العثور على تدريب أو منحة**\n\n"
-            "- استخدم فلاتر البحث: التخصص، المكان، المدة، مدفوع/غير مدفوع.\n"
-            "- تابع الشركات وفعّل التنبيهات للفرص الجديدة.\n"
-            "- حافظ على تحديث ملفك الشخصي وسيرتك الذاتية."
-        ),
-    },
-}
-# keys to detect intents (simple)
-KEYS = {
-    "student_registration": ["register", "sign up", "signup", "create account", "account", "تسجيل", "انشاء", "إنشاء", "حساب"],
-    "student_internships": ["intern", "internship", "training", "scholar", "scholarship", "grant", "opportunity", "تدريب", "منحة", "فرصة"],
-    "Job": ["job", "وظيفة", "وظائف", "وظايف"],
-    "Application": ["apply", "application", "cover letter", "تقديم", "طلب"],
-    "Memory": ["memory", "conversation history", "ذاكرة"],
-    "Opportunities": ["opportunity", "فرص", "opportunities"],
-    "Project": ["project", "مشروع"],
-    "Team": ["team", "فريق"]
-}
-@dataclass
-class Route:
-    audience: str
-    intent: str
-    language: str
-def route_intent(text: str, forced_audience: Optional[str]=None) -> Route:
-    lang = "ar" if is_arabic(text) else "en"
-    match_label = None
-    text_l = (text or "").lower()
-    for label, kws in KEYS.items():
-        for kw in kws:
-            if kw in text_l:
-                match_label = label
-                break
-        if match_label:
-            break
-    audience = forced_audience if forced_audience else "general"
-    intent = match_label if match_label else "general"
-    return Route(audience=audience, intent=intent, language=lang)
-# -------------------- Skill extraction (simple regex baseline) --------------------
-_SKILL_REGEX = re.compile(
-    r"\b(Python|Machine Learning|Deep Learning|NLP|Data Science|SQL|Docker|Kubernetes|React|JavaScript|Java|C\+\+|C#|TensorFlow|PyTorch|Pandas|NumPy|Tableau|Excel)\b",
-    re.IGNORECASE
-)
-def extract_skills_from_text(cv_text: str) -> List[str]:
-    skills = list({m.group(0).lower() for m in _SKILL_REGEX.finditer(cv_text or "")})
-    return [s.capitalize() for s in skills]
-# -------------------- File processing --------------------
-def process_uploaded_file(file_obj: Any) -> dict | None:
-    """
-    Accepts a Gradio file-like object (file_obj). Returns a dict:
-    {"content": str, "skills": [...], "filename": "..."} or {"error": "..."}
-    """
-    if not file_obj:
-        return None
-    # Gradio file object typically has a 'name' attribute with path
-    file_path = getattr(file_obj, "name", None)
-    if not file_path or not os.path.exists(file_path):
-        # sometimes file_obj is a dict with 'name' key
-        try:
-            file_path = file_obj["name"]
-        except Exception:
-            return {"error": "Uploaded file not accessible."}
-    filename = os.path.basename(file_path)
-    text_content = ""
-    try:
-        if filename.lower().endswith(".pdf"):
-            with fitz.open(file_path) as doc:
-                for page in doc:
-                    text_content += page.get_text()
-        elif filename.lower().endswith(".docx"):
-            docp = docx.Document(file_path)
-            for p in docp.paragraphs:
-                text_content += p.text + "\n"
-        elif filename.lower().endswith(".txt"):
-            with open(file_path, "r", encoding="utf-8") as f:
-                text_content = f.read()
-        else:
-            return {"error": f"Unsupported file type: {filename}"}
-        skills = extract_skills_from_text(text_content)
-        return {"content": text_content.strip(), "skills": skills, "filename": filename}
-    except Exception as e:
-        return {"error": f"Error processing file {filename}: {e}"}
-# -------------------- Weaviate schema helpers --------------------
-def class_exists(class_name: str) -> bool:
-    schema = weaviate_client.schema.get()
-    classes = schema.get("classes", []) if isinstance(schema, dict) else []
-    for c in classes:
-        if c.get("class") == class_name:
-            return True
-    return False
 def ensure_collections():
-    """
-    Creates minimal schema classes for Job, Application, Memory, Opportunities, Project, Team
-    if they do not exist already.
-    """
-    if not class_exists("Job"):
-        job_class = {
-            "class": "Job",
-            "properties": [
-                {"name": "jobId", "dataType": ["string"]},
-                {"name": "title", "dataType": ["text"]},
-                {"name": "companyName", "dataType": ["text"]},
-                {"name": "description", "dataType": ["text"]},
-                {"name": "skills", "dataType": ["string[]"]},
-                {"name": "salaryDetails", "dataType": ["text"]},
-                {"name": "workplaceType", "dataType": ["text"]},
-            ],
-        }
-        weaviate_client.schema.create_class(job_class)
-    if not class_exists("Application"):
-        app_class = {
-            "class": "Application",
-            "properties": [
-                {"name": "applicationId", "dataType": ["string"]},
-                {"name": "jobId", "dataType": ["string"]},
-                {"name": "applicantName", "dataType": ["text"]},
-                {"name": "applicantEmail", "dataType": ["text"]},
-                {"name": "coverLetter", "dataType": ["text"]},
-                {"name": "cvText", "dataType": ["text"]},
-                {"name": "skills", "dataType": ["string[]"]},
-                {"name": "createdAt", "dataType": ["date"]},
-            ],
-        }
-        weaviate_client.schema.create_class(app_class)
-    if not class_exists("Memory"):
-        mem_class = {
-            "class": "Memory",
-            "properties": [
-                {"name": "memoryId", "dataType": ["string"]},
-                {"name": "sessionId", "dataType": ["string"]},
-                {"name": "text", "dataType": ["text"]},
-                {"name": "createdAt", "dataType": ["date"]},
-            ],
-        }
-        weaviate_client.schema.create_class(mem_class)
-    if not class_exists("Opportunities"):
-        opp_class = {
-            "class": "Opportunities",
-            "properties": [
-                {"name": "oppId", "dataType": ["string"]},
-                {"name": "title", "dataType": ["text"]},
-                {"name": "description", "dataType": ["text"]},
-                {"name": "skills", "dataType": ["string[]"]},
-            ],
-        }
-        weaviate_client.schema.create_class(opp_class)
-    if not class_exists("Project"):
-        proj_class = {
-            "class": "Project",
-            "properties": [
-                {"name": "projectId", "dataType": ["string"]},
-                {"name": "title", "dataType": ["text"]},
-                {"name": "description", "dataType": ["text"]},
-                {"name": "skills", "dataType": ["string[]"]},
-            ],
-        }
-        weaviate_client.schema.create_class(proj_class)
-    if not class_exists("Team"):
-        team_class = {
-            "class": "Team",
-            "properties": [
-                {"name": "teamId", "dataType": ["string"]},
-                {"name": "name", "dataType": ["text"]},
-                {"name": "projectId", "dataType": ["string"]},
-                {"name": "members", "dataType": ["string[]"]},
-                {"name": "skills", "dataType": ["string[]"]},
-                {"name": "creatorId", "dataType": ["string"]},
-                {"name": "createdAt", "dataType": ["date"]},
-                {"name": "idea", "dataType": ["text"]},
-            ],
-        }
-        weaviate_client.schema.create_class(team_class)
-# ensure schema exists
 ensure_collections()
-# -------------------- Query helpers --------------------
-def query_weaviate_collection(class_name: str, query_text: str, limit: int = 5) -> List[dict]:
-    """
-    Simple retrieval: fetch objects where text fields match the query_text (basic).
-    Uses a basic search via .get (no BM25 module required).
-    """
-    try:
-        q = weaviate_client.query.get(class_name, ["*"]).with_limit(limit)
-        res = q.do()
-        hits = res.get("data", {}).get("Get", {}).get(class_name, [])
-        # naive filter: return items that contain query_text in title/description/skills
-        low = (query_text or "").lower()
-        items = []
-        for h in hits:
-            props = h.get("properties", {})
-            text_blob = " ".join(
-                [str(props.get(k, "")) for k in ("title", "description", "companyName", "skills")]
-            ).lower()
-            if not low or low in text_blob:
-                items.append(props)
-        return items[:limit]
-    except Exception as e:
-        print("[Weaviate Query Error]", e)
-        return []
-# -------------------- RAG prompt builder --------------------
-def build_rag_prompt(user_question: str, retrieved_items: List[dict], class_name: str) -> str:
-    context_parts = []
-    for i, item in enumerate(retrieved_items, 1):
-        details = {k: item.get(k) for k in item.keys()}
-        item_str = f"--- Record {i} ---\n{json.dumps(details, indent=2, ensure_ascii=False)}"
-        context_parts.append(item_str)
-    context_block = "\n\n".join(context_parts)
-    return (
-        f'User Question: "{user_question}"\n\n'
-        f"Retrieved Data:\n{context_block}\n\n"
-        "You are an expert assistant. Use ONLY the Retrieved Data above to answer the question, "
-        "summarize, and include 'Next Steps' for the user."
     )
-def rag_answer(user_question: str, class_name: str, top_k: int = 5) -> Tuple[str, List[dict]]:
-    retrieved = query_weaviate_collection(class_name, user_question, limit=top_k)
-    if not retrieved:
-        return "", []
-    prompt = build_rag_prompt(user_question, retrieved, class_name)
-    try:
-        resp = llm_client.chat.completions.create(
-            model=MODEL_NAME,
-            messages=[
-                {"role": "system", "content": "You are EduNatives Assistant. Be concise and practical."},
-                {"role": "user", "content": prompt}
-            ],
-            temperature=0.2,
-            max_tokens=1200,
-        )
-        answer = resp.choices[0].message.content or ""
-    except Exception as e:
-        print("[RAG LLM Error]", e)
-        answer = ""
-    return answer, retrieved
-# -------------------- Embeddings & Recommendations --------------------
-def compute_embedding(text: str) -> List[float]:
-    try:
-        resp = llm_client.embeddings.create(
-            model="Qwen/Qwen3-Embedding-8B",
-            input=text,
-            encoding_format="float"
-        )
-        if isinstance(resp, dict):
-            data = resp.get("data", [])
-            if data and isinstance(data[0], dict):
-                return data[0].get("embedding", [])
-        if hasattr(resp, "data") and resp.data:
-            return resp.data[0].embedding
-    except Exception as e:
-        print("[compute_embedding] error:", e)
-    return []
-def cosine_similarity(a: List[float], b: List[float]) -> float:
-    try:
-        va = np.array(a, dtype=float)
-        vb = np.array(b, dtype=float)
-        if va.size == 0 or vb.size == 0:
-            return 0.0
-        denom = (np.linalg.norm(va) * np.linalg.norm(vb))
-        if denom == 0:
-            return 0.0
-        return float(np.dot(va, vb) / denom)
-    except Exception as e:
-        print("[cosine_similarity] error:", e)
-        return 0.0
-def recommend_jobs_by_embedding(cv_text: str, top_k: int = 5, jobs_fetch_limit: int = 200) -> str:
-    skills = extract_skills_from_text(cv_text or "")
-    user_text = " ".join(skills) if skills else (cv_text or "")[:500]
-    user_emb = compute_embedding(user_text)
-    if not user_emb:
-        return "⚠️ Unable to compute embedding for your CV. Check API keys."
-    # fetch jobs
-    try:
-        res = weaviate_client.query.get("Job", ["*"]).with_limit(jobs_fetch_limit).do()
-        hits = res.get("data", {}).get("Get", {}).get("Job", [])
-        if not hits:
-            return "⚠️ No jobs found in the database."
-    except Exception as e:
-        print("[recommend_jobs] Weaviate fetch error:", e)
-        return "⚠️ Could not fetch jobs from the database."
-    scored_jobs = []
-    for h in hits:
-        props = h.get("properties", {})
-        job_text_parts = []
-        if props.get("skills"):
-            job_text_parts.append(" ".join(props.get("skills")))
-        if props.get("title"):
-            job_text_parts.append(props.get("title"))
-        if props.get("description"):
-            job_text_parts.append((props.get("description") or "")[:2000])
-        job_text = " ".join(job_text_parts).strip() or (props.get("title") or "")
-        job_emb = compute_embedding(job_text)
-        if not job_emb:
-            continue
-        score = cosine_similarity(user_emb, job_emb)
-        scored_jobs.append((score, props))
-    if not scored_jobs:
-        return "⚠️ No jobs could be embedded / compared."
-    scored_jobs.sort(key=lambda x: x[0], reverse=True)
-    top = scored_jobs[:top_k]
-    lines = []
-    for score, props in top:
-        title = props.get("title", "No title")
-        company = props.get("companyName", "Unknown company")
-        job_id = props.get("jobId", "")
-        salary = props.get("salaryDetails") or "Not specified"
-        skills_list = props.get("skills") or []
-        description = (props.get("description") or "").strip()
-        lines.append(
-            f"**{title}** at *{company}*  \n"
-            f"- Job ID: `{job_id}`  \n"
-            f"- Score: {score:.3f}  \n"
-            f"- Salary: {salary}  \n"
-            f"- Skills: {skills_list}  \n"
-            f"- Description: {description[:600]}{'...' if len(description) > 600 else ''}  \n"
-            f"---"
-        )
-    return "\n\n".join(lines)
-# -------------------- Weaviate save/update helpers --------------------
-def save_application_to_weaviate(app: dict) -> bool:
-    try:
-        weaviate_client.data_object.create(app, "Application", uuid=app.get("applicationId"))
-        return True
-    except Exception as e:
-        print("[save_application] error:", e)
-        return False
-def save_team_to_weaviate(team_props: dict) -> bool:
-    try:
-        weaviate_client.data_object.create(team_props, "Team", uuid=team_props.get("teamId"))
-        return True
-    except Exception as e:
-        print("[save_team] error:", e)
-        return False
-def update_team_add_member(team_name: str, member_name: str, skills: List[str]) -> str:
-    # naive: find team by name, append member, update object
-    try:
-        q = weaviate_client.query.get("Team", ["teamId", "name", "members", "skills"]).with_where({
-            "path": ["name"],
-            "operator": "Equal",
-            "valueString": team_name
-        }).with_limit(1)
-        res = q.do()
-        hits = res.get("data", {}).get("Get", {}).get("Team", [])
-        if not hits:
-            return "⚠️ Team not found."
-        obj = hits[0]
-        props = obj.get("properties", {})
-        team_id = props.get("teamId")
-        members = props.get("members") or []
-        members.append(member_name)
-        skills_list = list(set((props.get("skills") or []) + skills))
-        weaviate_client.data_object.update({"members": members, "skills": skills_list}, "Team", uuid=team_id)
-        return f"✅ {member_name} added to team '{team_name}'."
-    except Exception as e:
-        print("[update_team_add_member] error:", e)
-        return "⚠️ Failed to add member to team."
-# -------------------- Session / State machine --------------------
-def initial_session() -> dict:
-    return {"state": "idle", "data": {}}
-def handle_uploaded_cv_for_session(session: dict, uploaded_file: Any) -> Tuple[str, dict]:
-    if not uploaded_file:
-        return "⚠️ No file received.", session
-    doc_info = process_uploaded_file(uploaded_file)
-    if not doc_info or "error" in (doc_info or {}):
-        return f"⚠️ Error processing uploaded CV: {doc_info.get('error') if doc_info else 'unknown error'}", session
-    session["data"]["cvText"] = doc_info.get("content", "")
-    session["data"]["cvSkills"] = doc_info.get("skills", [])
-    st = session.get("state")
-    if st == "apply_wait_cv":
-        session["state"] = "apply_jobtitle"
-        detected = session["data"]["cvSkills"]
-        return f"CV received. Detected skills: {detected}. Which job title do you want to apply for? (type job title or 'any')", session
-    if st == "recommend_wait_cv":
-        rec_text = recommend_jobs_by_embedding(session["data"]["cvText"], top_k=5)
-        session = initial_session()
-        return f"Here are recommended jobs based on your CV:\n\n{rec_text}", session
-    return "CV uploaded and processed. What would you like to do next?", session
-def handle_user_message(session: dict, user_text: str, uploaded_file: Any = None) -> Tuple[str, dict, bool]:
-    session = session or initial_session()
-    st = session.get("state", "idle")
-    text = (user_text or "").strip()
-    # quick reset
-    if text.lower() in ("cancel", "exit", "quit", "restart", "reset"):
-        return "Conversation reset. How can I help you now?", initial_session(), False
-    # file upload route
-    if uploaded_file:
-        bot_msg, new_session = handle_uploaded_cv_for_session(session, uploaded_file)
-        return bot_msg, new_session, False
-    # IDLE
-    if st == "idle":
-        low = text.lower()
-        if low in ("hi", "hello", "hey", "مرحبا", "ازيك", "السلام عليكم"):
-            return "👋 Hello! How can I support you today? You can ask about jobs, teams, or recommendations.", session, False
-        if low in ("who are you?", "who are you", "انت مين", "من انت"):
-            return ("👋 I am EduNatives Assistant — your friendly academic and career guide."), session, False
-        route = route_intent(text)
-        # 1) KB first
-        if route.intent in KB:
-            return KB[route.intent].get(route.language, KB[route.intent].get("en", "")), session, False
-        # 2) If intent is a RAG-related class -> call rag
-        if route.intent in {"Job", "Application", "Memory", "Opportunities", "Project", "Team"}:
-            try:
-                rag_ans, items = rag_answer(text, route.intent, top_k=5)
-                if rag_ans:
-                    return rag_ans, session, False
-            except Exception as e:
-                print("[handle_user_message] rag error:", e)
-        # 3) fallback to LLM normal chat
-        try:
-            resp = llm_client.chat.completions.create(
-                model=MODEL_NAME,
-                messages=[
-                    {"role": "system", "content": "You are EduNatives Assistant. Be concise and helpful."},
-                    {"role": "user", "content": text},
-                ],
-                temperature=0.5,
-                max_tokens=800
-            )
-            answer = resp.choices[0].message.content or ""
-            return answer, session, False
-        except Exception as e:
-            print("[handle_user_message] LLM error:", e)
-            return "⚠️ Sorry, I couldn't process that right now. Try again later.", session, False
-    # ---------- APPLY FLOW ----------
-    if st == "apply_name":
-        session["data"]["applicantName"] = text or "Applicant"
         session["state"] = "apply_email"
-        return "Thanks. What's your email address?", session, False
-    if st == "apply_email":
-        m = re.search(r"([a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+)", text)
-        session["data"]["applicantEmail"] = m.group(1) if m else text
         session["state"] = "apply_cover"
-        return "Got it. Please type a short cover letter (or type 'skip' to skip).", session, False
-    if st == "apply_cover":
-        if text.lower() != "skip":
-            session["data"]["coverLetter"] = text
-        else:
-            session["data"]["coverLetter"] = ""
-        session["state"] = "apply_wait_cv"
-        return "Please upload your CV now (use the Upload button).", session, True
-    if st == "apply_jobtitle":
-        session["data"]["targetJobTitle"] = text
-        found = query_weaviate_collection("Job", text, limit=3)
-        cv_skills = [s.lower() for s in session["data"].get("cvSkills", [])]
-        if found:
-            job = found[0]
-            job_skills = [s.lower() for s in (job.get("skills") or [])]
-            overlap = len([s for s in cv_skills if s in job_skills])
-            session["data"]["targetJobId"] = job.get("jobId")
-            session["state"] = "apply_confirm"
-            if overlap > 0:
-                return (f"I found a job: {job.get('title')} at {job.get('companyName')}. "
-                        f"Detected {overlap} overlapping skills. Do you want to confirm application? (yes/no)"), session, False
-            else:
-                return (f"I found {job.get('title')} at {job.get('companyName')}, but your CV skills do not overlap. "
-                        "Do you still want to proceed? (yes/no)"), session, False
-        else:
-            session["data"]["targetJobId"] = None
-            session["state"] = "apply_confirm"
-            return f"I couldn't find a job with that title. Do you want to apply for '{text}' anyway? (yes/no)", session, False
-    if st == "apply_confirm":
-        if text.lower() in ("yes", "y", "نعم"):
-            app = {
-                "applicationId": str(uuid.uuid4()),
-                "jobId": session["data"].get("targetJobId"),
-                "applicantName": session["data"].get("applicantName"),
-                "applicantEmail": session["data"].get("applicantEmail"),
-                "coverLetter": session["data"].get("coverLetter", ""),
-                "cvText": session["data"].get("cvText", ""),
-                "skills": session["data"].get("cvSkills", []),
-                "createdAt": get_rfc3339_time()
-            }
-            ok = save_application_to_weaviate(app)
-            session = initial_session()
-            return ("🎉 Your application has been submitted successfully. Good luck!" if ok
-                    else "⚠️ Failed to save application. Please try again later."), session, False
-        else:
-            session = initial_session()
-            return "Application cancelled. If you want to do something else, tell me.", session, False
-    # ---------- TEAM FLOW ----------
-    if st == "team_action":
-        low = text.lower()
-        if "create" in low or "إنشاء" in low:
-            session["state"] = "team_create_name"
-            session["data"] = {}
-            return "Great — what's the team name?", session, False
-        if "join" in low or "انضم" in low:
-            session["state"] = "team_join_name"
-            session["data"] = {}
-            return "Okay — what's the name of the team you want to join?", session, False
-        return "Please say 'create' to create a team or 'join' to join a team.", session, False
-    if st == "team_create_name":
-        session["data"]["team_name"] = text
-        session["state"] = "team_create_owner"
-        return "Team name saved. Who is the team owner (your name)?", session, False
-    if st == "team_create_owner":
-        session["data"]["owner"] = text
-        session["state"] = "team_create_skills"
-        return "Owner saved. Please list the team's skills (comma-separated).", session, False
-    if st == "team_create_skills":
-        session["data"]["skills"] = [s.strip() for s in text.split(",") if s.strip()]
-        session["state"] = "team_create_course"
-        return "Skills saved. (Optional) Enter course/subject name or type 'skip'.", session, False
-    if st == "team_create_course":
-        session["data"]["course"] = "" if text.lower() == "skip" else text
-        session["state"] = "team_create_idea"
-        return "Please write a short idea/description for the project.", session, False
-    if st == "team_create_idea":
-        session["data"]["idea"] = text
-        team_props = {
-            "teamId": str(uuid.uuid4()),
-            "name": session["data"].get("team_name"),
-            "projectId": None,
-            "members": [session["data"].get("owner")],
-            "skills": session["data"].get("skills", []),
-            "creatorId": session["data"].get("owner"),
-            "createdAt": get_rfc3339_time(),
-            "idea": session["data"].get("idea", "")
-        }
-        saved = save_team_to_weaviate(team_props)
-        session = initial_session()
-        return (f"🎉 Team '{team_props['name']}' created! Members: {team_props['members']}" if saved
-                else "⚠️ Failed to create team. Try again later."), session, False
-    if st == "team_join_name":
-        session["data"]["team_name"] = text
-        session["state"] = "team_join_member"
-        return "What's your name (to add you to the team)?", session, False
-    if st == "team_join_member":
-        session["data"]["member_name"] = text
-        session["state"] = "team_join_skills"
-        return "Enter your skills (comma-separated).", session, False
-    if st == "team_join_skills":
-        skills = [s.strip() for s in text.split(",") if s.strip()]
-        resp = update_team_add_member(session["data"].get("team_name"), session["data"].get("member_name"), skills)
-        session = initial_session()
-        return resp, session, False
-    # ---------- RECOMMEND FLOW ----------
-    if st == "recommend_wait_cv":
-        return "Please upload your CV (use the Upload button).", session, True
-    # default fallback
-    return "Sorry — I didn't understand that. You can say 'apply', 'create team', 'join team' or 'recommend'.", session, False
-# -------------------- UI wiring (Gradio) --------------------
-def format_chat_html(history: List[Dict[str, str]]) -> str:
-    html = "<div class='chatbot'>"
-    for msg in history:
-        role = msg["role"]
-        content = msg["content"]
-        if role == "user":
-            html += f"<div class='user-bubble'>{content}</div>"
-        else:
-            html_content = markdown.markdown(content, extensions=['tables'])
-            html += f"<div class='bot-bubble'>{html_content}</div>"
-    html += "</div>"
-    return html
-# minimal CSS + UI
-with gr.Blocks(css="""
-.chatbot {height: 520px; overflow: auto;}
-.user-bubble {background-color: #DCF8C6; padding: 10px; border-radius: 12px; max-width: 75%; float: right; clear: both; margin: 5px; word-wrap: break-word;}
-.bot-bubble {background-color: #F1F0F0; padding: 10px; border-radius: 12px; max-width: 75%; float: left; clear: both; margin: 5px; word-wrap: break-word;}
-.chatbox-container {display: flex; gap: 8px; margin-top: 10px;}
-""") as demo:
-    gr.Markdown("# 💬 EduNatives — Conversational Job Portal")
-    chat_html = gr.HTML(format_chat_html([]))
-    with gr.Row(elem_classes="chatbox-container"):
-        user_input = gr.Textbox(placeholder="Type your message here (e.g. 'apply', 'create team', 'recommend')", lines=2)
-        send_btn = gr.Button("Send", variant="primary")
-    with gr.Row(visible=False) as file_row:
-        cv_uploader = gr.File(label="Upload CV (.pdf/.docx/.txt)", file_count="single", file_types=[".pdf", ".docx", ".txt"], visible=False)
-        upload_btn = gr.Button("Upload CV", visible=False)
-    with gr.Row():
-        clear_btn = gr.Button("Reset Conversation")
-        instructions = gr.Markdown("Commands: `apply`, `create team`, `join team`, `recommend` — the bot will guide you step-by-step.")
-    chat_history_state = gr.State([])
-    session_state = gr.State(initial_session())
-    def append_to_history(history: List[Dict[str, str]], role: str, content: str) -> List[Dict[str, str]]:
-        history = history or []
-        history.append({"role": role, "content": content})
-        return history
-    def handle_send(message: str, history: List[Dict[str, str]], session: dict):
-        history = history or []
-        session = session or initial_session()
-        if message and message.strip():
-            history = append_to_history(history, "user", message.strip())
-        bot_reply, new_session, show_uploader = handle_user_message(session, message or "", uploaded_file=None)
-        history = append_to_history(history, "assistant", bot_reply or "…")
-        html = format_chat_html(history)
-        return "", html, history, new_session, gr.update(visible=show_uploader), gr.update(visible=show_uploader)
-    def handle_upload(file_obj, history: List[Dict[str, str]], session: dict):
-        history = history or []
-        session = session or initial_session()
-        filename = getattr(file_obj, "name", "uploaded_file")
-        history = append_to_history(history, "user", f"📎 Uploaded file: {filename}")
-        bot_reply, new_session, show_uploader = handle_user_message(session, "", uploaded_file=file_obj)
-        history = append_to_history(history, "assistant", bot_reply or "…")
-        html = format_chat_html(history)
-        return html, history, new_session, gr.update(visible=show_uploader), gr.update(visible=show_uploader)
-    def handle_reset(history, session):
-        new_hist = []
-        new_session = initial_session()
-        html = format_chat_html(new_hist)
-        return html, new_hist, new_session, gr.update(visible=False), gr.update(visible=False)
-    send_btn.click(
-        fn=handle_send,
-        inputs=[user_input, chat_history_state, session_state],
-        outputs=[user_input, chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
-        queue=True
-    )
-    upload_btn.click(
-        fn=handle_upload,
-        inputs=[cv_uploader, chat_history_state, session_state],
-        outputs=[chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
-        queue=True
-    )
-    clear_btn.click(
-        fn=handle_reset,
-        inputs=[chat_history_state, session_state],
-        outputs=[chat_html, chat_history_state, session_state, cv_uploader, upload_btn],
-        queue=False
-    )
-if __name__ == "__main__":
-    demo.launch(debug=True)

 import json
+import weaviate
+import fitz
 import docx
+import os
 import gradio as gr
 from openai import OpenAI
+from weaviate.classes.init import Auth
+from weaviate.classes.config import Property, DataType
+from sklearn.metrics.pairwise import cosine_similarity
# --- Config ---
# Connection settings are read from the environment; the literals are the
# fallbacks used when a variable is not set.
WEAVIATE_URL = os.environ.get("WEAVIATE_URL", "http://localhost:8080")
WEAVIATE_API_KEY = os.environ.get("WEAVIATE_API_KEY", "YOUR_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "YOUR_KEY")
+# --- Clients ---
def _connect_weaviate(url, api_key):
    """Open a connected Weaviate v4 client for ``url``.

    ``weaviate.WeaviateClient`` does not accept a ``url`` keyword and is not
    connected after construction, so the v4 ``connect_to_*`` helpers are used
    instead; they return a client that is ready for queries.

    Args:
        url: Weaviate endpoint, with or without a scheme.
        api_key: API key; the ``"YOUR_KEY"`` placeholder is treated as unset.

    Returns:
        A connected Weaviate client.
    """
    from urllib.parse import urlparse

    auth = Auth.api_key(api_key) if api_key and api_key != "YOUR_KEY" else None
    parsed = urlparse(url if "://" in url else f"https://{url}")
    host = parsed.hostname or ""
    if host in ("localhost", "127.0.0.1"):
        # Local instance: honour a custom port from the URL, default to 8080.
        return weaviate.connect_to_local(
            host=host,
            port=parsed.port or 8080,
            auth_credentials=auth,
        )
    return weaviate.connect_to_weaviate_cloud(
        cluster_url=url,
        auth_credentials=auth,
    )


client = _connect_weaviate(WEAVIATE_URL, WEAVIATE_API_KEY)
openai_client = OpenAI(api_key=OPENAI_API_KEY)
# --- Ensure Collections ---
def ensure_collections():
    """Create each collection used by the app if it does not exist yet.

    Every collection is declared with TEXT properties only. Collections that
    already exist are left untouched.
    """

    def text_props(*names):
        # All properties in this schema share the TEXT data type.
        return [Property(name=n, data_type=DataType.TEXT) for n in names]

    schema = (
        ("Job", text_props("title", "description")),
        ("Application", text_props("name", "email")),
        ("Memory", text_props("content")),
        ("Opportunities", text_props("details")),
        ("Project", text_props("name", "description")),
        ("Team", text_props("member", "role")),
    )
    for collection_name, properties in schema:
        if client.collections.exists(collection_name):
            continue
        client.collections.create(name=collection_name, properties=properties)


# Runs at import time so the schema is in place before any request arrives.
ensure_collections()
+# --- Embeddings ---
def get_embedding(text):
    """Return the ``text-embedding-3-small`` embedding for ``text``.

    Sends one embeddings request through ``openai_client`` and returns the
    ``embedding`` of the first item in the response.
    """
    response = openai_client.embeddings.create(
        model="text-embedding-3-small",
        input=text,
    )
    first_item = response.data[0]
    return first_item.embedding
def recommend_jobs_by_embedding(cv_text, jobs, top_n=3):
    """Rank ``jobs`` by cosine similarity between their description and the CV.

    Args:
        cv_text: Plain text of the candidate's CV.
        jobs: Iterable of dicts, each with a ``"description"`` key.
        top_n: Maximum number of jobs to return.

    Returns:
        Up to ``top_n`` job dicts, most similar first. Returns an empty list
        when there are no jobs or ``top_n`` is not positive.
    """
    jobs = list(jobs)  # allow one-shot iterables; they are traversed twice below
    if not jobs or top_n <= 0:
        # cosine_similarity rejects an empty matrix, and a negative slice bound
        # would silently drop the tail instead of limiting the result.
        return []
    cv_embedding = get_embedding(cv_text)
    job_embeddings = [get_embedding(job["description"]) for job in jobs]
    scores = cosine_similarity([cv_embedding], job_embeddings)[0]
    ranked = sorted(zip(jobs, scores), key=lambda pair: pair[1], reverse=True)
    return [job for job, _ in ranked[:top_n]]
+# --- File Upload Handling ---
def process_uploaded_file(file_path):
    """Extract plain text from an uploaded ``.pdf``, ``.docx`` or ``.txt`` file.

    Args:
        file_path: Path of the uploaded file; the extension (case-insensitive)
            selects the parser.

    Returns:
        The extracted text with surrounding whitespace stripped, or an empty
        string for any other extension.
    """
    ext = os.path.splitext(file_path)[1].lower()
    if ext == ".pdf":
        with fitz.open(file_path) as pdf:
            text = "".join(page.get_text() for page in pdf)
    elif ext == ".docx":
        text = "\n".join(para.text for para in docx.Document(file_path).paragraphs)
    elif ext == ".txt":
        # CVs are not always UTF-8; replace undecodable bytes instead of
        # failing the whole upload.
        with open(file_path, "r", encoding="utf-8", errors="replace") as f:
            text = f.read()
    else:
        text = ""
    return text.strip()
+# --- Session Management ---
def initial_session():
    """Return a fresh session dict: idle state, empty data, empty history."""
    return dict(state="idle", data={}, history=[])
def handle_uploaded_cv_for_session(session, file_path):
    """Extract text from the uploaded CV and store it in the session.

    The text is saved under ``session["data"]["cv_text"]``. The same session
    object is mutated and returned.
    """
    extracted = process_uploaded_file(file_path)
    session["data"].update(cv_text=extracted)
    return session
+# --- KB ---
# Canned answers, keyed by the topic string looked up in the user's message.
KB_RESPONSES = dict(
    student_registration="You can register as a student on the portal...",
    student_internships="Internships are listed under opportunities section...",
)
+# --- RAG Query ---
def rag_query(collection, query_text):
    """Return the three objects in ``collection`` nearest to ``query_text``.

    The query text is embedded with ``get_embedding`` and sent as a
    near-vector search through the v4 collections API. The previous v3
    builder (``client.query.get(...).do()``) is not available on a v4 client.

    Args:
        collection: Name of the Weaviate collection to search.
        query_text: Free-text query to embed.

    Returns:
        A dict in the legacy ``{"data": {"Get": {collection: [...]}}}`` shape,
        where each list item holds the properties of one matching object.
    """
    query_embedding = get_embedding(query_text)
    response = client.collections.get(collection).query.near_vector(
        near_vector=query_embedding,
        limit=3,
    )
    hits = [dict(obj.properties) for obj in response.objects]
    # Keep the envelope the old GraphQL call produced so callers that
    # serialise or index into the result keep working.
    return {"data": {"Get": {collection: hits}}}
+# --- LLM Chat ---
def llm_chat(prompt):
    """Send ``prompt`` to ``gpt-4o-mini`` and return the reply text.

    The request has a fixed system message followed by the prompt as the
    single user message; the content of the first choice is returned.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful chatbot."},
        {"role": "user", "content": prompt},
    ]
    completion = openai_client.chat.completions.create(
        model="gpt-4o-mini",
        messages=conversation,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
+# --- Flows ---
def apply_flow(session, message):
    """Advance the job-application dialogue by one step.

    The flow collects name, email and cover letter in that order, storing
    each answer in ``session["data"]`` and moving ``session["state"]`` to the
    next step. Called in any other state, it starts a new application.

    Starting an application keeps the rest of the session (history and any
    uploaded ``cv_text``) rather than replacing it with a new dict; only the
    answers of a previous application are cleared.

    Args:
        session: Session dict, mutated in place.
        message: The user's latest message.

    Returns:
        A ``(reply, session)`` tuple.
    """
    # current state -> (field to store, next state, reply)
    steps = {
        "apply_name": ("name", "apply_email", "Please provide your email."),
        "apply_email": ("email", "apply_cover", "Please provide your cover letter."),
        "apply_cover": ("cover", "idle", "Your application has been recorded."),
    }
    data = session.setdefault("data", {})
    step = steps.get(session.get("state"))
    if step is not None:
        field, next_state, reply = step
        data[field] = message
        session["state"] = next_state
        return reply, session

    # Not inside the flow yet: discard answers from an earlier application.
    for field, _, _ in steps.values():
        data.pop(field, None)
    session["state"] = "apply_name"
    return "Let's start your application. What's your name?", session
def team_flow(session, message):
    """Reply with the fixed team-flow prompt and return the session unchanged.

    ``message`` is accepted to match the other flow handlers but is not read.
    """
    reply = "Team flow triggered. Add member info."
    return reply, session
def recommend_flow(session, message):
    """Recommend jobs that match the CV stored in the session.

    Args:
        session: Session dict; the CV text is read from
            ``session["data"]["cv_text"]``.
        message: The user's message (not used).

    Returns:
        A ``(reply, session)`` tuple. The reply asks for a CV upload when no
        CV text is available, including when extraction produced an empty
        string, which would otherwise be sent for embedding.
    """
    cv_text = session.get("data", {}).get("cv_text")
    if not cv_text:
        return "Please upload your CV first.", session
    jobs = [
        {"title": "AI Intern", "description": "Work on NLP"},
        {"title": "ML Engineer", "description": "Build models"},
    ]
    recs = recommend_jobs_by_embedding(cv_text, jobs)
    titles = [job["title"] for job in recs]
    return f"Recommended jobs: {titles}", session
+# --- Main Handler ---
def handle_user_message(session, message):
    """Route one user message to the matching handler.

    Order of checks: an application already in progress, canned KB answers,
    collection lookups (RAG), flow trigger words, then the general LLM.

    Args:
        session: Session dict for the conversation.
        message: The user's message; ``None`` is treated as empty.

    Returns:
        A ``(reply, session)`` tuple.
    """
    message = message or ""
    lower = message.lower()
    # An application in progress owns the next message. Without this the
    # user's name or email would fall through to the keyword checks below
    # and the flow would never advance.
    if str(session.get("state", "")).startswith("apply_"):
        return apply_flow(session, message)
    # KB check: accept the key as written or with underscores typed as spaces.
    for key, answer in KB_RESPONSES.items():
        if key in lower or key.replace("_", " ") in lower:
            return answer, session
    # RAG check
    # NOTE(review): "team" matches the Team collection here, so the "team"
    # flow trigger below cannot be reached - confirm which one is intended.
    for collection in ["Job", "Application", "Memory", "Opportunities", "Project", "Team"]:
        if collection.lower() in lower:
            results = rag_query(collection, message)
            # default=str keeps non-JSON values (ids, dates) from raising.
            dumped = json.dumps(results, indent=2, default=str)
            return f"RAG Results from {collection}: {dumped}", session
    # Flow triggers
    if "apply" in lower:
        return apply_flow(session, message)
    if "team" in lower:
        return team_flow(session, message)
    if "recommend" in lower:
        return recommend_flow(session, message)
    # Default LLM
    return llm_chat(message), session
+# --- Gradio App ---
# NOTE(review): one module-level session is shared by every visitor of the
# app; per-user state would need gr.State.
session = initial_session()


def chat_with_bot(message, file=None):
    """Handle one chat turn, processing a newly uploaded CV first.

    The file widget keeps its value between turns, so a file is processed
    only the first time its path is seen. Later messages are then answered
    normally instead of repeating the upload confirmation.

    Args:
        message: Text typed by the user; may be empty or ``None``.
        file: Value of the file widget, either a path string or an object
            with a ``name`` attribute holding the path, or ``None``.

    Returns:
        The bot's reply text.
    """
    global session
    text = (message or "").strip()
    upload_notice = ""
    if file is not None:
        path = getattr(file, "name", file)
        if session.get("data", {}).get("cv_path") != path:
            session = handle_uploaded_cv_for_session(session, path)
            session["data"]["cv_path"] = path  # remember what was processed
            if not text:
                return "CV uploaded successfully!"
            upload_notice = "CV uploaded successfully!\n\n"
    if not text:
        return "Please type a message."
    reply, session = handle_user_message(session, message)
    return f"{upload_notice}{reply}" if upload_notice else reply
with gr.Blocks(title="Edunatives Chatbot") as demo:
    gr.Markdown("# 🎓 Edunatives Chatbot")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(placeholder="Type your message here...")
    file_upload = gr.File(label="Upload CV (PDF/DOCX/TXT)")
    clear = gr.Button("Clear Chat")

    def respond(message, history, file):
        """Append one (user message, bot reply) pair and clear the textbox."""
        # The chatbot value may be None before the first turn.
        history = list(history or [])
        response = chat_with_bot(message, file)
        history.append((message, response))
        return history, ""

    def reset_chat():
        """Clear the transcript and start a new session."""
        # Without this a half-finished flow would carry on after "Clear Chat".
        global session
        session = initial_session()
        return [], ""

    msg.submit(respond, [msg, chatbot, file_upload], [chatbot, msg])
    clear.click(reset_chat, None, [chatbot, msg])

# Bind to all interfaces on port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)