Spaces:

afouda
/

EduNativesChatbot

Runtime error

App Files Files Community

afouda committed on Sep 15, 2025

Commit

8d433bb

verified ·

1 Parent(s): dac8618

Update app.py

Browse files

Files changed (1) hide show

app.py +271 -171

app.py CHANGED Viewed

@@ -1,177 +1,277 @@
-import json
-import weaviate
-import fitz
-import docx
 import os
 import gradio as gr
 from openai import OpenAI
-from weaviate.classes.init import Auth
-from weaviate.classes.config import Property, DataType
-from sklearn.metrics.pairwise import cosine_similarity
-# --- Config ---
-WEAVIATE_URL = os.getenv("WEAVIATE_URL", "http://localhost:8080")
-WEAVIATE_API_KEY = os.getenv("WEAVIATE_API_KEY", "YOUR_KEY")
-OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "YOUR_KEY")
-# --- Clients ---
-client = weaviate.WeaviateClient(
-    url=WEAVIATE_URL,
-    auth_client_secret=Auth.api_key(WEAVIATE_API_KEY),
 )
-openai_client = OpenAI(api_key=OPENAI_API_KEY)
-# --- Ensure Collections ---
-def ensure_collections():
-    collections = {
-        "Job": [Property(name="title", data_type=DataType.TEXT), Property(name="description", data_type=DataType.TEXT)],
-        "Application": [Property(name="name", data_type=DataType.TEXT), Property(name="email", data_type=DataType.TEXT)],
-        "Memory": [Property(name="content", data_type=DataType.TEXT)],
-        "Opportunities": [Property(name="details", data_type=DataType.TEXT)],
-        "Project": [Property(name="name", data_type=DataType.TEXT), Property(name="description", data_type=DataType.TEXT)],
-        "Team": [Property(name="member", data_type=DataType.TEXT), Property(name="role", data_type=DataType.TEXT)],
-    }
-    for cname, props in collections.items():
-        if not client.collections.exists(cname):
-            client.collections.create(name=cname, properties=props)
-ensure_collections()
-# --- Embeddings ---
-def get_embedding(text):
-    resp = openai_client.embeddings.create(input=text, model="text-embedding-3-small")
-    return resp.data[0].embedding
-def recommend_jobs_by_embedding(cv_text, jobs, top_n=3):
-    cv_embedding = get_embedding(cv_text)
-    job_embeddings = [get_embedding(j["description"]) for j in jobs]
-    sims = cosine_similarity([cv_embedding], job_embeddings)[0]
-    ranked = sorted(zip(jobs, sims), key=lambda x: x[1], reverse=True)
-    return [job for job, _ in ranked[:top_n]]
-# --- File Upload Handling ---
-def process_uploaded_file(file_path):
-    ext = os.path.splitext(file_path)[1].lower()
-    text = ""
-    if ext == ".pdf":
-        with fitz.open(file_path) as pdf:
-            for page in pdf:
-                text += page.get_text()
-    elif ext == ".docx":
-        doc = docx.Document(file_path)
-        for para in doc.paragraphs:
-            text += para.text + "\n"
-    elif ext == ".txt":
-        with open(file_path, "r", encoding="utf-8") as f:
-            text = f.read()
-    return text.strip()
-# --- Session Management ---
-def initial_session():
-    return {"state": "idle", "data": {}, "history": []}
-def handle_uploaded_cv_for_session(session, file_path):
-    text = process_uploaded_file(file_path)
-    session["data"]["cv_text"] = text
-    return session
-# --- KB ---
-KB_RESPONSES = {
-    "student_registration": "You can register as a student on the portal...",
-    "student_internships": "Internships are listed under opportunities section..."
 }
-# --- RAG Query ---
-def rag_query(collection, query_text):
-    query_embedding = get_embedding(query_text)
-    results = client.query.get(collection, ["*"]).with_near_vector({"vector": query_embedding}).with_limit(3).do()
-    return results
-# --- LLM Chat ---
-def llm_chat(prompt):
-    resp = openai_client.chat.completions.create(
-        model="gpt-4o-mini",
-        messages=[{"role": "system", "content": "You are a helpful chatbot."}, {"role": "user", "content": prompt}],
-    )
-    return resp.choices[0].message.content
-# --- Flows ---
-def apply_flow(session, message):
-    state = session["state"]
-    if state == "apply_name":
-        session["data"]["name"] = message
-        session["state"] = "apply_email"
-        return "Please provide your email.", session
-    elif state == "apply_email":
-        session["data"]["email"] = message
-        session["state"] = "apply_cover"
-        return "Please provide your cover letter.", session
-    elif state == "apply_cover":
-        session["data"]["cover"] = message
-        session["state"] = "idle"
-        return "Your application has been recorded.", session
-    return "Let's start your application. What's your name?", {"state": "apply_name", "data": {}}
-def team_flow(session, message):
-    return "Team flow triggered. Add member info.", session
-def recommend_flow(session, message):
-    if "cv_text" in session["data"]:
-        jobs = [{"title": "AI Intern", "description": "Work on NLP"}, {"title": "ML Engineer", "description": "Build models"}]
-        recs = recommend_jobs_by_embedding(session["data"]["cv_text"], jobs)
-        return f"Recommended jobs: {[j['title'] for j in recs]}", session
-    return "Please upload your CV first.", session
-# --- Main Handler ---
-def handle_user_message(session, message):
-    lower = message.lower()
-    # KB check
-    for key, answer in KB_RESPONSES.items():
-        if key in lower:
-            return answer, session
-    # RAG check
-    for collection in ["Job", "Application", "Memory", "Opportunities", "Project", "Team"]:
-        if collection.lower() in lower:
-            results = rag_query(collection, message)
-            return f"RAG Results from {collection}: {json.dumps(results, indent=2)}", session
-    # Flow triggers
-    if "apply" in lower:
-        return apply_flow(session, message)
-    if "team" in lower:
-        return team_flow(session, message)
-    if "recommend" in lower:
-        return recommend_flow(session, message)
-    # Default LLM
-    return llm_chat(message), session
-# --- Gradio App ---
-session = initial_session()
-def chat_with_bot(message, file=None):
-    global session
-    if file is not None:
-        session = handle_uploaded_cv_for_session(session, file.name)
-        return "CV uploaded successfully!"
-    reply, session = handle_user_message(session, message)
-    return reply
-with gr.Blocks(title="Edunatives Chatbot") as demo:
-    gr.Markdown("# 🎓 Edunatives Chatbot")
-    chatbot = gr.Chatbot()
-    msg = gr.Textbox(placeholder="Type your message here...")
-    file_upload = gr.File(label="Upload CV (PDF/DOCX/TXT)")
-    clear = gr.Button("Clear Chat")
-    def respond(message, history, file):
-        response = chat_with_bot(message, file)
-        history.append((message, response))
-        return history, ""
-    msg.submit(respond, [msg, chatbot, file_upload], [chatbot, msg])
-    clear.click(lambda: ([], ""), None, [chatbot, msg])
-demo.launch(server_name="0.0.0.0", server_port=7860)

+from __future__ import annotations
 import os
+import json
+import time
+from dataclasses import dataclass
+from typing import List, Dict, Any
+import markdown
 import gradio as gr
 from openai import OpenAI
+# --- 1. BACKEND LOGIC & CONFIG ---
+# --- LLM Configuration ---
+MODEL_NAME = "openai/gpt-oss-120b"
+# SECURITY: the API key must come from the environment (e.g. a Space secret),
+# never from a literal in source control. A key that was previously committed
+# in this file should be treated as leaked and revoked with the provider.
+DEEPINFRA_API_KEY = os.getenv("DEEPINFRA_API_KEY", "")
+BASE_URL = "https://api.deepinfra.com/v1/openai"
+if not DEEPINFRA_API_KEY:
+    print("[WARN] DEEPINFRA_API_KEY is not set. The chatbot will likely fail.")
+# NOTE(review): with an empty key the client is still expected to construct and
+# only the requests fail; call_llm() catches that and returns "" so the caller
+# can fall back to the canned answers. Confirm against the installed openai
+# package version.
+client = OpenAI(api_key=DEEPINFRA_API_KEY, base_url=BASE_URL)
+# --- Language & Routing Configuration ---
+# Inclusive (start, end) code-point pairs. is_arabic() reports a text as Arabic
+# as soon as one character falls inside any of these spans.
+ARABIC_RANGE = (
+    (0x0600, 0x06FF), (0x0750, 0x077F), (0x08A0, 0x08FF),
+    (0xFB50, 0xFDFF), (0xFE70, 0xFEFF), (0x1EE00, 0x1EEFF)
 )
+# Routing decision produced by route_intent() for a single user message.
+@dataclass
+class Route:
+    # Target audience: "student", "university", "company" or "general".
+    audience: str
+    # Matched label from KEYS, or "general" when no keyword matched.
+    intent: str
+    # Detected message language: "ar" or "en".
+    language: str
+# Canned answers keyed by intent label, then by language code ("en" / "ar").
+# kb_fallback() serves these when the LLM call yields no text. Only the two
+# intents below have entries; every other intent gets kb_fallback()'s generic
+# "please clarify" message.
+KB: Dict[str, Dict[str, str]] = {
+    "student_registration": {
+        "en": (
+            "**How to register / create an account (Student)**\n\n"
+            "1. Go to the EduNatives site and choose Sign Up.\n"
+            "2. Use your university email if possible and verify it.\n"
+            "3. Complete your profile (major, skills, interests).\n"
+            "4. Enable notifications for internships/scholarships."
+        ),
+        "ar": (
+            "**طريقة التسجيل وإنشاء حساب (طلاب)**\n\n"
+            "١. اذهب إلى موقع EduNatives واختر Sign Up.\n"
+            "٢. يفضل استخدام إيميل الجامعة وتأكيده.\n"
+            "٣. أكمل ملفك الشخصي (التخصص، المهارات، الاهتمامات).\n"
+            "٤. فعّل التنبيهات لفرص التدريب والمنح."
+        ),
+    },
+    "student_internships": {
+        "en": (
+            "**Finding internships & scholarships**\n\n"
+            "- Use the search filters: field, location, duration, paid/unpaid.\n"
+            "- Follow companies and set up alerts for new opportunities.\n"
+            "- Keep your profile and resume updated."
+        ),
+        "ar": (
+            "**كيفية العثور على تدريب أو منحة**\n\n"
+            "- استخدم فلاتر البحث: التخصص، المكان، المدة، مدفوع/غير مدفوع.\n"
+            "- تابع الشركات وفعّل التنبيهات للفرص الجديدة.\n"
+            "- حافظ على تحديث ملفك الشخصي وسيرتك الذاتية."
+        ),
+    },
+}
+# Intent label -> trigger keywords (English and Arabic).
+# route_intent() walks this dict in insertion order and tests each keyword as a
+# plain substring of the lower-cased message; the first label with a hit wins.
+# Consequently short keywords in earlier labels shadow later ones: e.g. "intern"
+# under student_internships also matches "internships", so that keyword under
+# company_post_jobs can never be the one that decides the route.
+KEYS = {
+    "student_registration": [
+        "register", "sign up", "signup", "create account", "account",
+        "تسجيل", "انشاء", "إنشاء", "حساب", "اعمل حساب", "سجل"
+    ],
+    "student_internships": [
+        "intern", "internship", "training", "scholar", "scholarship", "grant", "opportunity",
+        "تدريب", "تدريبي", "منحة", "منح", "فرصة", "فرص", "انترنشيب"
+    ],
+    "student_mentors": [
+        "mentor", "advisor", "professor", "supervisor", "faculty", "connect",
+        "منتور", "مشرف", "دكتور", "أستاذ", "استاذ", "التواصل", "اكلم"
+    ],
+    "university_publish": [
+        "publish", "paper", "research", "preprint", "conference", "event", "seminar", "webinar",
+        "نشر", "أبحاث", "ابحاث", "بحث", "مؤتمر", "فعالية", "فعاليات", "ندوة", "ورشة"
+    ],
+    "university_connect": [
+        "students", "connect with students", "reach students", "collaborate",
+        "طلاب", "تواصل مع الطلاب", "التواصل مع الطلاب", "تعاون"
+    ],
+    "company_post_jobs": [
+        "job", "jobs", "post job", "hiring", "hire", "internships", "graduate",
+        "وظيفة", "وظائف", "اعلان", "إعلان", "نشر وظيفة", "توظيف", "فرص تدريب", "خريجين"
+    ],
+    "company_find_talent": [
+        "talent", "candidate", "recruit", "search", "find", "pipeline",
+        "موهبة", "مواهب", "مرشحين", "تعيين", "تجنيد", "ابحث", "دور على"
+    ],
+}
+# Default audience for each intent label in KEYS. route_intent() uses this
+# unless the caller forces one of "student" / "university" / "company".
+AUDIENCE_MAP = {
+    "student_registration": "student",
+    "student_internships": "student",
+    "student_mentors": "student",
+    "university_publish": "university",
+    "university_connect": "university",
+    "company_post_jobs": "company",
+    "company_find_talent": "company",
 }
+# First system message sent on every LLM request (see call_llm): persona,
+# per-audience goals and general answering rules.
+SYSTEM_PROMPT_BASE = (
+    "You are **EduNatives Assistant**, a helpful, friendly, and precise academic/career guide. "
+    "You serve three primary audiences: Students, Universities/Researchers, and Companies.\n\n"
+    "Goals by audience:\n"
+    "- Students: registration/account help; finding internships/scholarships; connecting with mentors or professors.\n"
+    "- Universities/Researchers: publish research or announce events; connect/collaborate with students.\n"
+    "- Companies: post jobs/internships/graduate roles; discover student talent.\n\n"
+    "General rules:\n"
+    "- Reply in the user's language (Arabic if the user writes Arabic; otherwise English).\n"
+    "- Be concise, step-by-step, and action-oriented (lists, bullets, checklists).\n"
+    "- If information is unavailable, state that clearly and suggest the next best step.\n"
+)
+# Second system message; call_llm() fills {audience} and {intent} from the
+# Route computed for the current user message.
+CONTEXT_INJECT_TEMPLATE = (
+    "Context to guide your answer (do not repeat verbatim):\n"
+    "- Audience: {audience}\n- Intent: {intent}\n- Extra hints: Keep it practical for this audience."
+)
+# --- Core Functions ---
+def is_arabic(text: str) -> bool:
+    """Return True if any character of ``text`` lies in an ARABIC_RANGE span.
+
+    Each span is an inclusive ``(start, end)`` pair of code points. An empty
+    string yields False.
+    """
+    return any(
+        low <= ord(char) <= high
+        for char in text
+        for low, high in ARABIC_RANGE
+    )
+def route_intent(text: str, forced_audience: str | None = None) -> Route:
+    """Classify a user message into audience, intent and language.
+
+    Args:
+        text: Raw user message.
+        forced_audience: Optional audience chosen by the user. When an intent
+            is matched it only overrides the mapped audience if it is one of
+            "student", "university" or "company"; when nothing matches, any
+            truthy value is used as-is.
+
+    Returns:
+        A Route whose intent is the first KEYS label with a keyword found as a
+        substring of the lower-cased message, or "general" if none matches.
+    """
+    language = "ar" if is_arabic(text) else "en"
+    haystack = text.lower()
+    # KEYS is scanned in insertion order; the first label with any hit wins.
+    intent = next(
+        (
+            label
+            for label, keywords in KEYS.items()
+            if any(keyword in haystack for keyword in keywords)
+        ),
+        None,
+    )
+    if intent is None:
+        return Route(audience=forced_audience or "general", intent="general", language=language)
+    if forced_audience in {"student", "university", "company"}:
+        audience = forced_audience
+    else:
+        audience = AUDIENCE_MAP.get(intent, "general")
+    return Route(audience=audience, intent=intent, language=language)
+def call_llm(user_message: str, history: List[Dict[str, str]], route: Route) -> str:
+    """Ask the chat model for a reply to ``user_message``.
+
+    The request consists of the base system prompt, a system message carrying
+    the routed audience/intent, the last ``MAX_TURNS * 2`` history entries and
+    finally the new user message.
+
+    Returns:
+        The model's text, or "" when the reply is empty or the request raised;
+        the error is printed rather than propagated so the caller can fall
+        back to a canned answer.
+    """
+    MAX_TURNS = 3
+    context_note = CONTEXT_INJECT_TEMPLATE.format(audience=route.audience, intent=route.intent)
+    messages: List[Dict[str, str]] = [
+        {"role": "system", "content": SYSTEM_PROMPT_BASE},
+        {"role": "system", "content": context_note},
+        *history[-MAX_TURNS * 2:],
+        {"role": "user", "content": user_message},
+    ]
+    try:
+        completion = client.chat.completions.create(
+            model=MODEL_NAME,
+            messages=messages,
+            temperature=0.6,
+            top_p=0.9,
+            max_tokens=800,
+        )
+        reply = completion.choices[0].message.content
+        return reply if reply else ""
+    except Exception as e:
+        print(f"[ERROR] LLM call failed: {e}")
+        return ""
+def kb_fallback(route: Route) -> str:
+    """Return a canned answer for ``route`` when no LLM text is available.
+
+    If KB has an entry for the intent, its text in the route's language is
+    returned (English, then "", as fallbacks). Otherwise a generic
+    "please clarify" message in Arabic or English is returned.
+    """
+    entry = KB.get(route.intent)
+    if entry is not None:
+        return entry.get(route.language, entry.get("en", ""))
+    if route.language != "ar":
+        return (
+            "I couldn't find enough info. Please clarify your request or ask about "
+            "registration, internships/scholarships, mentors, publishing research, or jobs."
+        )
+    return (
+        "عذراً، لم أجد معلومات كافية. يرجى توضيح طلبك أو السؤال عن أحد المواضيع التالية: "
+        "(تسجيل، تدريب/منح، مشرفين، نشر أبحاث، وظائف)."
+    )
+# --- Gradio UI: layout, custom chat-bubble CSS and input widgets ---
+with gr.Blocks(css="""
+.chatbot {height: 500px; overflow: auto;}
+.user-bubble {background-color: #DCF8C6; padding: 10px; border-radius: 12px; max-width: 75%; float: right; clear: both; margin: 5px; word-wrap: break-word;}
+.bot-bubble {background-color: #F1F0F0; padding: 10px; border-radius: 12px; max-width: 75%; float: left; clear: both; margin: 5px; word-wrap: break-word;}
+.chatbox-container {display: flex; gap: 8px; margin-top: 10px;}
+/* Basic styling for tables inside the bot bubble */
+.bot-bubble table {border-collapse: collapse; width: 100%;}
+.bot-bubble th, .bot-bubble td {border: 1px solid #ddd; padding: 8px; text-align: left;}
+.bot-bubble th {background-color: #e9e9e9;}
+""") as demo:
+    gr.Markdown("# 🤖 EduNatives Assistant\nYour smart, bilingual guide for academic and career opportunities.")
+    with gr.Row():
+        # Any choice other than "Auto" is mapped to a forced audience in respond().
+        audience_dd = gr.Dropdown(
+            label="Audience",
+            choices=["Auto", "Student", "University-Research", "Company"],
+            value="Auto",
+            interactive=True,
+            info="Select your role. 'Auto' detects it from your message."
+        )
+        clear_btn = gr.Button("🧹 Clear Chat")
+    status = gr.Markdown("Status: Ready.")
+    # The conversation is rendered as hand-built HTML (see format_chat_html);
+    # the underlying list of role/content dicts is kept in chat_history_state.
+    chatbot_html = gr.HTML("<div class='chatbot' id='chatbot'></div>")
+    chat_history_state = gr.State([])
+    with gr.Row(elem_classes="chatbox-container"):
+        msg = gr.Textbox(
+            placeholder="اكتب سؤالك هنا... / Ask your question here...",
+            lines=2,
+            scale=4,
+            autofocus=True,
+        )
+        send_btn = gr.Button("➡️ Send", scale=1, variant="primary")
+    def format_chat_html(history: List[Dict[str, str]]) -> str:
+        """Render the chat history as the HTML shown in the chat panel.
+
+        Args:
+            history: Messages as dicts with "role" and "content" keys. Only
+                the roles "user" and "assistant" are rendered; others are
+                skipped.
+
+        Returns:
+            A single ``<div class='chatbot'>`` element containing one bubble
+            per rendered message.
+        """
+        # Local aliased import: keeps the block self-contained and avoids
+        # confusion with variables commonly named ``html``.
+        from html import escape as escape_html
+        parts = ["<div class='chatbot'>"]
+        for message in history:
+            role = message["role"]
+            content = message["content"]
+            if role == "user":
+                # User text is untrusted: escape it so typed markup such as
+                # <script> or <img onerror=...> is displayed, not executed.
+                parts.append(f"<div class='user-bubble'>{escape_html(content)}</div>")
+            elif role == "assistant":
+                # Convert bot's markdown response to HTML.
+                # NOTE(review): the markdown converter appears to pass raw HTML
+                # in the model output through unchanged; consider sanitising
+                # this too if model output cannot be trusted.
+                html_content = markdown.markdown(content, extensions=['tables'])
+                parts.append(f"<div class='bot-bubble'>{html_content}</div>")
+        parts.append("</div>")
+        return "".join(parts)
+    def respond(user_text: str, history: List[Dict[str, str]], audience_choice: str):
+        """Handle one submitted message and produce the refreshed UI values.
+
+        Returns a 4-tuple matching the wired outputs: the new textbox value
+        (always ""), the chat HTML, the history list and the status line.
+        Blank input leaves the history untouched. Otherwise the message is
+        routed, answered by the LLM (or by kb_fallback when the LLM returns
+        nothing) and both turns are appended to ``history`` in place.
+        """
+        if not user_text.strip():
+            return "", format_chat_html(history), history, "Status: Please type a message."
+        audience_by_choice = {
+            "Student": "student",
+            "University-Research": "university",
+            "Company": "company",
+        }
+        # "Auto" (or any unknown choice) maps to None, i.e. no forced audience.
+        route = route_intent(user_text, forced_audience=audience_by_choice.get(audience_choice))
+        status_text = f"**Audience**: {route.audience} | **Intent**: {route.intent} | **Lang**: {route.language.upper()}"
+        answer = call_llm(user_text, history, route) or kb_fallback(route)
+        history.extend(
+            [
+                {"role": "user", "content": user_text},
+                {"role": "assistant", "content": answer},
+            ]
+        )
+        return "", format_chat_html(history), history, status_text
+    def clear_chat():
+        """Return reset values for the chat HTML, the history state and the status line."""
+        return "", [], "Status: Ready."
+    # Both the Send button and submitting the textbox run respond() with the same inputs and outputs.
+    send_btn.click(respond, [msg, chat_history_state, audience_dd], [msg, chatbot_html, chat_history_state, status], queue=True)
+    msg.submit(respond, [msg, chat_history_state, audience_dd], [msg, chatbot_html, chat_history_state, status], queue=True)
+    # Clearing resets the panel, the stored history and the status line; the textbox is not an output here.
+    clear_btn.click(clear_chat, outputs=[chatbot_html, chat_history_state, status], queue=False)
+# Launch the app only when the file is run as a script.
+if __name__ == "__main__":
+    demo.launch()