MaryahGreene commited on
Commit
098cc19
·
verified ·
1 Parent(s): 7e4ec0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -215
app.py CHANGED
@@ -1,230 +1,89 @@
1
  import gradio as gr
2
- import joblib, json, pandas as pd, os, shutil, math
3
- from collections import deque
 
 
 
4
  from huggingface_hub import hf_hub_download
5
- from transformers import pipeline
6
 
7
# One-time startup: working folders, model artifacts, chat LLM, and per-bot memory.

for _upload_dir in ("uploads/mink", "uploads/student"):
    os.makedirs(_upload_dir, exist_ok=True)

# -------- Load models & feature metadata --------
# Mink (event/test/course model): pickled XGBoost estimator + feature order.
mink_xgb = joblib.load(
    hf_hub_download("MaryahGreene/Mink_School_Model", "xgb_mink_model.pkl")
)
with open(
    hf_hub_download("MaryahGreene/Mink_School_Model", "xgb_mink_features.json")
) as fh:
    mink_features = json.load(fh)

# Student (structured student dataset model).
student_xgb = joblib.load(
    hf_hub_download("MaryahGreene/Student_Predict_Model", "xgb_student_model.pkl")
)
with open(
    hf_hub_download("MaryahGreene/Student_Predict_Model", "xgb_student_features.json")
) as fh:
    student_features = json.load(fh)

# -------- Lightweight LLM (CPU-friendly) --------
# If flan-t5-small is too weak, "google/flan-t5-base" is the next step up.
chat_llm = pipeline(
    "text2text-generation",
    model="google/flan-t5-small",
)

# -------- Memory --------
# 6 entries = last 3 user/bot exchanges per bot (user, bot, user, bot, ...).
mink_memory = deque(maxlen=6)
student_memory = deque(maxlen=6)
40
 
41
- # -------- Helpers --------
42
- def parse_and_save(file, save_dir):
43
- """Persist upload + load into DataFrame."""
44
- if file is None:
45
- return None, None
46
- save_path = os.path.join(save_dir, os.path.basename(file.name))
47
- shutil.copy(file.name, save_path)
48
- try:
49
- if file.name.endswith(".csv"):
50
- df = pd.read_csv(file.name)
51
- elif file.name.endswith((".xls", ".xlsx")):
52
- df = pd.read_excel(file.name)
53
- else:
54
- return None, save_path
55
- return df, save_path
56
- except Exception:
57
- return None, save_path
58
 
59
def robust_llm(prompt, max_new_tokens=180, temperature=0.9):
    """Run the chat LLM and normalize its output to a plain string.

    Tolerates the different return shapes HF pipelines can produce and
    strips any echoed prompt text up to the "RESPONSE:" marker. Any failure
    is reported inline rather than raised.
    """
    try:
        results = chat_llm(prompt, max_new_tokens=max_new_tokens, temperature=temperature)
        if not results:
            return ""
        head = results[0]
        text = head.get("generated_text", "") if isinstance(head, dict) else str(head)
        # Keep only what follows "RESPONSE:" to reduce prompt echo.
        _, marker, tail = text.partition("RESPONSE:")
        if marker:
            text = tail
        return text.strip()
    except Exception as e:
        return f"⚠️ Oops, LLM error: {e}"
76
 
77
def generate_reply(bot_style, user_input, preds=None, memory=None):
    """Build a persona prompt, query the LLM, and return the reply.

    The prompt includes prior turns from *memory* and, when given, a
    prediction summary. Successful exchanges are appended to *memory*
    (user message, then reply). Falls back to a canned encouragement when
    the LLM produces no text.
    """
    base_prompt = (
        f"You are {bot_style}. "
        "Always reply in a warm, supportive, motivational tone with gentle emojis. "
        "Give practical tips about grades, tests, study habits, absences, and participation. "
        "Do not repeat the user's message. Keep it kind, concise, and actionable."
    )

    # Replay remembered turns as "User:/Bot:" pairs (memory alternates
    # user, bot, user, bot, ...).
    convo_parts = []
    if memory:
        turns = list(memory)
        for i in range(1, len(turns), 2):
            convo_parts.append(f"User: {turns[i - 1]}\nBot: {turns[i]}\n")
    convo = "".join(convo_parts)

    if preds is not None:
        tail = (
            f"User says: {user_input}\n"
            f"Model predictions (summarized): {preds}\n\n"
            f"RESPONSE:"
        )
    else:
        tail = f"User says: {user_input}\n\nRESPONSE:"
    prompt = f"INSTRUCTION: {base_prompt}\n\nConversation so far (if any):\n{convo}\n{tail}"

    text = robust_llm(prompt)
    if not text:
        return "✨ I'm here for you! Keep going β€” you’ve got this πŸ’•."
    if memory is not None:
        memory.append(user_input)
        memory.append(text)
    return text
115
-
116
def summarize_mink_predictions(preds):
    """Summarize classification-style predictions as a friendly one-liner.

    Accepts a flat list of labels or a list of one-element lists/tuples
    (e.g. ``[[1], [2]]``); reports the most frequent label and the full
    label distribution.
    """
    if not preds:
        return "No predictions produced."
    # Flatten one level of nesting.
    flat = []
    for item in preds:
        if isinstance(item, (list, tuple)):
            flat.extend(item)
        else:
            flat.append(item)
    # Tally each label's occurrences.
    counts = {}
    for item in flat:
        counts[item] = counts.get(item, 0) + 1
    total = len(flat)
    label, freq = max(counts.items(), key=lambda kv: kv[1])
    return f"Top achievement level = {label} (seen {freq}/{total} rows). Distribution: {counts}"
135
-
136
def summarize_student_predictions(preds):
    """Summarize regression-style predictions (average and range) as one line.

    Flattens one level of nesting, keeps only values that look numeric
    (real numbers, or digit strings with at most one dot), and reports
    average / min / max across them.
    """
    if not preds:
        return "No predictions produced."
    flat = []
    for item in preds:
        if isinstance(item, (list, tuple)):
            flat.extend(item)
        else:
            flat.append(item)

    def _looks_numeric(x):
        # Real numbers pass; strings pass only if digits with <= one dot.
        if isinstance(x, (int, float)):
            return True
        return isinstance(x, str) and x.replace('.', '', 1).isdigit()

    nums = [float(x) for x in flat if _looks_numeric(x)]
    if not nums:
        return "No numeric predictions could be summarized."
    avg = sum(nums) / len(nums)
    lo, hi = min(nums), max(nums)

    def _round1(value):
        try:
            return round(value, 1)
        except Exception:
            return value

    return f"Next test score ~ avg {_round1(avg)} (range {_round1(lo)}–{_round1(hi)} across {len(nums)} row(s))."
159
-
160
# -------- Bot logic --------
def mink_predict(text, file):
    """Handle one MinkBot turn.

    With an upload: align its columns to the model's features, predict,
    and summarize. Without one: chat only — no zero-filled fake predictions.
    """
    preds_summary = None
    if file is not None:
        df, _ = parse_and_save(file, "uploads/mink")
        if df is None or df.empty:
            preds_summary = "Could not read the file or it was empty."
        else:
            features = df.reindex(columns=mink_features, fill_value=0)
            try:
                raw = mink_xgb.predict(features).tolist()
                preds_summary = summarize_mink_predictions(raw)
            except Exception as e:
                preds_summary = f"Prediction failed: {e}"
    return generate_reply("MinkBot 🌸, a pink-themed tutor", text or "", preds=preds_summary, memory=mink_memory)
181
-
182
def student_predict(text, file):
    """Handle one StudentBot turn.

    With an upload: predict the next test score per row and summarize.
    Without one: chat only — no fabricated numbers.
    """
    preds_summary = None
    if file is not None:
        df, _ = parse_and_save(file, "uploads/student")
        if df is None or df.empty:
            preds_summary = "Could not read the file or it was empty."
        else:
            features = df.reindex(columns=student_features, fill_value=0)
            try:
                raw = student_xgb.predict(features).tolist()
                preds_summary = summarize_student_predictions(raw)
            except Exception as e:
                preds_summary = f"Prediction failed: {e}"
    return generate_reply("StudentBot πŸŽ€, a pastel-themed tutor", text or "", preds=preds_summary, memory=student_memory)
201
 
202
# -------- UI --------
# Two side-by-side panels, each wiring a textbox + optional upload to a bot.
_PANEL_CSS = """
#left-panel {background-color:#ffe6f2; border-radius: 20px; padding: 20px;}
#right-panel {background-color:#f0f6ff; border-radius: 20px; padding: 20px;}
.gr-button {border-radius: 12px; font-weight:600;}
h2 {margin-top:0}
"""

with gr.Blocks(css=_PANEL_CSS) as demo:
    with gr.Row():
        # Mink side (pink theme).
        with gr.Column(elem_id="left-panel"):
            gr.Markdown("<h2 style='color:#c4005a;'>🌸 <b>Mink Data Chatbot</b> 🌸</h2>")
            mink_text = gr.Textbox(label="Chat with MinkBot", placeholder="Ask anything about grades/tests/absences…", lines=2)
            mink_file = gr.File(label="Upload Student Portal Export (optional)", file_types=[".csv", ".xls", ".xlsx"])
            mink_btn = gr.Button("✨ Send / Predict")
            mink_out = gr.Textbox(label="MinkBot Says", interactive=False)
            mink_btn.click(fn=mink_predict, inputs=[mink_text, mink_file], outputs=mink_out)

        # Student side (blue theme).
        with gr.Column(elem_id="right-panel"):
            gr.Markdown("<h2 style='color:#2a2aa5;'>πŸŽ€ <b>Student Predictor</b> πŸŽ€</h2>")
            student_text = gr.Textbox(label="Chat with StudentBot", placeholder="Ask for tips or upload to get predictions…", lines=2)
            student_file = gr.File(label="Upload Student Portal Export (optional)", file_types=[".csv", ".xls", ".xlsx"])
            student_btn = gr.Button("🌟 Send / Predict")
            student_out = gr.Textbox(label="StudentBot Says", interactive=False)
            student_btn.click(fn=student_predict, inputs=[student_text, student_file], outputs=student_out)

demo.launch()
 
1
  import gradio as gr
2
+ import pandas as pd
3
+ import xgboost as xgb
4
+ import json
5
+ import torch
6
+ from transformers import AutoTokenizer, BertForSequenceClassification
7
  from huggingface_hub import hf_hub_download
8
+ from sklearn.preprocessing import LabelEncoder
9
 
10
# -------------------------------
# Load Mink Models
# -------------------------------
print("Loading Mink Models...")

_MINK_REPO = "MaryahGreene/Mink_School_Model"

# NOTE(review): Booster.load_model expects XGBoost's native format, but the
# artifact is named .pkl — confirm the file on the Hub really is a saved
# Booster, otherwise this load fails at startup.
mink_xgb = xgb.Booster()
mink_xgb.load_model(hf_hub_download(_MINK_REPO, "xgb_mink_model.pkl"))

# Column order the booster was trained with.
with open(hf_hub_download(_MINK_REPO, "xgb_mink_features.json")) as feature_file:
    mink_features = json.load(feature_file)

# BERT text classifier + its tokenizer, from the same repo.
mink_tokenizer = AutoTokenizer.from_pretrained(_MINK_REPO)
mink_bert = BertForSequenceClassification.from_pretrained(_MINK_REPO)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
# -------------------------------
# Load Student Models
# -------------------------------
print("Loading Student Models...")

_STUDENT_REPO = "MaryahGreene/Student_Predict_Model"

# NOTE(review): Booster.load_model expects XGBoost's native format, but the
# artifact is named .pkl — confirm the file on the Hub really is a saved
# Booster, otherwise this load fails at startup.
student_xgb = xgb.Booster()
student_xgb.load_model(hf_hub_download(_STUDENT_REPO, "xgb_student_model.pkl"))

# Column order the booster was trained with.
with open(hf_hub_download(_STUDENT_REPO, "xgb_student_features.json")) as feature_file:
    student_features = json.load(feature_file)

# BERT text classifier + its tokenizer, from the same repo.
student_tokenizer = AutoTokenizer.from_pretrained(_STUDENT_REPO)
student_bert = BertForSequenceClassification.from_pretrained(_STUDENT_REPO)
35
+
36
# -------------------------------
# Helper Functions
# -------------------------------
def predict_mink(user_dict, keywords=""):
    """Predict an achievement level from tabular features plus a BERT label from text.

    Parameters
    ----------
    user_dict : dict
        Feature-name -> value mapping. Missing features are filled with 0
        instead of raising KeyError on partial input.
    keywords : str
        Free text classified by the Mink BERT model.

    Returns
    -------
    str
        Human-readable summary of both model outputs.
    """
    # Align columns to the trained feature order; tolerate missing keys.
    df = pd.DataFrame([user_dict]).reindex(columns=mink_features, fill_value=0)
    dmatrix = xgb.DMatrix(df, feature_names=mink_features)
    # NOTE(review): argmax assumes the booster emits per-class scores
    # (softprob). If it was trained with a softmax objective, predict()
    # already returns the class label and argmax over one row is always 0 —
    # confirm the training objective.
    xgb_pred = int(mink_xgb.predict(dmatrix).argmax())

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        bert_inputs = mink_tokenizer(keywords, return_tensors="pt", padding=True, truncation=True)
        bert_outputs = mink_bert(**bert_inputs)
    bert_pred = int(torch.argmax(bert_outputs.logits, dim=1))

    # Map class index to a readable achievement band.
    achievement_map = {0: "Below Basic", 1: "Basic", 2: "Proficient", 3: "Advanced"}
    return f"πŸ“Š XGBoost Prediction: {achievement_map.get(xgb_pred, 'Unknown')} | πŸ“ BERT Subject: {bert_pred}"
50
+
51
def predict_student(user_dict, keywords=""):
    """Predict the next test score (regression) plus a BERT subject label.

    Parameters
    ----------
    user_dict : dict
        Feature-name -> value mapping. Missing features are filled with 0
        instead of raising KeyError on partial input.
    keywords : str
        Free text classified by the Student BERT model.

    Returns
    -------
    str
        Human-readable summary: predicted score, letter grade, BERT label.
    """
    # Align columns to the trained feature order; tolerate missing keys.
    df = pd.DataFrame([user_dict]).reindex(columns=student_features, fill_value=0)
    dmatrix = xgb.DMatrix(df, feature_names=student_features)
    xgb_pred = float(student_xgb.predict(dmatrix)[0])

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        bert_inputs = student_tokenizer(keywords, return_tensors="pt", padding=True, truncation=True)
        bert_outputs = student_bert(**bert_inputs)
    bert_pred = int(torch.argmax(bert_outputs.logits, dim=1))

    def _letter(score):
        # Standard 10-point letter-grade scale.
        if score >= 90:
            return "A"
        if score >= 80:
            return "B"
        if score >= 70:
            return "C"
        if score >= 60:
            return "D"
        return "F"

    return f"πŸ“Š XGBoost Predicted Next Score: {xgb_pred:.1f} ({_letter(xgb_pred)}) | πŸ“ BERT Subject: {bert_pred}"
61
+
62
# -------------------------------
# Chatbot Wrappers
# -------------------------------
def minkbot(message, history=None):
    """ChatInterface callback for MinkBot.

    gr.ChatInterface invokes its fn as fn(message, history); the original
    single-argument signature raised TypeError on every turn. *history* is
    accepted (and ignored) to satisfy that contract, with a default so any
    direct one-argument callers keep working.

    NOTE(review): the tabular features are hard-coded demo values; only the
    chat message feeds the BERT branch.
    """
    return predict_mink(
        {"Score": 85, "CreditsEarned": 12, "GPAPoints": 3.2, "TotalAbsences": 4, "TotalUnexcused": 2},
        keywords=message,
    )
70
 
71
def studentbot(message, history=None):
    """ChatInterface callback for StudentBot.

    gr.ChatInterface invokes its fn as fn(message, history); accepting
    *history* (ignored) with a default fixes the TypeError the one-argument
    signature caused on every turn while keeping direct calls working.

    NOTE(review): the student features are hard-coded demo values; only the
    chat message feeds the BERT branch.
    """
    return predict_student(
        {"age": 16, "grade level": 10, "gpa": 3.0, "homework average": 80, "quiz average": 75,
         "previous test scores": 78, "class participation": 70, "attendance rate": 90,
         "quality points": 3.0, "gender": 1, "class type": 1, "class level": 1, "subject category": 0},
        keywords=message,
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
# -------------------------------
# UI
# -------------------------------
with gr.Blocks() as demo:
    # Two stacked chat panels, each backed by its own model wrapper.
    gr.HTML("<h1 style='color:pink;text-align:center;'>MinkBot πŸŽ€</h1>")
    mink_chat = gr.ChatInterface(fn=minkbot, chatbot=gr.Chatbot())

    gr.HTML("<h1 style='color:purple;text-align:center;'>StudentBot πŸ“š</h1>")
    student_chat = gr.ChatInterface(fn=studentbot, chatbot=gr.Chatbot())

demo.launch()