MaryahGreene commited on
Commit
098cc19
·
verified ·
1 Parent(s): 7e4ec0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -215
app.py CHANGED
@@ -1,230 +1,89 @@
1
  import gradio as gr
2
- import joblib, json, pandas as pd, os, shutil, math
3
- from collections import deque
 
 
 
4
  from huggingface_hub import hf_hub_download
5
- from transformers import pipeline
6
 
7
# One-time startup: working folders, model artifacts, chat LLM, and per-bot memory.

for _upload_dir in ("uploads/mink", "uploads/student"):
    os.makedirs(_upload_dir, exist_ok=True)

# -------- Load models & feature metadata --------
# Mink (event/test/course model): pickled XGBoost estimator + feature order.
mink_xgb = joblib.load(
    hf_hub_download("MaryahGreene/Mink_School_Model", "xgb_mink_model.pkl")
)
with open(
    hf_hub_download("MaryahGreene/Mink_School_Model", "xgb_mink_features.json")
) as fh:
    mink_features = json.load(fh)

# Student (structured student dataset model).
student_xgb = joblib.load(
    hf_hub_download("MaryahGreene/Student_Predict_Model", "xgb_student_model.pkl")
)
with open(
    hf_hub_download("MaryahGreene/Student_Predict_Model", "xgb_student_features.json")
) as fh:
    student_features = json.load(fh)

# -------- Lightweight LLM (CPU-friendly) --------
# If flan-t5-small is too weak, "google/flan-t5-base" is the next step up.
chat_llm = pipeline(
    "text2text-generation",
    model="google/flan-t5-small",
)

# -------- Memory --------
# 6 entries = last 3 user/bot exchanges per bot (user, bot, user, bot, ...).
mink_memory = deque(maxlen=6)
student_memory = deque(maxlen=6)
40
 
41
- # -------- Helpers --------
42
- def parse_and_save(file, save_dir):
43
- """Persist upload + load into DataFrame."""
44
- if file is None:
45
- return None, None
46
- save_path = os.path.join(save_dir, os.path.basename(file.name))
47
- shutil.copy(file.name, save_path)
48
- try:
49
- if file.name.endswith(".csv"):
50
- df = pd.read_csv(file.name)
51
- elif file.name.endswith((".xls", ".xlsx")):
52
- df = pd.read_excel(file.name)
53
- else:
54
- return None, save_path
55
- return df, save_path
56
- except Exception:
57
- return None, save_path
58
 
59
def robust_llm(prompt, max_new_tokens=180, temperature=0.9):
    """Run the chat LLM and normalize its output to a plain string.

    Tolerates the different return shapes HF pipelines can produce and
    strips any echoed prompt text up to the "RESPONSE:" marker. Any failure
    is reported inline rather than raised.
    """
    try:
        results = chat_llm(prompt, max_new_tokens=max_new_tokens, temperature=temperature)
        if not results:
            return ""
        head = results[0]
        text = head.get("generated_text", "") if isinstance(head, dict) else str(head)
        # Keep only what follows "RESPONSE:" to reduce prompt echo.
        _, marker, tail = text.partition("RESPONSE:")
        if marker:
            text = tail
        return text.strip()
    except Exception as e:
        return f"⚠️ Oops, LLM error: {e}"
76
 
77
def generate_reply(bot_style, user_input, preds=None, memory=None):
    """Build a persona prompt, query the LLM, and return the reply.

    The prompt includes prior turns from *memory* and, when given, a
    prediction summary. Successful exchanges are appended to *memory*
    (user message, then reply). Falls back to a canned encouragement when
    the LLM produces no text.
    """
    base_prompt = (
        f"You are {bot_style}. "
        "Always reply in a warm, supportive, motivational tone with gentle emojis. "
        "Give practical tips about grades, tests, study habits, absences, and participation. "
        "Do not repeat the user's message. Keep it kind, concise, and actionable."
    )

    # Replay remembered turns as "User:/Bot:" pairs (memory alternates
    # user, bot, user, bot, ...).
    convo_parts = []
    if memory:
        turns = list(memory)
        for i in range(1, len(turns), 2):
            convo_parts.append(f"User: {turns[i - 1]}\nBot: {turns[i]}\n")
    convo = "".join(convo_parts)

    if preds is not None:
        tail = (
            f"User says: {user_input}\n"
            f"Model predictions (summarized): {preds}\n\n"
            f"RESPONSE:"
        )
    else:
        tail = f"User says: {user_input}\n\nRESPONSE:"
    prompt = f"INSTRUCTION: {base_prompt}\n\nConversation so far (if any):\n{convo}\n{tail}"

    text = robust_llm(prompt)
    if not text:
        return "✨ I'm here for you! Keep going β€” you’ve got this πŸ’•."
    if memory is not None:
        memory.append(user_input)
        memory.append(text)
    return text
115
-
116
def summarize_mink_predictions(preds):
    """Summarize classification-style predictions as a friendly one-liner.

    Accepts a flat list of labels or a list of one-element lists/tuples
    (e.g. ``[[1], [2]]``); reports the most frequent label and the full
    label distribution.
    """
    if not preds:
        return "No predictions produced."
    # Flatten one level of nesting.
    flat = []
    for item in preds:
        if isinstance(item, (list, tuple)):
            flat.extend(item)
        else:
            flat.append(item)
    # Tally each label's occurrences.
    counts = {}
    for item in flat:
        counts[item] = counts.get(item, 0) + 1
    total = len(flat)
    label, freq = max(counts.items(), key=lambda kv: kv[1])
    return f"Top achievement level = {label} (seen {freq}/{total} rows). Distribution: {counts}"
135
-
136
def summarize_student_predictions(preds):
    """Summarize regression-style predictions (average and range) as one line.

    Flattens one level of nesting, keeps only values that look numeric
    (real numbers, or digit strings with at most one dot), and reports
    average / min / max across them.
    """
    if not preds:
        return "No predictions produced."
    flat = []
    for item in preds:
        if isinstance(item, (list, tuple)):
            flat.extend(item)
        else:
            flat.append(item)

    def _looks_numeric(x):
        # Real numbers pass; strings pass only if digits with <= one dot.
        if isinstance(x, (int, float)):
            return True
        return isinstance(x, str) and x.replace('.', '', 1).isdigit()

    nums = [float(x) for x in flat if _looks_numeric(x)]
    if not nums:
        return "No numeric predictions could be summarized."
    avg = sum(nums) / len(nums)
    lo, hi = min(nums), max(nums)

    def _round1(value):
        try:
            return round(value, 1)
        except Exception:
            return value

    return f"Next test score ~ avg {_round1(avg)} (range {_round1(lo)}–{_round1(hi)} across {len(nums)} row(s))."
159
-
160
# -------- Bot logic --------
def mink_predict(text, file):
    """Handle one MinkBot turn.

    With an upload: align its columns to the model's features, predict,
    and summarize. Without one: chat only — no zero-filled fake predictions.
    """
    preds_summary = None
    if file is not None:
        df, _ = parse_and_save(file, "uploads/mink")
        if df is None or df.empty:
            preds_summary = "Could not read the file or it was empty."
        else:
            features = df.reindex(columns=mink_features, fill_value=0)
            try:
                raw = mink_xgb.predict(features).tolist()
                preds_summary = summarize_mink_predictions(raw)
            except Exception as e:
                preds_summary = f"Prediction failed: {e}"
    return generate_reply("MinkBot 🌸, a pink-themed tutor", text or "", preds=preds_summary, memory=mink_memory)
181
-
182
def student_predict(text, file):
    """Handle one StudentBot turn.

    With an upload: predict the next test score per row and summarize.
    Without one: chat only — no fabricated numbers.
    """
    preds_summary = None
    if file is not None:
        df, _ = parse_and_save(file, "uploads/student")
        if df is None or df.empty:
            preds_summary = "Could not read the file or it was empty."
        else:
            features = df.reindex(columns=student_features, fill_value=0)
            try:
                raw = student_xgb.predict(features).tolist()
                preds_summary = summarize_student_predictions(raw)
            except Exception as e:
                preds_summary = f"Prediction failed: {e}"
    return generate_reply("StudentBot πŸŽ€, a pastel-themed tutor", text or "", preds=preds_summary, memory=student_memory)
201
 
202
# -------- UI --------
# Two side-by-side panels, each wiring a textbox + optional upload to a bot.
_PANEL_CSS = """
#left-panel {background-color:#ffe6f2; border-radius: 20px; padding: 20px;}
#right-panel {background-color:#f0f6ff; border-radius: 20px; padding: 20px;}
.gr-button {border-radius: 12px; font-weight:600;}
h2 {margin-top:0}
"""

with gr.Blocks(css=_PANEL_CSS) as demo:
    with gr.Row():
        # Mink side (pink theme).
        with gr.Column(elem_id="left-panel"):
            gr.Markdown("<h2 style='color:#c4005a;'>🌸 <b>Mink Data Chatbot</b> 🌸</h2>")
            mink_text = gr.Textbox(label="Chat with MinkBot", placeholder="Ask anything about grades/tests/absences…", lines=2)
            mink_file = gr.File(label="Upload Student Portal Export (optional)", file_types=[".csv", ".xls", ".xlsx"])
            mink_btn = gr.Button("✨ Send / Predict")
            mink_out = gr.Textbox(label="MinkBot Says", interactive=False)
            mink_btn.click(fn=mink_predict, inputs=[mink_text, mink_file], outputs=mink_out)

        # Student side (blue theme).
        with gr.Column(elem_id="right-panel"):
            gr.Markdown("<h2 style='color:#2a2aa5;'>πŸŽ€ <b>Student Predictor</b> πŸŽ€</h2>")
            student_text = gr.Textbox(label="Chat with StudentBot", placeholder="Ask for tips or upload to get predictions…", lines=2)
            student_file = gr.File(label="Upload Student Portal Export (optional)", file_types=[".csv", ".xls", ".xlsx"])
            student_btn = gr.Button("🌟 Send / Predict")
            student_out = gr.Textbox(label="StudentBot Says", interactive=False)
            student_btn.click(fn=student_predict, inputs=[student_text, student_file], outputs=student_out)

demo.launch()
 
1
  import gradio as gr
2
+ import pandas as pd
3
+ import xgboost as xgb
4
+ import json
5
+ import torch
6
+ from transformers import AutoTokenizer, BertForSequenceClassification
7
  from huggingface_hub import hf_hub_download
8
+ from sklearn.preprocessing import LabelEncoder
9
 
10
# -------------------------------
# Load Mink Models
# -------------------------------
print("Loading Mink Models...")

_MINK_REPO = "MaryahGreene/Mink_School_Model"

# NOTE(review): Booster.load_model expects XGBoost's native format, but the
# artifact is named .pkl — confirm the file on the Hub really is a saved
# Booster, otherwise this load fails at startup.
mink_xgb = xgb.Booster()
mink_xgb.load_model(hf_hub_download(_MINK_REPO, "xgb_mink_model.pkl"))

# Column order the booster was trained with.
with open(hf_hub_download(_MINK_REPO, "xgb_mink_features.json")) as feature_file:
    mink_features = json.load(feature_file)

# BERT text classifier + its tokenizer, from the same repo.
mink_tokenizer = AutoTokenizer.from_pretrained(_MINK_REPO)
mink_bert = BertForSequenceClassification.from_pretrained(_MINK_REPO)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
# -------------------------------
# Load Student Models
# -------------------------------
print("Loading Student Models...")

_STUDENT_REPO = "MaryahGreene/Student_Predict_Model"

# NOTE(review): Booster.load_model expects XGBoost's native format, but the
# artifact is named .pkl — confirm the file on the Hub really is a saved
# Booster, otherwise this load fails at startup.
student_xgb = xgb.Booster()
student_xgb.load_model(hf_hub_download(_STUDENT_REPO, "xgb_student_model.pkl"))

# Column order the booster was trained with.
with open(hf_hub_download(_STUDENT_REPO, "xgb_student_features.json")) as feature_file:
    student_features = json.load(feature_file)

# BERT text classifier + its tokenizer, from the same repo.
student_tokenizer = AutoTokenizer.from_pretrained(_STUDENT_REPO)
student_bert = BertForSequenceClassification.from_pretrained(_STUDENT_REPO)
35
+
36
# -------------------------------
# Helper Functions
# -------------------------------
def predict_mink(user_dict, keywords=""):
    """Predict an achievement level from tabular features plus a BERT label from text.

    Parameters
    ----------
    user_dict : dict
        Feature-name -> value mapping. Missing features are filled with 0
        instead of raising KeyError on partial input.
    keywords : str
        Free text classified by the Mink BERT model.

    Returns
    -------
    str
        Human-readable summary of both model outputs.
    """
    # Align columns to the trained feature order; tolerate missing keys.
    df = pd.DataFrame([user_dict]).reindex(columns=mink_features, fill_value=0)
    dmatrix = xgb.DMatrix(df, feature_names=mink_features)
    # NOTE(review): argmax assumes the booster emits per-class scores
    # (softprob). If it was trained with a softmax objective, predict()
    # already returns the class label and argmax over one row is always 0 —
    # confirm the training objective.
    xgb_pred = int(mink_xgb.predict(dmatrix).argmax())

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        bert_inputs = mink_tokenizer(keywords, return_tensors="pt", padding=True, truncation=True)
        bert_outputs = mink_bert(**bert_inputs)
    bert_pred = int(torch.argmax(bert_outputs.logits, dim=1))

    # Map class index to a readable achievement band.
    achievement_map = {0: "Below Basic", 1: "Basic", 2: "Proficient", 3: "Advanced"}
    return f"πŸ“Š XGBoost Prediction: {achievement_map.get(xgb_pred, 'Unknown')} | πŸ“ BERT Subject: {bert_pred}"
50
+
51
def predict_student(user_dict, keywords=""):
    """Predict the next test score (regression) plus a BERT subject label.

    Parameters
    ----------
    user_dict : dict
        Feature-name -> value mapping. Missing features are filled with 0
        instead of raising KeyError on partial input.
    keywords : str
        Free text classified by the Student BERT model.

    Returns
    -------
    str
        Human-readable summary: predicted score, letter grade, BERT label.
    """
    # Align columns to the trained feature order; tolerate missing keys.
    df = pd.DataFrame([user_dict]).reindex(columns=student_features, fill_value=0)
    dmatrix = xgb.DMatrix(df, feature_names=student_features)
    xgb_pred = float(student_xgb.predict(dmatrix)[0])

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        bert_inputs = student_tokenizer(keywords, return_tensors="pt", padding=True, truncation=True)
        bert_outputs = student_bert(**bert_inputs)
    bert_pred = int(torch.argmax(bert_outputs.logits, dim=1))

    def _letter(score):
        # Standard 10-point letter-grade scale.
        if score >= 90:
            return "A"
        if score >= 80:
            return "B"
        if score >= 70:
            return "C"
        if score >= 60:
            return "D"
        return "F"

    return f"πŸ“Š XGBoost Predicted Next Score: {xgb_pred:.1f} ({_letter(xgb_pred)}) | πŸ“ BERT Subject: {bert_pred}"
61
+
62
# -------------------------------
# Chatbot Wrappers
# -------------------------------
def minkbot(message, history=None):
    """ChatInterface callback for MinkBot.

    gr.ChatInterface invokes its fn as fn(message, history); the original
    single-argument signature raised TypeError on every turn. *history* is
    accepted (and ignored) to satisfy that contract, with a default so any
    direct one-argument callers keep working.

    NOTE(review): the tabular features are hard-coded demo values; only the
    chat message feeds the BERT branch.
    """
    return predict_mink(
        {"Score": 85, "CreditsEarned": 12, "GPAPoints": 3.2, "TotalAbsences": 4, "TotalUnexcused": 2},
        keywords=message,
    )
70
 
71
def studentbot(message, history=None):
    """ChatInterface callback for StudentBot.

    gr.ChatInterface invokes its fn as fn(message, history); accepting
    *history* (ignored) with a default fixes the TypeError the one-argument
    signature caused on every turn while keeping direct calls working.

    NOTE(review): the student features are hard-coded demo values; only the
    chat message feeds the BERT branch.
    """
    return predict_student(
        {"age": 16, "grade level": 10, "gpa": 3.0, "homework average": 80, "quiz average": 75,
         "previous test scores": 78, "class participation": 70, "attendance rate": 90,
         "quality points": 3.0, "gender": 1, "class type": 1, "class level": 1, "subject category": 0},
        keywords=message,
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
 
79
# -------------------------------
# UI
# -------------------------------
with gr.Blocks() as demo:
    # Two stacked chat panels, each backed by its own model wrapper.
    gr.HTML("<h1 style='color:pink;text-align:center;'>MinkBot πŸŽ€</h1>")
    mink_chat = gr.ChatInterface(fn=minkbot, chatbot=gr.Chatbot())

    gr.HTML("<h1 style='color:purple;text-align:center;'>StudentBot πŸ“š</h1>")
    student_chat = gr.ChatInterface(fn=studentbot, chatbot=gr.Chatbot())

demo.launch()