aedupuga commited on
Commit
41f8068
·
verified ·
1 Parent(s): 9023a9e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +633 -334
app.py CHANGED
@@ -1,353 +1,652 @@
1
- # --- TechSpark Courses Q&A: lightweight Gradio chat with a tiny LLM ---
2
- # Works in Google Colab. Hard-coded course data, no uploads required.
 
 
 
 
 
 
 
 
 
 
 
3
 
 
4
 
5
- import gradio as gr
6
- from rapidfuzz import process, fuzz
7
 
8
- # Optional tiny LLM (fast): FLAN-T5-small
9
- # If it fails to load (e.g., offline), we’ll just return the raw answer.
10
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
11
 
12
- def load_llm():
13
- try:
14
- tok = AutoTokenizer.from_pretrained("google/flan-t5-small")
15
- mdl = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")
16
- return tok, mdl
17
- except Exception:
18
- return None, None
19
-
20
- TOK, MDL = load_llm()
21
-
22
- def llm_paraphrase(text: str) -> str:
23
- if not (TOK and MDL):
24
- return text
25
- prompt = f"Paraphrase clearly and concisely:\n{text}"
26
- inputs = TOK(prompt, return_tensors="pt")
27
- out_ids = MDL.generate(**inputs, max_new_tokens=128)
28
- return TOK.decode(out_ids[0], skip_special_tokens=True)
29
-
30
- # ------------------------
31
- # HARD-CODED COURSE DATA
32
- # (Copied from your CSV: /mnt/data/TechSpark.xlsx - Courses.csv)
33
- # Columns: Name, Code, Description, Units, Length (Weeks),
34
- # Laser Cutting, Wood Working, Wood CNC, Metal Machining, Metal CNC,
35
- # 3D Printer, Welding, Electronics
36
- # Note: Some descriptions were truncated with "..." in the source CSV; left as-is.
37
- # ------------------------
38
-
39
- COURSES = [
40
- {
41
- "Name": "Modern Making",
42
- "Code": 24104,
43
- "Description": "This course teaches the fundamental skills needed to plan, devel...bricating with 3D printers, and physical computing with Arduino.",
44
- "Units": 3,
45
- "Length (Weeks)": 7,
46
- "Laser Cutting": 3,
47
- "Wood Working": 0,
48
- "Wood CNC": 0,
49
- "Metal Machining": 0,
50
- "Metal CNC": 0,
51
- "3D Printer": 3,
52
- "Welding": 0,
53
- "Electronics": 3
54
- },
55
- {
56
- "Name": "Laser Machine Training",
57
- "Code": 24105,
58
- "Description": "This is a course that allows students to work at their own pace....ed to use the laser cutting and engraving machines at TechSpark.",
59
- "Units": 0,
60
- "Length (Weeks)": 2,
61
- "Laser Cutting": 2,
62
- "Wood Working": 0,
63
- "Wood CNC": 0,
64
- "Metal Machining": 0,
65
- "Metal CNC": 0,
66
- "3D Printer": 0,
67
- "Welding": 0,
68
- "Electronics": 0
69
- },
70
- {
71
- "Name": "Intro to Manual Machining",
72
- "Code": 24200,
73
- "Description": "This course teaches safe operation of manual machining equipment...gn projects, research equipment, and extracurricular activities.",
74
- "Units": 1,
75
- "Length (Weeks)": 7,
76
- "Laser Cutting": 0,
77
- "Wood Working": 0,
78
- "Wood CNC": 0,
79
- "Metal Machining": 0,
80
- "Metal CNC": 0,
81
- "3D Printer": 0,
82
- "Welding": 0,
83
- "Electronics": 0
84
- },
85
- {
86
- "Name": "Project Fabrication and Assembly",
87
- "Code": 24201,
88
- "Description": "This course teaches the fundamental skills of fabrication and as...asses and is a portal (prerequisite) to other TechSpark courses.",
89
- "Units": 1,
90
- "Length (Weeks)": 7,
91
- "Laser Cutting": 2,
92
- "Wood Working": 1,
93
- "Wood CNC": 0,
94
- "Metal Machining": 0,
95
- "Metal CNC": 0,
96
- "3D Printer": 3,
97
- "Welding": 0,
98
- "Electronics": 3
99
- },
100
- {
101
- "Name": "Machine Shop Principles",
102
- "Code": 24203,
103
- "Description": "This course teaches the safe operation of manual machining equip...course is required to use the student machine shop at TechSpark.",
104
- "Units": 3,
105
- "Length (Weeks)": 7,
106
- "Laser Cutting": 0,
107
- "Wood Working": 0,
108
- "Wood CNC": 0,
109
- "Metal Machining": 3,
110
- "Metal CNC": 0,
111
- "3D Printer": 0,
112
- "Welding": 0,
113
- "Electronics": 0
114
- },
115
- {
116
- "Name": "Metal Jewelry",
117
- "Code": 24204,
118
- "Description": "This course teaches introductory-level metal jewelry fabrication...This course is required to use the hot metals room at TechSpark.",
119
- "Units": 2,
120
- "Length (Weeks)": 7,
121
- "Laser Cutting": 0,
122
- "Wood Working": 0,
123
- "Wood CNC": 0,
124
- "Metal Machining": 1,
125
- "Metal CNC": 1,
126
- "3D Printer": 0,
127
- "Welding": 2,
128
- "Electronics": 0
129
- },
130
- {
131
- "Name": "Welding",
132
- "Code": 24205,
133
- "Description": "This course teaches the safe operation of welding equipment thro...is course is required to use the welding equipment at TechSpark.",
134
- "Units": 2,
135
- "Length (Weeks)": 7,
136
- "Laser Cutting": 0,
137
- "Wood Working": 0,
138
- "Wood CNC": 0,
139
- "Metal Machining": 1,
140
- "Metal CNC": 1,
141
- "3D Printer": 0,
142
- "Welding": 3,
143
- "Electronics": 0
144
- },
145
- {
146
- "Name": "Wood Shop Principles",
147
- "Code": 24206,
148
- "Description": "This course teaches the safe operation of wood working equipment...is course is required to use the student wood shop at TechSpark.",
149
- "Units": 3,
150
- "Length (Weeks)": 7,
151
- "Laser Cutting": 0,
152
- "Wood Working": 3,
153
- "Wood CNC": 0,
154
- "Metal Machining": 0,
155
- "Metal CNC": 0,
156
- "3D Printer": 0,
157
- "Welding": 0,
158
- "Electronics": 0
159
- },
160
- {
161
- "Name": "Wood Shop CNC Router",
162
- "Code": 24207,
163
- "Description": "This course builds upon previous skills taught in TechSpark's wo...o use the CNC wood router in the student wood shop at TechSpark.",
164
- "Units": 3,
165
- "Length (Weeks)": 7,
166
- "Laser Cutting": 0,
167
- "Wood Working": 2,
168
- "Wood CNC": 3,
169
- "Metal Machining": 0,
170
- "Metal CNC": 1,
171
- "3D Printer": 0,
172
- "Welding": 0,
173
- "Electronics": 0
174
- },
175
- {
176
- "Name": "Machine Shop CNC Milling",
177
- "Code": 24300,
178
- "Description": "This course builds upon previous skills taught in TechSpark's ma...e CNC milling machines in the student machine shop at TechSpark.",
179
- "Units": 2,
180
- "Length (Weeks)": 7,
181
- "Laser Cutting": 0,
182
- "Wood Working": 0,
183
- "Wood CNC": 1,
184
- "Metal Machining": 0,
185
- "Metal CNC": 3,
186
- "3D Printer": 0,
187
- "Welding": 0,
188
- "Electronics": 0
189
- }
190
- ]
191
-
192
-
193
- # ------------------------
194
- # Simple retrieval helpers
195
- # ------------------------
196
-
197
- FIELD_ALIASES = {
198
- "units": "Units",
199
- "weeks": "Length (Weeks)",
200
- "length": "Length (Weeks)",
201
- "description": "Description",
202
- "laser": "Laser Cutting",
203
- "laser cutting": "Laser Cutting",
204
- "wood": "Wood Working",
205
- "woodworking": "Wood Working",
206
- "wood cnc": "Wood CNC",
207
- "metal": "Metal Machining",
208
- "metal machining": "Metal Machining",
209
- "metal cnc": "Metal CNC",
210
- "3d": "3D Printer",
211
- "3d printing": "3D Printer",
212
- "printer": "3D Printer",
213
- "weld": "Welding",
214
- "welding": "Welding",
215
- "electronics": "Electronics",
216
- "code": "Code",
217
- "name": "Name"
218
  }
219
 
220
- COURSE_NAMES = [c["Name"] for c in COURSES]
221
- COURSE_CODES = [str(c["Code"]) for c in COURSES]
222
-
223
- def find_course(query: str):
224
- # Try to match by name (fuzzy) or code (exact substring)
225
- best_name = process.extractOne(query, COURSE_NAMES, scorer=fuzz.WRatio)
226
- code_hits = [c for c in COURSES if str(c["Code"]) in query.replace(" ", "")]
227
- if best_name and best_name[1] >= 70:
228
- for c in COURSES:
229
- if c["Name"] == best_name[0]:
230
- return c
231
- if code_hits:
232
- return code_hits[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  return None
234
 
235
- def filter_by_skill(query: str):
236
- # Return courses that have >0 level for any skill mentioned
237
- hits = []
238
- for key, field in FIELD_ALIASES.items():
239
- if field in ["Units", "Length (Weeks)", "Description", "Code", "Name"]:
240
- continue
241
- if key in query.lower():
242
- for c in COURSES:
243
- try:
244
- if c.get(field, 0) and int(c.get(field, 0)) > 0:
245
- hits.append((field, c))
246
- except Exception:
247
- pass
248
- return hits
249
-
250
- def reply_for_course(c: dict, query: str) -> str:
251
- # If user asked a specific field, show that; else show a compact summary
252
- lower = query.lower()
253
- # Check if they asked for a specific attribute
254
- for key, field in FIELD_ALIASES.items():
255
- if key in lower and field in c:
256
- return f"{c['Name']} — {field}: {c[field]}"
257
- # Default compact card
258
- skills = ["Laser Cutting","Wood Working","Wood CNC","Metal Machining","Metal CNC","3D Printer","Welding","Electronics"]
259
- taught = [s for s in skills if int(c.get(s,0))>0]
260
- taught_str = ", ".join(taught) if taught else "General skills"
261
- return (
262
- f"{c['Name']} (Code {c['Code']})\n"
263
- f"Units: {c['Units']} | Length: {c['Length (Weeks)']} weeks\n"
264
- f"Focus: {taught_str}\n"
265
- f"Description: {c['Description']}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  )
 
 
 
 
267
 
268
- def list_all_courses():
269
- return "Courses:\n" + "\n".join([f"- {c['Name']} (Code {c['Code']})" for c in COURSES])
270
 
271
- def list_by_skill(hits):
272
- if not hits:
273
- return None
274
- # Group by skill field
275
- by = {}
276
- for field, c in hits:
277
- by.setdefault(field, []).append(c)
278
- lines = []
279
- for field, cs in by.items():
280
- lines.append(f"{field} courses:")
281
- for c in cs:
282
- lines.append(f"- {c['Name']} (Code {c['Code']})")
283
- return "\n".join(lines)
284
-
285
- # ------------------------
286
- # Chat handler
287
- # ------------------------
288
-
289
- HELP_TEXT = (
290
- "You can ask:\n"
291
- "• “List all courses”\n"
292
- "• “What are the units for Modern Making?”\n"
293
- "• “Which classes teach welding?”\n"
294
- "• “What is Code 24205?” or “Tell me about Intro to CNC Machining”\n"
295
- "• “Which courses cover laser cutting or 3D printing?”\n"
296
- )
297
 
298
- def answer_fn(message, history):
299
- q = (message or "").strip()
300
- if not q:
301
- return HELP_TEXT
302
-
303
- # Quick intents
304
- if "list" in q.lower() and "course" in q.lower():
305
- ans = list_all_courses()
306
- return llm_paraphrase(ans)
307
-
308
- # Skill-filter intent
309
- skill_hits = filter_by_skill(q)
310
- skill_resp = list_by_skill(skill_hits)
311
- if skill_resp:
312
- return llm_paraphrase(skill_resp)
313
-
314
- # Single-course intent
315
- c = find_course(q)
316
- if c:
317
- ans = reply_for_course(c, q)
318
- return llm_paraphrase(ans)
319
-
320
- # Fallback: nearest name suggestion
321
- best = process.extractOne(q, COURSE_NAMES, scorer=fuzz.WRatio)
322
- if best and best[1] >= 55:
323
- suggestion = best[0]
324
- return llm_paraphrase(
325
- f"I couldn't find an exact match. Did you mean “{suggestion}”? "
326
- f"Try asking: ‘Tell me about {suggestion}’ or ‘What are the units for {suggestion}?’\n\n{HELP_TEXT}"
327
  )
 
328
 
329
- # Final fallback
330
- return llm_paraphrase(
331
- "I couldn't match that to a TechSpark course. "
332
- "Try mentioning a course name or code, or a skill like welding, laser cutting, or 3D printing.\n\n" + HELP_TEXT
333
  )
 
 
 
 
 
 
 
 
 
 
334
 
335
- # ------------------------
336
- # Gradio UI
337
- # ------------------------
338
-
339
- demo = gr.ChatInterface(
340
- answer_fn,
341
- title="TechSpark Courses Assistant",
342
- description="Ask about TechSpark courses by name, code, or skill. (Tiny LLM paraphrase enabled for clarity.)",
343
- examples=[
344
- "List all courses",
345
- "What are the units for Modern Making?",
346
- "Which courses teach welding?",
347
- "Tell me about Intro to CNC Machining",
348
- "What is Code 24301?",
349
- "Which courses include laser cutting?"
 
 
 
 
 
 
 
 
 
 
 
 
350
  ],
 
 
 
 
 
 
 
 
 
 
 
 
351
  )
352
 
353
- demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import difflib
import json
import os
from typing import List

import numpy as np
import openai
import pandas as pd
import smolagents
import streamlit as st
from datasets import Dataset, DatasetDict, load_dataset
from huggingface_hub import login, HfApi
from streamlit_chat import message
from streamlit_extras.add_vertical_space import add_vertical_space
from streamlit_extras.colored_header import colored_header
14
 
15
+ # Setup
16
 
17
# Credentials are read from the environment instead of relying on names that
# are never defined in this file (the original `login(token_public)` and
# `api_key=OPENAI_API` raise NameError at import time).
# NOTE(review): the variable names HF_TOKEN / OPENAI_API_KEY follow the
# conventions of huggingface_hub and openai; confirm they match the secrets
# configured for this deployment.
token_public = os.environ.get("HF_TOKEN")
if token_public:
    # Only authenticate when a token is actually available.
    login(token_public)

REPO_ID_TECHSPARK_STAFF = "aslan-ng/CMU_TechSpark_Staff"
REPO_ID_TECHSPARK_COURSES = "aslan-ng/CMU_TechSpark_Courses"
REPO_ID_TECHSPARK_TOOLS = "aslan-ng/CMU_TechSpark_Tools"

# LLM model initialization
OPENAI_API = os.environ.get("OPENAI_API_KEY")
model = smolagents.OpenAIServerModel(
    model_id="gpt-4.1-mini",  # or another fast model
    api_key=OPENAI_API,
    # optionally: base_url="https://api.groq.com/openai/v1" for Groq, etc.
)
29
+
30
+ # Numeric profile of skills for each entry
31
+ NUMERIC_PROFILE = ["Laser Cutting", "Wood Working", "Wood CNC", "Metal Machining", "Metal CNC", "3D Printer", "Welding", "Electronics"]
32
+
33
+ # Map common task keywords to candidate machine names.
34
+ KEYWORD_TO_MACHINES = {
35
+ "mill": ["Mill"],
36
+ "shear": ["Shear"],
37
+ "vertical band saw": ["Vertical Band Saw"],
38
+ "horizontal band saw": ["Horizontal Band Saw"],
39
+ "band saw": ["Band Saw"],
40
+ "drill press": ["Drill press", "Drill Press", "Mini Drill Press"],
41
+ "lathe": ["Lathe"],
42
+ "cnc": ["Metal CNC", "Wood CNC"],
43
+ "weld": ["MIG Welder", "TIG Welder"],
44
+ "plasma": ["Hand-held Plasma Cutter"],
45
+ "waterjet": ["Waterjet"],
46
+ "torch": ["Acetylene Torch"],
47
+ "furnace": ["Furnace"],
48
+ "kiln": ["Kiln"],
49
+ "cast": ["Centrifugal Caster", "Vacuum Caster", "Vacuum Former", "Pressure Pots", "Vacuum Chambers"],
50
+ "tumble": ["Rotary Tumbler"],
51
+ "buff": ["Buffing Wheel"],
52
+ "solder": ["Soldering stations"],
53
+ "electronics": ["Soldering stations", "DC power supplies", "Multimeters", "Oscilloscopes"],
54
+ "jig saw": ["Jig Saws"],
55
+ "jigsaw": ["Jig Saws"],
56
+ "router": ["Table Router"],
57
+ "panel saw": ["Panel Saw"],
58
+ "table saw": ["Table Saw"],
59
+ "miter": ["Miter Saw"],
60
+ "sand": ["Belt/Disc/Spindle Sanders"],
61
+ "3d print": ["3D Printers"],
62
+ "3d printer": ["3D Printers"],
63
+ "printer": ["3D Printers"],
64
+ "laser": ["Laser Cutters"],
65
+ "paint": ["Paint"],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  }
67
 
68
+ MACHINE_NOTES = {
69
+ "Laser Cutters": "2D cutting/engraving of sheet materials (e.g., acrylic, plywood, cardboard).",
70
+ "3D Printers": "Additive manufacturing of small plastic parts.",
71
+ "MIG Welder": "Fast welding of steel/aluminium with filler wire.",
72
+ "TIG Welder": "Precise welding of thin metals.",
73
+ "Waterjet": "High-precision cutting of almost any material with water/abrasive.",
74
+ "Hand-held Plasma Cutter": "Rough cutting of steel plate.",
75
+ "Centrifugal Caster": "Casting small metal components using centrifugal force.",
76
+ "Vacuum Caster": "Degassing and casting for small parts using vacuum.",
77
+ "Vacuum Former": "Forming heated plastic sheets over molds.",
78
+ "Pressure Pots": "Pressure-curing of cast parts to remove bubbles.",
79
+ "Vacuum Chambers": "Degassing silicone and resins before casting.",
80
+ "Soldering stations": "Assembly and rework of PCBs and wired electronics.",
81
+ "Table Saw": "Straight cuts in sheet/board stock (wood).",
82
+ "Panel Saw": "Breaking down large sheet goods (plywood, MDF).",
83
+ "Band Saw": "Curved cuts in wood.",
84
+ "Belt/Disc/Spindle Sanders": "Shaping and smoothing wood components.",
85
+ "Paint": "Finishing parts with spray paint in a ventilated booth.",
86
+ }
87
+
88
+ def load_data_from_sheet():
89
+ """
90
+ Load the data from Google Sheets.
91
+ """
92
+ from google.colab import auth
93
+ from google.auth import default
94
+ import gspread
95
+
96
+ auth.authenticate_user()
97
+
98
+ SHEET_SCHEMA = [
99
+ {"Staff": ["Name", "Role", "Overview of Responsibilities", *NUMERIC_PROFILE]},
100
+ {"Courses": ["Name", "Code", "Description", "Units", "Length (Weeks)", *NUMERIC_PROFILE]},
101
+ {"Tools": ["Name", "Location", "Accessible by Students", "Required Course"]},
102
+ ]
103
+ SHEET_NAMES = [list(d.keys())[0] for d in SHEET_SCHEMA]
104
+ #print(SHEET_NAMES)
105
+ def get_sheet_columns(sheet_name):
106
+ for entry in SHEET_SCHEMA:
107
+ if sheet_name in entry:
108
+ return entry[sheet_name]
109
+ return None
110
+ #print(get_sheet_columns(SHEET_NAMES[0]))
111
+
112
+ sh = gspread.authorize(default()[0]).open_by_key(SHEET_ID_TECHSPARK)
113
+
114
+ dfs = {}
115
+ for sheet_name in SHEET_NAMES:
116
+ ws = sh.worksheet(sheet_name) # tab with that name
117
+ records = ws.get_all_records() # list of dicts (rows)
118
+ df = pd.DataFrame(records)
119
+
120
+ # Ensure correct column order (and drop extras if any)
121
+ cols = get_sheet_columns(sheet_name)
122
+ if cols is not None:
123
+ df = df.reindex(columns=cols)
124
+
125
+ dfs[sheet_name] = df
126
+
127
+ # 5. Return them in a fixed order
128
+ staff_df = dfs["Staff"]
129
+ courses_df = dfs["Courses"]
130
+ tools_df = dfs["Tools"]
131
+
132
+ # Clean "Accessible by Students" if it comes as strings "TRUE"/"FALSE"
133
+ if tools_df["Accessible by Students"].dtype == object:
134
+ tools_df["Accessible by Students"] = tools_df["Accessible by Students"].map(
135
+ {"TRUE": True, "FALSE": False}
136
+ )
137
+
138
+ # Clean "Required Course": make it string with missing values
139
+ tools_df["Required Course"] = (
140
+ tools_df["Required Course"]
141
+ .replace("", pd.NA) # empty ➔ missing
142
+ .astype("string") # keep as string type
143
+ )
144
+
145
+ return staff_df, courses_df, tools_df
146
+
147
+ def save_data_to_huggingface(staff_df, courses_df, tools_df):
148
+ """
149
+ Save data to HuggingFace.
150
+ """
151
+ hf_ds_staff = Dataset.from_pandas(staff_df, preserve_index=False)
152
+ hf_ds_staff.push_to_hub(REPO_ID_TECHSPARK_STAFF)
153
+ hf_ds_courses = Dataset.from_pandas(courses_df, preserve_index=False)
154
+ hf_ds_courses.push_to_hub(REPO_ID_TECHSPARK_COURSES)
155
+ hf_ds_tools = Dataset.from_pandas(tools_df, preserve_index=False)
156
+ hf_ds_tools.push_to_hub(REPO_ID_TECHSPARK_TOOLS)
157
+
158
+ def refresh_hugginface_repo():
159
+ """
160
+ Loads data from Google Sheets and pushes it to HuggingFace.
161
+ """
162
+ staff_df, courses_df, tools_df = load_data_from_sheet()
163
+ save_data_to_huggingface(staff_df, courses_df, tools_df)
164
+
165
+ def load_data_from_huggingface():
166
+ """
167
+ Loads data from HuggingFace.
168
+ """
169
+ # Staff (People)
170
+ ds_staff = load_dataset(REPO_ID_TECHSPARK_STAFF)
171
+ staff_df = ds_staff["train"].to_pandas()
172
+
173
+ # Courses
174
+ ds_courses = load_dataset(REPO_ID_TECHSPARK_COURSES)
175
+ courses_df = ds_courses["train"].to_pandas()
176
+
177
+ # Tools
178
+ ds_tools = load_dataset(REPO_ID_TECHSPARK_TOOLS)
179
+ tools_df = ds_tools["train"].to_pandas()
180
+ return staff_df, courses_df, tools_df
181
+
182
def vector_1st_distance(x: list, y: list):
    """
    Calculate the mean signed element-wise difference between two vectors.

    The result is ``sum(x[i] - y[i]) / len(x)``. It is signed, not absolute:
    it is positive when ``x`` exceeds ``y`` on average and negative otherwise.

    Args:
        x: First vector of numbers.
        y: Second vector of numbers, same length as ``x``.

    Returns:
        The mean difference as a float, or None when both vectors are empty
        (there is nothing to compare, and dividing by zero would fail).

    Raises:
        ValueError: If the two vectors have different lengths.
    """
    if len(x) != len(y):
        raise ValueError(
            f"Vectors must have the same length (got {len(x)} and {len(y)})."
        )
    if len(x) == 0:
        # search_staff_by_skills already tolerates a None score.
        return None
    diff = np.asarray(x, dtype=float) - np.asarray(y, dtype=float)
    return float(diff.sum() / len(x))
189
+
190
def skill_score(
    skill_profile: dict,  # The skill profile that we want to analyze
    laser_cutting: float = None,
    wood_working: float = None,
    wood_cnc: float = None,
    metal_machining: float = None,
    metal_cnc: float = None,
    three_d_printer: float = None,
    welding: float = None,
    electronics: float = None,
):
    """
    Calculate the skill score for a given skill profile. Useful for both staff and courses skill profiles.

    Only the skills whose requirement is not None take part in the score.
    For those skills the profile value and the required value are collected
    into two parallel vectors and handed to ``vector_1st_distance``.

    Args:
        skill_profile: Mapping from skill column name (e.g. 'Laser Cutting')
            to the level held by the staff member or course.
        laser_cutting .. electronics: Required level for each skill, or None
            to ignore that skill.

    Returns:
        The value returned by ``vector_1st_distance`` for the selected
        skills, or None when no skill requirement was given.

    Raises:
        KeyError: If a requested skill is missing from ``skill_profile``.
    """
    # Column name in the profile paired with the matching requirement.
    requested = (
        ("Laser Cutting", laser_cutting),
        ("Wood Working", wood_working),
        ("Wood CNC", wood_cnc),
        ("Metal Machining", metal_machining),
        ("Metal CNC", metal_cnc),
        ("3D Printer", three_d_printer),
        ("Welding", welding),
        ("Electronics", electronics),
    )
    x = []
    y = []
    for column, required in requested:
        if required is not None:
            x.append(skill_profile[column])
            y.append(required)
    if not x:
        # Nothing to compare; avoid a division by zero downstream.
        return None
    return vector_1st_distance(x, y)
231
+
232
+ def all_staff():
233
+ """
234
+ Return a list of all staff.
235
+ """
236
+ return staff_df["Name"].dropna().tolist()
237
+
238
def get_staff_full_profile(name: str):
    """
    Get the staff full profile (including description and skill).

    The name is fuzzy-matched against all staff names with
    ``difflib.get_close_matches`` (best match only, cutoff 0.2), so small
    typos still resolve to a staff member.

    Args:
        name: Name (or approximate name) of the staff member.

    Returns:
        The matching row of ``staff_df`` as a dict, or None when ``name`` is
        not a non-empty string or nothing matches.
    """
    # difflib raises TypeError on None, which callers can pass when an
    # earlier search found nobody; treat that as "no match".
    if not isinstance(name, str) or not name.strip():
        return None
    matches = difflib.get_close_matches(name, all_staff(), n=1, cutoff=0.2)
    if not matches:
        return None
    rows = staff_df[staff_df["Name"] == matches[0]]
    return rows.iloc[0].to_dict()
248
+
249
def get_staff_skills_profile(name: str):
    """
    Get the staff skills profile given its name.

    Args:
        name: Name (or approximate name) of the staff member.

    Returns:
        A dict restricted to the NUMERIC_PROFILE skill columns, or None when
        no staff member matches ``name``.
    """
    full_profile = get_staff_full_profile(name)
    if full_profile is None:
        # No match: return None instead of failing on a None subscript.
        return None
    return {k: full_profile[k] for k in NUMERIC_PROFILE}
255
+
256
def get_staff_profile(name: str):
    """
    Get the staff profile without skill part.

    Args:
        name: Name (or approximate name) of the staff member. May be None
            when an earlier search (e.g. search_staff_by_skills) found nobody.

    Returns:
        The staff row as a dict with the NUMERIC_PROFILE skill columns
        removed, or None when ``name`` is empty or no staff member matches.
    """
    if not name:
        # Nothing to look up; the fuzzy matcher cannot handle None.
        return None
    full_profile = get_staff_full_profile(name)
    if full_profile is None:
        return None
    return {k: v for k, v in full_profile.items() if k not in NUMERIC_PROFILE}
262
+
263
def search_staff_by_skills(
    laser_cutting: float = None,
    wood_working: float = None,
    wood_cnc: float = None,
    metal_machining: float = None,
    metal_cnc: float = None,
    three_d_printer: float = None,
    welding: float = None,
    electronics: float = None,
):
    """
    Find the staff member whose skills best fit the requested levels.

    Every staff member is scored with ``skill_score``; the one with the
    smallest strictly positive score wins.

    Args:
        laser_cutting .. electronics: Required level for each skill, or None
            to ignore that skill.

    Returns:
        The name of the best-scoring staff member, or None when no skill was
        requested or no staff member has a positive score.
    """
    requested = {
        "laser_cutting": laser_cutting,
        "wood_working": wood_working,
        "wood_cnc": wood_cnc,
        "metal_machining": metal_machining,
        "metal_cnc": metal_cnc,
        "three_d_printer": three_d_printer,
        "welding": welding,
        "electronics": electronics,
    }
    if all(level is None for level in requested.values()):
        # With nothing to compare, scoring would divide by zero.
        return None

    best_name = None
    best_score = float("inf")
    for name in all_staff():
        skills_profile = get_staff_skills_profile(name)
        if skills_profile is None:
            continue
        score = skill_score(skill_profile=skills_profile, **requested)
        # keep only positive scores
        if score is not None and 0 < score < best_score:
            best_score = score
            best_name = name
    return best_name
294
+
295
+ def all_courses_code():
296
+ """
297
+ Return a list of all course codes.
298
+ """
299
+ return courses_df["Code"].dropna().astype(str).tolist()
300
+
301
+ def get_course_info(code: str):
302
+ """
303
+ Get the course information given its code.
304
+ """
305
+ # Ensure the input code is a string for comparison
306
+ code_str = str(code)
307
+ matches = difflib.get_close_matches(code_str, all_courses_code(), n=1, cutoff=0.2)
308
+ code = matches[0] if matches else None
309
+ if code:
310
+ full_profile = courses_df[courses_df["Code"].astype(str) == code].iloc[0].to_dict()
311
+ return full_profile
312
+ return None
313
+
314
+ def all_tools():
315
+ """
316
+ Return a list of all tool names.
317
+ """
318
+ return tools_df["Name"].dropna().tolist()
319
+
320
+ def get_tool_full_profile(name: str):
321
+ """
322
+ Get the tool's full profile.
323
+ """
324
+ # Increased cutoff to make matching more strict, avoiding false positives for non-existent machines
325
+ matches = difflib.get_close_matches(name, all_tools(), n=1, cutoff=0.6)
326
+ name = matches[0] if matches else None
327
+ if name:
328
+ full_profile = tools_df[tools_df["Name"] == name].iloc[0].to_dict()
329
+ return full_profile
330
  return None
331
 
332
def find_candidates(task: str):
    """
    Return a DataFrame of candidate machines for the given task description.

    Matching happens in three stages:
      1. keywords from KEYWORD_TO_MACHINES found in the task text,
      2. machine names that appear verbatim (case-insensitive) in the task,
      3. only if 1 and 2 found nothing: task words longer than 3 characters
         that occur inside a machine name.

    Args:
        task: Free-text description of what the user wants to do.

    Returns:
        The rows of ``tools_df`` whose "Name" matched; an empty DataFrame
        when the table is missing, empty, or nothing matched.
    """
    df = tools_df
    if df is None:
        # The original dereferenced `df.iloc` here and crashed.
        return pd.DataFrame()
    if df.empty:
        return df.iloc[0:0]  # empty with same columns

    task_lc = (task or "").lower()
    # Skip missing names so `.lower()` is never called on NaN.
    names = df["Name"].dropna().astype(str)

    # 1) Matches from keyword mapping
    matched = set()
    for kw, machine_names in KEYWORD_TO_MACHINES.items():
        if kw in task_lc:
            matched.update(machine_names)

    # 2) Direct substring matches on machine names
    matched.update(name for name in names if name.lower() in task_lc)

    # 3) Fallback: token-based substring search
    if not matched:
        # Lower-cased names stay local; the shared table is not modified.
        names_lc = names.str.lower()
        for token in task_lc.replace(",", " ").split():
            if len(token) > 3:
                # regex=False: task text is user input, not a pattern.
                hits = names_lc.str.contains(token, regex=False)
                matched.update(names[hits])

    return df[df["Name"].isin(matched)]
367
+
368
+ def make_location_plan(task: str):
369
+ """Print a short, human-readable location plan for a TechSpark task."""
370
+ global tools_df
371
+ df = tools_df
372
+ if df is None:
373
+ print("❌ Machine table not loaded yet.")
374
+ return
375
+
376
+ candidates = find_candidates(task)
377
+ print(f"Task: {task}\n")
378
+
379
+ if candidates.empty:
380
+ print("I couldn't find a clear machine match in the current table.")
381
+ print("Try rephrasing with the machine name you expect (e.g., 'laser cutter', '3D printer', 'MIG welder').")
382
+ return
383
+
384
+ print("Suggested machines and locations:\n")
385
+ for _, row in candidates.iterrows():
386
+ name = row["Name"]
387
+ loc = row["Location"]
388
+ print(f"- **{name}** → **{loc}**")
389
+ if name in MACHINE_NOTES:
390
+ print(f" - Why here: {MACHINE_NOTES[name]}")
391
+ print()
392
+
393
+ locations = ", ".join(sorted(candidates["Location"].unique()))
394
+ print("Next steps inside TechSpark:")
395
+ print(f"1. Walk to: {locations}.")
396
+ print("2. Check posted safety/training requirements for the machine you choose.")
397
+ print("3. If you're unsure which specific machine is best, ask the staff in that area.")
398
+ print("4. Imagine how this module could plug into a larger agent that also plans the full fabrication process and checks training.")
399
+
400
+ # Define the agent with all of these tools.
401
+
402
+ class SearchStaffInformationTool(smolagents.tools.Tool):
403
+ name = "search_staff_information"
404
+ description = (
405
+ "Search the staff information by its name."
406
  )
407
+ inputs = {
408
+ "name": {"type": "string", "description": "Name of the staff member."},
409
+ }
410
+ output_type = "object"
411
 
412
+ def forward(self, name: str) -> dict:
413
+ return get_staff_profile(name)
414
 
415
+ class FindSuitableStaffTool(smolagents.tools.Tool):
416
+ name = "find_suitable_staff"
417
+ description = (
418
+ "Find the most suitable staff member for the task based on required skills."
419
+ )
420
+ inputs = {
421
+ "laser_cutting": {"type": "number", "description": "Laser cutting skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
422
+ "wood_working": {"type": "number", "description": "Wood working skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
423
+ "wood_cnc": {"type": "number", "description": "Wood CNC skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
424
+ "metal_machining": {"type": "number", "description": "Metal machining skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
425
+ "metal_cnc": {"type": "number", "description": "Metal CNC skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
426
+ "three_d_printer": {"type": "number", "description": "3D printer skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
427
+ "welding": {"type": "number", "description": "Welding skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
428
+ "electronics": {"type": "number", "description": "Electronics skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
429
+ }
430
+ output_type = "object"
 
 
 
 
 
 
 
 
 
 
431
 
432
+ def forward(self,
433
+ laser_cutting: float = None,
434
+ wood_working: float = None,
435
+ wood_cnc: float = None,
436
+ metal_machining: float = None,
437
+ metal_cnc: float = None,
438
+ three_d_printer: float = None,
439
+ welding: float = None,
440
+ electronics: float = None,
441
+ ) -> dict:
442
+ name = search_staff_by_skills(
443
+ laser_cutting = laser_cutting,
444
+ wood_working = wood_working,
445
+ wood_cnc = wood_cnc,
446
+ metal_machining = metal_machining,
447
+ metal_cnc = metal_cnc,
448
+ three_d_printer = three_d_printer,
449
+ welding = welding,
450
+ electronics = electronics,
 
 
 
 
 
 
 
 
 
 
451
  )
452
+ return get_staff_profile(name)
453
 
454
class MachineTrainingTool(smolagents.tools.Tool):
    # Agent tool that reports whether students may use a machine and which
    # course, if any, is required for it.
    name = "get_machine_training_info"
    description = (
        "Retrieves training information for a specific machine and checks its accessibility. The `machine_name` argument should exactly match the machine's name as listed in the system."
    )
    inputs = {
        "machine_name": {"type": "string", "description": "Name of the machine for which to retrieve training information"},
    }
    output_type = "string"

    def forward(self, machine_name: str) -> str:
        """
        Build a short sentence describing access and training for a machine.

        Args:
            machine_name: Name of the machine to look up.

        Returns:
            A message saying the machine does not exist, is not accessible by
            students, or is accessible together with its required course.
        """
        tool_info = get_tool_full_profile(machine_name)
        if not tool_info:
            # Message for non-existent machine, as requested
            return f"Machine '{machine_name}' does not exist."

        accessible = tool_info.get("Accessible by Students")
        required_course_code = tool_info.get("Required Course")

        # Compare by value rather than `is False`, so a non-builtin boolean
        # (e.g. a NumPy bool) is still recognised as "not accessible".
        # NOTE(review): a missing value is still treated as accessible, as in
        # the original code; confirm that this is the intended default.
        if not pd.isna(accessible) and not bool(accessible):
            # Specific message for not accessible machines, as requested
            return f"The {machine_name} is NOT accessible by students. Please ask staff for assistance."

        response_parts = [f"The {machine_name} is accessible by students."]
        if pd.isna(required_course_code):
            response_parts.append(f"No specific course is required for the {machine_name}.")
        else:
            course_details = get_course_info(required_course_code)
            if course_details:
                course_name = course_details.get('Name', 'Unknown Course')
                response_parts.append(f"The required training for {machine_name} is '{course_name}' (Course Code: {required_course_code}).")
            else:
                response_parts.append(f"A course with code '{required_course_code}' is required for {machine_name}, but its details are not found.")
        return " ".join(response_parts)
488
+
489
# refresh_hugginface_repo()  # Only run to refresh the repo
staff_df, courses_df, tools_df = load_data_from_huggingface()

# Tool instances exposed to the agent (all three classes are defined above in this file).
_agent_tools = [
    SearchStaffInformationTool(),
    FindSuitableStaffTool(),
    MachineTrainingTool(),
]

# Instruction text handed to the agent; the three literals are concatenated into one string.
_agent_instructions = (
    "You are a helpful assistant for the CMU TechSpark facility. Your purpose is to assist users with inquiries related to staff, courses, and tools. "
    "Use the available tools to find information about staff members, suggest suitable staff based on skills, or provide training information for machines. "
    "Respond concisely and directly with the information requested by the user, utilizing the output from the tools."
)

agent = smolagents.CodeAgent(
    tools=_agent_tools,
    instructions=_agent_instructions,
    model=model,
    # name="TechSpark Agent",
    add_base_tools=False,
    max_steps=12,
    verbosity_level=2,  # show steps in logs for class demo
)
511
 
512
# --- Page setup: wide layout and browser-tab title ---
st.set_page_config(page_title="TechSpark AI Assistant", layout="wide")

# Raw HTML heading shown at the top of the sidebar.
_SIDEBAR_TITLE_HTML = "<h1 style='text-align:center; font-size:2.5em;'>❇️ TechSpark AI Assistant</h1>"

# Markdown "About" blurb rendered under the sidebar heading.
_SIDEBAR_ABOUT_MD = '''
## About
This app is a tech-powered AI chatbot built using:
- Streamlit
- smolagents for AI responses

️ No API key required!
'''

# --- Sidebar: heading, blurb, then three lines of vertical padding ---
with st.sidebar:
    st.markdown(_SIDEBAR_TITLE_HTML, unsafe_allow_html=True)
    st.markdown(_SIDEBAR_ABOUT_MD)
    add_vertical_space(3)
527
+
528
+
529
+
530
+
531
# --- Custom stylesheet injected into the page (sidebar sizing, chat bubbles, input, global scale) ---
# NOTE(review): several declarations below write a length as "<number> vw" with a
# space (e.g. "1.6 vw", "5 vw"). That is not a valid CSS length, so browsers are
# expected to discard those declarations - confirm the intended values before
# removing the spaces, since the literal numbers would change the layout a lot.
# NOTE(review): "#--SCALE---" is not CSS comment syntax; it is likely parsed as
# part of the selector of the following `html` rule, which would stop that rule
# from matching - verify in the browser dev tools.
_CUSTOM_CSS = """
<style>

/* --- MAIN CONTAINER FULL WIDTH --- */
[data-testid="stAppViewContainer"] {
max-width: 100% !important;
padding-left: 10px !important;
padding-right: 40px !important;
}

/* --- SIDEBAR WIDTH + SMALLER SIDEBAR TEXT --- */
section[data-testid="stSidebar"] {
width: 1.6 vw !important;
}

section[data-testid="stSidebar"] * {
font-size: .8 vw !important;
}

/* --- TITLES (untouched) --- */

/* --- MASSIVE CHAT BUBBLES --- */
div[data-testid="chat-message"] {
font-size: 5 vw !important; /* HUGE readable text */
line-height: 2!important;
padding: 2vw 2.5vw !important; /* large padding */
border-radius: 2vw !important;
max-width: 70% !important;
}

/* USER MESSAGE */
div[data-testid="chat-message-user"] {
margin-left: auto !important;
background: #00796b !important;
color: white !important;
}

/* ASSISTANT MESSAGE */
div[data-testid="chat-message-assistant"] {
margin-right: auto !important;
background: #222 !important;
color: white !important;
}

/* --- INPUT BOX --- */
.stTextInput textarea {
font-size: 2 vw !important;
padding: 1.4vw !important;
min-height: 8vh !important;
border-radius: 1.5vw !important;
}

/* --- SEND BUTTON --- */
.stButton > button {
font-size: 2 vw !important;
padding: 1vw 2vw !important;
border-radius: 1.5vw !important;
}


#--SCALE---

/* Global scale to simulate 120% zoom */
html {
transform: scale(1.2);
transform-origin: top center;
}

/* Prevent horizontal scrollbar after scaling */
body, .stApp {
width: 83.33%; /* 1 / 1.2 */
margin: 0 auto;
}

</style>
"""

st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
608
+
609
+
610
# --- Page heading and sub-heading, both centred via inline HTML ---
_MAIN_TITLE_HTML = "<h1 class='main-title' style='text-align:center;'>TechSpark AI Assistant</h1>"
_SUB_TITLE_HTML = "<h2 class='sub-title' style='text-align:center;'>Ask me anything about TechSpark — </h2>"

st.markdown(_MAIN_TITLE_HTML, unsafe_allow_html=True)
st.markdown(_SUB_TITLE_HTML, unsafe_allow_html=True)


# --- Seed the chat history on first run ---
# 'generated' holds assistant replies and 'past' holds user messages; each key
# is only written when it is absent, so existing history is left untouched.
_initial_history = {
    'generated': ["Hi! I'm your AI assistant. How can I help you today?"],
    'past': ["Hi!"],
}
for _history_key, _seed_messages in _initial_history.items():
    if _history_key not in st.session_state:
        st.session_state[_history_key] = _seed_messages

# --- Page regions: input area, a divider, then the conversation area ---
input_container = st.container()
colored_header(label='', description='', color_name='blue-30')
response_container = st.container()
626
+
627
# --- Chat input box ---
def get_text():
    """Render the chat text field and return whatever it currently contains."""
    return st.text_input("You:", "", key="input", placeholder="Type your message here...")


# Draw the input field inside the region reserved for it and capture its value.
with input_container:
    user_input = get_text()
634
+
635
# --- Ask the agent for a reply ---
def generate_response(prompt):
    """Run the agent on ``prompt`` and return its answer as a string.

    Any exception raised while running the agent (or converting its result)
    is caught and returned as an ``[Error] ...`` message instead of propagating.
    """
    try:
        agent_answer = agent.run(prompt)
        return str(agent_answer)
    except Exception as err:
        return f"[Error] {err}"
641
+
642
# --- Record the newest exchange, then render the whole conversation ---
with response_container:
    if user_input:
        response = generate_response(user_input)
        st.session_state['past'].append(user_input)
        st.session_state['generated'].append(response)

    _replies = st.session_state['generated']
    if _replies:
        _prompts = st.session_state['past']
        # Each turn is drawn as the user's message followed by the assistant's
        # reply; the turn index keeps the widget keys unique.
        for _turn, _reply_text in enumerate(_replies):
            message(_prompts[_turn], is_user=True, key=f"{_turn}_user")
            message(_reply_text, key=f"{_turn}_assistant")