aedupuga committed on
Commit
67a4eb2
·
verified ·
1 Parent(s): 81a0c23

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +356 -325
app.py CHANGED
@@ -1,171 +1,35 @@
1
- import json
2
  import smolagents
 
3
  import pandas as pd
4
  import numpy as np
5
  from huggingface_hub import login, HfApi
6
  from datasets import Dataset, DatasetDict, load_dataset
7
  import difflib
8
  import openai
9
- from typing import List
10
 
11
 
12
- # Setup
 
13
 
14
- import os
15
- token_public = os.getenv("token_public")
16
-
17
- login(token_public)
18
-
19
- OPENAI_API = os.getenv("OPENAI_API")
20
 
21
  REPO_ID_TECHSPARK_STAFF = "aslan-ng/CMU_TechSpark_Staff"
22
  REPO_ID_TECHSPARK_COURSES = "aslan-ng/CMU_TechSpark_Courses"
23
  REPO_ID_TECHSPARK_TOOLS = "aslan-ng/CMU_TechSpark_Tools"
24
 
 
25
 
26
- token_public = os.getenv("TOKEN_PUBLIC")
27
- # LLM model initialization
28
- model = smolagents.OpenAIServerModel(
29
- model_id="gpt-4.1-mini", # or another fast model
30
- api_key=OPENAI_API,
31
- # optionally: base_url="https://api.groq.com/openai/v1" for Groq, etc.
32
- )
33
 
34
- # Numeric profile of skills for each entry
35
  NUMERIC_PROFILE = ["Laser Cutting", "Wood Working", "Wood CNC", "Metal Machining", "Metal CNC", "3D Printer", "Welding", "Electronics"]
36
 
37
- # Map common task keywords to candidate machine names.
38
- KEYWORD_TO_MACHINES = {
39
- "mill": ["Mill"],
40
- "shear": ["Shear"],
41
- "vertical band saw": ["Vertical Band Saw"],
42
- "horizontal band saw": ["Horizontal Band Saw"],
43
- "band saw": ["Band Saw"],
44
- "drill press": ["Drill press", "Drill Press", "Mini Drill Press"],
45
- "lathe": ["Lathe"],
46
- "cnc": ["Metal CNC", "Wood CNC"],
47
- "weld": ["MIG Welder", "TIG Welder"],
48
- "plasma": ["Hand-held Plasma Cutter"],
49
- "waterjet": ["Waterjet"],
50
- "torch": ["Acetylene Torch"],
51
- "furnace": ["Furnace"],
52
- "kiln": ["Kiln"],
53
- "cast": ["Centrifugal Caster", "Vacuum Caster", "Vacuum Former", "Pressure Pots", "Vacuum Chambers"],
54
- "tumble": ["Rotary Tumbler"],
55
- "buff": ["Buffing Wheel"],
56
- "solder": ["Soldering stations"],
57
- "electronics": ["Soldering stations", "DC power supplies", "Multimeters", "Oscilloscopes"],
58
- "jig saw": ["Jig Saws"],
59
- "jigsaw": ["Jig Saws"],
60
- "router": ["Table Router"],
61
- "panel saw": ["Panel Saw"],
62
- "table saw": ["Table Saw"],
63
- "miter": ["Miter Saw"],
64
- "sand": ["Belt/Disc/Spindle Sanders"],
65
- "3d print": ["3D Printers"],
66
- "3d printer": ["3D Printers"],
67
- "printer": ["3D Printers"],
68
- "laser": ["Laser Cutters"],
69
- "paint": ["Paint"],
70
- }
71
-
72
- MACHINE_NOTES = {
73
- "Laser Cutters": "2D cutting/engraving of sheet materials (e.g., acrylic, plywood, cardboard).",
74
- "3D Printers": "Additive manufacturing of small plastic parts.",
75
- "MIG Welder": "Fast welding of steel/aluminium with filler wire.",
76
- "TIG Welder": "Precise welding of thin metals.",
77
- "Waterjet": "High-precision cutting of almost any material with water/abrasive.",
78
- "Hand-held Plasma Cutter": "Rough cutting of steel plate.",
79
- "Centrifugal Caster": "Casting small metal components using centrifugal force.",
80
- "Vacuum Caster": "Degassing and casting for small parts using vacuum.",
81
- "Vacuum Former": "Forming heated plastic sheets over molds.",
82
- "Pressure Pots": "Pressure-curing of cast parts to remove bubbles.",
83
- "Vacuum Chambers": "Degassing silicone and resins before casting.",
84
- "Soldering stations": "Assembly and rework of PCBs and wired electronics.",
85
- "Table Saw": "Straight cuts in sheet/board stock (wood).",
86
- "Panel Saw": "Breaking down large sheet goods (plywood, MDF).",
87
- "Band Saw": "Curved cuts in wood.",
88
- "Belt/Disc/Spindle Sanders": "Shaping and smoothing wood components.",
89
- "Paint": "Finishing parts with spray paint in a ventilated booth.",
90
- }
91
-
92
- def load_data_from_sheet():
93
- """
94
- Load the data from Google Sheets.
95
- """
96
- from google.colab import auth
97
- from google.auth import default
98
- import gspread
99
-
100
- auth.authenticate_user()
101
-
102
- SHEET_SCHEMA = [
103
- {"Staff": ["Name", "Role", "Overview of Responsibilities", *NUMERIC_PROFILE]},
104
- {"Courses": ["Name", "Code", "Description", "Units", "Length (Weeks)", *NUMERIC_PROFILE]},
105
- {"Tools": ["Name", "Location", "Accessible by Students", "Required Course"]},
106
- ]
107
- SHEET_NAMES = [list(d.keys())[0] for d in SHEET_SCHEMA]
108
- #print(SHEET_NAMES)
109
- def get_sheet_columns(sheet_name):
110
- for entry in SHEET_SCHEMA:
111
- if sheet_name in entry:
112
- return entry[sheet_name]
113
- return None
114
- #print(get_sheet_columns(SHEET_NAMES[0]))
115
-
116
- sh = gspread.authorize(default()[0]).open_by_key(SHEET_ID_TECHSPARK)
117
-
118
- dfs = {}
119
- for sheet_name in SHEET_NAMES:
120
- ws = sh.worksheet(sheet_name) # tab with that name
121
- records = ws.get_all_records() # list of dicts (rows)
122
- df = pd.DataFrame(records)
123
-
124
- # Ensure correct column order (and drop extras if any)
125
- cols = get_sheet_columns(sheet_name)
126
- if cols is not None:
127
- df = df.reindex(columns=cols)
128
-
129
- dfs[sheet_name] = df
130
-
131
- # 5. Return them in a fixed order
132
- staff_df = dfs["Staff"]
133
- courses_df = dfs["Courses"]
134
- tools_df = dfs["Tools"]
135
-
136
- # Clean "Accessible by Students" if it comes as strings "TRUE"/"FALSE"
137
- if tools_df["Accessible by Students"].dtype == object:
138
- tools_df["Accessible by Students"] = tools_df["Accessible by Students"].map(
139
- {"TRUE": True, "FALSE": False}
140
- )
141
-
142
- # Clean "Required Course": make it string with missing values
143
- tools_df["Required Course"] = (
144
- tools_df["Required Course"]
145
- .replace("", pd.NA) # empty ➔ missing
146
- .astype("string") # keep as string type
147
- )
148
-
149
- return staff_df, courses_df, tools_df
150
-
151
- def save_data_to_huggingface(staff_df, courses_df, tools_df):
152
- """
153
- Save data to HuggingFace.
154
- """
155
- hf_ds_staff = Dataset.from_pandas(staff_df, preserve_index=False)
156
- hf_ds_staff.push_to_hub(REPO_ID_TECHSPARK_STAFF)
157
- hf_ds_courses = Dataset.from_pandas(courses_df, preserve_index=False)
158
- hf_ds_courses.push_to_hub(REPO_ID_TECHSPARK_COURSES)
159
- hf_ds_tools = Dataset.from_pandas(tools_df, preserve_index=False)
160
- hf_ds_tools.push_to_hub(REPO_ID_TECHSPARK_TOOLS)
161
-
162
- def refresh_hugginface_repo():
163
- """
164
- Loads data from Google Sheets and pushes it to HuggingFace.
165
- """
166
- staff_df, courses_df, tools_df = load_data_from_sheet()
167
- save_data_to_huggingface(staff_df, courses_df, tools_df)
168
-
169
  def load_data_from_huggingface():
170
  """
171
  Loads data from HuggingFace.
@@ -183,9 +47,18 @@ def load_data_from_huggingface():
183
  tools_df = ds_tools["train"].to_pandas()
184
  return staff_df, courses_df, tools_df
185
 
 
 
 
 
 
 
 
 
 
186
  def vector_1st_distance(x: list, y: list):
187
  """
188
- Calculate the 1st distance between two vectors.
189
  """
190
  if len(x) != len(y):
191
  raise ValueError
@@ -233,18 +106,42 @@ def skill_score(
233
  y.append(electronics)
234
  return vector_1st_distance(x, y)
235
 
 
236
  def all_staff():
237
  """
238
  Return a list of all staff.
239
  """
240
  return staff_df["Name"].dropna().tolist()
241
 
242
- def get_staff_full_profile(name: str):
243
  """
244
- Get the staff full profile (including description and skill).
245
  """
246
  matches = difflib.get_close_matches(name, all_staff(), n=1, cutoff=0.2)
247
- name = matches[0] if matches else None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
  if name:
249
  full_profile = staff_df[staff_df["Name"] == name].iloc[0].to_dict()
250
  return full_profile
@@ -273,8 +170,9 @@ def search_staff_by_skills(
273
  three_d_printer: float = None,
274
  welding: float = None,
275
  electronics: float = None,
 
276
  ):
277
- names = all_staff()
278
  best_name = None
279
  best_score = float("inf")
280
  for name in names:
@@ -296,140 +194,198 @@ def search_staff_by_skills(
296
  best_name = name
297
  return best_name
298
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
299
  def all_courses_code():
300
  """
301
  Return a list of all course codes.
302
  """
303
  return courses_df["Code"].dropna().astype(str).tolist()
304
 
305
- def get_course_info(code: str):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
306
  """
307
- Get the course information given its code.
308
  """
309
  # Ensure the input code is a string for comparison
310
- code_str = str(code)
311
- matches = difflib.get_close_matches(code_str, all_courses_code(), n=1, cutoff=0.2)
312
  code = matches[0] if matches else None
313
  if code:
314
  full_profile = courses_df[courses_df["Code"].astype(str) == code].iloc[0].to_dict()
315
  return full_profile
316
  return None
317
 
318
- def all_tools():
319
  """
320
- Return a list of all tool names.
321
  """
322
- return tools_df["Name"].dropna().tolist()
 
323
 
324
- def get_tool_full_profile(name: str):
325
  """
326
- Get the tool's full profile.
327
  """
328
- # Increased cutoff to make matching more strict, avoiding false positives for non-existent machines
329
- matches = difflib.get_close_matches(name, all_tools(), n=1, cutoff=0.6)
330
- name = matches[0] if matches else None
331
- if name:
332
- full_profile = tools_df[tools_df["Name"] == name].iloc[0].to_dict()
333
- return full_profile
334
- return None
335
 
336
- def find_candidates(task: str):
337
- """Return a DataFrame of candidate machines for the given task description."""
338
- global tools_df
339
- df = tools_df
340
- task_lc = task.lower()
341
- if df is None or df.empty:
342
- return df.iloc[0:0] # empty with same columns
343
-
344
- # 1) Matches from keyword mapping
345
- names_from_keywords = set()
346
- for kw, machine_names in KEYWORD_TO_MACHINES.items():
347
- if kw in task_lc:
348
- names_from_keywords.update(machine_names)
349
-
350
- # 2) Direct substring matches on machine names
351
- names_from_substring = set()
352
- for name in df["Name"]:
353
- if name.lower() in task_lc:
354
- names_from_substring.add(name)
355
-
356
- all_names = sorted(names_from_keywords.union(names_from_substring))
357
-
358
- # 3) Fallback: token-based substring search
359
- if not all_names:
360
- # Add 'name_lower' column if it doesn't exist for substring search
361
- if 'name_lower' not in df.columns:
362
- df['name_lower'] = df['Name'].str.lower()
363
- tokens = [t for t in task_lc.replace(",", " ").split() if len(t) > 3]
364
- for token in tokens:
365
- subset = df[df["name_lower"].str.contains(token)]
366
- if not subset.empty:
367
- all_names.extend(subset["Name"].tolist())
368
- all_names = sorted(set(all_names))
369
-
370
- return df[df["Name"].isin(all_names)]
371
-
372
- def make_location_plan(task: str):
373
- """Print a short, human-readable location plan for a TechSpark task."""
374
- global tools_df
375
- df = tools_df
376
- if df is None:
377
- print("❌ Machine table not loaded yet.")
378
- return
379
-
380
- candidates = find_candidates(task)
381
- print(f"Task: {task}\n")
382
-
383
- if candidates.empty:
384
- print("I couldn't find a clear machine match in the current table.")
385
- print("Try rephrasing with the machine name you expect (e.g., 'laser cutter', '3D printer', 'MIG welder').")
386
- return
387
-
388
- print("Suggested machines and locations:\n")
389
- for _, row in candidates.iterrows():
390
- name = row["Name"]
391
- loc = row["Location"]
392
- print(f"- **{name}** → **{loc}**")
393
- if name in MACHINE_NOTES:
394
- print(f" - Why here: {MACHINE_NOTES[name]}")
395
- print()
396
-
397
- locations = ", ".join(sorted(candidates["Location"].unique()))
398
- print("Next steps inside TechSpark:")
399
- print(f"1. Walk to: {locations}.")
400
- print("2. Check posted safety/training requirements for the machine you choose.")
401
- print("3. If you're unsure which specific machine is best, ask the staff in that area.")
402
- print("4. Imagine how this module could plug into a larger agent that also plans the full fabrication process and checks training.")
403
-
404
- # Define the agent with all of these tools.
405
-
406
- class SearchStaffInformationTool(smolagents.tools.Tool):
407
- name = "search_staff_information"
408
  description = (
409
- "Search the staff information by its name."
410
  )
411
  inputs = {
412
- "name": {"type": "string", "description": "Name of the staff member."},
413
  }
414
  output_type = "object"
415
 
416
- def forward(self, name: str) -> dict:
417
- return get_staff_profile(name)
418
 
419
- class FindSuitableStaffTool(smolagents.tools.Tool):
420
- name = "find_suitable_staff"
421
  description = (
422
- "Find the most suitable staff member for the task based on required skills."
423
  )
424
  inputs = {
425
- "laser_cutting": {"type": "number", "description": "Laser cutting skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
426
- "wood_working": {"type": "number", "description": "Wood working skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
427
- "wood_cnc": {"type": "number", "description": "Wood CNC skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
428
- "metal_machining": {"type": "number", "description": "Metal machining skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
429
- "metal_cnc": {"type": "number", "description": "Metal CNC skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
430
- "three_d_printer": {"type": "number", "description": "3D printer skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
431
- "welding": {"type": "number", "description": "Welding skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
432
- "electronics": {"type": "number", "description": "Electronics skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
433
  }
434
  output_type = "object"
435
 
@@ -442,8 +398,8 @@ class FindSuitableStaffTool(smolagents.tools.Tool):
442
  three_d_printer: float = None,
443
  welding: float = None,
444
  electronics: float = None,
445
- ) -> dict:
446
- name = search_staff_by_skills(
447
  laser_cutting = laser_cutting,
448
  wood_working = wood_working,
449
  wood_cnc = wood_cnc,
@@ -452,85 +408,162 @@ class FindSuitableStaffTool(smolagents.tools.Tool):
452
  three_d_printer = three_d_printer,
453
  welding = welding,
454
  electronics = electronics,
 
455
  )
456
- return get_staff_profile(name)
 
457
 
458
- class MachineTrainingTool(smolagents.tools.Tool):
459
- name = "get_machine_training_info"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
460
  description = (
461
- "Retrieves training information for a specific machine and checks its accessibility. The `machine_name` argument should exactly match the machine's name as listed in the system."
462
  )
463
  inputs = {
464
- "machine_name": {"type": "string", "description": "Name of the machine for which to retrieve training information"},
465
  }
466
- output_type = "string"
467
 
468
- def forward(self, machine_name: str) -> str:
469
- tool_info = get_tool_full_profile(machine_name)
470
- if tool_info:
471
- accessible = tool_info.get("Accessible by Students")
472
- required_course_code = tool_info.get("Required Course")
473
-
474
- if accessible is False:
475
- # Specific message for not accessible machines, as requested
476
- return f"The {machine_name} is NOT accessible by students. Please ask staff for assistance."
477
- else: # accessible is True
478
- response_parts = [f"The {machine_name} is accessible by students."]
479
- if pd.isna(required_course_code):
480
- response_parts.append(f"No specific course is required for the {machine_name}.")
481
- else:
482
- course_details = get_course_info(required_course_code)
483
- if course_details:
484
- course_name = course_details.get('Name', 'Unknown Course')
485
- response_parts.append(f"The required training for {machine_name} is '{course_name}' (Course Code: {required_course_code}).")
486
- else:
487
- response_parts.append(f"A course with code '{required_course_code}' is required for {machine_name}, but its details are not found.")
488
- return " ".join(response_parts)
489
- else:
490
- # Message for non-existent machine, as requested
491
- return f"Machine '{machine_name}' does not exist."
492
 
493
- #refresh_hugginface_repo() # Only run to refresh the repo
494
- staff_df, courses_df, tools_df = load_data_from_huggingface()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
495
 
496
  agent = smolagents.CodeAgent(
497
  tools=[
498
- SearchStaffInformationTool(),
499
- FindSuitableStaffTool(),
500
- MachineTrainingTool(), # MachineTrainingTool is now defined elsewhere
 
 
 
 
 
501
  ],
502
-
503
- instructions=(
504
- "You are a helpful assistant for the CMU TechSpark facility. Your purpose is to assist users with inquiries related to staff, courses, and tools. "
505
- "Use the available tools to find information about staff members, suggest suitable staff based on skills, or provide training information for machines. "
506
- "Respond concisely and directly with the information requested by the user, utilizing the output from the tools."
507
- ),
508
-
509
  model=model,
510
- #name="TechSpark Agent",
511
  add_base_tools=False,
512
- max_steps=12,
513
- verbosity_level=2, # show steps in logs for class demo
514
  )
515
 
516
 
517
-
518
- # --- Page config ---
519
- st.set_page_config(page_title="TechSpark AI Assistant", layout="wide")
520
-
521
- import gradio as gr
522
-
523
  # Minimal Gradio chat
524
- with gr.Blocks(title="TechSpark Agent",theme= gr.themes.Soft() ) as demo:
525
-
526
- gr.Markdown(
527
- """
528
- # 🤖 TechSpark AI Assistant
529
  Welcome to the TechSpark AI Assistant!
530
-
531
  Ask anything about **TechSpark staff, tools, courses or location of tools**
532
- This assistant is powered by **OpenAI's GPT model** via `smolagents`, accessing accurate information from our curated dataset verified by techspark staff! """
533
- )
534
  chat = gr.Chatbot(height=420)
535
  inp = gr.Textbox(placeholder="Ask your question in natural language.", label="Your question")
536
 
@@ -547,19 +580,17 @@ with gr.Blocks(title="TechSpark Agent",theme= gr.themes.Soft() ) as demo:
547
  return "", history
548
 
549
  gr.Examples(
550
- fn=respond,
551
  examples=[
552
  "Who is Ed?",
553
  "Who to talk to to create a wooden table?",
554
  "how to access laser cutter"
555
  ],
556
- inputs=[inp]
 
 
 
557
  )
558
 
559
  inp.submit(respond, [inp, chat], [inp, chat])
560
 
561
- # Launch the demo
562
- demo.launch()
563
-
564
-
565
-
 
1
+ import gradio as gr
2
  import smolagents
3
+ import json
4
  import pandas as pd
5
  import numpy as np
6
  from huggingface_hub import login, HfApi
7
  from datasets import Dataset, DatasetDict, load_dataset
8
  import difflib
9
  import openai
10
+ from langchain_community.utilities.wikipedia import WikipediaAPIWrapper
11
 
12
 
13
+ # Re-define all necessary components that the agent relies on
14
+ # This includes data loading, utility functions, and the agent itself
15
 
16
# Setup (copied from qRq0g01h3ZvP)
# `os` has to be imported before the first os.getenv() call below; importing it
# further down makes the module fail with NameError at import time.
import os

# Optional Hugging Face token, read from the `token_public` environment variable.
hf_token_public = os.getenv("token_public")
# login(hf_token_public) # Login is not needed in app.py if HF_TOKEN is set as secret

# Hugging Face dataset repositories that hold the TechSpark tables.
REPO_ID_TECHSPARK_STAFF = "aslan-ng/CMU_TechSpark_Staff"
REPO_ID_TECHSPARK_COURSES = "aslan-ng/CMU_TechSpark_Courses"
REPO_ID_TECHSPARK_TOOLS = "aslan-ng/CMU_TechSpark_Tools"

# Google Sheet that the Hugging Face datasets were originally exported from.
SHEET_ID_TECHSPARK = "1cdL_jDglKa-NxZF3j5s2z9ncSFbJSMGC2d-GsKubV-I"

# SECURITY: the OpenAI key must never appear in source, not even in a comment.
# A literal key was previously committed on this line; it has been removed and
# must be treated as compromised (revoke and rotate it).
# The key is supplied through the OPENAI_API environment variable / Space secret.
OPENAI_API = os.getenv("OPENAI_API")
 
 
 
 
29
 
30
+ # Data (copied from rGAiTp0PYvEk, adjusted to load from HF directly)
31
  NUMERIC_PROFILE = ["Laser Cutting", "Wood Working", "Wood CNC", "Metal Machining", "Metal CNC", "3D Printer", "Welding", "Electronics"]
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  def load_data_from_huggingface():
34
  """
35
  Loads data from HuggingFace.
 
47
  tools_df = ds_tools["train"].to_pandas()
48
  return staff_df, courses_df, tools_df
49
 
50
+ staff_df, courses_df, tools_df = load_data_from_huggingface()
51
+
52
+ # LLM (copied from NPPbWry0qUIE)
53
+ model = smolagents.OpenAIServerModel(
54
+ model_id="gpt-4.1-mini",
55
+ api_key=OPENAI_API,
56
+ )
57
+
58
+ # General Functions (copied from BwfI-EsvtvVx)
59
  def vector_1st_distance(x: list, y: list):
60
  """
61
+ Calculate the average 1st distance between two vectors.
62
  """
63
  if len(x) != len(y):
64
  raise ValueError
 
106
  y.append(electronics)
107
  return vector_1st_distance(x, y)
108
 
109
+ # Staff Tools (copied from Q47nRn9_Zz1P)
110
def all_staff():
    """
    List the name of every staff member, skipping rows that have no name.
    """
    staff_names = staff_df["Name"]
    return staff_names.dropna().tolist()
115
 
116
def match_staff_name(name: str):
    """
    Match the staff name to the closest match in the staff list.

    Returns the best-matching staff name, or None when nothing is close enough
    or when `name` is not a usable string.
    """
    # difflib raises TypeError on non-string input (e.g. None coming from a
    # skill search that found nobody), so reject it before matching.
    if not isinstance(name, str) or not name.strip():
        return None
    # The cutoff is deliberately loose so partial names such as "Ed" still match.
    matches = difflib.get_close_matches(name, all_staff(), n=1, cutoff=0.2)
    return matches[0] if matches else None
122
+
123
def all_available_staff(exclude: list):
    """
    Return a list of all staff with exclusion.

    `exclude` may be None, a single name, or any iterable of names. Each entry
    is fuzzy-matched against the staff list before being removed. Entries that
    are not strings, or that match nobody, are ignored.
    """
    if exclude is None:
        return all_staff()
    if isinstance(exclude, str):
        # A bare string is one name; list("Ed") would split it into characters.
        exclude = [exclude]
    else:
        try:
            exclude = list(exclude)
        except TypeError:
            # Not iterable (e.g. a number): there is nothing meaningful to exclude.
            return all_staff()

    excluded_names = set()
    for raw_name in exclude:
        if not isinstance(raw_name, str):
            continue
        matched_name = match_staff_name(raw_name)
        if matched_name:
            excluded_names.add(matched_name)
    return [name for name in all_staff() if name not in excluded_names]
139
+
140
+ def get_staff_full_profile(name: str):
141
+ """
142
+ Get the staff full profile given its name (including description and skill).
143
+ """
144
+ name = match_staff_name(name)
145
  if name:
146
  full_profile = staff_df[staff_df["Name"] == name].iloc[0].to_dict()
147
  return full_profile
 
170
  three_d_printer: float = None,
171
  welding: float = None,
172
  electronics: float = None,
173
+ exclude: list = None,
174
  ):
175
+ names = all_available_staff(exclude)
176
  best_name = None
177
  best_score = float("inf")
178
  for name in names:
 
194
  best_name = name
195
  return best_name
196
 
197
class SearchStaffInformation(smolagents.tools.Tool):
    # Tool that looks up one staff member by (approximate) name.
    name = "search_staff_information"
    description = (
        "Search the staff information by its name."
    )
    inputs = {
        "name": {"type": "string", "description": "Name of the staff member."},
    }
    # forward() returns a JSON-encoded string, so the declared output type is
    # "string" rather than "object"; otherwise the agent is told to expect a dict.
    output_type = "string"

    def forward(self, name: str) -> str:
        """Return the staff profile for `name`, serialized as a JSON string."""
        return json.dumps(get_staff_profile(name))
209
+
210
class FindSuitableStaff(smolagents.tools.Tool):
    # Tool that picks the staff member whose skill profile best fits the task.
    name = "find_suitable_staff"
    description = (
        "Find the most suitable staff member for the task based on required skills."
    )
    inputs = {
        "laser_cutting": {"type": "number", "nullable": True, "description": "Laser cutting skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)"},
        "wood_working": {"type": "number", "nullable": True, "description": "Wood working skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)"},
        "wood_cnc": {"type": "number", "nullable": True, "description": "Wood CNC skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)"},
        "metal_machining": {"type": "number", "nullable": True, "description": "Metal machining skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)"},
        "metal_cnc": {"type": "number", "nullable": True, "description": "Metal CNC skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)"},
        "three_d_printer": {"type": "number", "nullable": True, "description": "3D printer skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)"},
        "welding": {"type": "number", "nullable": True, "description": "Welding skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)"},
        "electronics": {"type": "number", "nullable": True, "description": "Electronics skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)"},
        # `exclude` is a list of staff names, so it is declared as an array;
        # declaring it as "number" tells the agent to pass a numeric value.
        "exclude": {"type": "array", "nullable": True, "description": "A list of names that we want to exclude from searching. Default is None or an empty list."}
    }
    # forward() returns a JSON-encoded string, not a dict.
    output_type = "string"

    def forward(self,
        laser_cutting: float = None,
        wood_working: float = None,
        wood_cnc: float = None,
        metal_machining: float = None,
        metal_cnc: float = None,
        three_d_printer: float = None,
        welding: float = None,
        electronics: float = None,
        exclude: list = None,
    ) -> str:
        """
        Return the profile of the best-matching staff member as a JSON string.

        Skill arguments left as None are passed through unchanged to
        search_staff_by_skills; `exclude` lists staff names to leave out.
        """
        name = search_staff_by_skills(
            laser_cutting = laser_cutting,
            wood_working = wood_working,
            wood_cnc = wood_cnc,
            metal_machining = metal_machining,
            metal_cnc = metal_cnc,
            three_d_printer = three_d_printer,
            welding = welding,
            electronics = electronics,
            exclude = exclude,
        )
        return json.dumps(get_staff_profile(name))
251
+
252
+ # Course Functions (copied from _P8TTwcOaUkN)
253
def all_courses_code():
    """
    List every course code as a string, skipping rows that have no code.
    """
    present_codes = courses_df["Code"].dropna()
    return present_codes.astype(str).tolist()
258
 
259
def all_courses_name():
    """
    List every course name, skipping rows that have no name.
    """
    course_names = courses_df["Name"]
    return course_names.dropna().tolist()
264
+
265
def course_name_to_code(course_name):
    """
    Look up the code of the first course whose name equals `course_name`.

    The code is returned as a string. An exact name match is required; an
    unknown name raises IndexError from the positional lookup.
    """
    name_mask = courses_df["Name"] == course_name
    matching_codes = courses_df[name_mask]["Code"]
    return str(matching_codes.iloc[0])
270
+
271
def course_code_to_name(course_code):
    """
    Look up the name of the first course whose code equals `course_code`.

    Codes are compared as strings. An unknown code raises IndexError from the
    positional lookup.
    """
    code_mask = courses_df["Code"].astype(str) == str(course_code)
    matching_names = courses_df[code_mask]["Name"]
    return str(matching_names.iloc[0])
276
+
277
def match_course_name_code(input):
    """
    Match the course to the closest match in the course list and return their codes.

    `input` may be a course code or a course name (the parameter name shadows
    the builtin but is kept for existing callers). Returns a list of course
    codes ordered from best to worst match, without duplicates, or None when
    nothing is similar enough.
    """
    query = str(input)
    code_matches = difflib.get_close_matches(query, all_courses_code(), n=3, cutoff=0.2)
    name_matches = difflib.get_close_matches(query, all_courses_name(), n=2, cutoff=0.3)

    # Rank every candidate by its similarity to the query. Simply listing code
    # matches first lets a weak code match (cutoff 0.2) outrank an exact name
    # match, and callers only look at the first element.
    best_score = {}
    for code in code_matches:
        best_score[code] = difflib.SequenceMatcher(None, code, query).ratio()
    for name in name_matches:
        code = course_name_to_code(name)
        score = difflib.SequenceMatcher(None, name, query).ratio()
        # A course reached through both its code and its name keeps the better score.
        if score > best_score.get(code, -1.0):
            best_score[code] = score

    if not best_score:
        return None
    # The sort is stable, so ties keep code matches ahead of name matches.
    return sorted(best_score, key=best_score.get, reverse=True)
296
+
297
def get_course_full_profile(course):
    """
    Resolve a course name or code and return the full row of the best match.

    The row is returned as a dict keyed by column name, or None when no course
    matches.
    """
    candidates = match_course_name_code(course)
    if not candidates:
        return None
    best_code = candidates[0]
    if not best_code:
        return None
    # Codes are compared as strings because match_course_name_code yields strings.
    row_mask = courses_df["Code"].astype(str) == best_code
    return courses_df[row_mask].iloc[0].to_dict()
308
 
309
def get_course_skills_profile(course_code):
    """
    Get the course skills profile given its code.

    Returns a dict holding only the NUMERIC_PROFILE skill columns, or None when
    no course matches `course_code`.
    """
    full_profile = get_course_full_profile(course_code)
    if full_profile is None:
        # No matching course: report that instead of failing on None[...].
        return None
    return {k: full_profile[k] for k in NUMERIC_PROFILE}
315
 
316
def get_course_profile(course_code):
    """
    Get the course profile without skill part.

    Returns a dict of every column except the NUMERIC_PROFILE skill columns, or
    None when no course matches `course_code`.
    """
    full_profile = get_course_full_profile(course_code)
    if full_profile is None:
        # No matching course: report that instead of failing on None.items().
        return None
    return {k: v for k, v in full_profile.items() if k not in NUMERIC_PROFILE}
 
 
 
 
 
322
 
323
def search_course_by_skills(
    laser_cutting: float = None,
    wood_working: float = None,
    wood_cnc: float = None,
    metal_machining: float = None,
    metal_cnc: float = None,
    three_d_printer: float = None,
    welding: float = None,
    electronics: float = None,
    n_results: int = 1,
):
    """
    Rank courses by how closely their skill profile fits the requested skills.

    Each course is scored with skill_score, and the codes of the `n_results`
    courses whose absolute score is closest to zero are returned, best first.
    Courses for which skill_score yields None are left out.
    """
    requested_skills = dict(
        laser_cutting=laser_cutting,
        wood_working=wood_working,
        wood_cnc=wood_cnc,
        metal_machining=metal_machining,
        metal_cnc=metal_cnc,
        three_d_printer=three_d_printer,
        welding=welding,
        electronics=electronics,
    )

    ranked = []
    for course_code in all_courses_code():
        score = skill_score(
            skill_profile=get_course_skills_profile(course_code),
            **requested_skills,
        )
        if score is None:
            continue
        # Keep (distance from zero, course code) pairs for sorting.
        ranked.append((abs(score), course_code))

    # Smallest distance first; ties stay in course-list order.
    ranked = sorted(ranked, key=lambda pair: pair[0])
    return [course_code for _, course_code in ranked[:n_results]]
361
+
362
class SearchCourseInformation(smolagents.tools.Tool):
    # Tool that looks up one course by (approximate) name or code.
    name = "search_course_information"
    description = (
        "Search the course information by the course name or course number (code)."
    )
    inputs = {
        "name": {"type": "string", "description": "Course name or course number (code)."},
    }
    # forward() returns a JSON-encoded string, so the declared output type is
    # "string" rather than "object"; otherwise the agent is told to expect a dict.
    output_type = "string"

    def forward(self, name: str) -> str:
        """Return the course profile (without skill columns) as a JSON string."""
        return json.dumps(get_course_profile(name))
374
 
375
+ class FindSuitableCourses(smolagents.tools.Tool):
376
+ name = "find_suitable_courses"
377
  description = (
378
+ "Find the top 3 most suitable courses for the task based on required skills. The first element is the best match."
379
  )
380
  inputs = {
381
+ "laser_cutting": {"type": "number", "nullable": True, "description": "Laser cutting skill being taught during the course. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)"},
382
+ "wood_working": {"type": "number", "nullable": True, "description": "Wood working skill being taught during the course. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)"},
383
+ "wood_cnc": {"type": "number", "nullable": True, "description": "Wood CNC skill being taught during the course. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)"},
384
+ "metal_machining": {"type": "number", "nullable": True, "description": "Metal machining skill being taught during the course. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)"},
385
+ "metal_cnc": {"type": "number", "nullable": True, "description": "Metal CNC skill being taught during the course. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)"},
386
+ "three_d_printer": {"type": "number", "nullable": True, "description": "3D printer skill being taught during the course. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)"},
387
+ "welding": {"type": "number", "nullable": True, "description": "Welding skill being taught during the course. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)"},
388
+ "electronics": {"type": "number", "nullable": True, "description": "Electronics skill being taught during the course. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)"},
389
  }
390
  output_type = "object"
391
 
 
398
  three_d_printer: float = None,
399
  welding: float = None,
400
  electronics: float = None,
401
+ ) -> str:
402
+ matches = search_course_by_skills(
403
  laser_cutting = laser_cutting,
404
  wood_working = wood_working,
405
  wood_cnc = wood_cnc,
 
408
  three_d_printer = three_d_printer,
409
  welding = welding,
410
  electronics = electronics,
411
+ n_results = 3,
412
  )
413
+ options = [get_course_profile(course) for course in matches]
414
+ return json.dumps(options)
415
 
416
+ # Machine Functions (copied from OKKlHB88tt1r)
417
def all_tools():
    """
    Return the names of all tools and machines as a list of strings.

    Rows whose "Name" value is missing are skipped; the remaining values are
    cast to ``str``.
    """
    name_column = tools_df["Name"]
    present_names = name_column.dropna()
    return present_names.astype(str).tolist()
422
+
423
def match_tool_name(input):
    """
    Return the known tool/machine name closest to ``input``, or None.

    The input is converted to ``str`` and fuzzy-matched against ``all_tools()``
    with ``difflib.get_close_matches``. Only the single best candidate is
    requested, and the similarity cutoff is deliberately low (0.2), so loosely
    related spellings still resolve to a tool name.
    """
    query = str(input)
    candidates = difflib.get_close_matches(query, all_tools(), n=1, cutoff=0.2)
    if not candidates:
        return None
    return candidates[0]
430
+
431
def get_tool_location(name: str):
    """
    Return the "Location" value of the tool whose name best matches ``name``.

    Raises:
        ValueError: if ``match_tool_name`` finds no matching tool.
    """
    tool_name = match_tool_name(name)
    if tool_name is None:
        raise ValueError("Not found.")

    # Take the first row whose "Name" equals the resolved tool name.
    matching_rows = tools_df[tools_df["Name"] == tool_name]
    first_match = matching_rows.iloc[0]
    return first_match["Location"]
440
+
441
def is_tool_accessible(name):
    """
    Check whether a machine is accessible to students and whether a course is required.

    The tool is resolved with ``match_tool_name`` and its first matching row in
    ``tools_df`` is inspected.

    Returns:
        A JSON string encoding a dict with two keys:
        ``"short answer"`` ("Yes", "Conditional" or "No") and
        ``"description"`` (a sentence explaining the answer).

    Raises:
        ValueError: if no tool matches ``name``.
    """
    tool_name = match_tool_name(name)
    if tool_name is None:
        raise ValueError("Not found.")

    # Look the row up once instead of filtering the frame for every column.
    row = tools_df[tools_df["Name"] == tool_name].iloc[0]
    # NOTE(review): bool() assumes the column holds real booleans/0-1 values;
    # a textual "No" would be truthy - confirm against the dataset.
    accessible = bool(row["Accessible by Students"])
    course_code = row["Required Course"]

    # A missing cell comes back from pandas as NaN/None/NA. NaN is truthy, so a
    # plain `if course_code:` would wrongly report "Conditional" for tools that
    # have no required course. Check for missing values first (this also avoids
    # calling bool() on pd.NA), then ignore empty or blank codes.
    has_required_course = (
        not pd.isna(course_code)
        and bool(course_code)
        and str(course_code).strip() != ""
    )

    if not accessible:
        # Not accessible by students. Need staff members!
        result_short = "No"
        result_description = "Student cannot access it. Only available to staff memebers. Ask them to do your task for you."
    elif has_required_course:
        # Accessible but conditional (only by passing the course)
        result_short = "Conditional"
        result_description = f"Student can access it only if they take the {course_code}: {course_code_to_name(course_code)}."
    else:
        # Accessible
        result_short = "Yes"
        result_description = "Student can access it."

    result = {
        "short answer": result_short,
        "description": result_description
    }
    return json.dumps(result)
472
+
473
class SearchMachineLocation(smolagents.tools.Tool):
    """Agent tool that reports where a tool or machine is located."""

    name = "search_machine_location"
    description = (
        "Search the machine or tool location in the TechSpark."
    )
    inputs = {
        "name": {
            "type": "string",
            "description": "Tool or machine name.",
        },
    }
    output_type = "object"

    def forward(self, name: str) -> str:
        """Return the location of the requested tool as a JSON string.

        The location is obtained from ``get_tool_location`` and serialized
        with ``json.dumps``.
        """
        location = get_tool_location(name)
        return json.dumps(location)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
485
 
486
class CheckMachineAccessibility(smolagents.tools.Tool):
    """Agent tool that reports whether students may use a tool or machine."""

    name = "check_machine_accessibility"
    description = (
        "Check whether machine or tool is accessible to students. Some are accessible, some need to take a course to become accessible, and some are only available to staff members."
    )
    inputs = {
        "name": {"type": "string", "description": "Tool or machine name."},
    }
    output_type = "object"

    def forward(self, name: str) -> str:
        """Return the accessibility answer for ``name`` as a JSON string.

        ``is_tool_accessible`` already returns a JSON-encoded string. Encoding
        it a second time would hand the agent a quoted, backslash-escaped
        string instead of a JSON object, so a string result is passed through
        unchanged and only non-string results are serialized here.
        """
        answer = is_tool_accessible(name)
        if isinstance(answer, str):
            return answer
        return json.dumps(answer)
498
+
499
+ # Wikipedia Search (copied from 6AHceBzBXISE)
500
class WikipediaSearch(smolagents.Tool):
    """
    Agent tool that runs a Wikipedia search and returns the text result.
    """

    name = "wikipedia_search"
    description = "Search Wikipedia, the free encyclopedia."
    inputs = {
        "query": {
            "type": "string",
            "nullable": True,
            "description": "The search terms",
        },
    }
    output_type = "string"

    def forward(self, query: str | None = None) -> str:
        """Search Wikipedia for ``query``.

        A missing or empty query is not an exception: an error message is
        returned as the tool output so the agent can read it.
        """
        if query:
            # A fresh wrapper is built per call, asking for the top result only.
            return WikipediaAPIWrapper(top_k_results=1).run(query)
        return "Error: 'query' is required."
517
+
518
+ # Agent (copied from 9iwR_e424jfJ)
519
# Short description of TechSpark that opens the agent's instructions.
# Written as explicit single-line literals so the trailing space after
# "Engineering." (present in the original text) stays visible and intact.
techspark_definition = (
    "\n"
    "TechSpark is the largest makerspace at CMU (Carnegie Mellon University), located in the College of Engineering. \n"
    "Its mission is to promote a vibrant, student-centric making culture to enhance educational, extracurricular, and research activities across the entire campus community.\n"
)

# Behavioural guidance for the assistant, including the safety rules.
instruction = """
You are a helpful assistant for the CMU TechSpark facility. Your purpose is to assist users with inquiries related to staff, courses, and tools.
Use the available tools to find information about staff members, suggest suitable staff based on skills, or provide training information for machines.
Respond concisely and directly with the information requested by the user, utilizing the output from the tools.
Which machines to use for a task, and where to find them.
When you were in doubt, try searching wikipedia to gain more knowledge.

Safety is important. So:
- When talking about any machines, check whether it is accessbile to students or not.
- Try to match them to correct staff member specially when you are not sure about your answer or the student work might be dangerous.
"""

# Full prompt: newline, definition, newline, instruction, newline.
system_prompt = "\n".join(["", techspark_definition, instruction, ""])

# The agent gets the final-answer tool plus the staff, course, machine and
# Wikipedia tools.
agent = smolagents.CodeAgent(
    model=model,
    instructions=system_prompt,
    tools=[
        smolagents.FinalAnswerTool(),
        SearchStaffInformation(),
        FindSuitableStaff(),
        SearchCourseInformation(),
        FindSuitableCourses(),
        SearchMachineLocation(),
        CheckMachineAccessibility(),
        WikipediaSearch(),
    ],
    add_base_tools=False,
    max_steps=10,
    verbosity_level=0,  # Changed to 0 for deployment
)
558
 
559
 
560
+ # UI (copied from w0g2EzpD7fUy, adjusted for app.py)
 
 
 
 
 
561
  # Minimal Gradio chat
562
+ with gr.Blocks(title="TechSpark Agent", theme = gr.themes.Soft()) as demo:
563
+ gr.Markdown("""## 🤖 TechSpark AI Assistant
 
 
 
564
  Welcome to the TechSpark AI Assistant!
 
565
  Ask anything about **TechSpark staff, tools, courses or location of tools**
566
+ This assistant is powered by **OpenAI's GPT model** via `smolagents`, accessing accurate information from our curated dataset verified by techspark staff!""")
 
567
  chat = gr.Chatbot(height=420)
568
  inp = gr.Textbox(placeholder="Ask your question in natural language.", label="Your question")
569
 
 
580
  return "", history
581
 
582
  gr.Examples(
 
583
  examples=[
584
  "Who is Ed?",
585
  "Who to talk to to create a wooden table?",
586
  "how to access laser cutter"
587
  ],
588
+ inputs=[inp],
589
+ outputs=[inp, chat],
590
+ fn=respond,
591
+ cache_examples=False, # Set to False for dynamic content or to avoid caching issues
592
  )
593
 
594
  inp.submit(respond, [inp, chat], [inp, chat])
595
 
596
+ demo.launch(share=True)