# Source: Hugging Face file view of app.py (25.4 kB).
# Last commit: "Update app.py" by aedupuga, revision 291e854 (verified).
import json
import smolagents
import pandas as pd
import numpy as np
from huggingface_hub import login, HfApi
from datasets import Dataset, DatasetDict, load_dataset
import difflib
import openai
from typing import List
import streamlit as st
from streamlit_chat import message
from streamlit_extras.colored_header import colored_header
from streamlit_extras.add_vertical_space import add_vertical_space
# Setup
import os
# Hugging Face access token. Environment variable names are case-sensitive on
# most platforms, so accept both spellings BEFORE logging in (previously the
# upper-case variant was only read after login() had already run).
token_public = os.getenv("token_public") or os.getenv("TOKEN_PUBLIC")
if token_public:
    login(token_public)
# Without a token we skip login(): calling it with None falls back to an
# interactive prompt, which cannot be answered from a hosted app.

# API key handed to the OpenAI-compatible model below.
OPENAI_API = os.getenv("OPENAI_API")

# Hugging Face dataset repositories that hold the three TechSpark tables.
REPO_ID_TECHSPARK_STAFF = "aslan-ng/CMU_TechSpark_Staff"
REPO_ID_TECHSPARK_COURSES = "aslan-ng/CMU_TechSpark_Courses"
REPO_ID_TECHSPARK_TOOLS = "aslan-ng/CMU_TechSpark_Tools"
# LLM model initialization
# The model object is shared by the agent created further down; it talks to an
# OpenAI-compatible endpoint using the key read from the OPENAI_API env var.
model = smolagents.OpenAIServerModel(
model_id="gpt-4.1-mini", # or another fast model
api_key=OPENAI_API,
# optionally: base_url="https://api.groq.com/openai/v1" for Groq, etc.
)
# Numeric profile of skills for each entry
# These names are used as column names for the Staff and Courses sheets and to
# split a staff record into its "skills" part and its descriptive part.
NUMERIC_PROFILE = ["Laser Cutting", "Wood Working", "Wood CNC", "Metal Machining", "Metal CNC", "3D Printer", "Welding", "Electronics"]
# Map common task keywords to candidate machine names.
# find_candidates() tests every key as a plain substring of the lower-cased
# task text, so keys must be lower case. Overlapping keys all fire: a task that
# mentions "vertical band saw" also matches "band saw", and "3d printer" also
# matches "3d print" and "printer".
KEYWORD_TO_MACHINES = {
"mill": ["Mill"],
"shear": ["Shear"],
"vertical band saw": ["Vertical Band Saw"],
"horizontal band saw": ["Horizontal Band Saw"],
"band saw": ["Band Saw"],
"drill press": ["Drill press", "Drill Press", "Mini Drill Press"],
"lathe": ["Lathe"],
"cnc": ["Metal CNC", "Wood CNC"],
"weld": ["MIG Welder", "TIG Welder"],
"plasma": ["Hand-held Plasma Cutter"],
"waterjet": ["Waterjet"],
"torch": ["Acetylene Torch"],
"furnace": ["Furnace"],
"kiln": ["Kiln"],
"cast": ["Centrifugal Caster", "Vacuum Caster", "Vacuum Former", "Pressure Pots", "Vacuum Chambers"],
"tumble": ["Rotary Tumbler"],
"buff": ["Buffing Wheel"],
"solder": ["Soldering stations"],
"electronics": ["Soldering stations", "DC power supplies", "Multimeters", "Oscilloscopes"],
"jig saw": ["Jig Saws"],
"jigsaw": ["Jig Saws"],
"router": ["Table Router"],
"panel saw": ["Panel Saw"],
"table saw": ["Table Saw"],
"miter": ["Miter Saw"],
"sand": ["Belt/Disc/Spindle Sanders"],
"3d print": ["3D Printers"],
"3d printer": ["3D Printers"],
"printer": ["3D Printers"],
"laser": ["Laser Cutters"],
"paint": ["Paint"],
}
# One-line description of what each machine is good for, keyed by the exact
# machine name. make_location_plan() prints the note as a "Why here" line when
# a suggested machine's name is a key of this dict; machines without an entry
# are listed without a note.
MACHINE_NOTES = {
"Laser Cutters": "2D cutting/engraving of sheet materials (e.g., acrylic, plywood, cardboard).",
"3D Printers": "Additive manufacturing of small plastic parts.",
"MIG Welder": "Fast welding of steel/aluminium with filler wire.",
"TIG Welder": "Precise welding of thin metals.",
"Waterjet": "High-precision cutting of almost any material with water/abrasive.",
"Hand-held Plasma Cutter": "Rough cutting of steel plate.",
"Centrifugal Caster": "Casting small metal components using centrifugal force.",
"Vacuum Caster": "Degassing and casting for small parts using vacuum.",
"Vacuum Former": "Forming heated plastic sheets over molds.",
"Pressure Pots": "Pressure-curing of cast parts to remove bubbles.",
"Vacuum Chambers": "Degassing silicone and resins before casting.",
"Soldering stations": "Assembly and rework of PCBs and wired electronics.",
"Table Saw": "Straight cuts in sheet/board stock (wood).",
"Panel Saw": "Breaking down large sheet goods (plywood, MDF).",
"Band Saw": "Curved cuts in wood.",
"Belt/Disc/Spindle Sanders": "Shaping and smoothing wood components.",
"Paint": "Finishing parts with spray paint in a ventilated booth.",
}
def load_data_from_sheet():
    """
    Load the Staff, Courses and Tools tabs from the TechSpark Google Sheet.

    The function imports ``google.colab`` and therefore only works inside a
    Google Colab runtime, where it authenticates the current user.

    The spreadsheet key is taken from a module-level ``SHEET_ID_TECHSPARK``
    if one exists, otherwise from the ``SHEET_ID_TECHSPARK`` environment
    variable.

    Returns:
        A ``(staff_df, courses_df, tools_df)`` tuple of DataFrames whose
        columns are restricted to, and ordered by, the schema below.

    Raises:
        RuntimeError: if no spreadsheet key is configured.
    """
    from google.colab import auth
    from google.auth import default
    import gspread

    # Resolve the key first so a missing configuration fails with a clear
    # message instead of a NameError after the user has already authenticated.
    sheet_id = globals().get("SHEET_ID_TECHSPARK") or os.getenv("SHEET_ID_TECHSPARK")
    if not sheet_id:
        raise RuntimeError(
            "SHEET_ID_TECHSPARK is not configured; define it at module level "
            "or set the SHEET_ID_TECHSPARK environment variable."
        )

    auth.authenticate_user()

    # Worksheet (tab) name -> expected columns, in output order.
    sheet_schema = {
        "Staff": ["Name", "Role", "Overview of Responsibilities", *NUMERIC_PROFILE],
        "Courses": ["Name", "Code", "Description", "Units", "Length (Weeks)", *NUMERIC_PROFILE],
        "Tools": ["Name", "Location", "Accessible by Students", "Required Course"],
    }

    sh = gspread.authorize(default()[0]).open_by_key(sheet_id)
    dfs = {}
    for sheet_name, cols in sheet_schema.items():
        records = sh.worksheet(sheet_name).get_all_records()  # list of dicts (rows)
        # reindex enforces the column order and drops any extra columns.
        dfs[sheet_name] = pd.DataFrame(records).reindex(columns=cols)

    staff_df = dfs["Staff"]
    courses_df = dfs["Courses"]
    tools_df = dfs["Tools"]

    # Clean "Accessible by Students" if it comes as strings "TRUE"/"FALSE"
    if tools_df["Accessible by Students"].dtype == object:
        tools_df["Accessible by Students"] = tools_df["Accessible by Students"].map(
            {"TRUE": True, "FALSE": False}
        )

    # Clean "Required Course": keep it a string column, with empty cells
    # represented as missing values.
    tools_df["Required Course"] = (
        tools_df["Required Course"]
        .replace("", pd.NA)
        .astype("string")
    )
    return staff_df, courses_df, tools_df
def save_data_to_huggingface(staff_df, courses_df, tools_df):
    """
    Push the three tables to their Hugging Face dataset repositories.

    Each DataFrame is converted to a ``Dataset`` (index dropped) and uploaded,
    in the order staff, courses, tools.
    """
    uploads = (
        (staff_df, REPO_ID_TECHSPARK_STAFF),
        (courses_df, REPO_ID_TECHSPARK_COURSES),
        (tools_df, REPO_ID_TECHSPARK_TOOLS),
    )
    for frame, repo_id in uploads:
        Dataset.from_pandas(frame, preserve_index=False).push_to_hub(repo_id)
def refresh_hugginface_repo():
    """
    Re-read the Google Sheet and upload the fresh tables to Hugging Face.
    """
    save_data_to_huggingface(*load_data_from_sheet())
def load_data_from_huggingface():
    """
    Download the staff, courses and tools datasets from Hugging Face.

    Returns the "train" split of each repository as a pandas DataFrame, as a
    ``(staff_df, courses_df, tools_df)`` tuple.
    """
    repo_ids = (
        REPO_ID_TECHSPARK_STAFF,
        REPO_ID_TECHSPARK_COURSES,
        REPO_ID_TECHSPARK_TOOLS,
    )
    staff_df, courses_df, tools_df = (
        load_dataset(repo_id)["train"].to_pandas() for repo_id in repo_ids
    )
    return staff_df, courses_df, tools_df
def vector_1st_distance(x: list, y: list):
    """
    Return the mean of the element-wise differences ``x - y``.

    The result is signed: it is positive when ``x`` is larger than ``y`` on
    average and negative when it is smaller, so differences of opposite sign
    cancel each other.

    Raises ValueError when the two vectors differ in length.
    """
    size = len(x)
    if size != len(y):
        raise ValueError
    differences = np.array(x) - np.array(y)
    return sum(differences) / size
def skill_score(
    skill_profile: dict,  # The skill profile that we want to analyze
    laser_cutting: float = None,
    wood_working: float = None,
    wood_cnc: float = None,
    metal_machining: float = None,
    metal_cnc: float = None,
    three_d_printer: float = None,
    welding: float = None,
    electronics: float = None,
):
    """
    Score a skill profile against the requested skill levels.

    Useful for both staff and course profiles. Only skills whose argument is
    not None take part in the comparison; for those, the profile value is
    compared with the requested value via ``vector_1st_distance``
    (profile first, request second).

    Args:
        skill_profile: mapping from skill column name (e.g. "Laser Cutting")
            to the level stored for that staff member or course.
        laser_cutting ... electronics: requested level per skill, or None to
            ignore that skill.

    Returns:
        The value of ``vector_1st_distance`` for the requested skills, or
        None when no skill was requested (there is nothing to compare, and
        the previous behaviour was a ZeroDivisionError).

    Raises:
        KeyError: if a requested skill is missing from ``skill_profile``.
    """
    requested_levels = {
        "Laser Cutting": laser_cutting,
        "Wood Working": wood_working,
        "Wood CNC": wood_cnc,
        "Metal Machining": metal_machining,
        "Metal CNC": metal_cnc,
        "3D Printer": three_d_printer,
        "Welding": welding,
        "Electronics": electronics,
    }

    profile_values = []
    wanted_values = []
    for column, wanted in requested_levels.items():
        if wanted is not None:
            profile_values.append(skill_profile[column])
            wanted_values.append(wanted)

    if not wanted_values:
        # Callers already treat None as "no score available".
        return None
    return vector_1st_distance(profile_values, wanted_values)
def all_staff():
    """
    List the names of all staff members, skipping rows without a name.
    """
    name_column = staff_df["Name"]
    return name_column.dropna().tolist()
def get_staff_full_profile(name: str):
    """
    Get the staff full profile (including description and skill).

    The name is matched fuzzily against the staff list (difflib, cutoff 0.2),
    so partial or slightly misspelled names still resolve to the closest
    staff member.

    Args:
        name: (approximate) name of the staff member.

    Returns:
        The matching row of the staff table as a dict, or None when ``name``
        is missing/blank or nothing is close enough.
    """
    # A missing name would crash difflib, and a blank one can fuzzy-match an
    # arbitrary person at such a low cutoff - treat both as "not found".
    if not isinstance(name, str) or not name.strip():
        return None

    matches = difflib.get_close_matches(name, all_staff(), n=1, cutoff=0.2)
    if not matches:
        return None
    return staff_df[staff_df["Name"] == matches[0]].iloc[0].to_dict()
def get_staff_skills_profile(name: str):
    """
    Get the staff skills profile given its name.

    Returns:
        A dict with one entry per skill in NUMERIC_PROFILE, or None when no
        staff member matches ``name`` (previously this raised a TypeError).
    """
    full_profile = get_staff_full_profile(name)
    if full_profile is None:
        return None
    return {k: full_profile[k] for k in NUMERIC_PROFILE}
def get_staff_profile(name: str):
    """
    Get the staff profile without skill part.

    Returns:
        The staff record minus the NUMERIC_PROFILE columns, or None when no
        staff member matches ``name`` (previously this raised an
        AttributeError).
    """
    full_profile = get_staff_full_profile(name)
    if full_profile is None:
        return None
    return {k: v for k, v in full_profile.items() if k not in NUMERIC_PROFILE}
def search_staff_by_skills(
    laser_cutting: float = None,
    wood_working: float = None,
    wood_cnc: float = None,
    metal_machining: float = None,
    metal_cnc: float = None,
    three_d_printer: float = None,
    welding: float = None,
    electronics: float = None,
):
    """
    Find the staff member whose skills most closely cover the request.

    Every staff member is scored with ``skill_score`` (staff level minus
    requested level, averaged over the requested skills). Staff with a
    negative score are skipped; among the rest the smallest score wins, and
    the first staff member wins ties.

    Args:
        laser_cutting ... electronics: requested level per skill, or None to
            ignore that skill.

    Returns:
        The name of the best-matching staff member, or None when no skill was
        requested or nobody reaches the requested levels.
    """
    requested = {
        "laser_cutting": laser_cutting,
        "wood_working": wood_working,
        "wood_cnc": wood_cnc,
        "metal_machining": metal_machining,
        "metal_cnc": metal_cnc,
        "three_d_printer": three_d_printer,
        "welding": welding,
        "electronics": electronics,
    }
    # Nothing to compare against: scoring would divide by zero.
    if all(level is None for level in requested.values()):
        return None

    best_name = None
    best_score = float("inf")
    for name in all_staff():
        skills_profile = get_staff_skills_profile(name)
        if skills_profile is None:
            continue
        score = skill_score(skill_profile=skills_profile, **requested)
        # A score of exactly 0 means the staff member matches the request
        # precisely, so it must qualify; with a strict "> 0" a request for the
        # top level could never be satisfied.
        if score is not None and 0 <= score < best_score:
            best_score = score
            best_name = name
    return best_name
def all_courses_code():
    """
    List every course code as a string, skipping rows without a code.
    """
    codes = courses_df["Code"].dropna()
    return codes.astype(str).tolist()
def get_course_info(code: str):
    """
    Look up a course by (approximate) code.

    The code is converted to a string and matched fuzzily against the known
    course codes (difflib, cutoff 0.2). Returns the matching course row as a
    dict, or None when nothing matches.
    """
    wanted = str(code)  # codes may arrive as numbers; compare as text
    candidates = difflib.get_close_matches(wanted, all_courses_code(), n=1, cutoff=0.2)
    if not candidates or not candidates[0]:
        return None
    best_code = candidates[0]
    same_code = courses_df["Code"].astype(str) == best_code
    return courses_df[same_code].iloc[0].to_dict()
def all_tools():
    """
    List the names of all tools, skipping rows without a name.
    """
    name_column = tools_df["Name"]
    return name_column.dropna().tolist()
def get_tool_full_profile(name: str):
    """
    Look up a tool by (approximate) name.

    Returns the matching row of the tools table as a dict, or None when no
    tool name is similar enough.
    """
    # The cutoff is deliberately stricter than for staff/courses so that
    # machines which do not exist are not mapped onto an unrelated tool.
    candidates = difflib.get_close_matches(name, all_tools(), n=1, cutoff=0.6)
    if not candidates or not candidates[0]:
        return None
    best_name = candidates[0]
    same_name = tools_df["Name"] == best_name
    return tools_df[same_name].iloc[0].to_dict()
def find_candidates(task: str):
    """
    Return a DataFrame of candidate machines for the given task description.

    Matching happens in three stages on the lower-cased task text:
      1. keywords from KEYWORD_TO_MACHINES that occur in the task;
      2. machine names from the tools table that occur verbatim in the task;
      3. only if 1 and 2 found nothing: words of the task longer than three
         characters that occur inside a machine name.

    Args:
        task: free-text description of what the user wants to do.

    Returns:
        The rows of the tools table whose "Name" was selected, in table
        order. An empty DataFrame is returned when the table is missing or
        empty, or when nothing matches.
    """
    df = tools_df
    if df is None:
        # No table at all: there are no columns to preserve.
        return pd.DataFrame()
    if df.empty:
        return df.iloc[0:0]  # empty with same columns

    task_lc = task.lower()

    # 1) Matches from keyword mapping
    selected = set()
    for kw, machine_names in KEYWORD_TO_MACHINES.items():
        if kw in task_lc:
            selected.update(machine_names)

    # 2) Direct substring matches on machine names (rows without a usable
    #    name are skipped instead of crashing on .lower()).
    for name in df["Name"].dropna():
        if isinstance(name, str) and name and name.lower() in task_lc:
            selected.add(name)

    # 3) Fallback: token-based substring search
    if not selected:
        # Work on a temporary lower-cased Series so the shared tools table is
        # not modified as a side effect of a search.
        names_lower = df["Name"].str.lower()
        tokens = [t for t in task_lc.replace(",", " ").split() if len(t) > 3]
        for token in tokens:
            # regex=False: user text is matched literally, so punctuation such
            # as "(" cannot raise a regex error; na=False ignores missing names.
            hits = names_lower.str.contains(token, regex=False, na=False)
            selected.update(df.loc[hits, "Name"])

    return df[df["Name"].isin(list(selected))]
def make_location_plan(task: str):
"""Print a short, human-readable location plan for a TechSpark task.

Looks up candidate machines with find_candidates(task) and prints, for each
one, its name and location plus the MACHINE_NOTES entry when one exists,
followed by a fixed list of next steps. Nothing is returned; all output goes
to stdout.
"""
global tools_df
df = tools_df
# Bail out early when the tools table has not been loaded.
if df is None:
print("❌ Machine table not loaded yet.")
return
candidates = find_candidates(task)
print(f"Task: {task}\n")
# No match: tell the user how to rephrase instead of printing an empty plan.
if candidates.empty:
print("I couldn't find a clear machine match in the current table.")
print("Try rephrasing with the machine name you expect (e.g., 'laser cutter', '3D printer', 'MIG welder').")
return
print("Suggested machines and locations:\n")
for _, row in candidates.iterrows():
name = row["Name"]
loc = row["Location"]
print(f"- **{name}** → **{loc}**")
# The explanatory note is optional; only some machines have one.
if name in MACHINE_NOTES:
print(f" - Why here: {MACHINE_NOTES[name]}")
print()
# Distinct locations of all suggested machines, alphabetically.
# NOTE(review): sorted() would fail if a Location is missing (NaN) alongside
# text values - confirm the table never has empty locations.
locations = ", ".join(sorted(candidates["Location"].unique()))
print("Next steps inside TechSpark:")
print(f"1. Walk to: {locations}.")
print("2. Check posted safety/training requirements for the machine you choose.")
print("3. If you're unsure which specific machine is best, ask the staff in that area.")
print("4. Imagine how this module could plug into a larger agent that also plans the full fabrication process and checks training.")
# Agent tools: thin smolagents wrappers around the lookup functions above.
class SearchStaffInformationTool(smolagents.tools.Tool):
    """Agent tool that returns a staff member's descriptive profile by name."""

    name = "search_staff_information"
    description = "Search the staff information by its name."
    inputs = {
        "name": {"type": "string", "description": "Name of the staff member."},
    }
    output_type = "object"

    def forward(self, name: str) -> dict:
        # Delegates to the fuzzy name lookup; skill columns are left out.
        profile = get_staff_profile(name)
        return profile
class FindSuitableStaffTool(smolagents.tools.Tool):
    """Agent tool that picks the staff member best suited to a set of required skill levels."""

    name = "find_suitable_staff"
    description = (
        "Find the most suitable staff member for the task based on required skills."
    )
    # One optional numeric input per skill; skills left as None are ignored.
    inputs = {
        "laser_cutting": {"type": "number", "description": "Laser cutting skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
        "wood_working": {"type": "number", "description": "Wood working skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
        "wood_cnc": {"type": "number", "description": "Wood CNC skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
        "metal_machining": {"type": "number", "description": "Metal machining skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
        "metal_cnc": {"type": "number", "description": "Metal CNC skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
        "three_d_printer": {"type": "number", "description": "3D printer skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
        "welding": {"type": "number", "description": "Welding skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
        "electronics": {"type": "number", "description": "Electronics skill required for the task. It is a number between 0 (no expertise required) to 3 (high expertise expertise). Default is None. If left None, it will be ignored. (Optional)", "nullable": True},
    }
    output_type = "object"

    def forward(self,
        laser_cutting: float = None,
        wood_working: float = None,
        wood_cnc: float = None,
        metal_machining: float = None,
        metal_cnc: float = None,
        three_d_printer: float = None,
        welding: float = None,
        electronics: float = None,
    ) -> dict:
        """
        Return the descriptive profile of the best-matching staff member.

        Returns None when the search finds nobody, instead of passing None on
        to the profile lookup (which raised an exception).
        """
        name = search_staff_by_skills(
            laser_cutting=laser_cutting,
            wood_working=wood_working,
            wood_cnc=wood_cnc,
            metal_machining=metal_machining,
            metal_cnc=metal_cnc,
            three_d_printer=three_d_printer,
            welding=welding,
            electronics=electronics,
        )
        if name is None:
            return None
        return get_staff_profile(name)
class MachineTrainingTool(smolagents.tools.Tool):
    """Agent tool that reports whether students may use a machine and which course it requires."""

    name = "get_machine_training_info"
    description = (
        "Retrieves training information for a specific machine and checks its accessibility. The `machine_name` argument should exactly match the machine's name as listed in the system."
    )
    inputs = {
        "machine_name": {"type": "string", "description": "Name of the machine for which to retrieve training information"},
    }
    output_type = "string"

    def forward(self, machine_name: str) -> str:
        """
        Build a one-paragraph answer about access and training for a machine.

        Args:
            machine_name: name of the machine as given by the user/agent.

        Returns:
            A sentence saying the machine does not exist, is not accessible,
            has no recorded accessibility, or is accessible (with the
            required course, if any).
        """
        tool_info = get_tool_full_profile(machine_name)
        if not tool_info:
            # Message for non-existent machine
            return f"Machine '{machine_name}' does not exist."

        accessible = tool_info.get("Accessible by Students")
        required_course_code = tool_info.get("Required Course")

        # A missing value must not be reported as "accessible": the identity
        # test `is False` let None/NaN (and non-builtin booleans) fall through
        # to the accessible branch.
        if pd.isna(accessible):
            return f"Student access for the {machine_name} is not recorded. Please ask staff for assistance."
        if not bool(accessible):
            return f"The {machine_name} is NOT accessible by students. Please ask staff for assistance."

        response_parts = [f"The {machine_name} is accessible by students."]
        if pd.isna(required_course_code):
            response_parts.append(f"No specific course is required for the {machine_name}.")
        else:
            course_details = get_course_info(required_course_code)
            if course_details:
                course_name = course_details.get('Name', 'Unknown Course')
                response_parts.append(f"The required training for {machine_name} is '{course_name}' (Course Code: {required_course_code}).")
            else:
                response_parts.append(f"A course with code '{required_course_code}' is required for {machine_name}, but its details are not found.")
        return " ".join(response_parts)
#refresh_hugginface_repo() # Only run to refresh the repo
# Load the three tables once at import time; the lookup functions above read
# them as module-level globals.
staff_df, courses_df, tools_df = load_data_from_huggingface()
# The agent that answers user questions, restricted to the three tools below.
agent = smolagents.CodeAgent(
tools=[
SearchStaffInformationTool(),
FindSuitableStaffTool(),
MachineTrainingTool(), # defined above in this file
],
instructions=(
"You are a helpful assistant for the CMU TechSpark facility. Your purpose is to assist users with inquiries related to staff, courses, and tools. "
"Use the available tools to find information about staff members, suggest suitable staff based on skills, or provide training information for machines. "
"Respond concisely and directly with the information requested by the user, utilizing the output from the tools."
),
model=model,
#name="TechSpark Agent",
add_base_tools=False,
max_steps=12,
verbosity_level=2, # show steps in logs for class demo
)
# Disabled Gradio front end, kept only as a bare string literal: nothing in it
# is executed, so `gr`, `demo` and `respond` are NOT defined at runtime.
"""
# --- Sidebar --
import gradio as gr
# Minimal Gradio chat
with gr.Blocks(title="TechSpark Agent") as demo:
gr.Markdown("## Beam Agent — Custom Tool Selection (smolagents + llama.cpp)")
chat = gr.Chatbot(height=420)
inp = gr.Textbox(placeholder="Ask your question in natural language.", label="Your question")
# No gr.State for agent — just close over `agent`
def respond(message, history):
try:
# 1. Use agent.chat() to maintain internal history
out = str(agent.run(message))
except Exception as e:
out = f"[Error] {e}"
# This just updates the Gradio UI history
history = (history or []) + [(message, out)]
return "", history
gr.Examples(
fn=respond,
examples=[
"Who is Ed?",
"Who to talk to to create a wooden table?",
"how to access laser cutter"
],
inputs=[inp]
)
inp.submit(respond, [inp, chat], [inp, chat])
"""
# --- Page config ---
# Wide layout so the chat can use the full browser width.
st.set_page_config(page_title="TechSpark AI Assistant", layout="wide")
# --- Sidebar ---
# Static "about" panel: app title plus a short description.
# NOTE(review): the text says no API key is required, but the model above is
# created with api_key=OPENAI_API - confirm which statement is intended.
with st.sidebar:
st.markdown("<h1 style='text-align:center; font-size:2.5em;'>🤖 TechSpark AI Assistant</h1>", unsafe_allow_html=True)
st.markdown('''
## About
This app is a tech-powered AI chatbot built using:
- Streamlit
- smolagents for AI responses
💡 No API key required!
''')
add_vertical_space(3)
# --- CSS FIXES: SIDEBAR WIDER + CHAT TEXT MUCH BIGGER ---
# Injects a <style> block into the page to restyle the container, sidebar,
# chat bubbles, input box and button, and to scale the whole page.
# NOTE(review): several declarations put a space between the number and the
# unit ("1.6 vw", ".8 vw", "5 vw", "2 vw"). CSS does not allow that, so
# browsers are expected to discard those declarations; "#--SCALE---" is also
# not CSS comment syntax and becomes part of the selector of the following
# `html` rule. Removing the spaces or that line would change the rendered
# layout considerably, so verify in a browser before "fixing" them.
st.markdown("""
<style>
/* --- MAIN CONTAINER FULL WIDTH --- */
[data-testid="stAppViewContainer"] {
max-width: 100% !important;
padding-left: 10px !important;
padding-right: 40px !important;
}
/* --- SIDEBAR WIDTH + SMALLER SIDEBAR TEXT --- */
section[data-testid="stSidebar"] {
width: 1.6 vw !important;
}
section[data-testid="stSidebar"] * {
font-size: .8 vw !important;
}
/* --- TITLES (untouched) --- */
/* --- MASSIVE CHAT BUBBLES --- */
div[data-testid="chat-message"] {
font-size: 5 vw !important; /* HUGE readable text */
line-height: 2!important;
padding: 2vw 2.5vw !important; /* large padding */
border-radius: 2vw !important;
max-width: 70% !important;
}
/* USER MESSAGE */
div[data-testid="chat-message-user"] {
margin-left: auto !important;
background: #00796b !important;
color: white !important;
}
/* ASSISTANT MESSAGE */
div[data-testid="chat-message-assistant"] {
margin-right: auto !important;
background: #222 !important;
color: white !important;
}
/* --- INPUT BOX --- */
.stTextInput textarea {
font-size: 2 vw !important;
padding: 1.4vw !important;
min-height: 8vh !important;
border-radius: 1.5vw !important;
}
/* --- SEND BUTTON --- */
.stButton > button {
font-size: 2 vw !important;
padding: 1vw 2vw !important;
border-radius: 1.5vw !important;
}
#--SCALE---
/* Global scale to simulate 120% zoom */
html {
transform: scale(1.2);
transform-origin: top center;
}
/* Prevent horizontal scrollbar after scaling */
body, .stApp {
width: 83.33%; /* 1 / 1.2 */
margin: 0 auto;
}
</style>
""", unsafe_allow_html=True)
# --- Page heading (centred title and subtitle) ---
st.markdown("<h1 class='main-title' style='text-align:center;'>TechSpark AI Assistant</h1>", unsafe_allow_html=True)
st.markdown("<h2 class='sub-title' style='text-align:center;'>Ask me anything about TechSpark — </h2>", unsafe_allow_html=True)

# --- Chat history ---
# 'past' holds the user messages and 'generated' the assistant replies; both
# are seeded with a greeting the first time the session runs.
_initial_history = (
    ('generated', ["Hi! I'm your AI assistant. How can I help you today?"]),
    ('past', ["Hi!"]),
)
for _state_key, _seed in _initial_history:
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _seed

# --- Page layout: input on top, divider, responses below ---
input_container = st.container()
colored_header(label='', description='', color_name='blue-30')
response_container = st.container()
# --- User input ---
def get_text():
    """Render the message box and return its current text ("" when empty)."""
    return st.text_input("You:", "", key="input", placeholder="Type your message here...")


with input_container:
    user_input = get_text()
# --- Generate AI response ---
def generate_response(prompt):
    """
    Run the TechSpark agent on ``prompt`` and return its answer as text.

    Any failure is reported to the user as an "[Error] ..." string rather
    than raised, so a failed request does not break the chat page.
    """
    try:
        # Use the module-level `agent` instance; the smolagents package has
        # no `agent` attribute, so `smolagents.agent(...)` always failed.
        return str(agent.run(prompt))
    except Exception as e:
        return f"[Error] {e}"
# --- Display responses ---
with response_container:
    # A non-empty input box means there is a message to answer on this run.
    if user_input:
        response = generate_response(user_input)
        st.session_state.past.append(user_input)
        st.session_state.generated.append(response)

    # Replay the whole conversation, user message first, then the reply.
    if st.session_state['generated']:
        conversation = zip(st.session_state['past'], st.session_state['generated'])
        for i, (user_msg, assistant_msg) in enumerate(conversation):
            message(user_msg, is_user=True, key=f"{i}_user")
            message(assistant_msg, key=f"{i}_assistant")

# No `demo.launch()` here: `demo` only exists inside the disabled Gradio code,
# so calling it raised a NameError at the end of every Streamlit run.