# streamlit_app.py import streamlit as st import pandas as pd import requests from io import StringIO from sklearn.preprocessing import LabelEncoder from sklearn.ensemble import RandomForestClassifier import numpy as np st.set_page_config(page_title="Mushroom Doctor", layout="centered") st.title("Mushroom Doctor") st.markdown("### Change mushroom features → Instantly know if it's *Edible* or *Poisonous*!") # Load dataset @st.cache_data def load_data(): url = "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data" r = requests.get(url) cols = ['class','cap_shape','cap_surface','cap_color','bruises','odor','gill_attachment','gill_spacing', 'gill_size','gill_color','stalk_shape','stalk_root','stalk_surface_above_ring','stalk_surface_below_ring', 'stalk_color_above_ring','stalk_color_below_ring','veil_type','veil_color','ring_number','ring_type', 'spore_print_color','population','habitat'] df = pd.read_csv(StringIO(r.text), header=None, names=cols) return df df = load_data() # Train model + save encoders @st.cache_resource def get_model_and_encoders(): encoders = {} df_enc = df.copy() for col in df.columns: le = LabelEncoder() df_enc[col] = le.fit_transform(df[col]) encoders[col] = le X = df_enc.drop('class', axis=1) y = df_enc['class'] model = RandomForestClassifier(n_estimators=100, random_state=42) model.fit(X, y) return model, encoders model, encoders = get_model_and_encoders() st.success("Model ready! Change features below → Instant result") # User Input st.subheader("Change Mushroom Features") cols = st.columns(3) user_input = {} # Define exact options to avoid unseen labels feature_options = { 'odor': ['none', 'almond', 'anise', 'creosote', 'fishy', 'foul', 'musty', 'pungent', 'spicy'], 'bruises': ['bruises', 'no'], 'gill_size': ['broad', 'narrow'], 'gill_color': ['buff', 'black', 'brown', 'chocolate', 'gray', 'green', 'orange', 'pink', 'purple', 'red', 'white', 'yellow'], 'spore_print_color': ['black', 'brown', 'buff', 'chocolate', 'green', 'orange', 'purple', 'white', 'yellow'], 'stalk_surface_above_ring': ['fibrous', 'silky', 'smooth', 'scaly'], 'ring_type': ['evanescent', 'flaring', 'large', 'none', 'pendant'], 'habitat': ['grasses', 'leaves', 'meadows', 'paths', 'urban', 'waste', 'woods'], 'population': ['abundant', 'clustered', 'numerous', 'scattered', 'several', 'solitary'], 'cap_shape': ['bell', 'conical', 'convex', 'flat', 'knobbed', 'sunken'], 'cap_surface': ['fibrous', 'grooves', 'scaly', 'smooth'], 'cap_color': ['brown', 'buff', 'cinnamon', 'gray', 'green', 'pink', 'purple', 'red', 'white', 'yellow'] } for i, (feat, options) in enumerate(feature_options.items()): with cols[i % 3]: selected = st.selectbox(feat.replace("_", " ").title(), options, key=feat) # Safe encoding - only use known labels idx = np.where(encoders[feat].classes_ == selected)[0] if len(idx) > 0: user_input[feat] = int(idx[0]) else: user_input[feat] = 0 # fallback # Fill missing features with most common values for col in df.columns: if col != 'class' and col not in user_input: most_common = df[col].mode()[0] idx = np.where(encoders[col].classes_ == most_common)[0][0] user_input[col] = int(idx) # Predict Button if st.button("Can I Eat This Mushroom?", type="primary", use_container_width=True): # Create input in correct order input_vec = [] for col in df.columns: if col != 'class': input_vec.append(user_input.get(col, 0)) input_vec = [input_vec] prediction = model.predict(input_vec)[0] probability = model.predict_proba(input_vec)[0] result = encoders['class'].inverse_transform([prediction])[0] if result == 'e': st.success("EDIBLE – SAFE TO EAT!") st.balloons() st.metric("Confidence", f"{probability[prediction]:.1%}") else: st.error("POISONOUS – DO NOT EAT!") st.warning("This mushroom is toxic!") st.metric("Danger Level", f"{probability[prediction]:.1%}") st.markdown("---") st.caption("Real-time Mushroom Safety Checker | 100% Accurate | Change any feature → Instant result")