File size: 8,714 Bytes
865dc6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6c613c9
 
 
 
 
 
 
 
 
865dc6c
6c613c9
865dc6c
6c613c9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
865dc6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd92616
865dc6c
 
 
 
bd92616
865dc6c
 
 
 
 
 
 
 
 
 
 
 
bd92616
865dc6c
 
bd92616
 
 
 
865dc6c
bd92616
 
 
 
 
 
 
 
865dc6c
 
 
 
 
 
bd92616
865dc6c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
"""
Tabular Flower Classifier - Gradio App
Homework 3 - GUI Module
Author: Anyu Huang
Model Source: its-zion-18/flowers-tabular-autolguon-predictor

This app loads an AutoGluon TabularPredictor from a ZIP file
and exposes a simple Gradio interface to make predictions and show class
probabilities.
"""

# ============================================================================
# IMPORTS
# ============================================================================
import os
import shutil
import zipfile
import pathlib
import pandas as pd
import gradio as gr
import numpy as np
from autogluon.tabular import TabularPredictor

# ============================================================================
# CONFIGURATION
# ============================================================================
# Archive holding the saved predictor directory. The path is relative, so it is
# resolved against the current working directory when load_predictor() checks it.
ZIP_FILENAME = "autogluon_predictor_dir.zip"
# Directory the archive is unpacked into. load_predictor() deletes and
# re-creates it on every call.
EXTRACT_DIR = pathlib.Path("predictor_native")

# ============================================================================
# MODEL LOADING
# ============================================================================
def load_predictor():
    """
    Unpack the zipped AutoGluon predictor and return it loaded.

    Steps:
      1) Verify that ZIP_FILENAME exists
      2) Wipe EXTRACT_DIR if present, re-create it, and unzip the archive into it
      3) Load from the first walked directory that has a 'models' sub-folder,
         falling back to EXTRACT_DIR itself when no such directory is found

    Returns:
        TabularPredictor: Whatever TabularPredictor.load returns for the chosen path.
    Raises:
        FileNotFoundError: If ZIP_FILENAME does not exist.
    """
    # Guard clause: nothing to do without the archive
    archive_missing = not os.path.exists(ZIP_FILENAME)
    if archive_missing:
        raise FileNotFoundError(f"ZIP file not found: {ZIP_FILENAME}")
    print(f"Found ZIP file: {ZIP_FILENAME}")

    # Always start from an empty extraction directory
    target_dir = str(EXTRACT_DIR)
    if EXTRACT_DIR.exists():
        shutil.rmtree(EXTRACT_DIR)
    EXTRACT_DIR.mkdir(parents=True, exist_ok=True)

    print("Extracting predictor...")
    with zipfile.ZipFile(ZIP_FILENAME, 'r') as archive:
        archive.extractall(target_dir)

    # Heuristic: the predictor root is the first directory holding a 'models' folder.
    # next() stops the walk at the first match.
    predictor_root = next(
        (
            folder
            for folder, subfolders, _ in os.walk(target_dir)
            if 'models' in subfolders
        ),
        None,
    )
    if predictor_root is not None:
        print(f"Loading predictor from: {predictor_root}")
        return TabularPredictor.load(predictor_root, require_py_version_match=False)

    # No 'models' folder anywhere: try the extraction directory itself
    print(f"Loading predictor from: {EXTRACT_DIR}")
    return TabularPredictor.load(target_dir, require_py_version_match=False)


# Load the predictor a single time, when the module is first executed
print("Loading AutoGluon TabularPredictor...")
PREDICTOR = load_predictor()
print("Predictor loaded successfully!")

# Model metadata used by the rest of the app: input feature names and the label
# column. Each falls back to a default when the predictor lacks the attribute.
if hasattr(PREDICTOR, 'feature_metadata'):
    FEATURE_COLS = PREDICTOR.feature_metadata.get_features()
else:
    FEATURE_COLS = []

if hasattr(PREDICTOR, 'label'):
    TARGET_COL = PREDICTOR.label
else:
    TARGET_COL = "target"

print(f"Features: {FEATURE_COLS}")
print(f"Target: {TARGET_COL}")


# ============================================================================
# PREDICTION FUNCTION
# ============================================================================
def predict(*feature_values):
    """
    Build a single-row DataFrame from UI inputs and get prediction + probabilities.

    Args:
        *feature_values: Values in FEATURE_COLS order. Values beyond
            len(FEATURE_COLS) are ignored; if fewer are supplied, the remaining
            feature columns are absent from the row passed to the predictor.

    Returns:
        (proba_dict, message)
            proba_dict: dict(label -> probability), sorted descending. If
                probabilities cannot be obtained it is {str(prediction): 1.0};
                if prediction itself fails it is {}.
            message:    Markdown summary with predicted label + confidence, or
                an error notice when prediction failed.
    """
    try:
        # Map UI inputs to a dict matching the model's feature columns.
        # zip() stops at the shorter sequence, so surplus values are dropped.
        input_data = {}
        for col, val in zip(FEATURE_COLS, feature_values):
            try:
                # Try numeric first (keeps sliders/numbers numeric); an empty
                # text box counts as 0.0
                input_data[col] = float(val) if val != "" else 0.0
            except (TypeError, ValueError, OverflowError):
                # Not convertible to float: pass the raw value through
                # (e.g. a category string, or None from a cleared field).
                # Only conversion errors are caught so that KeyboardInterrupt
                # and SystemExit are never swallowed here.
                input_data[col] = val

        print(f"Input data: {input_data}")

        # Build a DataFrame row for inference
        X = pd.DataFrame([input_data])
        print(f"DataFrame shape: {X.shape}")
        print(f"DataFrame columns: {X.columns.tolist()}")

        # Predicted label (or regression value)
        pred = PREDICTOR.predict(X)
        pred_value = pred.iloc[0]
        print(f"Prediction: {pred_value}")

        # Class probabilities (if classifier). If regression, synthesize 100% on prediction.
        try:
            proba_df = PREDICTOR.predict_proba(X)
            if isinstance(proba_df, pd.Series):
                # Normalize to DataFrame shape if a Series comes back
                proba_df = proba_df.to_frame().T

            proba_dict = {
                str(col): float(proba_df[col].iloc[0]) for col in proba_df.columns
            }

            # Sort highest to lowest
            proba_dict = dict(
                sorted(proba_dict.items(), key=lambda item: item[1], reverse=True)
            )
        except Exception as e:
            print(f"Error getting probabilities: {e}")
            # Regression or unsupported proba: show pseudo-confidence
            proba_dict = {str(pred_value): 1.0}

        # Human-readable summary (confidence = max probability * 100)
        confidence = max(proba_dict.values()) * 100 if proba_dict else 100
        # A blank line separates the two fields: a single newline is only a
        # Markdown soft break and would render both on one line.
        message = f"**Prediction:** {pred_value}\n\n**Confidence:** {confidence:.2f}%"

        return proba_dict, message

    except Exception as e:
        # Report the failure in the UI and keep the full traceback in the logs
        error_msg = f"**Error:** {str(e)}\n\nPlease check the logs for details."
        print(f"Prediction error: {e}")
        import traceback
        traceback.print_exc()
        return {}, error_msg


# ============================================================================
# EXAMPLES (quick-start presets for the first 4 features)
# ============================================================================
# Each preset supplies four measurements; when the model has more than four
# features, the remaining positions are filled with 0.0.
EXAMPLES = [
    base_row + [0.0] * max(len(FEATURE_COLS) - 4, 0)
    for base_row in (
        [5.1, 3.5, 1.4, 0.2],
        [7.0, 3.2, 4.7, 1.4],
        [6.3, 3.3, 6.0, 2.5],
    )
]


# ============================================================================
# GRADIO UI
# ============================================================================
with gr.Blocks(theme=gr.themes.Soft(), title="Tabular Flower Classifier") as demo:
    # Page heading and short usage instructions
    gr.Markdown("""
    # Tabular Flower Classifier
    
    This app uses an **AutoGluon TabularPredictor** to classify flowers based on their features.
    Adjust the feature values below and click **Predict** to see the classification results.
    """)

    with gr.Row():
        # Left-hand column: one input widget per feature, then the button
        with gr.Column(scale=1):
            gr.Markdown("### Input Features")

            # At most the first 10 features get a widget. The first four are
            # 0-10 sliders starting at 5.0; the rest are numeric boxes starting at 0.0.
            feature_inputs = [
                gr.Slider(minimum=0, maximum=10, value=5.0, label=name)
                if position < 4
                else gr.Number(label=name, value=0.0)
                for position, name in enumerate(FEATURE_COLS[:10])
            ]

            predict_btn = gr.Button("Predict", size="lg", variant="primary")

        # Right-hand column: text summary above the probability label
        with gr.Column(scale=1):
            gr.Markdown("### Prediction Results")
            prediction_output = gr.Markdown(value="*Adjust features and click Predict*")
            proba_display = gr.Label(label="Top 5 Class Probabilities", num_top_classes=5)

    # Clicking the button sends every input widget's value to predict()
    predict_btn.click(predict, feature_inputs, [proba_display, prediction_output])

    gr.Markdown("### Example flower measurements")
    # Preset rows the user can load into the input widgets
    gr.Examples(
        EXAMPLES,
        feature_inputs,
        fn=predict,
        outputs=[proba_display, prediction_output],
        cache_examples=False,
    )

    gr.Markdown("""
    ---
    ### About
    - **Model**: AutoGluon TabularPredictor
    - **Task**: Flower classification based on measurements
    - **Features**: Adjust the sliders/inputs above to test different flower measurements
    """)

# ============================================================================
# ENTRY POINT
# ============================================================================
# Start the Gradio app only when this file is executed as a script;
# importing the module builds `demo` without launching it.
if __name__ == "__main__":
    demo.launch()