File size: 13,740 Bytes
7eef64a
 
 
 
 
 
 
 
 
 
 
 
d46fd4e
7eef64a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
 # streamlit_superkart_app.py
# Streamlit app for Super Kart — uses a remote Gradio backend and/or a local model file
# This file is written to work even when `streamlit` is NOT available in the environment.
# If `streamlit` is installed, the interactive web UI will run as intended.
# If `streamlit` is missing, the script falls back to a CLI/test mode so you can still
# validate remote endpoint behavior and quick local model tests.

import sys
import io
import traceback
import json
import requests
import pandas as pd
import numpy as np
import joblib

# ----------------------
# Configuration
# ----------------------
DEFAULT_REMOTE = "https://sudharsanamr-superkart.hf.space/gradio_api/call/predict"
DEFAULT_FN_INDEX = 0

# ----------------------
# Utility functions
# ----------------------

def predict_remote(df, endpoint=DEFAULT_REMOTE, fn_index=DEFAULT_FN_INDEX, timeout=30):
    """POST each row of ``df`` to a Gradio-style endpoint and collect the replies.

    One request is sent per row with the JSON body
    ``{"data": [<row values>], "fn_index": <fn_index>}``.

    Args:
        df: DataFrame whose rows are the samples to send.
        endpoint: URL that receives the POST requests.
        fn_index: Gradio function index; coerced with ``int()`` once up front.
        timeout: Per-request timeout in seconds, forwarded to ``requests``.

    Returns:
        A ``(results, errors)`` pair.

        ``results`` has one entry per row answered with HTTP 200: the
        ``"data"`` field when the reply is a JSON object containing one, the
        whole parsed JSON otherwise, or the raw response text when the body
        is not valid JSON.

        ``errors`` has one ``(row_index, error_info)`` pair per failed row,
        where ``error_info`` is always a single string. Failed rows add
        nothing to ``results``, so ``results`` is not positionally aligned
        with ``df`` when errors occur.
    """
    results = []
    errors = []
    fn_index = int(fn_index)
    # One session for the whole batch so consecutive rows can reuse the
    # underlying connection instead of reconnecting for every request.
    with requests.Session() as session:
        for i, row in df.iterrows():
            payload = {"data": [row.tolist()], "fn_index": fn_index}
            try:
                r = session.post(endpoint, json=payload, timeout=timeout)
                if r.status_code != 200:
                    # Keep the body short; error pages can be large.
                    errors.append((i, f"HTTP {r.status_code}: {r.text[:1000]}"))
                    continue
                try:
                    j = r.json()
                except ValueError:
                    # Body was not JSON: hand back the raw text instead.
                    results.append(r.text)
                    continue
                if isinstance(j, dict) and 'data' in j:
                    results.append(j['data'])
                else:
                    results.append(j)
            except Exception as e:
                # Best effort: a failing row must not abort the remaining rows.
                errors.append((i, str(e)))
    return results, errors


def predict_with_model(df, model):
    """Run ``model`` on ``df`` and return whatever the model produces.

    An object exposing a ``predict`` attribute is used through
    ``model.predict(df)``; otherwise a plain callable is invoked as
    ``model(df)``.

    Raises:
        ValueError: if ``model`` has no ``predict`` attribute and is not callable.
    """
    if hasattr(model, 'predict'):
        return model.predict(df)
    if not callable(model):
        raise ValueError("Provided model is not callable and has no .predict method")
    return model(df)


# A tiny dummy model used for CLI tests when no model file is provided.
class DummyModel:
    """Deterministic stand-in model: predicts the row-wise sum of numeric columns."""

    def predict(self, X):
        """Return one value per row of ``X`` as a plain list.

        Each value is the sum of that row's numeric columns. When ``X`` has
        no numeric columns at all, a list of zeros of length ``len(X)`` is
        returned instead.
        """
        numeric_part = X.select_dtypes(include=[np.number])
        has_numeric_columns = numeric_part.shape[1] != 0
        if has_numeric_columns:
            row_totals = numeric_part.sum(axis=1)
            return row_totals.tolist()
        # Nothing numeric to add up: fall back to zeros.
        return np.zeros(len(X)).tolist()


# ----------------------
# Main: Streamlit UI (if available)
# ----------------------
# Probe for streamlit once; ST_AVAILABLE records whether the import worked so the
# rest of the file can choose between the web UI and the CLI fallback.
try:
    import streamlit as st  # type: ignore
except Exception:
    ST_AVAILABLE = False
else:
    ST_AVAILABLE = True

if ST_AVAILABLE:
    st.set_page_config(page_title="Super Kart — Prediction App", layout="wide")
    st.title("Super Kart — Prediction App")

    def _parse_manual_value(token):
        """Return ``token`` as an int if possible, else a float, else the text itself."""
        for cast in (int, float):
            try:
                return cast(token)
            except ValueError:
                # Not parseable with this type; try the next, looser one.
                continue
        return token

    # ---- Sidebar: choose where predictions come from ----
    mode = st.sidebar.selectbox("Prediction mode", ["Remote API (Gradio)", "Local model (.joblib)"])

    # Defined up front so both names exist whichever mode is selected.
    model = None
    endpoint = DEFAULT_REMOTE

    if mode == "Remote API (Gradio)":
        st.sidebar.write("Remote endpoint (editable)")
        endpoint = st.sidebar.text_input("Gradio API endpoint", value=DEFAULT_REMOTE)
        if st.sidebar.button("Test endpoint"):
            st.sidebar.info("Testing endpoint...")
            try:
                # Minimal probe using the same body shape predict_remote sends.
                probe = {"data": [[0]], "fn_index": DEFAULT_FN_INDEX}
                r = requests.post(endpoint, json=probe, timeout=10)
                st.sidebar.write(f"Status: {r.status_code}")
                try:
                    body = r.json()
                except ValueError:
                    # Non-JSON reply: show a truncated copy of the raw text.
                    body = r.text[:1000]
                st.sidebar.write(body)
            except Exception as e:
                st.sidebar.error(f"Endpoint test failed: {e}")

    else:
        st.sidebar.write("Upload a local scikit-learn model (.joblib)")
        uploaded_model = st.sidebar.file_uploader("Upload model (.joblib)", type=["joblib", "pkl"], key="model_uploader")
        if uploaded_model is not None:
            try:
                bytes_data = uploaded_model.read()
                # SECURITY: joblib.load unpickles the uploaded bytes, and
                # unpickling can execute arbitrary code. Only load model files
                # that come from a trusted source.
                model = joblib.load(io.BytesIO(bytes_data))
                st.sidebar.success("Model loaded — ready for predictions")
            except Exception as e:
                st.sidebar.error(f"Failed to load model: {e}")
                st.sidebar.text(traceback.format_exc())

    st.markdown("---")

    # ---- Input data: uploaded CSV takes priority over the manual sample ----
    st.header("Upload input data")
    uploaded_file = st.file_uploader("Upload CSV (rows = samples). If empty, use manual input below.", type=["csv"])

    input_df = None
    if uploaded_file is not None:
        try:
            input_df = pd.read_csv(uploaded_file)
            st.write("Preview of uploaded data:")
            st.dataframe(input_df.head())
        except Exception as e:
            st.error(f"Failed to read CSV: {e}")

    st.markdown("### Or enter single sample manually")
    manual_input = None
    with st.form("manual_form"):
        sample_text = st.text_area("Paste a single sample as comma-separated values (no header), or JSON list. Example: 12,3.5,0,1", height=80)
        submit = st.form_submit_button("Use manual sample")
        if submit and sample_text.strip():
            s = sample_text.strip()
            try:
                if s.startswith("["):
                    # json.loads keeps the values exactly as written.
                    # NOTE(review): pd.read_json applies its own type/date
                    # inference to the parsed values (large integers may be
                    # read as timestamps) — confirm for the pandas version in use.
                    values = json.loads(s)
                else:
                    tokens = (x.strip() for x in s.split(','))
                    values = [_parse_manual_value(t) for t in tokens if t != '']
                # One row, one column per value.
                manual_input = pd.DataFrame([values])
                st.success("Manual sample parsed")
                st.write(manual_input)
            except Exception as e:
                st.error(f"Failed to parse manual sample: {e}")

    if input_df is not None:
        df_to_predict = input_df
    elif manual_input is not None:
        df_to_predict = manual_input
    else:
        df_to_predict = None

    if df_to_predict is None:
        st.info("Provide an input CSV or a manual sample to get predictions.")
    else:
        st.markdown("---")
        st.header("Prepare & Predict")
        st.write("Columns detected:", list(df_to_predict.columns))

        st.write("Select feature columns to use for prediction (order matters):")
        cols = st.multiselect("Feature columns", options=list(df_to_predict.columns), default=list(df_to_predict.columns))

        if not cols:
            st.error("Select at least one column")
        else:
            X = df_to_predict[cols].copy()
            # Best-effort numeric coercion: text columns that do not parse as
            # numbers are deliberately left unchanged.
            for c in X.columns:
                if X[c].dtype == object:
                    try:
                        X[c] = pd.to_numeric(X[c])
                    except (ValueError, TypeError):
                        pass

            st.write("Prepared features (first rows):")
            st.dataframe(X.head())

            if mode == "Local model (.joblib)":
                if model is None:
                    st.error("No local model loaded. Upload a .joblib model in the sidebar.")
                else:
                    if st.button("Run local predictions"):
                        try:
                            preds = predict_with_model(X, model)
                            st.success("Predictions complete")
                            out = pd.DataFrame({"prediction": preds})
                            st.dataframe(out)

                            csv = out.to_csv(index=False)
                            st.download_button("Download predictions CSV", data=csv, file_name="predictions.csv")
                        except Exception as e:
                            st.error(f"Local prediction failed: {e}")
                            st.text(traceback.format_exc())

            else:
                st.write("Remote API endpoint:", endpoint)
                fn_index = st.number_input("fn_index (Gradio function index)", value=DEFAULT_FN_INDEX, min_value=0)
                if st.button("Send to remote API"):
                    with st.spinner("Sending requests..."):
                        results, errors = predict_remote(X, endpoint=endpoint, fn_index=fn_index)

                    if results:
                        st.success(f"Received {len(results)} responses")
                        try:
                            # Unwrap replies shaped like [value] to the bare value.
                            flattened = [r[0] if isinstance(r, list) and r else r for r in results]
                            out_df = pd.DataFrame({"prediction": flattened})
                            st.dataframe(out_df)
                            st.download_button("Download predictions CSV", data=out_df.to_csv(index=False), file_name="remote_predictions.csv")
                        except Exception:
                            # Replies that do not fit one column are shown raw.
                            st.write(results)

                    if errors:
                        st.error(f"{len(errors)} errors occurred — showing first 5")
                        for e in errors[:5]:
                            st.write(e)

    st.markdown("---")
    st.write("Notes:\n- Many Gradio spaces expect POST body like: {\\\"data\\\": [[...inputs...]], \\\"fn_index\\\": 0}. If your space uses a different format, adjust the payload.\n- If you will upload your model for local predictions, upload it in the sidebar as a joblib file.")

    # Requirements hint (properly closed triple-backticks)
    st.sidebar.markdown("**Requirements**\n```\nstreamlit\npandas\nscikit-learn\njoblib\nrequests\n```")

# ----------------------
# CLI / Test Mode (runs when streamlit is not available)
# ----------------------
else:
    def _print_banner():
        """Print the CLI-mode notice, the run hint and the default endpoint."""
        notice_lines = (
            "Streamlit is not available in this environment. Running in CLI/test mode.",
            "To run the interactive app, install streamlit and run: streamlit run streamlit_superkart_app.py",
        )
        for line in notice_lines:
            print(line)
        print("Default remote endpoint:", DEFAULT_REMOTE)
        # Trailing blank line separates the banner from what follows.
        print("")

    def _cli_demo():
        """Print the banner, then exercise the remote and local prediction paths.

        Uses a fixed three-row sample frame. Each stage prints its outcome and,
        on failure, a traceback; no stage stops the ones after it. If a path is
        given as the first command-line argument, it is loaded with joblib and
        used for one more prediction run.
        """
        _print_banner()

        # Fixed sample input; only the two numeric columns are sent to models.
        sample = pd.DataFrame({
            'feature_a': [1.0, 2.5, 3.3],
            'feature_b': [0, 1, 0],
            'category': ['x', 'y', 'z'],
        })
        print("Test input:")
        print(sample)
        numeric_features = sample[['feature_a', 'feature_b']]

        # Stage 1: remote endpoint (best effort; needs network access).
        print('\n--- Remote endpoint test ---')
        try:
            replies, failures = predict_remote(numeric_features, endpoint=DEFAULT_REMOTE)
            print(f"Remote results (count={len(replies)}):")
            for reply in replies:
                print(reply)
            if failures:
                print(f"Remote errors (count={len(failures)}):")
                for failure in failures:
                    print(failure)
        except Exception as exc:
            print("Remote test failed:", str(exc))
            traceback.print_exc()

        # Stage 2: built-in dummy model.
        print('\n--- Local dummy model test ---')
        stand_in = DummyModel()
        try:
            dummy_preds = predict_with_model(numeric_features, stand_in)
            print('Dummy model predictions:', dummy_preds)
        except Exception as exc:
            print('Local dummy model failed:', exc)
            traceback.print_exc()

        # Stage 3 (optional): model file named on the command line.
        if len(sys.argv) <= 1:
            return
        model_path = sys.argv[1]
        print(f"\n--- Loading local model from: {model_path}")
        try:
            loaded = joblib.load(model_path)
            file_preds = predict_with_model(numeric_features, loaded)
            print('Predictions from provided model:', file_preds)
        except Exception as exc:
            print('Failed to load/predict with provided model:', exc)
            traceback.print_exc()

    # Add simple tests (these serve as test cases requested)
    def _run_tests():
        """Run the built-in smoke checks and print the outcome of each.

        Three checks run in order on a small two-row frame:

        1. ``predict_remote`` against the default endpoint (an exception is
           printed, since the network may be unavailable).
        2. ``predict_with_model`` with ``DummyModel`` must return one value
           per input row; a mismatch is printed rather than raised, and the
           check does not rely on ``assert`` (which ``python -O`` removes).
        3. ``predict_with_model`` must raise ``ValueError`` for an object that
           is neither callable nor has ``predict``.
        """
        print('\n=== Running built-in tests ===')
        df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})

        # Test 1: predict_remote should return lists (may be empty if network blocked)
        try:
            results, errors = predict_remote(df, endpoint=DEFAULT_REMOTE)
            print('predict_remote returned:', len(results), 'results and', len(errors), 'errors')
        except Exception as e:
            print('predict_remote raised exception (this may be due to network restrictions):', e)

        # Test 2: predict_with_model with DummyModel
        out = predict_with_model(df, DummyModel())
        if len(out) == len(df):
            print('DummyModel test passed — output:', out)
        else:
            print('Test assertion failed:', 'DummyModel should return same length output as input rows')

        # Test 3: predict_with_model error case
        class BadModel:
            pass

        try:
            predict_with_model(df, BadModel())
        except ValueError:
            print('predict_with_model correctly raised ValueError for invalid model')
        else:
            print('ERROR: predict_with_model should have raised for BadModel')

        print('All CLI tests completed.')

    # Entry point for the CLI fallback: this sits in the branch taken when the
    # streamlit import failed, and only fires when the file is executed as a
    # script (not when it is imported). Runs the demo first, then the checks.
    if __name__ == '__main__':
        _cli_demo()
        _run_tests()