Spaces:

Sudharsanamr
/

streamlit

Sleeping

File size: 13,740 Bytes

7eef64a
 
 
 
 
 
 
 
 
 
 
 
d46fd4e
7eef64a

# streamlit_superkart_app.py
# Streamlit app for Super Kart — uses a remote Gradio backend and/or a local model file
# This file is written to work even when `streamlit` is NOT available in the environment.
# If `streamlit` is installed, the interactive web UI will run as intended.
# If `streamlit` is missing, the script falls back to a CLI/test mode so you can still
# validate remote endpoint behavior and quick local model tests.

import sys
import io
import traceback
import json
import requests
import pandas as pd
import numpy as np
import joblib

# ----------------------
# Configuration
# ----------------------
# Default prediction URL. It is the default `endpoint` argument of predict_remote and
# the initial value of the "Gradio API endpoint" text input in the sidebar.
DEFAULT_REMOTE = "https://sudharsanamr-superkart.hf.space/gradio_api/call/predict"
# Default value for the "fn_index" field of the request payload built by predict_remote.
DEFAULT_FN_INDEX = 0

# ----------------------
# Utility functions
# ----------------------

def predict_remote(df, endpoint=DEFAULT_REMOTE, fn_index=DEFAULT_FN_INDEX, timeout=30):
    """POST every row of ``df`` to a Gradio-style endpoint, one request per row.

    Each request body is ``{"data": [<row values>], "fn_index": <int>}``.

    Args:
        df: DataFrame whose rows are the samples to send. A frame without
            columns produces no requests.
        endpoint: URL that receives the POST requests.
        fn_index: Value for the payload's ``"fn_index"`` field; coerced with ``int()``.
        timeout: Timeout handed to ``requests.post`` for each request.

    Returns:
        A ``(results, errors)`` tuple.

        ``results`` has one entry per HTTP 200 response: the ``"data"`` field when
        the body is a JSON object containing one, otherwise the decoded JSON, or
        the raw response text when the body is not valid JSON.

        ``errors`` has ``(row_index, "HTTP <status>", body[:1000])`` for non-200
        responses and ``(row_index, message)`` when the request itself raised.

        Failed rows add nothing to ``results``, so ``results`` is not aligned with
        the rows of ``df`` once any error occurred.
    """
    results = []
    errors = []
    # itertuples() keeps each value's own column dtype. iterrows() builds one Series
    # per row, which upcasts mixed columns (ints become floats next to a float column)
    # and would change the values sent to the endpoint.
    row_values = df.itertuples(index=False, name=None)
    for i, values in zip(df.index, row_values):
        payload = {"data": [list(values)], "fn_index": int(fn_index)}
        try:
            r = requests.post(endpoint, json=payload, timeout=timeout)
            if r.status_code != 200:
                errors.append((i, f"HTTP {r.status_code}", r.text[:1000]))
                continue
            try:
                j = r.json()
            except ValueError:
                # Body is not JSON (JSON decode errors subclass ValueError):
                # keep the raw text instead of dropping the response.
                results.append(r.text)
                continue
            if isinstance(j, dict) and 'data' in j:
                results.append(j['data'])
            else:
                results.append(j)
        except Exception as e:
            # Best effort: one failing row must not abort the remaining rows.
            errors.append((i, str(e)))
    return results, errors


def predict_with_model(df, model):
    """Run ``model`` on ``df`` and return the model's output unchanged.

    ``model.predict`` is preferred whenever the attribute exists; otherwise the
    model object itself is called if it is callable.

    Raises:
        ValueError: if ``model`` has no ``predict`` attribute and is not callable.
    """
    if hasattr(model, 'predict'):
        run = model.predict
    elif callable(model):
        run = model
    else:
        raise ValueError("Provided model is not callable and has no .predict method")
    return run(df)


# A tiny dummy model used for CLI tests when no model file is provided.
class DummyModel:
    """Deterministic stand-in model: predicts the per-row sum of the numeric columns."""

    def predict(self, X):
        """Return a list with one value per row of the DataFrame ``X``.

        Each value is the sum of that row's numeric columns. When ``X`` has no
        numeric column at all, a list of zeros of length ``len(X)`` is returned.
        """
        numeric_part = X.select_dtypes(include=[np.number])
        has_numeric_columns = numeric_part.shape[1] != 0
        if has_numeric_columns:
            return numeric_part.sum(axis=1).tolist()
        # Nothing to add up: fall back to zeros.
        return np.zeros(len(X)).tolist()


# ----------------------
# Main: Streamlit UI (if available)
# ----------------------
# Assume streamlit is present; any failure while importing it selects the CLI/test mode.
ST_AVAILABLE = True
try:
    import streamlit as st  # type: ignore
except Exception:
    ST_AVAILABLE = False

# Interactive UI branch: page setup plus the sidebar that selects the prediction mode
# and, depending on the mode, the remote endpoint or the uploaded local model.
if ST_AVAILABLE:
    st.set_page_config(page_title="Super Kart — Prediction App", layout="wide")
    st.title("Super Kart — Prediction App")

    # Sidebar: choose mode. The two option strings are compared verbatim further down.
    mode = st.sidebar.selectbox("Prediction mode", ["Remote API (Gradio)", "Local model (.joblib)"])

    # Defaults so that `model` and `endpoint` are always defined, whichever mode is chosen.
    model = None
    endpoint = DEFAULT_REMOTE

    if mode == "Remote API (Gradio)":
        st.sidebar.write("Remote endpoint (editable)")
        endpoint = st.sidebar.text_input("Gradio API endpoint", value=DEFAULT_REMOTE)
        if st.sidebar.button("Test endpoint"):
            st.sidebar.info("Testing endpoint...")
            try:
                # Minimal probe request: a single value 0 and a hard-coded fn_index of 0.
                probe = {"data": [[0]], "fn_index": 0}
                r = requests.post(endpoint, json=probe, timeout=10)
                st.sidebar.write(f"Status: {r.status_code}")
                try:
                    st.sidebar.write(r.json())
                except Exception:
                    # Body could not be decoded as JSON: show the first 1000 characters instead.
                    st.sidebar.write(r.text[:1000])
            except Exception as e:
                st.sidebar.error(f"Endpoint test failed: {e}")

    else:
        st.sidebar.write("Upload a local scikit-learn model (.joblib)")
        # The uploader accepts both .joblib and .pkl files although the label only mentions .joblib.
        uploaded_model = st.sidebar.file_uploader("Upload model (.joblib)", type=["joblib", "pkl"], key="model_uploader")
        if uploaded_model is not None:
            try:
                bytes_data = uploaded_model.read()
                # NOTE(review): joblib.load unpickles the uploaded bytes, and unpickling can run
                # arbitrary code — only model files from a trusted source should be uploaded.
                model = joblib.load(io.BytesIO(bytes_data))
                st.sidebar.success("Model loaded — ready for predictions")
            except Exception as e:
                # On failure `model` keeps its None default; the error and traceback go to the sidebar.
                st.sidebar.error(f"Failed to load model: {e}")
                st.sidebar.text(traceback.format_exc())

    st.markdown("---")

    # Input section: an uploaded CSV or one manually entered sample becomes
    # `df_to_predict` (None when neither is available).
    st.header("Upload input data")
    uploaded_file = st.file_uploader("Upload CSV (rows = samples). If empty, use manual input below.", type=["csv"])

    input_df = None
    if uploaded_file is not None:
        try:
            input_df = pd.read_csv(uploaded_file)
            st.write("Preview of uploaded data:")
            st.dataframe(input_df.head())
        except Exception as e:
            st.error(f"Failed to read CSV: {e}")

    st.markdown("### Or enter single sample manually")
    # The form's submit button is True only on the script run triggered by its own
    # click. Any later interaction (e.g. pressing a predict button) reruns the script
    # with submit == False, so the parsed sample is stored in st.session_state;
    # otherwise it would be gone before a prediction could ever use it.
    manual_key = "manual_input"
    with st.form("manual_form"):
        sample_text = st.text_area("Paste a single sample as comma-separated values (no header), or JSON list. Example: 12,3.5,0,1", height=80)
        submit = st.form_submit_button("Use manual sample")
        if submit:
            s = sample_text.strip()
            parsed_df = None
            if s:
                try:
                    if s.startswith("["):
                        # JSON list, e.g. [12, 3.5, 0, 1]
                        vals = pd.read_json(io.StringIO(s), typ='series')
                        parsed_df = pd.DataFrame([vals.tolist()])
                    else:
                        # Comma-separated values; empty fields are dropped.
                        parts = [x.strip() for x in s.split(',') if x.strip() != '']
                        parsed = []
                        for p in parts:
                            try:
                                parsed.append(float(p) if '.' in p else int(p))
                            except ValueError:
                                # Not a number: keep the field as text.
                                parsed.append(p)
                        parsed_df = pd.DataFrame([parsed])
                except Exception as e:
                    st.error(f"Failed to parse manual sample: {e}")
                else:
                    st.success("Manual sample parsed")
                    st.write(parsed_df)

            if parsed_df is not None:
                st.session_state[manual_key] = parsed_df
            elif manual_key in st.session_state:
                # Empty or unparsable submission: drop the previously stored sample so
                # a stale one is never used for predictions.
                del st.session_state[manual_key]

    manual_input = st.session_state[manual_key] if manual_key in st.session_state else None

    # An uploaded CSV takes precedence over the manual sample.
    df_to_predict = input_df if input_df is not None else manual_input

    # Prediction section: choose feature columns, coerce text columns to numbers where
    # possible, then run either the uploaded local model or the remote endpoint.
    if df_to_predict is None:
        st.info("Provide an input CSV or a manual sample to get predictions.")
    else:
        st.markdown("---")
        st.header("Prepare & Predict")
        st.write("Columns detected:", list(df_to_predict.columns))

        st.write("Select feature columns to use for prediction (order matters):")
        cols = st.multiselect("Feature columns", options=list(df_to_predict.columns), default=list(df_to_predict.columns))

        if not cols:
            st.error("Select at least one column")
        else:
            X = df_to_predict[cols].copy()
            for c in X.columns:
                if X[c].dtype == object:
                    try:
                        X[c] = pd.to_numeric(X[c])
                    except (ValueError, TypeError):
                        # Column is not fully numeric: leave it unchanged (best effort).
                        pass

            st.write("Prepared features (first rows):")
            st.dataframe(X.head())

            if mode == "Local model (.joblib)":
                if model is None:
                    st.error("No local model loaded. Upload a .joblib model in the sidebar.")
                else:
                    if st.button("Run local predictions"):
                        try:
                            preds = predict_with_model(X, model)
                            st.success("Predictions complete")
                            out = pd.DataFrame({"prediction": preds})
                            st.dataframe(out)

                            csv = out.to_csv(index=False)
                            st.download_button("Download predictions CSV", data=csv, file_name="predictions.csv")
                        except Exception as e:
                            st.error(f"Local prediction failed: {e}")
                            st.text(traceback.format_exc())

            else:
                st.write("Remote API endpoint:", endpoint)
                fn_index = st.number_input("fn_index (Gradio function index)", value=DEFAULT_FN_INDEX, min_value=0)
                if st.button("Send to remote API"):
                    with st.spinner("Sending requests..."):
                        results, errors = predict_remote(X, endpoint=endpoint, fn_index=fn_index)

                    if results:
                        st.success(f"Received {len(results)} responses")
                        try:
                            # Unwrap responses that are non-empty lists to their first element.
                            flattened = [r[0] if isinstance(r, list) and len(r) > 0 else r for r in results]
                            out_df = pd.DataFrame({"prediction": flattened})
                            st.dataframe(out_df)
                            st.download_button("Download predictions CSV", data=out_df.to_csv(index=False), file_name="remote_predictions.csv")
                        except Exception:
                            # Responses do not fit a single column: show them raw instead.
                            st.write(results)

                    if errors:
                        st.error(f"{len(errors)} errors occurred — showing first 5")
                        for e in errors[:5]:
                            st.write(e)

                    if not results and not errors:
                        # Nothing was sent (input without rows): say so instead of showing nothing.
                        st.warning("No rows to send.")

    st.markdown("---")
    st.write("Notes:\n- Many Gradio spaces expect POST body like: {\\\"data\\\": [[...inputs...]], \\\"fn_index\\\": 0}. If your space uses a different format, adjust the payload.\n- If you will upload your model for local predictions, upload it in the sidebar as a joblib file.")

    # Requirements hint (properly closed triple-backticks)
    st.sidebar.markdown("**Requirements**\n```\nstreamlit\npandas\nscikit-learn\njoblib\nrequests\n```")

# ----------------------
# CLI / Test Mode (runs when streamlit is not available)
# ----------------------
else:
    def _print_banner():
        """Print the CLI-mode notice, the run hint and the default endpoint, then a blank line."""
        banner_lines = (
            "Streamlit is not available in this environment. Running in CLI/test mode.",
            "To run the interactive app, install streamlit and run: streamlit run streamlit_superkart_app.py",
            f"Default remote endpoint: {DEFAULT_REMOTE}",
            "",
        )
        for line in banner_lines:
            print(line)

    def _cli_demo():
        """Exercise the prediction helpers from the command line.

        Prints the banner and a small fixed test frame, then tries, in order: the
        default remote endpoint, the built-in DummyModel, and — when a path is given
        as the first command-line argument — a model loaded from that file with
        joblib. Every step reports its own failure and the demo carries on.
        """
        _print_banner()

        # Small fixed sample; only the two numeric columns are used for predictions.
        sample = pd.DataFrame({
            'feature_a': [1.0, 2.5, 3.3],
            'feature_b': [0, 1, 0],
            'category': ['x', 'y', 'z']
        })
        feature_names = ['feature_a', 'feature_b']
        print("Test input:")
        print(sample)

        # Remote prediction is best effort: it needs network access.
        print('\n--- Remote endpoint test ---')
        try:
            remote_results, remote_errors = predict_remote(sample[feature_names], endpoint=DEFAULT_REMOTE)
            print(f"Remote results (count={len(remote_results)}):")
            for item in remote_results:
                print(item)
            if remote_errors:
                print(f"Remote errors (count={len(remote_errors)}):")
                for item in remote_errors:
                    print(item)
        except Exception as exc:
            print("Remote test failed:", str(exc))
            traceback.print_exc()

        # Local prediction with the built-in dummy model.
        print('\n--- Local dummy model test ---')
        try:
            dummy_preds = predict_with_model(sample[feature_names], DummyModel())
            print('Dummy model predictions:', dummy_preds)
        except Exception as exc:
            print('Local dummy model failed:', exc)
            traceback.print_exc()

        # Optional: a model file named on the command line.
        if len(sys.argv) <= 1:
            return
        model_path = sys.argv[1]
        print(f"\n--- Loading local model from: {model_path}")
        try:
            loaded = joblib.load(model_path)
            loaded_preds = predict_with_model(sample[feature_names], loaded)
            print('Predictions from provided model:', loaded_preds)
        except Exception as exc:
            print('Failed to load/predict with provided model:', exc)
            traceback.print_exc()

    # Simple built-in smoke tests for the prediction helpers.
    def _run_tests():
        """Run three quick self-checks of the prediction helpers and print the outcome.

        1. ``predict_remote`` against the default endpoint; any exception is only
           reported, since network access may be unavailable.
        2. ``predict_with_model`` with ``DummyModel`` must return one value per row.
        3. ``predict_with_model`` must raise ``ValueError`` for an object that is
           neither callable nor has ``predict``.

        Raises:
            AssertionError: if check 2 fails.
        """
        print('\n=== Running built-in tests ===')
        df = pd.DataFrame({'a': [1, 2], 'b': [3, 4]})

        # Test 1: predict_remote should return lists (may be empty if network blocked)
        try:
            results, errors = predict_remote(df, endpoint=DEFAULT_REMOTE)
            print('predict_remote returned:', len(results), 'results and', len(errors), 'errors')
        except Exception as e:
            print('predict_remote raised exception (this may be due to network restrictions):', e)

        # Test 2: predict_with_model with DummyModel.
        # Raised explicitly rather than via `assert` so the check still runs under `python -O`.
        out = predict_with_model(df, DummyModel())
        if len(out) != len(df):
            raise AssertionError('DummyModel should return same length output as input rows')
        print('DummyModel test passed — output:', out)

        # Test 3: predict_with_model error case
        class BadModel:
            pass

        try:
            predict_with_model(df, BadModel())
        except ValueError:
            print('predict_with_model correctly raised ValueError for invalid model')
        else:
            print('ERROR: predict_with_model should have raised for BadModel')

        print('All CLI tests completed.')

    # Run the demo followed by the built-in tests, but only when this file is executed
    # as a script (not when it is imported).
    if __name__ == '__main__':
        _cli_demo()
        _run_tests()