Spaces:

aylintokgoz27
/

DataScience

Sleeping

File size: 3,665 Bytes

346c97b

import os
import streamlit as st
import joblib
import numpy as np
import pandas as pd
from huggingface_hub import hf_hub_download

# Hugging Face Hub repository id passed to hf_hub_download for every artifact.
REPO = "selimyildirim18/data-science-london-scikit-learn"

# Browser-tab title and icon, followed by the page heading and a short prompt.
st.set_page_config(
    page_icon="🔬",
    page_title="Data Science London",
)
st.title("🔬 Data Science London Classifier")
st.write("Enter 40 feature values to classify the sample.")

@st.cache_resource
def _load_model_artifacts():
    """Download and deserialize the scaler and the three classifiers.

    Decorated with ``st.cache_resource`` so a successful load is reused
    across reruns. Any exception is allowed to propagate: a call that raises
    stores nothing in the cache, so the next rerun retries the download.

    Returns:
        tuple: ``(scaler, log_model, rf_model, gb_model)``.
    """
    filenames = ("scaler.pkl", "log_model.pkl", "rf_model.pkl", "gb_model.pkl")
    paths = [hf_hub_download(repo_id=REPO, filename=name) for name in filenames]
    # NOTE(security): joblib.load unpickles arbitrary Python objects, so it
    # executes whatever code the files contain. Only point REPO at a
    # repository you trust.
    return tuple(joblib.load(path) for path in paths)


def load_models():
    """Return ``(scaler, log_model, rf_model, gb_model)`` or four ``None``s.

    Successful loads are cached by ``_load_model_artifacts``. On any failure
    an error box is shown and ``(None, None, None, None)`` is returned.
    The failure handling lives outside the cached helper on purpose: if the
    ``None`` tuple were returned from inside the cached function it would be
    cached too, and a transient network error would keep the app broken
    until the cache is cleared or the process restarts.
    """
    try:
        return _load_model_artifacts()
    except Exception as e:  # UI boundary: report the problem instead of crashing
        st.error(f"❌ Model yüklenemedi: {e}")
        return None, None, None, None


# Keep `load_models.clear()` available, as it was when this function itself
# carried the cache decorator.
load_models.clear = _load_model_artifacts.clear

# Number of input features the UI collects, and the label used for each one.
_N_FEATURES = 40
_FEATURE_NAMES = [f"feature_{i}" for i in range(_N_FEATURES)]


def _predict_with_confidence(model, features):
    """Return ``(label, probability)`` for the first row of *features*.

    The probability is looked up through ``model.classes_`` because the
    columns of ``predict_proba`` follow that ordering. Indexing the
    probability vector with the label itself is only correct when the labels
    happen to be ``0..k-1``.
    """
    label = model.predict(features)[0]
    probabilities = model.predict_proba(features)[0]
    position = list(model.classes_).index(label)
    return label, probabilities[position]


def _show_prediction(title, label, confidence):
    """Render one model's predicted class and confidence in the current container."""
    st.subheader(title)
    st.success(f"**Class: {label}**")
    st.metric("Confidence", f"{confidence*100:.1f}%")


def _read_feature_csv(uploaded_file):
    """Parse an uploaded header-less CSV into a validated feature frame.

    Returns:
        A DataFrame with exactly ``_N_FEATURES`` numeric columns named
        ``feature_0`` .. ``feature_39``, or ``None`` after showing an error
        when the file is empty, malformed, has the wrong number of columns,
        or contains non-numeric / missing cells.
    """
    try:
        frame = pd.read_csv(uploaded_file, header=None)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"❌ Could not read the CSV file: {exc}")
        return None

    if frame.shape[1] != _N_FEATURES:
        st.error(
            f"❌ Expected {_N_FEATURES} columns but the file has {frame.shape[1]}."
        )
        return None

    # Coerce so that a stray header row or text cell becomes NaN and is
    # reported here instead of failing inside the models.
    frame = frame.apply(pd.to_numeric, errors="coerce")
    if frame.isna().any().any():
        st.error(
            "❌ The file contains non-numeric or missing values. "
            "Remove any header row and make sure every cell is a number."
        )
        return None

    frame.columns = _FEATURE_NAMES
    return frame


with st.spinner("Loading models..."):
    scaler, log_model, rf_model, gb_model = load_models()

# load_models() returns None for every artifact on failure, so checking the
# scaler is enough to know whether the UI can be shown.
if scaler is not None:
    tab1, tab2 = st.tabs(["✏️ Manuel Giriş", "📂 CSV Yükleme"])

    # --- Manual entry: one number input per feature, laid out in 4 columns ---
    with tab1:
        st.write("Enter values for all 40 features:")

        cols = st.columns(4)
        values = []
        for i, feat in enumerate(_FEATURE_NAMES):
            with cols[i % 4]:
                values.append(
                    st.number_input(feat, value=0.0, step=0.01, format="%.4f", key=feat)
                )

        if st.button("Predict"):
            X_input = np.array(values).reshape(1, -1)
            X_scaled = scaler.transform(X_input)

            # Logistic regression receives the scaled features; the two
            # tree-based models receive the raw ones.
            # NOTE(review): presumably this mirrors how each model was
            # trained - confirm against the training code.
            model_inputs = [
                ("Logistic Regression", log_model, X_scaled),
                ("Random Forest", rf_model, X_input),
                ("Gradient Boosting", gb_model, X_input),
            ]
            # Run every model before drawing anything so a failing model
            # does not leave a half-rendered result row.
            outcomes = [
                (title, *_predict_with_confidence(model, features))
                for title, model, features in model_inputs
            ]

            for column, (title, label, confidence) in zip(st.columns(3), outcomes):
                with column:
                    _show_prediction(title, label, confidence)

    # --- Batch mode: classify every row of an uploaded CSV ---
    with tab2:
        st.write("Upload a CSV with 40 columns (feature_0 to feature_39), no header required.")

        uploaded = st.file_uploader("Upload CSV", type=["csv"])

        if uploaded is not None:
            df_csv = _read_feature_csv(uploaded)

            if df_csv is not None:
                # Pass plain arrays, exactly like the manual tab does, so the
                # estimators see the same kind of input on both paths.
                X_raw = df_csv.to_numpy(dtype=float)
                X_scaled = scaler.transform(X_raw)

                df_csv["LogReg"] = log_model.predict(X_scaled)
                df_csv["RandomForest"] = rf_model.predict(X_raw)
                df_csv["GradBoost"] = gb_model.predict(X_raw)

                st.dataframe(df_csv, use_container_width=True)

                csv_out = df_csv.to_csv(index=False).encode("utf-8")
                st.download_button("⬇️ Download Results", csv_out, "predictions.csv", "text/csv")