# Streamlit app: classify a 40-feature sample with three pre-trained
# scikit-learn models (logistic regression, random forest, gradient boosting)
# downloaded from the Hugging Face Hub. Input is either typed in manually
# (one sample) or uploaded as a header-less CSV (many samples).
import os

import joblib
import numpy as np
import pandas as pd
import streamlit as st
from huggingface_hub import hf_hub_download

st.set_page_config(page_title="Data Science London", page_icon="🔬")
st.title("🔬 Data Science London Classifier")
st.write("Enter 40 feature values to classify the sample.")

REPO = "selimyildirim18/data-science-london-scikit-learn"
N_FEATURES = 40
FEATURE_NAMES = [f"feature_{i}" for i in range(N_FEATURES)]
# Order matters: it is the order of the tuple returned by load_models().
MODEL_FILES = ("scaler.pkl", "log_model.pkl", "rf_model.pkl", "gb_model.pkl")


@st.cache_resource
def _fetch_models():
    """Download and deserialize the scaler and the three classifiers.

    Raises whatever the download or deserialization raises. Errors are
    deliberately NOT handled here: a cached function that returns a
    "failed" value would keep serving that value on every rerun, whereas
    an exception leaves nothing in the cache and the next rerun retries.

    Returns:
        Tuple ``(scaler, log_model, rf_model, gb_model)``.
    """
    paths = [hf_hub_download(repo_id=REPO, filename=name) for name in MODEL_FILES]
    # NOTE(security): joblib.load unpickles the files, which can execute
    # arbitrary code. Only point REPO at a repository you trust.
    return tuple(joblib.load(path) for path in paths)


def load_models():
    """Return ``(scaler, log_model, rf_model, gb_model)`` or four ``None``s.

    On failure an error box is shown in the app and ``(None, None, None,
    None)`` is returned so the caller can skip rendering the rest of the UI.
    """
    try:
        return _fetch_models()
    except Exception as e:  # top-level UI boundary: report, do not crash
        st.error(f"❌ Model yüklenemedi: {e}")
        return None, None, None, None


def _predict_with_confidence(model, X):
    """Return ``(label, probability_of_label)`` for the single row in ``X``."""
    label = model.predict(X)[0]
    proba = model.predict_proba(X)[0]
    # predict_proba columns follow model.classes_, so look the column up by
    # label instead of assuming the label itself is a valid positional index.
    column = list(model.classes_).index(label)
    return label, proba[column]


def _show_prediction(column, title, label, confidence):
    """Render one model's predicted class and confidence inside ``column``."""
    with column:
        st.subheader(title)
        st.success(f"**Class: {label}**")
        st.metric("Confidence", f"{confidence*100:.1f}%")


def _read_feature_csv(uploaded):
    """Parse an uploaded header-less CSV into a validated feature frame.

    Returns:
        ``(frame, None)`` when the file holds exactly ``N_FEATURES`` numeric
        columns with no missing values (columns renamed to FEATURE_NAMES),
        otherwise ``(None, message)`` with a user-facing explanation.
    """
    try:
        frame = pd.read_csv(uploaded, header=None)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        return None, f"Could not read the CSV file: {exc}"

    if frame.shape[1] != N_FEATURES:
        return None, (
            f"Expected {N_FEATURES} columns but the file has {frame.shape[1]}."
        )
    frame.columns = FEATURE_NAMES

    # A header row (or any text cell) turns the whole column into a
    # non-numeric dtype because the file is read with header=None.
    non_numeric = [
        name for name in FEATURE_NAMES
        if not pd.api.types.is_numeric_dtype(frame[name])
    ]
    if non_numeric:
        return None, (
            "Non-numeric values found in: " + ", ".join(non_numeric)
            + ". Remove any header row and make sure every cell is a number."
        )
    if frame.isna().any().any():
        return None, "The file contains missing values; fill or remove them first."
    return frame, None


with st.spinner("Loading models..."):
    scaler, log_model, rf_model, gb_model = load_models()

if scaler is not None:
    tab1, tab2 = st.tabs(["✏️ Manuel Giriş", "📂 CSV Yükleme"])

    with tab1:
        st.write("Enter values for all 40 features:")
        values = []
        cols = st.columns(4)
        # Lay the inputs out row-wise across four columns.
        for i, feat in enumerate(FEATURE_NAMES):
            with cols[i % 4]:
                values.append(
                    st.number_input(
                        feat, value=0.0, step=0.01, format="%.4f", key=feat
                    )
                )

        if st.button("Predict"):
            X_input = np.array(values).reshape(1, -1)
            # Only the logistic regression is fed the scaled features; the
            # two tree ensembles receive the raw values.
            X_scaled = scaler.transform(X_input)
            results = [
                ("Logistic Regression", *_predict_with_confidence(log_model, X_scaled)),
                ("Random Forest", *_predict_with_confidence(rf_model, X_input)),
                ("Gradient Boosting", *_predict_with_confidence(gb_model, X_input)),
            ]
            for column, (title, label, confidence) in zip(st.columns(3), results):
                _show_prediction(column, title, label, confidence)

    with tab2:
        st.write("Upload a CSV with 40 columns (feature_0 to feature_39), no header required.")
        uploaded = st.file_uploader("Upload CSV", type=["csv"])
        if uploaded:
            df_csv, problem = _read_feature_csv(uploaded)
            if problem is not None:
                st.error(problem)
            else:
                # Take the features as a plain array before any prediction
                # columns are appended, matching what the manual tab passes.
                X_raw = df_csv.to_numpy(dtype=float)
                X_scaled = scaler.transform(X_raw)
                df_csv["LogReg"] = log_model.predict(X_scaled)
                df_csv["RandomForest"] = rf_model.predict(X_raw)
                df_csv["GradBoost"] = gb_model.predict(X_raw)
                st.dataframe(df_csv, use_container_width=True)
                csv_out = df_csv.to_csv(index=False).encode("utf-8")
                st.download_button("⬇️ Download Results", csv_out, "predictions.csv", "text/csv")