# DataScience / app.py
# aylintokgoz27's picture
# Upload 5 files
# 346c97b verified
import os
import streamlit as st
import joblib
import numpy as np
import pandas as pd
from huggingface_hub import hf_hub_download
# Hugging Face Hub repository that the model artefacts are downloaded from.
REPO = "selimyildirim18/data-science-london-scikit-learn"

# Page chrome: configure the browser tab first, then render the heading and
# the short usage hint shown above the input tabs.
st.set_page_config(page_icon="🔬", page_title="Data Science London")
st.title("🔬 Data Science London Classifier")
st.write("Enter 40 feature values to classify the sample.")
@st.cache_resource
def _fetch_models():
    """Download and deserialize the scaler and the three classifiers.

    Only this helper is cached. It lets exceptions propagate, so a failed
    download is never stored in the resource cache and the next rerun
    retries instead of being stuck with a cached failure.

    Returns:
        A 4-tuple ``(scaler, log_model, rf_model, gb_model)``.
    """
    filenames = ("scaler.pkl", "log_model.pkl", "rf_model.pkl", "gb_model.pkl")
    # Fetch every artefact before deserializing any of them.
    paths = [hf_hub_download(repo_id=REPO, filename=name) for name in filenames]
    # NOTE(security): joblib.load unpickles the files, which can execute
    # arbitrary code. Only point REPO at a repository you trust.
    return tuple(joblib.load(path) for path in paths)


def load_models():
    """Return the fitted scaler and classifiers, or four ``None`` on failure.

    Successful loads are served from the resource cache held by
    ``_fetch_models``. On any error the message is shown in the app via
    ``st.error`` and ``(None, None, None, None)`` is returned; that failure
    result is deliberately not cached, so a transient network error does not
    disable the app until the process restarts.

    Returns:
        ``(scaler, log_model, rf_model, gb_model)`` on success, otherwise
        ``(None, None, None, None)``.
    """
    try:
        return _fetch_models()
    except Exception as e:  # UI boundary: report the problem instead of crashing
        st.error(f"❌ Model yüklenemedi: {e}")
        return None, None, None, None
# Number of input features expected by the scaler and the classifiers.
N_FEATURES = 40
FEATURE_NAMES = [f"feature_{i}" for i in range(N_FEATURES)]


def _predict_with_confidence(model, features):
    """Return ``(label, probability)`` for the single sample in *features*.

    The probability is the one the model assigns to the label it predicted.
    ``predict_proba`` columns are ordered like ``model.classes_``, so the
    column is looked up there instead of using the label itself as an index
    (which is only correct when the labels happen to be 0..k-1).
    """
    label = model.predict(features)[0]
    probabilities = model.predict_proba(features)[0]
    column = int(np.flatnonzero(model.classes_ == label)[0])
    return label, probabilities[column]


def _render_manual_tab(scaler, log_model, rf_model, gb_model):
    """Render the 40 numeric inputs and, on "Predict", the three results."""
    st.write("Enter values for all 40 features:")
    grid = st.columns(4)
    values = []
    for i, feat in enumerate(FEATURE_NAMES):
        # Spread the inputs row-wise over four columns.
        with grid[i % 4]:
            values.append(
                st.number_input(feat, value=0.0, step=0.01, format="%.4f", key=feat)
            )

    if not st.button("Predict"):
        return

    X_input = np.array(values).reshape(1, -1)
    X_scaled = scaler.transform(X_input)
    # Only the logistic regression receives the scaled features; the two
    # tree ensembles are given the raw values, as in the original app.
    panels = (
        ("Logistic Regression", log_model, X_scaled),
        ("Random Forest", rf_model, X_input),
        ("Gradient Boosting", gb_model, X_input),
    )
    # Compute every prediction before drawing anything, so a model error
    # does not leave a half-rendered result row.
    results = [
        (title, *_predict_with_confidence(model, features))
        for title, model, features in panels
    ]
    for column, (title, label, confidence) in zip(st.columns(3), results):
        with column:
            st.subheader(title)
            st.success(f"**Class: {label}**")
            st.metric("Confidence", f"{confidence * 100:.1f}%")


def _render_csv_tab(scaler, log_model, rf_model, gb_model):
    """Render the CSV uploader and batch-predict every row of a valid file."""
    st.write("Upload a CSV with 40 columns (feature_0 to feature_39), no header required.")
    uploaded = st.file_uploader("Upload CSV", type=["csv"])
    if uploaded is None:
        return

    try:
        df_csv = pd.read_csv(uploaded, header=None)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the CSV file: {exc}")
        return

    # Validate the shape up front; assigning 40 column names to a frame of a
    # different width would otherwise raise a raw ValueError traceback.
    if df_csv.shape[1] != N_FEATURES:
        st.error(
            f"Expected {N_FEATURES} columns but the uploaded file has {df_csv.shape[1]}."
        )
        return

    try:
        # Plain float array, matching what the manual tab feeds the models.
        X_raw = df_csv.to_numpy(dtype=float)
    except (TypeError, ValueError):
        st.error("All values must be numeric. Remove any header row and try again.")
        return

    df_csv.columns = FEATURE_NAMES
    X_scaled = scaler.transform(X_raw)
    df_csv["LogReg"] = log_model.predict(X_scaled)
    df_csv["RandomForest"] = rf_model.predict(X_raw)
    df_csv["GradBoost"] = gb_model.predict(X_raw)

    st.dataframe(df_csv, use_container_width=True)
    csv_out = df_csv.to_csv(index=False).encode("utf-8")
    st.download_button("⬇️ Download Results", csv_out, "predictions.csv", "text/csv")


with st.spinner("Loading models..."):
    scaler, log_model, rf_model, gb_model = load_models()

# load_models() returns four None values after reporting a failure; in that
# case there is nothing to render beyond the error it already displayed.
if scaler is not None:
    tab1, tab2 = st.tabs(["✏️ Manuel Giriş", "📂 CSV Yükleme"])
    with tab1:
        _render_manual_tab(scaler, log_model, rf_model, gb_model)
    with tab2:
        _render_csv_tab(scaler, log_model, rf_model, gb_model)