Spaces:

aylintokgoz27
/

DataScience

Sleeping

App Files Files Community

DataScience / app.py

aylintokgoz27

Upload 5 files

346c97b verified 13 days ago

raw

history blame contribute delete

3.67 kB

	import os
	import streamlit as st
	import joblib
	import numpy as np
	import pandas as pd
	from huggingface_hub import hf_hub_download

	# Page setup; set_page_config is the first Streamlit call in the visible script.
	st.set_page_config(
	    page_title="Data Science London",
	    page_icon="🔬",
	)
	st.title("🔬 Data Science London Classifier")
	st.write("Enter 40 feature values to classify the sample.")

	# Hugging Face Hub repository id passed to hf_hub_download for the model files.
	REPO = "selimyildirim18/data-science-london-scikit-learn"

	@st.cache_resource
	def _load_models_cached():
	    """Download and deserialize the scaler and the three classifiers.

	    Returns:
	        A 4-tuple ``(scaler, log_model, rf_model, gb_model)``.

	    Raises:
	        Whatever ``hf_hub_download`` or ``joblib.load`` raise. Errors are
	        deliberately not handled here: only a normal return is stored by
	        ``st.cache_resource``, so a failed attempt is retried on the next
	        rerun instead of being cached.
	    """
	    filenames = ("scaler.pkl", "log_model.pkl", "rf_model.pkl", "gb_model.pkl")
	    # Fetch every artifact first, then deserialize, as the original did.
	    paths = [hf_hub_download(repo_id=REPO, filename=name) for name in filenames]
	    # NOTE(security): joblib.load unpickles arbitrary Python objects, so this
	    # executes whatever the files in REPO contain. Only point REPO at a
	    # repository you trust (consider pinning a revision).
	    return tuple(joblib.load(path) for path in paths)


	def load_models():
	    """Return ``(scaler, log_model, rf_model, gb_model)`` for the app.

	    On any failure an error box is shown via ``st.error`` and
	    ``(None, None, None, None)`` is returned, so callers can test
	    ``scaler is not None``.

	    The try/except lives outside the cached helper on purpose: when it was
	    inside the cached function, the all-None failure tuple was itself a
	    normal return value and got cached, so one transient download error
	    kept the app disabled until the cache was cleared.
	    """
	    try:
	        return _load_models_cached()
	    except Exception as e:
	        st.error(f"❌ Model yüklenemedi: {e}")
	        return None, None, None, None

	# Number of input features, and the labels used for widgets and CSV columns.
	N_FEATURES = 40
	FEATURE_NAMES = [f"feature_{i}" for i in range(N_FEATURES)]


	def _predict_with_confidence(model, X):
	    """Return ``(label, probability)`` for the single sample in ``X``.

	    Args:
	        model: Fitted classifier exposing ``predict``, ``predict_proba`` and
	            ``classes_``.
	        X: Array of shape ``(1, n_features)``.

	    Returns:
	        The predicted label and the probability assigned to that label.
	    """
	    label = model.predict(X)[0]
	    proba = model.predict_proba(X)[0]
	    # predict_proba columns are ordered like model.classes_, so the label
	    # must be looked up there; using the label itself as an index is only
	    # right when the classes happen to be exactly 0..k-1 integers.
	    matches = np.flatnonzero(np.asarray(model.classes_) == label)
	    column = int(matches[0]) if matches.size else int(np.argmax(proba))
	    return label, float(proba[column])


	def _read_feature_csv(uploaded_file):
	    """Parse an uploaded header-less CSV into a numeric feature table.

	    Args:
	        uploaded_file: File-like object returned by ``st.file_uploader``.

	    Returns:
	        ``(frame, None)`` on success, where ``frame`` has ``N_FEATURES``
	        numeric columns named by ``FEATURE_NAMES``; ``(None, message)`` when
	        the file is unreadable, has the wrong width, or contains
	        missing/non-numeric cells.
	    """
	    try:
	        frame = pd.read_csv(uploaded_file, header=None)
	    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
	        return None, f"Could not read the CSV file: {exc}"

	    if frame.shape[1] != N_FEATURES:
	        return None, (
	            f"Expected {N_FEATURES} columns but the file has {frame.shape[1]}."
	        )

	    frame.columns = FEATURE_NAMES
	    # A header row or stray text turns into NaN here and is reported below
	    # instead of crashing inside the scaler/models.
	    numeric = frame.apply(pd.to_numeric, errors="coerce")
	    if numeric.isna().any().any():
	        return None, (
	            "The file contains missing or non-numeric values "
	            "(remove any header row and make sure every cell is a number)."
	        )
	    return numeric, None


	with st.spinner("Loading models..."):
	    scaler, log_model, rf_model, gb_model = load_models()

	# load_models() yields None placeholders on failure; skip the UI in that case.
	if scaler is not None:
	    tab1, tab2 = st.tabs(["✏️ Manuel Giriş", "📂 CSV Yükleme"])

	    # --- Tab 1: manual entry of a single sample -------------------------
	    with tab1:
	        st.write("Enter values for all 40 features:")

	        # Lay the inputs out four per row; the feature name doubles as the
	        # widget key so each input keeps its own state.
	        cols = st.columns(4)
	        values = []
	        for i, feat in enumerate(FEATURE_NAMES):
	            with cols[i % 4]:
	                values.append(
	                    st.number_input(
	                        feat, value=0.0, step=0.01, format="%.4f", key=feat
	                    )
	                )

	        if st.button("Predict"):
	            X_input = np.array(values).reshape(1, -1)
	            X_scaled = scaler.transform(X_input)

	            # NOTE(review): only the logistic model receives scaled input;
	            # presumably this mirrors how the models were trained — confirm.
	            model_inputs = [
	                ("Logistic Regression", log_model, X_scaled),
	                ("Random Forest", rf_model, X_input),
	                ("Gradient Boosting", gb_model, X_input),
	            ]
	            for column, (title, model, X) in zip(st.columns(3), model_inputs):
	                pred, confidence = _predict_with_confidence(model, X)
	                with column:
	                    st.subheader(title)
	                    st.success(f"Class: {pred}")
	                    st.metric("Confidence", f"{confidence*100:.1f}%")

	    # --- Tab 2: batch prediction from an uploaded CSV -------------------
	    with tab2:
	        st.write("Upload a CSV with 40 columns (feature_0 to feature_39), no header required.")

	        uploaded = st.file_uploader("Upload CSV", type=["csv"])

	        if uploaded is not None:
	            df_csv, problem = _read_feature_csv(uploaded)
	            if problem is not None:
	                st.error(problem)
	            else:
	                # Use plain arrays, like the manual tab, so the models see the
	                # same kind of input regardless of the column labels.
	                X_raw = df_csv.to_numpy(dtype=float)
	                X_scaled = scaler.transform(X_raw)

	                df_csv["LogReg"] = log_model.predict(X_scaled)
	                df_csv["RandomForest"] = rf_model.predict(X_raw)
	                df_csv["GradBoost"] = gb_model.predict(X_raw)

	                st.dataframe(df_csv, use_container_width=True)

	                csv_out = df_csv.to_csv(index=False).encode("utf-8")
	                st.download_button("⬇️ Download Results", csv_out, "predictions.csv", "text/csv")