Spaces:

valegro
/

AI_Parts_Analyzer

Sleeping

App Files Files Community

valegro committed on Feb 1, 2025

Commit

6cc1cdb

verified ·

1 Parent(s): 5e574a5

Create app.py

Browse files

Files changed (1) hide show

app.py +244 -0

app.py ADDED Viewed

	@@ -0,0 +1,244 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
+from sklearn.linear_model import LogisticRegression
+from sklearn.svm import SVC
+from sklearn.model_selection import train_test_split
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import StandardScaler
+############################################
+# 1. Inizializzazione dello stato di sessione
+############################################
+if "data" not in st.session_state:
+    st.session_state["data"] = None
+if "models" not in st.session_state:
+    st.session_state["models"] = {}
+if "models_trained" not in st.session_state:
+    st.session_state["models_trained"] = False
+if "X_test" not in st.session_state:
+    st.session_state["X_test"] = None
+############################################
+# 2. Funzione per resettare l'app
+############################################
+def reset_app():
+    st.session_state["data"] = None
+    st.session_state["models"] = {}
+    st.session_state["models_trained"] = False
+    st.session_state["X_test"] = None
+    st.success("App reset. You can start fresh.")
+############################################
+# 3. Titolo e Parametri Modello
+############################################
+st.title("Classificatore Redditività Materiali con Tutti i Modelli")
+st.sidebar.header("Parametri del Modello")
+test_size = st.sidebar.slider("Dimensione del test set (%)", 10, 50, 20, step=5)
+random_state = st.sidebar.number_input("Random State", min_value=0, value=42, step=1)
+############################################
+# 4. Caricamento o Generazione del Dataset
+############################################
+st.header("1. Carica un file CSV o genera un dataset fittizio")
+# Slider per il numero di record
+N = st.slider("Numero di record da generare", 100, 2000, 500, step=100)
+# Genera dataset sintetico
+if st.button("Genera Dataset Sintetico"):
+    np.random.seed(random_state)
+    eta_uso = np.random.randint(0, 15, size=N)
+    frequenza_uso = np.random.randint(1, 24, size=N)
+    costo_riparazione = np.random.randint(50, 500, size=N)
+    valore_residuo = np.random.randint(100, 1000, size=N)
+    profittevole = [
+        1 if vr - cr - (e * 10) > 0 else 0
+        for e, fr, cr, vr in zip(eta_uso, frequenza_uso, costo_riparazione, valore_residuo)
+    ]
+    data = pd.DataFrame({
+        "eta_uso": eta_uso,
+        "frequenza_uso": frequenza_uso,
+        "costo_riparazione": costo_riparazione,
+        "valore_residuo": valore_residuo,
+        "Profittevole": profittevole
+    })
+    st.session_state["data"] = data
+    st.write(f"Generated data shape: {data.shape}")
+    st.dataframe(data.head(10))
+# Upload CSV se non è già disponibile un dataset
+if st.session_state["data"] is None:
+    uploaded_file = st.file_uploader("Scegli un file CSV", type=["csv"])
+    if uploaded_file is not None:
+        try:
+            data = pd.read_csv(uploaded_file, encoding="utf-8")
+        except UnicodeDecodeError:
+            data = pd.read_csv(uploaded_file, encoding="latin-1", errors="replace")
+        st.write(f"Dataset caricato con {len(data)} record.")
+        st.dataframe(data.head(10))
+        st.session_state["data"] = data
+############################################
+# 4b. Download del Dataset (se presente)
+############################################
+if st.session_state["data"] is not None:
+    st.subheader("Download Dataset Attuale")
+    csv_data = st.session_state["data"].to_csv(index=False)
+    st.download_button(
+        label="Scarica il CSV",
+        data=csv_data,
+        file_name="materiali_profittevole.csv",
+        mime="text/csv"
+    )
+############################################
+# 5. Esplorazione dei Dati
+############################################
+if st.session_state["data"] is not None:
+    st.header("Esplorazione dei Dati")
+    st.subheader("Grafico a Dispersione")
+    features = st.session_state["data"].columns.tolist()
+    if len(features) >= 2:
+        x_axis = st.selectbox("Seleziona l'asse X", features, index=0)
+        y_axis = st.selectbox("Seleziona l'asse Y", features, index=1)
+        st.write("**Grafico a Dispersione**")
+        st.scatter_chart(st.session_state["data"], x=x_axis, y=y_axis)
+    st.subheader("Matrice di Correlazione")
+    corr = st.session_state["data"].corr(numeric_only=True)
+    st.write(corr)
+    fig, ax = plt.subplots()
+    sns.heatmap(corr, annot=True, ax=ax, cmap="viridis")
+    st.pyplot(fig)
+    st.subheader("Istogrammi delle Feature")
+    selected_feature = st.selectbox("Seleziona una Feature", features)
+    if pd.api.types.is_numeric_dtype(st.session_state["data"][selected_feature]):
+        fig, ax = plt.subplots()
+        sns.histplot(st.session_state["data"][selected_feature], kde=True, ax=ax, color="skyblue")
+        st.pyplot(fig)
+    else:
+        st.warning("La feature selezionata non è numerica.")
+############################################
+# 6. Addestramento dei Modelli (tutti in sequenza)
+############################################
+if st.session_state["data"] is not None:
+    st.header("2. Addestramento dei Modelli")
+    target_column = st.text_input("Nome colonna target (es. 'Profittevole'):", value="Profittevole")
+    if target_column in st.session_state["data"].columns:
+        X = st.session_state["data"].drop(columns=[target_column])
+        y = st.session_state["data"][target_column]
+        non_numeric = [c for c in X.columns if not pd.api.types.is_numeric_dtype(X[c])]
+        if non_numeric:
+            st.warning(f"Le colonne non numeriche {non_numeric} verranno rimosse.")
+            X = X.drop(columns=non_numeric)
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=test_size/100, random_state=random_state
+        )
+        # Definizione dei pipeline per ciascun modello
+        models = {
+            "Random Forest": Pipeline([
+                ('scaler', StandardScaler()),
+                ('classifier', RandomForestClassifier(random_state=random_state))
+            ]),
+            "Gradient Boosting": Pipeline([
+                ('scaler', StandardScaler()),
+                ('classifier', GradientBoostingClassifier(random_state=random_state))
+            ]),
+            "Logistic Regression": Pipeline([
+                ('scaler', StandardScaler()),
+                ('classifier', LogisticRegression(max_iter=1000, random_state=random_state))
+            ]),
+            "Support Vector Machine (SVC)": Pipeline([
+                ('scaler', StandardScaler()),
+                ('classifier', SVC(probability=True, random_state=random_state))
+            ])
+        }
+        accuracies = {}
+        # Addestramento e valutazione di ciascun modello
+        for model_name, model in models.items():
+            model.fit(X_train, y_train)
+            y_pred = model.predict(X_test)
+            acc = np.round(100 * (y_pred == y_test).mean(), 2)
+            accuracies[model_name] = acc
+            st.write(f"**{model_name} - Accuratezza:** {acc}%")
+            # Visualizzazione della feature importance per RF e GB
+            if model_name in ["Random Forest", "Gradient Boosting"]:
+                feature_importances = model.named_steps['classifier'].feature_importances_
+                importance_df = pd.DataFrame({
+                    'Feature': X.columns,
+                    'Importance': feature_importances
+                }).sort_values(by='Importance', ascending=False)
+                st.subheader(f"Feature Importance - {model_name}")
+                st.dataframe(importance_df)
+        st.session_state["models"] = models
+        st.session_state["models_trained"] = True
+        st.session_state["X_test"] = X_test
+    else:
+        st.error(f"La colonna '{target_column}' non esiste nel dataset.")
+############################################
+# 7. Valutazione di Nuovi Campioni con Tutti i Modelli
+############################################
+if st.session_state["models_trained"]:
+    st.header("3. Valutazione di Nuovi Campioni con Tutti i Modelli")
+    st.write("Inserisci i valori per il nuovo campione:")
+    # Per ricostruire le feature
+    all_cols = st.session_state["data"].columns
+    feature_cols = [c for c in all_cols if c != target_column]
+    numeric_cols = [c for c in feature_cols if pd.api.types.is_numeric_dtype(st.session_state["data"][c])]
+    input_values = {}
+    for col in numeric_cols:
+        input_values[col] = st.number_input(f"{col}", value=0.0)
+    if st.button("Valuta Campione con Tutti i Modelli"):
+        new_sample = pd.DataFrame([input_values])
+        predictions = {}
+        probabilities = {}
+        for model_name, model in st.session_state["models"].items():
+            pred = model.predict(new_sample)[0]
+            predictions[model_name] = pred
+            # Calcola la probabilità se il modello lo supporta
+            if hasattr(model.named_steps['classifier'], "predict_proba"):
+                proba = model.named_steps['classifier'].predict_proba(new_sample)[0]
+                probabilities[model_name] = proba[1]  # Probabilità per la classe "1" (profittevole)
+            else:
+                probabilities[model_name] = None
+        st.subheader("Risultati della Valutazione:")
+        for model_name in predictions:
+            result = "PROFITTEVOLE" if predictions[model_name] == 1 else "NON PROFITTEVOLE"
+            st.write(f"**{model_name}:** {result}")
+            if probabilities[model_name] is not None:
+                st.write(f"   Probabilità di Redditività: {probabilities[model_name]:.2f}")
+        # Verifica se tutti i modelli hanno predetto lo stesso valore
+        if len(set(predictions.values())) == 1:
+            st.success("Tutti i modelli hanno predetto lo stesso valore!")
+        else:
+            st.warning("I modelli hanno predetto valori differenti!")
+############################################
+# 8. Pulsante di Reset
+############################################
+if st.button("Azzera App"):
+    reset_app()