valegro committed on
Commit
6cc1cdb
·
verified ·
1 Parent(s): 5e574a5

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +244 -0
app.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ import seaborn as sns
5
+ import matplotlib.pyplot as plt
6
+ from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
7
+ from sklearn.linear_model import LogisticRegression
8
+ from sklearn.svm import SVC
9
+ from sklearn.model_selection import train_test_split
10
+ from sklearn.pipeline import Pipeline
11
+ from sklearn.preprocessing import StandardScaler
12
+
13
############################################
# 1. Session state initialisation
############################################
# Each key is created only when it is absent, so values already stored in
# the session are left untouched.
for _state_key, _initial_value in (
    ("data", None),
    ("models", {}),
    ("models_trained", False),
    ("X_test", None),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
24
+
25
############################################
# 2. App reset helper
############################################
def reset_app():
    """Restore the session state to its initial values and notify the user.

    Clears the stored dataset, the trained models, the "models trained" flag
    and the stored test split, then shows a success message.
    """
    initial_state = (
        ("data", None),
        ("models", {}),
        ("models_trained", False),
        ("X_test", None),
    )
    for key, value in initial_state:
        st.session_state[key] = value
    st.success("App reset. You can start fresh.")
34
+
35
############################################
# 3. Title and model parameters
############################################
st.title("Classificatore Redditività Materiali con Tutti i Modelli")

with st.sidebar:
    st.header("Parametri del Modello")
    # Test-set share expressed as a percentage (10-50, default 20).
    test_size = st.slider(
        "Dimensione del test set (%)",
        min_value=10,
        max_value=50,
        value=20,
        step=5,
    )
    # Seed shared by the synthetic data generator, the split and the models.
    random_state = st.number_input(
        "Random State",
        min_value=0,
        value=42,
        step=1,
    )
43
+
44
############################################
# 4. Dataset upload or synthetic generation
############################################
st.header("1. Carica un file CSV o genera un dataset fittizio")

# Number of rows to create when generating the synthetic dataset.
N = st.slider("Numero di record da generare", 100, 2000, 500, step=100)

# Generate a synthetic dataset on demand.
if st.button("Genera Dataset Sintetico"):
    # A private RandomState yields the same draws as seeding the global
    # NumPy generator with this seed, without mutating global state.
    rng = np.random.RandomState(int(random_state))
    eta_uso = rng.randint(0, 15, size=N)
    frequenza_uso = rng.randint(1, 24, size=N)
    costo_riparazione = rng.randint(50, 500, size=N)
    valore_residuo = rng.randint(100, 1000, size=N)

    # Label rule: 1 when residual value minus repair cost minus 10 per unit
    # of `eta_uso` is strictly positive, else 0. `frequenza_uso` does not
    # take part in the label.
    profittevole = (
        (valore_residuo - costo_riparazione - eta_uso * 10) > 0
    ).astype(int)

    data = pd.DataFrame({
        "eta_uso": eta_uso,
        "frequenza_uso": frequenza_uso,
        "costo_riparazione": costo_riparazione,
        "valore_residuo": valore_residuo,
        "Profittevole": profittevole,
    })
    st.session_state["data"] = data
    st.write(f"Generated data shape: {data.shape}")
    st.dataframe(data.head(10))

# Offer a CSV upload only while no dataset is available.
if st.session_state["data"] is None:
    uploaded_file = st.file_uploader("Scegli un file CSV", type=["csv"])
    if uploaded_file is not None:
        data = None
        try:
            try:
                data = pd.read_csv(uploaded_file, encoding="utf-8")
            except UnicodeDecodeError:
                # The failed attempt consumed part of the buffer: rewind
                # before retrying with a permissive single-byte encoding.
                uploaded_file.seek(0)
                data = pd.read_csv(
                    uploaded_file,
                    encoding="latin-1",
                    encoding_errors="replace",
                )
        except (pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
            # Report unreadable files instead of crashing the whole script.
            st.error(f"Impossibile leggere il file CSV: {exc}")

        if data is not None:
            st.write(f"Dataset caricato con {len(data)} record.")
            st.dataframe(data.head(10))
            st.session_state["data"] = data
87
+
88
############################################
# 4b. Download of the current dataset (when one is present)
############################################
current_dataset = st.session_state["data"]
if current_dataset is not None:
    st.subheader("Download Dataset Attuale")
    # Serialise without the index so the file only holds the data columns.
    st.download_button(
        label="Scarica il CSV",
        data=current_dataset.to_csv(index=False),
        file_name="materiali_profittevole.csv",
        mime="text/csv",
    )
100
+
101
############################################
# 5. Data exploration
############################################
if st.session_state["data"] is not None:
    explored = st.session_state["data"]

    st.header("Esplorazione dei Dati")
    st.subheader("Grafico a Dispersione")
    features = explored.columns.tolist()
    # A scatter plot needs two columns; default to the first two.
    if len(features) >= 2:
        x_axis = st.selectbox("Seleziona l'asse X", features, index=0)
        y_axis = st.selectbox("Seleziona l'asse Y", features, index=1)
        st.write("**Grafico a Dispersione**")
        st.scatter_chart(explored, x=x_axis, y=y_axis)

    st.subheader("Matrice di Correlazione")
    corr = explored.corr(numeric_only=True)
    st.write(corr)
    if corr.empty:
        # No numeric columns: there is nothing to draw as a heatmap.
        st.warning("Nessuna colonna numerica disponibile per la correlazione.")
    else:
        fig, ax = plt.subplots()
        sns.heatmap(corr, annot=True, ax=ax, cmap="viridis")
        st.pyplot(fig)
        # pyplot keeps every figure alive until it is closed explicitly;
        # close it so figures do not pile up across script runs.
        plt.close(fig)

    st.subheader("Istogrammi delle Feature")
    selected_feature = st.selectbox("Seleziona una Feature", features)
    if pd.api.types.is_numeric_dtype(explored[selected_feature]):
        fig, ax = plt.subplots()
        sns.histplot(explored[selected_feature], kde=True, ax=ax, color="skyblue")
        st.pyplot(fig)
        plt.close(fig)
    else:
        st.warning("La feature selezionata non è numerica.")
129
+
130
############################################
# 6. Model training (all models, in sequence)
############################################
def _clear_trained_models():
    """Forget previously trained models so stale ones are never used to predict."""
    st.session_state["models"] = {}
    st.session_state["models_trained"] = False
    st.session_state["X_test"] = None


if st.session_state["data"] is not None:
    st.header("2. Addestramento dei Modelli")
    target_column = st.text_input("Nome colonna target (es. 'Profittevole'):", value="Profittevole")
    training_data = st.session_state["data"]

    if target_column not in training_data.columns:
        # Without a valid target nothing can be trained; also invalidate any
        # models left over from an earlier, valid configuration.
        _clear_trained_models()
        st.error(f"La colonna '{target_column}' non esiste nel dataset.")
    else:
        X = training_data.drop(columns=[target_column])
        y = training_data[target_column]

        # Only numeric columns are used as features.
        non_numeric = [c for c in X.columns if not pd.api.types.is_numeric_dtype(X[c])]
        if non_numeric:
            st.warning(f"Le colonne non numeriche {non_numeric} verranno rimosse.")
            X = X.drop(columns=non_numeric)

        if X.shape[1] == 0:
            _clear_trained_models()
            st.error("Nessuna feature numerica disponibile per l'addestramento.")
        else:
            # The sidebar slider is a percentage; the splitter wants a fraction.
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=test_size / 100, random_state=random_state
            )

            if y_train.nunique() < 2:
                # The classifiers cannot be fitted on a single class.
                _clear_trained_models()
                st.error("La colonna target deve contenere almeno due classi per l'addestramento.")
            else:
                # Every classifier is wrapped in the same scaling pipeline.
                classifiers = {
                    "Random Forest": RandomForestClassifier(random_state=random_state),
                    "Gradient Boosting": GradientBoostingClassifier(random_state=random_state),
                    "Logistic Regression": LogisticRegression(max_iter=1000, random_state=random_state),
                    "Support Vector Machine (SVC)": SVC(probability=True, random_state=random_state),
                }
                models = {
                    model_name: Pipeline([
                        ('scaler', StandardScaler()),
                        ('classifier', classifier),
                    ])
                    for model_name, classifier in classifiers.items()
                }

                # Fit and evaluate each model on the held-out split.
                for model_name, model in models.items():
                    model.fit(X_train, y_train)
                    y_pred = model.predict(X_test)
                    acc = np.round(100 * (y_pred == y_test).mean(), 2)
                    st.write(f"**{model_name} - Accuratezza:** {acc}%")

                    # Feature importance is shown for the two tree ensembles.
                    if model_name in ["Random Forest", "Gradient Boosting"]:
                        feature_importances = model.named_steps['classifier'].feature_importances_
                        importance_df = pd.DataFrame({
                            'Feature': X.columns,
                            'Importance': feature_importances
                        }).sort_values(by='Importance', ascending=False)
                        st.subheader(f"Feature Importance - {model_name}")
                        st.dataframe(importance_df)

                # Persist the fitted pipelines and the test split for the
                # prediction section.
                st.session_state["models"] = models
                st.session_state["models_trained"] = True
                st.session_state["X_test"] = X_test
194
+
195
############################################
# 7. Evaluation of new samples with all models
############################################
if st.session_state["models_trained"] and st.session_state["X_test"] is not None:
    st.header("3. Valutazione di Nuovi Campioni con Tutti i Modelli")
    st.write("Inserisci i valori per il nuovo campione:")

    # Use exactly the columns (and order) the models were fitted on, taken
    # from the stored test split rather than re-derived from the dataset.
    feature_cols = list(st.session_state["X_test"].columns)

    input_values = {}
    for col in feature_cols:
        input_values[col] = st.number_input(f"{col}", value=0.0)

    if st.button("Valuta Campione con Tutti i Modelli"):
        new_sample = pd.DataFrame([input_values], columns=feature_cols)
        predictions = {}
        probabilities = {}

        for model_name, model in st.session_state["models"].items():
            pred = model.predict(new_sample)[0]
            predictions[model_name] = pred

            # Ask the whole pipeline for probabilities so the sample goes
            # through the same scaler as in `predict`; calling the bare
            # classifier would feed it unscaled values.
            probabilities[model_name] = None
            if hasattr(model, "predict_proba"):
                proba = model.predict_proba(new_sample)[0]
                class_labels = list(model.classes_)
                # Look up the column of class 1 (profitable) instead of
                # assuming it sits at index 1.
                if 1 in class_labels:
                    probabilities[model_name] = proba[class_labels.index(1)]

        st.subheader("Risultati della Valutazione:")
        for model_name in predictions:
            result = "PROFITTEVOLE" if predictions[model_name] == 1 else "NON PROFITTEVOLE"
            st.write(f"**{model_name}:** {result}")
            if probabilities[model_name] is not None:
                st.write(f" Probabilità di Redditività: {probabilities[model_name]:.2f}")

        # Check whether every model predicted the same value.
        if len(set(predictions.values())) == 1:
            st.success("Tutti i modelli hanno predetto lo stesso valore!")
        else:
            st.warning("I modelli hanno predetto valori differenti!")
239
+
240
############################################
# 8. Reset button
############################################
reset_clicked = st.button("Azzera App")
if reset_clicked:
    # Restore the session state to its initial values.
    reset_app()