Spaces:
Sleeping
Sleeping
Iman Kozly
committed on
Update src/streamlit_app.py
Browse files- src/streamlit_app.py +38 -1
src/streamlit_app.py
CHANGED
|
@@ -23,6 +23,35 @@ st.markdown(
|
|
| 23 |
unsafe_allow_html=True
|
| 24 |
)
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
# ================== Settings ==================
|
| 27 |
DATA_PATH = "./src/Rabies__Weather__War_Combined_1.4.25.xlsx"
|
| 28 |
MODEL_PATH = "./src/final_model_gradient_boosting.pkl"
|
|
@@ -130,6 +159,13 @@ if submitted:
|
|
| 130 |
|
| 131 |
st.success(f"✅ Model Prediction: **Region = {region_pred} ({region_confidence:.2f}%), "
|
| 132 |
f"Month = {month_pred} ({month_confidence:.2f}%)**")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
# ================== Alerts Dictionary per Target ==================
|
| 134 |
alerts_dict_region = {
|
| 135 |
'x': "🟡 X coordinate seems high for Region prediction, check for outliers.",
|
|
@@ -322,7 +358,8 @@ if submitted:
|
|
| 322 |
lines = explain_carmer.split('\n') # אם יש פסקאות
|
| 323 |
explain_carmer_to_save = pd.DataFrame(lines, columns=['Explanation'])
|
| 324 |
with pd.ExcelWriter(excel_buffer, engine='xlsxwriter') as writer:
|
| 325 |
-
download_df.to_excel(writer, sheet_name='
|
|
|
|
| 326 |
fi_df.to_excel(writer, sheet_name='Feature Importances', index=False)
|
| 327 |
pval_df.to_excel(writer, sheet_name='Pearson p-values', index=False)
|
| 328 |
cramers_df.to_excel(writer, sheet_name= 'Cramers V', index=False)
|
|
|
|
| 23 |
unsafe_allow_html=True
|
| 24 |
)
|
| 25 |
|
| 26 |
+
def compute_similarity(df: pd.DataFrame, inp: pd.DataFrame, columns: list):
    """Rank every row of ``df`` by how similar it is to a single input record.

    For each column in ``columns`` a per-row score in ``[0, 1]`` is computed:

    * numeric columns: ``1 - |value - target| / (max - min)`` of the column,
      clipped to ``[0, 1]`` (a constant column uses a divisor of 1);
    * every other column (including booleans): ``1`` on an exact match with
      the input value, otherwise ``0``.

    A missing value contributes ``0`` for that column instead of turning the
    whole row's similarity into NaN.  The final similarity is the mean of the
    per-column scores.

    Parameters:
        df: DataFrame holding the existing records.
        inp: DataFrame whose first row is the record to compare against.
        columns: Names of the columns to compare; must not be empty.

    Returns:
        A copy of ``df`` with an added ``'similarity'`` column, sorted from
        the most similar row to the least similar one.  ``df`` itself is left
        untouched.

    Raises:
        ValueError: If ``columns`` is empty.
        KeyError: If a requested column is missing from ``df`` or ``inp``.
    """
    cols = list(columns)
    if not cols:
        raise ValueError("compute_similarity requires at least one column to compare")

    # Accumulate positionally so a non-unique index on df cannot misalign rows.
    total = pd.Series(0.0, index=df.index, dtype=float)

    for col in cols:
        series = df[col]
        target = inp[col].iloc[0]

        is_numeric = (
            pd.api.types.is_numeric_dtype(series)
            and not pd.api.types.is_bool_dtype(series)
        )
        if is_numeric:
            # Normalise by the column's range so that negative values or a
            # non-positive maximum cannot push the score outside [0, 1].
            span = series.max() - series.min()
            if not span > 0:  # constant, empty or all-NaN column
                span = 1
            col_score = (1 - (series - target).abs() / span).clip(lower=0.0, upper=1.0)
        else:
            # Categorical comparison: exact match or nothing.
            col_score = (series == target).astype(float)

        total = total + col_score.fillna(0.0).to_numpy(dtype=float)

    # Work on a copy: adding a column to the caller's frame would leak the
    # 'similarity' column into every later use of the dataset.
    ranked = df.copy()
    ranked['similarity'] = total / len(cols)
    # A stable sort keeps tied rows in their original dataset order.
    return ranked.sort_values('similarity', ascending=False, kind='stable')
|
| 54 |
+
|
| 55 |
# ================== Settings ==================
|
| 56 |
DATA_PATH = "./src/Rabies__Weather__War_Combined_1.4.25.xlsx"
|
| 57 |
MODEL_PATH = "./src/final_model_gradient_boosting.pkl"
|
|
|
|
| 159 |
|
| 160 |
st.success(f"✅ Model Prediction: **Region = {region_pred} ({region_confidence:.2f}%), "
|
| 161 |
f"Month = {month_pred} ({month_confidence:.2f}%)**")
|
| 162 |
+
|
| 163 |
+
st.subheader("🟢 Most Similar Record to Your Input (Similarity Based)")
|
| 164 |
+
columns_to_compare = label_cols + num_cols + extra_cols # all relevant columns
|
| 165 |
+
most_similar_row = compute_similarity(df, input_df, columns_to_compare)
|
| 166 |
+
st.write("The record from the existing dataset that is most similar to your input:")
|
| 167 |
+
st.dataframe(most_similar_row)
|
| 168 |
+
|
| 169 |
# ================== Alerts Dictionary per Target ==================
|
| 170 |
alerts_dict_region = {
|
| 171 |
'x': "🟡 X coordinate seems high for Region prediction, check for outliers.",
|
|
|
|
| 358 |
lines = explain_carmer.split('\n') # אם יש פסקאות
|
| 359 |
explain_carmer_to_save = pd.DataFrame(lines, columns=['Explanation'])
|
| 360 |
with pd.ExcelWriter(excel_buffer, engine='xlsxwriter') as writer:
|
| 361 |
+
download_df.to_excel(writer, sheet_name='Prediction', index=False)
|
| 362 |
+
most_similar_row.to_excel(writer, sheet_name='Similar row table', index=False)
|
| 363 |
fi_df.to_excel(writer, sheet_name='Feature Importances', index=False)
|
| 364 |
pval_df.to_excel(writer, sheet_name='Pearson p-values', index=False)
|
| 365 |
cramers_df.to_excel(writer, sheet_name= 'Cramers V', index=False)
|