Spaces:

ImanK12
/

RabiesRadar

Sleeping

App Files Files Community

Iman Kozly committed on Sep 12, 2025

Commit

052aecd

verified ·

1 Parent(s): 0ecbeac

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +38 -1

src/streamlit_app.py CHANGED Viewed

@@ -23,6 +23,35 @@ st.markdown(
     unsafe_allow_html=True
 )
 # ================== הגדרות ==================
 DATA_PATH = "./src/Rabies__Weather__War_Combined_1.4.25.xlsx"
 MODEL_PATH = "./src/final_model_gradient_boosting.pkl"
@@ -130,6 +159,13 @@ if submitted:
         st.success(f"✅ Model Prediction: **Region = {region_pred} ({region_confidence:.2f}%), "
                    f"Month = {month_pred} ({month_confidence:.2f}%)**")
         # ================== Alerts Dictionary per Target ==================
         alerts_dict_region = {
             'x': "🟡 X coordinate seems high for Region prediction, check for outliers.",
@@ -322,7 +358,8 @@ if submitted:
         lines = explain_carmer.split('\n')  # אם יש פסקאות
         explain_carmer_to_save = pd.DataFrame(lines, columns=['Explanation'])
         with pd.ExcelWriter(excel_buffer, engine='xlsxwriter') as writer:
-            download_df.to_excel(writer, sheet_name='סהר -Prediction', index=False)
             fi_df.to_excel(writer, sheet_name='Feature Importances', index=False)
             pval_df.to_excel(writer, sheet_name='Pearson p-values', index=False)
             cramers_df.to_excel(writer, sheet_name= 'Cramers V', index=False)

     unsafe_allow_html=True
 )
def compute_similarity(df: pd.DataFrame, inp: pd.DataFrame, columns: list):
    """
    Score every row of ``df`` by its similarity to a single input record.

    Parameters:
    df       : DataFrame with the existing data
    inp      : DataFrame whose first row is the record to compare against
    columns  : list of column names (present in both frames) to compare on

    Per-column score:
    - numeric columns: ``1 - |value - input| / (max - min)`` where max/min are
      taken over ``df[col]``; the result is clipped to [0, 1] so an input that
      lies outside the observed range cannot produce a negative score.
    - all other columns (including booleans): 1 if equal to the input, else 0.
    The row's similarity is the mean of its per-column scores. Missing numeric
    values propagate as NaN and therefore sort last.

    Returns a NEW DataFrame (``df`` itself is not modified) with an added
    'similarity' column, sorted from highest to lowest similarity.

    Raises ValueError if ``columns`` is empty.
    """
    if not columns:
        raise ValueError("columns must contain at least one column name")

    is_numeric = pd.api.types.is_numeric_dtype
    is_bool = pd.api.types.is_bool_dtype

    # Accumulate positionally (via to_numpy) so a non-unique index on df
    # cannot trigger pandas index alignment.
    total = pd.Series(0.0, index=df.index, dtype=float)
    for col in columns:
        series = df[col]
        target = inp[col].values[0]
        if is_numeric(series) and not is_bool(series):
            # Normalize by the column's range; fall back to 1 for constant
            # or empty columns to avoid dividing by zero / NaN.
            span = series.max() - series.min()
            if pd.isna(span) or span == 0:
                span = 1
            score = (1 - (series - target).abs() / span).clip(lower=0, upper=1)
        else:
            # Categorical comparison: exact match only.
            score = series == target
        total = total + score.astype(float).to_numpy()

    # Work on a copy so the caller's dataset does not gain a stray column.
    result = df.copy()
    result['similarity'] = (total / len(columns)).to_numpy()
    return result.sort_values('similarity', ascending=False)
 # ================== הגדרות ==================
 DATA_PATH = "./src/Rabies__Weather__War_Combined_1.4.25.xlsx"
 MODEL_PATH = "./src/final_model_gradient_boosting.pkl"
         st.success(f"✅ Model Prediction: **Region = {region_pred} ({region_confidence:.2f}%), "
                    f"Month = {month_pred} ({month_confidence:.2f}%)**")
+        st.subheader("🟢 Most Similar Record to Your Input (Similarity Based)")
+        columns_to_compare = label_cols + num_cols + extra_cols  # all relevant columns
+        most_similar_row = compute_similarity(df, input_df, columns_to_compare)
+        st.write("The record from the existing dataset that is most similar to your input:")
+        st.dataframe(most_similar_row)
         # ================== Alerts Dictionary per Target ==================
         alerts_dict_region = {
             'x': "🟡 X coordinate seems high for Region prediction, check for outliers.",
         lines = explain_carmer.split('\n')  # אם יש פסקאות
         explain_carmer_to_save = pd.DataFrame(lines, columns=['Explanation'])
         with pd.ExcelWriter(excel_buffer, engine='xlsxwriter') as writer:
+            download_df.to_excel(writer, sheet_name='Prediction', index=False)
+            most_similar_row.to_excel(writer, sheet_name='Similar row table', index=False)
             fi_df.to_excel(writer, sheet_name='Feature Importances', index=False)
             pval_df.to_excel(writer, sheet_name='Pearson p-values', index=False)
             cramers_df.to_excel(writer, sheet_name= 'Cramers V', index=False)