Iman Kozly committed on
Commit
052aecd
·
verified ·
1 Parent(s): 0ecbeac

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +38 -1
src/streamlit_app.py CHANGED
@@ -23,6 +23,35 @@ st.markdown(
23
  unsafe_allow_html=True
24
  )
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  # ================== הגדרות ==================
27
  DATA_PATH = "./src/Rabies__Weather__War_Combined_1.4.25.xlsx"
28
  MODEL_PATH = "./src/final_model_gradient_boosting.pkl"
@@ -130,6 +159,13 @@ if submitted:
130
 
131
  st.success(f"✅ Model Prediction: **Region = {region_pred} ({region_confidence:.2f}%), "
132
  f"Month = {month_pred} ({month_confidence:.2f}%)**")
 
 
 
 
 
 
 
133
  # ================== Alerts Dictionary per Target ==================
134
  alerts_dict_region = {
135
  'x': "🟡 X coordinate seems high for Region prediction, check for outliers.",
@@ -322,7 +358,8 @@ if submitted:
322
  lines = explain_carmer.split('\n') # אם יש פסקאות
323
  explain_carmer_to_save = pd.DataFrame(lines, columns=['Explanation'])
324
  with pd.ExcelWriter(excel_buffer, engine='xlsxwriter') as writer:
325
- download_df.to_excel(writer, sheet_name='סהר -Prediction', index=False)
 
326
  fi_df.to_excel(writer, sheet_name='Feature Importances', index=False)
327
  pval_df.to_excel(writer, sheet_name='Pearson p-values', index=False)
328
  cramers_df.to_excel(writer, sheet_name= 'Cramers V', index=False)
 
23
  unsafe_allow_html=True
24
  )
25
 
26
def compute_similarity(df: pd.DataFrame, inp: pd.DataFrame, columns: list) -> pd.DataFrame:
    """Score every row of ``df`` by its similarity to the single record in ``inp``.

    For each column in ``columns`` a per-column similarity is computed:

    * numeric column: ``1 - |row_value - input_value| / (max - min)``, i.e. the
      absolute difference normalised by the column's observed range (a
      constant column uses a divisor of 1). The value is 1 for an exact
      match and can drop below 0 when the input lies outside the range
      observed in ``df``.
    * any other column: 1 when the row value equals the input value, else 0.

    The final score is the mean of the per-column similarities.

    Parameters:
        df: existing data to search through. It is NOT modified.
        inp: DataFrame whose first row is the record to compare against.
        columns: names of the columns (present in both frames) to compare.

    Returns:
        A copy of ``df`` with an added ``'similarity'`` column, sorted from
        the most similar row to the least similar one.

    Raises:
        ValueError: if ``columns`` is empty (there is nothing to average).
    """
    if not columns:
        raise ValueError("columns must contain at least one column name")

    # Running sum of per-column similarities, one entry per row of df.
    total = pd.Series(0.0, index=df.index, dtype="float64")

    for col in columns:
        target = inp[col].iloc[0]
        if pd.api.types.is_numeric_dtype(df[col]):
            # Cast to float so bool / nullable-integer columns subtract cleanly.
            values = df[col].astype("float64")
            # Normalise by the column range, computed once per column.
            span = values.max() - values.min()
            if not span > 0:
                # Constant, empty or all-NaN column: avoid dividing by 0 / NaN.
                span = 1.0
            per_col = 1 - (values - float(target)).abs() / span
        else:
            # Categorical comparison: exact match counts as 1, otherwise 0.
            per_col = (df[col] == target).astype("float64")
        # Add positionally so a non-unique index cannot trigger re-alignment.
        total = total + per_col.to_numpy()

    # Work on a copy so the caller's DataFrame does not gain a column.
    scored = df.copy()
    scored["similarity"] = total.to_numpy() / len(columns)
    return scored.sort_values("similarity", ascending=False)
54
+
55
  # ================== הגדרות ==================
56
  DATA_PATH = "./src/Rabies__Weather__War_Combined_1.4.25.xlsx"
57
  MODEL_PATH = "./src/final_model_gradient_boosting.pkl"
 
159
 
160
  st.success(f"✅ Model Prediction: **Region = {region_pred} ({region_confidence:.2f}%), "
161
  f"Month = {month_pred} ({month_confidence:.2f}%)**")
162
+
163
+ st.subheader("🟢 Most Similar Record to Your Input (Similarity Based)")
164
+ columns_to_compare = label_cols + num_cols + extra_cols # all relevant columns
165
+ most_similar_row = compute_similarity(df, input_df, columns_to_compare)
166
+ st.write("The record from the existing dataset that is most similar to your input:")
167
+ st.dataframe(most_similar_row)
168
+
169
  # ================== Alerts Dictionary per Target ==================
170
  alerts_dict_region = {
171
  'x': "🟡 X coordinate seems high for Region prediction, check for outliers.",
 
358
  lines = explain_carmer.split('\n') # אם יש פסקאות
359
  explain_carmer_to_save = pd.DataFrame(lines, columns=['Explanation'])
360
  with pd.ExcelWriter(excel_buffer, engine='xlsxwriter') as writer:
361
+ download_df.to_excel(writer, sheet_name='Prediction', index=False)
362
+ most_similar_row.to_excel(writer, sheet_name='Similar row table', index=False)
363
  fi_df.to_excel(writer, sheet_name='Feature Importances', index=False)
364
  pval_df.to_excel(writer, sheet_name='Pearson p-values', index=False)
365
  cramers_df.to_excel(writer, sheet_name= 'Cramers V', index=False)