ElmiraManavi committed
Commit 2e25d51 · Parent: 78d766d

add fn and error tables

Files changed (1): src/pages/Test_Evaluation.py (+92 −57)
src/pages/Test_Evaluation.py CHANGED
@@ -1,11 +1,9 @@
-import pandas as pd
 import matplotlib.pyplot as plt
+import pandas as pd
 import seaborn as sns
 import streamlit as st
 from bson import ObjectId
 from html_to_markdown import convert
-from pandas.core.interchange.dataframe_protocol import DataFrame
-import altair as alt
 
 from services import init_connection
 
@@ -49,15 +47,83 @@ def print_schedule_obj(s):
     return f"{start_date_str} - {end_date_str} | {start_time_str} - {end_time_str}\n\n"
 
 
-def create_overall_metrics_data_frame(overall_metrics: dict) -> DataFrame:
+def create_data_metrics_df(overall_metrics: dict) -> pd.DataFrame:
     field_order = ["page_type", "title", "locations", "schedule", "start_date", "end_date", "start_time", "end_time"]
+
     rows = {key: "" for key in field_order if key in overall_metrics}
+
     for field, metrics in overall_metrics.items():
-        rows[field] = metrics
+        cleaned_metrics = metrics.copy()
+        cleaned_metrics = {k: v for k, v in cleaned_metrics.items() if isinstance(v, float)}
+        rows[field] = cleaned_metrics
         print(rows[field])
 
     df = pd.DataFrame(rows).T
+    return df
+
 
+def create_confusion_matrix(overall_metrics: dict):
+    page_type_metric = overall_metrics.get("page_type", {})
+    tp_count = page_type_metric.get("tp", 0)
+    tn_count = page_type_metric.get("tn", 0)
+    fp_count = page_type_metric.get("fp", 0)
+    fn_count = page_type_metric.get("fn", 0)
+
+    cm = pd.DataFrame(
+        [[tn_count, fp_count],
+         [fn_count, tp_count]],
+        index=['Expected NO_EVENT', 'Expected EVENT'],
+        columns=['Predicted NO_EVENT', 'Predicted EVENT']
+    )
+
+    fig, ax = plt.subplots()
+    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
+    ax.set_xlabel('Predicted')
+    ax.set_ylabel('Expected')
+
+    return fig
+
+
+def create_fn_df(record_results: dict):
+    false_negatives = [v.get("data", {}).get("reason") for v in record_results.values() if
+                       v.get("record_metrics", {}).get("page_type", {}).get("fn") == 1]
+
+    fn_counts = pd.Series(false_negatives).value_counts()
+    fn_percent = fn_counts / len(record_results) * 100
+
+    df = pd.DataFrame({
+        'Rejected Reason': fn_counts.index,
+        'Anzahl': fn_counts.values,
+        'Prozent': fn_percent.values
+    })
+
+    total_row = pd.DataFrame([{
+        'Rejected Reason': 'Gesamt',
+        'Anzahl': fn_counts.sum(),
+        'Prozent': fn_percent.sum()
+    }])
+    df = pd.concat([df, total_row], ignore_index=True)
+    df = df.style.format({'Prozent': '{:.1f}%'})
+    return df
+
+
+def create_error_df(overall_metrics: dict, batchsize: int):
+    rows = []
+    error_count = 0
+    for k, v in overall_metrics.get("error", {}).items():
+        rows.append({
+            'Error': k.upper(),
+            'Anzahl': v,
+            'Prozent': v / batchsize * 100
+        })
+        error_count += v
+    rows.append({
+        'Error': "Gesamt",
+        'Anzahl': error_count,
+        'Prozent': error_count / batchsize * 100
+    })
+    df = pd.DataFrame(rows).style.format({'Prozent': '{:.1f}%'})
     return df
 
 
@@ -125,7 +191,8 @@ def create_detail_table(test: dict):
 
 
 @st.dialog("Original Seite", width="medium")
-def show_website(html):
+def show_website(url, html):
+    st.info(f"Link zur Original Website: {url}")
     md = convert(html)
     st.write(md)
 
@@ -141,69 +208,35 @@ selected_id = st.selectbox("Wähle einen Test aus", options=list(options.keys())
 
 if selected_id:
     test = db.test_evaluation.find_one({"_id": ObjectId(selected_id)})
-    results = test.get("record_results", {})
-    batchsize = len(results)
+    record_results = test.get("record_results", {})
+    batchsize = len(record_results)
 
     st.write(
         f"**Test ID:** {selected_id} | **Status:** {test.get("status")} | **Batchsize:** {batchsize}")
 
     overall_metrics = test.get("overall_metrics", {})
    if overall_metrics:
-        df_overall_metrics = create_overall_metrics_data_frame(overall_metrics)
-        st.write("# Overall Metrics")
-        st.write("### Data Metrics")
-        st.bar_chart(df_overall_metrics, width=400, stack=False, sort=False)
-
-        # error_count = len(
-        #     [k for k, v in results.items() if v.get("page_type") not in ("EVENT", "NO_EVENT")])
-        # st.write(f"Errors: {error_count}")
-
-        tp_count = len([v for v in results.values() if v.get("record_metrics", {}).get("page_type") == "tp"])
-        tn_count = len([v for v in results.values() if v.get("record_metrics", {}).get("page_type") == "tn"])
-        fp_count = len([v for v in results.values() if v.get("record_metrics", {}).get("page_type") == "fp"])
-        fn_count = len([v for v in results.values() if v.get("record_metrics", {}).get("page_type") == "fn"])
-
-        # Confusion Matrix als DataFrame
-        cm = pd.DataFrame(
-            [[tn_count, fp_count],
-             [fn_count, tp_count]],
-            index=['Expected NO_EVENT', 'Expected EVENT'],
-            columns=['Predicted NO_EVENT', 'Predicted EVENT']
-        )
-
-        # Heatmap plotten
-        fig, ax = plt.subplots()
-        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
-        ax.set_xlabel('Predicted')
-        ax.set_ylabel('Expected')
-
-        false_negatives = [v.get("meta", {}).get("reason") for v in results.values() if
-                           v.get("record_metrics", {}).get("page_type") == "fn"]
+        df_data_metrics = create_data_metrics_df(overall_metrics)
+        cm_fig = create_confusion_matrix(overall_metrics)
+        df_fn = create_fn_df(record_results)
+        df_error = create_error_df(overall_metrics, batchsize)
 
-        fn_counts = pd.Series(false_negatives).value_counts()
-        fn_percent = fn_counts / len(results) * 100
-
-        df = pd.DataFrame({
-            'Rejected Reason': fn_counts.index,
-            'Anzahl': fn_counts.values,
-            'Prozent': fn_percent.values
-        })
+        st.write("# Overall Metrics")
 
-        total_row = pd.DataFrame([{
-            'Rejected Reason': 'Gesamt',
-            'Anzahl': fn_counts.sum(),
-            'Prozent': fn_percent.sum()
-        }])
-        df = pd.concat([df, total_row], ignore_index=True)
+        st.write("### Data Metrics")
+        st.bar_chart(df_data_metrics, width=400, stack=False, sort=False)
 
        col1, col2 = st.columns(2)
        with col1:
-            st.write("### Confusion Matrix für Page Classification (page_type")
-            st.pyplot(fig, width=450)
+            st.write("### Confusion Matrix für Page Classification (page_type)")
+            st.pyplot(cm_fig, width=450)
 
        with col2:
            st.write("### Falsch abgelehnte Seiten (false negatives fn)")
-            st.dataframe(df.style.format({'Prozent': '{:.1f}%'}))
+            st.dataframe(df_fn)
+
+        st.write("### Fehler in der Pipeline (error)")
+        st.dataframe(df_error)
 
 
    else:
@@ -215,7 +248,9 @@ if selected_id:
 
    record_id = st.text_input(label="Gebe eine Record ID ein um die Original Website anzusehen.", value="")
    if record_id:
-        html = db.testdata_1.find_one({"_id": ObjectId(record_id)}).get("html")
+        record = db.testdata_1.find_one({"_id": ObjectId(record_id)})
+        html = record.get("html")
+        url = record.get("url")
        if html:
            html = html.decode("utf-8")
-            show_website(html)
+            show_website(url, html)
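
For context, the new create_confusion_matrix helper reads the four page_type counters from overall_metrics and lays the matrix out with expected labels on the rows and predicted labels on the columns. A quick standalone check of that orientation, with made-up counts (not taken from any real test run):

import pandas as pd

# Hypothetical counts -- tn=30, fp=5, fn=10, tp=40 -- in the same layout as the diff.
cm = pd.DataFrame([[30, 5], [10, 40]],
                  index=['Expected NO_EVENT', 'Expected EVENT'],
                  columns=['Predicted NO_EVENT', 'Predicted EVENT'])
print(cm)
#                    Predicted NO_EVENT  Predicted EVENT
# Expected NO_EVENT                  30                5
# Expected EVENT                     10               40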
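
Likewise, a minimal, self-contained sketch of the error table that create_error_df builds. The field names and counts are illustrative, inferred from the diff; the actual schema of the test_evaluation documents is an assumption:

import pandas as pd

# Hypothetical overall_metrics["error"] sub-document -- not the authoritative schema.
overall_metrics = {
    "error": {"timeout": 3, "parse_error": 2},  # assumed pipeline errors by type
}
batchsize = 90  # assumed: len(record_results)

# Same construction as create_error_df: one row per error type plus a total row.
rows = [{"Error": k.upper(), "Anzahl": v, "Prozent": v / batchsize * 100}
        for k, v in overall_metrics["error"].items()]
total = sum(r["Anzahl"] for r in rows)
rows.append({"Error": "Gesamt", "Anzahl": total, "Prozent": total / batchsize * 100})
print(pd.DataFrame(rows))
#          Error  Anzahl   Prozent
# 0      TIMEOUT       3  3.333333
# 1  PARSE_ERROR       2  2.222222
# 2       Gesamt       5  5.555556

Since create_fn_df and create_error_df return pandas Styler objects (via df.style.format), the page can pass them straight to st.dataframe, which renders the Prozent column with one decimal place.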