lynn-twinkl commited on
Commit
34a810f
·
1 Parent(s): 1b6cfe4

bug fix for issue causing auto shortlist to be performed on manual filter NI range

Browse files
Files changed (1) hide show
  1. app.py +42 -29
app.py CHANGED
@@ -16,7 +16,9 @@ from functions.column_detection import detect_freeform_answer_col
16
  from functions.shortlist import shortlist_applications
17
  import typing
18
 
19
- # ---- CACHEABLE PROCESSING ----
 
 
20
 
21
  @st.cache_data
22
  def load_and_process(raw_csv: bytes) -> typing.Tuple[pd.DataFrame, str]:
@@ -24,17 +26,20 @@ def load_and_process(raw_csv: bytes) -> typing.Tuple[pd.DataFrame, str]:
24
  Load CSV from raw bytes, detect freeform column, compute necessity scores,
25
  and extract usage items. Returns processed DataFrame and freeform column name.
26
  """
27
- # Read uploaded data
28
  df_orig = pd.read_csv(BytesIO(raw_csv))
29
  # Detect freeform column
30
  freeform_col = detect_freeform_answer_col(df_orig)
31
- #Word count
 
32
  df_orig['word_count'] = df_orig[freeform_col].fillna('').str.split().str.len()
33
- # Compute necessity scores
 
34
  scored = df_orig.join(df_orig[freeform_col].apply(compute_necessity))
35
  scored['necessity_index'] = index_scaler(scored['necessity_index'].values)
36
  scored['priority'] = qcut_labels(scored['necessity_index'])
37
- # LangChain function for extracting usage
 
38
  docs = df_orig[freeform_col].to_list()
39
  usage = extract_usage(docs)
40
  scored['Usage'] = usage
@@ -49,7 +54,7 @@ st.title("Community Collections Helper")
49
  uploaded_file = st.file_uploader("Upload grant applications file for analysis", type='csv')
50
 
51
  if uploaded_file is not None:
52
- # Read raw bytes for caching and repeated use --> this ensure all the processing isn't repeated when a user changes the filters
53
  raw = uploaded_file.read()
54
 
55
  ## ---- PROCESSED DATA (CACHED) ----
@@ -58,39 +63,42 @@ if uploaded_file is not None:
58
 
59
  ## ---- INTERACTIVE FILTERING & REVIEW INTERFACE ----
60
 
61
- st.sidebar.title("Shortlist Mode")
62
  with st.sidebar:
 
 
 
63
  mode = st.segmented_control(
64
  "Select one option",
65
  options=["strict", "generous"],
66
  default="strict",
67
  )
 
 
 
 
68
 
69
- st.sidebar.title("Filters")
70
- min_idx = float(df['necessity_index'].min())
71
- max_idx = float(df['necessity_index'].max())
72
- filter_range = st.sidebar.slider(
73
- "Necessity Index Range", min_value=min_idx, max_value=max_idx, value=(min_idx, max_idx)
74
- )
75
- filtered_df = df[df['necessity_index'].between(filter_range[0], filter_range[1])]
 
 
 
76
 
77
- # Sidebar summary
78
- st.sidebar.markdown(f"**Total Applications:** {len(df)}")
79
- st.sidebar.markdown(f"**Filtered Applications:** {len(filtered_df)}")
80
 
81
  tab1, tab2 = st.tabs(["Shortlist Manager","Insights"])
82
 
 
 
83
  with tab1:
84
- # Automatic Shortlisting Controls
85
- st.header("✨ Automatic Shortlist")
86
 
87
-
88
  st.markdown("Here's your **automatically genereated shortlist!** If you'd like to manually add additional applications, you may do so on the section below!")
89
- # Full scores for threshold calculation
90
- scored_full = shortlist_applications(filtered_df, k=len(filtered_df))
91
- quantile_map = {"strict": 0.75, "generous": 0.5}
92
- threshold_score = scored_full["auto_shortlist_score"].quantile(quantile_map[mode])
93
- auto_short = shortlist_applications(filtered_df, threshold=threshold_score)
94
  csv_auto = auto_short.to_csv(index=False).encode("utf-8")
95
  st.download_button(
96
  label="Download Shortlist",
@@ -108,7 +116,10 @@ if uploaded_file is not None:
108
 
109
  freeform_col_index = auto_short.columns.get_loc(freeform_col)
110
  st.dataframe(auto_short.iloc[:, freeform_col_index:], hide_index=True)
111
- # Review applications
 
 
 
112
  st.header("🌸 Manual Filtering")
113
  st.markdown(
114
  """
@@ -125,13 +136,13 @@ if uploaded_file is not None:
125
  col3.metric("Severity", f"{int(row['severity_score'])}")
126
  col4.metric("Vulnerability", f"{int(row['vulnerability_score'])}")
127
  style_metric_cards(box_shadow=False, border_left_color='#E7F4FF',background_color='#E7F4FF', border_size_px=0, border_radius_px=6)
128
- # Clean usage items
 
129
  usage_items = [item for item in row['Usage'] if item and item.lower() != 'none']
130
  st.markdown("##### Excerpt")
131
  st.write(row[freeform_col])
132
  if usage_items:
133
  st.markdown("##### Usage")
134
- # Display usage items as colored pills
135
  pills_html = "".join(
136
  f"<span style='display:inline-block;background-color:#E7F4FF;color:#125E9E;border-radius:20px;padding:4px 10px;margin:2px;font-size:0.95rem;'>{item}</span>"
137
  for item in usage_items
@@ -140,7 +151,7 @@ if uploaded_file is not None:
140
  else:
141
  st.caption("*No usage found*")
142
  st.write("")
143
- # Shortlist checkbox
144
  st.checkbox(
145
  "Add to shortlist",
146
  key=f"shortlist_{idx}"
@@ -159,6 +170,8 @@ if uploaded_file is not None:
159
  )
160
 
161
 
 
 
162
  with tab2:
163
  st.write("")
164
 
 
16
  from functions.shortlist import shortlist_applications
17
  import typing
18
 
19
+ ##################################
20
+ # CACHED PROCESSING FUNCTION
21
+ ##################################
22
 
23
  @st.cache_data
24
  def load_and_process(raw_csv: bytes) -> typing.Tuple[pd.DataFrame, str]:
 
26
  Load CSV from raw bytes, detect freeform column, compute necessity scores,
27
  and extract usage items. Returns processed DataFrame and freeform column name.
28
  """
29
+ # Read Uploaded Data
30
  df_orig = pd.read_csv(BytesIO(raw_csv))
31
  # Detect freeform column
32
  freeform_col = detect_freeform_answer_col(df_orig)
33
+
34
+ #Word Count
35
  df_orig['word_count'] = df_orig[freeform_col].fillna('').str.split().str.len()
36
+
37
+ # Compute Necessity Scores
38
  scored = df_orig.join(df_orig[freeform_col].apply(compute_necessity))
39
  scored['necessity_index'] = index_scaler(scored['necessity_index'].values)
40
  scored['priority'] = qcut_labels(scored['necessity_index'])
41
+
42
+ # Usage Extraction
43
  docs = df_orig[freeform_col].to_list()
44
  usage = extract_usage(docs)
45
  scored['Usage'] = usage
 
54
  uploaded_file = st.file_uploader("Upload grant applications file for analysis", type='csv')
55
 
56
  if uploaded_file is not None:
57
+ # Read file as raw bytes for caching and repeated use --> this ensures all the processing isn't repeated when a user changes the filters
58
  raw = uploaded_file.read()
59
 
60
  ## ---- PROCESSED DATA (CACHED) ----
 
63
 
64
  ## ---- INTERACTIVE FILTERING & REVIEW INTERFACE ----
65
 
 
66
  with st.sidebar:
67
+ st.title("Shortlist Mode")
68
+
69
+ quantile_map = {"strict": 0.75, "generous": 0.5}
70
  mode = st.segmented_control(
71
  "Select one option",
72
  options=["strict", "generous"],
73
  default="strict",
74
  )
75
+
76
+ scored_full = shortlist_applications(df, k=len(df))
77
+ threshold_score = scored_full["auto_shortlist_score"].quantile(quantile_map[mode])
78
+ auto_short = shortlist_applications(df, threshold=threshold_score)
79
 
80
+ st.title("Filters")
81
+ min_idx = float(df['necessity_index'].min())
82
+ max_idx = float(df['necessity_index'].max())
83
+ filter_range = st.sidebar.slider(
84
+ "Necessity Index Range", min_value=min_idx, max_value=max_idx, value=(min_idx, max_idx)
85
+ )
86
+ filtered_df = df[df['necessity_index'].between(filter_range[0], filter_range[1])]
87
+
88
+ st.markdown(f"**Total Applications:** {len(df)}")
89
+ st.markdown(f"**Filtered Applications:** {len(filtered_df)}")
90
 
91
+ ## ----------------- MAIN PANEL ----------------
 
 
92
 
93
  tab1, tab2 = st.tabs(["Shortlist Manager","Insights"])
94
 
95
+ ## ---------- SHORTLIST MANAGER TAB -----------
96
+
97
  with tab1:
 
 
98
 
99
+ st.header("✨ Automatic Shortlist")
100
  st.markdown("Here's your **automatically genereated shortlist!** If you'd like to manually add additional applications, you may do so on the section below!")
101
+
 
 
 
 
102
  csv_auto = auto_short.to_csv(index=False).encode("utf-8")
103
  st.download_button(
104
  label="Download Shortlist",
 
116
 
117
  freeform_col_index = auto_short.columns.get_loc(freeform_col)
118
  st.dataframe(auto_short.iloc[:, freeform_col_index:], hide_index=True)
119
+
120
+
121
+ ## REVIEW APPLICATIONS
122
+
123
  st.header("🌸 Manual Filtering")
124
  st.markdown(
125
  """
 
136
  col3.metric("Severity", f"{int(row['severity_score'])}")
137
  col4.metric("Vulnerability", f"{int(row['vulnerability_score'])}")
138
  style_metric_cards(box_shadow=False, border_left_color='#E7F4FF',background_color='#E7F4FF', border_size_px=0, border_radius_px=6)
139
+
140
+ # HTML for clean usage items
141
  usage_items = [item for item in row['Usage'] if item and item.lower() != 'none']
142
  st.markdown("##### Excerpt")
143
  st.write(row[freeform_col])
144
  if usage_items:
145
  st.markdown("##### Usage")
 
146
  pills_html = "".join(
147
  f"<span style='display:inline-block;background-color:#E7F4FF;color:#125E9E;border-radius:20px;padding:4px 10px;margin:2px;font-size:0.95rem;'>{item}</span>"
148
  for item in usage_items
 
151
  else:
152
  st.caption("*No usage found*")
153
  st.write("")
154
+
155
  st.checkbox(
156
  "Add to shortlist",
157
  key=f"shortlist_{idx}"
 
170
  )
171
 
172
 
173
+ ## ------------ INSIGHTS TAB -----------
174
+
175
  with tab2:
176
  st.write("")
177