lynn-twinkl committed on
Commit
65ec40d
·
1 Parent(s): 76ee39e

added: additional filter options; id_col detector; dynamic downloader

Browse files
Files changed (1) hide show
  1. app.py +34 -8
app.py CHANGED
@@ -18,7 +18,7 @@ from streamlit_extras.metric_cards import style_metric_cards
18
 
19
  from src.extract_usage import extract_usage
20
  from src.necessity_index import compute_necessity, index_scaler, qcut_labels
21
- from src.column_detection import detect_freeform_col
22
  from src.shortlist import shortlist_applications
23
  from src.twinkl_originals import find_book_candidates
24
  from src.preprocess_text import normalise_text
@@ -56,6 +56,8 @@ def load_and_process(raw_csv: bytes) -> Tuple[pd.DataFrame, str]:
56
 
57
  # Detect freeform column
58
  freeform_col = detect_freeform_col(df_orig)
 
 
59
 
60
  df_orig = df_orig[df_orig[freeform_col].notna()]
61
 
@@ -81,7 +83,7 @@ def load_and_process(raw_csv: bytes) -> Tuple[pd.DataFrame, str]:
81
  docs = df_orig[freeform_col].to_list()
82
  scored['Usage'] = extract_usage(docs)
83
 
84
- return scored, freeform_col
85
 
86
  # -----------------------------------------------------------------------------
87
  # Derivative computations that rely only on the processed DataFrame are also
@@ -114,7 +116,7 @@ if uploaded_file is not None:
114
 
115
  ## ====== PROCESSED DATA (CACHED) ======
116
 
117
- df, freeform_col = load_and_process(raw)
118
 
119
  book_candidates_df = df[df['book_candidates'] == True]
120
 
@@ -138,20 +140,44 @@ if uploaded_file is not None:
138
  auto_short_df = scored_full[scored_full["shortlist_score"] >= threshold_score]
139
 
140
  st.title("Filters")
 
 
 
 
 
 
 
141
  min_idx = float(df['necessity_index'].min())
142
  max_idx = float(df['necessity_index'].max())
143
  filter_range = st.sidebar.slider(
144
  "Necessity Index Range", min_value=min_idx, max_value=max_idx, value=(min_idx, max_idx)
145
  )
146
 
147
- filtered_df = df[(~df.index.isin(auto_short_df.index)) & (df['necessity_index'].between(filter_range[0], filter_range[1]))]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
  st.markdown(f"**Total Applications:** {len(df)}")
150
  st.markdown(f"**Filtered Applications:** {len(filtered_df)}")
151
 
152
- # ------ CREATE TAB SECTIONS -------
 
153
  tab1, tab2 = st.tabs(["Shortlist Manager","Insights"])
154
 
 
155
  ##################################################
156
  # SHORTLIST MANAGER TAB #
157
  ##################################################
@@ -179,7 +205,7 @@ if uploaded_file is not None:
179
 
180
 
181
  st.download_button(
182
- label=f"Download CSV",
183
  data=csv_data,
184
  file_name=file_name,
185
  mime="text/csv",
@@ -197,7 +223,7 @@ if uploaded_file is not None:
197
  mode_col.metric("Mode", mode)
198
 
199
  shorltist_cols_to_show = [
200
- 'Id',
201
  freeform_col,
202
  'Usage',
203
  'necessity_index',
@@ -225,7 +251,7 @@ if uploaded_file is not None:
225
  st.markdown("#### Filtered Applications")
226
  st.write("")
227
  for idx, row in filtered_df.iterrows():
228
- with st.expander(f"Application \#{idx}"):
229
  st.write("")
230
  col1, col2, col3, col4 = st.columns(4)
231
  col1.metric("Necessity", f"{row['necessity_index']:.1f}")
 
18
 
19
  from src.extract_usage import extract_usage
20
  from src.necessity_index import compute_necessity, index_scaler, qcut_labels
21
+ from src.column_detection import detect_freeform_col, detect_id_col
22
  from src.shortlist import shortlist_applications
23
  from src.twinkl_originals import find_book_candidates
24
  from src.preprocess_text import normalise_text
 
56
 
57
  # Detect freeform column
58
  freeform_col = detect_freeform_col(df_orig)
59
+ id_col = detect_id_col(df_orig)
60
+ print(id_col)
61
 
62
  df_orig = df_orig[df_orig[freeform_col].notna()]
63
 
 
83
  docs = df_orig[freeform_col].to_list()
84
  scored['Usage'] = extract_usage(docs)
85
 
86
+ return scored, freeform_col, id_col
87
 
88
  # -----------------------------------------------------------------------------
89
  # Derivative computations that rely only on the processed DataFrame are also
 
116
 
117
  ## ====== PROCESSED DATA (CACHED) ======
118
 
119
+ df, freeform_col, id_col = load_and_process(raw)
120
 
121
  book_candidates_df = df[df['book_candidates'] == True]
122
 
 
140
  auto_short_df = scored_full[scored_full["shortlist_score"] >= threshold_score]
141
 
142
  st.title("Filters")
143
+
144
+ ## --- Dataframe To Filter ---
145
+ options = ['All applications', 'Not shortlisted']
146
+ selected_view = st.pills('Choose data to filter', options, default='Not shortlisted')
147
+ st.write("")
148
+
149
+ ## --- Necessity Index Filtering ---
150
  min_idx = float(df['necessity_index'].min())
151
  max_idx = float(df['necessity_index'].max())
152
  filter_range = st.sidebar.slider(
153
  "Necessity Index Range", min_value=min_idx, max_value=max_idx, value=(min_idx, max_idx)
154
  )
155
 
156
+ def filter_all_applications(df, auto_short_df, filter_range):
157
+ return df[df['necessity_index'].between(filter_range[0], filter_range[1])]
158
+
159
+ def filter_not_shortlisted(df, auto_short_df, filter_range):
160
+ return df[
161
+ (~df.index.isin(auto_short_df.index)) &
162
+ (df['necessity_index'].between(filter_range[0], filter_range[1]))
163
+ ]
164
+
165
+ filter_map = {
166
+ 'All applications': filter_all_applications,
167
+ 'Not shortlisted': filter_not_shortlisted,
168
+ }
169
+
170
+ filtered_df = filter_map[selected_view](df, auto_short_df, filter_range)
171
+
172
 
173
  st.markdown(f"**Total Applications:** {len(df)}")
174
  st.markdown(f"**Filtered Applications:** {len(filtered_df)}")
175
 
176
+
177
+ ## ====== CREATE TAB SECTIONS =======
178
  tab1, tab2 = st.tabs(["Shortlist Manager","Insights"])
179
 
180
+
181
  ##################################################
182
  # SHORTLIST MANAGER TAB #
183
  ##################################################
 
205
 
206
 
207
  st.download_button(
208
+ label=f"Download {choice}",
209
  data=csv_data,
210
  file_name=file_name,
211
  mime="text/csv",
 
223
  mode_col.metric("Mode", mode)
224
 
225
  shorltist_cols_to_show = [
226
+ id_col,
227
  freeform_col,
228
  'Usage',
229
  'necessity_index',
 
251
  st.markdown("#### Filtered Applications")
252
  st.write("")
253
  for idx, row in filtered_df.iterrows():
254
+ with st.expander(f"Application {row[id_col]}"):
255
  st.write("")
256
  col1, col2, col3, col4 = st.columns(4)
257
  col1.metric("Necessity", f"{row['necessity_index']:.1f}")