lynn-twinkl committed on
Commit ·
65ec40d
1
Parent(s): 76ee39e
added: additional filter options; id_col detector; dynamic downloader
Browse files
app.py
CHANGED
|
@@ -18,7 +18,7 @@ from streamlit_extras.metric_cards import style_metric_cards
|
|
| 18 |
|
| 19 |
from src.extract_usage import extract_usage
|
| 20 |
from src.necessity_index import compute_necessity, index_scaler, qcut_labels
|
| 21 |
-
from src.column_detection import detect_freeform_col
|
| 22 |
from src.shortlist import shortlist_applications
|
| 23 |
from src.twinkl_originals import find_book_candidates
|
| 24 |
from src.preprocess_text import normalise_text
|
|
@@ -56,6 +56,8 @@ def load_and_process(raw_csv: bytes) -> Tuple[pd.DataFrame, str]:
|
|
| 56 |
|
| 57 |
# Detect freeform column
|
| 58 |
freeform_col = detect_freeform_col(df_orig)
|
|
|
|
|
|
|
| 59 |
|
| 60 |
df_orig = df_orig[df_orig[freeform_col].notna()]
|
| 61 |
|
|
@@ -81,7 +83,7 @@ def load_and_process(raw_csv: bytes) -> Tuple[pd.DataFrame, str]:
|
|
| 81 |
docs = df_orig[freeform_col].to_list()
|
| 82 |
scored['Usage'] = extract_usage(docs)
|
| 83 |
|
| 84 |
-
return scored, freeform_col
|
| 85 |
|
| 86 |
# -----------------------------------------------------------------------------
|
| 87 |
# Derivative computations that rely only on the processed DataFrame are also
|
|
@@ -114,7 +116,7 @@ if uploaded_file is not None:
|
|
| 114 |
|
| 115 |
## ====== PROCESSED DATA (CACHED) ======
|
| 116 |
|
| 117 |
-
df, freeform_col = load_and_process(raw)
|
| 118 |
|
| 119 |
book_candidates_df = df[df['book_candidates'] == True]
|
| 120 |
|
|
@@ -138,20 +140,44 @@ if uploaded_file is not None:
|
|
| 138 |
auto_short_df = scored_full[scored_full["shortlist_score"] >= threshold_score]
|
| 139 |
|
| 140 |
st.title("Filters")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
min_idx = float(df['necessity_index'].min())
|
| 142 |
max_idx = float(df['necessity_index'].max())
|
| 143 |
filter_range = st.sidebar.slider(
|
| 144 |
"Necessity Index Range", min_value=min_idx, max_value=max_idx, value=(min_idx, max_idx)
|
| 145 |
)
|
| 146 |
|
| 147 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 148 |
|
| 149 |
st.markdown(f"**Total Applications:** {len(df)}")
|
| 150 |
st.markdown(f"**Filtered Applications:** {len(filtered_df)}")
|
| 151 |
|
| 152 |
-
|
|
|
|
| 153 |
tab1, tab2 = st.tabs(["Shortlist Manager","Insights"])
|
| 154 |
|
|
|
|
| 155 |
##################################################
|
| 156 |
# SHORTLIST MANAGER TAB #
|
| 157 |
##################################################
|
|
@@ -179,7 +205,7 @@ if uploaded_file is not None:
|
|
| 179 |
|
| 180 |
|
| 181 |
st.download_button(
|
| 182 |
-
label=f"Download
|
| 183 |
data=csv_data,
|
| 184 |
file_name=file_name,
|
| 185 |
mime="text/csv",
|
|
@@ -197,7 +223,7 @@ if uploaded_file is not None:
|
|
| 197 |
mode_col.metric("Mode", mode)
|
| 198 |
|
| 199 |
shorltist_cols_to_show = [
|
| 200 |
-
|
| 201 |
freeform_col,
|
| 202 |
'Usage',
|
| 203 |
'necessity_index',
|
|
@@ -225,7 +251,7 @@ if uploaded_file is not None:
|
|
| 225 |
st.markdown("#### Filtered Applications")
|
| 226 |
st.write("")
|
| 227 |
for idx, row in filtered_df.iterrows():
|
| 228 |
-
with st.expander(f"Application
|
| 229 |
st.write("")
|
| 230 |
col1, col2, col3, col4 = st.columns(4)
|
| 231 |
col1.metric("Necessity", f"{row['necessity_index']:.1f}")
|
|
|
|
| 18 |
|
| 19 |
from src.extract_usage import extract_usage
|
| 20 |
from src.necessity_index import compute_necessity, index_scaler, qcut_labels
|
| 21 |
+
from src.column_detection import detect_freeform_col, detect_id_col
|
| 22 |
from src.shortlist import shortlist_applications
|
| 23 |
from src.twinkl_originals import find_book_candidates
|
| 24 |
from src.preprocess_text import normalise_text
|
|
|
|
| 56 |
|
| 57 |
# Detect freeform column
|
| 58 |
freeform_col = detect_freeform_col(df_orig)
|
| 59 |
+
id_col = detect_id_col(df_orig)
|
| 60 |
+
print(id_col)
|
| 61 |
|
| 62 |
df_orig = df_orig[df_orig[freeform_col].notna()]
|
| 63 |
|
|
|
|
| 83 |
docs = df_orig[freeform_col].to_list()
|
| 84 |
scored['Usage'] = extract_usage(docs)
|
| 85 |
|
| 86 |
+
return scored, freeform_col, id_col
|
| 87 |
|
| 88 |
# -----------------------------------------------------------------------------
|
| 89 |
# Derivative computations that rely only on the processed DataFrame are also
|
|
|
|
| 116 |
|
| 117 |
## ====== PROCESSED DATA (CACHED) ======
|
| 118 |
|
| 119 |
+
df, freeform_col, id_col = load_and_process(raw)
|
| 120 |
|
| 121 |
book_candidates_df = df[df['book_candidates'] == True]
|
| 122 |
|
|
|
|
| 140 |
auto_short_df = scored_full[scored_full["shortlist_score"] >= threshold_score]
|
| 141 |
|
| 142 |
st.title("Filters")
|
| 143 |
+
|
| 144 |
+
## --- Dataframe To Filter ---
|
| 145 |
+
options = ['All applications', 'Not shortlisted']
|
| 146 |
+
selected_view = st.pills('Choose data to filter', options, default='Not shortlisted')
|
| 147 |
+
st.write("")
|
| 148 |
+
|
| 149 |
+
## --- Necessity Index Filtering ---
|
| 150 |
min_idx = float(df['necessity_index'].min())
|
| 151 |
max_idx = float(df['necessity_index'].max())
|
| 152 |
filter_range = st.sidebar.slider(
|
| 153 |
"Necessity Index Range", min_value=min_idx, max_value=max_idx, value=(min_idx, max_idx)
|
| 154 |
)
|
| 155 |
|
| 156 |
+
def filter_all_applications(df, auto_short_df, filter_range):
|
| 157 |
+
return df[df['necessity_index'].between(filter_range[0], filter_range[1])]
|
| 158 |
+
|
| 159 |
+
def filter_not_shortlisted(df, auto_short_df, filter_range):
|
| 160 |
+
return df[
|
| 161 |
+
(~df.index.isin(auto_short_df.index)) &
|
| 162 |
+
(df['necessity_index'].between(filter_range[0], filter_range[1]))
|
| 163 |
+
]
|
| 164 |
+
|
| 165 |
+
filter_map = {
|
| 166 |
+
'All applications': filter_all_applications,
|
| 167 |
+
'Not shortlisted': filter_not_shortlisted,
|
| 168 |
+
}
|
| 169 |
+
|
| 170 |
+
filtered_df = filter_map[selected_view](df, auto_short_df, filter_range)
|
| 171 |
+
|
| 172 |
|
| 173 |
st.markdown(f"**Total Applications:** {len(df)}")
|
| 174 |
st.markdown(f"**Filtered Applications:** {len(filtered_df)}")
|
| 175 |
|
| 176 |
+
|
| 177 |
+
## ====== CREATE TAB SECTIONS =======
|
| 178 |
tab1, tab2 = st.tabs(["Shortlist Manager","Insights"])
|
| 179 |
|
| 180 |
+
|
| 181 |
##################################################
|
| 182 |
# SHORTLIST MANAGER TAB #
|
| 183 |
##################################################
|
|
|
|
| 205 |
|
| 206 |
|
| 207 |
st.download_button(
|
| 208 |
+
label=f"Download {choice}",
|
| 209 |
data=csv_data,
|
| 210 |
file_name=file_name,
|
| 211 |
mime="text/csv",
|
|
|
|
| 223 |
mode_col.metric("Mode", mode)
|
| 224 |
|
| 225 |
shorltist_cols_to_show = [
|
| 226 |
+
id_col,
|
| 227 |
freeform_col,
|
| 228 |
'Usage',
|
| 229 |
'necessity_index',
|
|
|
|
| 251 |
st.markdown("#### Filtered Applications")
|
| 252 |
st.write("")
|
| 253 |
for idx, row in filtered_df.iterrows():
|
| 254 |
+
with st.expander(f"Application {row[id_col]}"):
|
| 255 |
st.write("")
|
| 256 |
col1, col2, col3, col4 = st.columns(4)
|
| 257 |
col1.metric("Necessity", f"{row['necessity_index']:.1f}")
|