| |
| |
| |
|
|
| import streamlit as st |
| import pandas as pd |
| import altair as alt |
| import joblib |
| from io import BytesIO |
| import os |
| from streamlit_extras.metric_cards import style_metric_cards |
|
|
| |
|
|
| from src.extract_usage import extract_usage |
| from src.necessity_index import compute_necessity, index_scaler, qcut_labels |
| from src.column_detection import detect_freeform_col |
| from src.shortlist import shortlist_applications |
| from src.twinkl_originals import find_book_candidates |
| from src.preprocess_text import normalise_text |
| from typing import Tuple |
|
|
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
@st.cache_resource
def load_heartfelt_predictor():
    """Load the serialized heartfelt pipeline from ``src/models``.

    The path is relative to the current working directory. The function is
    wrapped in ``st.cache_resource``, so the deserialized object is reused
    instead of being read from disk on every call.

    Returns:
        Whatever object ``joblib.load`` deserializes from
        ``heartfelt_pipeline.joblib``.
    """
    path_parts = ("src", "models", "heartfelt_pipeline.joblib")
    pipeline_file = os.path.join(*path_parts)
    predictor = joblib.load(pipeline_file)
    return predictor
|
|
@st.cache_data(show_spinner=True)
def load_and_process(raw_csv: bytes) -> Tuple[pd.DataFrame, str]:
    """Parse an uploaded CSV and enrich every application with derived columns.

    Processing steps, in order:

    1. Read the CSV from the raw bytes.
    2. Detect the free-text column with ``detect_freeform_col`` and drop the
       rows where that column is missing.
    3. Add ``word_count`` (whitespace-separated tokens of the free text).
    4. Join the output of ``compute_necessity`` (applied per text), rescale
       ``necessity_index`` with ``index_scaler`` and derive ``priority`` with
       ``qcut_labels``.
    5. Add ``book_candidates`` from ``find_book_candidates``.
    6. Add ``clean_text`` (``normalise_text``) and ``is_heartfelt`` (prediction
       of the cached heartfelt model on the cleaned text).

    NOTE(review): no usage extraction is performed here although
    ``extract_usage`` is imported by the file; the ``Usage`` column read by the
    UI presumably has to be present in the uploaded CSV already -- confirm.

    Args:
        raw_csv: Content of the uploaded CSV file.

    Returns:
        A tuple ``(scored, freeform_col)``: the enriched DataFrame and the name
        of the detected free-text column.
    """
    df_orig = pd.read_csv(BytesIO(raw_csv))

    freeform_col = detect_freeform_col(df_orig)

    # Take an explicit copy of the filtered rows: new columns are assigned
    # below, and assigning on a filtered slice triggers pandas'
    # chained-assignment warnings.
    df_orig = df_orig[df_orig[freeform_col].notna()].copy()

    # Missing texts were dropped above, so no fill value is needed here.
    df_orig['word_count'] = df_orig[freeform_col].str.split().str.len()

    # presumably compute_necessity returns one Series of scores per text, so
    # that .apply() yields a DataFrame containing 'necessity_index' -- confirm.
    scored = df_orig.join(df_orig[freeform_col].apply(compute_necessity))
    scored['necessity_index'] = index_scaler(scored['necessity_index'].values)
    scored['priority'] = qcut_labels(scored['necessity_index'])

    scored['book_candidates'] = find_book_candidates(scored, freeform_col)

    scored['clean_text'] = scored[freeform_col].map(normalise_text)
    model = load_heartfelt_predictor()
    scored['is_heartfelt'] = model.predict(scored['clean_text'].astype(str))

    return scored, freeform_col
|
|
| |
| |
| |
| |
| |
|
|
|
|
@st.cache_data(show_spinner=True)
def compute_shortlist(df: pd.DataFrame) -> pd.DataFrame:
    """Run ``shortlist_applications`` over the whole frame.

    ``k`` is set to the number of rows in ``df``, i.e. no row is cut off by
    the shortlist size; the caller applies its own threshold to the result.
    The result is cached by ``st.cache_data``.
    """
    row_count = len(df)
    scored_rows = shortlist_applications(df, k=row_count)
    return scored_rows
|
|
| |
| |
| |
|
|
def _usage_items(raw_usage):
    """Return the displayable usage entries of one application.

    Accepts whatever the ``Usage`` cell holds: a list/tuple/set of entries, a
    single string (shown as one entry instead of being split into characters),
    or a missing value (``None``/NaN/absent column), which yields ``[]``.
    Empty entries and the literal ``none`` (any casing) are dropped.
    """
    if isinstance(raw_usage, str):
        candidates = [raw_usage]
    elif isinstance(raw_usage, (list, tuple, set)):
        candidates = raw_usage
    else:
        candidates = []
    return [
        str(item) for item in candidates
        if item and str(item).lower() != 'none'
    ]


def _usage_pills_html(items):
    """Render usage entries as inline HTML "pill" badges.

    The entries come from the uploaded file and the result is rendered with
    ``unsafe_allow_html=True``, so every entry is HTML-escaped first.
    """
    from html import escape

    return "".join(
        f"<span style='display:inline-block;background-color:#E7F4FF;color:#125E9E;border-radius:20px;padding:4px 10px;margin:2px;font-size:0.95rem;'>{escape(item)}</span>"
        for item in items
    )


# Global look of every st.metric card on the page.
style_metric_cards(
    box_shadow=False,
    border_left_color='#E7F4FF',
    background_color='#E7F4FF',
    border_size_px=0,
    border_radius_px=6,
)

st.title("Community Collections Helper")

uploaded_file = st.file_uploader("Upload grant applications file for analysis", type='csv')

if uploaded_file is not None:
    # getvalue() returns the full content regardless of the stream position.
    raw = uploaded_file.getvalue()

    df, freeform_col = load_and_process(raw)

    st.dataframe(df)

    # ---------------------------------------------------------------- sidebar
    with st.sidebar:
        st.title("Shortlist Mode")

        # Quantile of shortlist_score an application must reach per mode.
        quantile_map = {"strict": 0.75, "generous": 0.5}
        mode = st.segmented_control(
            "Select one option",
            options=["strict", "generous"],
            default="strict",
        )
        # The control yields None when the current option is deselected;
        # fall back to the default instead of failing on the dict lookup.
        if mode is None:
            mode = "strict"

        scored_full = compute_shortlist(df)
        threshold_score = scored_full["shortlist_score"].quantile(quantile_map[mode])
        auto_short_df = scored_full[scored_full["shortlist_score"] >= threshold_score]

        st.title("Filters")
        min_idx = float(df['necessity_index'].min())
        max_idx = float(df['necessity_index'].max())
        if min_idx < max_idx:
            filter_range = st.sidebar.slider(
                "Necessity Index Range",
                min_value=min_idx,
                max_value=max_idx,
                value=(min_idx, max_idx),
            )
        else:
            # All scores are identical (or undefined): a zero-width slider is
            # meaningless, so keep the full range without showing the widget.
            filter_range = (min_idx, max_idx)

        # Applications outside the automatic shortlist, within the chosen range.
        filtered_df = df[
            (~df.index.isin(auto_short_df.index))
            & (df['necessity_index'].between(filter_range[0], filter_range[1]))
        ]

        st.markdown(f"**Total Applications:** {len(df)}")
        st.markdown(f"**Filtered Applications:** {len(filtered_df)}")

    tab1, tab2 = st.tabs(["Shortlist Manager", "Insights"])

    # ------------------------------------------------------ shortlist manager
    with tab1:
        st.header("✨ Automatic Shortlist")
        st.markdown("Here's your **automatically generated shortlist!** If you'd like to manually add additional applications, you may do so on the section below!")

        csv_auto = auto_short_df.to_csv(index=False).encode("utf-8")
        st.download_button(
            label="Download Shortlist",
            data=csv_auto,
            file_name="shortlist.csv",
            mime="text/csv",
            icon='⬇️'
        )
        st.markdown("#### Shortlist Preview")
        st.write("")
        total_col, shortlistCounter_col, mode_col = st.columns(3)

        total_col.metric("Applications Submitted", len(df))
        shortlistCounter_col.metric("Shortlist Length", len(auto_short_df))
        mode_col.metric("Mode", mode)

        shortlist_cols_to_show = [
            'Id',
            freeform_col,
            'Usage',
            'necessity_index',
            'urgency_score',
            'severity_score',
            'vulnerability_score',
            'shortlist_score',
            'book_candidates',
        ]
        # Show only the columns that exist: 'Id' and 'Usage' depend on the
        # uploaded file, and selecting a missing label raises KeyError.
        available_cols = [
            col for col in shortlist_cols_to_show if col in auto_short_df.columns
        ]

        st.dataframe(auto_short_df.loc[:, available_cols], hide_index=True)

        st.header("🌸 Manual Filtering")
        st.markdown(
            "Below you'll find applications that **did not** make it into the shortlist for you to manually review or append to the shortlist if desired.\n"
            "\n"
            "You may use the **side panel** filters to more easily sort through applications that you'd like to review."
        )

        st.markdown("#### Filtered Applications")
        st.write("")
        for idx, row in filtered_df.iterrows():
            # The backslash keeps markdown from treating '#' specially; it is
            # written as '\\' so Python sees a valid escape sequence.
            with st.expander(f"Application \\#{idx}"):
                st.write("")
                col1, col2, col3, col4 = st.columns(4)
                col1.metric("Necessity", f"{row['necessity_index']:.1f}")
                col2.metric("Urgency", f"{int(row['urgency_score'])}")
                col3.metric("Severity", f"{int(row['severity_score'])}")
                col4.metric("Vulnerability", f"{int(row['vulnerability_score'])}")

                usage_items = _usage_items(row.get('Usage'))
                st.markdown("##### Excerpt")
                st.write(row[freeform_col])
                if usage_items:
                    st.markdown("##### Usage")
                    st.markdown(_usage_pills_html(usage_items), unsafe_allow_html=True)
                else:
                    st.caption("*No usage found*")
                st.write("")

                # The checkbox state lives in st.session_state under this key
                # and is collected below.
                st.checkbox(
                    "Add to shortlist",
                    key=f"shortlist_{idx}"
                )

        # Rows whose "Add to shortlist" checkbox is currently ticked.
        shortlisted = [
            i for i in filtered_df.index
            if st.session_state.get(f"shortlist_{i}", False)
        ]
        st.sidebar.markdown(f"**Manual Shortlisted:** {len(shortlisted)}")
        if shortlisted:
            csv = df.loc[shortlisted].to_csv(index=False).encode('utf-8')
            st.sidebar.download_button(
                "Download Manual Shortlist", csv, "shortlist.csv", "text/csv"
            )

    # --------------------------------------------------------------- insights
    with tab2:
        st.write("")

        col1, col2, col3 = st.columns(3)
        col1.metric("Avg. Word Count", f"{df['word_count'].mean().round(1)}")
        col2.metric("Median N.I", df['necessity_index'].median().round(2))
        col3.metric("Total Applications", len(df))
        st.html("<br>")

        st.subheader("Necessity Index (NI) Distribution")
        st.write("")
        st.write("")

        # Histogram of the necessity index, coloured by priority label.
        chart = alt.Chart(df).mark_bar().encode(
            x=alt.X('necessity_index:Q', bin=alt.Bin(maxbins=20), title='Necessity Index'),
            y='count()',
            color=alt.Color(
                'priority:N',
                scale=alt.Scale(
                    domain=['low', 'medium', 'high', 'priority'],
                    range=['#a7d6fd', '#FFA500', '#FF5733', '#FF0000']
                ),
                legend=alt.Legend(title='Priority')
            )
        )
        st.altair_chart(chart, use_container_width=True)
        st.dataframe(df, hide_index=True)
|
|