Spaces:

TwinklData
/

Community_Collections_App

Sleeping

App Files Files Community

lynn-twinkl commited on Apr 20, 2025

Commit

34a810f

1 Parent(s): 1b6cfe4

bug fix for issue causing auto shortlist to be perofmred on manual filter NI range

Browse files

Files changed (1) hide show

app.py +42 -29

app.py CHANGED Viewed

@@ -16,7 +16,9 @@ from functions.column_detection import detect_freeform_answer_col
 from functions.shortlist import shortlist_applications
 import typing
-# ---- CACHEABLE PROCESSING ----
 @st.cache_data
 def load_and_process(raw_csv: bytes) -> typing.Tuple[pd.DataFrame, str]:
@@ -24,17 +26,20 @@ def load_and_process(raw_csv: bytes) -> typing.Tuple[pd.DataFrame, str]:
     Load CSV from raw bytes, detect freeform column, compute necessity scores,
     and extract usage items. Returns processed DataFrame and freeform column name.
     """
-    # Read uploaded data
     df_orig = pd.read_csv(BytesIO(raw_csv))
     # Detect freeform column
     freeform_col = detect_freeform_answer_col(df_orig)
-    #Word count
     df_orig['word_count'] = df_orig[freeform_col].fillna('').str.split().str.len()
-    # Compute necessity scores
     scored = df_orig.join(df_orig[freeform_col].apply(compute_necessity))
     scored['necessity_index'] = index_scaler(scored['necessity_index'].values)
     scored['priority'] = qcut_labels(scored['necessity_index'])
-    # LangChain function for extracting usage
     docs = df_orig[freeform_col].to_list()
     usage = extract_usage(docs)
     scored['Usage'] = usage
@@ -49,7 +54,7 @@ st.title("Community Collections Helper")
 uploaded_file = st.file_uploader("Upload grant applications file for analysis", type='csv')
 if uploaded_file is not None:
-    # Read raw bytes for caching and repeated use --> this ensure all the processing isn't repeated when a user changes the filters
     raw = uploaded_file.read()
     ## ---- PROCESSED DATA (CACHED) ----
@@ -58,39 +63,42 @@ if uploaded_file is not None:
     ## ---- INTERACTIVE FILTERING & REVIEW INTERFACE ----
-    st.sidebar.title("Shortlist Mode")
     with st.sidebar:
         mode = st.segmented_control(
                 "Select one option",
                 options=["strict", "generous"],
                 default="strict",
                 )
-    st.sidebar.title("Filters")
-    min_idx = float(df['necessity_index'].min())
-    max_idx = float(df['necessity_index'].max())
-    filter_range = st.sidebar.slider(
-        "Necessity Index Range", min_value=min_idx, max_value=max_idx, value=(min_idx, max_idx)
-    )
-    filtered_df = df[df['necessity_index'].between(filter_range[0], filter_range[1])]
-    # Sidebar summary
-    st.sidebar.markdown(f"**Total Applications:** {len(df)}")
-    st.sidebar.markdown(f"**Filtered Applications:** {len(filtered_df)}")
     tab1, tab2 = st.tabs(["Shortlist Manager","Insights"])
     with tab1:
-        # Automatic Shortlisting Controls
-        st.header("✨ Automatic Shortlist")
         st.markdown("Here's your **automatically genereated shortlist!** If you'd like to manually add additional applications, you may do so on the section below!")
-        # Full scores for threshold calculation
-        scored_full = shortlist_applications(filtered_df, k=len(filtered_df))
-        quantile_map = {"strict": 0.75, "generous": 0.5}
-        threshold_score = scored_full["auto_shortlist_score"].quantile(quantile_map[mode])
-        auto_short = shortlist_applications(filtered_df, threshold=threshold_score)
         csv_auto = auto_short.to_csv(index=False).encode("utf-8")
         st.download_button(
             label="Download Shortlist",
@@ -108,7 +116,10 @@ if uploaded_file is not None:
         freeform_col_index = auto_short.columns.get_loc(freeform_col)
         st.dataframe(auto_short.iloc[:, freeform_col_index:], hide_index=True)
-        # Review applications
         st.header("🌸 Manual Filtering")
         st.markdown(
                 """
@@ -125,13 +136,13 @@ if uploaded_file is not None:
                 col3.metric("Severity", f"{int(row['severity_score'])}")
                 col4.metric("Vulnerability", f"{int(row['vulnerability_score'])}")
                 style_metric_cards(box_shadow=False, border_left_color='#E7F4FF',background_color='#E7F4FF', border_size_px=0, border_radius_px=6)
-                # Clean usage items
                 usage_items = [item for item in row['Usage'] if item and item.lower() != 'none']
                 st.markdown("##### Excerpt")
                 st.write(row[freeform_col])
                 if usage_items:
                     st.markdown("##### Usage")
-                    # Display usage items as colored pills
                     pills_html = "".join(
                             f"<span style='display:inline-block;background-color:#E7F4FF;color:#125E9E;border-radius:20px;padding:4px 10px;margin:2px;font-size:0.95rem;'>{item}</span>"
                         for item in usage_items
@@ -140,7 +151,7 @@ if uploaded_file is not None:
                 else:
                     st.caption("*No usage found*")
                 st.write("")
-                # Shortlist checkbox
                 st.checkbox(
                     "Add to shortlist",
                     key=f"shortlist_{idx}"
@@ -159,6 +170,8 @@ if uploaded_file is not None:
             )
     with tab2:
         st.write("")

 from functions.shortlist import shortlist_applications
 import typing
+##################################
+# CACHED PROCESSING FUNCTION
+##################################
 @st.cache_data
 def load_and_process(raw_csv: bytes) -> typing.Tuple[pd.DataFrame, str]:
     Load CSV from raw bytes, detect freeform column, compute necessity scores,
     and extract usage items. Returns processed DataFrame and freeform column name.
     """
+    # Read Uploaded Data
     df_orig = pd.read_csv(BytesIO(raw_csv))
     # Detect freeform column
     freeform_col = detect_freeform_answer_col(df_orig)
+    #Word Count
     df_orig['word_count'] = df_orig[freeform_col].fillna('').str.split().str.len()
+    # Compute Necessity Scores
     scored = df_orig.join(df_orig[freeform_col].apply(compute_necessity))
     scored['necessity_index'] = index_scaler(scored['necessity_index'].values)
     scored['priority'] = qcut_labels(scored['necessity_index'])
+    # Usage Extraction
     docs = df_orig[freeform_col].to_list()
     usage = extract_usage(docs)
     scored['Usage'] = usage
 uploaded_file = st.file_uploader("Upload grant applications file for analysis", type='csv')
 if uploaded_file is not None:
+    # Read file from rawfor caching and repeated use --> this ensure all the processing isn't repeated when a user changes the filters
     raw = uploaded_file.read()
     ## ---- PROCESSED DATA (CACHED) ----
     ## ---- INTERACTIVE FILTERING & REVIEW INTERFACE ----
     with st.sidebar:
+        st.title("Shortlist Mode")
+        quantile_map = {"strict": 0.75, "generous": 0.5}
         mode = st.segmented_control(
                 "Select one option",
                 options=["strict", "generous"],
                 default="strict",
                 )
+        scored_full = shortlist_applications(df, k=len(df))
+        threshold_score = scored_full["auto_shortlist_score"].quantile(quantile_map[mode])
+        auto_short = shortlist_applications(df, threshold=threshold_score)
+        st.title("Filters")
+        min_idx = float(df['necessity_index'].min())
+        max_idx = float(df['necessity_index'].max())
+        filter_range = st.sidebar.slider(
+            "Necessity Index Range", min_value=min_idx, max_value=max_idx, value=(min_idx, max_idx)
+        )
+        filtered_df = df[df['necessity_index'].between(filter_range[0], filter_range[1])]
+        st.markdown(f"**Total Applications:** {len(df)}")
+        st.markdown(f"**Filtered Applications:** {len(filtered_df)}")
+    ## ----------------- MAIN PANEL ----------------
     tab1, tab2 = st.tabs(["Shortlist Manager","Insights"])
+    ## ---------- SHORTLIST MANAGER TAB -----------
     with tab1:
+        st.header("✨ Automatic Shortlist")
         st.markdown("Here's your **automatically genereated shortlist!** If you'd like to manually add additional applications, you may do so on the section below!")
         csv_auto = auto_short.to_csv(index=False).encode("utf-8")
         st.download_button(
             label="Download Shortlist",
         freeform_col_index = auto_short.columns.get_loc(freeform_col)
         st.dataframe(auto_short.iloc[:, freeform_col_index:], hide_index=True)
+        ## REVIEW APPLICATIONS
         st.header("🌸 Manual Filtering")
         st.markdown(
                 """
                 col3.metric("Severity", f"{int(row['severity_score'])}")
                 col4.metric("Vulnerability", f"{int(row['vulnerability_score'])}")
                 style_metric_cards(box_shadow=False, border_left_color='#E7F4FF',background_color='#E7F4FF', border_size_px=0, border_radius_px=6)
+                # HTML for clean usage items
                 usage_items = [item for item in row['Usage'] if item and item.lower() != 'none']
                 st.markdown("##### Excerpt")
                 st.write(row[freeform_col])
                 if usage_items:
                     st.markdown("##### Usage")
                     pills_html = "".join(
                             f"<span style='display:inline-block;background-color:#E7F4FF;color:#125E9E;border-radius:20px;padding:4px 10px;margin:2px;font-size:0.95rem;'>{item}</span>"
                         for item in usage_items
                 else:
                     st.caption("*No usage found*")
                 st.write("")
                 st.checkbox(
                     "Add to shortlist",
                     key=f"shortlist_{idx}"
             )
+    ## ------------ INSIGHTS TAB -----------
     with tab2:
         st.write("")