lynn-twinkl
committed on
Commit
·
a5f34f7
1
Parent(s):
7cf41c8
Refactored by o3 for efficiency
Browse files
app.py
CHANGED
|
@@ -14,20 +14,26 @@ from functions.extract_usage import extract_usage
|
|
| 14 |
from functions.necessity_index import compute_necessity, index_scaler, qcut_labels
|
| 15 |
from functions.column_detection import detect_freeform_col
|
| 16 |
from functions.shortlist import shortlist_applications
|
| 17 |
-
import
|
| 18 |
|
| 19 |
##################################
|
| 20 |
# CACHED PROCESSING FUNCTION
|
| 21 |
##################################
|
| 22 |
|
| 23 |
-
|
| 24 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
"""
|
| 26 |
Load CSV from raw bytes, detect freeform column, compute necessity scores,
|
| 27 |
and extract usage items. Returns processed DataFrame and freeform column name.
|
| 28 |
"""
|
| 29 |
# Read Uploaded Data
|
| 30 |
df_orig = pd.read_csv(BytesIO(raw_csv))
|
|
|
|
| 31 |
# Detect freeform column
|
| 32 |
freeform_col = detect_freeform_col(df_orig)
|
| 33 |
|
|
@@ -41,14 +47,27 @@ def load_and_process(raw_csv: bytes) -> typing.Tuple[pd.DataFrame, str]:
|
|
| 41 |
|
| 42 |
# Usage Extraction
|
| 43 |
docs = df_orig[freeform_col].to_list()
|
| 44 |
-
|
| 45 |
-
|
| 46 |
return scored, freeform_col
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
################################
|
| 49 |
# APP SCRIPT
|
| 50 |
################################
|
| 51 |
|
|
|
|
| 52 |
st.title("Community Collections Helper")
|
| 53 |
|
| 54 |
uploaded_file = st.file_uploader("Upload grant applications file for analysis", type='csv')
|
|
@@ -73,9 +92,9 @@ if uploaded_file is not None:
|
|
| 73 |
default="strict",
|
| 74 |
)
|
| 75 |
|
| 76 |
-
scored_full =
|
| 77 |
threshold_score = scored_full["shortlist_score"].quantile(quantile_map[mode])
|
| 78 |
-
auto_short_df =
|
| 79 |
|
| 80 |
st.title("Filters")
|
| 81 |
min_idx = float(df['necessity_index'].min())
|
|
@@ -145,7 +164,6 @@ if uploaded_file is not None:
|
|
| 145 |
col2.metric("Urgency", f"{int(row['urgency_score'])}")
|
| 146 |
col3.metric("Severity", f"{int(row['severity_score'])}")
|
| 147 |
col4.metric("Vulnerability", f"{int(row['vulnerability_score'])}")
|
| 148 |
-
style_metric_cards(box_shadow=False, border_left_color='#E7F4FF',background_color='#E7F4FF', border_size_px=0, border_radius_px=6)
|
| 149 |
|
| 150 |
# HTML for clean usage items
|
| 151 |
usage_items = [item for item in row['Usage'] if item and item.lower() != 'none']
|
|
@@ -187,7 +205,7 @@ if uploaded_file is not None:
|
|
| 187 |
|
| 188 |
col1, col2, col3 = st.columns(3)
|
| 189 |
col1.metric("Avg. Word Count", f"{df['word_count'].mean().round(1)}")
|
| 190 |
-
col2.metric("Median N.I", df['necessity_index'].median())
|
| 191 |
col3.metric("Total Applications", len(df))
|
| 192 |
st.html("<br>")
|
| 193 |
|
|
|
|
| 14 |
from functions.necessity_index import compute_necessity, index_scaler, qcut_labels
|
| 15 |
from functions.column_detection import detect_freeform_col
|
| 16 |
from functions.shortlist import shortlist_applications
|
| 17 |
+
from typing import Tuple
|
| 18 |
|
| 19 |
##################################
|
| 20 |
# CACHED PROCESSING FUNCTION
|
| 21 |
##################################
|
| 22 |
|
| 23 |
+
# -----------------------------------------------------------------------------
|
| 24 |
+
# Heavy processing (IO + NLP) is cached to avoid re-executing when the UI state
|
| 25 |
+
# changes. The function only re-runs if the **file contents** change.
|
| 26 |
+
# -----------------------------------------------------------------------------
|
| 27 |
+
|
| 28 |
+
@st.cache_data(show_spinner=False)
|
| 29 |
+
def load_and_process(raw_csv: bytes) -> Tuple[pd.DataFrame, str]:
|
| 30 |
"""
|
| 31 |
Load CSV from raw bytes, detect freeform column, compute necessity scores,
|
| 32 |
and extract usage items. Returns processed DataFrame and freeform column name.
|
| 33 |
"""
|
| 34 |
# Read Uploaded Data
|
| 35 |
df_orig = pd.read_csv(BytesIO(raw_csv))
|
| 36 |
+
|
| 37 |
# Detect freeform column
|
| 38 |
freeform_col = detect_freeform_col(df_orig)
|
| 39 |
|
|
|
|
| 47 |
|
| 48 |
# Usage Extraction
|
| 49 |
docs = df_orig[freeform_col].to_list()
|
| 50 |
+
scored['Usage'] = extract_usage(docs)
|
| 51 |
+
|
| 52 |
return scored, freeform_col
|
| 53 |
|
| 54 |
+
# -----------------------------------------------------------------------------
|
| 55 |
+
# Derivative computations that rely only on the processed DataFrame are also
|
| 56 |
+
# cached. These are lightweight but still benefit from caching because this
|
| 57 |
+
# function might be called multiple times during widget interaction.
|
| 58 |
+
# -----------------------------------------------------------------------------
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
@st.cache_data(show_spinner=False)
|
| 62 |
+
def compute_shortlist(df: pd.DataFrame) -> pd.DataFrame:
|
| 63 |
+
"""Pre‑compute shortlist_score for all rows (used for both modes)."""
|
| 64 |
+
return shortlist_applications(df, k=len(df))
|
| 65 |
+
|
| 66 |
################################
|
| 67 |
# APP SCRIPT
|
| 68 |
################################
|
| 69 |
|
| 70 |
+
style_metric_cards(box_shadow=False, border_left_color='#E7F4FF',background_color='#E7F4FF', border_size_px=0, border_radius_px=6)
|
| 71 |
st.title("Community Collections Helper")
|
| 72 |
|
| 73 |
uploaded_file = st.file_uploader("Upload grant applications file for analysis", type='csv')
|
|
|
|
| 92 |
default="strict",
|
| 93 |
)
|
| 94 |
|
| 95 |
+
scored_full = compute_shortlist(df)
|
| 96 |
threshold_score = scored_full["shortlist_score"].quantile(quantile_map[mode])
|
| 97 |
+
auto_short_df = scored_full[scored_full["shortlist_score"] >= threshold_score]
|
| 98 |
|
| 99 |
st.title("Filters")
|
| 100 |
min_idx = float(df['necessity_index'].min())
|
|
|
|
| 164 |
col2.metric("Urgency", f"{int(row['urgency_score'])}")
|
| 165 |
col3.metric("Severity", f"{int(row['severity_score'])}")
|
| 166 |
col4.metric("Vulnerability", f"{int(row['vulnerability_score'])}")
|
|
|
|
| 167 |
|
| 168 |
# HTML for clean usage items
|
| 169 |
usage_items = [item for item in row['Usage'] if item and item.lower() != 'none']
|
|
|
|
| 205 |
|
| 206 |
col1, col2, col3 = st.columns(3)
|
| 207 |
col1.metric("Avg. Word Count", f"{df['word_count'].mean().round(1)}")
|
| 208 |
+
col2.metric("Median N.I", df['necessity_index'].median().round(2))
|
| 209 |
col3.metric("Total Applications", len(df))
|
| 210 |
st.html("<br>")
|
| 211 |
|