lynn-twinkl committed on
Commit
a5f34f7
·
1 Parent(s): 7cf41c8

Refactored by o3 for efficiency

Browse files
Files changed (1) hide show
  1. app.py +27 -9
app.py CHANGED
@@ -14,20 +14,26 @@ from functions.extract_usage import extract_usage
14
  from functions.necessity_index import compute_necessity, index_scaler, qcut_labels
15
  from functions.column_detection import detect_freeform_col
16
  from functions.shortlist import shortlist_applications
17
- import typing
18
 
19
  ##################################
20
  # CACHED PROCESSING FUNCTION
21
  ##################################
22
 
23
- @st.cache_data
24
- def load_and_process(raw_csv: bytes) -> typing.Tuple[pd.DataFrame, str]:
 
 
 
 
 
25
  """
26
  Load CSV from raw bytes, detect freeform column, compute necessity scores,
27
  and extract usage items. Returns processed DataFrame and freeform column name.
28
  """
29
  # Read Uploaded Data
30
  df_orig = pd.read_csv(BytesIO(raw_csv))
 
31
  # Detect freeform column
32
  freeform_col = detect_freeform_col(df_orig)
33
 
@@ -41,14 +47,27 @@ def load_and_process(raw_csv: bytes) -> typing.Tuple[pd.DataFrame, str]:
41
 
42
  # Usage Extraction
43
  docs = df_orig[freeform_col].to_list()
44
- usage = extract_usage(docs)
45
- scored['Usage'] = usage
46
  return scored, freeform_col
47
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  ################################
49
  # APP SCRIPT
50
  ################################
51
 
 
52
  st.title("Community Collections Helper")
53
 
54
  uploaded_file = st.file_uploader("Upload grant applications file for analysis", type='csv')
@@ -73,9 +92,9 @@ if uploaded_file is not None:
73
  default="strict",
74
  )
75
 
76
- scored_full = shortlist_applications(df, k=len(df))
77
  threshold_score = scored_full["shortlist_score"].quantile(quantile_map[mode])
78
- auto_short_df = shortlist_applications(df, threshold=threshold_score)
79
 
80
  st.title("Filters")
81
  min_idx = float(df['necessity_index'].min())
@@ -145,7 +164,6 @@ if uploaded_file is not None:
145
  col2.metric("Urgency", f"{int(row['urgency_score'])}")
146
  col3.metric("Severity", f"{int(row['severity_score'])}")
147
  col4.metric("Vulnerability", f"{int(row['vulnerability_score'])}")
148
- style_metric_cards(box_shadow=False, border_left_color='#E7F4FF',background_color='#E7F4FF', border_size_px=0, border_radius_px=6)
149
 
150
  # HTML for clean usage items
151
  usage_items = [item for item in row['Usage'] if item and item.lower() != 'none']
@@ -187,7 +205,7 @@ if uploaded_file is not None:
187
 
188
  col1, col2, col3 = st.columns(3)
189
  col1.metric("Avg. Word Count", f"{df['word_count'].mean().round(1)}")
190
- col2.metric("Median N.I", df['necessity_index'].median())
191
  col3.metric("Total Applications", len(df))
192
  st.html("<br>")
193
 
 
14
  from functions.necessity_index import compute_necessity, index_scaler, qcut_labels
15
  from functions.column_detection import detect_freeform_col
16
  from functions.shortlist import shortlist_applications
17
+ from typing import Tuple
18
 
19
  ##################################
20
  # CACHED PROCESSING FUNCTION
21
  ##################################
22
 
23
+ # -----------------------------------------------------------------------------
24
+ # Heavy processing (IO + NLP) is cached to avoid re-executing when the UI state
25
+ # changes. The function only re-runs if the **file contents** change.
26
+ # -----------------------------------------------------------------------------
27
+
28
+ @st.cache_data(show_spinner=False)
29
+ def load_and_process(raw_csv: bytes) -> Tuple[pd.DataFrame, str]:
30
  """
31
  Load CSV from raw bytes, detect freeform column, compute necessity scores,
32
  and extract usage items. Returns processed DataFrame and freeform column name.
33
  """
34
  # Read Uploaded Data
35
  df_orig = pd.read_csv(BytesIO(raw_csv))
36
+
37
  # Detect freeform column
38
  freeform_col = detect_freeform_col(df_orig)
39
 
 
47
 
48
  # Usage Extraction
49
  docs = df_orig[freeform_col].to_list()
50
+ scored['Usage'] = extract_usage(docs)
51
+
52
  return scored, freeform_col
53
 
54
+ # -----------------------------------------------------------------------------
55
+ # Derivative computations that rely only on the processed DataFrame are also
56
+ # cached. These are lightweight but still benefit from caching because this
57
+ # function might be called multiple times during widget interaction.
58
+ # -----------------------------------------------------------------------------
59
+
60
+
61
@st.cache_data(show_spinner=False)
def compute_shortlist(df: pd.DataFrame) -> pd.DataFrame:
    """Score every application once so both shortlist modes reuse the result.

    Returns the DataFrame produced by ``shortlist_applications`` with a
    ``shortlist_score`` column for all rows (``k`` set to the full row count).
    Cached so repeated widget interactions do not recompute the scores.
    """
    total_rows = len(df)
    return shortlist_applications(df, k=total_rows)
65
+
66
  ################################
67
  # APP SCRIPT
68
  ################################
69
 
70
+ style_metric_cards(box_shadow=False, border_left_color='#E7F4FF',background_color='#E7F4FF', border_size_px=0, border_radius_px=6)
71
  st.title("Community Collections Helper")
72
 
73
  uploaded_file = st.file_uploader("Upload grant applications file for analysis", type='csv')
 
92
  default="strict",
93
  )
94
 
95
+ scored_full = compute_shortlist(df)
96
  threshold_score = scored_full["shortlist_score"].quantile(quantile_map[mode])
97
+ auto_short_df = scored_full[scored_full["shortlist_score"] >= threshold_score]
98
 
99
  st.title("Filters")
100
  min_idx = float(df['necessity_index'].min())
 
164
  col2.metric("Urgency", f"{int(row['urgency_score'])}")
165
  col3.metric("Severity", f"{int(row['severity_score'])}")
166
  col4.metric("Vulnerability", f"{int(row['vulnerability_score'])}")
 
167
 
168
  # HTML for clean usage items
169
  usage_items = [item for item in row['Usage'] if item and item.lower() != 'none']
 
205
 
206
  col1, col2, col3 = st.columns(3)
207
  col1.metric("Avg. Word Count", f"{df['word_count'].mean().round(1)}")
208
+ col2.metric("Median N.I", df['necessity_index'].median().round(2))
209
  col3.metric("Total Applications", len(df))
210
  st.html("<br>")
211