lynn-twinkl committed on
Commit
6c1f317
·
1 Parent(s): d0ecd99

Interface changes by codex

Browse files
Files changed (1) hide show
  1. app.py +84 -25
app.py CHANGED
@@ -4,12 +4,33 @@
4
 
5
  import streamlit as st
6
  import pandas as pd
 
7
 
8
  # -- FUNCTIONS --
9
 
10
  from functions.extract_usage import extract_usage
11
  from functions.necessity_index import compute_necessity
12
  from functions.column_detection import detect_freeform_answer_col
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
  ################################
15
  # APP SCRIPT
@@ -20,34 +41,72 @@ st.title("Grant Applications Helper")
20
  uploaded_file = st.file_uploader("Upload grant applications file for analysis", type='csv')
21
 
22
  if uploaded_file is not None:
23
- df = pd.read_csv(uploaded_file)
 
24
 
 
25
  st.markdown("""
26
  ### Data Preview
27
  Here's the data you uploaded!
28
  """
29
- )
30
-
31
- st.dataframe(df)
32
-
33
- ## ------- 0. PREPROCESSING ----------
34
-
35
- freeform_col = detect_freeform_answer_col(df) # <- detects the long-form column used for processing
36
-
37
- docs = df[freeform_col].to_list()
38
-
39
-
40
- ## -------- 1. ASSIGN NECESSITY INDEX ------------
41
-
42
- df = df.join(df[freeform_col].apply(compute_necessity))
43
-
44
-
45
- ## --------- 2. EXTRACT USAGE -------------
46
-
47
- with st.spinner("Extracting usage with AI...", show_time=True):
48
- extracted_usage = extract_usage(docs)
49
-
50
- df['Usage'] = extracted_usage
51
-
52
- st.dataframe(df)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
53
 
 
4
 
5
  import streamlit as st
6
  import pandas as pd
7
+ from io import BytesIO
8
 
9
  # -- FUNCTIONS --
10
 
11
  from functions.extract_usage import extract_usage
12
  from functions.necessity_index import compute_necessity
13
  from functions.column_detection import detect_freeform_answer_col
14
+ import typing
15
+
16
# -- CACHEABLE PROCESSING --
@st.cache_data
def load_and_process(raw_csv: bytes) -> typing.Tuple[pd.DataFrame, str]:
    """
    Parse an uploaded CSV (raw bytes), locate the free-form narrative column,
    attach necessity scores, and append AI-extracted usage items.

    Parameters
    ----------
    raw_csv : bytes
        Raw CSV payload. Bytes (rather than the upload handle) so that
        ``st.cache_data`` can hash the argument and reuse prior results.

    Returns
    -------
    tuple of (pd.DataFrame, str)
        The enriched DataFrame (original columns + necessity score columns +
        a ``'Usage'`` column) and the name of the detected free-form column.
    """
    frame = pd.read_csv(BytesIO(raw_csv))

    # Identify the long-form answer column used for all downstream processing.
    narrative_col = detect_freeform_answer_col(frame)

    # compute_necessity yields per-row score columns; join them alongside
    # the original columns.
    enriched = frame.join(frame[narrative_col].apply(compute_necessity))

    # Run the AI usage extraction over the raw narrative texts.
    enriched['Usage'] = extract_usage(frame[narrative_col].to_list())

    return enriched, narrative_col
 
35
  ################################
36
  # APP SCRIPT
 
uploaded_file = st.file_uploader("Upload grant applications file for analysis", type='csv')

if uploaded_file is not None:
    # Read raw bytes once: hashable for st.cache_data and reusable across reruns.
    raw = uploaded_file.read()

    # --- Original Data Preview ---
    st.markdown("""
    ### Data Preview
    Here's the data you uploaded!
    """
    )
    df_orig = pd.read_csv(BytesIO(raw))
    st.dataframe(df_orig)

    # --- Processed Data (cached): add scores & extracted usage ---
    df, freeform_col = load_and_process(raw)

    # -- Interactive Filtering & Review Interface --
    st.sidebar.header("Filters")
    # Filter by necessity index
    min_idx = float(df['necessity_index'].min())
    max_idx = float(df['necessity_index'].max())
    # FIX: st.slider raises StreamlitAPIException when min_value == max_value
    # (e.g. a single-row upload, or constant necessity scores), so only offer
    # the slider when there is an actual range to filter over.
    if min_idx < max_idx:
        filter_range = st.sidebar.slider(
            "Necessity Index Range", min_value=min_idx, max_value=max_idx, value=(min_idx, max_idx)
        )
    else:
        filter_range = (min_idx, max_idx)
    filtered_df = df[df['necessity_index'].between(filter_range[0], filter_range[1])]

    # Sidebar summary
    st.sidebar.markdown(f"**Total Applications:** {len(df)}")
    st.sidebar.markdown(f"**Filtered Applications:** {len(filtered_df)}")

    # Distribution chart
    st.subheader("Necessity Index Distribution")
    st.bar_chart(df['necessity_index'])

    # Review applications
    st.subheader("Applications")
    for idx, row in filtered_df.iterrows():
        with st.expander(f"Application {idx} | Necessity: {row['necessity_index']:.1f}"):
            col1, col2 = st.columns((1, 3))
            col1.metric("Necessity", f"{row['necessity_index']:.1f}")
            col1.metric("Urgency", f"{row['urgency_score']}")
            col1.metric("Severity", f"{row['severity_score']}")
            col1.metric("Vulnerability", f"{row['vulnerability_score']}")
            # Clean usage items.
            # FIX: a failed/empty extraction can leave a non-list cell (e.g.
            # NaN, which is a float) — iterating it or calling .lower() would
            # raise. Treat any non-list/tuple cell as "no items", and skip
            # non-string entries.
            raw_usage = row['Usage'] if isinstance(row['Usage'], (list, tuple)) else []
            usage_items = [
                item for item in raw_usage
                if isinstance(item, str) and item and item.lower() != 'none'
            ]
            if usage_items:
                col2.markdown("**Extracted Usage Items:**")
                col2.write(", ".join(usage_items))
            else:
                col2.markdown("*No specific usage items extracted.*")
            col2.markdown("**Excerpt:**")
            col2.write(row[freeform_col])
            # Shortlist checkbox: state persists in st.session_state, keyed by
            # the DataFrame index so it survives reruns and filter changes.
            st.checkbox(
                "Shortlist this application",
                key=f"shortlist_{idx}"
            )

    # Shortlist summary and download
    shortlisted = [
        i for i in filtered_df.index
        if st.session_state.get(f"shortlist_{i}", False)
    ]
    st.sidebar.markdown(f"**Shortlisted:** {len(shortlisted)}")
    if shortlisted:
        csv = df.loc[shortlisted].to_csv(index=False).encode('utf-8')
        st.sidebar.download_button(
            "Download Shortlist", csv, "shortlist.csv", "text/csv"
        )
112