lynn-twinkl committed on
Commit
7d6f84e
·
1 Parent(s): fca167e

Implemented heartfelt prediction model

Browse files
Files changed (1) hide show
  1. app.py +17 -1
app.py CHANGED
@@ -5,7 +5,9 @@
5
  import streamlit as st
6
  import pandas as pd
7
  import altair as alt
 
8
  from io import BytesIO
 
9
  from streamlit_extras.metric_cards import style_metric_cards
10
 
11
  # ---- FUNCTIONS ----
@@ -15,6 +17,7 @@ from src.necessity_index import compute_necessity, index_scaler, qcut_labels
15
  from src.column_detection import detect_freeform_col
16
  from src.shortlist import shortlist_applications
17
  from src.twinkl_originals import find_book_candidates
 
18
  from typing import Tuple
19
 
20
  ##################################
@@ -26,6 +29,11 @@ from typing import Tuple
26
  # changes. The function only re‑runs if the **file contents** change.
27
  # -----------------------------------------------------------------------------
28
 
 
 
 
 
 
29
  @st.cache_data(show_spinner=True)
30
  def load_and_process(raw_csv: bytes) -> Tuple[pd.DataFrame, str]:
31
  """
@@ -51,10 +59,16 @@ def load_and_process(raw_csv: bytes) -> Tuple[pd.DataFrame, str]:
51
  # Find Twinkl Originals Candidates
52
  scored['book_candidates'] = find_book_candidates(scored, freeform_col)
53
 
 
 
 
 
 
 
54
 
55
  # Usage Extraction
56
  docs = df_orig[freeform_col].to_list()
57
- scored['Usage'] = extract_usage(docs)
58
 
59
  return scored, freeform_col
60
 
@@ -88,6 +102,8 @@ if uploaded_file is not None:
88
 
89
  df, freeform_col = load_and_process(raw)
90
 
 
 
91
  ## ---- INTERACTIVE FILTERING & REVIEW INTERFACE ----
92
 
93
  with st.sidebar:
 
5
  import streamlit as st
6
  import pandas as pd
7
  import altair as alt
8
+ import joblib
9
  from io import BytesIO
10
+ import os
11
  from streamlit_extras.metric_cards import style_metric_cards
12
 
13
  # ---- FUNCTIONS ----
 
17
  from src.column_detection import detect_freeform_col
18
  from src.shortlist import shortlist_applications
19
  from src.twinkl_originals import find_book_candidates
20
+ from src.preprocess_text import normalise_text
21
  from typing import Tuple
22
 
23
  ##################################
 
29
  # changes. The function only re‑runs if the **file contents** change.
30
  # -----------------------------------------------------------------------------
31
 
32
@st.cache_resource
def load_heartfelt_predictor():
    """Load the serialized "heartfelt" prediction pipeline from disk.

    The pipeline is read from ``src/models/heartfelt_pipeline.joblib``,
    resolved relative to the directory containing this script rather than
    the current working directory, so the app works no matter where it is
    launched from.

    Returns:
        The object deserialized by ``joblib.load``. Callers in this file
        use its ``predict`` method on a column of cleaned text.

    Raises:
        FileNotFoundError: If the model file is not present at the
            expected location (propagated from ``joblib.load``).
    """
    # Anchor on this file's directory: a bare relative path would silently
    # depend on the process working directory at launch time.
    app_dir = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(
        app_dir, "src", "models", "heartfelt_pipeline.joblib"
    )
    # NOTE: joblib.load unpickles the file, which can execute arbitrary
    # code. Only ship model artifacts from a trusted source.
    return joblib.load(model_path)
36
+
37
  @st.cache_data(show_spinner=True)
38
  def load_and_process(raw_csv: bytes) -> Tuple[pd.DataFrame, str]:
39
  """
 
59
  # Find Twinkl Originals Candidates
60
  scored['book_candidates'] = find_book_candidates(scored, freeform_col)
61
 
62
+ # Label Heartfelt Applications
63
+ scored['clean_text'] = scored[freeform_col].map(normalise_text)
64
+ model = load_heartfelt_predictor()
65
+ scored['is_heartfelt'] = model.predict(scored['clean_text'].astype(str))
66
+
67
+
68
 
69
  # Usage Extraction
70
  docs = df_orig[freeform_col].to_list()
71
+ #scored['Usage'] = extract_usage(docs)
72
 
73
  return scored, freeform_col
74
 
 
102
 
103
  df, freeform_col = load_and_process(raw)
104
 
105
+ st.dataframe(df)
106
+
107
  ## ---- INTERACTIVE FILTERING & REVIEW INTERFACE ----
108
 
109
  with st.sidebar: