lynn-twinkl committed on
Commit ·
7d6f84e
1
Parent(s): fca167e
Implemented heartfelt prediction model
Browse files
app.py
CHANGED
|
@@ -5,7 +5,9 @@
|
|
| 5 |
import streamlit as st
|
| 6 |
import pandas as pd
|
| 7 |
import altair as alt
|
|
|
|
| 8 |
from io import BytesIO
|
|
|
|
| 9 |
from streamlit_extras.metric_cards import style_metric_cards
|
| 10 |
|
| 11 |
# ---- FUNCTIONS ----
|
|
@@ -15,6 +17,7 @@ from src.necessity_index import compute_necessity, index_scaler, qcut_labels
|
|
| 15 |
from src.column_detection import detect_freeform_col
|
| 16 |
from src.shortlist import shortlist_applications
|
| 17 |
from src.twinkl_originals import find_book_candidates
|
|
|
|
| 18 |
from typing import Tuple
|
| 19 |
|
| 20 |
##################################
|
|
@@ -26,6 +29,11 @@ from typing import Tuple
|
|
| 26 |
# changes. The function only re‑runs if the **file contents** change.
|
| 27 |
# -----------------------------------------------------------------------------
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
@st.cache_data(show_spinner=True)
|
| 30 |
def load_and_process(raw_csv: bytes) -> Tuple[pd.DataFrame, str]:
|
| 31 |
"""
|
|
@@ -51,10 +59,16 @@ def load_and_process(raw_csv: bytes) -> Tuple[pd.DataFrame, str]:
|
|
| 51 |
# Find Twinkl Originals Candidates
|
| 52 |
scored['book_candidates'] = find_book_candidates(scored, freeform_col)
|
| 53 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
# Usage Extraction
|
| 56 |
docs = df_orig[freeform_col].to_list()
|
| 57 |
-
scored['Usage'] = extract_usage(docs)
|
| 58 |
|
| 59 |
return scored, freeform_col
|
| 60 |
|
|
@@ -88,6 +102,8 @@ if uploaded_file is not None:
|
|
| 88 |
|
| 89 |
df, freeform_col = load_and_process(raw)
|
| 90 |
|
|
|
|
|
|
|
| 91 |
## ---- INTERACTIVE FILTERING & REVIEW INTERFACE ----
|
| 92 |
|
| 93 |
with st.sidebar:
|
|
|
|
| 5 |
import streamlit as st
|
| 6 |
import pandas as pd
|
| 7 |
import altair as alt
|
| 8 |
+
import joblib
|
| 9 |
from io import BytesIO
|
| 10 |
+
import os
|
| 11 |
from streamlit_extras.metric_cards import style_metric_cards
|
| 12 |
|
| 13 |
# ---- FUNCTIONS ----
|
|
|
|
| 17 |
from src.column_detection import detect_freeform_col
|
| 18 |
from src.shortlist import shortlist_applications
|
| 19 |
from src.twinkl_originals import find_book_candidates
|
| 20 |
+
from src.preprocess_text import normalise_text
|
| 21 |
from typing import Tuple
|
| 22 |
|
| 23 |
##################################
|
|
|
|
| 29 |
# changes. The function only re‑runs if the **file contents** change.
|
| 30 |
# -----------------------------------------------------------------------------
|
| 31 |
|
| 32 |
+
@st.cache_resource
|
| 33 |
+
def load_heartfelt_predictor():
|
| 34 |
+
model_path = os.path.join("src", "models", "heartfelt_pipeline.joblib")
|
| 35 |
+
return joblib.load(model_path)
|
| 36 |
+
|
| 37 |
@st.cache_data(show_spinner=True)
|
| 38 |
def load_and_process(raw_csv: bytes) -> Tuple[pd.DataFrame, str]:
|
| 39 |
"""
|
|
|
|
| 59 |
# Find Twinkl Originals Candidates
|
| 60 |
scored['book_candidates'] = find_book_candidates(scored, freeform_col)
|
| 61 |
|
| 62 |
+
# Label Heartfelt Applications
|
| 63 |
+
scored['clean_text'] = scored[freeform_col].map(normalise_text)
|
| 64 |
+
model = load_heartfelt_predictor()
|
| 65 |
+
scored['is_heartfelt'] = model.predict(scored['clean_text'].astype(str))
|
| 66 |
+
|
| 67 |
+
|
| 68 |
|
| 69 |
# Usage Extraction
|
| 70 |
docs = df_orig[freeform_col].to_list()
|
| 71 |
+
#scored['Usage'] = extract_usage(docs)
|
| 72 |
|
| 73 |
return scored, freeform_col
|
| 74 |
|
|
|
|
| 102 |
|
| 103 |
df, freeform_col = load_and_process(raw)
|
| 104 |
|
| 105 |
+
st.dataframe(df)
|
| 106 |
+
|
| 107 |
## ---- INTERACTIVE FILTERING & REVIEW INTERFACE ----
|
| 108 |
|
| 109 |
with st.sidebar:
|