lynn-twinkl
committed on
Commit
·
1cc6aaf
1
Parent(s):
2d4c953
added: NI distribution plot with px
Browse files
app.py
CHANGED
|
@@ -23,6 +23,7 @@ from src.shortlist import shortlist_applications
|
|
| 23 |
from src.twinkl_originals import find_book_candidates
|
| 24 |
from src.preprocess_text import normalise_text
|
| 25 |
import src.models.topic_modeling_pipeline as topic_modeling_pipeline
|
|
|
|
| 26 |
from typing import Tuple
|
| 27 |
|
| 28 |
style_metric_cards(box_shadow=False, border_left_color='#E7F4FF',background_color='#E7F4FF', border_size_px=0, border_radius_px=6)
|
|
@@ -279,7 +280,7 @@ if uploaded_file is not None:
|
|
| 279 |
key=f"shortlist_{idx}"
|
| 280 |
)
|
| 281 |
|
| 282 |
-
|
| 283 |
shortlisted = [
|
| 284 |
i for i in filtered_df.index
|
| 285 |
if st.session_state.get(f"shortlist_{i}", False)
|
|
@@ -298,6 +299,25 @@ if uploaded_file is not None:
|
|
| 298 |
|
| 299 |
with tab2:
|
| 300 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 301 |
# =========== TOPIC MODELING ============
|
| 302 |
|
| 303 |
## ------- 1. Tokenize texts into sentences -------
|
|
@@ -330,29 +350,4 @@ if uploaded_file is not None:
|
|
| 330 |
st.dataframe(topic_model.get_topic_info())
|
| 331 |
|
| 332 |
|
| 333 |
-
|
| 334 |
-
|
| 335 |
-
col1, col2, col3 = st.columns(3)
|
| 336 |
-
col1.metric("Avg. Word Count", f"{df['word_count'].mean().round(1)}")
|
| 337 |
-
col2.metric("Median N.I", df['necessity_index'].median().round(2))
|
| 338 |
-
col3.metric("Total Applications", len(df))
|
| 339 |
-
st.html("<br>")
|
| 340 |
-
|
| 341 |
-
st.subheader("Necessity Index (NI) Distribution")
|
| 342 |
-
st.write("")
|
| 343 |
-
st.write("")
|
| 344 |
-
# Histogram of necessity index colored by priority labels
|
| 345 |
-
chart = alt.Chart(df).mark_bar().encode(
|
| 346 |
-
x=alt.X('necessity_index:Q', bin=alt.Bin(maxbins=20), title='Necessity Index'),
|
| 347 |
-
y='count()',
|
| 348 |
-
color=alt.Color(
|
| 349 |
-
'priority:N',
|
| 350 |
-
scale=alt.Scale(
|
| 351 |
-
domain=['low', 'medium', 'high', 'priority'],
|
| 352 |
-
range=['#a7d6fd', '#FFA500', '#FF5733', '#FF0000']
|
| 353 |
-
),
|
| 354 |
-
legend=alt.Legend(title='Priority')
|
| 355 |
-
)
|
| 356 |
-
)
|
| 357 |
-
st.altair_chart(chart, use_container_width=True)
|
| 358 |
-
st.dataframe(df, hide_index=True)
|
|
|
|
| 23 |
from src.twinkl_originals import find_book_candidates
|
| 24 |
from src.preprocess_text import normalise_text
|
| 25 |
import src.models.topic_modeling_pipeline as topic_modeling_pipeline
|
| 26 |
+
from src.plot_histogram import plot_hist
|
| 27 |
from typing import Tuple
|
| 28 |
|
| 29 |
style_metric_cards(box_shadow=False, border_left_color='#E7F4FF',background_color='#E7F4FF', border_size_px=0, border_radius_px=6)
|
|
|
|
| 280 |
key=f"shortlist_{idx}"
|
| 281 |
)
|
| 282 |
|
| 283 |
+
# ======== SHORTLIST SUMMARY AND DOWNLOAD (MANUAL) ======
|
| 284 |
shortlisted = [
|
| 285 |
i for i in filtered_df.index
|
| 286 |
if st.session_state.get(f"shortlist_{i}", False)
|
|
|
|
| 299 |
|
| 300 |
with tab2:
|
| 301 |
|
| 302 |
+
## =========== DATA OVERVIEW ==========
|
| 303 |
+
st.write("")
|
| 304 |
+
|
| 305 |
+
col1, col2, col3 = st.columns(3)
|
| 306 |
+
col1.metric("Avg. Word Count", f"{df['word_count'].mean().round(1)}")
|
| 307 |
+
col2.metric("Median N.I", df['necessity_index'].median().round(2))
|
| 308 |
+
col3.metric("Total Applications", len(df))
|
| 309 |
+
st.html("<br>")
|
| 310 |
+
|
| 311 |
+
st.subheader("Necessity Index (NI) Distribution")
|
| 312 |
+
st.write("")
|
| 313 |
+
st.write("")
|
| 314 |
+
# Histogram of necessity index colored by priority labels
|
| 315 |
+
ni_distribution_plt = plot_hist(df, col_to_plot='necessity_index', bins=20)
|
| 316 |
+
|
| 317 |
+
st.plotly_chart(ni_distribution_plt)
|
| 318 |
+
|
| 319 |
+
st.dataframe(df, hide_index=True)
|
| 320 |
+
|
| 321 |
# =========== TOPIC MODELING ============
|
| 322 |
|
| 323 |
## ------- 1. Tokenize texts into sentences -------
|
|
|
|
| 350 |
st.dataframe(topic_model.get_topic_info())
|
| 351 |
|
| 352 |
|
| 353 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|