lynn-twinkl committed on
Commit
1cc6aaf
·
1 Parent(s): 2d4c953

added: NI distribution plot with px

Browse files
Files changed (1) hide show
  1. app.py +22 -27
app.py CHANGED
@@ -23,6 +23,7 @@ from src.shortlist import shortlist_applications
23
  from src.twinkl_originals import find_book_candidates
24
  from src.preprocess_text import normalise_text
25
  import src.models.topic_modeling_pipeline as topic_modeling_pipeline
 
26
  from typing import Tuple
27
 
28
  style_metric_cards(box_shadow=False, border_left_color='#E7F4FF',background_color='#E7F4FF', border_size_px=0, border_radius_px=6)
@@ -279,7 +280,7 @@ if uploaded_file is not None:
279
  key=f"shortlist_{idx}"
280
  )
281
 
282
- ## ======== SHORTLIST SUMMARY AND DOWNLOAD (MANUAL) ======
283
  shortlisted = [
284
  i for i in filtered_df.index
285
  if st.session_state.get(f"shortlist_{i}", False)
@@ -298,6 +299,25 @@ if uploaded_file is not None:
298
 
299
  with tab2:
300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
301
  # =========== TOPIC MODELING ============
302
 
303
  ## ------- 1. Tokenize texts into sentences -------
@@ -330,29 +350,4 @@ if uploaded_file is not None:
330
  st.dataframe(topic_model.get_topic_info())
331
 
332
 
333
- st.write("")
334
-
335
- col1, col2, col3 = st.columns(3)
336
- col1.metric("Avg. Word Count", f"{df['word_count'].mean().round(1)}")
337
- col2.metric("Median N.I", df['necessity_index'].median().round(2))
338
- col3.metric("Total Applications", len(df))
339
- st.html("<br>")
340
-
341
- st.subheader("Necessity Index (NI) Distribution")
342
- st.write("")
343
- st.write("")
344
- # Histogram of necessity index colored by priority labels
345
- chart = alt.Chart(df).mark_bar().encode(
346
- x=alt.X('necessity_index:Q', bin=alt.Bin(maxbins=20), title='Necessity Index'),
347
- y='count()',
348
- color=alt.Color(
349
- 'priority:N',
350
- scale=alt.Scale(
351
- domain=['low', 'medium', 'high', 'priority'],
352
- range=['#a7d6fd', '#FFA500', '#FF5733', '#FF0000']
353
- ),
354
- legend=alt.Legend(title='Priority')
355
- )
356
- )
357
- st.altair_chart(chart, use_container_width=True)
358
- st.dataframe(df, hide_index=True)
 
23
  from src.twinkl_originals import find_book_candidates
24
  from src.preprocess_text import normalise_text
25
  import src.models.topic_modeling_pipeline as topic_modeling_pipeline
26
+ from src.plot_histogram import plot_hist
27
  from typing import Tuple
28
 
29
  style_metric_cards(box_shadow=False, border_left_color='#E7F4FF',background_color='#E7F4FF', border_size_px=0, border_radius_px=6)
 
280
  key=f"shortlist_{idx}"
281
  )
282
 
283
+ # ======== SHORTLIST SUMMARY AND DOWNLOAD (MANUAL) ======
284
  shortlisted = [
285
  i for i in filtered_df.index
286
  if st.session_state.get(f"shortlist_{i}", False)
 
299
 
300
  with tab2:
301
 
302
+ ## =========== DATA OVERVIEW ==========
303
+ st.write("")
304
+
305
+ col1, col2, col3 = st.columns(3)
306
+ col1.metric("Avg. Word Count", f"{df['word_count'].mean().round(1)}")
307
+ col2.metric("Median N.I", df['necessity_index'].median().round(2))
308
+ col3.metric("Total Applications", len(df))
309
+ st.html("<br>")
310
+
311
+ st.subheader("Necessity Index (NI) Distribution")
312
+ st.write("")
313
+ st.write("")
314
+ # Histogram of necessity index colored by priority labels
315
+ ni_distribution_plt = plot_hist(df, col_to_plot='necessity_index', bins=20)
316
+
317
+ st.plotly_chart(ni_distribution_plt)
318
+
319
+ st.dataframe(df, hide_index=True)
320
+
321
  # =========== TOPIC MODELING ============
322
 
323
  ## ------- 1. Tokenize texts into sentences -------
 
350
  st.dataframe(topic_model.get_topic_info())
351
 
352
 
353
+