# penguinsfly's picture
# reorganize into component files, add info, and add pca plot
# fd4a87f verified
import numpy as np
import pandas as pd
import streamlit as st
from process import (
process_comparisons,
process_model_sentences
)
from components import (
render_heading,
render_loading,
render_parameters,
render_tab_info,
render_tab_tables,
render_tab_counts,
render_tab_models,
render_tab_inspect,
render_tab_project,
)
# Input data files, all stored under data/visualize/ as .json.zst.
# The three paths are handed to render_loading.render further down in the
# order: comparison, bill text, model segments.
modelseg_file = 'data/visualize/segments.json.zst'
billtext_file = 'data/visualize/texts.json.zst'
comparison_file = 'data/visualize/model-segments_vs_bill-texts.json.zst'
# Heading is rendered before any data is loaded
render_heading.render()

# Load data: one result per input file, unpacked in the same order
loaded_data = render_loading.render(comparison_file, billtext_file, modelseg_file)
df, text_df, seg_df = loaded_data
# Set parameters: the parameters component returns five values, which are
# unpacked positionally into the names used by the processing step below
parameter_values = render_parameters.render()
(
    min_nwords,
    thres_ratio,
    min_high_sim_num_sents,
    min_high_sim_pct_sim,
    version_to_count,
) = parameter_values
# Process data
# Both processing functions take the same four leading positional arguments.
shared_args = (df, seg_df, min_nwords, thres_ratio)

comparison_outputs = process_comparisons(
    *shared_args,
    min_high_sim_num_sents,
    min_high_sim_pct_sim,
    version_to_count,
)
(
    sim_df,
    proj_df,
    disp_sim_df,
    multi_model_bills,
    sim_bill_counts,
    sim_state_counts,
    sec_counts,
) = comparison_outputs

model_sent_df = process_model_sentences(*shared_args)

# Distinct values of the 'model' column, reused by the counts and models tabs
model_bills = model_sent_df['model'].unique()
# Construct tabs; labels are listed in the same order as the handles
# they are unpacked into
tab_labels = [
    "Information",
    "Tables",
    "Counts",
    "Models",
    "Inspect",
    "Projection",
]
tab_info, tab_tables, tab_counts, tab_models, tab_inspect, tab_project = st.tabs(tab_labels)
# Each tab component receives its tab handle first, followed by the data it
# needs. Calls are kept as plain top-level statements in the original order.

# Information tab
render_tab_info.render(tab_info)

# Data tables of similar bills
render_tab_tables.render(tab_tables, disp_sim_df, multi_model_bills)

# Bar charts for counts
render_tab_counts.render(
    tab_counts,
    sim_bill_counts,
    sim_state_counts,
    model_bills,
    sec_counts,
)

# Model sentences' prevalence
render_tab_models.render(tab_models, model_bills, model_sent_df)

# Inspect a model sentence and the corresponding bill's sentences across versions
render_tab_inspect.render(
    tab_inspect,
    df,
    seg_df,
    text_df,
    disp_sim_df,
    min_nwords,
    thres_ratio,
)

# Projection of bills
render_tab_project.render(tab_project, proj_df)