# Streamlit entry script: loads the comparison data, reads the user-selected
# parameters, runs the processing step, and renders one tab per view.

import numpy as np
import pandas as pd
import streamlit as st

from process import process_comparisons, process_model_sentences
from components import (
    render_heading,
    render_loading,
    render_parameters,
    render_tab_info,
    render_tab_tables,
    render_tab_counts,
    render_tab_models,
    render_tab_inspect,
    render_tab_project,
)

# Input files handed to the loader below.
comparison_file = 'data/visualize/model-segments_vs_bill-texts.json.zst'
billtext_file = 'data/visualize/texts.json.zst'
modelseg_file = 'data/visualize/segments.json.zst'

# --- Heading ---------------------------------------------------------------
render_heading.render()

# --- Load data -------------------------------------------------------------
# The loader returns three objects: the comparison frame, the bill-text frame
# and the model-segment frame (names follow the input files; exact schema is
# defined in components.render_loading -- not visible here).
df, text_df, seg_df = render_loading.render(
    comparison_file, billtext_file, modelseg_file
)

# --- Parameters ------------------------------------------------------------
# Five user-controlled values, unpacked in the order the component returns.
min_nwords, thres_ratio, min_high_sim_num_sents, min_high_sim_pct_sim, version_to_count = (
    render_parameters.render()
)

# --- Processing ------------------------------------------------------------
# Arguments are positional; keep the order in sync with process_comparisons.
sim_df, proj_df, disp_sim_df, multi_model_bills, sim_bill_counts, sim_state_counts, sec_counts = (
    process_comparisons(
        df,
        seg_df,
        min_nwords,
        thres_ratio,
        min_high_sim_num_sents,
        min_high_sim_pct_sim,
        version_to_count,
    )
)

model_sent_df = process_model_sentences(df, seg_df, min_nwords, thres_ratio)

# Distinct values of the 'model' column, used by the counts and models tabs.
model_bills = model_sent_df['model'].unique()

# --- Tabs ------------------------------------------------------------------
# One tab object is unpacked per label, in the same order as the labels.
tab_labels = [
    "Information",
    "Tables",
    "Counts",
    "Models",
    "Inspect",
    "Projection",
]
tab_info, tab_tables, tab_counts, tab_models, tab_inspect, tab_project = st.tabs(tab_labels)

# Information tab
render_tab_info.render(tab_info)

# Data tables of similar bills
render_tab_tables.render(tab_tables, disp_sim_df, multi_model_bills)

# Bar charts for counts
render_tab_counts.render(
    tab_counts, sim_bill_counts, sim_state_counts, model_bills, sec_counts
)

# Prevalence of model sentences
render_tab_models.render(tab_models, model_bills, model_sent_df)

# Inspect a model sentence and the corresponding bill's sentences across versions
render_tab_inspect.render(
    tab_inspect, df, seg_df, text_df, disp_sim_df, min_nwords, thres_ratio
)

# Projection of bills
render_tab_project.render(tab_project, proj_df)