# ===============================================
# VAILL AI Governance Bills Tracker
# ===============================================
# Table of Contents (functions):
# 1. get_qa_llm()
# 2. get_embeddings()
# 3. get_text_splitter()
# 4. create_bill_documents()
# 5. create_vectorstore_from_bills()
# 6. compare_bills_with_rag()
# 7. answer_bill_question()
# 8. load_eu_ai_act_vectorstore()
# 9. get_eu_vectorstore_info()
# 10. compare_bill_with_eu_ai_act()
# 11. load_bill_reports()
# 12. get_bill_report()
# 13. load_bill_summaries()
# 14. load_bill_suggested_questions()
# 15. get_bill_suggested_questions()
# 16. get_bill_summary()
# 17. load_and_process_data()
# 18. load_openai_api_key()
# 19. display_bill_details()
# 20. get_last_updated_date()
# 21. extract_iapp_subcategories()
# 22. format_date()
# 23. load_us_states_geojson()
# 24. _bill_label()
# 25. _group_to_ul()
# 26. create_bill_options()
# -----------------------------------------------
# Section markers: search for '==== SECTION' lines to jump around.
# ===============================================
# ==== SECTION: Original file begins below (unchanged) ====
#!/usr/bin/env python3
# scripts/app.py
"""
Streamlit visualization for the AI Governance Bills Tracker.
Displays an interactive dashboard of AI-related bills from known_bills_visualize.json,
including a table, map, filters, Q&A, plan comparison, summary generation, and CSV download functionality.
"""
import streamlit as st
import pandas as pd
import time
from streamlit_folium import st_folium
import folium
import json
from pathlib import Path
import os
import dotenv
import io
import logging
from datetime import datetime, date, timedelta
from constants import IAPP_CATEGORIES
import requests
import html
from langchain_openai import ChatOpenAI
from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.schema import Document
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
import pickle
import plotly.express as px

dotenv.load_dotenv()

# Page configuration
st.set_page_config(
    layout="wide",
    page_title="VAILL AI Governance Legislation Tracker",
    page_icon="⚖️",
)

# NOTE(review): the CSS/HTML markup inside the three st.markdown() strings
# below appears to be missing in this copy of the file (only the visible text
# remains) -- confirm against the deployed version before shipping.

# Custom CSS for clean, section-based layout
st.markdown(""" """, unsafe_allow_html=True)

# Hero Section (fixed HTML)
st.markdown("""

Tracking and Analyzing State-Level AI Governance Legislation

A resource from the Vanderbilt AI Law Lab (VAILL) to help policymakers, researchers, and the public stay informed about the evolving landscape of AI regulation in the United States.

""", unsafe_allow_html=True)

# What is the Tracker Section (fixed HTML)
st.markdown("""

What is the AI Governance Legislation Tracker?

This tracker is a centralized, user-friendly platform for monitoring artificial intelligence (AI) governance legislation across the United States. As AI technology rapidly advances, it's becoming increasingly important to understand how different states are approaching its regulation. This tool aims to simplify the process of finding and comparing various state-level AI governance bills, their current statuses, and their key provisions.

""", unsafe_allow_html=True)

# Tool descriptions
TOOL_DESCRIPTIONS = {
    "bills_table": {
        "name": "Bills Explorer",
        "description": "Navigate and filter AI governance legislation with powerful search tools. View details on bill numbers, titles, status, scope, and last action dates in an easy-to-read table format. Export your filtered results as CSV for further analysis.",
    },
    "bills_map": {
        "name": "Geospatial Insights",
        "description": "Visualize the geographic distribution of AI governance bills across states with an interactive map. Circle size represents bill volume, helping you identify legislative hotspots and regional trends at a glance.",
    },
    "state_status_viz": {
        "name": "State & Status Analysis",
        "description": "Interactive visualizations showing the distribution of bills across states and their current legislative status. Identify which states are most active and track bill progression through the legislative process.",
    },
    "category_viz": {
        "name": "Category Analysis",
        "description": "Explore how AI governance bills are distributed across different regulatory categories. Understand the focus areas of legislation and identify emerging trends in AI regulation.",
    },
    "temporal_viz": {
        "name": "Temporal Analysis",
        "description": "Track the evolution of AI governance legislation over time. Visualize trends, identify peak activity periods, and understand how legislative focus has shifted across different time periods.",
    },
    "ai_toolkit": {
        "name": "AI Analysis Toolkit",
        "description": "Comprehensive AI-powered analysis suite for legislative research. Choose from multiple analysis types: Q&A for specific bill insights, comparative analysis across multiple bills, executive summary generation, and EU AI Act comparisons.",
    },
}

# AI Toolkit Analysis Types
AI_ANALYSIS_TYPES = {
    "qa": {
        "name": "Legislative Q&A",
        "description": "Get instant answers to your questions about specific AI governance bills. Simply select a bill and ask any question to receive AI-powered insights based on the bill's text and analysis.",
    },
    "comparison": {
        "name": "Bill Comparison",
        "description": "Analyze how AI governance approaches differ across multiple bills. Select a focus bill and comparison bills to identify similarities, differences, and unique approaches to regulation on specific topics.",
    },
    "summary": {
        "name": "Legislative Report",
        "description": "Review comprehensive reports of selected bills with AI assistance. Reports cover key aspects including scope, enforcement mechanisms, and potential impacts, downloadable as markdown.",
    },
    "eu_comparison": {
        "name": "EU AI Act Comparison",
        "description": "Compare US AI governance bills against the EU AI Act. Analyze similarities, differences, and regulatory approaches between US legislation and the comprehensive EU framework.",
    },
}


# ==== OPENAI SETUP (HF-FRIENDLY) ====
def load_openai_api_key():
    """
    Load OpenAI API key in a way that works both locally and on Hugging Face Spaces.

    Priority:
    1. Environment variables: OPENAI_API_KEY, openai_api_key, OPENAI_API_TOKEN
    2. Streamlit secrets: st.secrets["OPENAI_API_KEY"] / ["openai_api_key"]
    3. Fallback to manual input (for local debugging).

    Returns the key and mirrors it into ``os.environ["OPENAI_API_KEY"]``.
    If no key can be obtained, shows an error and halts the script run via
    ``st.stop()``.
    """
    possible_env_keys = ["OPENAI_API_KEY", "openai_api_key", "OPENAI_API_TOKEN"]

    # 1. Env vars (HF Spaces secrets are exposed this way)
    api_key = next(
        (os.environ[k] for k in possible_env_keys if os.environ.get(k)),
        None,
    )

    # 2. Streamlit secrets (useful on Streamlit Cloud or local secrets.toml)
    if not api_key:
        try:
            if "OPENAI_API_KEY" in st.secrets:
                api_key = st.secrets["OPENAI_API_KEY"]
            elif "openai_api_key" in st.secrets:
                api_key = st.secrets["openai_api_key"]
        except Exception:
            # Deliberate best-effort: st.secrets may not be configured at all,
            # in which case we simply fall through to manual input.
            pass

    if api_key:
        # Ensure downstream libraries see the standard env var
        os.environ["OPENAI_API_KEY"] = api_key
        return api_key

    # 3. Last resort: manual input (local only - not recommended for public HF Space)
    st.warning(
        "OpenAI API key not found in environment variables. "
        "On Hugging Face Spaces, set a secret named `OPENAI_API_KEY` "
        "in the Space settings."
    )
    api_key = st.text_input(
        "Enter your OpenAI API key (for local testing only):",
        type="password",
    )
    if api_key:
        os.environ["OPENAI_API_KEY"] = api_key
        return api_key

    st.error("Please provide an OpenAI API key to continue.")
    st.stop()


# Load the key once at startup
openai_api_key = load_openai_api_key()


@st.cache_resource
def get_qa_llm():
    """Initialize and cache the ChatOpenAI instance for Q&A.

    Uses the module-level ``openai_api_key`` with model ``gpt-4o`` at
    temperature 0.

    Raises:
        RuntimeError: if ``ChatOpenAI`` is ``None``.
    """
    if ChatOpenAI is None:
        raise RuntimeError(
            "langchain_openai package required. Install with: pip install langchain langchain_openai"
        )
    return ChatOpenAI(
        api_key=openai_api_key,
        model="gpt-4o",
        temperature=0,
    )


@st.cache_resource
def get_embeddings():
    """Initialize and cache the OpenAI embeddings."""
    return OpenAIEmbeddings(
        api_key=openai_api_key,
        model="text-embedding-3-small",  # More cost-effective than text-embedding-3-large
    )


@st.cache_resource
def get_text_splitter():
    """Initialize and cache the text splitter for chunking documents."""
    return RecursiveCharacterTextSplitter(
        chunk_size=1000,  # Reasonable chunk size for embeddings
        chunk_overlap=100,  # Some overlap to maintain context
        length_function=len,
        separators=["\n\n", "\n", ". ", " ", ""],
    )


def _bill_prompt_label(bill_data):
    """Return a one-line '<state> <number>: <title>' label for prompts.

    The bill URL is appended in parentheses when it is a non-empty string.
    The RAG prompts ask the model to cite the bill link, and this label is
    the only place the URL reaches the prompt (retrieved chunks carry it in
    metadata only).
    """
    label = (
        f"{bill_data.get('state', 'Unknown')} "
        f"{bill_data.get('bill_number', 'Unknown')}: "
        f"{bill_data.get('title', 'Unknown')}"
    )
    url = bill_data.get('url')
    # isinstance guard: a missing value coming from pandas is NaN, which is truthy.
    if isinstance(url, str) and url:
        label += f" ({url})"
    return label


def create_bill_documents(bill_data_list):
    """Convert bill data to Document objects with metadata.

    For each bill, the page content is assembled from ``title``, ``summary``,
    ``full_text`` (or ``bill_text`` when ``full_text`` is absent/empty) and
    ``description``; identifying fields are copied into the metadata with
    ``'Unknown'`` as the default.

    Args:
        bill_data_list: iterable of mappings supporting ``.get``.

    Returns:
        list[Document], one per input bill, in input order.
    """
    documents = []
    for bill_data in bill_data_list:
        # Combine relevant text fields from the bill
        sections = []
        if bill_data.get('title'):
            sections.append(f"Title: {bill_data['title']}\n\n")
        if bill_data.get('summary'):
            sections.append(f"Summary: {bill_data['summary']}\n\n")

        # Add full text if available
        if bill_data.get('full_text'):
            sections.append(f"Full Text:\n{bill_data['full_text']}\n\n")
        elif bill_data.get('bill_text'):
            sections.append(f"Bill Text:\n{bill_data['bill_text']}\n\n")

        # Add other relevant fields
        if bill_data.get('description'):
            sections.append(f"Description: {bill_data['description']}\n\n")

        metadata = {
            'bill_id': bill_data.get('bill_id', 'Unknown'),
            'state': bill_data.get('state', 'Unknown'),
            'bill_number': bill_data.get('bill_number', 'Unknown'),
            'title': bill_data.get('title', 'Unknown'),
            'url': bill_data.get('url', 'Unknown'),
            'status': bill_data.get('status', 'Unknown'),
            'sponsors': str(bill_data.get('sponsors', 'Unknown')),
            'last_action_date': str(bill_data.get('last_action_date', 'Unknown')),
        }

        documents.append(
            Document(page_content="".join(sections).strip(), metadata=metadata)
        )
    return documents


def create_vectorstore_from_bills(bill_data_list):
    """Create a FAISS vectorstore from bill data.

    Raises:
        ValueError: if no documents or no text chunks could be produced.
        Exception: any other failure is reported with ``st.error`` and
            re-raised unchanged.
    """
    try:
        embeddings = get_embeddings()
        text_splitter = get_text_splitter()

        documents = create_bill_documents(bill_data_list)
        if not documents:
            raise ValueError("No documents created from bill data")

        # Split documents into chunks
        splits = text_splitter.split_documents(documents)
        if not splits:
            raise ValueError("No text chunks created from documents")

        return FAISS.from_documents(splits, embeddings)
    except Exception as e:
        st.error(f"Error creating vectorstore: {e}")
        raise  # bare raise keeps the original traceback


def compare_bills_with_rag(focus_bill_data, comparison_bills_data, question):
    """Compare bills using modern LCEL RAG approach with FAISS vectorstore.

    Builds a temporary vectorstore over the focus bill plus the comparison
    bills, retrieves the 6 most similar chunks for ``question`` and asks the
    Q&A LLM for a markdown comparison. A de-duplicated list of the bills the
    retrieved chunks came from is appended to the answer.

    Args:
        focus_bill_data: mapping describing the focus bill.
        comparison_bills_data: iterable of mappings for the other bills.
        question: the user's comparison question.

    Returns:
        str: the markdown answer, or an error message string on failure (the
        error is also shown with ``st.error``).
    """
    try:
        comparison_bills = list(comparison_bills_data)

        # Combine all bills for the vectorstore
        vectorstore = create_vectorstore_from_bills(
            [focus_bill_data, *comparison_bills]
        )
        retriever = vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 6},  # Retrieve top 6 most relevant chunks
        )

        # Format bill information for the prompt
        focus_bill_info = _bill_prompt_label(focus_bill_data)
        comparison_bills_info_str = "; ".join(
            _bill_prompt_label(bill) for bill in comparison_bills
        )

        # Create the comparison prompt template using ChatPromptTemplate
        comparison_prompt = ChatPromptTemplate.from_template(
            """You are a legislative analyst expert at comparing AI governance bills. You have been provided with relevant excerpts from multiple bills to answer a comparison question.

IMPORTANT BILL INFORMATION:
- Focus Bill: {focus_bill_info}
- Comparison Bills: {comparison_bills_info}

Your task is to compare these bills based on the user's question, using the relevant excerpts provided below.

Guidelines for your analysis:
- Clearly identify which bill each piece of information comes from
- Highlight similarities and differences between the bills
- Be specific about provisions, definitions, and requirements
- If information is missing from the excerpts, state that clearly
- Structure your response with clear sections for each bill
- Conclude with a summary of key similarities and differences
- Always include the legiscan link to the bills in your response
- Format your answer as markdown

Relevant excerpts from the bills:
{context}

User Question: {input}

Please provide a comprehensive comparison analysis based on the excerpts above.
"""
        )

        llm = get_qa_llm()
        document_chain = create_stuff_documents_chain(llm, comparison_prompt)
        retrieval_chain = create_retrieval_chain(retriever, document_chain)

        # Run the query with bill information
        result = retrieval_chain.invoke({
            "input": question,
            "focus_bill_info": focus_bill_info,
            "comparison_bills_info": comparison_bills_info_str,
        })

        # Extract answer and sources
        answer = result.get("answer", "No answer generated")
        source_docs = result.get("context", [])

        # Add source information to the answer
        if source_docs:
            answer += "\n\n---\n\n**Sources used in this analysis:**\n\n"
            seen_bills = set()
            for doc in source_docs:
                bill_id = (
                    f"{doc.metadata.get('state', 'Unknown')} "
                    f"{doc.metadata.get('bill_number', 'Unknown')}"
                )
                if bill_id in seen_bills:
                    continue
                seen_bills.add(bill_id)
                answer += f"- {bill_id}: {doc.metadata.get('title', 'Unknown')}\n"
        return answer
    except Exception as e:
        st.error(f"Error in RAG comparison: {e}")
        return f"Error during comparison analysis: {str(e)}"


def _to_json_safe(value):
    """Map one bill field to something ``json.dumps`` can serialize.

    None, NaN and NaT become None; objects exposing ``strftime`` become
    'YYYY-MM-DD' strings; plain JSON types pass through; anything else is
    stringified.
    """
    try:
        if value is None or value is pd.NaT:
            return None
        if isinstance(value, (int, float)) and pd.isna(value):
            return None
        if hasattr(value, 'strftime'):
            # Handle datetime/timestamp objects
            return value.strftime('%Y-%m-%d')
        if isinstance(value, (list, dict, str, int, float, bool)):
            return value
        return str(value)
    except Exception:
        # Fallback: convert to string or None
        return str(value) if value is not None else None


def answer_bill_question(bill_data: dict, question: str) -> str:
    """Answer a question about a specific bill using LangChain.

    The bill mapping is converted to JSON (dates as 'YYYY-MM-DD', missing
    values as null) and sent to the Q&A LLM together with the question.

    Returns:
        The model's answer, or an error message string on failure (the error
        is also shown with ``st.error``).
    """
    try:
        llm = get_qa_llm()

        # Create the prompt template
        qa_prompt = ChatPromptTemplate.from_template(
            """You are a legislative analyst expert at interpreting AI governance bills. A user has asked a question about a specific bill. Use the bill information provided as JSON to answer their question accurately and comprehensively.

Guidelines:
- Answer based only on the information provided in the bill JSON
- Be specific and cite relevant sections when possible
- If the information isn't available in the bill, clearly state that
- Keep your answer focused and relevant to the question
- Use clear, accessible language
- Always include the legiscan link to the bill in your answer
- Format your answer as markdown

Bill JSON:
```json
{bill_json}
```

User Question: {question}

Please provide a detailed answer based on the bill information above.
"""
        )

        # Convert timestamps and other non-serializable objects to strings
        serializable_bill_data = {
            key: _to_json_safe(value) for key, value in bill_data.items()
        }

        # default=str is deliberate: values nested inside lists/dicts are not
        # visited by _to_json_safe and must not abort the whole request.
        bill_json = json.dumps(
            serializable_bill_data, ensure_ascii=False, indent=2, default=str
        )

        # Create chain and invoke
        chain = qa_prompt | llm
        result = chain.invoke({
            "bill_json": bill_json,
            "question": question,
        })

        # Extract content from result
        return getattr(result, "content", str(result))
    except Exception as e:
        st.error(f"Error in Q&A: {e}")
        return f"Error processing question: {str(e)}"


@st.cache_resource
def load_eu_ai_act_vectorstore():
    """Load the EU AI Act vectorstore from disk.

    Returns:
        tuple: ``(vectorstore, None)`` on success, ``(None, error_message)``
        when the directory is missing or loading fails.
    """
    # NOTE(review): st.cache_resource caches the return value, so a
    # (None, error) result stays cached after the vectorstore is created
    # later -- confirm whether the cache should be cleared on failure.
    vectorstore_path = "data/eu_ai_act_vectorstore"
    try:
        if not Path(vectorstore_path).exists():
            st.warning(f"EU AI Act vectorstore not found at {vectorstore_path}")
            return None, "Vectorstore not found. Please run the EU AI Act processing script first."

        embeddings = get_embeddings()

        # SECURITY: allow_dangerous_deserialization unpickles the stored index;
        # only safe while data/eu_ai_act_vectorstore is produced by trusted code.
        vectorstore = FAISS.load_local(
            vectorstore_path,
            embeddings,
            allow_dangerous_deserialization=True,
        )
        print("✅ EU AI Act vectorstore loaded successfully")
        return vectorstore, None
    except Exception as e:
        error_msg = f"Error loading EU AI Act vectorstore: {str(e)}"
        st.error(error_msg)
        return None, error_msg


@st.cache_data
def get_eu_vectorstore_info():
    """Get information about the EU AI Act vectorstore.

    Returns the unpickled content of ``metadata.pickle`` from the vectorstore
    directory, or ``{"error": ...}`` when the file is missing or unreadable.
    """
    try:
        metadata_path = Path("data/eu_ai_act_vectorstore") / "metadata.pickle"
        if not metadata_path.exists():
            return {"error": "Metadata not found"}
        with metadata_path.open('rb') as f:
            # SECURITY: pickle.load executes arbitrary code from the file;
            # acceptable only because this file is generated locally by the
            # project -- never point this at untrusted data.
            return pickle.load(f)
    except Exception as e:
        return {"error": str(e)}


def compare_bill_with_eu_ai_act(bill_data, question):
    """Compare a US bill with the EU AI Act using RAG approach.

    Retrieves the 4 most similar EU AI Act chunks and the 3 most similar
    chunks of the US bill for ``question`` and asks the Q&A LLM for a
    markdown comparison, followed by a short sources footer.

    Returns:
        str: the answer, or an error message string on failure.
    """
    try:
        # Load EU AI Act vectorstore
        eu_vectorstore, error = load_eu_ai_act_vectorstore()
        if eu_vectorstore is None:
            return f"Error loading EU AI Act data: {error}"

        # Create US bill vectorstore
        us_vectorstore = create_vectorstore_from_bills([bill_data])

        eu_retriever = eu_vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 4},  # Get top 4 relevant EU sections
        )
        us_retriever = us_vectorstore.as_retriever(
            search_type="similarity",
            search_kwargs={"k": 3},  # Get top 3 relevant US bill sections
        )

        # Retrievers are Runnables; invoke() replaces the deprecated
        # get_relevant_documents().
        eu_docs = eu_retriever.invoke(question)
        us_docs = us_retriever.invoke(question)

        # Format bill information
        bill_info = _bill_prompt_label(bill_data)

        # Create the comparison prompt
        comparison_prompt = ChatPromptTemplate.from_template(
            """You are a legal analyst expert at comparing AI governance frameworks between the US and EU. You have been provided with relevant excerpts from a US bill and the EU AI Act to answer a comparison question.

IMPORTANT CONTEXT:
- US Bill: {bill_info}
- EU Framework: Regulation (EU) 2024/1689 on Artificial Intelligence (AI Act)

Your task is to compare these regulatory frameworks based on the user's question, using the relevant excerpts provided below.

Guidelines for your analysis:
- Clearly distinguish between US and EU approaches
- Highlight similarities and differences in regulatory philosophy
- Be specific about provisions, definitions, and requirements from each framework
- Note any gaps or areas not covered by either framework
- Consider the different legal systems and enforcement mechanisms
- Structure your response with clear sections for each framework
- Conclude with a summary of key similarities, differences, and implications
- Always include the legiscan link to the US bill in your response
- Format your answer as markdown

Relevant excerpts from the US Bill:
{us_context}

Relevant excerpts from the EU AI Act:
{eu_context}

User Question: {input}

Please provide a comprehensive comparison analysis based on the excerpts above.
"""
        )

        # Prepare context
        us_context = "\n\n".join(doc.page_content for doc in us_docs)
        eu_context = "\n\n".join(doc.page_content for doc in eu_docs)

        # Get LLM and create chain
        chain = comparison_prompt | get_qa_llm()

        # Run the comparison
        result = chain.invoke({
            "input": question,
            "bill_info": bill_info,
            "us_context": us_context,
            "eu_context": eu_context,
        })

        # Extract content from result
        answer = getattr(result, "content", str(result))

        # Add source information
        answer += "\n\n---\n\n**Sources used in this analysis:**\n\n"
        answer += f"**US Bill:** {bill_info}\n"
        answer += "**EU Framework:** EU AI Act (Regulation 2024/1689)\n"
        return answer
    except Exception as e:
        st.error(f"Error in EU comparison: {e}")
        return f"Error during EU comparison analysis: {str(e)}"


@st.cache_data
def load_bill_reports() -> dict:
    """Load pre-generated bill reports from JSON file.

    Reads ``data/bill_reports.json`` (a list of objects with ``bill_id`` and
    ``report_markdown``) and returns ``{str(bill_id): report_markdown}``.
    Returns an empty dict when the file is missing or cannot be parsed.
    """
    reports_file = Path("data/bill_reports.json")
    try:
        if not reports_file.exists():
            st.warning(f"Reports file not found: {reports_file}")
            return {}
        with open(reports_file, 'r', encoding='utf-8') as f:
            reports_data = json.load(f)
        # Keys are stringified because get_bill_report() looks reports up by
        # str(bill_id); integer ids in the JSON would otherwise never match.
        reports = {
            str(report['bill_id']): report['report_markdown']
            for report in reports_data
        }
        print(f"Loaded {len(reports)} pre-generated reports")
        return reports
    except Exception as e:
        st.error(f"Error loading reports: {e}")
        return {}


def get_bill_report(bill_data, reports_cache):
    """Get report for a bill from cache or return message.

    The lookup key is ``str(bill_data.get('bill_id', ''))``.
    """
    bill_id = str(bill_data.get('bill_id', ''))
    return reports_cache.get(
        bill_id, "No pre-generated report available for this bill."
    )

reports_cache = load_bill_reports()


@st.cache_data
def load_bill_summaries() -> dict:
    """Load pre-generated bill summaries from JSON file.

    Returns the parsed content of ``data/bill_summaries.json``, or an empty
    dict when the file is missing or cannot be read.
    """
    summaries_file = Path("data/bill_summaries.json")
    try:
        if not summaries_file.exists():
            st.warning(f"Summaries file not found: {summaries_file}")
            return {}
        with open(summaries_file, 'r', encoding='utf-8') as f:
            summaries = json.load(f)
        print(f"Loaded {len(summaries)} pre-generated summaries")
        return summaries
    except Exception as e:
        st.error(f"Error loading summaries: {e}")
        return {}


@st.cache_data
def load_bill_suggested_questions() -> dict:
    """Load pre-generated suggested questions from JSON file.

    Returns the parsed content of ``data/bill_suggested_questions.json``, or
    an empty dict when the file is missing or cannot be read.
    """
    questions_file = Path("data/bill_suggested_questions.json")
    try:
        if not questions_file.exists():
            st.warning(f"Questions file not found: {questions_file}")
            return {}
        with open(questions_file, 'r', encoding='utf-8') as f:
            questions = json.load(f)
        print(f"Loaded {len(questions)} pre-generated question sets")
        return questions
    except Exception as e:
        st.error(f"Error loading questions: {e}")
        return {}


def get_bill_suggested_questions(bill_data, questions_cache):
    """Get suggested questions for a bill from cache or return fallback.

    The cache key is ``"<state>_<bill_number>"``. Cached questions are used
    only when exactly five are present; otherwise five static example
    questions are returned.
    """
    bill_key = f"{bill_data.get('state', 'Unknown')}_{bill_data.get('bill_number', 'Unknown')}"
    # `or` guards tolerate a null entry or a null question list in the JSON.
    entry = questions_cache.get(bill_key) or {}
    questions = entry.get('suggested_questions') or []
    if len(questions) == 5:
        return questions

    # Fallback to static example questions
    return [
        "What are the key definitions in this bill?",
        "What are the enforcement mechanisms?",
        "Who does this bill apply to?",
        "What are the compliance requirements?",
        "What penalties are specified?",
    ]


def get_bill_summary(bill_data, summaries_cache):
    """Get summary for a bill from cache or return error message.

    The cache key is ``"<state>_<bill_number>"``. A stored summary that
    starts with ``'ERROR:'`` is reported as a failed generation.
    """
    bill_key = f"{bill_data.get('state', 'Unknown')}_{bill_data.get('bill_number', 'Unknown')}"
    if bill_key not in summaries_cache:
        return "No pre-generated summary available. Run the summary generation script first."

    # A null entry or null summary is treated as an empty summary instead of
    # raising AttributeError.
    entry = summaries_cache[bill_key] or {}
    summary = entry.get('summary') or ''
    if summary.startswith('ERROR:'):
        return f"Summary generation failed: {summary}"
    return summary


@st.cache_data(ttl=3600)
def load_from_huggingface():
    """Download ``known_bills_visualize.json`` from a Hugging Face dataset repo.

    The repo name is read from the ``dataset_repo`` environment variable and
    an optional bearer token from ``HUGGINGFACE_HUB_TOKEN``.

    Returns:
        The parsed JSON payload, or ``None`` when the repo is not configured
        or the request fails (a warning is shown in both cases).
    """
    repo = os.getenv("dataset_repo")
    token = os.getenv("HUGGINGFACE_HUB_TOKEN")
    if not repo:
        st.warning("dataset_repo is missing.")
        return None

    url = f"https://huggingface.co/datasets/{repo}/resolve/main/known_bills_visualize.json"
    headers = {"Authorization": f"Bearer {token}"} if token else {}
    try:
        response = requests.get(url, headers=headers, timeout=30)
        response.raise_for_status()
        data = response.json()
        st.success(f"Loaded {len(data)} bills from HuggingFace")
        return data
    except Exception as e:
        st.warning(f"HF load failed: {str(e)}")
        return None


def load_and_process_data() -> "pd.DataFrame | None":
    """Load data from HuggingFace first, fall back to local.

    Returns:
        A DataFrame with ``last_action_date`` and ``lastUpdatedAt`` parsed to
        datetimes (unparseable values become NaT) when those columns exist,
        or ``None`` when no data source is available or processing fails.
    """
    start_time = time.time()

    # Strategy 1: Try HuggingFace first
    bills_data = load_from_huggingface()

    # Strategy 2: Fall back to local file if HuggingFace failed
    if bills_data is None:
        st.info("Loading from local file...")
        json_path = Path("data/known_bills_visualize.json")
        if not json_path.exists():
            st.error("No data available")
            return None
        try:
            with json_path.open("r", encoding='utf-8') as f:
                bills_data = json.load(f)
            st.info(f"Loaded {len(bills_data)} bills from local")
        except Exception as e:
            st.error(f"Error loading local: {e}")
            return None

    # The local file may legitimately contain JSON `null`.
    if bills_data is None:
        return None

    # Convert to DataFrame
    try:
        df = pd.DataFrame(bills_data)
        # Convert dates
        for date_column in ("last_action_date", "lastUpdatedAt"):
            if date_column in df.columns:
                df[date_column] = pd.to_datetime(df[date_column], errors="coerce")
        print(f"DataFrame created in {time.time() - start_time:.2f} seconds")
        return df
    except Exception as e:
        st.error(f"Error processing data: {e}")
        return None


def display_bill_details(bill_data, summaries_cache):
    """Display bill details and summary in a formatted way.

    Renders state, bill number, status and last action date in a left
    column; up to three IAPP subcategories and up to two sponsors in a right
    column; then the title and the pre-generated summary (as a warning box
    when the summary is missing or failed).
    """
    st.markdown("#### Bill Details")

    # Create columns for better layout
    col1, col2 = st.columns(2)

    with col1:
        st.write(f"**State:** {bill_data.get('state', 'N/A')}")
        st.write(f"**Bill Number:** {bill_data.get('bill_number', 'N/A')}")
        st.write(f"**Status:** {bill_data.get('status', 'N/A')}")

        # Format last action date
        if 'last_action_date' in bill_data and pd.notna(bill_data['last_action_date']):
            last_action = bill_data['last_action_date']
            if not isinstance(last_action, str):
                last_action = last_action.strftime('%Y-%m-%d')
            st.write(f"**Last Action Date:** {last_action}")
        else:
            st.write("**Last Action Date:** N/A")

    with col2:
        # Extract IAPP categories for display
        if 'iapp_categories' in bill_data and isinstance(bill_data['iapp_categories'], dict):
            all_subcategories = [
                subcategory
                for subcategories in bill_data['iapp_categories'].values()
                if isinstance(subcategories, list)
                for subcategory in subcategories
            ]
            if all_subcategories:
                # Show first 3; str() because join() rejects non-string items.
                iapp_display = ", ".join(map(str, all_subcategories[:3]))
                if len(all_subcategories) > 3:
                    iapp_display += f" + {len(all_subcategories) - 3} more"
            else:
                iapp_display = "None"
        else:
            iapp_display = "N/A"
        st.write(f"**Categories:** {iapp_display}")

        # Show sponsors if available
        sponsors = bill_data.get('sponsors', 'N/A')
        if isinstance(sponsors, list):
            # Show first 2 sponsors; str() because join() rejects non-string items.
            sponsors_display = ", ".join(map(str, sponsors[:2]))
            if len(sponsors) > 2:
                sponsors_display += f" + {len(sponsors) - 2} more"
        else:
            sponsors_display = str(sponsors) if sponsors else "N/A"
        st.write(f"**Sponsors:** {sponsors_display}")

    # Show full title
    # NOTE(review): indentation was lost in this copy of the file; the title is
    # assumed to render full-width below the columns -- confirm.
    st.write(f"**Title:** {bill_data.get('title', 'N/A')}")

    # Display pre-generated summary
    st.markdown("#### Bill Summary")
    summary = get_bill_summary(bill_data, summaries_cache)
    if summary.startswith(('Summary generation failed', 'No pre-generated summary')):
        st.warning(summary)
    else:
        st.info(summary)


df = load_and_process_data()
summaries_cache = load_bill_summaries()
questions_cache = load_bill_suggested_questions()

if df is None:
    st.write("No data available. Ensure 'known_bills_visualize.json' is populated (or HF dataset + token configured).")
    st.stop()

# Sidebar for filters with improved styling with st.sidebar: # Add logo to the sidebar logo_path = "vaill_logo.png" try: st.image(logo_path, width="stretch") except FileNotFoundError: st.warning("Logo image 'vaill_logo.png' not found.") st.markdown("### Filter Controls") # Date Filter Section date_df = ( df.dropna(subset=["last_action_date"]) if "last_action_date" in df.columns else pd.DataFrame() ) if not date_df.empty: current_date = datetime.now().date() df_dates = df[df["last_action_date"].notna()]["last_action_date"] min_year = df_dates.min().year max_year = min(df_dates.max().year, current_date.year) st.markdown("#### Date Range") filter_type = st.radio( "Filter by:", options=["No Date Filter", "Year Only", "Year & Month"], index=0, help="Choose how to filter bills by their last action date" ) # Initialize filtered_df filtered_df = df.copy() if filter_type == "Year Only": available_years = sorted(df_dates.dt.year.unique()) available_years = [year for year in available_years if year <= current_date.year] if available_years: selected_years = st.multiselect( "Select Years:", options=available_years, default=[max(available_years)], help="Select one or more years to filter bills" ) if selected_years: mask = df["last_action_date"].dt.year.isin(selected_years) filtered_df = df[mask].copy() if len(selected_years) == 1: year_range = f"{selected_years[0]}" else: year_range = f"{min(selected_years)}-{max(selected_years)}" st.success(f"Filtering: 
{year_range}") else: st.warning("No years available for filtering") elif filter_type == "Year & Month": available_years = list(range(min_year, max_year + 1)) col1, col2 = st.columns(2) with col1: start_year = st.selectbox( "From Year:", options=available_years, index=max(0, len(available_years) - 2) if len(available_years) > 1 else 0, key="start_year_select" ) with col2: end_year_options = list(range(start_year, max_year + 1)) end_year = st.selectbox( "To Year:", options=end_year_options, index=len(end_year_options) - 1, key="end_year_select" ) months = { 1: "January", 2: "February", 3: "March", 4: "April", 5: "May", 6: "June", 7: "July", 8: "August", 9: "September", 10: "October", 11: "November", 12: "December" } col3, col4 = st.columns(2) with col3: start_month = st.selectbox( "From Month:", options=list(months.keys()), format_func=lambda x: months[x], index=0, key="start_month_select" ) with col4: max_end_month = 12 if end_year == current_date.year: max_end_month = current_date.month end_month_options = list(range(1, max_end_month + 1)) if end_month_options: end_month = st.selectbox( "To Month:", options=end_month_options, format_func=lambda x: months[x], index=len(end_month_options) - 1, key="end_month_select" ) else: end_month = 1 st.warning("Invalid month range") try: start_date = date(start_year, start_month, 1) if end_month == 12: last_day = date(end_year + 1, 1, 1) - timedelta(days=1) else: last_day = date(end_year, end_month + 1, 1) - timedelta(days=1) end_date = min(last_day, current_date) if start_date <= end_date: mask = (df["last_action_date"].dt.date >= start_date) & ( df["last_action_date"].dt.date <= end_date ) filtered_df = df[mask].copy() st.success(f"Filtering: {start_date.strftime('%b %Y')} - {end_date.strftime('%b %Y')}") else: st.error("Start date must be before end date") except ValueError as e: st.error(f"Invalid date range: {e}") if filter_type != "No Date Filter" and not filtered_df.empty: date_stats = 
filtered_df["last_action_date"].dropna() if not date_stats.empty: st.info(f"{len(date_stats)} bills with dates in range") else: filtered_df = df.copy() st.warning("No date information available for filtering") st.markdown("#### Bill Type") bill_type_filter = st.radio( "Show bills:", options=["All Bills", "State Bills Only", "Federal Bills Only"], index=0 ) # Apply the filter if bill_type_filter == "State Bills Only": filtered_df = filtered_df[filtered_df["state"] != "US"] elif bill_type_filter == "Federal Bills Only": filtered_df = filtered_df[filtered_df["state"] == "US"] # IAPP Categories Filter if "iapp_categories" in filtered_df.columns: st.markdown("#### Categories") all_iapp_categories = set() filtered_df["iapp_categories"].apply( lambda x: all_iapp_categories.update(x.keys()) if isinstance(x, dict) else None ) if all_iapp_categories: for category in sorted(all_iapp_categories): if category in IAPP_CATEGORIES: all_subcategories = set() filtered_df["iapp_categories"].apply( lambda x: all_subcategories.update(x.get(category, [])) if isinstance(x, dict) and category in x else None ) if all_subcategories: subcategory_options = sorted(all_subcategories) selected_subcategories = st.multiselect( f"{category}", options=subcategory_options, default=[], key=f"iapp_{category.lower().replace(' ', '_')}" ) if selected_subcategories: filtered_df = filtered_df[ filtered_df["iapp_categories"].apply( lambda x: ( any(subcat in x.get(category, []) for subcat in selected_subcategories) if isinstance(x, dict) and category in x else False ) ) ] # Main content with tab-based layout (tab1, tab2, tab3, tab4, tab5, tab6) = st.tabs([ TOOL_DESCRIPTIONS["bills_table"]["name"], TOOL_DESCRIPTIONS["bills_map"]["name"], TOOL_DESCRIPTIONS["state_status_viz"]["name"], TOOL_DESCRIPTIONS["category_viz"]["name"], TOOL_DESCRIPTIONS["temporal_viz"]["name"], TOOL_DESCRIPTIONS["ai_toolkit"]["name"] ]) # TAB 1: BILLS EXPLORER with tab1: st.markdown(f'

{TOOL_DESCRIPTIONS["bills_table"]["description"]}

', unsafe_allow_html=True) if filtered_df.empty: st.warning("No bills match the selected filters.") else: # Separate federal and state bills federal_bills = filtered_df[filtered_df["state"] == "US"] state_bills = filtered_df[filtered_df["state"] != "US"] # Helper function for last updated date def get_last_updated_date(): if "lastUpdatedAt" in filtered_df.columns and not filtered_df.empty: valid_dates = filtered_df[filtered_df["lastUpdatedAt"].notna()] if not valid_dates.empty: most_recent = valid_dates["lastUpdatedAt"].max() return most_recent.strftime("%Y-%m-%d") if pd.notna(most_recent) else "N/A" return "N/A" # Metrics section st.markdown('

', unsafe_allow_html=True) st.markdown('

Database Overview

Current statistics for filtered bill dataset

', unsafe_allow_html=True) st.markdown('

', unsafe_allow_html=True) if bill_type_filter == "Federal Bills Only": col1, col2, col3 = st.columns(3) with col1: st.markdown('

{}

Federal Bills

'.format(len(federal_bills)), unsafe_allow_html=True) with col2: current_year = datetime.now().year this_year_bills = len(filtered_df[filtered_df["last_action_date"].dt.year == current_year]) if "last_action_date" in filtered_df.columns else 0 st.markdown('

{}

Bills This Year

'.format(this_year_bills), unsafe_allow_html=True) with col3: st.markdown('

{}

Last Updated

'.format(get_last_updated_date()), unsafe_allow_html=True) elif bill_type_filter == "State Bills Only": col1, col2, col3 = st.columns(3) with col1: st.markdown('

{}

State Bills

'.format(len(state_bills)), unsafe_allow_html=True) with col2: if not state_bills.empty: state_counts = state_bills["state"].value_counts() most_active = state_counts.index[0] if not state_counts.empty else "N/A" count = state_counts.iloc[0] if not state_counts.empty else 0 display_text = f"{most_active} ({count})" if most_active != "N/A" else "N/A" st.markdown('

{}

Most Active State

'.format(display_text), unsafe_allow_html=True) else: st.markdown('

N/A

Most Active State

', unsafe_allow_html=True) with col3: st.markdown('

{}

Last Updated

'.format(get_last_updated_date()), unsafe_allow_html=True) else: # All Bills col1, col2, col3, col4 = st.columns(4) with col1: st.markdown('

{}

Federal Bills

'.format(len(federal_bills)), unsafe_allow_html=True) with col2: st.markdown('

{}

State Bills

'.format(len(state_bills)), unsafe_allow_html=True) with col3: if not state_bills.empty: state_counts = state_bills["state"].value_counts() most_active = state_counts.index[0] if not state_counts.empty else "N/A" count = state_counts.iloc[0] if not state_counts.empty else 0 display_text = f"{most_active} ({count})" if most_active != "N/A" else "N/A" st.markdown('

{}

Most Active State

'.format(display_text), unsafe_allow_html=True) else: st.markdown('

N/A

Most Active State

', unsafe_allow_html=True) with col4: st.markdown('

{}

Last Updated

'.format(get_last_updated_date()), unsafe_allow_html=True) st.markdown('

', unsafe_allow_html=True) # Bills table section st.markdown('

', unsafe_allow_html=True) st.markdown('

Legislation Database

Comprehensive listing of AI governance legislation

', unsafe_allow_html=True) st.markdown('

', unsafe_allow_html=True) if bill_type_filter == "State Bills Only": st.write(f"Showing {len(filtered_df)} state bills") elif bill_type_filter == "Federal Bills Only": st.write(f"Showing {len(filtered_df)} federal bills") else: federal_count = len(filtered_df[filtered_df["state"] == "US"]) state_count = len(filtered_df[filtered_df["state"] != "US"]) st.write(f"Showing {len(filtered_df)} bills ({federal_count} federal, {state_count} state)") display_df = filtered_df.copy() # Process IAPP Categories for display def extract_iapp_subcategories(iapp_categories): """Extract all subcategories from IAPP categories dict and format for display.""" if not isinstance(iapp_categories, dict) or not iapp_categories: return "None" all_subcategories = [] for category, subcategories in iapp_categories.items(): if isinstance(subcategories, list): all_subcategories.extend(subcategories) if not all_subcategories: return "None" subcategories_str = ", ".join(all_subcategories) return subcategories_str # Add IAPP categories column if the field exists if "iapp_categories" in display_df.columns: display_df["iapp_categories_display"] = display_df["iapp_categories"].apply(extract_iapp_subcategories) else: display_df["iapp_categories_display"] = "None" # Define column mappings for display column_mapping = { "state": "State", "bill_number": "Bill Number", "title": "Title", "status": "Status", "iapp_categories_display": "Categories", "last_action_date": "Last Action Date", "sponsors": "Sponsors" } # Format dates for display with validation if "last_action_date" in display_df.columns: current_date = datetime.now() def format_date(row): if pd.isna(row["last_action_date"]): return "N/A" elif row["last_action_date"] > current_date: return f"{row['last_action_date'].strftime('%Y-%m-%d')} (FUTURE DATE - INVALID)" else: return row['last_action_date'].strftime('%Y-%m-%d') display_df["formatted_last_action_date"] = display_df.apply(format_date, axis=1) display_df["last_action_date"] = 
display_df["formatted_last_action_date"] display_df = display_df.drop(columns=["formatted_last_action_date"]) # Create a column with proper links if bill_url exists if "bill_url" in display_df.columns and "bill_number" in display_df.columns: display_df["View"] = display_df.apply( lambda row: ( f"{row['bill_url']}" if pd.notna(row["bill_url"]) and row["bill_url"] else "" ), axis=1, ) # Select and rename columns for display display_columns = [ col for col in column_mapping.keys() if col in display_df.columns ] if "View" in display_df.columns: display_columns.append("View") display_df = display_df[display_columns].copy() display_df = display_df.rename(columns=column_mapping) # Display the dataframe with clickable links and IAPP categories truncation column_config = { "View": st.column_config.LinkColumn("Link"), "Categories": st.column_config.TextColumn( "Categories", help="AI governance subcategories from IAPP framework", max_chars=50, width="medium" ) } st.data_editor( display_df, column_config=column_config, hide_index=True, use_container_width=True, disabled=True, height=500, ) # Add CSV download option csv = display_df.to_csv(index=False) st.download_button( label="Download Table as CSV", data=csv, file_name="filtered_bills.csv", mime="text/csv", ) st.markdown('

', unsafe_allow_html=True) # How to Use This Tracker and About the Data sections st.markdown("""

How to Use This Tracker

For Policymakers: Quickly compare legislative approaches from other states to inform drafting and decision-making.
For Researchers: Access a centralized database of AI-related bills to analyze trends and export data for academic study.
For the Public: Stay informed about how your state is regulating AI technology and understand proposed laws.

About the Data

The data in this tracker is compiled from state legislative records, primarily utilizing the Legiscan API. The tracker focuses on state-level legislation, with federal bills included for context but separated in counts and views. Bill statuses are simplified into "Signed Into Law", "Active", and "Inactive" for clarity.

""", unsafe_allow_html=True) # TAB 2: GEOSPATIAL INSIGHTS # ---- helper: cached GeoJSON loader ---- @st.cache_data(show_spinner=False) def load_us_states_geojson(): url = "https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/us-states.json" try: return requests.get(url, timeout=10).json() except Exception: with open("us-states.json", "r") as f: return json.load(f) with tab2: st.markdown(f'

{TOOL_DESCRIPTIONS["bills_map"]["description"]}

', unsafe_allow_html=True) st.markdown('

Geographic Distribution Map

Interactive visualization of AI governance bills across US states

', unsafe_allow_html=True) st.markdown('

', unsafe_allow_html=True) # --- Year selector (expects values like "2025-2026") --- year_options = sorted([str(y) for y in filtered_df['session_year'].dropna().unique()]) if year_options: selected_year = st.selectbox("Session year", year_options, index=len(year_options)-1) df_year = filtered_df[filtered_df['session_year'] == selected_year].copy() else: selected_year = None df_year = filtered_df.copy() # --- Counts by state (expects 2-letter postal abbreviations) --- tmp = df_year.copy() tmp['state'] = tmp['state'].astype(str).str.upper() counts_df = ( tmp['state'] .value_counts() .rename_axis('state') .reset_index(name='bills') ) state_to_count = dict(zip(counts_df['state'], counts_df['bills'])) # --- Build per-state popup HTML (scrollable) --- title_col = 'title' if 'title' in tmp.columns else ('bill_title' if 'bill_title' in tmp.columns else None) bn_col = 'bill_number' if 'bill_number' in tmp.columns else ('number' if 'number' in tmp.columns else None) def _bill_label(row): bn = str(row.get(bn_col, '') or '').strip() if bn_col else '' tt = str(row.get(title_col, '') or '').strip() if title_col else '' bn = html.escape(bn) tt = html.escape(tt) if bn and tt: return f"

{bn}: {tt}

" elif bn: return f"

{bn}

" elif tt: return f"

{tt}

" return "

(unnamed bill)

" def _group_to_ul(g: pd.DataFrame) -> str: # Be robust to pandas versions: 'state' may or may not be present in g if 'state' in g.columns: g = g.drop(columns='state') return "

" + "".join(_bill_label(r) for _, r in g.iterrows()) + "" if not tmp.empty: # Try pandas >= 2.2 signature first (supports include_groups) try: bills_by_state = ( tmp.groupby('state', group_keys=False) .apply(_group_to_ul, include_groups=False) .to_dict() ) except TypeError: # Older pandas: no include_groups; still safe due to the 'state' check in _group_to_ul bills_by_state = ( tmp.groupby('state', group_keys=False) .apply(_group_to_ul) .to_dict() ) else: bills_by_state = {} # --- GeoJSON & properties (also build scrollable popup HTML) --- us_states = load_us_states_geojson() for feat in us_states.get('features', []): abbr = (feat.get('id') or "").upper() props = feat.setdefault('properties', {}) props['bills'] = int(state_to_count.get(abbr, 0)) state_name = html.escape(props.get('name', '')) total = props['bills'] list_html = bills_by_state.get(abbr, "No bills for the selected session.") count_label = "bill" if total == 1 else "bills" props['popup_html'] = ( f"

" f"

{state_name} — {total} {count_label}" + (f" ({html.escape(selected_year)})" if selected_year else "") + "

" f"

{list_html}

" f"

" ) # --- Map --- m = folium.Map(location=[39.8283, -98.5795], zoom_start=4, tiles="OpenStreetMap") # Choropleth (one-tone Blues) folium.Choropleth( geo_data=us_states, name="Bills choropleth", data=counts_df, columns=["state", "bills"], key_on="feature.id", fill_color="Blues", fill_opacity=0.85, line_opacity=0.2, # thin internal boundaries from choropleth layer line_color="#ffffff", nan_fill_color="#f0f0f0", legend_name=f"AI governance bills by state ({selected_year})" if selected_year else "AI governance bills by state", ).add_to(m) # Outline layer so borders are clearly visible folium.GeoJson( us_states, name="State boundaries", style_function=lambda x: { "fillOpacity": 0, "color": "#666666", "weight": 1.2 }, highlight_function=lambda x: {"weight": 2, "color": "#333333", "fillOpacity": 0}, tooltip=folium.features.GeoJsonTooltip( fields=["name", "bills"], aliases=["State", "Bills"], sticky=True, localize=True, ) ).add_to(m) # Transparent layer for clickable popups (scrollable content) folium.GeoJson( us_states, name="State popups", style_function=lambda x: {"color": "#00000000", "fillOpacity": 0, "weight": 0}, popup=folium.features.GeoJsonPopup( fields=["popup_html"], labels=False, localize=True, parse_html=True, max_width=500, # width cap; vertical scroll inside content ), ).add_to(m) folium.LayerControl(collapsed=True).add_to(m) st_folium(m, width=1200, height=700) st.markdown('

', unsafe_allow_html=True) # TAB 3: STATE & STATUS ANALYSIS with tab3: st.markdown(f'

{TOOL_DESCRIPTIONS["state_status_viz"]["description"]}

', unsafe_allow_html=True) # TAB 3: STATE & STATUS ANALYSIS st.markdown('

', unsafe_allow_html=True) st.markdown('

State & Status Distribution

Analyze bill distribution across states and their legislative status

', unsafe_allow_html=True) st.markdown('

', unsafe_allow_html=True) if filtered_df.empty: st.warning("No bills match the selected filters.") else: # Filter out federal bills for state analysis state_bills_df = filtered_df[filtered_df["state"] != "US"].copy() if not state_bills_df.empty: # Bills by State st.markdown("### Bills by State") state_counts = state_bills_df["state"].value_counts().reset_index() state_counts.columns = ["State", "Number of Bills"] fig_state = px.bar( state_counts.head(20), x="Number of Bills", y="State", orientation='h', title="Top 20 States by Number of Bills", color="Number of Bills", color_continuous_scale="Blues" ) fig_state.update_layout( height=600, showlegend=False, yaxis={'categoryorder': 'total ascending'} ) st.plotly_chart(fig_state, use_container_width=True) # Bills by State and Status if "status" in state_bills_df.columns: st.markdown("### Bills by State and Status") # Create pivot table for stacked bar chart state_status_df = state_bills_df.groupby(["state", "status"]).size().reset_index(name="count") # Get top 15 states by total bills top_states = state_bills_df["state"].value_counts().head(15).index.tolist() state_status_filtered = state_status_df[state_status_df["state"].isin(top_states)] fig_state_status = px.bar( state_status_filtered, x="state", y="count", color="status", title="Top 15 States: Bill Distribution by Status", labels={"count": "Number of Bills", "state": "State", "status": "Status"}, barmode="stack" ) fig_state_status.update_layout(height=500) st.plotly_chart(fig_state_status, use_container_width=True) # Status Distribution Overall st.markdown("### Overall Status Distribution") col1, col2 = st.columns(2) with col1: status_counts = state_bills_df["status"].value_counts().reset_index() status_counts.columns = ["Status", "Number of Bills"] fig_status_pie = px.pie( status_counts, values="Number of Bills", names="Status", title="Bill Status Distribution" ) st.plotly_chart(fig_status_pie, use_container_width=True) with col2: fig_status_bar = px.bar( 
status_counts, x="Status", y="Number of Bills", title="Bill Count by Status", color="Number of Bills", color_continuous_scale="Viridis" ) fig_status_bar.update_layout(showlegend=False) st.plotly_chart(fig_status_bar, use_container_width=True) else: st.info("No state bills available. Showing only federal bills in your filter.") # Federal Bills Status (if any) federal_bills_df = filtered_df[filtered_df["state"] == "US"].copy() if not federal_bills_df.empty and "status" in federal_bills_df.columns: st.markdown("### Federal Bills Status") federal_status_counts = federal_bills_df["status"].value_counts().reset_index() federal_status_counts.columns = ["Status", "Number of Bills"] fig_federal = px.bar( federal_status_counts, x="Status", y="Number of Bills", title="Federal Bills by Status", color="Number of Bills", color_continuous_scale="Reds" ) fig_federal.update_layout(showlegend=False) st.plotly_chart(fig_federal, use_container_width=True) st.markdown('

', unsafe_allow_html=True) with tab4: st.markdown(f'

{TOOL_DESCRIPTIONS["category_viz"]["description"]}

', unsafe_allow_html=True) # TAB 4: CATEGORY ANALYSIS st.markdown('

', unsafe_allow_html=True) st.markdown('

Category Distribution Analysis

Explore bill distribution across regulatory categories

', unsafe_allow_html=True) st.markdown('

', unsafe_allow_html=True) if filtered_df.empty: st.warning("No bills match the selected filters.") elif "iapp_categories" not in filtered_df.columns: st.info("Category information is not available in the dataset.") else: # Extract all categories and subcategories all_categories = [] all_subcategories = [] for idx, row in filtered_df.iterrows(): if isinstance(row["iapp_categories"], dict): for category, subcategories in row["iapp_categories"].items(): if isinstance(subcategories, list): for subcat in subcategories: all_categories.append(category) all_subcategories.append(subcat) if not all_categories: st.info("No category data available for the filtered bills.") else: # Category Distribution st.markdown("### Main Category Distribution") category_counts = pd.DataFrame({"Category": all_categories}) category_counts = category_counts["Category"].value_counts().reset_index() category_counts.columns = ["Category", "Count"] col1, col2 = st.columns(2) with col1: fig_cat_pie = px.pie( category_counts, values="Count", names="Category", title="Bills by Main Category" ) st.plotly_chart(fig_cat_pie, use_container_width=True) with col2: fig_cat_bar = px.bar( category_counts, x="Category", y="Count", title="Bill Count by Main Category", color="Count", color_continuous_scale="Greens" ) fig_cat_bar.update_layout(showlegend=False, xaxis_tickangle=-45) st.plotly_chart(fig_cat_bar, use_container_width=True) # Subcategory Distribution st.markdown("### Subcategory Distribution") subcat_counts = pd.DataFrame({"Subcategory": all_subcategories}) subcat_counts = subcat_counts["Subcategory"].value_counts().reset_index() subcat_counts.columns = ["Subcategory", "Count"] # Show top 20 subcategories fig_subcat = px.bar( subcat_counts.head(20), x="Count", y="Subcategory", orientation='h', title="Top 20 Most Common Subcategories", color="Count", color_continuous_scale="Teal" ) fig_subcat.update_layout( height=600, showlegend=False, yaxis={'categoryorder': 'total ascending'} ) 
st.plotly_chart(fig_subcat, use_container_width=True) # Category-Subcategory Relationship st.markdown("### Category-Subcategory Breakdown") cat_subcat_data = [] for idx, row in filtered_df.iterrows(): if isinstance(row["iapp_categories"], dict): for category, subcategories in row["iapp_categories"].items(): if isinstance(subcategories, list): for subcat in subcategories: cat_subcat_data.append({ "Category": category, "Subcategory": subcat }) if cat_subcat_data: cat_subcat_df = pd.DataFrame(cat_subcat_data) cat_subcat_grouped = cat_subcat_df.groupby(["Category", "Subcategory"]).size().reset_index(name="Count") # Create sunburst chart fig_sunburst = px.sunburst( cat_subcat_grouped, path=["Category", "Subcategory"], values="Count", title="Category-Subcategory Hierarchy" ) fig_sunburst.update_layout(height=600) st.plotly_chart(fig_sunburst, use_container_width=True) # Category by State (if state data exists) if "state" in filtered_df.columns: st.markdown("### Categories by State") # Get top 10 states top_states = filtered_df["state"].value_counts().head(10).index.tolist() state_cat_data = [] for idx, row in filtered_df.iterrows(): if row["state"] in top_states and isinstance(row["iapp_categories"], dict): for category in row["iapp_categories"].keys(): state_cat_data.append({ "State": row["state"], "Category": category }) if state_cat_data: state_cat_df = pd.DataFrame(state_cat_data) state_cat_grouped = state_cat_df.groupby(["State", "Category"]).size().reset_index(name="Count") fig_state_cat = px.bar( state_cat_grouped, x="State", y="Count", color="Category", title="Top 10 States: Bill Distribution by Category", labels={"Count": "Number of Bills"}, barmode="stack" ) fig_state_cat.update_layout(height=500) st.plotly_chart(fig_state_cat, use_container_width=True) st.markdown('

', unsafe_allow_html=True) with tab5: st.markdown(f'

{TOOL_DESCRIPTIONS["temporal_viz"]["description"]}

', unsafe_allow_html=True) # TAB 5: TEMPORAL ANALYSIS st.markdown('

', unsafe_allow_html=True) st.markdown('

Temporal Trends Analysis

Track legislative activity and trends over time

', unsafe_allow_html=True) st.markdown('

', unsafe_allow_html=True) if filtered_df.empty: st.warning("No bills match the selected filters.") elif "last_action_date" not in filtered_df.columns or filtered_df["last_action_date"].isna().all(): st.info("Date information is not available for temporal analysis.") else: # Filter out rows with valid dates temporal_df = filtered_df[filtered_df["last_action_date"].notna()].copy() if temporal_df.empty: st.info("No bills with valid date information in the filtered dataset.") else: # Bills Over Time (by Year) st.markdown("### Bills Activity by Year") temporal_df["year"] = temporal_df["last_action_date"].dt.year # Remove future years current_year = datetime.now().year temporal_df = temporal_df[temporal_df["year"] <= current_year] yearly_counts = temporal_df.groupby("year").size().reset_index(name="count") fig_yearly = px.line( yearly_counts, x="year", y="count", title="Number of Bills by Year (Last Action Date)", markers=True, labels={"year": "Year", "count": "Number of Bills"} ) fig_yearly.update_layout(height=400) st.plotly_chart(fig_yearly, use_container_width=True) # Bills by Year and Status if "status" in temporal_df.columns: st.markdown("### Bills by Year and Status") yearly_status = temporal_df.groupby(["year", "status"]).size().reset_index(name="count") fig_yearly_status = px.bar( yearly_status, x="year", y="count", color="status", title="Bill Activity by Year and Status", labels={"year": "Year", "count": "Number of Bills", "status": "Status"}, barmode="stack" ) fig_yearly_status.update_layout(height=500) st.plotly_chart(fig_yearly_status, use_container_width=True) # Monthly Trend (Recent Years) st.markdown("### Monthly Activity Trend") # Allow user to select year range available_years = sorted(temporal_df["year"].unique()) if len(available_years) > 0: col1, col2 = st.columns(2) with col1: start_year = st.selectbox( "Start Year", options=available_years, index=max(0, len(available_years) - 3) if len(available_years) > 3 else 0 ) with col2: end_year = 
st.selectbox( "End Year", options=[y for y in available_years if y >= start_year], index=len([y for y in available_years if y >= start_year]) - 1 ) # Filter by year range monthly_df = temporal_df[ (temporal_df["year"] >= start_year) & (temporal_df["year"] <= end_year) ].copy() monthly_df["year_month"] = monthly_df["last_action_date"].dt.to_period("M").astype(str) monthly_counts = monthly_df.groupby("year_month").size().reset_index(name="count") fig_monthly = px.line( monthly_counts, x="year_month", y="count", title=f"Monthly Bill Activity ({start_year}-{end_year})", markers=True, labels={"year_month": "Month", "count": "Number of Bills"} ) fig_monthly.update_layout(height=400, xaxis_tickangle=-45) st.plotly_chart(fig_monthly, use_container_width=True) # Heatmap: Bills by Year and State if "state" in temporal_df.columns: st.markdown("### Bills Activity Heatmap: Year vs State") # Get top 15 states top_states = temporal_df["state"].value_counts().head(15).index.tolist() heatmap_df = temporal_df[temporal_df["state"].isin(top_states)].copy() # Create pivot table heatmap_pivot = heatmap_df.groupby(["year", "state"]).size().reset_index(name="count") heatmap_pivot_wide = heatmap_pivot.pivot(index="state", columns="year", values="count").fillna(0) fig_heatmap = px.imshow( heatmap_pivot_wide, labels=dict(x="Year", y="State", color="Number of Bills"), title="Top 15 States: Bill Activity Heatmap by Year", color_continuous_scale="YlOrRd", aspect="auto" ) fig_heatmap.update_layout(height=500) st.plotly_chart(fig_heatmap, use_container_width=True) # Category Trends Over Time if "iapp_categories" in temporal_df.columns: st.markdown("### Category Trends Over Time") # Extract categories per year category_yearly_data = [] for idx, row in temporal_df.iterrows(): if isinstance(row["iapp_categories"], dict) and pd.notna(row["year"]): for category in row["iapp_categories"].keys(): category_yearly_data.append({ "year": row["year"], "category": category }) if category_yearly_data: 
category_yearly_df = pd.DataFrame(category_yearly_data) category_yearly_grouped = category_yearly_df.groupby(["year", "category"]).size().reset_index(name="count") fig_cat_trend = px.line( category_yearly_grouped, x="year", y="count", color="category", title="Category Trends Over Time", markers=True, labels={"year": "Year", "count": "Number of Bills", "category": "Category"} ) fig_cat_trend.update_layout(height=500) st.plotly_chart(fig_cat_trend, use_container_width=True) # Summary Statistics st.markdown("### Temporal Summary Statistics") col1, col2, col3, col4 = st.columns(4) with col1: most_active_year = yearly_counts.loc[yearly_counts["count"].idxmax(), "year"] most_active_count = yearly_counts.loc[yearly_counts["count"].idxmax(), "count"] st.markdown(f'

{int(most_active_year)}

Most Active Year ({int(most_active_count)} bills)

', unsafe_allow_html=True) with col2: date_range = f"{temporal_df['year'].min()}-{temporal_df['year'].max()}" st.markdown(f'

{date_range}

Date Range

', unsafe_allow_html=True) with col3: avg_per_year = yearly_counts["count"].mean() st.markdown(f'

{avg_per_year:.1f}

Avg Bills/Year

', unsafe_allow_html=True) with col4: recent_year = temporal_df["year"].max() recent_count = yearly_counts[yearly_counts["year"] == recent_year]["count"].values[0] st.markdown(f'

{int(recent_count)}

{int(recent_year)} Bills

', unsafe_allow_html=True) st.markdown('

', unsafe_allow_html=True) with tab6: st.markdown(f'

{TOOL_DESCRIPTIONS["ai_toolkit"]["description"]}

', unsafe_allow_html=True) # Analysis type selector st.markdown('

', unsafe_allow_html=True) st.markdown('

Analysis Type Selection

Choose your preferred AI-powered analysis method

', unsafe_allow_html=True) st.markdown('

', unsafe_allow_html=True) analysis_type = st.selectbox( "Select Analysis Type:", options=list(AI_ANALYSIS_TYPES.keys()), format_func=lambda x: AI_ANALYSIS_TYPES[x]["name"], index=0, key="analysis_type_selector" ) st.markdown('

', unsafe_allow_html=True) # Create bill options helper function def create_bill_options(): if "bill_number" in filtered_df.columns and "state" in filtered_df.columns and "title" in filtered_df.columns: bill_options = [ ( f"{row['state']}_{row['bill_number']}", f"{row['state']}_{row['bill_number']}: {row['title'][:50] + '...' if len(row['title']) > 50 else row['title']}" ) for _, row in filtered_df.iterrows() if pd.notna(row["title"]) and row["title"].strip() ] bill_options = sorted(bill_options, key=lambda x: x[0]) bill_keys = [option[0] for option in bill_options] bill_labels = [option[1] for option in bill_options] return bill_keys, bill_labels return [], [] bill_keys, bill_labels = create_bill_options() if not bill_keys: st.warning("No relevant bills with titles available for analysis.") else: # Analysis interface section st.markdown('

', unsafe_allow_html=True) st.markdown('

{}

'.format( AI_ANALYSIS_TYPES[analysis_type]["name"], AI_ANALYSIS_TYPES[analysis_type]["description"] ), unsafe_allow_html=True) st.markdown('

', unsafe_allow_html=True) # Different UI based on analysis type if analysis_type == "qa": # Legislative Q&A Interface selected_option = st.selectbox( "Select a bill to query:", options=bill_labels, index=None, placeholder="-- Select or type a bill --", key="toolkit_qa_bill" ) # Handle bill selection if selected_option is None: selected_bill = None else: selected_bill_index = bill_labels.index(selected_option) selected_bill = bill_keys[selected_bill_index] # Display bill details and summary when a bill is selected if selected_bill: # Extract the selected bill's data state, bill_number = selected_bill.split("_", 1) bill_mask = (df["state"] == state) & (df["bill_number"] == bill_number) bill_df = df[bill_mask].copy() if not bill_df.empty: bill_data = bill_df.iloc[0].to_dict() display_bill_details(bill_data, summaries_cache) st.markdown("---") st.markdown("#### Example Questions You Can Ask:") st.markdown(""" • What are the key definitions in this bill? • What are the enforcement mechanisms? • Who does this bill apply to? • What are the compliance requirements? • What penalties are specified? • What are the transparency requirements? • How does this bill define AI systems? • What are the implementation timelines? 
""") st.markdown("#### Suggested Questions for This Bill:") try: filtered_bill_mask = (filtered_df["state"] == state) & (filtered_df["bill_number"] == bill_number) filtered_bill_df = filtered_df[filtered_bill_mask].copy() if not filtered_bill_df.empty: bill_data = filtered_bill_df.iloc[0].to_dict() suggested_questions = get_bill_suggested_questions(bill_data, questions_cache) for i, question in enumerate(suggested_questions): if st.button(question, key=f"toolkit_suggested_q_{i}_{selected_bill}", use_container_width=True): st.session_state.toolkit_qa_input = question st.rerun() else: st.info("Using example questions above") except Exception as e: st.error(f"Error loading suggested questions: {e}") st.info("Using example questions above") st.markdown("---") st.markdown("#### Ask a Question") user_input = st.text_input( "Ask a question about this bill:", key="toolkit_qa_input" ) if st.button("Ask", key="toolkit_qa_button"): if not selected_bill: st.error("Please select a bill first.") elif not user_input.strip(): st.error("Please enter a question.") else: # Get the selected bill data state, bill_number = selected_bill.split("_", 1) bill_mask = (filtered_df["state"] == state) & (filtered_df["bill_number"] == bill_number) bill_df = filtered_df[bill_mask].copy() if not bill_df.empty: bill_data = bill_df.iloc[0].to_dict() with st.spinner("Analyzing bill and generating answer..."): try: answer = answer_bill_question(bill_data, user_input) st.markdown("#### Answer") st.markdown(answer) except Exception as e: st.error(f"Failed to generate answer: {str(e)}") st.error(f"Q&A error: {e}") else: st.error("Could not find the selected bill data.") elif analysis_type == "comparison": # Bill Comparison Interface bill_options = [ ( f"{row['state']}_{row['bill_number']}", f"{row['state']}_{row['bill_number']}: {row['title'][:50] + '...' 
if len(row['title']) > 50 else row['title']}" ) for _, row in filtered_df.iterrows() if pd.notna(row["title"]) and row["title"].strip() ] bill_options = sorted(bill_options, key=lambda x: x[0]) bill_keys = [option[0] for option in bill_options] bill_labels = [option[1] for option in bill_options] if not bill_keys: st.warning("No relevant bills with titles available for comparison.") else: focus_bill_option = st.selectbox( "Select a focus bill:", options=bill_labels, index=None, placeholder="-- Select or type a bill --", key="toolkit_focus_bill", help="Type to search through available bills" ) if focus_bill_option is None: focus_bill = None else: focus_bill_index = bill_labels.index(focus_bill_option) focus_bill = bill_keys[focus_bill_index] if focus_bill is not None: comparison_keys = [key for key in bill_keys if key != focus_bill] comparison_labels = [label for key, label in zip(bill_keys, bill_labels) if key != focus_bill] comparison_bills_options = st.multiselect( "Select bills to compare:", options=comparison_labels, placeholder="-- Select or type bills to compare --", key="toolkit_comparison_bills" ) comparison_bills = [] for selected_label in comparison_bills_options: if selected_label in comparison_labels: comp_index = comparison_labels.index(selected_label) comparison_bills.append(comparison_keys[comp_index]) else: comparison_bills = [] # Display bill details when bills are selected if focus_bill is not None: # Display focus bill details focus_state, focus_bill_number = focus_bill.split("_", 1) focus_mask = (filtered_df["state"] == focus_state) & (filtered_df["bill_number"] == focus_bill_number) focus_bill_df = filtered_df[focus_mask].copy() if not focus_bill_df.empty: focus_bill_data = focus_bill_df.iloc[0].to_dict() st.markdown("---") st.markdown("#### 📋 Focus Bill Details") display_bill_details(focus_bill_data, summaries_cache) # Display comparison bills if selected if comparison_bills: st.markdown("---") st.markdown("#### 📋 Comparison Bills Details") # 
Create columns for comparison bills if len(comparison_bills) > 0: cols = st.columns(len(comparison_bills)) for i, comp_bill in enumerate(comparison_bills): with cols[i]: comp_state, comp_bill_number = comp_bill.split("_", 1) comp_mask = (filtered_df["state"] == comp_state) & (filtered_df["bill_number"] == comp_bill_number) comp_bill_df = filtered_df[comp_mask].copy() if not comp_bill_df.empty: comp_bill_data = comp_bill_df.iloc[0].to_dict() st.markdown(f"**{comp_bill}**") display_bill_details(comp_bill_data, summaries_cache) st.markdown("---") st.markdown("#### Example Comparison Questions:") st.markdown(""" • How do these bills define AI systems differently? • What are the key differences in enforcement mechanisms? • Which bill has stricter compliance requirements? • How do the penalty structures compare? • What are the similarities in scope and coverage? • How do the implementation timelines differ? • Which bill provides more detailed privacy protections? • How do the exemptions and exceptions compare? 
""") comparison_question = st.text_input( "Ask a comparison question:", key="toolkit_comparison_input" ) if st.button("Compare", key="toolkit_compare_button"): if not focus_bill: st.error("Please select a focus bill first.") elif not comparison_bills: st.error("Please select at least one bill to compare against.") elif not comparison_question.strip(): st.error("Please enter a comparison question.") else: # Get the selected bills data focus_state, focus_bill_number = focus_bill.split("_", 1) focus_mask = (filtered_df["state"] == focus_state) & (filtered_df["bill_number"] == focus_bill_number) focus_bill_df = filtered_df[focus_mask].copy() comparison_bills_data = [] for comp_bill in comparison_bills: comp_state, comp_bill_number = comp_bill.split("_", 1) comp_mask = (filtered_df["state"] == comp_state) & (filtered_df["bill_number"] == comp_bill_number) comp_bill_df = filtered_df[comp_mask].copy() if not comp_bill_df.empty: comparison_bills_data.append(comp_bill_df.iloc[0].to_dict()) if not focus_bill_df.empty and comparison_bills_data: focus_bill_data = focus_bill_df.iloc[0].to_dict() with st.spinner("Creating vectorstore and analyzing bills for comparison..."): try: answer = compare_bills_with_rag( focus_bill_data, comparison_bills_data, comparison_question ) st.markdown("#### Comparison Analysis") st.markdown(answer) except Exception as e: st.error(f"Failed to generate comparison: {str(e)}") st.error(f"Comparison error: {e}") else: st.error("Could not find the selected bills data.") elif analysis_type == "summary": # Executive Summary Interface selected_option = st.selectbox( "Select a bill to read its report:", options=bill_labels, index=None, placeholder="-- Select or type a bill --", key="toolkit_summary_bill" ) # Handle bill selection if selected_option is None: selected_bill = None else: selected_bill_index = bill_labels.index(selected_option) selected_bill = bill_keys[selected_bill_index] # Display bill report when a bill is selected if selected_bill: # 
Extract the selected bill's data state, bill_number = selected_bill.split("_", 1) bill_mask = (filtered_df["state"] == state) & (filtered_df["bill_number"] == bill_number) bill_df = filtered_df[bill_mask].copy() if not bill_df.empty: bill_data = bill_df.iloc[0].to_dict() # Get and display the report report = get_bill_report(bill_data, reports_cache) if report.startswith('No pre-generated report'): st.warning(report) else: st.markdown(report, unsafe_allow_html=True) # Add download button for the report pdf_filename = f"{state}_{bill_number}_report.md" st.download_button( label="Download Report as Markdown", data=report, file_name=pdf_filename, mime="text/markdown", key="download_report" ) elif analysis_type == "eu_comparison": # EU AI Act Comparison Interface # First, check if EU vectorstore is available eu_vectorstore, eu_error = load_eu_ai_act_vectorstore() if eu_vectorstore is None: st.error("EU AI Act vectorstore not available.") st.info(""" To use the EU AI Act comparison feature: 1. Ensure `eu-ai-act.pdf` is in the project directory 2. Run the EU AI Act processing script: `python create_eu_ai_act_vectorstore.py` 3. 
Refresh this page """) st.code("python create_eu_ai_act_vectorstore.py") if eu_error: st.error(f"Error details: {eu_error}") else: # Show EU AI Act info eu_info = get_eu_vectorstore_info() if "error" not in eu_info: st.success(f"✅ EU AI Act loaded") selected_option = st.selectbox( "Select a US bill to compare with the EU AI Act:", options=bill_labels, index=None, placeholder="-- Select or type a bill --", key="toolkit_eu_comparison_bill" ) # Handle bill selection if selected_option is None: selected_bill = None else: selected_bill_index = bill_labels.index(selected_option) selected_bill = bill_keys[selected_bill_index] # Display bill details when a bill is selected if selected_bill: # Extract the selected bill's data state, bill_number = selected_bill.split("_", 1) bill_mask = (filtered_df["state"] == state) & (filtered_df["bill_number"] == bill_number) bill_df = filtered_df[bill_mask].copy() if not bill_df.empty: bill_data = bill_df.iloc[0].to_dict() st.markdown("---") st.markdown("#### 📋 US Bill Details") display_bill_details(bill_data, summaries_cache) # Show EU AI Act summary st.markdown("---") st.markdown("#### 🇪🇺 EU AI Act Overview") st.info(""" The EU AI Act (Regulation 2024/1689) is comprehensive legislation that regulates AI systems based on their risk level. It establishes prohibited AI practices, high-risk AI systems requirements, transparency obligations, and governance structures. The Act applies to providers and deployers of AI systems in the EU market. """) st.markdown("---") st.markdown("#### Example EU Comparison Questions:") st.markdown(""" • How does this US bill's definition of AI compare to the EU AI Act's definition? • What are the key differences in risk assessment approaches? • How do enforcement mechanisms compare between the two frameworks? • Which framework has stricter requirements for high-risk AI systems? • How do transparency and documentation requirements compare? • What are the differences in prohibited AI practices? 
• How do the two frameworks approach algorithmic impact assessments? • What are the similarities and differences in governance structures? • How do compliance timelines compare? • Which framework provides better protection for fundamental rights? """) eu_comparison_question = st.text_input( "Ask a comparison question:", key="toolkit_eu_comparison_input" ) if st.button("Compare with EU AI Act", key="toolkit_eu_compare_button"): if not selected_bill: st.error("Please select a US bill first.") elif not eu_comparison_question.strip(): st.error("Please enter a comparison question.") else: # Get the selected bill data state, bill_number = selected_bill.split("_", 1) bill_mask = (filtered_df["state"] == state) & (filtered_df["bill_number"] == bill_number) bill_df = filtered_df[bill_mask].copy() if not bill_df.empty: bill_data = bill_df.iloc[0].to_dict() with st.spinner("Analyzing US bill and EU AI Act for comparison..."): try: answer = compare_bill_with_eu_ai_act( bill_data, eu_comparison_question ) st.markdown("#### US vs EU AI Governance Comparison") st.markdown(answer) except Exception as e: st.error(f"Failed to generate EU comparison: {str(e)}") st.error(f"EU comparison error: {e}") else: st.error("Could not find the selected bill data.")