diff --git "a/huggingface_app.py" "b/huggingface_app.py" new file mode 100644--- /dev/null +++ "b/huggingface_app.py" @@ -0,0 +1,2459 @@ +# =============================================== +# VAILL AI Governance Bills Tracker +# =============================================== +# Table of Contents (functions): +# 1. get_qa_llm() +# 2. get_embeddings() +# 3. get_text_splitter() +# 4. create_bill_documents() +# 5. create_vectorstore_from_bills() +# 6. compare_bills_with_rag() +# 7. answer_bill_question() +# 8. load_eu_ai_act_vectorstore() +# 9. get_eu_vectorstore_info() +# 10. compare_bill_with_eu_ai_act() +# 11. load_bill_reports() +# 12. get_bill_report() +# 13. load_bill_summaries() +# 14. load_bill_suggested_questions() +# 15. get_bill_suggested_questions() +# 16. get_bill_summary() +# 17. load_and_process_data() +# 18. load_openai_api_key() +# 19. display_bill_details() +# 20. get_last_updated_date() +# 21. extract_iapp_subcategories() +# 22. format_date() +# 23. load_us_states_geojson() +# 24. _bill_label() +# 25. _group_to_ul() +# 26. create_bill_options() +# ----------------------------------------------- +# Section markers: search for '==== SECTION' lines to jump around. +# =============================================== + +# ==== SECTION: Original file begins below (unchanged) ==== +#!/usr/bin/env python3 +# scripts/app.py + +""" +Streamlit visualization for the AI Governance Bills Tracker. + +Displays an interactive dashboard of AI-related bills from known_bills_visualize.json, including +a table, map, filters, Q&A, plan comparison, summary generation, and CSV download functionality. +""" + +import streamlit as st +import pandas as pd +import time +from streamlit_folium import st_folium +import folium +import json +from pathlib import Path +import os +import dotenv +import io +import logging +from datetime import datetime, date, timedelta +from constants import IAPP_CATEGORIES +import requests +import html +from langchain_openai import ChatOpenAI +from langchain.prompts import ChatPromptTemplate +from langchain.text_splitter import RecursiveCharacterTextSplitter +from langchain_openai import OpenAIEmbeddings +from langchain_community.vectorstores import FAISS +from langchain.schema import Document +from langchain.chains import create_retrieval_chain +from langchain.chains.combine_documents import create_stuff_documents_chain +from langchain.prompts import ChatPromptTemplate +import pickle +import os +from datasets import load_dataset +import tempfile +import shutil + +dotenv.load_dotenv() + +# Create logs directory if it doesn't exist +os.makedirs("app_logs", exist_ok=True) + +# Configure logging +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(message)s", + handlers=[logging.StreamHandler(), logging.FileHandler("app_logs/visualize.log")], +) + +logger = logging.getLogger(__name__) + +# Hugging Face dataset configuration +HF_DATASET_NAME = "VAILL/legislation-tracker-data" +HF_DATA_FOLDER = "data" + +@st.cache_data +def load_from_hf_dataset(file_path: str): + """Load a file from the Hugging Face dataset repository.""" + try: + # Load the dataset + dataset = load_dataset(HF_DATASET_NAME, data_files=f"{HF_DATA_FOLDER}/{file_path}") + + # Get the first (and only) split + split_name = list(dataset.keys())[0] + data = dataset[split_name] + + # Convert to the appropriate format based on file type + if file_path.endswith('.json'): + # For JSON files, return the data as a list of dictionaries + return data.to_list() + else: + # For other files, return the raw data + return data + + except Exception as e: + logger.error(f"Error loading {file_path} from Hugging Face dataset: {e}") + return None + +@st.cache_data +def load_file_from_hf_dataset(file_path: str): + """Load a file from the Hugging Face dataset repository and return as bytes.""" + try: + # Load the dataset + dataset = load_dataset(HF_DATASET_NAME, data_files=f"{HF_DATA_FOLDER}/{file_path}") + + # Get the first (and only) split + split_name = list(dataset.keys())[0] + data = dataset[split_name] + + # For binary files, we need to handle them differently + # This is a simplified approach - you might need to adjust based on your specific needs + return data + + except Exception as e: + logger.error(f"Error loading {file_path} from Hugging Face dataset: {e}") + return None + +@st.cache_data +def download_vectorstore_from_hf(): + """Download the EU AI Act vectorstore from Hugging Face dataset to a temporary directory.""" + try: + # Create a temporary directory + temp_dir = tempfile.mkdtemp() + vectorstore_temp_path = os.path.join(temp_dir, "eu_ai_act_vectorstore") + + # Store the temp directory path for cleanup + st.session_state['vectorstore_temp_dir'] = temp_dir + + # Create the vectorstore directory + os.makedirs(vectorstore_temp_path, exist_ok=True) + + # List of expected vectorstore files + vectorstore_files = [ + "index.faiss", + "index.pkl", + "metadata.pickle" + ] + + # Download each file from the Hugging Face dataset + for filename in vectorstore_files: + try: + # Load the specific file from the dataset + dataset = load_dataset(HF_DATASET_NAME, data_files=f"{HF_DATA_FOLDER}/eu_ai_act_vectorstore/{filename}") + + # Get the first (and only) split + split_name = list(dataset.keys())[0] + data = dataset[split_name] + + if len(data) > 0 and 'bytes' in data[0]: + file_path = os.path.join(vectorstore_temp_path, filename) + with open(file_path, 'wb') as f: + f.write(data[0]['bytes']) + logger.info(f"Downloaded {filename}") + else: + logger.warning(f"Could not find {filename} in dataset") + + except Exception as e: + logger.warning(f"Error downloading {filename}: {e}") + + # Check if we have the essential files + index_faiss_path = os.path.join(vectorstore_temp_path, "index.faiss") + index_pkl_path = os.path.join(vectorstore_temp_path, "index.pkl") + + if os.path.exists(index_faiss_path) and os.path.exists(index_pkl_path): + logger.info(f"✅ EU AI Act vectorstore downloaded to {vectorstore_temp_path}") + return vectorstore_temp_path + else: + logger.error("Essential vectorstore files not found") + return None + + except Exception as e: + logger.error(f"Error downloading vectorstore from Hugging Face dataset: {e}") + return None + +# Page configuration +st.set_page_config( + layout="wide", + page_title="VAILL AI Governance Legislation Tracker", + page_icon="⚖️" +) + +# Custom CSS for clean, section-based layout +st.markdown(""" + +""", unsafe_allow_html=True) + +# Hero Section +st.markdown(""" +
+

Tracking and Analyzing State-Level AI Governance Legislation

+

A resource from the Vanderbilt AI Law Lab (VAILL) to help policymakers, researchers, and the public stay informed about the evolving landscape of AI regulation in the United States. +

What is the AI Governance Legislation Tracker?

+

This tracker is a centralized, user-friendly platform for monitoring artificial intelligence (AI) governance legislation across the United States. As AI technology rapidly advances, it's becoming increasingly important to understand how different states are approaching its regulation. This tool aims to simplify the process of finding and comparing various state-level AI governance bills, their current statuses, and their key provisions. str: + """Answer a question about a specific bill using LangChain.""" + try: + llm = get_qa_llm() + + # Create the prompt template + qa_prompt = ChatPromptTemplate.from_template( + """You are a legislative analyst expert at interpreting AI governance bills. + A user has asked a question about a specific bill. Use the bill information + provided as JSON to answer their question accurately and comprehensively. + + Guidelines: + - Answer based only on the information provided in the bill JSON + - Be specific and cite relevant sections when possible + - If the information isn't available in the bill, clearly state that + - Keep your answer focused and relevant to the question + - Use clear, accessible language + - Always include the legiscan link to the bill in your answer + - Format your answer as markdown + + Bill JSON: + ```json + {bill_json} + ``` + + User Question: {question} + + Please provide a detailed answer based on the bill information above. + """ + ) + + # Convert timestamps and other non-serializable objects to strings + serializable_bill_data = {} + for key, value in bill_data.items(): + try: + # Handle None/NaN values + if value is None: + serializable_bill_data[key] = None + elif isinstance(value, (int, float)) and pd.isna(value): + serializable_bill_data[key] = None + elif hasattr(value, 'strftime'): # Handle datetime/timestamp objects + serializable_bill_data[key] = value.strftime('%Y-%m-%d') + elif isinstance(value, (list, dict, str, int, float, bool)): + # These types are JSON serializable + serializable_bill_data[key] = value + else: + # Convert anything else to string + serializable_bill_data[key] = str(value) + except Exception: + # Fallback: convert to string or None + serializable_bill_data[key] = str(value) if value is not None else None + + # Convert bill data to JSON string + bill_json = json.dumps(serializable_bill_data, ensure_ascii=False, indent=2) + + # Create chain and invoke + chain = qa_prompt | llm + result = chain.invoke({ + "bill_json": bill_json, + "question": question + }) + + # Extract content from result + answer = getattr(result, "content", str(result)) + return answer + + except Exception as e: + logger.error(f"Error in Q&A: {e}") + return f"Error processing question: {str(e)}" + +@st.cache_resource +def load_eu_ai_act_vectorstore(): + """Load the EU AI Act vectorstore from Hugging Face dataset.""" + try: + # Download vectorstore from Hugging Face dataset + vectorstore_temp_path = download_vectorstore_from_hf() + if vectorstore_temp_path is None: + logger.warning("EU AI Act vectorstore not found in Hugging Face dataset") + return None, "Vectorstore not found in Hugging Face dataset. Please ensure it's uploaded to VAILL/legislation-tracker-data." + + # Initialize embeddings + embeddings = get_embeddings() + + # Load vectorstore + vectorstore = FAISS.load_local( + vectorstore_temp_path, + embeddings, + allow_dangerous_deserialization=True + ) + + logger.info(f"✅ EU AI Act vectorstore loaded successfully from Hugging Face dataset") + return vectorstore, None + + except Exception as e: + error_msg = f"Error loading EU AI Act vectorstore: {str(e)}" + logger.error(error_msg) + return None, error_msg + +@st.cache_data +def get_eu_vectorstore_info(): + """Get information about the EU AI Act vectorstore from Hugging Face dataset.""" + try: + # Load metadata from Hugging Face dataset + metadata_data = load_file_from_hf_dataset("eu_ai_act_vectorstore/metadata.pickle") + if metadata_data is not None: + # Convert the dataset to bytes and load with pickle + metadata_bytes = metadata_data[0]['bytes'] if isinstance(metadata_data, list) and len(metadata_data) > 0 else metadata_data + if metadata_bytes: + metadata = pickle.loads(metadata_bytes) + return metadata + return {"error": "Metadata not found in Hugging Face dataset"} + except Exception as e: + return {"error": str(e)} + +# Add this function before the existing comparison functions + +def compare_bill_with_eu_ai_act(bill_data, question): + """Compare a US bill with the EU AI Act using RAG approach.""" + try: + # Load EU AI Act vectorstore + eu_vectorstore, error = load_eu_ai_act_vectorstore() + if eu_vectorstore is None: + return f"Error loading EU AI Act data: {error}" + + # Create US bill vectorstore + us_vectorstore = create_vectorstore_from_bills([bill_data]) + + # Create retrievers + eu_retriever = eu_vectorstore.as_retriever( + search_type="similarity", + search_kwargs={"k": 4} # Get top 4 relevant EU sections + ) + + us_retriever = us_vectorstore.as_retriever( + search_type="similarity", + search_kwargs={"k": 3} # Get top 3 relevant US bill sections + ) + + # Get relevant documents from both sources + eu_docs = eu_retriever.get_relevant_documents(question) + us_docs = us_retriever.get_relevant_documents(question) + + # Format bill information + bill_info = f"{bill_data.get('state', 'Unknown')} {bill_data.get('bill_number', 'Unknown')}: {bill_data.get('title', 'Unknown')}" + + # Create the comparison prompt + comparison_prompt = ChatPromptTemplate.from_template("""You are a legal analyst expert at comparing AI governance frameworks between the US and EU. +You have been provided with relevant excerpts from a US bill and the EU AI Act to answer a comparison question. + +IMPORTANT CONTEXT: +- US Bill: {bill_info} +- EU Framework: Regulation (EU) 2024/1689 on Artificial Intelligence (AI Act) + +Your task is to compare these regulatory frameworks based on the user's question, using the relevant excerpts provided below. + +Guidelines for your analysis: +- Clearly distinguish between US and EU approaches +- Highlight similarities and differences in regulatory philosophy +- Be specific about provisions, definitions, and requirements from each framework +- Note any gaps or areas not covered by either framework +- Consider the different legal systems and enforcement mechanisms +- Structure your response with clear sections for each framework +- Conclude with a summary of key similarities, differences, and implications +- Always include the legiscan link to the US bill in your response +- Format your answer as markdown + +Relevant excerpts from the US Bill: +{us_context} + +Relevant excerpts from the EU AI Act: +{eu_context} + +User Question: {input} + +Please provide a comprehensive comparison analysis based on the excerpts above. +""") + + # Prepare context + us_context = "\n\n".join([doc.page_content for doc in us_docs]) + eu_context = "\n\n".join([doc.page_content for doc in eu_docs]) + + # Get LLM and create chain + llm = get_qa_llm() + chain = comparison_prompt | llm + + # Run the comparison + result = chain.invoke({ + "input": question, + "bill_info": bill_info, + "us_context": us_context, + "eu_context": eu_context + }) + + # Extract content from result + answer = getattr(result, "content", str(result)) + + # Add source information + answer += "\n\n---\n\n**Sources used in this analysis:**\n\n" + answer += f"**US Bill:** {bill_info}\n" + answer += f"**EU Framework:** EU AI Act (Regulation 2024/1689)\n" + + return answer + + except Exception as e: + logger.error(f"Error in EU comparison: {e}") + return f"Error during EU comparison analysis: {str(e)}" + + +@st.cache_data +def load_bill_reports() -> dict: + """Load pre-generated bill reports from Hugging Face dataset.""" + try: + reports_data = load_from_hf_dataset("bill_reports.json") + if reports_data is not None: + # Convert to dict with bill_id as key + reports = {report['bill_id']: report['report_markdown'] for report in reports_data} + logger.info(f"Loaded {len(reports)} pre-generated reports from Hugging Face dataset") + return reports + else: + logger.warning("Reports file not found in Hugging Face dataset") + return {} + except Exception as e: + logger.error(f"Error loading reports from Hugging Face dataset: {e}") + return {} + +def get_bill_report(bill_data, reports_cache): + """Get report for a bill from cache or return message.""" + bill_id = str(bill_data.get('bill_id', '')) + + if bill_id in reports_cache: + return reports_cache[bill_id] + else: + return "No pre-generated report available for this bill." + +reports_cache = load_bill_reports() + +@st.cache_data +def load_bill_summaries() -> dict: + """Load pre-generated bill summaries from Hugging Face dataset.""" + try: + summaries = load_from_hf_dataset("bill_summaries.json") + if summaries is not None: + logger.info(f"Loaded {len(summaries)} pre-generated summaries from Hugging Face dataset") + return summaries + else: + logger.warning("Summaries file not found in Hugging Face dataset") + return {} + except Exception as e: + logger.error(f"Error loading summaries from Hugging Face dataset: {e}") + return {} + +@st.cache_data +def load_bill_suggested_questions() -> dict: + """Load pre-generated suggested questions from Hugging Face dataset.""" + try: + questions = load_from_hf_dataset("bill_suggested_questions.json") + if questions is not None: + logger.info(f"Loaded {len(questions)} pre-generated question sets from Hugging Face dataset") + return questions + else: + logger.warning("Questions file not found in Hugging Face dataset") + return {} + except Exception as e: + logger.error(f"Error loading questions from Hugging Face dataset: {e}") + return {} + +def get_bill_suggested_questions(bill_data, questions_cache): + """Get suggested questions for a bill from cache or return fallback.""" + bill_key = f"{bill_data.get('state', 'Unknown')}_{bill_data.get('bill_number', 'Unknown')}" + + if bill_key in questions_cache: + questions = questions_cache[bill_key].get('suggested_questions', []) + if len(questions) == 5: + return questions + + # Fallback to static example questions + return [ + "What are the key definitions in this bill?", + "What are the enforcement mechanisms?", + "Who does this bill apply to?", + "What are the compliance requirements?", + "What penalties are specified?" + ] + +def get_bill_summary(bill_data, summaries_cache): + """Get summary for a bill from cache or return error message.""" + bill_key = f"{bill_data.get('state', 'Unknown')}_{bill_data.get('bill_number', 'Unknown')}" + + if bill_key in summaries_cache: + summary = summaries_cache[bill_key].get('summary', '') + if summary.startswith('ERROR:'): + return f"Summary generation failed: {summary}" + return summary + else: + return "No pre-generated summary available. Run the summary generation script first." + +@st.cache_data +def load_and_process_data() -> pd.DataFrame: + start_time = time.time() + + try: + bills_data = load_from_hf_dataset("known_bills_visualize.json") + if bills_data is None: + logger.warning("Data file not found in Hugging Face dataset") + return None + + logger.info(f"Loaded {len(bills_data)} bills from Hugging Face dataset") + + df = pd.DataFrame(bills_data) + # Convert dates + if "last_action_date" in df.columns: + df["last_action_date"] = pd.to_datetime( + df["last_action_date"], errors="coerce" + ) + if "lastUpdatedAt" in df.columns: + df["lastUpdatedAt"] = pd.to_datetime(df["lastUpdatedAt"], errors="coerce") + + logger.info(f"DataFrame created in {time.time() - start_time:.2f} seconds") + return df + except Exception as e: + logger.error(f"Error loading data from Hugging Face dataset: {e}") + return None + +# Load OpenAI API key from Hugging Face secrets, Streamlit secrets, or environment variable +def load_openai_api_key(): + # First, try Hugging Face secrets (for Hugging Face Spaces deployment) + try: + # In Hugging Face Spaces, secrets are available as environment variables + # with the prefix HF_SECRETS_ + hf_api_key = os.environ.get("HF_SECRETS_OPENAI_API_KEY") + if hf_api_key: + logger.info("Loaded OpenAI API key from Hugging Face secrets.") + return hf_api_key + except Exception as e: + logger.debug(f"Could not load from Hugging Face secrets: {e}") + + # Second, try Streamlit secrets (for other deployed environments) + try: + return st.secrets["OPENAI_API_KEY"] + except (KeyError, FileNotFoundError): + logger.debug("Could not load from Streamlit secrets") + + # Third, try environment variable (for local dev) + api_key = os.environ.get("OPENAI_API_KEY") + if api_key: + logger.info("Loaded OpenAI API key from environment variable.") + return api_key + + # Finally, fallback to user input (for local dev without env var) + st.warning("OpenAI API key not found in Hugging Face secrets, Streamlit secrets, or environment variables.") + api_key = st.text_input("Enter your OpenAI API key:", type="password") + if api_key: + logger.info("OpenAI API key provided via user input.") + return api_key + else: + st.error("Please provide an OpenAI API key to continue.") + st.stop() + +# Load the key +openai_api_key = load_openai_api_key() + +def display_bill_details(bill_data, summaries_cache): + """Display bill details and summary in a formatted way.""" + st.markdown("#### Bill Details") + + # Create columns for better layout + col1, col2 = st.columns(2) + + with col1: + st.write(f"**State:** {bill_data.get('state', 'N/A')}") + st.write(f"**Bill Number:** {bill_data.get('bill_number', 'N/A')}") + st.write(f"**Status:** {bill_data.get('status', 'N/A')}") + + # Format last action date + if 'last_action_date' in bill_data and pd.notna(bill_data['last_action_date']): + if isinstance(bill_data['last_action_date'], str): + st.write(f"**Last Action Date:** {bill_data['last_action_date']}") + else: + st.write(f"**Last Action Date:** {bill_data['last_action_date'].strftime('%Y-%m-%d')}") + else: + st.write(f"**Last Action Date:** N/A") + + with col2: + # Extract IAPP categories for display + if 'iapp_categories' in bill_data and isinstance(bill_data['iapp_categories'], dict): + all_subcategories = [] + for category, subcategories in bill_data['iapp_categories'].items(): + if isinstance(subcategories, list): + all_subcategories.extend(subcategories) + + if all_subcategories: + iapp_display = ", ".join(all_subcategories[:3]) # Show first 3 + if len(all_subcategories) > 3: + iapp_display += f" + {len(all_subcategories) - 3} more" + else: + iapp_display = "None" + else: + iapp_display = "N/A" + + st.write(f"**Categories:** {iapp_display}") + + # Show sponsors if available + sponsors = bill_data.get('sponsors', 'N/A') + if isinstance(sponsors, list): + sponsors_display = ", ".join(sponsors[:2]) # Show first 2 sponsors + if len(sponsors) > 2: + sponsors_display += f" + {len(sponsors) - 2} more" + else: + sponsors_display = str(sponsors) if sponsors else "N/A" + + st.write(f"**Sponsors:** {sponsors_display}") + + # Show full title + st.write(f"**Title:** {bill_data.get('title', 'N/A')}") + + # Display pre-generated summary + st.markdown("#### Bill Summary") + summary = get_bill_summary(bill_data, summaries_cache) + + if summary.startswith('Summary generation failed') or summary.startswith('No pre-generated summary'): + st.warning(summary) + else: + st.info(summary) + +df = load_and_process_data() +summaries_cache = load_bill_summaries() +questions_cache = load_bill_suggested_questions() + +if df is None: + st.write("No data available. Ensure the VAILL/legislation-tracker-data Hugging Face dataset contains the required files.") + st.stop() + +# Sidebar for filters with improved styling +with st.sidebar: + # Add logo to the sidebar + logo_path = "vaill_logo.png" + try: + st.image(logo_path, use_container_width=True) + except FileNotFoundError: + st.warning("Logo image 'vaill_logo.png' not found.") + + st.markdown("### Filter Controls") + + # Date Filter Section + date_df = ( + df.dropna(subset=["last_action_date"]) + if "last_action_date" in df.columns + else pd.DataFrame() + ) + + if not date_df.empty: + current_date = datetime.now().date() + df_dates = df[df["last_action_date"].notna()]["last_action_date"] + min_year = df_dates.min().year + max_year = min(df_dates.max().year, current_date.year) + + st.markdown("#### Date Range") + + filter_type = st.radio( + "Filter by:", + options=["No Date Filter", "Year Only", "Year & Month"], + index=0, + help="Choose how to filter bills by their last action date" + ) + + # Initialize filtered_df + filtered_df = df.copy() + + if filter_type == "Year Only": + available_years = sorted(df_dates.dt.year.unique()) + available_years = [year for year in available_years if year <= current_date.year] + + if available_years: + selected_years = st.multiselect( + "Select Years:", + options=available_years, + default=[max(available_years)], + help="Select one or more years to filter bills" + ) + + if selected_years: + mask = df["last_action_date"].dt.year.isin(selected_years) + filtered_df = df[mask].copy() + + if len(selected_years) == 1: + year_range = f"{selected_years[0]}" + else: + year_range = f"{min(selected_years)}-{max(selected_years)}" + st.success(f"Filtering: {year_range}") + else: + st.warning("No years available for filtering") + + elif filter_type == "Year & Month": + available_years = list(range(min_year, max_year + 1)) + + col1, col2 = st.columns(2) + + with col1: + start_year = st.selectbox( + "From Year:", + options=available_years, + index=max(0, len(available_years) - 2) if len(available_years) > 1 else 0, + key="start_year_select" + ) + + with col2: + end_year_options = list(range(start_year, max_year + 1)) + end_year = st.selectbox( + "To Year:", + options=end_year_options, + index=len(end_year_options) - 1, + key="end_year_select" + ) + + months = { + 1: "January", 2: "February", 3: "March", 4: "April", + 5: "May", 6: "June", 7: "July", 8: "August", + 9: "September", 10: "October", 11: "November", 12: "December" + } + + col3, col4 = st.columns(2) + + with col3: + start_month = st.selectbox( + "From Month:", + options=list(months.keys()), + format_func=lambda x: months[x], + index=0, + key="start_month_select" + ) + + with col4: + max_end_month = 12 + if end_year == current_date.year: + max_end_month = current_date.month + + end_month_options = list(range(1, max_end_month + 1)) + if end_month_options: + end_month = st.selectbox( + "To Month:", + options=end_month_options, + format_func=lambda x: months[x], + index=len(end_month_options) - 1, + key="end_month_select" + ) + else: + end_month = 1 + st.warning("Invalid month range") + + try: + start_date = date(start_year, start_month, 1) + + if end_month == 12: + last_day = date(end_year + 1, 1, 1) - timedelta(days=1) + else: + last_day = date(end_year, end_month + 1, 1) - timedelta(days=1) + + end_date = min(last_day, current_date) + + if start_date <= end_date: + mask = (df["last_action_date"].dt.date >= start_date) & ( + df["last_action_date"].dt.date <= end_date + ) + filtered_df = df[mask].copy() + + st.success(f"Filtering: {start_date.strftime('%b %Y')} - {end_date.strftime('%b %Y')}") + else: + st.error("Start date must be before end date") + + except ValueError as e: + st.error(f"Invalid date range: {e}") + + if filter_type != "No Date Filter" and not filtered_df.empty: + date_stats = filtered_df["last_action_date"].dropna() + if not date_stats.empty: + st.info(f"{len(date_stats)} bills with dates in range") + + else: + filtered_df = df.copy() + st.warning("No date information available for filtering") + + st.markdown("#### Bill Type") + bill_type_filter = st.radio( + "Show bills:", + options=["All Bills", "State Bills Only", "Federal Bills Only"], + index=0 + ) + + # Apply the filter + if bill_type_filter == "State Bills Only": + filtered_df = filtered_df[filtered_df["state"] != "US"] + elif bill_type_filter == "Federal Bills Only": + filtered_df = filtered_df[filtered_df["state"] == "US"] + + # IAPP Categories Filter + if "iapp_categories" in filtered_df.columns: + st.markdown("#### Categories") + + all_iapp_categories = set() + filtered_df["iapp_categories"].apply( + lambda x: all_iapp_categories.update(x.keys()) if isinstance(x, dict) else None + ) + + if all_iapp_categories: + for category in sorted(all_iapp_categories): + if category in IAPP_CATEGORIES: + all_subcategories = set() + filtered_df["iapp_categories"].apply( + lambda x: all_subcategories.update(x.get(category, [])) + if isinstance(x, dict) and category in x else None + ) + + if all_subcategories: + subcategory_options = sorted(all_subcategories) + selected_subcategories = st.multiselect( + f"{category}", + options=subcategory_options, + default=[], + key=f"iapp_{category.lower().replace(' ', '_')}" + ) + + if selected_subcategories: + filtered_df = filtered_df[ + filtered_df["iapp_categories"].apply( + lambda x: ( + any(subcat in x.get(category, []) for subcat in selected_subcategories) + if isinstance(x, dict) and category in x + else False + ) + ) + ] + +# Main content with tab-based layout +(tab1, tab2, tab3) = st.tabs([ + TOOL_DESCRIPTIONS["bills_table"]["name"], + TOOL_DESCRIPTIONS["bills_map"]["name"], + TOOL_DESCRIPTIONS["ai_toolkit"]["name"] +]) + +# TAB 1: BILLS EXPLORER +with tab1: + st.markdown(f'

{TOOL_DESCRIPTIONS["bills_table"]["description"]}

', unsafe_allow_html=True) + + if filtered_df.empty: + st.warning("No bills match the selected filters.") + else: + # Separate federal and state bills + federal_bills = filtered_df[filtered_df["state"] == "US"] + state_bills = filtered_df[filtered_df["state"] != "US"] + + # Helper function for last updated date + def get_last_updated_date(): + if "lastUpdatedAt" in filtered_df.columns and not filtered_df.empty: + valid_dates = filtered_df[filtered_df["lastUpdatedAt"].notna()] + if not valid_dates.empty: + most_recent = valid_dates["lastUpdatedAt"].max() + return most_recent.strftime("%Y-%m-%d") if pd.notna(most_recent) else "N/A" + return "N/A" + + # Metrics section + st.markdown('
', unsafe_allow_html=True) + st.markdown('

Database Overview

Current statistics for filtered bill dataset

', unsafe_allow_html=True) + st.markdown('
', unsafe_allow_html=True) + + if bill_type_filter == "Federal Bills Only": + col1, col2, col3 = st.columns(3) + with col1: + st.markdown('

{}

Federal Bills

'.format(len(federal_bills)), unsafe_allow_html=True) + with col2: + current_year = datetime.now().year + this_year_bills = len(filtered_df[filtered_df["last_action_date"].dt.year == current_year]) if "last_action_date" in filtered_df.columns else 0 + st.markdown('

{}

Bills This Year

'.format(this_year_bills), unsafe_allow_html=True) + with col3: + st.markdown('

{}

Last Updated

'.format(get_last_updated_date()), unsafe_allow_html=True) + + elif bill_type_filter == "State Bills Only": + col1, col2, col3 = st.columns(3) + with col1: + st.markdown('

{}

State Bills

'.format(len(state_bills)), unsafe_allow_html=True) + with col2: + if not state_bills.empty: + state_counts = state_bills["state"].value_counts() + most_active = state_counts.index[0] if not state_counts.empty else "N/A" + count = state_counts.iloc[0] if not state_counts.empty else 0 + display_text = f"{most_active} ({count})" if most_active != "N/A" else "N/A" + st.markdown('

{}

Most Active State

'.format(display_text), unsafe_allow_html=True) + else: + st.markdown('

N/A

Most Active State

', unsafe_allow_html=True) + with col3: + st.markdown('

{}

Last Updated

'.format(get_last_updated_date()), unsafe_allow_html=True) + + else: # All Bills + col1, col2, col3, col4 = st.columns(4) + with col1: + st.markdown('

{}

Federal Bills

'.format(len(federal_bills)), unsafe_allow_html=True) + with col2: + st.markdown('

{}

State Bills

'.format(len(state_bills)), unsafe_allow_html=True) + with col3: + if not state_bills.empty: + state_counts = state_bills["state"].value_counts() + most_active = state_counts.index[0] if not state_counts.empty else "N/A" + count = state_counts.iloc[0] if not state_counts.empty else 0 + display_text = f"{most_active} ({count})" if most_active != "N/A" else "N/A" + st.markdown('

{}

Most Active State

'.format(display_text), unsafe_allow_html=True) + else: + st.markdown('

N/A

Most Active State

', unsafe_allow_html=True) + with col4: + st.markdown('

{}

Last Updated

'.format(get_last_updated_date()), unsafe_allow_html=True) + + st.markdown('
', unsafe_allow_html=True) + + # Bills table section + st.markdown('
', unsafe_allow_html=True) + st.markdown('

Legislation Database

Comprehensive listing of AI governance legislation

', unsafe_allow_html=True) + st.markdown('
', unsafe_allow_html=True) + + if bill_type_filter == "State Bills Only": + st.write(f"Showing {len(filtered_df)} state bills") + elif bill_type_filter == "Federal Bills Only": + st.write(f"Showing {len(filtered_df)} federal bills") + else: + federal_count = len(filtered_df[filtered_df["state"] == "US"]) + state_count = len(filtered_df[filtered_df["state"] != "US"]) + st.write(f"Showing {len(filtered_df)} bills ({federal_count} federal, {state_count} state)") + display_df = filtered_df.copy() + + # Process IAPP Categories for display + def extract_iapp_subcategories(iapp_categories): + """Extract all subcategories from IAPP categories dict and format for display.""" + if not isinstance(iapp_categories, dict) or not iapp_categories: + return "None" + + all_subcategories = [] + for category, subcategories in iapp_categories.items(): + if isinstance(subcategories, list): + all_subcategories.extend(subcategories) + + if not all_subcategories: + return "None" + + subcategories_str = ", ".join(all_subcategories) + return subcategories_str + + # Add IAPP categories column if the field exists + if "iapp_categories" in display_df.columns: + display_df["iapp_categories_display"] = display_df["iapp_categories"].apply(extract_iapp_subcategories) + else: + display_df["iapp_categories_display"] = "None" + + # Define column mappings for display + column_mapping = { + "state": "State", + "bill_number": "Bill Number", + "title": "Title", + "status": "Status", + "iapp_categories_display": "Categories", + "last_action_date": "Last Action Date", + "sponsors": "Sponsors" + } + + # Format dates for display with validation + if "last_action_date" in display_df.columns: + current_date = datetime.now() + + def format_date(row): + if pd.isna(row["last_action_date"]): + return "N/A" + elif row["last_action_date"] > current_date: + return f"{row['last_action_date'].strftime('%Y-%m-%d')} (FUTURE DATE - INVALID)" + else: + return row['last_action_date'].strftime('%Y-%m-%d') + + display_df["formatted_last_action_date"] = display_df.apply(format_date, axis=1) + display_df["last_action_date"] = display_df["formatted_last_action_date"] + display_df = display_df.drop(columns=["formatted_last_action_date"]) + + # Create a column with proper links if bill_url exists + if "bill_url" in display_df.columns and "bill_number" in display_df.columns: + display_df["View"] = display_df.apply( + lambda row: ( + f"{row['bill_url']}" + if pd.notna(row["bill_url"]) and row["bill_url"] + else "" + ), + axis=1, + ) + + # Select and rename columns for display + display_columns = [ + col for col in column_mapping.keys() if col in display_df.columns + ] + if "View" in display_df.columns: + display_columns.append("View") + + display_df = display_df[display_columns].copy() + display_df = display_df.rename(columns=column_mapping) + + # Display the dataframe with clickable links and IAPP categories truncation + column_config = { + "View": st.column_config.LinkColumn("Link"), + "Categories": st.column_config.TextColumn( + "Categories", + help="AI governance subcategories from IAPP framework", + max_chars=50, + width="medium" + ) + } + + st.data_editor( + display_df, + column_config=column_config, + hide_index=True, + use_container_width=True, + disabled=True, + height=500, + ) + + # Add CSV download option + csv = display_df.to_csv(index=False) + st.download_button( + label="Download Table as CSV", + data=csv, + file_name="filtered_bills.csv", + mime="text/csv", + ) + + st.markdown('
', unsafe_allow_html=True) + +# How to Use This Tracker and About the Data sections +st.markdown(""" +
+
+

How to Use This Tracker

+
    +
  • For Policymakers: Quickly compare legislative approaches from other states to inform drafting and decision-making.
  • +
  • For Researchers: Access a centralized database of AI-related bills to analyze trends and export data for academic study.
  • +
  • For the Public: Stay informed about how your state is regulating AI technology and understand proposed laws.
  • +
+
+
+

About the Data

+

The data in this tracker is compiled from state legislative records, primarily utilizing the Legiscan API. The tracker focuses on state-level legislation, with federal bills included for context but separated in counts and views. Bill statuses are simplified into "Signed Into Law", "Active", and "Inactive" for clarity.

+
+
+""", unsafe_allow_html=True) + +# TAB 2: GEOSPATIAL INSIGHTS + +# ---- helper: cached GeoJSON loader ---- +@st.cache_data(show_spinner=False) +def load_us_states_geojson(): + url = "https://raw.githubusercontent.com/python-visualization/folium/master/examples/data/us-states.json" + try: + return requests.get(url, timeout=10).json() + except Exception: + with open("us-states.json", "r") as f: + return json.load(f) + +with tab2: + st.markdown(f'

{TOOL_DESCRIPTIONS["bills_map"]["description"]}

', unsafe_allow_html=True) + + st.markdown('
', unsafe_allow_html=True) + st.markdown('

Geographic Distribution Map

Interactive visualization of AI governance bills across US states

', unsafe_allow_html=True) + st.markdown('
', unsafe_allow_html=True) + + # --- Year selector (expects values like "2025-2026") --- + year_options = sorted([str(y) for y in filtered_df['session_year'].dropna().unique()]) + if year_options: + selected_year = st.selectbox("Session year", year_options, index=len(year_options)-1) + df_year = filtered_df[filtered_df['session_year'] == selected_year].copy() + else: + selected_year = None + df_year = filtered_df.copy() + + # --- Counts by state (expects 2-letter postal abbreviations) --- + tmp = df_year.copy() + tmp['state'] = tmp['state'].astype(str).str.upper() + + counts_df = ( + tmp['state'] + .value_counts() + .rename_axis('state') + .reset_index(name='bills') + ) + state_to_count = dict(zip(counts_df['state'], counts_df['bills'])) + + # --- Build per-state popup HTML (scrollable) --- + title_col = 'title' if 'title' in tmp.columns else ('bill_title' if 'bill_title' in tmp.columns else None) + bn_col = 'bill_number' if 'bill_number' in tmp.columns else ('number' if 'number' in tmp.columns else None) + + def _bill_label(row): + bn = str(row.get(bn_col, '') or '').strip() if bn_col else '' + tt = str(row.get(title_col, '') or '').strip() if title_col else '' + bn = html.escape(bn) + tt = html.escape(tt) + if bn and tt: + return f"
  • {bn}: {tt}
  • " + elif bn: + return f"
  • {bn}
  • " + elif tt: + return f"
  • {tt}
  • " + return "
  • (unnamed bill)
  • " + + def _group_to_ul(g: pd.DataFrame) -> str: + # Be robust to pandas versions: 'state' may or may not be present in g + if 'state' in g.columns: + g = g.drop(columns='state') + return "
      " + "".join(_bill_label(r) for _, r in g.iterrows()) + "
    " + + if not tmp.empty: + # Try pandas >= 2.2 signature first (supports include_groups) + try: + bills_by_state = ( + tmp.groupby('state', group_keys=False) + .apply(_group_to_ul, include_groups=False) + .to_dict() + ) + except TypeError: + # Older pandas: no include_groups; still safe due to the 'state' check in _group_to_ul + bills_by_state = ( + tmp.groupby('state', group_keys=False) + .apply(_group_to_ul) + .to_dict() + ) + else: + bills_by_state = {} + + # --- GeoJSON & properties (also build scrollable popup HTML) --- + us_states = load_us_states_geojson() + + for feat in us_states.get('features', []): + abbr = (feat.get('id') or "").upper() + props = feat.setdefault('properties', {}) + props['bills'] = int(state_to_count.get(abbr, 0)) + + state_name = html.escape(props.get('name', '')) + total = props['bills'] + list_html = bills_by_state.get(abbr, "No bills for the selected session.") + + count_label = "bill" if total == 1 else "bills" + props['popup_html'] = ( + f"
    " + f"

    {state_name} — {total} {count_label}" + + (f" ({html.escape(selected_year)})" if selected_year else "") + + "

    " + f"
    {list_html}
    " + f"
    " + ) + + # --- Map --- + m = folium.Map(location=[39.8283, -98.5795], zoom_start=4, tiles="OpenStreetMap") + + # Choropleth (one-tone Blues) + folium.Choropleth( + geo_data=us_states, + name="Bills choropleth", + data=counts_df, + columns=["state", "bills"], + key_on="feature.id", + fill_color="Blues", + fill_opacity=0.85, + line_opacity=0.2, # thin internal boundaries from choropleth layer + line_color="#ffffff", + nan_fill_color="#f0f0f0", + legend_name=f"AI governance bills by state ({selected_year})" if selected_year else "AI governance bills by state", + ).add_to(m) + + # Outline layer so borders are clearly visible + folium.GeoJson( + us_states, + name="State boundaries", + style_function=lambda x: { + "fillOpacity": 0, + "color": "#666666", + "weight": 1.2 + }, + highlight_function=lambda x: {"weight": 2, "color": "#333333", "fillOpacity": 0}, + tooltip=folium.features.GeoJsonTooltip( + fields=["name", "bills"], + aliases=["State", "Bills"], + sticky=True, + localize=True, + ) + ).add_to(m) + + # Transparent layer for clickable popups (scrollable content) + folium.GeoJson( + us_states, + name="State popups", + style_function=lambda x: {"color": "#00000000", "fillOpacity": 0, "weight": 0}, + popup=folium.features.GeoJsonPopup( + fields=["popup_html"], + labels=False, + localize=True, + parse_html=True, + max_width=500, # width cap; vertical scroll inside content + ), + ).add_to(m) + + folium.LayerControl(collapsed=True).add_to(m) + + st_folium(m, width=1200, height=700) + + st.markdown('
    ', unsafe_allow_html=True) + + +# TAB 3: AI ANALYSIS TOOLKIT +with tab3: + st.markdown(f'

    {TOOL_DESCRIPTIONS["ai_toolkit"]["description"]}

    ', unsafe_allow_html=True) + + # Analysis type selector + st.markdown('
    ', unsafe_allow_html=True) + st.markdown('

    Analysis Type Selection

    Choose your preferred AI-powered analysis method

    ', unsafe_allow_html=True) + st.markdown('
    ', unsafe_allow_html=True) + + analysis_type = st.selectbox( + "Select Analysis Type:", + options=list(AI_ANALYSIS_TYPES.keys()), + format_func=lambda x: AI_ANALYSIS_TYPES[x]["name"], + index=0, + key="analysis_type_selector" + ) + + st.markdown('
    ', unsafe_allow_html=True) + + # Create bill options helper function + def create_bill_options(): + if "bill_number" in filtered_df.columns and "state" in filtered_df.columns and "title" in filtered_df.columns: + bill_options = [ + ( + f"{row['state']}_{row['bill_number']}", + f"{row['state']}_{row['bill_number']}: {row['title'][:50] + '...' if len(row['title']) > 50 else row['title']}" + ) + for _, row in filtered_df.iterrows() + if pd.notna(row["title"]) and row["title"].strip() + ] + bill_options = sorted(bill_options, key=lambda x: x[0]) + bill_keys = [option[0] for option in bill_options] + bill_labels = [option[1] for option in bill_options] + return bill_keys, bill_labels + return [], [] + + bill_keys, bill_labels = create_bill_options() + + if not bill_keys: + st.warning("No relevant bills with titles available for analysis.") + else: + # Analysis interface section + st.markdown('
    ', unsafe_allow_html=True) + st.markdown('

    {}

    {}

    '.format( + AI_ANALYSIS_TYPES[analysis_type]["name"], + AI_ANALYSIS_TYPES[analysis_type]["description"] + ), unsafe_allow_html=True) + st.markdown('
    ', unsafe_allow_html=True) + + # Different UI based on analysis type + if analysis_type == "qa": + # Legislative Q&A Interface + selected_option = st.selectbox( + "Select a bill to query:", + options=bill_labels, + index=None, + placeholder="-- Select or type a bill --", + key="toolkit_qa_bill" + ) + + # Handle bill selection + if selected_option is None: + selected_bill = None + else: + selected_bill_index = bill_labels.index(selected_option) + selected_bill = bill_keys[selected_bill_index] + + # Display bill details and summary when a bill is selected + if selected_bill: + # Extract the selected bill's data + state, bill_number = selected_bill.split("_", 1) + bill_mask = (df["state"] == state) & (df["bill_number"] == bill_number) + bill_df = df[bill_mask].copy() + + if not bill_df.empty: + bill_data = bill_df.iloc[0].to_dict() + display_bill_details(bill_data, summaries_cache) + + st.markdown("---") + st.markdown("#### Example Questions You Can Ask:") + st.markdown(""" + • What are the key definitions in this bill? + • What are the enforcement mechanisms? + • Who does this bill apply to? + • What are the compliance requirements? + • What penalties are specified? + • What are the transparency requirements? + • How does this bill define AI systems? + • What are the implementation timelines? + """) + + st.markdown("#### Suggested Questions for This Bill:") + + try: + filtered_bill_mask = (filtered_df["state"] == state) & (filtered_df["bill_number"] == bill_number) + filtered_bill_df = filtered_df[filtered_bill_mask].copy() + + if not filtered_bill_df.empty: + bill_data = filtered_bill_df.iloc[0].to_dict() + suggested_questions = get_bill_suggested_questions(bill_data, questions_cache) + + for i, question in enumerate(suggested_questions): + if st.button(question, key=f"toolkit_suggested_q_{i}_{selected_bill}", use_container_width=True): + st.session_state.toolkit_qa_input = question + st.rerun() + else: + st.info("Using example questions above") + + except Exception as e: + logger.error(f"Error loading suggested questions: {e}") + st.info("Using example questions above") + + st.markdown("---") + st.markdown("#### Ask a Question") + + user_input = st.text_input( + "Ask a question about this bill:", + key="toolkit_qa_input" + ) + + if st.button("Ask", key="toolkit_qa_button"): + if not selected_bill: + st.error("Please select a bill first.") + elif not user_input.strip(): + st.error("Please enter a question.") + else: + # Get the selected bill data + state, bill_number = selected_bill.split("_", 1) + bill_mask = (filtered_df["state"] == state) & (filtered_df["bill_number"] == bill_number) + bill_df = filtered_df[bill_mask].copy() + + if not bill_df.empty: + bill_data = bill_df.iloc[0].to_dict() + + with st.spinner("Analyzing bill and generating answer..."): + try: + answer = answer_bill_question(bill_data, user_input) + + st.markdown("#### Answer") + st.markdown(answer) + + except Exception as e: + st.error(f"Failed to generate answer: {str(e)}") + logger.error(f"Q&A error: {e}") + else: + st.error("Could not find the selected bill data.") + + + elif analysis_type == "comparison": + # Bill Comparison Interface + bill_options = [ + ( + f"{row['state']}_{row['bill_number']}", + f"{row['state']}_{row['bill_number']}: {row['title'][:50] + '...' if len(row['title']) > 50 else row['title']}" + ) + for _, row in filtered_df.iterrows() + if pd.notna(row["title"]) and row["title"].strip() + ] + bill_options = sorted(bill_options, key=lambda x: x[0]) + bill_keys = [option[0] for option in bill_options] + bill_labels = [option[1] for option in bill_options] + + if not bill_keys: + st.warning("No relevant bills with titles available for comparison.") + else: + focus_bill_option = st.selectbox( + "Select a focus bill:", + options=bill_labels, + index=None, + placeholder="-- Select or type a bill --", + key="toolkit_focus_bill", + help="Type to search through available bills" + ) + + if focus_bill_option is None: + focus_bill = None + else: + focus_bill_index = bill_labels.index(focus_bill_option) + focus_bill = bill_keys[focus_bill_index] + + if focus_bill is not None: + comparison_keys = [key for key in bill_keys if key != focus_bill] + comparison_labels = [label for key, label in zip(bill_keys, bill_labels) if key != focus_bill] + + comparison_bills_options = st.multiselect( + "Select bills to compare:", + options=comparison_labels, + placeholder="-- Select or type bills to compare --", + key="toolkit_comparison_bills" + ) + + comparison_bills = [] + for selected_label in comparison_bills_options: + if selected_label in comparison_labels: + comp_index = comparison_labels.index(selected_label) + comparison_bills.append(comparison_keys[comp_index]) + else: + comparison_bills = [] + + # Display bill details when bills are selected + if focus_bill is not None: + # Display focus bill details + focus_state, focus_bill_number = focus_bill.split("_", 1) + focus_mask = (filtered_df["state"] == focus_state) & (filtered_df["bill_number"] == focus_bill_number) + focus_bill_df = filtered_df[focus_mask].copy() + + if not focus_bill_df.empty: + focus_bill_data = focus_bill_df.iloc[0].to_dict() + st.markdown("---") + st.markdown("#### 📋 Focus Bill Details") + display_bill_details(focus_bill_data, summaries_cache) + + # Display comparison bills if selected + if comparison_bills: + st.markdown("---") + st.markdown("#### 📋 Comparison Bills Details") + + # Create columns for comparison bills + if len(comparison_bills) > 0: + cols = st.columns(len(comparison_bills)) + + for i, comp_bill in enumerate(comparison_bills): + with cols[i]: + comp_state, comp_bill_number = comp_bill.split("_", 1) + comp_mask = (filtered_df["state"] == comp_state) & (filtered_df["bill_number"] == comp_bill_number) + comp_bill_df = filtered_df[comp_mask].copy() + + if not comp_bill_df.empty: + comp_bill_data = comp_bill_df.iloc[0].to_dict() + st.markdown(f"**{comp_bill}**") + display_bill_details(comp_bill_data, summaries_cache) + + st.markdown("---") + st.markdown("#### Example Comparison Questions:") + st.markdown(""" + • How do these bills define AI systems differently? + • What are the key differences in enforcement mechanisms? + • Which bill has stricter compliance requirements? + • How do the penalty structures compare? + • What are the similarities in scope and coverage? + • How do the implementation timelines differ? + • Which bill provides more detailed privacy protections? + • How do the exemptions and exceptions compare? + """) + + comparison_question = st.text_input( + "Ask a comparison question:", key="toolkit_comparison_input" + ) + + if st.button("Compare", key="toolkit_compare_button"): + if not focus_bill: + st.error("Please select a focus bill first.") + elif not comparison_bills: + st.error("Please select at least one bill to compare against.") + elif not comparison_question.strip(): + st.error("Please enter a comparison question.") + else: + # Get the selected bills data + focus_state, focus_bill_number = focus_bill.split("_", 1) + focus_mask = (filtered_df["state"] == focus_state) & (filtered_df["bill_number"] == focus_bill_number) + focus_bill_df = filtered_df[focus_mask].copy() + + comparison_bills_data = [] + for comp_bill in comparison_bills: + comp_state, comp_bill_number = comp_bill.split("_", 1) + comp_mask = (filtered_df["state"] == comp_state) & (filtered_df["bill_number"] == comp_bill_number) + comp_bill_df = filtered_df[comp_mask].copy() + + if not comp_bill_df.empty: + comparison_bills_data.append(comp_bill_df.iloc[0].to_dict()) + + if not focus_bill_df.empty and comparison_bills_data: + focus_bill_data = focus_bill_df.iloc[0].to_dict() + + with st.spinner("Creating vectorstore and analyzing bills for comparison..."): + try: + answer = compare_bills_with_rag( + focus_bill_data, + comparison_bills_data, + comparison_question + ) + + st.markdown("#### Comparison Analysis") + st.markdown(answer) + + except Exception as e: + st.error(f"Failed to generate comparison: {str(e)}") + logger.error(f"Comparison error: {e}") + else: + st.error("Could not find the selected bills data.") + + elif analysis_type == "summary": + # Executive Summary Interface + selected_option = st.selectbox( + "Select a bill to read its report:", + options=bill_labels, + index=None, + placeholder="-- Select or type a bill --", + key="toolkit_summary_bill" + ) + + # Handle bill selection + if selected_option is None: + selected_bill = None + else: + selected_bill_index = bill_labels.index(selected_option) + selected_bill = bill_keys[selected_bill_index] + + # Display bill report when a bill is selected + if selected_bill: + # Extract the selected bill's data + state, bill_number = selected_bill.split("_", 1) + bill_mask = (filtered_df["state"] == state) & (filtered_df["bill_number"] == bill_number) + bill_df = filtered_df[bill_mask].copy() + + if not bill_df.empty: + bill_data = bill_df.iloc[0].to_dict() + + # Get and display the report + report = get_bill_report(bill_data, reports_cache) + + if report.startswith('No pre-generated report'): + st.warning(report) + else: + st.markdown(report, unsafe_allow_html=True) + + # Add download button for the report + pdf_filename = f"{state}_{bill_number}_report.md" + st.download_button( + label="Download Report as Markdown", + data=report, + file_name=pdf_filename, + mime="text/markdown", + key="download_report" + ) + + elif analysis_type == "eu_comparison": + # EU AI Act Comparison Interface + + # First, check if EU vectorstore is available + eu_vectorstore, eu_error = load_eu_ai_act_vectorstore() + + if eu_vectorstore is None: + st.error("EU AI Act vectorstore not available.") + st.info(""" + To use the EU AI Act comparison feature: + 1. Ensure the EU AI Act vectorstore files are uploaded to the VAILL/legislation-tracker-data Hugging Face dataset + 2. The vectorstore should be in the data/eu_ai_act_vectorstore/ folder + 3. Required files: index.faiss, index.pkl, metadata.pickle + 4. Refresh this page + """) + st.code("Upload vectorstore files to VAILL/legislation-tracker-data dataset") + + if eu_error: + st.error(f"Error details: {eu_error}") + else: + # Show EU AI Act info + eu_info = get_eu_vectorstore_info() + if "error" not in eu_info: + st.success(f"✅ EU AI Act loaded") + + selected_option = st.selectbox( + "Select a US bill to compare with the EU AI Act:", + options=bill_labels, + index=None, + placeholder="-- Select or type a bill --", + key="toolkit_eu_comparison_bill" + ) + + # Handle bill selection + if selected_option is None: + selected_bill = None + else: + selected_bill_index = bill_labels.index(selected_option) + selected_bill = bill_keys[selected_bill_index] + + # Display bill details when a bill is selected + if selected_bill: + # Extract the selected bill's data + state, bill_number = selected_bill.split("_", 1) + bill_mask = (filtered_df["state"] == state) & (filtered_df["bill_number"] == bill_number) + bill_df = filtered_df[bill_mask].copy() + + if not bill_df.empty: + bill_data = bill_df.iloc[0].to_dict() + st.markdown("---") + st.markdown("#### 📋 US Bill Details") + display_bill_details(bill_data, summaries_cache) + + # Show EU AI Act summary + st.markdown("---") + st.markdown("#### 🇪🇺 EU AI Act Overview") + st.info(""" + The EU AI Act (Regulation 2024/1689) is comprehensive legislation that regulates AI systems based on their risk level. + It establishes prohibited AI practices, high-risk AI systems requirements, transparency obligations, and governance structures. + The Act applies to providers and deployers of AI systems in the EU market. + """) + + st.markdown("---") + st.markdown("#### Example EU Comparison Questions:") + st.markdown(""" + • How does this US bill's definition of AI compare to the EU AI Act's definition? + • What are the key differences in risk assessment approaches? + • How do enforcement mechanisms compare between the two frameworks? + • Which framework has stricter requirements for high-risk AI systems? + • How do transparency and documentation requirements compare? + • What are the differences in prohibited AI practices? + • How do the two frameworks approach algorithmic impact assessments? + • What are the similarities and differences in governance structures? + • How do compliance timelines compare? + • Which framework provides better protection for fundamental rights? + """) + + eu_comparison_question = st.text_input( + "Ask a comparison question:", + key="toolkit_eu_comparison_input" + ) + + if st.button("Compare with EU AI Act", key="toolkit_eu_compare_button"): + if not selected_bill: + st.error("Please select a US bill first.") + elif not eu_comparison_question.strip(): + st.error("Please enter a comparison question.") + else: + # Get the selected bill data + state, bill_number = selected_bill.split("_", 1) + bill_mask = (filtered_df["state"] == state) & (filtered_df["bill_number"] == bill_number) + bill_df = filtered_df[bill_mask].copy() + + if not bill_df.empty: + bill_data = bill_df.iloc[0].to_dict() + + with st.spinner("Analyzing US bill and EU AI Act for comparison..."): + try: + answer = compare_bill_with_eu_ai_act( + bill_data, + eu_comparison_question + ) + + st.markdown("#### US vs EU AI Governance Comparison") + st.markdown(answer) + + except Exception as e: + st.error(f"Failed to generate EU comparison: {str(e)}") + logger.error(f"EU comparison error: {e}") + else: + st.error("Could not find the selected bill data.") + +# Cleanup function to remove temporary directories +def cleanup_temp_directories(): + """Clean up temporary directories created during the session.""" + if 'vectorstore_temp_dir' in st.session_state: + try: + temp_dir = st.session_state['vectorstore_temp_dir'] + if os.path.exists(temp_dir): + shutil.rmtree(temp_dir) + logger.info(f"Cleaned up temporary directory: {temp_dir}") + except Exception as e: + logger.warning(f"Error cleaning up temporary directory: {e}") + +# Register cleanup function to run when the app is closed +if st.session_state.get('_cleanup_registered', False) == False: + st.session_state['_cleanup_registered'] = True + # Note: Streamlit doesn't have a built-in cleanup mechanism, + # but the temp directories will be cleaned up by the OS eventually \ No newline at end of file