"""
Streamlit front-end for the AI Litigation Tracker.

This app:
- Loads case-level summaries (and metadata) from data/summaries.csv
- Lets users filter and explore AI-related litigation
- Integrates optional RAG backends for case-specific and global Q&A
"""

# NOTE(review): several st.markdown() literals below hold only blank lines and
# plain text even though they are passed with unsafe_allow_html=True, and
# GLOBAL_CSS is blank. The HTML/CSS markup appears to be missing from this copy
# of the file; the literals are kept exactly as found -- confirm against
# version control before shipping.

import base64
import calendar
import os
from typing import Optional

import pandas as pd
import streamlit as st

# ============================================================
# Config
# ============================================================
APP_TITLE = "AI Litigation Tracker"
CSV_PATH = os.path.join(os.path.dirname(__file__), "data", "summaries.csv")
LOGO_DIR = os.path.join(os.path.dirname(__file__), "logos")
LAWFARE_LOGO_PATH = os.path.join(LOGO_DIR, "lawfare_logo.png")
VAILL_LOGO_PATH = os.path.join(LOGO_DIR, "vaill_logo.png")
COURTLISTENER_LOGO_PATH = os.path.join(LOGO_DIR, "court_listener_logo.png")

# Try to load RAG chains (optional backends).
# chains_import_error is pre-bound so it always exists, even on success.
chains_import_error: Optional[Exception] = None
try:
    from rag.chains import case_specific_qa, global_qa, ping_backends

    chains_ok = True
except Exception as e:  # pragma: no cover - only hit when backends misconfigured
    chains_ok = False
    chains_import_error = e

st.set_page_config(
    page_title=APP_TITLE,
    layout="wide",
    page_icon="⚖️",
)


# ============================================================
# Data
# ============================================================
@st.cache_data(show_spinner=False)
def load_summaries() -> pd.DataFrame:
    """
    Load the case summaries CSV and normalize columns/types.

    Expected base columns:
    - case_name
    - filing_date
    - docket_number
    - summary

    Optional metadata columns (if present) are parsed and kept for UI:
    - last_updated (legacy)
    - latest_update (canonical last activity date; YYYY-MM-DD recommended)
    - jurisdiction
    - court_id
    - courtlistener_url

    Adds ``filing_date_dt`` and, when either update column exists,
    ``latest_update_dt`` (unparseable values become NaT).

    Raises:
        FileNotFoundError: if the CSV does not exist at CSV_PATH.
        ValueError: if any of the expected base columns is missing.
    """
    if not os.path.exists(CSV_PATH):
        raise FileNotFoundError(f"Missing summaries CSV at {CSV_PATH}")

    df = pd.read_csv(CSV_PATH)

    expected_cols = ["case_name", "filing_date", "docket_number", "summary"]
    missing = [c for c in expected_cols if c not in df.columns]
    if missing:
        raise ValueError(f"summaries.csv missing columns: {missing}")

    optional_cols = [
        "last_updated",
        "latest_update",
        "jurisdiction",
        "court_id",
        "courtlistener_url",
    ]

    # Normalize every known text column to plain strings (NaN -> "").
    for c in expected_cols + optional_cols:
        if c in df.columns:
            df[c] = df[c].fillna("").astype(str)

    # Parse filing date for correct sorting/filtering
    df["filing_date_dt"] = pd.to_datetime(df["filing_date"], errors="coerce")

    # Parse latest_update into a datetime column if present
    # (this represents the best-guess "last activity" date for the case)
    if "latest_update" in df.columns:
        df["latest_update_dt"] = pd.to_datetime(df["latest_update"], errors="coerce")
    elif "last_updated" in df.columns:
        # Fallback for legacy naming: treat last_updated as latest_update
        df["latest_update_dt"] = pd.to_datetime(df["last_updated"], errors="coerce")

    return df


def refresh_data() -> None:
    """Clear the cached summaries so the next load_summaries() call re-reads from disk."""
    load_summaries.clear()


def pretty_date(dt: pd.Timestamp) -> str:
    """
    Format a Timestamp as 'Month D, YYYY' (e.g. 'August 8, 2025').

    Returns 'N/A' for NaT values.
    """
    if pd.isna(dt):
        return "N/A"
    # Month name + non-padded day, e.g. "August 8, 2025"
    return f"{dt.strftime('%B')} {dt.day}, {dt.year}"


def filter_cases(
    cases: pd.DataFrame,
    query: str,
    date_mask: Optional[pd.Series],
) -> pd.DataFrame:
    """
    Return the rows of ``cases`` matching the sidebar search and date filter.

    ``query`` is matched case-insensitively as a literal substring (not a
    regex) against case_name and docket_number, so characters such as "(",
    "[" or "." typed by the user cannot raise or over-match.
    ``date_mask`` is a boolean Series indexed like ``cases`` (or None for no
    date filter). Both masks are combined on the full index before slicing,
    which avoids applying a full-length mask to an already-filtered frame.
    """
    keep = pd.Series(True, index=cases.index)

    needle = query.strip().lower()
    if needle:
        in_name = cases["case_name"].str.lower().str.contains(
            needle, na=False, regex=False
        )
        in_docket = cases["docket_number"].str.lower().str.contains(
            needle, na=False, regex=False
        )
        keep &= in_name | in_docket

    if date_mask is not None:
        keep &= date_mask.reindex(cases.index, fill_value=False)

    return cases[keep].copy()


# ============================================================
# Chat helpers
# ============================================================
def ensure_chat_state(key: str) -> None:
    """Initialize a session_state list for a given chat key if missing."""
    if key not in st.session_state:
        st.session_state[key] = []  # list of {role, content}


def replay_chat(key: str) -> None:
    """Replay all messages for a given chat key into the Streamlit chat UI."""
    for msg in st.session_state.get(key, []):
        with st.chat_message(msg["role"]):
            st.write(msg["content"])


def add_message(key: str, role: str, content: str) -> None:
    """Append a new message to the stored chat transcript."""
    st.session_state[key].append({"role": role, "content": content})


def show_user_message(key: str, prompt: str) -> None:
    """
    Store a freshly submitted user prompt and render it immediately.

    replay_chat() runs before the prompt is read, so without this the
    user's own message would not be visible in the run that answers it.
    """
    add_message(key, "user", prompt)
    with st.chat_message("user"):
        st.write(prompt)


# ============================================================
# CSS (Lawfare-centered color scheme)
# ============================================================
GLOBAL_CSS = """ """
st.markdown(GLOBAL_CSS, unsafe_allow_html=True)


# ============================================================
# Table renderer (full summary, scrollable)
# ============================================================
def render_cases_table(df: pd.DataFrame) -> None:
    """
    Render an interactive Streamlit dataframe for the filtered cases.

    Includes:
    - Lawsuit name
    - Jurisdiction and court (if available)
    - Docket number
    - Filing date
    - Latest activity date (if available)
    - Short summary
    - CourtListener URL (if available)

    Shows a warning instead of a table when ``df`` is empty, and offers the
    displayed table as a CSV download otherwise.
    """
    if df.empty:
        st.warning("No cases match the selected filters.")
        return

    # Drop raw string date fields in favor of datetime columns used for display.
    display_df = df.drop(
        columns=["filing_date", "latest_update", "last_updated"],
        errors="ignore",
    )

    # ---- Column mapping / ordering ----
    column_mapping = {
        "case_name": "Lawsuit",
        "jurisdiction": "Jurisdiction",
        "court_id": "Court",
        "summary": "Summary",
        "docket_number": "Docket Number",
        "filing_date_dt": "Date Filed",
        "latest_update_dt": "Most Recent Activity",
        "courtlistener_url": "CourtListener URL",
    }

    # Only keep columns that actually exist in the dataframe
    base_cols = [
        "case_name",
        "jurisdiction",
        "court_id",
        "docket_number",
        "filing_date_dt",
        "latest_update_dt",
        "summary",
        "courtlistener_url",
    ]
    existing_base_cols = [c for c in base_cols if c in display_df.columns]

    # Anything else in the df gets appended at the end
    extra_cols = [c for c in display_df.columns if c not in existing_base_cols]

    # rename() ignores mapping keys that are not present as columns.
    display_df = display_df[existing_base_cols + extra_cols].rename(
        columns=column_mapping
    )

    st.dataframe(
        display_df,
        hide_index=True,
        width="stretch",
        column_config={
            "Date Filed": st.column_config.DateColumn(
                "Date Filed",
                format="MMMM D, YYYY",
            ),
            "Most Recent Activity": st.column_config.DateColumn(
                "Most Recent Activity",
                format="MMMM D, YYYY",
            ),
            "Summary": st.column_config.TextColumn(
                "Summary",
                width="large",
            ),
            # LinkColumn (not TextColumn) is what makes the URLs clickable.
            "CourtListener URL": st.column_config.LinkColumn(
                "CourtListener URL",
                width="medium",
            ),
        },
    )

    # ---- CSV download (export the exact view the user sees) ----
    csv = display_df.to_csv(index=False)
    st.download_button(
        label="Download Table as CSV",
        data=csv,
        file_name="ai_litigation_cases.csv",
        mime="text/csv",
    )


# ============================================================
# Load data
# ============================================================
df: Optional[pd.DataFrame] = None
try:
    df = load_summaries()
except Exception as e:
    # Top-level boundary: surface the failure in the UI and carry on with df=None.
    st.error(f"Unable to load summaries: {e}")

# ============================================================
# Sidebar (dynamic filters like tracker)
# ============================================================
with st.sidebar:
    st.markdown('', unsafe_allow_html=True)
    st.markdown("### Filter Controls")

    # Defaults; they stay in effect when df is missing or empty.
    sidebar_q = ""
    date_mask = None

    if df is not None and not df.empty:
        # Basic full-text search over case_name and docket_number
        sidebar_q = st.text_input(
            "Search",
            placeholder="case name or docket…",
        )

        # ---------------- Date Range ----------------
        st.markdown("#### Date Range")

        min_dt = df["filing_date_dt"].min()
        max_dt = df["filing_date_dt"].max()

        if pd.isna(min_dt) or pd.isna(max_dt):
            st.caption("No valid filing dates available for filtering.")
        else:
            years = sorted(
                df["filing_date_dt"].dropna().dt.year.unique().tolist()
            )
            month_names = [
                "January",
                "February",
                "March",
                "April",
                "May",
                "June",
                "July",
                "August",
                "September",
                "October",
                "November",
                "December",
            ]

            date_filter_mode = st.radio(
                "Filter by:",
                options=["No Date Filter", "Year Only", "Year & Month"],
                index=0,
            )

            date_filter_summary = ""
            date_filter_count = None

            if date_filter_mode == "Year Only":
                selected_years = st.multiselect(
                    "Select Years:",
                    options=years,
                    default=years,
                )
                if selected_years:
                    year_series = df["filing_date_dt"].dt.year
                    date_mask = year_series.isin(selected_years)
                    date_filter_summary = (
                        "Filtering: " + ", ".join(str(y) for y in selected_years)
                    )
                    date_filter_count = int(date_mask.sum())
                else:
                    # no years picked → no rows
                    date_mask = pd.Series(False, index=df.index)
                    date_filter_summary = "No years selected."
                    date_filter_count = 0

            elif date_filter_mode == "Year & Month":
                # ---- Row 1: From / To year ----
                col_y1, col_y2 = st.columns(2)
                with col_y1:
                    from_year = st.selectbox(
                        "From Year:",
                        options=years,
                        index=0,
                    )
                with col_y2:
                    to_year = st.selectbox(
                        "To Year:",
                        options=years,
                        index=len(years) - 1,
                    )

                # ---- Row 2: From / To month ----
                col_m1, col_m2 = st.columns(2)
                with col_m1:
                    from_month_name = st.selectbox(
                        "From Month:",
                        options=month_names,
                        index=0,
                    )
                with col_m2:
                    to_month_name = st.selectbox(
                        "To Month:",
                        options=month_names,
                        index=11,
                    )

                from_month = month_names.index(from_month_name) + 1
                to_month = month_names.index(to_month_name) + 1

                # Ensure end is not before start: swap the two endpoints.
                if (to_year, to_month) < (from_year, from_month):
                    from_year, to_year = to_year, from_year
                    from_month, to_month = to_month, from_month

                # Build mask: year*100 + month allows a clean between() filter.
                # Rows without a filing date yield NaN and fall outside the range.
                date_vals = (
                    df["filing_date_dt"].dt.year * 100
                    + df["filing_date_dt"].dt.month
                )
                start_val = from_year * 100 + from_month
                end_val = to_year * 100 + to_month
                date_mask = date_vals.between(start_val, end_val)

                # Use abbreviated month names in the summary, like "Jan 2024 - Nov 2025"
                from_abbr = calendar.month_abbr[from_month]
                to_abbr = calendar.month_abbr[to_month]
                date_filter_summary = (
                    f"Filtering: {from_abbr} {from_year} - {to_abbr} {to_year}"
                )
                date_filter_count = int(date_mask.sum())

            if date_filter_mode != "No Date Filter":
                if date_filter_summary:
                    st.success(date_filter_summary)
                if date_filter_count is not None:
                    st.info(f"{date_filter_count} cases with dates in range")

    # ---- Developer tools -----------------------------------
    st.markdown("---")
    with st.expander("Developer tools (advanced)", expanded=False):
        st.markdown("#### Backend Status")
        if chains_ok:
            try:
                status = ping_backends()
                st.write(f"OpenAI: {'✅' if status.get('openai') else '⚠️'}")
                st.write(f"Pinecone: {'✅' if status.get('pinecone') else '⚠️'}")
                if status.get("index_name"):
                    st.write(f"Index: `{status['index_name']}`")
            except Exception as e:
                st.warning(f"Health check error: {e}")
        else:
            st.error("RAG chains import failed.")
            st.exception(chains_import_error)

# ============================================================
# Hero + description
# ============================================================
st.markdown(
    """

Tracking and Analyzing AI-Related Litigation

Explore lawsuits involving artificial intelligence across U.S. courts. Use search and filters to browse cases, read summaries, and run AI-powered Q&A on individual matters or the full corpus.

""",
    unsafe_allow_html=True,
)

st.markdown(
    """

What is the AI Litigation Tracker?

This tracker is a centralized, user-friendly platform for monitoring AI-related lawsuits across the United States. It helps users quickly see where and how AI issues are being litigated, understand the posture of each case, and compare patterns across jurisdictions.

""",
    unsafe_allow_html=True,
)

# ============================================================
# Apply filters to build filtered_df
# ============================================================
if df is None or df.empty:
    st.warning("No cases available yet.")
    st.stop()

filtered_df = filter_cases(df, sidebar_q, date_mask)

# Default sort: newest filing first
if "filing_date_dt" in filtered_df.columns:
    filtered_df = filtered_df.sort_values(
        "filing_date_dt", ascending=False, na_position="last"
    )

# ============================================================
# Tabs (Cases Explorer, Case QA, Global QA)
# ============================================================
tab1, tab2, tab3 = st.tabs(["Cases Explorer", "Case Q&A", "Global Q&A"])

# === TAB 1: Cases Explorer (table view) ======================
with tab1:
    st.markdown(
        """

Navigate and filter AI litigation using search and sidebar filters. View case details, summaries, filing dates, latest activity dates, and export your view as a CSV for further analysis.

""",
        unsafe_allow_html=True,
    )

    # Overview metrics (three KPIs)
    st.markdown("\n", unsafe_allow_html=True)
    st.markdown(
        "\n\nDatabase Overview\n\n"
        "\n\nCurrent statistics for the filtered case set\n\n",
        unsafe_allow_html=True,
    )
    st.markdown("\n", unsafe_allow_html=True)

    total_cases = len(filtered_df)

    # Distinct jurisdictions (fall back to court_id if jurisdiction is empty)
    if "jurisdiction" in filtered_df.columns:
        juris = (
            filtered_df["jurisdiction"]
            .astype(str)
            .str.strip()
            .replace("", pd.NA)
            .dropna()
        )
    else:
        juris = pd.Series([], dtype="object")

    # If jurisdiction is effectively empty, fall back to court_id
    if juris.empty and "court_id" in filtered_df.columns:
        juris = (
            filtered_df["court_id"]
            .astype(str)
            .str.strip()
            .replace("", pd.NA)
            .dropna()
        )

    num_jurisdictions = juris.nunique()

    # Most recent activity (preferred) or most recent filing as fallback
    recent_label = "Most Recent Case Activity"
    recent_value = "N/A"
    if (
        "latest_update_dt" in filtered_df.columns
        and not filtered_df["latest_update_dt"].dropna().empty
    ):
        recent_value = pretty_date(filtered_df["latest_update_dt"].max())
    elif (
        "filing_date_dt" in filtered_df.columns
        and not filtered_df["filing_date_dt"].dropna().empty
    ):
        recent_label = "Most Recent Filing"
        recent_value = pretty_date(filtered_df["filing_date_dt"].max())

    c1, c2, c3 = st.columns(3)
    with c1:
        st.markdown(
            f"\n\n{total_cases}\n\n"
            "\n\nTotal Cases in View\n\n",
            unsafe_allow_html=True,
        )
    with c2:
        st.markdown(
            f"\n\n{num_jurisdictions}\n\n"
            "\n\nJurisdictions in View\n\n",
            unsafe_allow_html=True,
        )
    with c3:
        st.markdown(
            f"\n\n{recent_value}\n\n"
            f"\n\n{recent_label}\n\n",
            unsafe_allow_html=True,
        )

    st.markdown("\n", unsafe_allow_html=True)

    # Table itself
    st.markdown("\n", unsafe_allow_html=True)
    st.markdown(
        "\n\nLitigation Database\n\n"
        "\n\nComprehensive listing of AI-related lawsuits in the filtered view\n\n",
        unsafe_allow_html=True,
    )
    st.markdown("\n", unsafe_allow_html=True)

    render_cases_table(filtered_df)

    st.markdown("\n", unsafe_allow_html=True)

# === TAB 2: Case Q&A =========================================
with tab2:
    st.markdown(
        """

Select a specific case to view its details and run AI-powered Q&A grounded in that case's documents and metadata.

""",
        unsafe_allow_html=True,
    )

    st.markdown("\n", unsafe_allow_html=True)
    st.markdown(
        "\n\nCase Details & Q&A\n\n"
        "\n\nChoose a docket and ask focused questions about that case\n\n",
        unsafe_allow_html=True,
    )
    st.markdown("\n", unsafe_allow_html=True)

    if df is None or df.empty:
        st.info("No cases available yet.")
        st.markdown("\n", unsafe_allow_html=True)
    else:
        options = df.sort_values("case_name")[["docket_number", "case_name"]]

        # Label lookup for the selectbox: first case name per docket number,
        # built once instead of scanning the frame for every option.
        case_name_by_docket = (
            options.drop_duplicates("docket_number")
            .set_index("docket_number")["case_name"]
            .to_dict()
        )

        selected_docket = st.selectbox(
            "Select a case",
            options=options["docket_number"],
            format_func=lambda d: case_name_by_docket.get(d, d),
            placeholder="Choose a case",
        )

        if selected_docket:
            row = df[df["docket_number"] == selected_docket].iloc[0]

            latest_update_dt = row.get("latest_update_dt", pd.NaT)
            latest_update_str = pretty_date(latest_update_dt)

            # Show the filing date in the same format as the other dates;
            # fall back to the raw CSV string when it could not be parsed.
            filed_str = pretty_date(row.get("filing_date_dt", pd.NaT))
            if filed_str == "N/A":
                filed_str = row.get("filing_date", "") or "Unknown"

            # Summary card
            with st.container(border=True):
                # Two trailing spaces before "\n" are a Markdown hard line
                # break; a single space would merge the fields into one line.
                st.markdown(
                    f"**Lawsuit:** {row['case_name']}  \n"
                    f"**Docket Number:** `{row['docket_number']}`  \n"
                    f"**Date Filed:** {filed_str}"
                )

                # Optional metadata if present
                jurisdiction = row.get("jurisdiction", "") or None
                court_id = row.get("court_id", "") or None
                courtlistener_url = row.get("courtlistener_url", "") or None

                if jurisdiction:
                    st.markdown(f"**Jurisdiction:** {jurisdiction}")
                if court_id:
                    st.markdown(f"**Court:** `{court_id}`")
                if latest_update_str != "N/A":
                    st.markdown(f"**Latest Activity:** {latest_update_str}")
                if courtlistener_url:
                    st.markdown(
                        f"[Open in CourtListener]({courtlistener_url})",
                        unsafe_allow_html=False,
                    )

                st.markdown("**Summary**")
                st.write(row["summary"])

            st.markdown("---")

            if not chains_ok:
                st.error(
                    "RAG backends are not available yet. Check rag/chains.py & Pinecone."
                )
            else:
                # One transcript per docket so switching cases keeps chats apart.
                state_key = f"chat_case::{row['docket_number']}"
                ensure_chat_state(state_key)

                colA, _ = st.columns([1, 5])
                with colA:
                    if st.button("Clear chat", key="clear_case_chat"):
                        st.session_state[state_key] = []
                        st.toast("Cleared case chat.")

                # Full-width caption directly under the button (not in a column)
                st.caption("Ask questions grounded **only in this case**.")

                replay_chat(state_key)

                prompt = st.chat_input(f"Ask about {row['case_name']}…")
                if prompt:
                    show_user_message(state_key, prompt)
                    with st.chat_message("assistant"):
                        try:
                            ans = case_specific_qa(
                                prompt,
                                docket_number=row["docket_number"],
                                case_name=row["case_name"],
                            )
                        except Exception as e:
                            # Report backend failures in the chat instead of crashing the page.
                            ans = f"Error answering case-specific question: {e}"
                        add_message(state_key, "assistant", ans)
                        st.write(ans)

        st.markdown('', unsafe_allow_html=True)

# === TAB 3: Global Q&A =======================================
with tab3:
    st.markdown(
        """

Ask questions across the full litigation corpus using RAG-based search over all tracked cases.

""",
        unsafe_allow_html=True,
    )

    st.markdown("\n", unsafe_allow_html=True)
    st.markdown(
        "\n\nGlobal Q&A Across All Cases\n\n"
        "\n\nExplore broader patterns, themes, and trends in AI-related litigation\n\n",
        unsafe_allow_html=True,
    )
    st.markdown("\n", unsafe_allow_html=True)

    if not chains_ok:
        st.error(
            "RAG backends are not available yet. Check rag/chains.py & Pinecone."
        )
    else:
        state_key = "chat_global"
        ensure_chat_state(state_key)

        colA, _ = st.columns([1, 5])
        with colA:
            if st.button("Clear chat", key="clear_global_chat"):
                st.session_state[state_key] = []
                st.toast("Cleared global chat.")

        # Full-width caption directly under the button
        st.caption("Ask questions across the full litigation corpus (RAG).")

        replay_chat(state_key)

        prompt = st.chat_input("Ask a question across all cases…")
        if prompt:
            show_user_message(state_key, prompt)
            with st.chat_message("assistant"):
                try:
                    ans = global_qa(prompt, top_k=4)
                except Exception as e:
                    # Report backend failures in the chat instead of crashing the page.
                    ans = f"Error answering global question: {e}"
                add_message(state_key, "assistant", ans)
                st.write(ans)

                # Optional: show top retrieved hits if vectorstore is available
                try:
                    from vectorstore.cases_vectorstore import query_global

                    hits = query_global(prompt, top_k=4)
                    if hits:
                        st.markdown("**Top retrieved cases:**")
                        for h in hits:
                            st.markdown(
                                f"- {h.get('case_name','?')} "
                                f"({h.get('docket_number','?')} · score={h.get('score',0):.3f})"
                            )
                except Exception:
                    # Deliberate best-effort: if the vectorstore isn't
                    # available, we still return the LLM answer.
                    pass

    st.markdown("\n", unsafe_allow_html=True)


# ============================================================
# Footer with credits
# ============================================================
def encode_image(path: str) -> str:
    """Return the file at ``path`` as a base64-encoded ASCII string."""
    with open(path, "rb") as f:
        return base64.b64encode(f.read()).decode()


st.markdown("---")

if os.path.exists(COURTLISTENER_LOGO_PATH):
    # NOTE(review): img_b64 is not referenced by the footer text as found;
    # presumably it was embedded in markup that is missing here -- confirm.
    img_b64 = encode_image(COURTLISTENER_LOGO_PATH)
    footer_html = f"""

© 2025 AI Litigation Tracker · Vanderbilt AI Law Lab × Lawfare

Court data provided by CourtListener, a project of Free Law Project.

"""
    # Kept inside the guard so footer_html is never referenced when unbound.
    st.markdown(footer_html, unsafe_allow_html=True)