# app.py
"""Policy Lens — AI-powered legislative analysis.

Streamlit app: paste a bill/policy text, get a plain-language summary
(facebook/bart-large-cnn) plus named entities (spaCy en_core_web_sm)
and a simple entity-type frequency chart.
"""

import streamlit as st
from transformers import pipeline
import spacy
from collections import Counter
import matplotlib.pyplot as plt


# Load AI models ONCE at startup to cache them and avoid reloading on
# every interaction (st.cache_resource persists across Streamlit reruns).
@st.cache_resource
def load_summarizer():
    """Load the text summarization model."""
    return pipeline("summarization", model="facebook/bart-large-cnn")


@st.cache_resource
def load_ner_model():
    """Load the Named Entity Recognition model."""
    return spacy.load("en_core_web_sm")


# Initialize the models
summarizer = load_summarizer()
nlp = load_ner_model()


def summarize_text(text):
    """Summarize long text into a short plain-language paragraph.

    Input is truncated to 2000 characters to stay within the model's
    input limits; returns the generated summary string.
    """
    # Limit input text to avoid model limits
    input_text = text[:2000]
    summary = summarizer(input_text, max_length=150, min_length=30, do_sample=False)
    return summary[0]['summary_text']


def extract_entities(text):
    """Find people, orgs, locations, money amounts, and laws in *text*.

    Returns a list of (entity_text, entity_label) tuples, keeping only
    the entity types relevant to legislative analysis.
    """
    doc = nlp(text)
    entities = []
    for ent in doc.ents:
        # Filter for only the entity types we care about
        if ent.label_ in ('PERSON', 'ORG', 'GPE', 'MONEY', 'LAW'):
            entities.append((ent.text, ent.label_))
    return entities


def _unique(items):
    """Deduplicate while preserving first-seen order.

    set() would also dedupe but its iteration order is unstable across
    runs, which made the displayed entity lists shuffle on every rerun.
    """
    return list(dict.fromkeys(items))


# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------

# Configure the page
st.set_page_config(page_title="Policy Lens", page_icon="📜", layout="wide")

st.title("📜 Policy Lens")
st.markdown("**AI-Powered Legislative Analysis** - Paste a bill or policy below to get a plain language summary and key insights.")

# Input section
input_text = st.text_area(
    "Paste Legislative Text Here:",
    height=250,
    placeholder="Paste the text of a bill, policy, or news article here...",
)

if st.button("Analyze", type="primary") and input_text:
    with st.spinner("Analyzing text with AI..."):
        # Create a layout with columns
        col1, col2 = st.columns(2)

        with col1:
            st.header("📋 Summary")
            summary = summarize_text(input_text)
            st.success(summary)

        with col2:
            st.header("🧠 Key Entities")
            entities = extract_entities(input_text)

            # Categorize the entities by label
            people = [text for text, label in entities if label == 'PERSON']
            organizations = [text for text, label in entities if label == 'ORG']
            money = [text for text, label in entities if label == 'MONEY']
            locations = [text for text, label in entities if label == 'GPE']

            # Display the entities in an organized way (deduped, stable order)
            if people:
                st.write("**People:**", ", ".join(_unique(people)))
            if organizations:
                st.write("**Organizations:**", ", ".join(_unique(organizations)))
            if money:
                st.write("**Financials:**", ", ".join(_unique(money)))
            if locations:
                st.write("**Locations:**", ", ".join(_unique(locations)))

        # Visualization section (optional but impressive)
        st.header("📊 Entity Frequency")
        if entities:
            # Count how often each entity TYPE appears
            entity_counts = Counter(label for text, label in entities)

            # Create a simple bar chart
            fig, ax = plt.subplots()
            ax.bar(entity_counts.keys(), entity_counts.values())
            ax.set_ylabel('Frequency')
            ax.set_title('Most Common Entity Types')
            plt.xticks(rotation=45)
            st.pyplot(fig)
            # Close the figure so repeated reruns don't leak matplotlib state
            plt.close(fig)
        else:
            st.info("No significant entities found to display.")
else:
    st.info("👆 Please paste some text to analyze. For demo purposes, you can find text on sites like congress.gov")

# Add a footer
st.markdown("---")
st.caption("Policy Lens uses Facebook's BART model for summarization and spaCy for entity recognition.")