# NOTE: scraped page chrome (hosting banner, file-size line, line-number
# gutter) removed — it was not part of the original source file.
# app.py
import streamlit as st
from transformers import pipeline
import spacy
from collections import Counter
import matplotlib.pyplot as plt
# Load AI models ONCE at startup to cache them and avoid reloading on every interaction
@st.cache_resource
def load_summarizer():
    """Load and cache the text-summarization pipeline.

    Returns:
        A Hugging Face ``summarization`` pipeline backed by the
        ``facebook/bart-large-cnn`` model. ``@st.cache_resource`` ensures
        the model is loaded only once per server process.
    """
    return pipeline("summarization", model="facebook/bart-large-cnn")
@st.cache_resource
def load_ner_model():
    """Load and cache the spaCy model used for Named Entity Recognition.

    Returns:
        The ``en_core_web_sm`` spaCy ``Language`` object, cached by
        ``@st.cache_resource`` so it is loaded only once per process.
    """
    return spacy.load("en_core_web_sm")
# Initialize the models
# Both loaders are wrapped in @st.cache_resource, so the expensive model
# loads happen once per server process; Streamlit reruns reuse the cache.
summarizer = load_summarizer()
nlp = load_ner_model()
def summarize_text(text):
    """Summarize long text with the BART pipeline.

    Args:
        text: Raw input text. Only the first 2000 characters are passed to
            the model to stay within its input-length limit.

    Returns:
        The generated summary string.
    """
    # Limit input text to avoid model limits
    input_text = text[:2000]
    summary = summarizer(input_text, max_length=150, min_length=30, do_sample=False)
    return summary[0]['summary_text']
def extract_entities(text):
    """Find people, organizations, locations, money, and laws in *text*.

    Args:
        text: Input text to run through the spaCy NER pipeline.

    Returns:
        A list of ``(entity_text, entity_label)`` tuples, keeping only
        entities labeled PERSON, ORG, GPE, MONEY, or LAW.
    """
    doc = nlp(text)
    # Filter for only the entity types we care about; set membership is O(1)
    wanted = {'PERSON', 'ORG', 'GPE', 'MONEY', 'LAW'}
    return [(ent.text, ent.label_) for ent in doc.ents if ent.label_ in wanted]
# app.py (continued)
# Configure the page
# NOTE(review): the "π" characters here and below look like mojibake of
# emoji (encoding damage in this copy) — confirm the intended glyphs
# against the original source before changing them.
st.set_page_config(page_title="Policy Lens", page_icon="π", layout="wide")
st.title("π Policy Lens")
st.markdown("**AI-Powered Legislative Analysis** - Paste a bill or policy below to get a plain language summary and key insights.")
# Input section
input_text = st.text_area("Paste Legislative Text Here:", height=250, placeholder="Paste the text of a bill, policy, or news article here...")
# Main analysis flow: runs only when the user clicks "Analyze" AND has
# pasted some text; otherwise show a usage hint.
if st.button("Analyze", type="primary") and input_text:
    with st.spinner("Analyzing text with AI..."):
        # Create a layout with columns: summary on the left, entities right
        col1, col2 = st.columns(2)
        with col1:
            st.header("π Summary")
            summary = summarize_text(input_text)
            st.success(summary)
        with col2:
            st.header("π§ Key Entities")
            entities = extract_entities(input_text)
            # Categorize the entities by label
            people = [text for text, label in entities if label == 'PERSON']
            organizations = [text for text, label in entities if label == 'ORG']
            money = [text for text, label in entities if label == 'MONEY']
            locations = [text for text, label in entities if label == 'GPE']
            # Display the entities in an organized way; set() removes duplicates
            if people:
                st.write("**People:**", ", ".join(set(people)))
            if organizations:
                st.write("**Organizations:**", ", ".join(set(organizations)))
            if money:
                st.write("**Financials:**", ", ".join(set(money)))
            if locations:
                st.write("**Locations:**", ", ".join(set(locations)))
        # Visualization section (optional but impressive)
        st.header("π Entity Frequency")
        if entities:
            # Count the most common entity types (by label)
            entity_counts = Counter([label for text, label in entities])
            # Create a simple bar chart
            fig, ax = plt.subplots()
            ax.bar(entity_counts.keys(), entity_counts.values())
            ax.set_ylabel('Frequency')
            ax.set_title('Most Common Entity Types')
            plt.xticks(rotation=45)
            st.pyplot(fig)
        else:
            st.info("No significant entities found to display.")
else:
    st.info("π Please paste some text to analyze. For demo purposes, you can find text on sites like congress.gov")
# Add a footer
st.markdown("---")
# Fixed: removed a stray trailing "|" (scrape artifact) after the call
st.caption("Policy Lens uses Facebook's BART model for summarization and spaCy for entity recognition.")