Spaces:

Legislation
/

RAG

Runtime error

App Files Files Community

tjl8 committed on Jul 8, 2025

Commit

7c62b1c

verified ·

1 Parent(s): 34fa400

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -34

app.py CHANGED Viewed

@@ -7,7 +7,7 @@ from sklearn.metrics.pairwise import cosine_similarity
 from sklearn.feature_extraction.text import TfidfVectorizer
 from datetime import datetime
-# Load dataset
 @st.cache_data
 def load_data():
     df = pd.read_csv("Illinois_Entire_Data_Insights_Final_v2.csv")
@@ -50,57 +50,74 @@ def rag_summarize(group_texts, summarizer, top_k=5):
     result = summarizer(prompt, max_length=60, min_length=30, do_sample=False)
     return result[0]['summary_text']
-# Streamlit UI
-st.set_page_config(page_title="Illinois Trends Q&A", layout="wide")
-st.title("📊 Illinois Trends Explorer")
-# Load data & models
 df = load_data()
 embed_model, summarizer = load_models()
-# Sidebar filters
-st.sidebar.header("📅 Filter Options")
-years = sorted(df['status_date'].dt.year.unique(), reverse=True)
-months = list(range(1, 13))
-month_names = {i: datetime(2000, i, 1).strftime('%B') for i in months}
-selected_year = st.sidebar.selectbox("Select Year", years)
-selected_month = st.sidebar.selectbox("Select Month (optional)", [None] + months, format_func=lambda x: "All" if x is None else month_names[x])
-selected_category = st.sidebar.selectbox("Select Category (optional)", ["All"] + sorted(df['category_&_subcategory_standardized'].dropna().unique()))
-# Filter data
-df_filtered = df[df['status_date'].dt.year == selected_year]
-if selected_month:
-    df_filtered = df_filtered[df_filtered['status_date'].dt.month == selected_month]
-if selected_category != "All":
-    df_filtered = df_filtered[df_filtered['category_&_subcategory_standardized'] == selected_category]
-# Summary statistics
-st.markdown(f"### 📈 Top Categories in {month_names.get(selected_month, 'All Months')} {selected_year}")
-top_cats = df_filtered['category_&_subcategory_standardized'].value_counts().head(5)
-st.bar_chart(top_cats)
-# Question input
-query = st.text_input("🔍 Ask your question about trends:")
-if query:
     if df_filtered.empty:
-        st.warning("No data available for this filter.")
     else:
-        embeddings = compute_embeddings(df_filtered["summary_insight"].tolist(), _model=embed_model)
         results = semantic_search(query, embeddings, embed_model, threshold=0.4)
         if not results:
-            st.warning("No relevant insights found.")
         else:
             top_texts = []
-            st.subheader("🔎 Top Matching Insights:")
             for idx, score in sorted(results, key=lambda x: x[1], reverse=True)[:5]:
                 row = df_filtered.iloc[idx]
-                st.markdown(f"**📅 Date:** {row['status_date'].date()} | **🔢 Score:** {score:.2f}")
-                st.markdown(f"**📌 Title:** {row['title']}")
-                st.markdown(f"**🏷️ Category:** {row['category_&_subcategory_standardized']} | **🎯 Goal:** {row['legislative_goal_standardized']}")
-                st.markdown(f"**🧭 Intent:** {row['intent_standardized']} | **���️ Stance:** {row['stance_standardized']}")
                 st.markdown(f"```{row['summary_insight'][:500]}```")
                 top_texts.append(row['summary_insight'])

 from sklearn.feature_extraction.text import TfidfVectorizer
 from datetime import datetime
+# Load data
 @st.cache_data
 def load_data():
     df = pd.read_csv("Illinois_Entire_Data_Insights_Final_v2.csv")
     result = summarizer(prompt, max_length=60, min_length=30, do_sample=False)
     return result[0]['summary_text']
+# Extract month/year from query
+def extract_month_year(query):
+    """Pull an optional month and year out of a free-text question.
+
+    Args:
+        query: The user's question as a string.
+
+    Returns:
+        A ``(month, year)`` tuple. ``month`` is 1-12 when a full English
+        month name appears as a whole word, otherwise ``None``. ``year`` is
+        the first standalone four-digit number starting with 19 or 20,
+        otherwise ``None``.
+    """
+    # Local import keeps this helper self-contained; the visible part of the
+    # file does not show a module-level ``import re``.
+    import re
+    month_map = {
+        "january": 1, "february": 2, "march": 3, "april": 4,
+        "may": 5, "june": 6, "july": 7, "august": 8,
+        "september": 9, "october": 10, "november": 11, "december": 12,
+    }
+    query_lower = query.lower()
+    # Whole-word matching: a plain substring test treats "maybe"/"mayor" as
+    # May and "marching" as March. Calendar order still decides which month
+    # wins when several are named. NOTE: "may" used as a verb is still read
+    # as the month; that ambiguity cannot be resolved lexically.
+    month = next(
+        (
+            number
+            for name, number in month_map.items()
+            if re.search(rf"\b{name}\b", query_lower)
+        ),
+        None,
+    )
+    # \b on both sides stops digits inside longer numbers (e.g. the bill
+    # number "20345") from being mistaken for a year.
+    year_match = re.search(r"\b(?:19|20)\d{2}\b", query)
+    year = int(year_match.group()) if year_match else None
+    return month, year
+# Extract category from query using simple keyword match
+def extract_category_from_query(query, categories):
+    """Pick the category whose name shares the most keywords with the query.
+
+    Args:
+        query: The user's question as a string.
+        categories: Iterable of category labels; missing values (NaN/None)
+            are ignored.
+
+    Returns:
+        The best-matching category exactly as it appears in ``categories``
+        (the earliest one on ties), or ``None`` when no meaningful keyword
+        of any category occurs in the query.
+    """
+    # Local import keeps this helper self-contained; the visible part of the
+    # file does not show a module-level ``import re``.
+    import re
+    # Connector words appear in most category labels and most questions, so
+    # they must never count as evidence for a category.
+    stopwords = frozenset({
+        "a", "an", "and", "as", "at", "by", "for", "in", "is", "it",
+        "of", "on", "or", "the", "to", "with",
+    })
+    def keywords(text):
+        # Alphanumeric tokens only: drops "&", "/" and other punctuation that
+        # a whitespace split would keep as matchable "words".
+        words = set()
+        for word in re.findall(r"[a-z0-9]+", text.lower()):
+            if len(word) < 2 or word in stopwords:
+                continue
+            # Fold simple plurals so "schools" in the query still matches a
+            # "School ..." category (applied to both sides consistently).
+            if len(word) > 3 and word.endswith("s"):
+                word = word[:-1]
+            words.add(word)
+        return words
+    query_words = keywords(query)
+    best_category, best_overlap = None, 0
+    for cat in categories:
+        if pd.isna(cat):
+            continue
+        # Whole-word overlap instead of substring search, so "art" no longer
+        # matches "start" and "&" no longer matches everything.
+        overlap = len(keywords(str(cat)) & query_words)
+        if overlap > best_overlap:
+            best_category, best_overlap = cat, overlap
+    return best_category
+# UI
+st.set_page_config(page_title="Illinois Legislative Q&A", layout="wide")
+st.title("📚 Illinois Legislative Trends Q&A")
 df = load_data()
 embed_model, summarizer = load_models()
+query = st.text_input("🔍 Ask a question about a topic, category, or time period (e.g., education in May 2024)")
+if query:
+    month, year = extract_month_year(query)
+    all_categories = df['category_&_subcategory_standardized'].unique()
+    detected_category = extract_category_from_query(query, all_categories)
+    df_filtered = df.copy()
+    if detected_category:
+        df_filtered = df_filtered[df_filtered['category_&_subcategory_standardized'] == detected_category]
+        st.info(f"Filtering by category: **{detected_category}**")
+    if year:
+        df_filtered = df_filtered[df_filtered['status_date'].dt.year == year]
+        if month:
+            df_filtered = df_filtered[df_filtered['status_date'].dt.month == month]
+            st.info(f"Filtering by time: **{datetime(year, month, 1).strftime('%B %Y')}**")
+        else:
+            st.info(f"Filtering by year: **{year}**")
     if df_filtered.empty:
+        st.warning("No matching records found for your query.")
     else:
+        embeddings = compute_embeddings(df_filtered['summary_insight'].tolist(), _model=embed_model)
         results = semantic_search(query, embeddings, embed_model, threshold=0.4)
         if not results:
+            st.warning("No relevant summaries found.")
         else:
+            st.subheader("📘 Top Matching Insights:")
             top_texts = []
             for idx, score in sorted(results, key=lambda x: x[1], reverse=True)[:5]:
                 row = df_filtered.iloc[idx]
+                st.markdown(f"**🗓️ Date:** {row['status_date'].date()} | **Score:** {score:.2f}")
+                st.markdown(f"**Title:** {row['title']}")
+                st.markdown(f"**Category:** {row['category_&_subcategory_standardized']} | **Goal:** {row['legislative_goal_standardized']}")
+                st.markdown(f"**Intent:** {row['intent_standardized']} | **Stance:** {row['stance_standardized']}")
                 st.markdown(f"```{row['summary_insight'][:500]}```")
                 top_texts.append(row['summary_insight'])