Spaces:

Legislation
/

RAG

Runtime error

App Files Files Community

tjl8 committed on Jul 17, 2025

Commit

122af9c

verified ·

1 Parent(s): 346457d

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -34

app.py CHANGED Viewed

@@ -299,6 +299,9 @@
 #             summary = rag_summarize(collected, summarizer)
 #             st.success(summary)
 import streamlit as st
 import pandas as pd
 import re
@@ -363,35 +366,7 @@ def extract_month_year(q):
     yr = int(ym.group()) if ym else None
     return mon, yr
-# def extract_date_range(query):
-#     """
-#     Extracts a start and end month-year from a question like 'from Jan 2024 to May 2025'
-#     """
-#     month_map = {
-#         "january": 1, "february": 2, "march": 3, "april": 4, "may": 5, "june": 6,
-#         "july": 7, "august": 8, "september": 9, "october": 10, "november": 11, "december": 12
-#     }
-#     # pattern = r"(?i)from\s+([a-zA-Z]+)\s+(\d{4})\s+(to|until)\s+([a-zA-Z]+)\s+(\d{4})"
-#     pattern = [
-#         r"(?i)(from|between)?\s*([a-zA-Z]+)\s+(\d{4})\s*(to|through|and|-)\s*([a-zA-Z]+)\s+(\d{4})",
-#     ]
-#     match = re.search(pattern, query)
-#     if match:
-#         start_month_str, start_year = match.group(1).lower(), int(match.group(2))
-#         end_month_str, end_year = match.group(4).lower(), int(match.group(5))
-#         start_month = month_map.get(start_month_str)
-#         end_month = month_map.get(end_month_str)
-#         if start_month and end_month:
-#             start_date = datetime(start_year, start_month, 1)
-#             end_date = datetime(end_year, end_month, 28)
-#             return start_date, end_date
-#     return None, None
 def extract_date_range(query):
-    """
-    Extracts a start and end month-year from a question like 'from Jan 2024 to May 2025'
-    Supports patterns like 'from', 'between', 'to', 'through', 'and', '-'.
-    """
     month_map = {
         "january": 1, "february": 2, "march": 3, "april": 4, "may": 5, "june": 6,
         "july": 7, "august": 8, "september": 9, "october": 10, "november": 11, "december": 12
@@ -412,7 +387,7 @@ def extract_date_range(query):
             if start_month and end_month:
                 start_date = datetime(start_year, start_month, 1)
-                end_date = datetime(end_year, end_month, 28)  # safe default
                 return start_date, end_date
     return None, None
@@ -458,7 +433,8 @@ if query:
     if df2.empty:
         st.warning("No matching records found.")
     else:
-        texts = df2['summary_insight'].tolist()
         embs = compute_embeddings(texts, _model=embed_model)
         res = semantic_search(query, embs, embed_model, threshold=0.5)
@@ -489,23 +465,21 @@ if query:
                 st.markdown(f"**Date:** {date} | **Bill Number:** {bill_number} | **Score:** {score:.2f}")
                 st.markdown(f"**Category:** {cat}")
-                # st.markdown(f"**Category Std:** {cat_std}")
                 st.markdown(f"**Intended Beneficiaries:** {bene}")
-                # st.markdown(f"**Intended Beneficiaries STD:** {bene_std}")
                 st.markdown(f"**Goal:** {goal}")
                 st.markdown(f"**Intent:** {intent} | **Stance:** {stance}")
                 st.markdown(f"**Policy Impact Area:** {impact}")
                 st.markdown(f"**Key Provision:** {provision}")
                 st.markdown(f"**Description:** {description}")
-                # st.markdown(f"**Summary:** {summary}")
                 st.markdown(f"**Trend Summary:** {trend}")
                 st.markdown(f"**Actionable Insight:** {insight}")
                 st.markdown(f"[View Full Bill Text]({full_url})\n")
                 st.divider()
-                collected.append(row['summary_insight'])
             st.subheader("RAG-Generated Overall Summary")
             summary = rag_summarize(collected, summarizer)
             st.success(summary)

 #             summary = rag_summarize(collected, summarizer)
 #             st.success(summary)
+#
+# including description
 import streamlit as st
 import pandas as pd
 import re
     yr = int(ym.group()) if ym else None
     return mon, yr
 def extract_date_range(query):
     month_map = {
         "january": 1, "february": 2, "march": 3, "april": 4, "may": 5, "june": 6,
         "july": 7, "august": 8, "september": 9, "october": 10, "november": 11, "december": 12
             if start_month and end_month:
                 start_date = datetime(start_year, start_month, 1)
+                end_date = datetime(end_year, end_month, 28)
                 return start_date, end_date
     return None, None
     if df2.empty:
         st.warning("No matching records found.")
     else:
+        # Include description in embeddings + RAG
+        texts = (df2['description'].fillna('') + "\n" + df2['summary_insight'].fillna('')).tolist()
         embs = compute_embeddings(texts, _model=embed_model)
         res = semantic_search(query, embs, embed_model, threshold=0.5)
                 st.markdown(f"**Date:** {date} | **Bill Number:** {bill_number} | **Score:** {score:.2f}")
                 st.markdown(f"**Category:** {cat}")
                 st.markdown(f"**Intended Beneficiaries:** {bene}")
                 st.markdown(f"**Goal:** {goal}")
                 st.markdown(f"**Intent:** {intent} | **Stance:** {stance}")
                 st.markdown(f"**Policy Impact Area:** {impact}")
                 st.markdown(f"**Key Provision:** {provision}")
                 st.markdown(f"**Description:** {description}")
                 st.markdown(f"**Trend Summary:** {trend}")
                 st.markdown(f"**Actionable Insight:** {insight}")
                 st.markdown(f"[View Full Bill Text]({full_url})\n")
                 st.divider()
+                collected.append(description + "\n" + row['summary_insight'])
             st.subheader("RAG-Generated Overall Summary")
             summary = rag_summarize(collected, summarizer)
             st.success(summary)