Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -299,6 +299,9 @@
|
|
| 299 |
# summary = rag_summarize(collected, summarizer)
|
| 300 |
# st.success(summary)
|
| 301 |
|
|
|
|
|
|
|
|
|
|
| 302 |
import streamlit as st
|
| 303 |
import pandas as pd
|
| 304 |
import re
|
|
@@ -363,35 +366,7 @@ def extract_month_year(q):
|
|
| 363 |
yr = int(ym.group()) if ym else None
|
| 364 |
return mon, yr
|
| 365 |
|
| 366 |
-
# def extract_date_range(query):
|
| 367 |
-
# """
|
| 368 |
-
# Extracts a start and end month-year from a question like 'from Jan 2024 to May 2025'
|
| 369 |
-
# """
|
| 370 |
-
# month_map = {
|
| 371 |
-
# "january": 1, "february": 2, "march": 3, "april": 4, "may": 5, "june": 6,
|
| 372 |
-
# "july": 7, "august": 8, "september": 9, "october": 10, "november": 11, "december": 12
|
| 373 |
-
# }
|
| 374 |
-
# # pattern = r"(?i)from\s+([a-zA-Z]+)\s+(\d{4})\s+(to|until)\s+([a-zA-Z]+)\s+(\d{4})"
|
| 375 |
-
# pattern = [
|
| 376 |
-
# r"(?i)(from|between)?\s*([a-zA-Z]+)\s+(\d{4})\s*(to|through|and|-)\s*([a-zA-Z]+)\s+(\d{4})",
|
| 377 |
-
# ]
|
| 378 |
-
# match = re.search(pattern, query)
|
| 379 |
-
# if match:
|
| 380 |
-
# start_month_str, start_year = match.group(1).lower(), int(match.group(2))
|
| 381 |
-
# end_month_str, end_year = match.group(4).lower(), int(match.group(5))
|
| 382 |
-
# start_month = month_map.get(start_month_str)
|
| 383 |
-
# end_month = month_map.get(end_month_str)
|
| 384 |
-
# if start_month and end_month:
|
| 385 |
-
# start_date = datetime(start_year, start_month, 1)
|
| 386 |
-
# end_date = datetime(end_year, end_month, 28)
|
| 387 |
-
# return start_date, end_date
|
| 388 |
-
# return None, None
|
| 389 |
-
|
| 390 |
def extract_date_range(query):
|
| 391 |
-
"""
|
| 392 |
-
Extracts a start and end month-year from a question like 'from Jan 2024 to May 2025'
|
| 393 |
-
Supports patterns like 'from', 'between', 'to', 'through', 'and', '-'.
|
| 394 |
-
"""
|
| 395 |
month_map = {
|
| 396 |
"january": 1, "february": 2, "march": 3, "april": 4, "may": 5, "june": 6,
|
| 397 |
"july": 7, "august": 8, "september": 9, "october": 10, "november": 11, "december": 12
|
|
@@ -412,7 +387,7 @@ def extract_date_range(query):
|
|
| 412 |
|
| 413 |
if start_month and end_month:
|
| 414 |
start_date = datetime(start_year, start_month, 1)
|
| 415 |
-
end_date = datetime(end_year, end_month, 28)
|
| 416 |
return start_date, end_date
|
| 417 |
|
| 418 |
return None, None
|
|
@@ -458,7 +433,8 @@ if query:
|
|
| 458 |
if df2.empty:
|
| 459 |
st.warning("No matching records found.")
|
| 460 |
else:
|
| 461 |
-
|
|
|
|
| 462 |
embs = compute_embeddings(texts, _model=embed_model)
|
| 463 |
res = semantic_search(query, embs, embed_model, threshold=0.5)
|
| 464 |
|
|
@@ -489,23 +465,21 @@ if query:
|
|
| 489 |
|
| 490 |
st.markdown(f"**Date:** {date} | **Bill Number:** {bill_number} | **Score:** {score:.2f}")
|
| 491 |
st.markdown(f"**Category:** {cat}")
|
| 492 |
-
# st.markdown(f"**Category Std:** {cat_std}")
|
| 493 |
st.markdown(f"**Intended Beneficiaries:** {bene}")
|
| 494 |
-
# st.markdown(f"**Intended Beneficiaries STD:** {bene_std}")
|
| 495 |
st.markdown(f"**Goal:** {goal}")
|
| 496 |
st.markdown(f"**Intent:** {intent} | **Stance:** {stance}")
|
| 497 |
st.markdown(f"**Policy Impact Area:** {impact}")
|
| 498 |
st.markdown(f"**Key Provision:** {provision}")
|
| 499 |
st.markdown(f"**Description:** {description}")
|
| 500 |
-
# st.markdown(f"**Summary:** {summary}")
|
| 501 |
st.markdown(f"**Trend Summary:** {trend}")
|
| 502 |
st.markdown(f"**Actionable Insight:** {insight}")
|
| 503 |
st.markdown(f"[View Full Bill Text]({full_url})\n")
|
| 504 |
st.divider()
|
| 505 |
|
| 506 |
-
collected.append(row['summary_insight'])
|
| 507 |
|
| 508 |
st.subheader("RAG-Generated Overall Summary")
|
| 509 |
summary = rag_summarize(collected, summarizer)
|
| 510 |
st.success(summary)
|
| 511 |
|
|
|
|
|
|
| 299 |
# summary = rag_summarize(collected, summarizer)
|
| 300 |
# st.success(summary)
|
| 301 |
|
| 302 |
+
#
|
| 303 |
+
|
| 304 |
+
# including description
|
| 305 |
import streamlit as st
|
| 306 |
import pandas as pd
|
| 307 |
import re
|
|
|
|
| 366 |
yr = int(ym.group()) if ym else None
|
| 367 |
return mon, yr
|
| 368 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
def extract_date_range(query):
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
month_map = {
|
| 371 |
"january": 1, "february": 2, "march": 3, "april": 4, "may": 5, "june": 6,
|
| 372 |
"july": 7, "august": 8, "september": 9, "october": 10, "november": 11, "december": 12
|
|
|
|
| 387 |
|
| 388 |
if start_month and end_month:
|
| 389 |
start_date = datetime(start_year, start_month, 1)
|
| 390 |
+
end_date = datetime(end_year, end_month, 28)
|
| 391 |
return start_date, end_date
|
| 392 |
|
| 393 |
return None, None
|
|
|
|
| 433 |
if df2.empty:
|
| 434 |
st.warning("No matching records found.")
|
| 435 |
else:
|
| 436 |
+
# Include description in embeddings + RAG
|
| 437 |
+
texts = (df2['description'].fillna('') + "\n" + df2['summary_insight'].fillna('')).tolist()
|
| 438 |
embs = compute_embeddings(texts, _model=embed_model)
|
| 439 |
res = semantic_search(query, embs, embed_model, threshold=0.5)
|
| 440 |
|
|
|
|
| 465 |
|
| 466 |
st.markdown(f"**Date:** {date} | **Bill Number:** {bill_number} | **Score:** {score:.2f}")
|
| 467 |
st.markdown(f"**Category:** {cat}")
|
|
|
|
| 468 |
st.markdown(f"**Intended Beneficiaries:** {bene}")
|
|
|
|
| 469 |
st.markdown(f"**Goal:** {goal}")
|
| 470 |
st.markdown(f"**Intent:** {intent} | **Stance:** {stance}")
|
| 471 |
st.markdown(f"**Policy Impact Area:** {impact}")
|
| 472 |
st.markdown(f"**Key Provision:** {provision}")
|
| 473 |
st.markdown(f"**Description:** {description}")
|
|
|
|
| 474 |
st.markdown(f"**Trend Summary:** {trend}")
|
| 475 |
st.markdown(f"**Actionable Insight:** {insight}")
|
| 476 |
st.markdown(f"[View Full Bill Text]({full_url})\n")
|
| 477 |
st.divider()
|
| 478 |
|
| 479 |
+
collected.append(description + "\n" + row['summary_insight'])
|
| 480 |
|
| 481 |
st.subheader("RAG-Generated Overall Summary")
|
| 482 |
summary = rag_summarize(collected, summarizer)
|
| 483 |
st.success(summary)
|
| 484 |
|
| 485 |
+
|