tjl8 committed on
Commit
122af9c
·
verified ·
1 Parent(s): 346457d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -34
app.py CHANGED
@@ -299,6 +299,9 @@
299
  # summary = rag_summarize(collected, summarizer)
300
  # st.success(summary)
301
 
 
 
 
302
  import streamlit as st
303
  import pandas as pd
304
  import re
@@ -363,35 +366,7 @@ def extract_month_year(q):
363
  yr = int(ym.group()) if ym else None
364
  return mon, yr
365
 
366
- # def extract_date_range(query):
367
- # """
368
- # Extracts a start and end month-year from a question like 'from Jan 2024 to May 2025'
369
- # """
370
- # month_map = {
371
- # "january": 1, "february": 2, "march": 3, "april": 4, "may": 5, "june": 6,
372
- # "july": 7, "august": 8, "september": 9, "october": 10, "november": 11, "december": 12
373
- # }
374
- # # pattern = r"(?i)from\s+([a-zA-Z]+)\s+(\d{4})\s+(to|until)\s+([a-zA-Z]+)\s+(\d{4})"
375
- # pattern = [
376
- # r"(?i)(from|between)?\s*([a-zA-Z]+)\s+(\d{4})\s*(to|through|and|-)\s*([a-zA-Z]+)\s+(\d{4})",
377
- # ]
378
- # match = re.search(pattern, query)
379
- # if match:
380
- # start_month_str, start_year = match.group(1).lower(), int(match.group(2))
381
- # end_month_str, end_year = match.group(4).lower(), int(match.group(5))
382
- # start_month = month_map.get(start_month_str)
383
- # end_month = month_map.get(end_month_str)
384
- # if start_month and end_month:
385
- # start_date = datetime(start_year, start_month, 1)
386
- # end_date = datetime(end_year, end_month, 28)
387
- # return start_date, end_date
388
- # return None, None
389
-
390
  def extract_date_range(query):
391
- """
392
- Extracts a start and end month-year from a question like 'from Jan 2024 to May 2025'
393
- Supports patterns like 'from', 'between', 'to', 'through', 'and', '-'.
394
- """
395
  month_map = {
396
  "january": 1, "february": 2, "march": 3, "april": 4, "may": 5, "june": 6,
397
  "july": 7, "august": 8, "september": 9, "october": 10, "november": 11, "december": 12
@@ -412,7 +387,7 @@ def extract_date_range(query):
412
 
413
  if start_month and end_month:
414
  start_date = datetime(start_year, start_month, 1)
415
- end_date = datetime(end_year, end_month, 28) # safe default
416
  return start_date, end_date
417
 
418
  return None, None
@@ -458,7 +433,8 @@ if query:
458
  if df2.empty:
459
  st.warning("No matching records found.")
460
  else:
461
- texts = df2['summary_insight'].tolist()
 
462
  embs = compute_embeddings(texts, _model=embed_model)
463
  res = semantic_search(query, embs, embed_model, threshold=0.5)
464
 
@@ -489,23 +465,21 @@ if query:
489
 
490
  st.markdown(f"**Date:** {date} | **Bill Number:** {bill_number} | **Score:** {score:.2f}")
491
  st.markdown(f"**Category:** {cat}")
492
- # st.markdown(f"**Category Std:** {cat_std}")
493
  st.markdown(f"**Intended Beneficiaries:** {bene}")
494
- # st.markdown(f"**Intended Beneficiaries STD:** {bene_std}")
495
  st.markdown(f"**Goal:** {goal}")
496
  st.markdown(f"**Intent:** {intent} | **Stance:** {stance}")
497
  st.markdown(f"**Policy Impact Area:** {impact}")
498
  st.markdown(f"**Key Provision:** {provision}")
499
  st.markdown(f"**Description:** {description}")
500
- # st.markdown(f"**Summary:** {summary}")
501
  st.markdown(f"**Trend Summary:** {trend}")
502
  st.markdown(f"**Actionable Insight:** {insight}")
503
  st.markdown(f"[View Full Bill Text]({full_url})\n")
504
  st.divider()
505
 
506
- collected.append(row['summary_insight'])
507
 
508
  st.subheader("RAG-Generated Overall Summary")
509
  summary = rag_summarize(collected, summarizer)
510
  st.success(summary)
511
 
 
 
299
  # summary = rag_summarize(collected, summarizer)
300
  # st.success(summary)
301
 
302
+ #
303
+
304
+ # including description
305
  import streamlit as st
306
  import pandas as pd
307
  import re
 
366
  yr = int(ym.group()) if ym else None
367
  return mon, yr
368
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
369
  def extract_date_range(query):
 
 
 
 
370
  month_map = {
371
  "january": 1, "february": 2, "march": 3, "april": 4, "may": 5, "june": 6,
372
  "july": 7, "august": 8, "september": 9, "october": 10, "november": 11, "december": 12
 
387
 
388
  if start_month and end_month:
389
  start_date = datetime(start_year, start_month, 1)
390
+ end_date = datetime(end_year, end_month, 28)
391
  return start_date, end_date
392
 
393
  return None, None
 
433
  if df2.empty:
434
  st.warning("No matching records found.")
435
  else:
436
+ # Include description in embeddings + RAG
437
+ texts = (df2['description'].fillna('') + "\n" + df2['summary_insight'].fillna('')).tolist()
438
  embs = compute_embeddings(texts, _model=embed_model)
439
  res = semantic_search(query, embs, embed_model, threshold=0.5)
440
 
 
465
 
466
  st.markdown(f"**Date:** {date} | **Bill Number:** {bill_number} | **Score:** {score:.2f}")
467
  st.markdown(f"**Category:** {cat}")
 
468
  st.markdown(f"**Intended Beneficiaries:** {bene}")
 
469
  st.markdown(f"**Goal:** {goal}")
470
  st.markdown(f"**Intent:** {intent} | **Stance:** {stance}")
471
  st.markdown(f"**Policy Impact Area:** {impact}")
472
  st.markdown(f"**Key Provision:** {provision}")
473
  st.markdown(f"**Description:** {description}")
 
474
  st.markdown(f"**Trend Summary:** {trend}")
475
  st.markdown(f"**Actionable Insight:** {insight}")
476
  st.markdown(f"[View Full Bill Text]({full_url})\n")
477
  st.divider()
478
 
479
+ collected.append(description + "\n" + row['summary_insight'])
480
 
481
  st.subheader("RAG-Generated Overall Summary")
482
  summary = rag_summarize(collected, summarizer)
483
  st.success(summary)
484
 
485
+