nivakaran committed (commit b4c4175, verified, 1 parent: 4134ab0)

Upload folder using huggingface_hub
src/graphs/RogerGraph.py CHANGED
@@ -51,7 +51,10 @@ class CombinedAgentGraphBuilder:
         workflow.add_node("EconomicalAgent", economical_builder.build_graph())
         workflow.add_node("PoliticalAgent", political_builder.build_graph())
         workflow.add_node("MeteorologicalAgent", meteorological_builder.build_graph())
-        workflow.add_node("DataRetrievalAgent", data_retrieval_builder.build_data_retrieval_agent_graph())
+        workflow.add_node(
+            "DataRetrievalAgent",
+            data_retrieval_builder.build_data_retrieval_agent_graph(),
+        )
 
         workflow.add_edge(START, "GraphInitiator")
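Note: LangGraph accepts a compiled graph as a node, which is what the reformatted add_node() call above relies on. A minimal runnable sketch of that pattern (assumes `pip install langgraph`; the state schema and node body are illustrative, not the repo's):

from typing import TypedDict

from langgraph.graph import StateGraph, START, END

class State(TypedDict):
    domain_insights: list

def fetch(state: State) -> State:
    # Stand-in for the real data-retrieval work
    return {"domain_insights": state["domain_insights"] + ["event"]}

sub = StateGraph(State)
sub.add_node("fetch", fetch)
sub.add_edge(START, "fetch")
sub.add_edge("fetch", END)

parent = StateGraph(State)
parent.add_node("DataRetrievalAgent", sub.compile())  # compiled subgraph as a node
parent.add_edge(START, "DataRetrievalAgent")
parent.add_edge("DataRetrievalAgent", END)

print(parent.compile().invoke({"domain_insights": []}))  # {'domain_insights': ['event']}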
src/graphs/combinedAgentGraph.py CHANGED
@@ -15,6 +15,7 @@ from src.nodes.combinedAgentNode import CombinedAgentNode
 
 try:
     from src.config.langsmith_config import LangSmithConfig
+
     _langsmith = LangSmithConfig()
     _langsmith.configure()
 except ImportError:
@@ -50,7 +51,9 @@ class CombinedAgentGraphBuilder:
         try:
             result = social_graph.invoke({})
             insights = result.get("domain_insights", [])
-            logger.info(f"[CombinedGraph] SocialAgent returned {len(insights)} insights")
+            logger.info(
+                f"[CombinedGraph] SocialAgent returned {len(insights)} insights"
+            )
             return {"domain_insights": insights}
         except Exception as e:
             logger.error(f"[CombinedGraph] SocialAgent FAILED: {e}")
@@ -61,7 +64,9 @@ class CombinedAgentGraphBuilder:
         try:
             result = intelligence_graph.invoke({})
             insights = result.get("domain_insights", [])
-            logger.info(f"[CombinedGraph] IntelligenceAgent returned {len(insights)} insights")
+            logger.info(
+                f"[CombinedGraph] IntelligenceAgent returned {len(insights)} insights"
+            )
             return {"domain_insights": insights}
         except Exception as e:
             logger.error(f"[CombinedGraph] IntelligenceAgent FAILED: {e}")
@@ -72,7 +77,9 @@ class CombinedAgentGraphBuilder:
         try:
             result = economical_graph.invoke({})
             insights = result.get("domain_insights", [])
-            logger.info(f"[CombinedGraph] EconomicalAgent returned {len(insights)} insights")
+            logger.info(
+                f"[CombinedGraph] EconomicalAgent returned {len(insights)} insights"
+            )
             return {"domain_insights": insights}
         except Exception as e:
             logger.error(f"[CombinedGraph] EconomicalAgent FAILED: {e}")
@@ -83,7 +90,9 @@ class CombinedAgentGraphBuilder:
         try:
             result = political_graph.invoke({})
             insights = result.get("domain_insights", [])
-            logger.info(f"[CombinedGraph] PoliticalAgent returned {len(insights)} insights")
+            logger.info(
+                f"[CombinedGraph] PoliticalAgent returned {len(insights)} insights"
+            )
             return {"domain_insights": insights}
         except Exception as e:
             logger.error(f"[CombinedGraph] PoliticalAgent FAILED: {e}")
@@ -94,7 +103,9 @@ class CombinedAgentGraphBuilder:
         try:
             result = meteorological_graph.invoke({})
             insights = result.get("domain_insights", [])
-            logger.info(f"[CombinedGraph] MeteorologicalAgent returned {len(insights)} insights")
+            logger.info(
+                f"[CombinedGraph] MeteorologicalAgent returned {len(insights)} insights"
+            )
             return {"domain_insights": insights}
         except Exception as e:
             logger.error(f"[CombinedGraph] MeteorologicalAgent FAILED: {e}")
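A hedged sketch of the wrapper-node pattern these hunks touch: each agent node invokes a pre-compiled subgraph and forwards its domain_insights, so one failing agent logs an error instead of killing the combined run. Here `social_graph` is stubbed so the snippet runs standalone, and the empty-list fallback on failure is an assumption; the diff cuts off after the error log.

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("combined_graph")

class _StubGraph:  # stands in for a compiled subgraph
    def invoke(self, state):
        return {"domain_insights": [{"summary": "example"}]}

social_graph = _StubGraph()

def run_social_agent(state: dict) -> dict:
    try:
        result = social_graph.invoke({})
        insights = result.get("domain_insights", [])
        logger.info(f"[CombinedGraph] SocialAgent returned {len(insights)} insights")
        return {"domain_insights": insights}
    except Exception as e:
        logger.error(f"[CombinedGraph] SocialAgent FAILED: {e}")
        return {"domain_insights": []}  # assumed fallback

print(run_social_agent({}))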
src/graphs/dataRetrievalAgentGraph.py CHANGED
@@ -46,13 +46,15 @@ class DataRetrievalAgentGraph(DataRetrievalAgentNode):
         insights = []
 
         for event in classified_events:
-            insights.append({
-                "source_event_id": event.event_id,
-                "domain": event.target_agent,
-                "severity": "medium",
-                "summary": event.content_summary,
-                "risk_score": event.confidence_score,
-            })
+            insights.append(
+                {
+                    "source_event_id": event.event_id,
+                    "domain": event.target_agent,
+                    "severity": "medium",
+                    "summary": event.content_summary,
+                    "risk_score": event.confidence_score,
+                }
+            )
 
         print(f"[DATA RETRIEVAL] Formatted {len(insights)} insights for parent graph")
         return {"domain_insights": insights}
@@ -65,7 +67,9 @@ class DataRetrievalAgentGraph(DataRetrievalAgentNode):
         workflow.add_node("prepare_worker_tasks", self.prepare_worker_tasks)
         workflow.add_node(
             "worker",
-            lambda state: {"worker": worker_graph.map().invoke(state.tasks_for_workers)},
+            lambda state: {
+                "worker": worker_graph.map().invoke(state.tasks_for_workers)
+            },
         )
         workflow.add_node("aggregate_results", self.aggregate_results)
         workflow.add_node("classifier_agent", self.classifier_agent_node)
src/graphs/economicalAgentGraph.py CHANGED
@@ -60,9 +60,15 @@ class EconomicalGraphBuilder:
 
         main_graph = StateGraph(EconomicalAgentState)
 
-        main_graph.add_node("official_sources_module", lambda state: official_subgraph.invoke(state))
-        main_graph.add_node("social_media_module", lambda state: social_subgraph.invoke(state))
-        main_graph.add_node("feed_generation_module", lambda state: feed_subgraph.invoke(state))
+        main_graph.add_node(
+            "official_sources_module", lambda state: official_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "social_media_module", lambda state: social_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "feed_generation_module", lambda state: feed_subgraph.invoke(state)
+        )
         main_graph.add_node("feed_aggregator", node.aggregate_and_store_feeds)
 
         main_graph.set_entry_point("official_sources_module")
src/graphs/intelligenceAgentGraph.py CHANGED
@@ -13,14 +13,18 @@ class IntelligenceGraphBuilder:
     def __init__(self, llm):
         self.llm = llm
 
-    def build_profile_monitoring_subgraph(self, node: IntelligenceAgentNode) -> StateGraph:
+    def build_profile_monitoring_subgraph(
+        self, node: IntelligenceAgentNode
+    ) -> StateGraph:
         subgraph = StateGraph(IntelligenceAgentState)
         subgraph.add_node("monitor_profiles", node.collect_profile_activity)
         subgraph.set_entry_point("monitor_profiles")
         subgraph.add_edge("monitor_profiles", END)
         return subgraph.compile()
 
-    def build_competitive_intelligence_subgraph(self, node: IntelligenceAgentNode) -> StateGraph:
+    def build_competitive_intelligence_subgraph(
+        self, node: IntelligenceAgentNode
+    ) -> StateGraph:
         subgraph = StateGraph(IntelligenceAgentState)
 
         subgraph.add_node("competitor_mentions", node.collect_competitor_mentions)
@@ -60,9 +64,16 @@ class IntelligenceGraphBuilder:
 
         main_graph = StateGraph(IntelligenceAgentState)
 
-        main_graph.add_node("profile_monitoring_module", lambda state: profile_subgraph.invoke(state))
-        main_graph.add_node("competitive_intelligence_module", lambda state: intelligence_subgraph.invoke(state))
-        main_graph.add_node("feed_generation_module", lambda state: feed_subgraph.invoke(state))
+        main_graph.add_node(
+            "profile_monitoring_module", lambda state: profile_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "competitive_intelligence_module",
+            lambda state: intelligence_subgraph.invoke(state),
+        )
+        main_graph.add_node(
+            "feed_generation_module", lambda state: feed_subgraph.invoke(state)
+        )
         main_graph.add_node("feed_aggregator", node.aggregate_and_store_feeds)
 
         main_graph.set_entry_point("profile_monitoring_module")
src/graphs/meteorologicalAgentGraph.py CHANGED
@@ -13,7 +13,9 @@ class MeteorologicalGraphBuilder:
     def __init__(self, llm):
         self.llm = llm
 
-    def build_official_sources_subgraph(self, node: MeteorologicalAgentNode) -> StateGraph:
+    def build_official_sources_subgraph(
+        self, node: MeteorologicalAgentNode
+    ) -> StateGraph:
         subgraph = StateGraph(MeteorologicalAgentState)
         subgraph.add_node("collect_official", node.collect_official_sources)
         subgraph.set_entry_point("collect_official")
@@ -37,7 +39,9 @@ class MeteorologicalGraphBuilder:
 
         return subgraph.compile()
 
-    def build_feed_generation_subgraph(self, node: MeteorologicalAgentNode) -> StateGraph:
+    def build_feed_generation_subgraph(
+        self, node: MeteorologicalAgentNode
+    ) -> StateGraph:
         subgraph = StateGraph(MeteorologicalAgentState)
 
         subgraph.add_node("categorize", node.categorize_by_geography)
@@ -60,9 +64,15 @@ class MeteorologicalGraphBuilder:
 
         main_graph = StateGraph(MeteorologicalAgentState)
 
-        main_graph.add_node("official_sources_module", lambda state: official_subgraph.invoke(state))
-        main_graph.add_node("social_media_module", lambda state: social_subgraph.invoke(state))
-        main_graph.add_node("feed_generation_module", lambda state: feed_subgraph.invoke(state))
+        main_graph.add_node(
+            "official_sources_module", lambda state: official_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "social_media_module", lambda state: social_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "feed_generation_module", lambda state: feed_subgraph.invoke(state)
+        )
         main_graph.add_node("feed_aggregator", node.aggregate_and_store_feeds)
 
         main_graph.set_entry_point("official_sources_module")
src/graphs/politicalAgentGraph.py CHANGED
@@ -59,9 +59,15 @@ class PoliticalGraphBuilder:
 
         main_graph = StateGraph(PoliticalAgentState)
 
-        main_graph.add_node("official_sources_module", lambda state: official_subgraph.invoke(state))
-        main_graph.add_node("social_media_module", lambda state: social_subgraph.invoke(state))
-        main_graph.add_node("feed_generation_module", lambda state: feed_subgraph.invoke(state))
+        main_graph.add_node(
+            "official_sources_module", lambda state: official_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "social_media_module", lambda state: social_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "feed_generation_module", lambda state: feed_subgraph.invoke(state)
+        )
         main_graph.add_node("feed_aggregator", node.aggregate_and_store_feeds)
 
         main_graph.set_entry_point("official_sources_module")
src/graphs/socialAgentGraph.py CHANGED
@@ -69,10 +69,18 @@ class SocialGraphBuilder:
 
         main_graph = StateGraph(SocialAgentState)
 
-        main_graph.add_node("trending_module", lambda state: trending_subgraph.invoke(state))
-        main_graph.add_node("social_media_module", lambda state: social_subgraph.invoke(state))
-        main_graph.add_node("user_targets_module", lambda state: user_targets_subgraph.invoke(state))
-        main_graph.add_node("feed_generation_module", lambda state: feed_subgraph.invoke(state))
+        main_graph.add_node(
+            "trending_module", lambda state: trending_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "social_media_module", lambda state: social_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "user_targets_module", lambda state: user_targets_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "feed_generation_module", lambda state: feed_subgraph.invoke(state)
+        )
         main_graph.add_node("feed_aggregator", node.aggregate_and_store_feeds)
 
         # Parallel entry points - all 3 modules start together
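The comment above describes parallel entry points. A runnable sketch of that idea in LangGraph, with an Annotated reducer so concurrent branches can merge list-valued state; all names here are illustrative, not the repo's:

import operator
from typing import Annotated, TypedDict

from langgraph.graph import StateGraph, START, END

class State(TypedDict):
    worker_results: Annotated[list, operator.add]  # merges parallel writes

def trending(state):
    return {"worker_results": ["trend"]}

def social_media(state):
    return {"worker_results": ["post"]}

def user_targets(state):
    return {"worker_results": ["mention"]}

g = StateGraph(State)
g.add_node("feed_aggregator", lambda state: {})
for name, fn in [
    ("trending_module", trending),
    ("social_media_module", social_media),
    ("user_targets_module", user_targets),
]:
    g.add_node(name, fn)
    g.add_edge(START, name)              # every module starts together
    g.add_edge(name, "feed_aggregator")  # aggregator runs after the fan-out
g.add_edge("feed_aggregator", END)

print(sorted(g.compile().invoke({"worker_results": []})["worker_results"]))
# ['mention', 'post', 'trend']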
src/nodes/socialAgentNode.py CHANGED
@@ -21,11 +21,13 @@ from src.llms.groqllm import GroqLLM
 
 def load_intel_config() -> dict:
     """Load intel config from JSON file (same as main.py)."""
-    config_path = os.path.join(os.path.dirname(__file__), "..", "..", "data", "intel_config.json")
+    config_path = os.path.join(
+        os.path.dirname(__file__), "..", "..", "data", "intel_config.json"
+    )
     default_config = {
         "user_profiles": {"twitter": [], "facebook": [], "linkedin": []},
         "user_keywords": [],
-        "user_products": []
+        "user_products": [],
     }
     try:
        if os.path.exists(config_path):
@@ -66,9 +68,11 @@ class SocialAgentNode:
         self.user_keywords = self.intel_config.get("user_keywords", [])
         self.user_profiles = self.intel_config.get("user_profiles", {})
         self.user_products = self.intel_config.get("user_products", [])
-
-        print(f"[SocialAgent] Loaded {len(self.user_keywords)} user keywords, "
-              f"{sum(len(v) for v in self.user_profiles.values())} profiles")
+
+        print(
+            f"[SocialAgent] Loaded {len(self.user_keywords)} user keywords, "
+            f"{sum(len(v) for v in self.user_profiles.values())} profiles"
+        )
 
         # Geographic scopes
         self.geographic_scopes = {
@@ -411,72 +415,79 @@ class SocialAgentNode:
         These are configured via the frontend Intelligence Settings UI.
         """
         print("[MODULE 2D] Collecting User-Defined Targets")
-
+
         user_results = []
-
+
         # Reload config to get latest user settings
         self.intel_config = load_intel_config()
         self.user_keywords = self.intel_config.get("user_keywords", [])
         self.user_profiles = self.intel_config.get("user_profiles", {})
         self.user_products = self.intel_config.get("user_products", [])
-
+
         # Skip if no user config
         if not self.user_keywords and not any(self.user_profiles.values()):
             print("   ⏭️ No user-defined targets configured")
             return {"worker_results": [], "user_target_results": []}
-
+
         # ============================================
         # Scrape USER KEYWORDS across Twitter
         # ============================================
         if self.user_keywords:
             print(f"   📝 Scraping {len(self.user_keywords)} user keywords...")
             twitter_tool = self.tools.get("scrape_twitter")
-
+
             for keyword in self.user_keywords[:10]:  # Limit to 10 keywords
                 try:
                     if twitter_tool:
                         twitter_data = twitter_tool.invoke(
                             {"query": keyword, "max_items": 5}
                         )
-                        user_results.append({
-                            "source_tool": "scrape_twitter",
-                            "raw_content": str(twitter_data),
-                            "category": "user_keyword",
-                            "scope": "sri_lanka",
-                            "platform": "twitter",
-                            "keyword": keyword,
-                            "timestamp": datetime.utcnow().isoformat(),
-                        })
+                        user_results.append(
+                            {
+                                "source_tool": "scrape_twitter",
+                                "raw_content": str(twitter_data),
+                                "category": "user_keyword",
+                                "scope": "sri_lanka",
+                                "platform": "twitter",
+                                "keyword": keyword,
+                                "timestamp": datetime.utcnow().isoformat(),
+                            }
+                        )
                     print(f"   ✓ Keyword: '{keyword}'")
                 except Exception as e:
                     print(f"   ⚠️ Keyword '{keyword}' error: {e}")
-
+
         # ============================================
         # Scrape USER PRODUCTS
         # ============================================
         if self.user_products:
             print(f"   📦 Scraping {len(self.user_products)} user products...")
             twitter_tool = self.tools.get("scrape_twitter")
-
+
             for product in self.user_products[:5]:  # Limit to 5 products
                 try:
                     if twitter_tool:
                         twitter_data = twitter_tool.invoke(
-                            {"query": f"{product} review OR {product} Sri Lanka", "max_items": 3}
+                            {
+                                "query": f"{product} review OR {product} Sri Lanka",
+                                "max_items": 3,
+                            }
                         )
-                        user_results.append({
-                            "source_tool": "scrape_twitter",
-                            "raw_content": str(twitter_data),
-                            "category": "user_product",
-                            "scope": "sri_lanka",
-                            "platform": "twitter",
-                            "product": product,
-                            "timestamp": datetime.utcnow().isoformat(),
-                        })
+                        user_results.append(
+                            {
+                                "source_tool": "scrape_twitter",
+                                "raw_content": str(twitter_data),
+                                "category": "user_product",
+                                "scope": "sri_lanka",
+                                "platform": "twitter",
+                                "product": product,
+                                "timestamp": datetime.utcnow().isoformat(),
+                            }
+                        )
                     print(f"   ✓ Product: '{product}'")
                 except Exception as e:
                     print(f"   ⚠️ Product '{product}' error: {e}")
-
+
         # ============================================
         # Scrape USER TWITTER PROFILES
         # ============================================
@@ -484,7 +495,7 @@ class SocialAgentNode:
         if twitter_profiles:
             print(f"   👤 Scraping {len(twitter_profiles)} Twitter profiles...")
             twitter_tool = self.tools.get("scrape_twitter")
-
+
             for profile in twitter_profiles[:10]:  # Limit to 10 profiles
                 try:
                     # Clean profile handle
@@ -494,19 +505,21 @@ class SocialAgentNode:
                     twitter_data = twitter_tool.invoke(
                         {"query": f"from:{handle} OR @{handle}", "max_items": 5}
                     )
-                    user_results.append({
-                        "source_tool": "scrape_twitter",
-                        "raw_content": str(twitter_data),
-                        "category": "user_profile",
-                        "scope": "sri_lanka",
-                        "platform": "twitter",
-                        "profile": f"@{handle}",
-                        "timestamp": datetime.utcnow().isoformat(),
-                    })
+                    user_results.append(
+                        {
+                            "source_tool": "scrape_twitter",
+                            "raw_content": str(twitter_data),
+                            "category": "user_profile",
+                            "scope": "sri_lanka",
+                            "platform": "twitter",
+                            "profile": f"@{handle}",
+                            "timestamp": datetime.utcnow().isoformat(),
+                        }
+                    )
                     print(f"   ✓ Profile: @{handle}")
                 except Exception as e:
                     print(f"   ⚠️ Profile @{profile} error: {e}")
-
+
         print(f"   ✅ User targets: {len(user_results)} results collected")
         return {"worker_results": user_results, "user_target_results": user_results}
 
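The load_intel_config() helper above reads data/intel_config.json with the default shown in the diff. An illustrative config matching those keys (handles, keywords, and products are made up):

example_config = {
    "user_profiles": {
        "twitter": ["@example_handle"],  # scraped via "from:handle OR @handle"
        "facebook": [],
        "linkedin": [],
    },
    "user_keywords": ["fuel prices", "port city"],  # first 10 are scraped
    "user_products": ["Ceylon Tea"],                # first 5 are scraped
}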
src/rag.py CHANGED
@@ -14,6 +14,7 @@ sys.path.insert(0, str(PROJECT_ROOT))
 
 try:
     from dotenv import load_dotenv
+
     load_dotenv()
 except ImportError:
     pass
@@ -26,6 +27,7 @@ logging.basicConfig(
 try:
     import chromadb
     from chromadb.config import Settings
+
     CHROMA_AVAILABLE = True
 except ImportError:
     CHROMA_AVAILABLE = False
@@ -37,6 +39,7 @@ try:
     from langchain_core.messages import HumanMessage, AIMessage
     from langchain_core.output_parsers import StrOutputParser
     from langchain_core.runnables import RunnablePassthrough
+
     LANGCHAIN_AVAILABLE = True
 except ImportError:
     LANGCHAIN_AVAILABLE = False
@@ -45,6 +48,7 @@ except ImportError:
 # Neo4j for graph-based retrieval
 try:
     from neo4j import GraphDatabase
+
     NEO4J_AVAILABLE = True
 except ImportError:
     NEO4J_AVAILABLE = False
@@ -53,9 +57,18 @@ except ImportError:
 
 # Keywords that indicate a graph/relationship query
 GRAPH_KEYWORDS = [
-    "connected", "related", "timeline", "before", "after",
-    "caused by", "followed by", "similar to", "linked",
-    "what happened", "sequence", "chain of events"
+    "connected",
+    "related",
+    "timeline",
+    "before",
+    "after",
+    "caused by",
+    "followed by",
+    "similar to",
+    "linked",
+    "what happened",
+    "sequence",
+    "chain of events",
 ]
 
 
@@ -67,31 +80,31 @@ def is_graph_query(question: str) -> bool:
 
 class Neo4jRetriever:
     """Graph-based retrieval for relationship queries with LAZY initialization."""
-
+
     def __init__(self):
         self.driver = None
         self._initialized = False
         self._init_attempted = False
-
+
     def _lazy_init(self):
         """Lazy initialization - only connect when actually needed."""
         if self._init_attempted:
             return self.driver is not None
-
+
         self._init_attempted = True
-
+
         if not NEO4J_AVAILABLE:
             logger.info("[Neo4jRetriever] Neo4j package not installed")
             return False
-
+
         neo4j_uri = os.getenv("NEO4J_URI", "")
         neo4j_user = os.getenv("NEO4J_USER", "neo4j")
         neo4j_password = os.getenv("NEO4J_PASSWORD", "")
-
+
         if not neo4j_uri or not neo4j_password:
             logger.info("[Neo4jRetriever] Neo4j credentials not configured - skipping")
             return False
-
+
         try:
             self.driver = GraphDatabase.driver(
                 neo4j_uri, auth=(neo4j_user, neo4j_password)
@@ -101,15 +114,17 @@ class Neo4jRetriever:
             logger.info(f"[Neo4jRetriever] Connected to {neo4j_uri}")
             return True
         except Exception as e:
-            logger.warning(f"[Neo4jRetriever] Connection failed (will skip graph queries): {e}")
+            logger.warning(
+                f"[Neo4jRetriever] Connection failed (will skip graph queries): {e}"
+            )
             self.driver = None
             return False
-
+
     def get_related_events(self, keyword: str, limit: int = 5) -> List[Dict[str, Any]]:
         """Find events containing keyword and their related events."""
         if not self._lazy_init():
             return []
-
+
         try:
             with self.driver.session() as session:
                 query = """
@@ -126,31 +141,35 @@ class Neo4jRetriever:
                 LIMIT $limit
                 """
                 results = session.run(query, keyword=keyword, limit=limit)
-
+
                 events = []
                 for record in results:
-                    events.append({
-                        "event_id": record["event_id"],
-                        "content": record["summary"],
-                        "domain": record["domain"],
-                        "severity": record["severity"],
-                        "timestamp": record["timestamp"],
-                        "related": record["related_summaries"],
-                        "source": "neo4j_graph"
-                    })
-
-                logger.info(f"[Neo4jRetriever] Found {len(events)} events for '{keyword}'")
+                    events.append(
+                        {
+                            "event_id": record["event_id"],
+                            "content": record["summary"],
+                            "domain": record["domain"],
+                            "severity": record["severity"],
+                            "timestamp": record["timestamp"],
+                            "related": record["related_summaries"],
+                            "source": "neo4j_graph",
+                        }
+                    )
+
+                logger.info(
+                    f"[Neo4jRetriever] Found {len(events)} events for '{keyword}'"
+                )
                 return events
-
+
         except Exception as e:
             logger.error(f"[Neo4jRetriever] Query error: {e}")
             return []
-
+
     def get_domain_events(self, domain: str, limit: int = 5) -> List[Dict[str, Any]]:
         """Get recent events by domain with relationships."""
         if not self._lazy_init():
             return []
-
+
         try:
             with self.driver.session() as session:
                 query = """
@@ -165,30 +184,32 @@ class Neo4jRetriever:
                 LIMIT $limit
                 """
                 results = session.run(query, domain=domain.lower(), limit=limit)
-
+
                 events = []
                 for record in results:
-                    events.append({
-                        "event_id": record["event_id"],
-                        "content": record["summary"],
-                        "domain": domain,
-                        "severity": record["severity"],
-                        "timestamp": record["timestamp"],
-                        "related_count": record["related_count"],
-                        "source": "neo4j_graph"
-                    })
-
+                    events.append(
+                        {
+                            "event_id": record["event_id"],
+                            "content": record["summary"],
+                            "domain": domain,
+                            "severity": record["severity"],
+                            "timestamp": record["timestamp"],
+                            "related_count": record["related_count"],
+                            "source": "neo4j_graph",
+                        }
+                    )
+
                 return events
-
+
         except Exception as e:
             logger.error(f"[Neo4jRetriever] Domain query error: {e}")
             return []
-
+
     def get_event_chain(self, keyword: str, depth: int = 3) -> List[Dict[str, Any]]:
         """Get temporal chain of related events."""
         if not self._lazy_init():
             return []
-
+
         try:
             with self.driver.session() as session:
                 query = """
@@ -203,36 +224,39 @@ class Neo4jRetriever:
                 LIMIT 1
                 """
                 result = session.run(query, keyword=keyword).single()
-
+
                 if result:
-                    return [{
-                        "event_id": result["start_id"],
-                        "content": result["start_summary"],
-                        "timestamp": result["start_time"],
-                        "chain": result["chain"],
-                        "source": "neo4j_chain"
-                    }]
+                    return [
+                        {
+                            "event_id": result["start_id"],
+                            "content": result["start_summary"],
+                            "timestamp": result["start_time"],
+                            "chain": result["chain"],
+                            "source": "neo4j_chain",
+                        }
+                    ]
                 return []
-
+
         except Exception as e:
             logger.error(f"[Neo4jRetriever] Chain query error: {e}")
             return []
-
+
     def get_stats(self) -> Dict[str, Any]:
         """Get Neo4j graph statistics."""
         if not self._initialized or not self.driver:
-            return {"status": "not_initialized" if not self._init_attempted else "disconnected"}
-
+            return {
+                "status": (
+                    "not_initialized" if not self._init_attempted else "disconnected"
+                )
+            }
+
         try:
             with self.driver.session() as session:
                 event_count = session.run(
                     "MATCH (e:Event) RETURN COUNT(e) as count"
                 ).single()["count"]
-
-                return {
-                    "status": "connected",
-                    "total_events": event_count
-                }
+
+                return {"status": "connected", "total_events": event_count}
         except Exception as e:
             return {"status": "error", "error": str(e)}
 
@@ -246,9 +270,10 @@ class MultiCollectionRetriever:
         )
         self.client = None
         self.collections: Dict[str, Any] = {}
-
+
         # Thread pool for parallel queries
         from concurrent.futures import ThreadPoolExecutor
+
         self._executor = ThreadPoolExecutor(max_workers=4)
 
         if not CHROMA_AVAILABLE:
@@ -267,7 +292,9 @@ class MultiCollectionRetriever:
             all_collections = self.client.list_collections()
             available_names = [c.name for c in all_collections]
 
-            logger.info(f"[RAG] Found {len(all_collections)} collections: {available_names}")
+            logger.info(
+                f"[RAG] Found {len(all_collections)} collections: {available_names}"
+            )
 
             for name in self.COLLECTIONS:
                 if name in available_names:
@@ -289,7 +316,12 @@ class MultiCollectionRetriever:
             self.client = None
 
     def _query_single_collection(
-        self, name: str, collection, query: str, n_results: int, domain_filter: Optional[str]
+        self,
+        name: str,
+        collection,
+        query: str,
+        n_results: int,
+        domain_filter: Optional[str],
     ) -> List[Dict[str, Any]]:
         """Query a single collection - used for parallel execution."""
         results_list = []
@@ -310,18 +342,20 @@ class MultiCollectionRetriever:
 
                 similarity = 1.0 - min(distance / 2.0, 1.0)
 
-                results_list.append({
-                    "id": doc_id,
-                    "content": doc,
-                    "metadata": meta,
-                    "similarity": similarity,
-                    "collection": name,
-                    "domain": meta.get("domain", "unknown"),
-                })
+                results_list.append(
+                    {
+                        "id": doc_id,
+                        "content": doc,
+                        "metadata": meta,
+                        "similarity": similarity,
+                        "collection": name,
+                        "domain": meta.get("domain", "unknown"),
+                    }
+                )
 
         except Exception as e:
             logger.warning(f"[RAG] Error querying {name}: {e}")
-
+
         return results_list
 
     def search(
@@ -333,15 +367,19 @@ class MultiCollectionRetriever:
 
         # Submit parallel queries to all collections
        from concurrent.futures import as_completed
-
+
         futures = {}
         for name, collection in self.collections.items():
             future = self._executor.submit(
-                self._query_single_collection,
-                name, collection, query, n_results, domain_filter
+                self._query_single_collection,
+                name,
+                collection,
+                query,
+                n_results,
+                domain_filter,
             )
             futures[future] = name
-
+
         # Collect results as they complete (fastest first)
         all_results = []
         for future in as_completed(futures, timeout=10.0):  # 10s timeout
@@ -349,7 +387,9 @@ class MultiCollectionRetriever:
                 results = future.result()
                 all_results.extend(results)
             except Exception as e:
-                logger.warning(f"[RAG] Parallel query failed for {futures[future]}: {e}")
+                logger.warning(
+                    f"[RAG] Parallel query failed for {futures[future]}: {e}"
+                )
 
         all_results.sort(key=lambda x: x["similarity"], reverse=True)
         return all_results[: n_results * 2]
@@ -408,24 +448,54 @@ class RogerRAG:
         """Extract key terms from question for graph search."""
         # Remove common stopwords
         stopwords = {
-            "what", "when", "where", "who", "why", "how", "is", "are", "was",
-            "were", "the", "a", "an", "to", "of", "in", "on", "for", "with",
-            "about", "related", "connected", "happened", "after", "before",
-            "show", "me", "tell", "find", "get", "events", "timeline"
+            "what",
+            "when",
+            "where",
+            "who",
+            "why",
+            "how",
+            "is",
+            "are",
+            "was",
+            "were",
+            "the",
+            "a",
+            "an",
+            "to",
+            "of",
+            "in",
+            "on",
+            "for",
+            "with",
+            "about",
+            "related",
+            "connected",
+            "happened",
+            "after",
+            "before",
+            "show",
+            "me",
+            "tell",
+            "find",
+            "get",
+            "events",
+            "timeline",
         }
-
+
         words = question.lower().replace("?", "").replace(",", "").split()
         keywords = [w for w in words if w not in stopwords and len(w) > 2]
-
+
         return keywords[:5]  # Return top 5 keywords
 
-    def _format_context(self, docs: List[Dict[str, Any]], include_graph: bool = False) -> str:
+    def _format_context(
+        self, docs: List[Dict[str, Any]], include_graph: bool = False
+    ) -> str:
         if not docs:
             return "No relevant intelligence data found."
 
         context_parts = []
         now = datetime.now()
-
+
         # Separate ChromaDB and Neo4j results
         chroma_docs = [d for d in docs if d.get("source") != "neo4j_graph"]
         graph_docs = [d for d in docs if d.get("source") == "neo4j_graph"]
@@ -472,7 +542,7 @@ class RogerRAG:
                 f"TIMESTAMP: {timestamp} ({age_str})\n"
                 f"{doc['content']}\n"
             )
-
+
         # Format Neo4j graph results (if any)
         if graph_docs:
             context_parts.append("\n=== RELATED EVENTS FROM KNOWLEDGE GRAPH ===\n")
@@ -481,7 +551,7 @@ class RogerRAG:
                 related_str = ""
                 if related:
                     related_str = f"\n  Related events: {', '.join(str(r)[:50] + '...' for r in related[:2])}"
-
+
                 context_parts.append(
                     f"[Graph {i}] Domain: {doc.get('domain', 'unknown')} | "
                     f"Severity: {doc.get('severity', 'unknown')}\n"
@@ -534,23 +604,25 @@ class RogerRAG:
         docs = self.retriever.search(
             search_question, n_results=5, domain_filter=domain_filter
         )
-
+
         # Neo4j graph search (for relationship queries) - only if enabled
         graph_docs = []
         used_graph = False
         if self.neo4j_retriever and is_graph_query(search_question):
             logger.info(f"[RAG] Graph query detected: '{search_question}'")
             used_graph = True
-
+
             # Extract keywords for graph search
             # Simple: use first nouns/keywords from question
             keywords = self._extract_keywords(search_question)
-
+
             for keyword in keywords[:2]:  # Limit to 2 keywords
-                graph_docs.extend(self.neo4j_retriever.get_related_events(keyword, limit=3))
-
+                graph_docs.extend(
+                    self.neo4j_retriever.get_related_events(keyword, limit=3)
+                )
+
             logger.info(f"[RAG] Graph retrieval: {len(graph_docs)} docs from Neo4j")
-
+
         # Merge results (ChromaDB + Neo4j)
         all_docs = docs + graph_docs
 
@@ -559,7 +631,9 @@ class RogerRAG:
                 "answer": "I couldn't find any relevant intelligence data to answer your question.",
                 "sources": [],
                 "question": question,
-                "reformulated": search_question if search_question != question else None,
+                "reformulated": (
+                    search_question if search_question != question else None
+                ),
             }
 
         context = self._format_context(all_docs, include_graph=used_graph)
@@ -572,10 +646,11 @@ class RogerRAG:
         }
 
         current_date = datetime.now().strftime("%B %d, %Y")
-        rag_prompt = ChatPromptTemplate.from_messages([
-            (
-                "system",
-                f"""You are Roger, an AI intelligence analyst for Sri Lanka.
+        rag_prompt = ChatPromptTemplate.from_messages(
+            [
+                (
+                    "system",
+                    f"""You are Roger, an AI intelligence analyst for Sri Lanka.
 
 TODAY'S DATE: {current_date}
 
@@ -592,10 +667,11 @@ Be concise but informative. Cite source timestamps when available.
 
 Context:
 {{context}}""",
-            ),
-            MessagesPlaceholder(variable_name="history"),
-            ("human", "{question}"),
-        ])
+                ),
+                MessagesPlaceholder(variable_name="history"),
+                ("human", "{question}"),
+            ]
+        )
 
         history_messages = []
         for human, ai in self.chat_history[-5:]:
@@ -613,18 +689,22 @@ Context:
         sources_summary = []
         for doc in docs[:5]:
             meta = doc.get("metadata", {})
-            sources_summary.append({
-                "domain": meta.get("domain", "unknown"),
-                "platform": meta.get("platform", "unknown"),
-                "category": meta.get("category", ""),
-                "similarity": round(doc["similarity"], 3),
-            })
+            sources_summary.append(
+                {
+                    "domain": meta.get("domain", "unknown"),
+                    "platform": meta.get("platform", "unknown"),
+                    "category": meta.get("category", ""),
+                    "similarity": round(doc["similarity"], 3),
+                }
+            )
 
         return {
             "answer": answer,
             "sources": sources_summary,
             "question": question,
-            "reformulated": search_question if search_question != question else None,
+            "reformulated": (
+                search_question if search_question != question else None
+            ),
             "docs_found": len(docs),
         }
 
@@ -702,7 +782,9 @@ def run_cli():
         if result.get("sources"):
             print(f"\nSources ({len(result['sources'])} found):")
             for i, src in enumerate(result["sources"][:3], 1):
-                print(f"   {i}. {src['domain']} | {src['platform']} | Relevance: {src['similarity']:.0%}")
+                print(
+                    f"   {i}. {src['domain']} | {src['platform']} | Relevance: {src['similarity']:.0%}"
+                )
 
         if result.get("reformulated"):
             print(f"\n(Interpreted as: {result['reformulated']})")
src/storage/storage_manager.py CHANGED
@@ -20,6 +20,7 @@ logger = logging.getLogger("storage_manager")
 # Trending detection integration
 try:
     from ..utils.trending_detector import record_topic_mention
+
     TRENDING_AVAILABLE = True
 except ImportError:
     TRENDING_AVAILABLE = False
@@ -156,43 +157,84 @@ class StorageManager:
     def _extract_keywords(self, text: str, max_keywords: int = 5) -> List[str]:
         """
         Extract significant keywords from text for trending detection.
-
+
         Args:
             text: Text to extract keywords from
             max_keywords: Maximum number of keywords to return
-
+
         Returns:
             List of keywords (2-3 word phrases)
         """
         # Common stopwords to filter out
         stopwords = {
-            "the", "is", "at", "which", "on", "a", "an", "and", "or", "but",
-            "in", "with", "to", "for", "of", "as", "by", "from", "that", "this",
-            "be", "are", "was", "were", "been", "being", "have", "has", "had",
-            "do", "does", "did", "will", "would", "could", "should", "may",
-            "might", "must", "shall", "can", "need", "dare", "ought", "used",
-            "सिंहल", "தமிழ்",  # Common Sinhala/Tamil particles
+            "the",
+            "is",
+            "at",
+            "which",
+            "on",
+            "a",
+            "an",
+            "and",
+            "or",
+            "but",
+            "in",
+            "with",
+            "to",
+            "for",
+            "of",
+            "as",
+            "by",
+            "from",
+            "that",
+            "this",
+            "be",
+            "are",
+            "was",
+            "were",
+            "been",
+            "being",
+            "have",
+            "has",
+            "had",
+            "do",
+            "does",
+            "did",
+            "will",
+            "would",
+            "could",
+            "should",
+            "may",
+            "might",
+            "must",
+            "shall",
+            "can",
+            "need",
+            "dare",
+            "ought",
+            "used",
+            "सिंहल",
+            "தமிழ்",  # Common Sinhala/Tamil particles
         }
-
+
         # Clean text
         text = text.lower()
-        text = re.sub(r'http\S+|www\.\S+', '', text)  # Remove URLs
-        text = re.sub(r'[^\w\s]', ' ', text)  # Remove punctuation
-
+        text = re.sub(r"http\S+|www\.\S+", "", text)  # Remove URLs
+        text = re.sub(r"[^\w\s]", " ", text)  # Remove punctuation
+
         # Split into words
         words = text.split()
-
+
         # Filter stopwords and short words
         filtered = [w for w in words if w not in stopwords and len(w) > 2]
-
+
         # Extract significant words (prioritize proper nouns, locations, etc.)
         keywords = []
-
+
         # Single important words (capitalized in original or long words)
         for word in filtered[:20]:
             if len(word) > 4:  # Longer words are often more significant
                 keywords.append(word)
-
+
         # Deduplicate and limit
         seen = set()
         unique_keywords = []
@@ -200,18 +242,15 @@ class StorageManager:
             if kw not in seen:
                 seen.add(kw)
                 unique_keywords.append(kw)
-
+
         return unique_keywords[:max_keywords]
 
     def _record_trending_mentions(
-        self,
-        summary: str,
-        domain: str,
-        metadata: Optional[Dict[str, Any]] = None
+        self, summary: str, domain: str, metadata: Optional[Dict[str, Any]] = None
     ):
         """
         Extract keywords from summary and record them for trending detection.
-
+
         Args:
             summary: Event summary text
             domain: Event domain (political, economical, etc.)
@@ -220,17 +259,15 @@ class StorageManager:
         try:
             keywords = self._extract_keywords(summary)
             source = metadata.get("platform", "scraper") if metadata else "scraper"
-
+
             for keyword in keywords:
-                record_topic_mention(
-                    topic=keyword,
-                    source=source,
-                    domain=domain
-                )
-
+                record_topic_mention(topic=keyword, source=source, domain=domain)
+
             if keywords:
-                logger.debug(f"[TRENDING] Recorded {len(keywords)} keywords: {keywords[:3]}...")
-
+                logger.debug(
+                    f"[TRENDING] Recorded {len(keywords)} keywords: {keywords[:3]}..."
+                )
+
         except Exception as e:
             logger.warning(f"[TRENDING] Error recording mentions: {e}")
 
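The _extract_keywords() rules above, reduced to a standalone runnable sketch (stopword set abbreviated to a few entries from the real one; the printed output is what this reduction produces):

import re

STOP = {"the", "is", "at", "on", "in", "and", "from"}  # abbreviated

def extract_keywords(text: str, max_keywords: int = 5) -> list:
    text = re.sub(r"http\S+|www\.\S+", "", text.lower())  # drop URLs
    text = re.sub(r"[^\w\s]", " ", text)                  # drop punctuation
    words = [w for w in text.split() if w not in STOP and len(w) > 2]
    longer = [w for w in words[:20] if len(w) > 4]        # keep longer words
    seen, unique = set(), []
    for kw in longer:
        if kw not in seen:
            seen.add(kw)
            unique.append(kw)
    return unique[:max_keywords]

print(extract_keywords("Flooding reported in Colombo following heavy monsoon rains http://x.lk"))
# ['flooding', 'reported', 'colombo', 'following', 'heavy']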
src/utils/utils.py CHANGED
@@ -28,6 +28,7 @@ def utc_now() -> datetime:
28
  """Return current UTC time (Python 3.12+ compatible)."""
29
  return datetime.now(timezone.utc)
30
 
 
31
  # Optional Playwright import
32
  try:
33
  from playwright.sync_api import (
@@ -1021,26 +1022,26 @@ SA_CACHE_DURATION_MINUTES = 15 # 15 minute cache for all SA tools
1021
  def tool_ceb_power_status() -> Dict[str, Any]:
1022
  """
1023
  Get CEB power outage / load shedding schedule for Sri Lanka.
1024
-
1025
- ENHANCED:
1026
  - Scrapes ceb.lk for official schedules and PDF press releases
1027
  - Extracts text from Dropbox-hosted PDF announcements
1028
  - Falls back to news sites for power-related updates
1029
-
1030
  Returns:
1031
  Dict with schedules by area, current status, and timestamp
1032
  """
1033
  global _ceb_cache, _ceb_cache_time
1034
-
1035
  # Check cache
1036
  if _ceb_cache_time:
1037
  cache_age = (utc_now() - _ceb_cache_time).total_seconds() / 60
1038
  if cache_age < SA_CACHE_DURATION_MINUTES and _ceb_cache:
1039
  logger.info(f"[CEB] Using cached data ({cache_age:.1f} min old)")
1040
  return _ceb_cache
1041
-
1042
  logger.info("[CEB] Fetching power outage status...")
1043
-
1044
  result = {
1045
  "status": "operational",
1046
  "load_shedding_active": False,
@@ -1051,37 +1052,46 @@ def tool_ceb_power_status() -> Dict[str, Any]:
1051
  "fetched_at": utc_now().isoformat(),
1052
  "scrape_status": "baseline",
1053
  }
1054
-
1055
  pdf_links_found = []
1056
-
1057
  try:
1058
  # Try to scrape CEB website
1059
  resp = _safe_get("https://ceb.lk/", timeout=30)
1060
  if resp:
1061
  soup = BeautifulSoup(resp.text, "html.parser")
1062
  page_text = soup.get_text(separator="\n", strip=True).lower()
1063
-
1064
  # Check for load shedding keywords
1065
- if any(kw in page_text for kw in ["load shedding", "power cut", "outage schedule"]):
1066
  result["load_shedding_active"] = True
1067
  result["status"] = "load_shedding"
1068
-
1069
  # Extract any announcements
1070
- for tag in soup.find_all(["marquee", "div", "p"], class_=lambda x: x and "announce" in str(x).lower()):
1071
  text = tag.get_text(strip=True)
1072
  if text and len(text) > 20:
1073
  result["announcements"].append(text[:200])
1074
-
1075
  # ENHANCED: Find PDF links (Dropbox, direct PDFs, press releases)
1076
  for link in soup.find_all("a", href=True):
1077
  href = link.get("href", "")
1078
  link_text = link.get_text(strip=True).lower()
1079
-
1080
  # Check for Dropbox links or PDF links
1081
  is_dropbox = "dropbox.com" in href
1082
  is_pdf = href.lower().endswith(".pdf")
1083
- is_press_release = any(kw in link_text for kw in ["press release", "announcement", "notice", "schedule"])
1084
-
1085
  if is_dropbox or is_pdf or is_press_release:
1086
  # Convert Dropbox links for direct download
1087
  if is_dropbox:
@@ -1090,102 +1100,134 @@ def tool_ceb_power_status() -> Dict[str, Any]:
1090
  href = href.replace("dl=0", "dl=1")
1091
  elif "?dl=" not in href and "&dl=" not in href:
1092
  href = href + ("&" if "?" in href else "?") + "dl=1"
1093
-
1094
- pdf_links_found.append({
1095
- "url": href,
1096
- "title": link_text or "Press Release",
1097
- "is_dropbox": is_dropbox,
1098
- })
1099
-
1100
  # Limit to latest 3 PDFs to avoid too many downloads
1101
  pdf_links_found = pdf_links_found[:3]
1102
-
1103
  # Extract text from PDF links
1104
  for pdf_info in pdf_links_found:
1105
  try:
1106
  logger.info(f"[CEB] Extracting PDF: {pdf_info['title'][:50]}...")
1107
  pdf_text = _extract_text_from_pdf_url(pdf_info["url"])
1108
-
1109
- if pdf_text and not pdf_text.startswith("["): # Not an error message
1110
  # Check for load shedding in PDF content
1111
  pdf_lower = pdf_text.lower()
1112
- if any(kw in pdf_lower for kw in ["load shedding", "power cut", "outage", "interruption"]):
1113
  result["load_shedding_active"] = True
1114
  result["status"] = "load_shedding"
1115
-
1116
- result["press_releases"].append({
1117
- "title": pdf_info["title"],
1118
- "content": pdf_text[:1000] + ("..." if len(pdf_text) > 1000 else ""),
1119
- "source": "dropbox" if pdf_info["is_dropbox"] else "ceb.lk",
1120
- })
1121
  result["scrape_status"] = "live"
1122
  except Exception as pdf_error:
1123
  logger.warning(f"[CEB] PDF extraction error: {pdf_error}")
1124
-
1125
- logger.info(f"[CEB] Scraped - PDFs found: {len(pdf_links_found)}, Active: {result['load_shedding_active']}")
1126
-
1127
  # Also check news sites for power-related updates
1128
  news_sources = [
1129
  "https://www.news.lk/",
1130
  "https://www.dailymirror.lk/",
1131
  ]
1132
-
1133
  for news_url in news_sources:
1134
  try:
1135
  news_resp = _safe_get(news_url, timeout=20)
1136
  if news_resp:
1137
  news_soup = BeautifulSoup(news_resp.text, "html.parser")
1138
  news_text = news_soup.get_text(separator=" ", strip=True).lower()
1139
-
1140
  # Check for power-related news
1141
- if any(kw in news_text for kw in ["power cut", "load shedding", "ceb", "electricity"]):
1142
  # Look for headlines mentioning power
1143
  for headline in news_soup.find_all(["h1", "h2", "h3", "h4"]):
1144
  h_text = headline.get_text(strip=True)
1145
- if any(kw in h_text.lower() for kw in ["power", "ceb", "electricity", "load shedding"]):
1146
  if h_text not in result["announcements"]:
1147
- result["announcements"].append(f"[News] {h_text[:150]}")
1148
  break
1149
  except Exception as news_error:
1150
  logger.debug(f"[CEB] News scraping error for {news_url}: {news_error}")
1151
-
1152
  # If no press releases or announcements found, provide baseline message
1153
  if not result["press_releases"] and not result["announcements"]:
1154
  result["status"] = "no_load_shedding"
1155
  result["announcements"].append("CEB: Normal power supply across the island")
1156
-
1157
  except Exception as e:
1158
  logger.warning(f"[CEB] Scraping error: {e}")
1159
  result["status"] = "unknown"
1160
  result["error"] = str(e)
1161
-
1162
  # Update cache
1163
  _ceb_cache = result
1164
  _ceb_cache_time = utc_now()
1165
-
1166
  return result
1167
 
1168
 
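
Every tool_* helper in this file repeats the same module-level TTL cache idiom the hunk above reformats: a result dict plus a timestamp, refreshed only when older than SA_CACHE_DURATION_MINUTES. A distilled sketch of the pattern (names shortened for illustration):

from datetime import datetime, timezone

SA_CACHE_DURATION_MINUTES = 15
_cache = None
_cache_time = None

def utc_now():
    return datetime.now(timezone.utc)

def cached_fetch():
    global _cache, _cache_time
    if _cache_time is not None:
        age_min = (utc_now() - _cache_time).total_seconds() / 60
        if age_min < SA_CACHE_DURATION_MINUTES and _cache:
            return _cache  # still fresh: skip the network round trip
    result = {"fetched_at": utc_now().isoformat()}  # ... expensive scrape would go here ...
    _cache, _cache_time = result, utc_now()
    return result
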
1169
  def tool_fuel_prices() -> Dict[str, Any]:
1170
  """
1171
  Get current fuel prices in Sri Lanka.
1172
-
1173
  Scrapes official CEYPETCO/LIOC announcements or news sources.
1174
-
1175
  Returns:
1176
  Dict with prices for petrol, diesel, kerosene, and last update
1177
  """
1178
  global _fuel_cache, _fuel_cache_time
1179
-
1180
  # Check cache
1181
  if _fuel_cache_time:
1182
  cache_age = (utc_now() - _fuel_cache_time).total_seconds() / 60
1183
  if cache_age < SA_CACHE_DURATION_MINUTES and _fuel_cache:
1184
  logger.info(f"[FUEL] Using cached data ({cache_age:.1f} min old)")
1185
  return _fuel_cache
1186
-
1187
  logger.info("[FUEL] Fetching fuel prices...")
1188
-
1189
  # December 2025 CEYPETCO prices (confirmed unchanged from November 2025)
1190
  # Source: CEYPETCO official announcement
1191
  result = {
@@ -1201,7 +1243,7 @@ def tool_fuel_prices() -> Dict[str, Any]:
1201
  "fetched_at": utc_now().isoformat(),
1202
  "note": "Prices confirmed unchanged for December 2025",
1203
  }
1204
-
1205
  try:
1206
  # Try to scrape news for latest fuel price announcements
1207
  news_sources = [
@@ -1209,69 +1251,81 @@ def tool_fuel_prices() -> Dict[str, Any]:
1209
  "https://www.dailymirror.lk/",
1210
  "https://www.newsfirst.lk/",
1211
  ]
1212
-
1213
  for source_url in news_sources:
1214
  resp = _safe_get(source_url, timeout=20)
1215
  if resp:
1216
  soup = BeautifulSoup(resp.text, "html.parser")
1217
  page_text = soup.get_text(separator=" ", strip=True).lower()
1218
-
1219
  # Look for fuel price mentions
1220
  if "fuel" in page_text and ("price" in page_text or "lkr" in page_text):
1221
  # Extract prices using regex
1222
- petrol_match = re.search(r"petrol\s*(?:92|95)?\s*(?:octane)?\s*[:\-]?\s*(?:rs\.?|lkr)?\s*(\d{2,3}(?:\.\d{2})?)", page_text)
1223
- diesel_match = re.search(r"diesel\s*[:\-]?\s*(?:rs\.?|lkr)?\s*(\d{2,3}(?:\.\d{2})?)", page_text)
1224
-
1225
  if petrol_match:
1226
  try:
1227
- result["prices"]["petrol_92"]["price"] = float(petrol_match.group(1))
1228
  result["source"] = "news_scrape"
1229
  except ValueError:
1230
  pass
1231
  if diesel_match:
1232
  try:
1233
- result["prices"]["auto_diesel"]["price"] = float(diesel_match.group(1))
1234
  except ValueError:
1235
  pass
1236
  break
1237
-
1238
- logger.info(f"[FUEL] Fetched prices - Petrol 92: {result['prices']['petrol_92']['price']}")
1239
-
1240
  except Exception as e:
1241
  logger.warning(f"[FUEL] Scraping error: {e}")
1242
  result["error"] = str(e)
1243
-
1244
  # Update cache
1245
  _fuel_cache = result
1246
  _fuel_cache_time = utc_now()
1247
-
1248
  return result
1249
 
1250
 
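
The petrol/diesel patterns reformatted above tolerate optional octane numbers, currency prefixes, and separators. A quick demonstration of how they match on sample (already lowercased) headline text:

import re

PETROL_RE = r"petrol\s*(?:92|95)?\s*(?:octane)?\s*[:\-]?\s*(?:rs\.?|lkr)?\s*(\d{2,3}(?:\.\d{2})?)"
DIESEL_RE = r"diesel\s*[:\-]?\s*(?:rs\.?|lkr)?\s*(\d{2,3}(?:\.\d{2})?)"

sample = "petrol 92 octane rs. 299.00 and diesel: lkr 277 from midnight"
petrol = re.search(PETROL_RE, sample)
diesel = re.search(DIESEL_RE, sample)
print(float(petrol.group(1)), float(diesel.group(1)))  # 299.0 277.0
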
1251
  def tool_cbsl_indicators() -> Dict[str, Any]:
1252
  """
1253
  Get key economic indicators from Central Bank of Sri Lanka.
1254
-
1255
  Scrapes live data from cbsl.gov.lk including:
1256
  - Exchange rates (USD/LKR TT Buy/Sell)
1257
  - CCPI Inflation
1258
  - Overnight Policy Rate
1259
  - Forex reserves
1260
-
1261
  Returns:
1262
  Dict with economic indicators and trend data
1263
  """
1264
  global _cbsl_cache, _cbsl_cache_time
1265
-
1266
  # Check cache
1267
  if _cbsl_cache_time:
1268
  cache_age = (utc_now() - _cbsl_cache_time).total_seconds() / 60
1269
  if cache_age < SA_CACHE_DURATION_MINUTES and _cbsl_cache:
1270
  logger.info(f"[CBSL] Using cached data ({cache_age:.1f} min old)")
1271
  return _cbsl_cache
1272
-
1273
  logger.info("[CBSL] Fetching economic indicators from cbsl.gov.lk...")
1274
-
1275
  # Baseline economic data (December 2025 - latest known values)
1276
  result = {
1277
  "indicators": {
@@ -1308,40 +1362,50 @@ def tool_cbsl_indicators() -> Dict[str, Any]:
1308
  "data_as_of": "2025-12",
1309
  "scrape_status": "baseline",
1310
  }
1311
-
1312
  try:
1313
  # Try to scrape CBSL for updated rates
1314
  resp = _safe_get("https://www.cbsl.gov.lk/", timeout=30)
1315
  if resp:
1316
  soup = BeautifulSoup(resp.text, "html.parser")
1317
  page_text = soup.get_text(separator=" ", strip=True)
1318
-
1319
  scraped_any = False
1320
-
1321
  # Extract TT Buy exchange rate (format: "TT Buy 305.3238" or "TT Buy: 305.3238")
1322
- tt_buy_match = re.search(r"TT\s*Buy[:\s]*(\d{2,3}(?:\.\d{2,4})?)", page_text, re.I)
1323
  if tt_buy_match:
1324
  try:
1325
- result["indicators"]["exchange_rate"]["usd_lkr_buy"] = round(float(tt_buy_match.group(1)), 2)
1326
  scraped_any = True
1327
  except ValueError:
1328
  pass
1329
-
1330
  # Extract TT Sell exchange rate
1331
- tt_sell_match = re.search(r"TT\s*Sell[:\s]*(\d{2,3}(?:\.\d{2,4})?)", page_text, re.I)
1332
  if tt_sell_match:
1333
  try:
1334
- result["indicators"]["exchange_rate"]["usd_lkr_sell"] = round(float(tt_sell_match.group(1)), 2)
1335
  scraped_any = True
1336
  except ValueError:
1337
  pass
1338
-
1339
  # Calculate mid rate if we have both buy and sell
1340
  if tt_buy_match and tt_sell_match:
1341
  buy = result["indicators"]["exchange_rate"]["usd_lkr_buy"]
1342
  sell = result["indicators"]["exchange_rate"]["usd_lkr_sell"]
1343
- result["indicators"]["exchange_rate"]["usd_lkr"] = round((buy + sell) / 2, 2)
1344
-
1345
  # Extract CCPI Inflation (format: "CCPI Inflation 2.10%" or just "Inflation 2.10 %")
1346
  inflation_patterns = [
1347
  r"CCPI\s*Inflation[:\s]*(\d{1,2}(?:\.\d{1,2})?)\s*%",
@@ -1352,12 +1416,14 @@ def tool_cbsl_indicators() -> Dict[str, Any]:
1352
  inflation_match = re.search(pattern, page_text, re.I)
1353
  if inflation_match:
1354
  try:
1355
- result["indicators"]["inflation"]["ccpi_yoy"] = float(inflation_match.group(1))
1356
  scraped_any = True
1357
  break
1358
  except ValueError:
1359
  pass
1360
-
1361
  # Extract Overnight Policy Rate (format: "Overnight Policy Rate 7.75%" or "Policy Rate 7.75 %")
1362
  policy_patterns = [
1363
  r"Overnight\s*Policy\s*Rate[:\s]*(\d{1,2}(?:\.\d{1,2})?)\s*%",
@@ -1368,12 +1434,14 @@ def tool_cbsl_indicators() -> Dict[str, Any]:
1368
  policy_match = re.search(pattern, page_text, re.I)
1369
  if policy_match:
1370
  try:
1371
- result["indicators"]["policy_rates"]["overnight_rate"] = float(policy_match.group(1))
1372
  scraped_any = True
1373
  break
1374
  except ValueError:
1375
  pass
1376
-
1377
  if scraped_any:
1378
  result["scrape_status"] = "live"
1379
  result["data_as_of"] = utc_now().strftime("%Y-%m")
@@ -1387,38 +1455,38 @@ def tool_cbsl_indicators() -> Dict[str, Any]:
1387
  logger.info("[CBSL] Using baseline data - no live values matched")
1388
  else:
1389
  logger.warning("[CBSL] Could not reach cbsl.gov.lk, using baseline data")
1390
-
1391
  except Exception as e:
1392
  logger.warning(f"[CBSL] Scraping error: {e}")
1393
  result["error"] = str(e)
1394
-
1395
  # Update cache
1396
  _cbsl_cache = result
1397
  _cbsl_cache_time = utc_now()
1398
-
1399
  return result
1400
 
1401
 
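
The CBSL scrape above looks for "TT Buy"/"TT Sell" figures and derives a mid rate when both match. A self-contained illustration of that arithmetic on sample page text:

import re

page_text = "Exchange Rates TT Buy 305.3238 TT Sell: 313.0000"
buy_m = re.search(r"TT\s*Buy[:\s]*(\d{2,3}(?:\.\d{2,4})?)", page_text, re.I)
sell_m = re.search(r"TT\s*Sell[:\s]*(\d{2,3}(?:\.\d{2,4})?)", page_text, re.I)
if buy_m and sell_m:
    buy = round(float(buy_m.group(1)), 2)    # 305.32
    sell = round(float(sell_m.group(1)), 2)  # 313.0
    mid = round((buy + sell) / 2, 2)         # 309.16
    print(buy, sell, mid)
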
1402
  def tool_health_alerts() -> Dict[str, Any]:
1403
  """
1404
  Get health alerts and disease outbreak information for Sri Lanka.
1405
-
1406
  Includes dengue case counts, epidemic alerts, and health advisories.
1407
-
1408
  Returns:
1409
  Dict with health alerts, disease data, and notifications
1410
  """
1411
  global _health_cache, _health_cache_time
1412
-
1413
  # Check cache
1414
  if _health_cache_time:
1415
  cache_age = (utc_now() - _health_cache_time).total_seconds() / 60
1416
  if cache_age < SA_CACHE_DURATION_MINUTES and _health_cache:
1417
  logger.info(f"[HEALTH] Using cached data ({cache_age:.1f} min old)")
1418
  return _health_cache
1419
-
1420
  logger.info("[HEALTH] Fetching health alerts...")
1421
-
1422
  # Baseline health data
1423
  result = {
1424
  "alerts": [],
@@ -1433,29 +1501,39 @@ def tool_health_alerts() -> Dict[str, Any]:
1433
  "source": "health.gov.lk",
1434
  "fetched_at": utc_now().isoformat(),
1435
  }
1436
-
1437
  try:
1438
  # Try to scrape Health Ministry
1439
  resp = _safe_get("https://www.health.gov.lk/", timeout=30)
1440
  if resp:
1441
  soup = BeautifulSoup(resp.text, "html.parser")
1442
  page_text = soup.get_text(separator="\n", strip=True).lower()
1443
-
1444
  # Check for outbreak keywords
1445
- outbreak_keywords = ["outbreak", "epidemic", "alert", "warning", "emergency"]
1446
  for kw in outbreak_keywords:
1447
  if kw in page_text:
1448
  # Try to extract the context
1449
  idx = page_text.find(kw)
1450
- context = page_text[max(0, idx-50):idx+100]
1451
  if len(context) > 20:
1452
- result["alerts"].append({
1453
- "type": "health_notice",
1454
- "text": context.strip()[:150],
1455
- "severity": "medium" if kw in ["alert", "warning"] else "low",
1456
- })
1457
  break
1458
-
1459
  # Check for dengue data
1460
  dengue_match = re.search(r"dengue[:\s]*(\d{1,5})\s*(?:cases?)?", page_text)
1461
  if dengue_match:
@@ -1463,67 +1541,161 @@ def tool_health_alerts() -> Dict[str, Any]:
1463
  result["dengue"]["weekly_cases"] = int(dengue_match.group(1))
1464
  except ValueError:
1465
  pass
1466
-
1467
- logger.info(f"[HEALTH] Fetched - Dengue cases: {result['dengue']['weekly_cases']}")
1468
-
1469
  # Add seasonal health advisory
1470
  current_month = utc_now().month
1471
  if current_month in [5, 6, 10, 11]: # Monsoon = mosquito season
1472
- result["advisories"].append({
1473
- "type": "seasonal",
1474
- "text": "Monsoon season: Increased dengue risk. Remove stagnant water around homes.",
1475
- "severity": "medium",
1476
- })
1477
-
1478
  except Exception as e:
1479
  logger.warning(f"[HEALTH] Scraping error: {e}")
1480
  result["error"] = str(e)
1481
-
1482
  # Update cache
1483
  _health_cache = result
1484
  _health_cache_time = utc_now()
1485
-
1486
  return result
1487
 
1488
 
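
The outbreak check above slices a fixed window of characters around the first keyword hit rather than parsing page structure. A small sketch of that context-window idiom:

page_text = "ministry notice: dengue outbreak reported in western province schools"
for kw in ["outbreak", "epidemic", "alert", "warning", "emergency"]:
    if kw in page_text:
        idx = page_text.find(kw)
        context = page_text[max(0, idx - 50):idx + 100]  # chars around the hit
        severity = "medium" if kw in ["alert", "warning"] else "low"
        print(kw, severity, "->", context.strip()[:150])
        break  # first keyword wins, mirroring the loop above
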
1489
  def tool_commodity_prices() -> Dict[str, Any]:
1490
  """
1491
  Get prices for essential commodities in Sri Lanka.
1492
-
1493
  Includes rice, sugar, dhal, milk powder, and other staples.
1494
-
1495
  Returns:
1496
  Dict with commodity prices, units, and recent changes
1497
  """
1498
  global _commodity_cache, _commodity_cache_time
1499
-
1500
  # Check cache
1501
  if _commodity_cache_time:
1502
  cache_age = (utc_now() - _commodity_cache_time).total_seconds() / 60
1503
  if cache_age < SA_CACHE_DURATION_MINUTES and _commodity_cache:
1504
  logger.info(f"[COMMODITY] Using cached data ({cache_age:.1f} min old)")
1505
  return _commodity_cache
1506
-
1507
  logger.info("[COMMODITY] Fetching commodity prices...")
1508
-
1509
  # Current approximate commodity prices (LKR)
1510
  result = {
1511
  "commodities": [
1512
- {"name": "White Rice (Nadu)", "price": 220, "unit": "LKR/kg", "change": 0, "category": "grains"},
1513
- {"name": "White Rice (Samba)", "price": 250, "unit": "LKR/kg", "change": 0, "category": "grains"},
1514
- {"name": "Red Rice", "price": 240, "unit": "LKR/kg", "change": 0, "category": "grains"},
1515
- {"name": "Wheat Flour", "price": 195, "unit": "LKR/kg", "change": -5, "category": "grains"},
1516
- {"name": "Sugar (White)", "price": 240, "unit": "LKR/kg", "change": 0, "category": "essentials"},
1517
- {"name": "Dhal (Mysore)", "price": 510, "unit": "LKR/kg", "change": 10, "category": "pulses"},
1518
- {"name": "Dhal (Red)", "price": 340, "unit": "LKR/kg", "change": 0, "category": "pulses"},
1519
- {"name": "Milk Powder (400g)", "price": 1250, "unit": "LKR/pack", "change": 0, "category": "dairy"},
1520
- {"name": "Coconut Oil", "price": 680, "unit": "LKR/L", "change": -20, "category": "cooking"},
1521
- {"name": "Coconut (Fresh)", "price": 120, "unit": "LKR/each", "change": 10, "category": "cooking"},
1522
- {"name": "Eggs (10)", "price": 480, "unit": "LKR/10", "change": 0, "category": "protein"},
1523
- {"name": "Chicken", "price": 1350, "unit": "LKR/kg", "change": 50, "category": "protein"},
1524
- {"name": "Big Onion", "price": 280, "unit": "LKR/kg", "change": -10, "category": "vegetables"},
1525
- {"name": "Potatoes", "price": 350, "unit": "LKR/kg", "change": 20, "category": "vegetables"},
1526
- {"name": "LP Gas (12.5kg)", "price": 4290, "unit": "LKR/cylinder", "change": 0, "category": "fuel"},
1527
  ],
1528
  "source": "Consumer Affairs Authority / Market Survey",
1529
  "fetched_at": utc_now().isoformat(),
@@ -1533,7 +1705,7 @@ def tool_commodity_prices() -> Dict[str, Any]:
1533
  "items_stable": 0,
1534
  },
1535
  }
1536
-
1537
  # Calculate summary
1538
  for item in result["commodities"]:
1539
  if item["change"] > 0:
@@ -1542,14 +1714,14 @@ def tool_commodity_prices() -> Dict[str, Any]:
1542
  result["summary"]["items_decreased"] += 1
1543
  else:
1544
  result["summary"]["items_stable"] += 1
1545
-
1546
  try:
1547
  # Try to scrape news for price updates
1548
  resp = _safe_get("https://www.dailymirror.lk/", timeout=20)
1549
  if resp:
1550
  soup = BeautifulSoup(resp.text, "html.parser")
1551
  page_text = soup.get_text(separator=" ", strip=True).lower()
1552
-
1553
  # Check for LP Gas price updates (commonly announced)
1554
  gas_match = re.search(r"lp\s*gas[:\s]*(?:rs\.?|lkr)?\s*(\d{4})", page_text)
1555
  if gas_match:
@@ -1563,40 +1735,40 @@ def tool_commodity_prices() -> Dict[str, Any]:
1563
  break
1564
  except ValueError:
1565
  pass
1566
-
1567
  logger.info("[COMMODITY] Successfully fetched commodity prices")
1568
-
1569
  except Exception as e:
1570
  logger.warning(f"[COMMODITY] Scraping error: {e}")
1571
  result["error"] = str(e)
1572
-
1573
  # Update cache
1574
  _commodity_cache = result
1575
  _commodity_cache_time = utc_now()
1576
-
1577
  return result
1578
 
1579
 
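
The commodity summary above is a single pass that buckets items by the sign of their price change. Sketch:

commodities = [
    {"name": "Wheat Flour", "change": -5},
    {"name": "Chicken", "change": 50},
    {"name": "Sugar (White)", "change": 0},
]
summary = {"items_increased": 0, "items_decreased": 0, "items_stable": 0}
for item in commodities:
    if item["change"] > 0:
        summary["items_increased"] += 1
    elif item["change"] < 0:
        summary["items_decreased"] += 1
    else:
        summary["items_stable"] += 1
print(summary)  # {'items_increased': 1, 'items_decreased': 1, 'items_stable': 1}
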
1580
  def tool_water_supply_alerts() -> Dict[str, Any]:
1581
  """
1582
  Get water supply disruption alerts from NWSDB.
1583
-
1584
  Returns information about planned/unplanned water cuts and affected areas.
1585
-
1586
  Returns:
1587
  Dict with active disruptions, affected areas, and restoration times
1588
  """
1589
  global _water_cache, _water_cache_time
1590
-
1591
  # Check cache
1592
  if _water_cache_time:
1593
  cache_age = (utc_now() - _water_cache_time).total_seconds() / 60
1594
  if cache_age < SA_CACHE_DURATION_MINUTES and _water_cache:
1595
  logger.info(f"[WATER] Using cached data ({cache_age:.1f} min old)")
1596
  return _water_cache
1597
-
1598
  logger.info("[WATER] Fetching water supply alerts...")
1599
-
1600
  result = {
1601
  "status": "normal",
1602
  "active_disruptions": [],
@@ -1605,22 +1777,28 @@ def tool_water_supply_alerts() -> Dict[str, Any]:
1605
  "fetched_at": utc_now().isoformat(),
1606
  "overall_supply": "stable",
1607
  }
1608
-
1609
  try:
1610
  # Try to scrape NWSDB website
1611
  resp = _safe_get("https://www.waterboard.lk/", timeout=30)
1612
  if resp:
1613
  soup = BeautifulSoup(resp.text, "html.parser")
1614
  page_text = soup.get_text(separator="\n", strip=True).lower()
1615
-
1616
  # Check for disruption keywords
1617
- disruption_keywords = ["disruption", "interruption", "cut off", "maintenance", "repair"]
1618
  for kw in disruption_keywords:
1619
  if kw in page_text:
1620
  result["status"] = "disruptions_reported"
1621
  idx = page_text.find(kw)
1622
- context = page_text[max(0, idx-30):idx+120]
1623
-
1624
  # Try to extract area name
1625
  area_patterns = [
1626
  r"(colombo|gampaha|kandy|galle|matara|jaffna|kurunegala|ratnapura)",
@@ -1632,31 +1810,35 @@ def tool_water_supply_alerts() -> Dict[str, Any]:
1632
  if match:
1633
  area = match.group(1).title()
1634
  break
1635
-
1636
- result["active_disruptions"].append({
1637
- "area": area,
1638
- "type": kw,
1639
- "details": context.strip()[:150],
1640
- "severity": "medium",
1641
- })
1642
  break
1643
-
1644
- logger.info(f"[WATER] Fetched - Disruptions: {len(result['active_disruptions'])}")
1645
-
1646
  # If no disruptions found via scraping, report normal
1647
  if not result["active_disruptions"]:
1648
  result["status"] = "normal"
1649
  result["overall_supply"] = "Normal water supply across most areas"
1650
-
1651
  except Exception as e:
1652
  logger.warning(f"[WATER] Scraping error: {e}")
1653
  result["error"] = str(e)
1654
  result["status"] = "unknown"
1655
-
1656
  # Update cache
1657
  _water_cache = result
1658
  _water_cache_time = utc_now()
1659
-
1660
  return result
1661
 
1662
 
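
The water-supply scrape above pairs a disruption keyword with a district regex to guess the affected area. A compact sketch; the district list mirrors the pattern in the hunk, while the default string here is illustrative (the real fallback value is outside the lines shown):

import re

AREA_RE = r"(colombo|gampaha|kandy|galle|matara|jaffna|kurunegala|ratnapura)"

def guess_area(context, default="Multiple areas"):
    match = re.search(AREA_RE, context)
    return match.group(1).title() if match else default

print(guess_area("supply interruption in gampaha due to main repair"))  # Gampaha
print(guess_area("island-wide maintenance window"))                     # Multiple areas
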
@@ -4389,10 +4571,12 @@ def scrape_reddit(
4389
  data = scrape_reddit_impl(keywords=keywords, limit=limit, subreddit=subreddit)
4390
  return json.dumps(data, default=str)
4391
 
4392
  # ============================================
4393
  # SITUATIONAL AWARENESS TOOLS (DASHBOARD APIs)
4394
  # ============================================
4395
 
4396
  def tool_health_alerts() -> dict:
4397
  """Get health alerts from health.gov.lk - structured for dashboard."""
4398
  try:
@@ -4401,14 +4585,16 @@ def tool_health_alerts() -> dict:
4401
  "dengue": {
4402
  "weekly_cases": 1890,
4403
  "high_risk_districts": ["Colombo", "Gampaha", "Kalutara"],
4404
- "trend": "stable"
4405
  },
4406
- "advisories": [{
4407
- "type": "seasonal",
4408
- "text": "Monsoon season: Take precautions against dengue",
4409
- "severity": "medium"
4410
- }],
4411
- "fetched_at": utc_now().isoformat()
4412
  }
4413
  except Exception as e:
4414
  return {"alerts": [], "dengue": {}, "advisories": [], "error": str(e)}
@@ -4421,7 +4607,7 @@ def tool_water_supply_alerts() -> dict:
4421
  "status": "normal",
4422
  "active_disruptions": [],
4423
  "overall_supply": "Normal water supply across most areas",
4424
- "fetched_at": utc_now().isoformat()
4425
  }
4426
  except Exception as e:
4427
  return {"status": "unknown", "active_disruptions": [], "error": str(e)}
@@ -4434,7 +4620,7 @@ def tool_ceb_power_status() -> dict:
4434
  "current_schedule": None,
4435
  "announcements": [],
4436
  "generation_capacity": "Normal",
4437
- "fetched_at": utc_now().isoformat()
4438
  }
4439
 
4440
 
@@ -4446,11 +4632,11 @@ def tool_fuel_prices() -> dict:
4446
  "petrol_95": {"price": 335, "unit": "LKR/L"},
4447
  "diesel": {"price": 277, "unit": "LKR/L"},
4448
  "super_diesel": {"price": 318, "unit": "LKR/L"},
4449
- "kerosene": {"price": 185, "unit": "LKR/L"}
4450
  },
4451
  "last_updated": "2025-12-01",
4452
  "source": "CEYPETCO",
4453
- "fetched_at": utc_now().isoformat()
4454
  }
4455
 
4456
 
@@ -4460,7 +4646,7 @@ def tool_cbsl_rates() -> dict:
4460
  "inflation": {"headline": 0.7, "core": 1.2, "unit": "%"},
4461
  "policy_rates": {"sdfr": 8.25, "slfr": 9.25, "unit": "%"},
4462
  "exchange_rate": {"usd": 296.50, "eur": 312.80, "unit": "LKR"},
4463
- "fetched_at": utc_now().isoformat()
4464
  }
4465
 
4466
 
@@ -4475,13 +4661,13 @@ def tool_cbsl_indicators() -> dict:
4475
  "inflation": {
4476
  "ccpi_yoy": 2.1, # CCPI Year-on-Year (Nov 2025 actual)
4477
  "core_yoy": 1.8,
4478
- "trend": "stable"
4479
  },
4480
  "policy_rates": {
4481
  "overnight_rate": 7.75, # Overnight Policy Rate (Dec 2025)
4482
  "sdfr": 7.25, # Standing Deposit Facility Rate
4483
  "slfr": 8.25, # Standing Lending Facility Rate
4484
- "last_changed": "2024-12"
4485
  },
4486
  "exchange_rate": {
4487
  "usd_lkr": 309.17, # Dec 11, 2025 rate
@@ -4489,16 +4675,16 @@ def tool_cbsl_indicators() -> dict:
4489
  "usd_lkr_sell": 313.00,
4490
  "eur_lkr": 325.50,
4491
  "gbp_lkr": 390.25,
4492
- "trend": "stable"
4493
  },
4494
  "forex_reserves": {
4495
  "value": 6.5, # Billion USD (Dec 2025)
4496
- "trend": "improving"
4497
- }
4498
  },
4499
  "source": "Central Bank of Sri Lanka",
4500
  "scrape_status": "baseline",
4501
- "fetched_at": utc_now().isoformat()
4502
  }
4503
 
4504
 
@@ -4510,9 +4696,9 @@ def tool_commodity_prices() -> dict:
4510
  {"name": "Rice (Samba)", "price": 250, "unit": "LKR/kg"},
4511
  {"name": "Dhal (Red)", "price": 360, "unit": "LKR/kg"},
4512
  {"name": "Sugar", "price": 215, "unit": "LKR/kg"},
4513
- {"name": "Coconut", "price": 120, "unit": "LKR/nut"}
4514
  ],
4515
- "fetched_at": utc_now().isoformat()
4516
  }
4517
 
4518
 
 
28
  """Return current UTC time (Python 3.12+ compatible)."""
29
  return datetime.now(timezone.utc)
30
 
31
+
32
  # Optional Playwright import
33
  try:
34
  from playwright.sync_api import (
 
1022
  def tool_ceb_power_status() -> Dict[str, Any]:
1023
  """
1024
  Get CEB power outage / load shedding schedule for Sri Lanka.
1025
+
1026
+ ENHANCED:
1027
  - Scrapes ceb.lk for official schedules and PDF press releases
1028
  - Extracts text from Dropbox-hosted PDF announcements
1029
  - Falls back to news sites for power-related updates
1030
+
1031
  Returns:
1032
  Dict with schedules by area, current status, and timestamp
1033
  """
1034
  global _ceb_cache, _ceb_cache_time
1035
+
1036
  # Check cache
1037
  if _ceb_cache_time:
1038
  cache_age = (utc_now() - _ceb_cache_time).total_seconds() / 60
1039
  if cache_age < SA_CACHE_DURATION_MINUTES and _ceb_cache:
1040
  logger.info(f"[CEB] Using cached data ({cache_age:.1f} min old)")
1041
  return _ceb_cache
1042
+
1043
  logger.info("[CEB] Fetching power outage status...")
1044
+
1045
  result = {
1046
  "status": "operational",
1047
  "load_shedding_active": False,
 
1052
  "fetched_at": utc_now().isoformat(),
1053
  "scrape_status": "baseline",
1054
  }
1055
+
1056
  pdf_links_found = []
1057
+
1058
  try:
1059
  # Try to scrape CEB website
1060
  resp = _safe_get("https://ceb.lk/", timeout=30)
1061
  if resp:
1062
  soup = BeautifulSoup(resp.text, "html.parser")
1063
  page_text = soup.get_text(separator="\n", strip=True).lower()
1064
+
1065
  # Check for load shedding keywords
1066
+ if any(
1067
+ kw in page_text
1068
+ for kw in ["load shedding", "power cut", "outage schedule"]
1069
+ ):
1070
  result["load_shedding_active"] = True
1071
  result["status"] = "load_shedding"
1072
+
1073
  # Extract any announcements
1074
+ for tag in soup.find_all(
1075
+ ["marquee", "div", "p"],
1076
+ class_=lambda x: x and "announce" in str(x).lower(),
1077
+ ):
1078
  text = tag.get_text(strip=True)
1079
  if text and len(text) > 20:
1080
  result["announcements"].append(text[:200])
1081
+
1082
  # ENHANCED: Find PDF links (Dropbox, direct PDFs, press releases)
1083
  for link in soup.find_all("a", href=True):
1084
  href = link.get("href", "")
1085
  link_text = link.get_text(strip=True).lower()
1086
+
1087
  # Check for Dropbox links or PDF links
1088
  is_dropbox = "dropbox.com" in href
1089
  is_pdf = href.lower().endswith(".pdf")
1090
+ is_press_release = any(
1091
+ kw in link_text
1092
+ for kw in ["press release", "announcement", "notice", "schedule"]
1093
+ )
1094
+
1095
  if is_dropbox or is_pdf or is_press_release:
1096
  # Convert Dropbox links for direct download
1097
  if is_dropbox:
 
1100
  href = href.replace("dl=0", "dl=1")
1101
  elif "?dl=" not in href and "&dl=" not in href:
1102
  href = href + ("&" if "?" in href else "?") + "dl=1"
1103
+
1104
+ pdf_links_found.append(
1105
+ {
1106
+ "url": href,
1107
+ "title": link_text or "Press Release",
1108
+ "is_dropbox": is_dropbox,
1109
+ }
1110
+ )
1111
+
1112
  # Limit to latest 3 PDFs to avoid too many downloads
1113
  pdf_links_found = pdf_links_found[:3]
1114
+
1115
  # Extract text from PDF links
1116
  for pdf_info in pdf_links_found:
1117
  try:
1118
  logger.info(f"[CEB] Extracting PDF: {pdf_info['title'][:50]}...")
1119
  pdf_text = _extract_text_from_pdf_url(pdf_info["url"])
1120
+
1121
+ if pdf_text and not pdf_text.startswith(
1122
+ "["
1123
+ ): # Not an error message
1124
  # Check for load shedding in PDF content
1125
  pdf_lower = pdf_text.lower()
1126
+ if any(
1127
+ kw in pdf_lower
1128
+ for kw in [
1129
+ "load shedding",
1130
+ "power cut",
1131
+ "outage",
1132
+ "interruption",
1133
+ ]
1134
+ ):
1135
  result["load_shedding_active"] = True
1136
  result["status"] = "load_shedding"
1137
+
1138
+ result["press_releases"].append(
1139
+ {
1140
+ "title": pdf_info["title"],
1141
+ "content": pdf_text[:1000]
1142
+ + ("..." if len(pdf_text) > 1000 else ""),
1143
+ "source": (
1144
+ "dropbox" if pdf_info["is_dropbox"] else "ceb.lk"
1145
+ ),
1146
+ }
1147
+ )
1148
  result["scrape_status"] = "live"
1149
  except Exception as pdf_error:
1150
  logger.warning(f"[CEB] PDF extraction error: {pdf_error}")
1151
+
1152
+ logger.info(
1153
+ f"[CEB] Scraped - PDFs found: {len(pdf_links_found)}, Active: {result['load_shedding_active']}"
1154
+ )
1155
+
1156
  # Also check news sites for power-related updates
1157
  news_sources = [
1158
  "https://www.news.lk/",
1159
  "https://www.dailymirror.lk/",
1160
  ]
1161
+
1162
  for news_url in news_sources:
1163
  try:
1164
  news_resp = _safe_get(news_url, timeout=20)
1165
  if news_resp:
1166
  news_soup = BeautifulSoup(news_resp.text, "html.parser")
1167
  news_text = news_soup.get_text(separator=" ", strip=True).lower()
1168
+
1169
  # Check for power-related news
1170
+ if any(
1171
+ kw in news_text
1172
+ for kw in ["power cut", "load shedding", "ceb", "electricity"]
1173
+ ):
1174
  # Look for headlines mentioning power
1175
  for headline in news_soup.find_all(["h1", "h2", "h3", "h4"]):
1176
  h_text = headline.get_text(strip=True)
1177
+ if any(
1178
+ kw in h_text.lower()
1179
+ for kw in [
1180
+ "power",
1181
+ "ceb",
1182
+ "electricity",
1183
+ "load shedding",
1184
+ ]
1185
+ ):
1186
  if h_text not in result["announcements"]:
1187
+ result["announcements"].append(
1188
+ f"[News] {h_text[:150]}"
1189
+ )
1190
  break
1191
  except Exception as news_error:
1192
  logger.debug(f"[CEB] News scraping error for {news_url}: {news_error}")
1193
+
1194
  # If no press releases or announcements found, provide baseline message
1195
  if not result["press_releases"] and not result["announcements"]:
1196
  result["status"] = "no_load_shedding"
1197
  result["announcements"].append("CEB: Normal power supply across the island")
1198
+
1199
  except Exception as e:
1200
  logger.warning(f"[CEB] Scraping error: {e}")
1201
  result["status"] = "unknown"
1202
  result["error"] = str(e)
1203
+
1204
  # Update cache
1205
  _ceb_cache = result
1206
  _ceb_cache_time = utc_now()
1207
+
1208
  return result
1209
 
1210
 
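
The Dropbox handling reformatted above rewrites share links into direct downloads by forcing dl=1. A standalone sketch of that URL rewrite:

def dropbox_direct(href):
    # Force Dropbox share links into download mode (dl=1) so the PDF bytes come back.
    if "dl=0" in href:
        return href.replace("dl=0", "dl=1")
    if "?dl=" not in href and "&dl=" not in href:
        return href + ("&" if "?" in href else "?") + "dl=1"
    return href

print(dropbox_direct("https://www.dropbox.com/s/abc/notice.pdf?dl=0"))
# https://www.dropbox.com/s/abc/notice.pdf?dl=1
print(dropbox_direct("https://www.dropbox.com/s/abc/notice.pdf"))
# https://www.dropbox.com/s/abc/notice.pdf?dl=1
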
1211
  def tool_fuel_prices() -> Dict[str, Any]:
1212
  """
1213
  Get current fuel prices in Sri Lanka.
1214
+
1215
  Scrapes official CEYPETCO/LIOC announcements or news sources.
1216
+
1217
  Returns:
1218
  Dict with prices for petrol, diesel, kerosene, and last update
1219
  """
1220
  global _fuel_cache, _fuel_cache_time
1221
+
1222
  # Check cache
1223
  if _fuel_cache_time:
1224
  cache_age = (utc_now() - _fuel_cache_time).total_seconds() / 60
1225
  if cache_age < SA_CACHE_DURATION_MINUTES and _fuel_cache:
1226
  logger.info(f"[FUEL] Using cached data ({cache_age:.1f} min old)")
1227
  return _fuel_cache
1228
+
1229
  logger.info("[FUEL] Fetching fuel prices...")
1230
+
1231
  # December 2025 CEYPETCO prices (confirmed unchanged from November 2025)
1232
  # Source: CEYPETCO official announcement
1233
  result = {
 
1243
  "fetched_at": utc_now().isoformat(),
1244
  "note": "Prices confirmed unchanged for December 2025",
1245
  }
1246
+
1247
  try:
1248
  # Try to scrape news for latest fuel price announcements
1249
  news_sources = [
 
1251
  "https://www.dailymirror.lk/",
1252
  "https://www.newsfirst.lk/",
1253
  ]
1254
+
1255
  for source_url in news_sources:
1256
  resp = _safe_get(source_url, timeout=20)
1257
  if resp:
1258
  soup = BeautifulSoup(resp.text, "html.parser")
1259
  page_text = soup.get_text(separator=" ", strip=True).lower()
1260
+
1261
  # Look for fuel price mentions
1262
  if "fuel" in page_text and ("price" in page_text or "lkr" in page_text):
1263
  # Extract prices using regex
1264
+ petrol_match = re.search(
1265
+ r"petrol\s*(?:92|95)?\s*(?:octane)?\s*[:\-]?\s*(?:rs\.?|lkr)?\s*(\d{2,3}(?:\.\d{2})?)",
1266
+ page_text,
1267
+ )
1268
+ diesel_match = re.search(
1269
+ r"diesel\s*[:\-]?\s*(?:rs\.?|lkr)?\s*(\d{2,3}(?:\.\d{2})?)",
1270
+ page_text,
1271
+ )
1272
+
1273
  if petrol_match:
1274
  try:
1275
+ result["prices"]["petrol_92"]["price"] = float(
1276
+ petrol_match.group(1)
1277
+ )
1278
  result["source"] = "news_scrape"
1279
  except ValueError:
1280
  pass
1281
  if diesel_match:
1282
  try:
1283
+ result["prices"]["auto_diesel"]["price"] = float(
1284
+ diesel_match.group(1)
1285
+ )
1286
  except ValueError:
1287
  pass
1288
  break
1289
+
1290
+ logger.info(
1291
+ f"[FUEL] Fetched prices - Petrol 92: {result['prices']['petrol_92']['price']}"
1292
+ )
1293
+
1294
  except Exception as e:
1295
  logger.warning(f"[FUEL] Scraping error: {e}")
1296
  result["error"] = str(e)
1297
+
1298
  # Update cache
1299
  _fuel_cache = result
1300
  _fuel_cache_time = utc_now()
1301
+
1302
  return result
1303
 
1304
 
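
Each scraper above goes through _safe_get, which is not part of this diff; judging by its call sites it wraps requests.get with a timeout and swallows network errors so callers can branch on truthiness. A plausible sketch under that assumption (not the repo's actual implementation):

import logging
import requests

logger = logging.getLogger(__name__)

def safe_get(url, timeout=30):
    # Assumed shape of the repo's _safe_get: response on success, None on failure.
    try:
        resp = requests.get(url, timeout=timeout)
        resp.raise_for_status()
        return resp
    except requests.RequestException as e:
        logger.warning("GET %s failed: %s", url, e)
        return None
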
1305
  def tool_cbsl_indicators() -> Dict[str, Any]:
1306
  """
1307
  Get key economic indicators from Central Bank of Sri Lanka.
1308
+
1309
  Scrapes live data from cbsl.gov.lk including:
1310
  - Exchange rates (USD/LKR TT Buy/Sell)
1311
  - CCPI Inflation
1312
  - Overnight Policy Rate
1313
  - Forex reserves
1314
+
1315
  Returns:
1316
  Dict with economic indicators and trend data
1317
  """
1318
  global _cbsl_cache, _cbsl_cache_time
1319
+
1320
  # Check cache
1321
  if _cbsl_cache_time:
1322
  cache_age = (utc_now() - _cbsl_cache_time).total_seconds() / 60
1323
  if cache_age < SA_CACHE_DURATION_MINUTES and _cbsl_cache:
1324
  logger.info(f"[CBSL] Using cached data ({cache_age:.1f} min old)")
1325
  return _cbsl_cache
1326
+
1327
  logger.info("[CBSL] Fetching economic indicators from cbsl.gov.lk...")
1328
+
1329
  # Baseline economic data (December 2025 - latest known values)
1330
  result = {
1331
  "indicators": {
 
1362
  "data_as_of": "2025-12",
1363
  "scrape_status": "baseline",
1364
  }
1365
+
1366
  try:
1367
  # Try to scrape CBSL for updated rates
1368
  resp = _safe_get("https://www.cbsl.gov.lk/", timeout=30)
1369
  if resp:
1370
  soup = BeautifulSoup(resp.text, "html.parser")
1371
  page_text = soup.get_text(separator=" ", strip=True)
1372
+
1373
  scraped_any = False
1374
+
1375
  # Extract TT Buy exchange rate (format: "TT Buy 305.3238" or "TT Buy: 305.3238")
1376
+ tt_buy_match = re.search(
1377
+ r"TT\s*Buy[:\s]*(\d{2,3}(?:\.\d{2,4})?)", page_text, re.I
1378
+ )
1379
  if tt_buy_match:
1380
  try:
1381
+ result["indicators"]["exchange_rate"]["usd_lkr_buy"] = round(
1382
+ float(tt_buy_match.group(1)), 2
1383
+ )
1384
  scraped_any = True
1385
  except ValueError:
1386
  pass
1387
+
1388
  # Extract TT Sell exchange rate
1389
+ tt_sell_match = re.search(
1390
+ r"TT\s*Sell[:\s]*(\d{2,3}(?:\.\d{2,4})?)", page_text, re.I
1391
+ )
1392
  if tt_sell_match:
1393
  try:
1394
+ result["indicators"]["exchange_rate"]["usd_lkr_sell"] = round(
1395
+ float(tt_sell_match.group(1)), 2
1396
+ )
1397
  scraped_any = True
1398
  except ValueError:
1399
  pass
1400
+
1401
  # Calculate mid rate if we have both buy and sell
1402
  if tt_buy_match and tt_sell_match:
1403
  buy = result["indicators"]["exchange_rate"]["usd_lkr_buy"]
1404
  sell = result["indicators"]["exchange_rate"]["usd_lkr_sell"]
1405
+ result["indicators"]["exchange_rate"]["usd_lkr"] = round(
1406
+ (buy + sell) / 2, 2
1407
+ )
1408
+
1409
  # Extract CCPI Inflation (format: "CCPI Inflation 2.10%" or just "Inflation 2.10 %")
1410
  inflation_patterns = [
1411
  r"CCPI\s*Inflation[:\s]*(\d{1,2}(?:\.\d{1,2})?)\s*%",
 
1416
  inflation_match = re.search(pattern, page_text, re.I)
1417
  if inflation_match:
1418
  try:
1419
+ result["indicators"]["inflation"]["ccpi_yoy"] = float(
1420
+ inflation_match.group(1)
1421
+ )
1422
  scraped_any = True
1423
  break
1424
  except ValueError:
1425
  pass
1426
+
1427
  # Extract Overnight Policy Rate (format: "Overnight Policy Rate 7.75%" or "Policy Rate 7.75 %")
1428
  policy_patterns = [
1429
  r"Overnight\s*Policy\s*Rate[:\s]*(\d{1,2}(?:\.\d{1,2})?)\s*%",
 
1434
  policy_match = re.search(pattern, page_text, re.I)
1435
  if policy_match:
1436
  try:
1437
+ result["indicators"]["policy_rates"]["overnight_rate"] = float(
1438
+ policy_match.group(1)
1439
+ )
1440
  scraped_any = True
1441
  break
1442
  except ValueError:
1443
  pass
1444
+
1445
  if scraped_any:
1446
  result["scrape_status"] = "live"
1447
  result["data_as_of"] = utc_now().strftime("%Y-%m")
 
1455
  logger.info("[CBSL] Using baseline data - no live values matched")
1456
  else:
1457
  logger.warning("[CBSL] Could not reach cbsl.gov.lk, using baseline data")
1458
+
1459
  except Exception as e:
1460
  logger.warning(f"[CBSL] Scraping error: {e}")
1461
  result["error"] = str(e)
1462
+
1463
  # Update cache
1464
  _cbsl_cache = result
1465
  _cbsl_cache_time = utc_now()
1466
+
1467
  return result
1468
 
1469
 
1470
  def tool_health_alerts() -> Dict[str, Any]:
1471
  """
1472
  Get health alerts and disease outbreak information for Sri Lanka.
1473
+
1474
  Includes dengue case counts, epidemic alerts, and health advisories.
1475
+
1476
  Returns:
1477
  Dict with health alerts, disease data, and notifications
1478
  """
1479
  global _health_cache, _health_cache_time
1480
+
1481
  # Check cache
1482
  if _health_cache_time:
1483
  cache_age = (utc_now() - _health_cache_time).total_seconds() / 60
1484
  if cache_age < SA_CACHE_DURATION_MINUTES and _health_cache:
1485
  logger.info(f"[HEALTH] Using cached data ({cache_age:.1f} min old)")
1486
  return _health_cache
1487
+
1488
  logger.info("[HEALTH] Fetching health alerts...")
1489
+
1490
  # Baseline health data
1491
  result = {
1492
  "alerts": [],
 
1501
  "source": "health.gov.lk",
1502
  "fetched_at": utc_now().isoformat(),
1503
  }
1504
+
1505
  try:
1506
  # Try to scrape Health Ministry
1507
  resp = _safe_get("https://www.health.gov.lk/", timeout=30)
1508
  if resp:
1509
  soup = BeautifulSoup(resp.text, "html.parser")
1510
  page_text = soup.get_text(separator="\n", strip=True).lower()
1511
+
1512
  # Check for outbreak keywords
1513
+ outbreak_keywords = [
1514
+ "outbreak",
1515
+ "epidemic",
1516
+ "alert",
1517
+ "warning",
1518
+ "emergency",
1519
+ ]
1520
  for kw in outbreak_keywords:
1521
  if kw in page_text:
1522
  # Try to extract the context
1523
  idx = page_text.find(kw)
1524
+ context = page_text[max(0, idx - 50) : idx + 100]
1525
  if len(context) > 20:
1526
+ result["alerts"].append(
1527
+ {
1528
+ "type": "health_notice",
1529
+ "text": context.strip()[:150],
1530
+ "severity": (
1531
+ "medium" if kw in ["alert", "warning"] else "low"
1532
+ ),
1533
+ }
1534
+ )
1535
  break
1536
+
1537
  # Check for dengue data
1538
  dengue_match = re.search(r"dengue[:\s]*(\d{1,5})\s*(?:cases?)?", page_text)
1539
  if dengue_match:
 
1541
  result["dengue"]["weekly_cases"] = int(dengue_match.group(1))
1542
  except ValueError:
1543
  pass
1544
+
1545
+ logger.info(
1546
+ f"[HEALTH] Fetched - Dengue cases: {result['dengue']['weekly_cases']}"
1547
+ )
1548
+
1549
  # Add seasonal health advisory
1550
  current_month = utc_now().month
1551
  if current_month in [5, 6, 10, 11]: # Monsoon = mosquito season
1552
+ result["advisories"].append(
1553
+ {
1554
+ "type": "seasonal",
1555
+ "text": "Monsoon season: Increased dengue risk. Remove stagnant water around homes.",
1556
+ "severity": "medium",
1557
+ }
1558
+ )
1559
+
1560
  except Exception as e:
1561
  logger.warning(f"[HEALTH] Scraping error: {e}")
1562
  result["error"] = str(e)
1563
+
1564
  # Update cache
1565
  _health_cache = result
1566
  _health_cache_time = utc_now()
1567
+
1568
  return result
1569
 
1570
 
1571
  def tool_commodity_prices() -> Dict[str, Any]:
1572
  """
1573
  Get prices for essential commodities in Sri Lanka.
1574
+
1575
  Includes rice, sugar, dhal, milk powder, and other staples.
1576
+
1577
  Returns:
1578
  Dict with commodity prices, units, and recent changes
1579
  """
1580
  global _commodity_cache, _commodity_cache_time
1581
+
1582
  # Check cache
1583
  if _commodity_cache_time:
1584
  cache_age = (utc_now() - _commodity_cache_time).total_seconds() / 60
1585
  if cache_age < SA_CACHE_DURATION_MINUTES and _commodity_cache:
1586
  logger.info(f"[COMMODITY] Using cached data ({cache_age:.1f} min old)")
1587
  return _commodity_cache
1588
+
1589
  logger.info("[COMMODITY] Fetching commodity prices...")
1590
+
1591
  # Current approximate commodity prices (LKR)
1592
  result = {
1593
  "commodities": [
1594
+ {
1595
+ "name": "White Rice (Nadu)",
1596
+ "price": 220,
1597
+ "unit": "LKR/kg",
1598
+ "change": 0,
1599
+ "category": "grains",
1600
+ },
1601
+ {
1602
+ "name": "White Rice (Samba)",
1603
+ "price": 250,
1604
+ "unit": "LKR/kg",
1605
+ "change": 0,
1606
+ "category": "grains",
1607
+ },
1608
+ {
1609
+ "name": "Red Rice",
1610
+ "price": 240,
1611
+ "unit": "LKR/kg",
1612
+ "change": 0,
1613
+ "category": "grains",
1614
+ },
1615
+ {
1616
+ "name": "Wheat Flour",
1617
+ "price": 195,
1618
+ "unit": "LKR/kg",
1619
+ "change": -5,
1620
+ "category": "grains",
1621
+ },
1622
+ {
1623
+ "name": "Sugar (White)",
1624
+ "price": 240,
1625
+ "unit": "LKR/kg",
1626
+ "change": 0,
1627
+ "category": "essentials",
1628
+ },
1629
+ {
1630
+ "name": "Dhal (Mysore)",
1631
+ "price": 510,
1632
+ "unit": "LKR/kg",
1633
+ "change": 10,
1634
+ "category": "pulses",
1635
+ },
1636
+ {
1637
+ "name": "Dhal (Red)",
1638
+ "price": 340,
1639
+ "unit": "LKR/kg",
1640
+ "change": 0,
1641
+ "category": "pulses",
1642
+ },
1643
+ {
1644
+ "name": "Milk Powder (400g)",
1645
+ "price": 1250,
1646
+ "unit": "LKR/pack",
1647
+ "change": 0,
1648
+ "category": "dairy",
1649
+ },
1650
+ {
1651
+ "name": "Coconut Oil",
1652
+ "price": 680,
1653
+ "unit": "LKR/L",
1654
+ "change": -20,
1655
+ "category": "cooking",
1656
+ },
1657
+ {
1658
+ "name": "Coconut (Fresh)",
1659
+ "price": 120,
1660
+ "unit": "LKR/each",
1661
+ "change": 10,
1662
+ "category": "cooking",
1663
+ },
1664
+ {
1665
+ "name": "Eggs (10)",
1666
+ "price": 480,
1667
+ "unit": "LKR/10",
1668
+ "change": 0,
1669
+ "category": "protein",
1670
+ },
1671
+ {
1672
+ "name": "Chicken",
1673
+ "price": 1350,
1674
+ "unit": "LKR/kg",
1675
+ "change": 50,
1676
+ "category": "protein",
1677
+ },
1678
+ {
1679
+ "name": "Big Onion",
1680
+ "price": 280,
1681
+ "unit": "LKR/kg",
1682
+ "change": -10,
1683
+ "category": "vegetables",
1684
+ },
1685
+ {
1686
+ "name": "Potatoes",
1687
+ "price": 350,
1688
+ "unit": "LKR/kg",
1689
+ "change": 20,
1690
+ "category": "vegetables",
1691
+ },
1692
+ {
1693
+ "name": "LP Gas (12.5kg)",
1694
+ "price": 4290,
1695
+ "unit": "LKR/cylinder",
1696
+ "change": 0,
1697
+ "category": "fuel",
1698
+ },
1699
  ],
1700
  "source": "Consumer Affairs Authority / Market Survey",
1701
  "fetched_at": utc_now().isoformat(),
 
1705
  "items_stable": 0,
1706
  },
1707
  }
1708
+
1709
  # Calculate summary
1710
  for item in result["commodities"]:
1711
  if item["change"] > 0:
 
1714
  result["summary"]["items_decreased"] += 1
1715
  else:
1716
  result["summary"]["items_stable"] += 1
1717
+
1718
  try:
1719
  # Try to scrape news for price updates
1720
  resp = _safe_get("https://www.dailymirror.lk/", timeout=20)
1721
  if resp:
1722
  soup = BeautifulSoup(resp.text, "html.parser")
1723
  page_text = soup.get_text(separator=" ", strip=True).lower()
1724
+
1725
  # Check for LP Gas price updates (commonly announced)
1726
  gas_match = re.search(r"lp\s*gas[:\s]*(?:rs\.?|lkr)?\s*(\d{4})", page_text)
1727
  if gas_match:
 
1735
  break
1736
  except ValueError:
1737
  pass
1738
+
1739
  logger.info("[COMMODITY] Successfully fetched commodity prices")
1740
+
1741
  except Exception as e:
1742
  logger.warning(f"[COMMODITY] Scraping error: {e}")
1743
  result["error"] = str(e)
1744
+
1745
  # Update cache
1746
  _commodity_cache = result
1747
  _commodity_cache_time = utc_now()
1748
+
1749
  return result
1750
 
1751
 
1752
  def tool_water_supply_alerts() -> Dict[str, Any]:
1753
  """
1754
  Get water supply disruption alerts from NWSDB.
1755
+
1756
  Returns information about planned/unplanned water cuts and affected areas.
1757
+
1758
  Returns:
1759
  Dict with active disruptions, affected areas, and restoration times
1760
  """
1761
  global _water_cache, _water_cache_time
1762
+
1763
  # Check cache
1764
  if _water_cache_time:
1765
  cache_age = (utc_now() - _water_cache_time).total_seconds() / 60
1766
  if cache_age < SA_CACHE_DURATION_MINUTES and _water_cache:
1767
  logger.info(f"[WATER] Using cached data ({cache_age:.1f} min old)")
1768
  return _water_cache
1769
+
1770
  logger.info("[WATER] Fetching water supply alerts...")
1771
+
1772
  result = {
1773
  "status": "normal",
1774
  "active_disruptions": [],
 
1777
  "fetched_at": utc_now().isoformat(),
1778
  "overall_supply": "stable",
1779
  }
1780
+
1781
  try:
1782
  # Try to scrape NWSDB website
1783
  resp = _safe_get("https://www.waterboard.lk/", timeout=30)
1784
  if resp:
1785
  soup = BeautifulSoup(resp.text, "html.parser")
1786
  page_text = soup.get_text(separator="\n", strip=True).lower()
1787
+
1788
  # Check for disruption keywords
1789
+ disruption_keywords = [
1790
+ "disruption",
1791
+ "interruption",
1792
+ "cut off",
1793
+ "maintenance",
1794
+ "repair",
1795
+ ]
1796
  for kw in disruption_keywords:
1797
  if kw in page_text:
1798
  result["status"] = "disruptions_reported"
1799
  idx = page_text.find(kw)
1800
+ context = page_text[max(0, idx - 30) : idx + 120]
1801
+
1802
  # Try to extract area name
1803
  area_patterns = [
1804
  r"(colombo|gampaha|kandy|galle|matara|jaffna|kurunegala|ratnapura)",
 
1810
  if match:
1811
  area = match.group(1).title()
1812
  break
1813
+
1814
+ result["active_disruptions"].append(
1815
+ {
1816
+ "area": area,
1817
+ "type": kw,
1818
+ "details": context.strip()[:150],
1819
+ "severity": "medium",
1820
+ }
1821
+ )
1822
  break
1823
+
1824
+ logger.info(
1825
+ f"[WATER] Fetched - Disruptions: {len(result['active_disruptions'])}"
1826
+ )
1827
+
1828
  # If no disruptions found via scraping, report normal
1829
  if not result["active_disruptions"]:
1830
  result["status"] = "normal"
1831
  result["overall_supply"] = "Normal water supply across most areas"
1832
+
1833
  except Exception as e:
1834
  logger.warning(f"[WATER] Scraping error: {e}")
1835
  result["error"] = str(e)
1836
  result["status"] = "unknown"
1837
+
1838
  # Update cache
1839
  _water_cache = result
1840
  _water_cache_time = utc_now()
1841
+
1842
  return result
1843
 
1844
 
 
4571
  data = scrape_reddit_impl(keywords=keywords, limit=limit, subreddit=subreddit)
4572
  return json.dumps(data, default=str)
4573
 
4574
+
4575
  # ============================================
4576
  # SITUATIONAL AWARENESS TOOLS (DASHBOARD APIs)
4577
  # ============================================
4578
 
4579
+
4580
  def tool_health_alerts() -> dict:
4581
  """Get health alerts from health.gov.lk - structured for dashboard."""
4582
  try:
 
4585
  "dengue": {
4586
  "weekly_cases": 1890,
4587
  "high_risk_districts": ["Colombo", "Gampaha", "Kalutara"],
4588
+ "trend": "stable",
4589
  },
4590
+ "advisories": [
4591
+ {
4592
+ "type": "seasonal",
4593
+ "text": "Monsoon season: Take precautions against dengue",
4594
+ "severity": "medium",
4595
+ }
4596
+ ],
4597
+ "fetched_at": utc_now().isoformat(),
4598
  }
4599
  except Exception as e:
4600
  return {"alerts": [], "dengue": {}, "advisories": [], "error": str(e)}
 
4607
  "status": "normal",
4608
  "active_disruptions": [],
4609
  "overall_supply": "Normal water supply across most areas",
4610
+ "fetched_at": utc_now().isoformat(),
4611
  }
4612
  except Exception as e:
4613
  return {"status": "unknown", "active_disruptions": [], "error": str(e)}
 
4620
  "current_schedule": None,
4621
  "announcements": [],
4622
  "generation_capacity": "Normal",
4623
+ "fetched_at": utc_now().isoformat(),
4624
  }
4625
 
4626
 
 
4632
  "petrol_95": {"price": 335, "unit": "LKR/L"},
4633
  "diesel": {"price": 277, "unit": "LKR/L"},
4634
  "super_diesel": {"price": 318, "unit": "LKR/L"},
4635
+ "kerosene": {"price": 185, "unit": "LKR/L"},
4636
  },
4637
  "last_updated": "2025-12-01",
4638
  "source": "CEYPETCO",
4639
+ "fetched_at": utc_now().isoformat(),
4640
  }
4641
 
4642
 
 
4646
  "inflation": {"headline": 0.7, "core": 1.2, "unit": "%"},
4647
  "policy_rates": {"sdfr": 8.25, "slfr": 9.25, "unit": "%"},
4648
  "exchange_rate": {"usd": 296.50, "eur": 312.80, "unit": "LKR"},
4649
+ "fetched_at": utc_now().isoformat(),
4650
  }
4651
 
4652
 
 
4661
  "inflation": {
4662
  "ccpi_yoy": 2.1, # CCPI Year-on-Year (Nov 2025 actual)
4663
  "core_yoy": 1.8,
4664
+ "trend": "stable",
4665
  },
4666
  "policy_rates": {
4667
  "overnight_rate": 7.75, # Overnight Policy Rate (Dec 2025)
4668
  "sdfr": 7.25, # Standing Deposit Facility Rate
4669
  "slfr": 8.25, # Standing Lending Facility Rate
4670
+ "last_changed": "2024-12",
4671
  },
4672
  "exchange_rate": {
4673
  "usd_lkr": 309.17, # Dec 11, 2025 rate
 
4675
  "usd_lkr_sell": 313.00,
4676
  "eur_lkr": 325.50,
4677
  "gbp_lkr": 390.25,
4678
+ "trend": "stable",
4679
  },
4680
  "forex_reserves": {
4681
  "value": 6.5, # Billion USD (Dec 2025)
4682
+ "trend": "improving",
4683
+ },
4684
  },
4685
  "source": "Central Bank of Sri Lanka",
4686
  "scrape_status": "baseline",
4687
+ "fetched_at": utc_now().isoformat(),
4688
  }
4689
 
4690
 
 
4696
  {"name": "Rice (Samba)", "price": 250, "unit": "LKR/kg"},
4697
  {"name": "Dhal (Red)", "price": 360, "unit": "LKR/kg"},
4698
  {"name": "Sugar", "price": 215, "unit": "LKR/kg"},
4699
+ {"name": "Coconut", "price": 120, "unit": "LKR/nut"},
4700
  ],
4701
+ "fetched_at": utc_now().isoformat(),
4702
  }
4703
 
4704
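
All the dashboard stubs above gained a trailing "fetched_at": utc_now().isoformat() field in this commit. A hedged smoke check that the timestamp round-trips, assuming the tools are importable from src.utils.utils:

from datetime import datetime

def check_fetched_at(payload):
    # Every dashboard payload should carry a parseable ISO-8601 timestamp.
    return isinstance(datetime.fromisoformat(payload["fetched_at"]), datetime)

# e.g.:
# from src.utils.utils import tool_fuel_prices
# assert check_fetched_at(tool_fuel_prices())
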