Upload folder using huggingface_hub

- src/graphs/RogerGraph.py +4 -1
- src/graphs/combinedAgentGraph.py +16 -5
- src/graphs/dataRetrievalAgentGraph.py +12 -8
- src/graphs/economicalAgentGraph.py +9 -3
- src/graphs/intelligenceAgentGraph.py +16 -5
- src/graphs/meteorologicalAgentGraph.py +15 -5
- src/graphs/politicalAgentGraph.py +9 -3
- src/graphs/socialAgentGraph.py +12 -4
- src/nodes/socialAgentNode.py +56 -43
- src/rag.py +191 -109
- src/storage/storage_manager.py +68 -31
- src/utils/utils.py +368 -182
src/graphs/RogerGraph.py (CHANGED)

@@ -51,7 +51,10 @@ class CombinedAgentGraphBuilder:
         workflow.add_node("EconomicalAgent", economical_builder.build_graph())
         workflow.add_node("PoliticalAgent", political_builder.build_graph())
         workflow.add_node("MeteorologicalAgent", meteorological_builder.build_graph())
-        workflow.add_node(
+        workflow.add_node(
+            "DataRetrievalAgent",
+            data_retrieval_builder.build_data_retrieval_agent_graph(),
+        )
 
         workflow.add_edge(START, "GraphInitiator")
 
src/graphs/combinedAgentGraph.py (CHANGED)

@@ -15,6 +15,7 @@ from src.nodes.combinedAgentNode import CombinedAgentNode
 
 try:
     from src.config.langsmith_config import LangSmithConfig
+
     _langsmith = LangSmithConfig()
     _langsmith.configure()
 except ImportError:
@@ -50,7 +51,9 @@ class CombinedAgentGraphBuilder:
             try:
                 result = social_graph.invoke({})
                 insights = result.get("domain_insights", [])
-                logger.info(
+                logger.info(
+                    f"[CombinedGraph] SocialAgent returned {len(insights)} insights"
+                )
                 return {"domain_insights": insights}
             except Exception as e:
                 logger.error(f"[CombinedGraph] SocialAgent FAILED: {e}")
@@ -61,7 +64,9 @@
             try:
                 result = intelligence_graph.invoke({})
                 insights = result.get("domain_insights", [])
-                logger.info(
+                logger.info(
+                    f"[CombinedGraph] IntelligenceAgent returned {len(insights)} insights"
+                )
                 return {"domain_insights": insights}
             except Exception as e:
                 logger.error(f"[CombinedGraph] IntelligenceAgent FAILED: {e}")
@@ -72,7 +77,9 @@
             try:
                 result = economical_graph.invoke({})
                 insights = result.get("domain_insights", [])
-                logger.info(
+                logger.info(
+                    f"[CombinedGraph] EconomicalAgent returned {len(insights)} insights"
+                )
                 return {"domain_insights": insights}
             except Exception as e:
                 logger.error(f"[CombinedGraph] EconomicalAgent FAILED: {e}")
@@ -83,7 +90,9 @@
             try:
                 result = political_graph.invoke({})
                 insights = result.get("domain_insights", [])
-                logger.info(
+                logger.info(
+                    f"[CombinedGraph] PoliticalAgent returned {len(insights)} insights"
+                )
                 return {"domain_insights": insights}
             except Exception as e:
                 logger.error(f"[CombinedGraph] PoliticalAgent FAILED: {e}")
@@ -94,7 +103,9 @@
             try:
                 result = meteorological_graph.invoke({})
                 insights = result.get("domain_insights", [])
-                logger.info(
+                logger.info(
+                    f"[CombinedGraph] MeteorologicalAgent returned {len(insights)} insights"
+                )
                 return {"domain_insights": insights}
             except Exception as e:
                 logger.error(f"[CombinedGraph] MeteorologicalAgent FAILED: {e}")
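Note: the five domain-agent nodes in combinedAgentGraph.py all follow the same invoke, log, and return pattern. A minimal, self-contained sketch of that wrapper is shown below; DomainState, FakeSubgraph and the node name are illustrative stand-ins, not the project's real CombinedAgentState or compiled subgraphs.

import logging
import operator
from typing import Annotated, List, TypedDict

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("combined_graph_sketch")


class DomainState(TypedDict):
    domain_insights: Annotated[List[dict], operator.add]


class FakeSubgraph:
    """Stand-in for a compiled domain subgraph."""

    def invoke(self, state: dict) -> dict:
        return {"domain_insights": [{"domain": "social", "severity": "low"}]}


def make_subgraph_node(name: str, subgraph):
    """Wrap a subgraph call so a failure degrades to an empty insight list."""

    def node(state: DomainState) -> dict:
        try:
            result = subgraph.invoke({})
            insights = result.get("domain_insights", [])
            logger.info(f"[CombinedGraph] {name} returned {len(insights)} insights")
            return {"domain_insights": insights}
        except Exception as e:
            logger.error(f"[CombinedGraph] {name} FAILED: {e}")
            return {"domain_insights": []}

    return node


social_node = make_subgraph_node("SocialAgent", FakeSubgraph())
print(social_node({"domain_insights": []}))
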
src/graphs/dataRetrievalAgentGraph.py (CHANGED)

@@ -46,13 +46,15 @@ class DataRetrievalAgentGraph(DataRetrievalAgentNode):
         insights = []
 
         for event in classified_events:
-            insights.append(
+            insights.append(
+                {
+                    "source_event_id": event.event_id,
+                    "domain": event.target_agent,
+                    "severity": "medium",
+                    "summary": event.content_summary,
+                    "risk_score": event.confidence_score,
+                }
+            )
 
         print(f"[DATA RETRIEVAL] Formatted {len(insights)} insights for parent graph")
         return {"domain_insights": insights}
@@ -65,7 +67,9 @@
         workflow.add_node("prepare_worker_tasks", self.prepare_worker_tasks)
         workflow.add_node(
             "worker",
-            lambda state: {
+            lambda state: {
+                "worker": worker_graph.map().invoke(state.tasks_for_workers)
+            },
         )
         workflow.add_node("aggregate_results", self.aggregate_results)
         workflow.add_node("classifier_agent", self.classifier_agent_node)
src/graphs/economicalAgentGraph.py (CHANGED)

@@ -60,9 +60,15 @@ class EconomicalGraphBuilder:
 
         main_graph = StateGraph(EconomicalAgentState)
 
-        main_graph.add_node(
+        main_graph.add_node(
+            "official_sources_module", lambda state: official_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "social_media_module", lambda state: social_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "feed_generation_module", lambda state: feed_subgraph.invoke(state)
+        )
         main_graph.add_node("feed_aggregator", node.aggregate_and_store_feeds)
 
         main_graph.set_entry_point("official_sources_module")
src/graphs/intelligenceAgentGraph.py (CHANGED)

@@ -13,14 +13,18 @@ class IntelligenceGraphBuilder:
     def __init__(self, llm):
         self.llm = llm
 
-    def build_profile_monitoring_subgraph(
+    def build_profile_monitoring_subgraph(
+        self, node: IntelligenceAgentNode
+    ) -> StateGraph:
         subgraph = StateGraph(IntelligenceAgentState)
         subgraph.add_node("monitor_profiles", node.collect_profile_activity)
         subgraph.set_entry_point("monitor_profiles")
         subgraph.add_edge("monitor_profiles", END)
         return subgraph.compile()
 
-    def build_competitive_intelligence_subgraph(
+    def build_competitive_intelligence_subgraph(
+        self, node: IntelligenceAgentNode
+    ) -> StateGraph:
         subgraph = StateGraph(IntelligenceAgentState)
 
         subgraph.add_node("competitor_mentions", node.collect_competitor_mentions)
@@ -60,9 +64,16 @@
 
         main_graph = StateGraph(IntelligenceAgentState)
 
-        main_graph.add_node(
+        main_graph.add_node(
+            "profile_monitoring_module", lambda state: profile_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "competitive_intelligence_module",
+            lambda state: intelligence_subgraph.invoke(state),
+        )
+        main_graph.add_node(
+            "feed_generation_module", lambda state: feed_subgraph.invoke(state)
+        )
         main_graph.add_node("feed_aggregator", node.aggregate_and_store_feeds)
 
         main_graph.set_entry_point("profile_monitoring_module")
src/graphs/meteorologicalAgentGraph.py (CHANGED)

@@ -13,7 +13,9 @@ class MeteorologicalGraphBuilder:
     def __init__(self, llm):
         self.llm = llm
 
-    def build_official_sources_subgraph(
+    def build_official_sources_subgraph(
+        self, node: MeteorologicalAgentNode
+    ) -> StateGraph:
         subgraph = StateGraph(MeteorologicalAgentState)
         subgraph.add_node("collect_official", node.collect_official_sources)
         subgraph.set_entry_point("collect_official")
@@ -37,7 +39,9 @@
 
         return subgraph.compile()
 
-    def build_feed_generation_subgraph(
+    def build_feed_generation_subgraph(
+        self, node: MeteorologicalAgentNode
+    ) -> StateGraph:
         subgraph = StateGraph(MeteorologicalAgentState)
 
         subgraph.add_node("categorize", node.categorize_by_geography)
@@ -60,9 +64,15 @@
 
         main_graph = StateGraph(MeteorologicalAgentState)
 
-        main_graph.add_node(
+        main_graph.add_node(
+            "official_sources_module", lambda state: official_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "social_media_module", lambda state: social_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "feed_generation_module", lambda state: feed_subgraph.invoke(state)
+        )
         main_graph.add_node("feed_aggregator", node.aggregate_and_store_feeds)
 
         main_graph.set_entry_point("official_sources_module")
src/graphs/politicalAgentGraph.py (CHANGED)

@@ -59,9 +59,15 @@ class PoliticalGraphBuilder:
 
         main_graph = StateGraph(PoliticalAgentState)
 
-        main_graph.add_node(
+        main_graph.add_node(
+            "official_sources_module", lambda state: official_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "social_media_module", lambda state: social_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "feed_generation_module", lambda state: feed_subgraph.invoke(state)
+        )
         main_graph.add_node("feed_aggregator", node.aggregate_and_store_feeds)
 
         main_graph.set_entry_point("official_sources_module")
src/graphs/socialAgentGraph.py (CHANGED)

@@ -69,10 +69,18 @@ class SocialGraphBuilder:
 
         main_graph = StateGraph(SocialAgentState)
 
-        main_graph.add_node(
-        main_graph.add_node(
+        main_graph.add_node(
+            "trending_module", lambda state: trending_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "social_media_module", lambda state: social_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "user_targets_module", lambda state: user_targets_subgraph.invoke(state)
+        )
+        main_graph.add_node(
+            "feed_generation_module", lambda state: feed_subgraph.invoke(state)
+        )
         main_graph.add_node("feed_aggregator", node.aggregate_and_store_feeds)
 
         # Parallel entry points - all 3 modules start together
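socialAgentGraph.py wires its module subgraphs as parallel entry points that fan back into feed_aggregator. A runnable sketch of that wiring is given below, assuming the langgraph API; SocialState and the node bodies are illustrative stand-ins for the project's real SocialAgentState and compiled subgraphs.

import operator
from typing import Annotated, List, TypedDict

from langgraph.graph import END, START, StateGraph


class SocialState(TypedDict):
    # Results from the parallel branches are concatenated by the reducer.
    worker_results: Annotated[List[dict], operator.add]


def trending_module(state: SocialState) -> dict:
    return {"worker_results": [{"module": "trending"}]}


def social_media_module(state: SocialState) -> dict:
    return {"worker_results": [{"module": "social_media"}]}


def user_targets_module(state: SocialState) -> dict:
    return {"worker_results": [{"module": "user_targets"}]}


def feed_aggregator(state: SocialState) -> dict:
    print(f"aggregating {len(state['worker_results'])} module results")
    return {"worker_results": []}


graph = StateGraph(SocialState)
graph.add_node("trending_module", trending_module)
graph.add_node("social_media_module", social_media_module)
graph.add_node("user_targets_module", user_targets_module)
graph.add_node("feed_aggregator", feed_aggregator)

for name in ("trending_module", "social_media_module", "user_targets_module"):
    graph.add_edge(START, name)              # all three modules start together
    graph.add_edge(name, "feed_aggregator")  # then fan back in
graph.add_edge("feed_aggregator", END)

app = graph.compile()
app.invoke({"worker_results": []})
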
src/nodes/socialAgentNode.py (CHANGED)

@@ -21,11 +21,13 @@ from src.llms.groqllm import GroqLLM
 
 def load_intel_config() -> dict:
     """Load intel config from JSON file (same as main.py)."""
-    config_path = os.path.join(
+    config_path = os.path.join(
+        os.path.dirname(__file__), "..", "..", "data", "intel_config.json"
+    )
     default_config = {
         "user_profiles": {"twitter": [], "facebook": [], "linkedin": []},
         "user_keywords": [],
-        "user_products": []
+        "user_products": [],
     }
     try:
         if os.path.exists(config_path):
@@ -66,9 +68,11 @@ class SocialAgentNode:
         self.user_keywords = self.intel_config.get("user_keywords", [])
         self.user_profiles = self.intel_config.get("user_profiles", {})
         self.user_products = self.intel_config.get("user_products", [])
 
-        print(
+        print(
+            f"[SocialAgent] Loaded {len(self.user_keywords)} user keywords, "
+            f"{sum(len(v) for v in self.user_profiles.values())} profiles"
+        )
 
         # Geographic scopes
         self.geographic_scopes = {
@@ -411,72 +415,79 @@
         These are configured via the frontend Intelligence Settings UI.
         """
         print("[MODULE 2D] Collecting User-Defined Targets")
+
         user_results = []
+
         # Reload config to get latest user settings
         self.intel_config = load_intel_config()
         self.user_keywords = self.intel_config.get("user_keywords", [])
         self.user_profiles = self.intel_config.get("user_profiles", {})
         self.user_products = self.intel_config.get("user_products", [])
+
         # Skip if no user config
         if not self.user_keywords and not any(self.user_profiles.values()):
             print(" ⏭️ No user-defined targets configured")
             return {"worker_results": [], "user_target_results": []}
+
         # ============================================
         # Scrape USER KEYWORDS across Twitter
         # ============================================
         if self.user_keywords:
             print(f" 📝 Scraping {len(self.user_keywords)} user keywords...")
             twitter_tool = self.tools.get("scrape_twitter")
+
             for keyword in self.user_keywords[:10]:  # Limit to 10 keywords
                 try:
                     if twitter_tool:
                         twitter_data = twitter_tool.invoke(
                             {"query": keyword, "max_items": 5}
                         )
-                        user_results.append(
+                        user_results.append(
+                            {
+                                "source_tool": "scrape_twitter",
+                                "raw_content": str(twitter_data),
+                                "category": "user_keyword",
+                                "scope": "sri_lanka",
+                                "platform": "twitter",
+                                "keyword": keyword,
+                                "timestamp": datetime.utcnow().isoformat(),
+                            }
+                        )
                     print(f" ✓ Keyword: '{keyword}'")
                 except Exception as e:
                     print(f" ⚠️ Keyword '{keyword}' error: {e}")
+
         # ============================================
         # Scrape USER PRODUCTS
         # ============================================
         if self.user_products:
             print(f" 📦 Scraping {len(self.user_products)} user products...")
             twitter_tool = self.tools.get("scrape_twitter")
+
             for product in self.user_products[:5]:  # Limit to 5 products
                 try:
                     if twitter_tool:
                         twitter_data = twitter_tool.invoke(
-                            {
-                        user_results.append({
-                            "source_tool": "scrape_twitter",
-                            "raw_content": str(twitter_data),
-                            "category": "user_product",
-                            "scope": "sri_lanka",
-                            "platform": "twitter",
-                            "product": product,
-                            "timestamp": datetime.utcnow().isoformat(),
-                        })
+                            {
+                                "query": f"{product} review OR {product} Sri Lanka",
+                                "max_items": 3,
+                            }
+                        )
+                        user_results.append(
+                            {
+                                "source_tool": "scrape_twitter",
+                                "raw_content": str(twitter_data),
+                                "category": "user_product",
+                                "scope": "sri_lanka",
+                                "platform": "twitter",
+                                "product": product,
+                                "timestamp": datetime.utcnow().isoformat(),
+                            }
                         )
                     print(f" ✓ Product: '{product}'")
                 except Exception as e:
                     print(f" ⚠️ Product '{product}' error: {e}")
+
         # ============================================
         # Scrape USER TWITTER PROFILES
         # ============================================
@@ -484,7 +495,7 @@
         if twitter_profiles:
             print(f" 👤 Scraping {len(twitter_profiles)} Twitter profiles...")
             twitter_tool = self.tools.get("scrape_twitter")
-
+
             for profile in twitter_profiles[:10]:  # Limit to 10 profiles
                 try:
                     # Clean profile handle
@@ -494,19 +505,21 @@
                         twitter_data = twitter_tool.invoke(
                             {"query": f"from:{handle} OR @{handle}", "max_items": 5}
                         )
-                        user_results.append(
+                        user_results.append(
+                            {
+                                "source_tool": "scrape_twitter",
+                                "raw_content": str(twitter_data),
+                                "category": "user_profile",
+                                "scope": "sri_lanka",
+                                "platform": "twitter",
+                                "profile": f"@{handle}",
+                                "timestamp": datetime.utcnow().isoformat(),
+                            }
+                        )
                     print(f" ✓ Profile: @{handle}")
                 except Exception as e:
                     print(f" ⚠️ Profile @{profile} error: {e}")
+
         print(f" ✅ User targets: {len(user_results)} results collected")
         return {"worker_results": user_results, "user_target_results": user_results}
 
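For reference, the keyword, product, and profile branches above all append records of the same shape to user_results; the field names come from the diff, while the values below are only examples.

example_user_target_record = {
    "source_tool": "scrape_twitter",
    "raw_content": "<stringified tweet payload>",
    "category": "user_keyword",          # or "user_product" / "user_profile"
    "scope": "sri_lanka",
    "platform": "twitter",
    "keyword": "fuel shortage",          # the other branches use "product" / "profile"
    "timestamp": "2025-01-01T00:00:00",  # datetime.utcnow().isoformat()
}
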
src/rag.py (CHANGED)

@@ -14,6 +14,7 @@ sys.path.insert(0, str(PROJECT_ROOT))
 
 try:
     from dotenv import load_dotenv
+
     load_dotenv()
 except ImportError:
     pass
@@ -26,6 +27,7 @@ logging.basicConfig(
 try:
     import chromadb
     from chromadb.config import Settings
+
     CHROMA_AVAILABLE = True
 except ImportError:
     CHROMA_AVAILABLE = False
@@ -37,6 +39,7 @@ try:
     from langchain_core.messages import HumanMessage, AIMessage
     from langchain_core.output_parsers import StrOutputParser
     from langchain_core.runnables import RunnablePassthrough
+
     LANGCHAIN_AVAILABLE = True
 except ImportError:
     LANGCHAIN_AVAILABLE = False
@@ -45,6 +48,7 @@ except ImportError:
 # Neo4j for graph-based retrieval
 try:
     from neo4j import GraphDatabase
+
     NEO4J_AVAILABLE = True
 except ImportError:
     NEO4J_AVAILABLE = False
@@ -53,9 +57,18 @@ except ImportError:
 
 # Keywords that indicate a graph/relationship query
 GRAPH_KEYWORDS = [
-    "connected",
+    "connected",
+    "related",
+    "timeline",
+    "before",
+    "after",
+    "caused by",
+    "followed by",
+    "similar to",
+    "linked",
+    "what happened",
+    "sequence",
+    "chain of events",
 ]
 
 
@@ -67,31 +80,31 @@ def is_graph_query(question: str) -> bool:
 
 class Neo4jRetriever:
     """Graph-based retrieval for relationship queries with LAZY initialization."""
+
     def __init__(self):
         self.driver = None
         self._initialized = False
         self._init_attempted = False
+
     def _lazy_init(self):
         """Lazy initialization - only connect when actually needed."""
         if self._init_attempted:
             return self.driver is not None
+
         self._init_attempted = True
+
         if not NEO4J_AVAILABLE:
             logger.info("[Neo4jRetriever] Neo4j package not installed")
             return False
+
         neo4j_uri = os.getenv("NEO4J_URI", "")
         neo4j_user = os.getenv("NEO4J_USER", "neo4j")
         neo4j_password = os.getenv("NEO4J_PASSWORD", "")
+
         if not neo4j_uri or not neo4j_password:
             logger.info("[Neo4jRetriever] Neo4j credentials not configured - skipping")
             return False
+
         try:
             self.driver = GraphDatabase.driver(
                 neo4j_uri, auth=(neo4j_user, neo4j_password)
@@ -101,15 +114,17 @@ class Neo4jRetriever:
             logger.info(f"[Neo4jRetriever] Connected to {neo4j_uri}")
             return True
         except Exception as e:
-            logger.warning(
+            logger.warning(
+                f"[Neo4jRetriever] Connection failed (will skip graph queries): {e}"
+            )
             self.driver = None
             return False
+
     def get_related_events(self, keyword: str, limit: int = 5) -> List[Dict[str, Any]]:
         """Find events containing keyword and their related events."""
         if not self._lazy_init():
             return []
+
         try:
             with self.driver.session() as session:
                 query = """
@@ -126,31 +141,35 @@
                 LIMIT $limit
                 """
                 results = session.run(query, keyword=keyword, limit=limit)
+
                 events = []
                 for record in results:
-                    events.append(
+                    events.append(
+                        {
+                            "event_id": record["event_id"],
+                            "content": record["summary"],
+                            "domain": record["domain"],
+                            "severity": record["severity"],
+                            "timestamp": record["timestamp"],
+                            "related": record["related_summaries"],
+                            "source": "neo4j_graph",
+                        }
+                    )
+
+                logger.info(
+                    f"[Neo4jRetriever] Found {len(events)} events for '{keyword}'"
+                )
                 return events
+
         except Exception as e:
             logger.error(f"[Neo4jRetriever] Query error: {e}")
             return []
+
     def get_domain_events(self, domain: str, limit: int = 5) -> List[Dict[str, Any]]:
         """Get recent events by domain with relationships."""
         if not self._lazy_init():
             return []
+
         try:
             with self.driver.session() as session:
                 query = """
@@ -165,30 +184,32 @@
                 LIMIT $limit
                 """
                 results = session.run(query, domain=domain.lower(), limit=limit)
+
                 events = []
                 for record in results:
-                    events.append(
+                    events.append(
+                        {
+                            "event_id": record["event_id"],
+                            "content": record["summary"],
+                            "domain": domain,
+                            "severity": record["severity"],
+                            "timestamp": record["timestamp"],
+                            "related_count": record["related_count"],
+                            "source": "neo4j_graph",
+                        }
+                    )
+
                 return events
+
         except Exception as e:
             logger.error(f"[Neo4jRetriever] Domain query error: {e}")
             return []
+
     def get_event_chain(self, keyword: str, depth: int = 3) -> List[Dict[str, Any]]:
         """Get temporal chain of related events."""
        if not self._lazy_init():
             return []
+
         try:
             with self.driver.session() as session:
                 query = """
@@ -203,36 +224,39 @@
                 LIMIT 1
                 """
                 result = session.run(query, keyword=keyword).single()
+
                 if result:
-                    return [
+                    return [
+                        {
+                            "event_id": result["start_id"],
+                            "content": result["start_summary"],
+                            "timestamp": result["start_time"],
+                            "chain": result["chain"],
+                            "source": "neo4j_chain",
+                        }
+                    ]
                 return []
+
         except Exception as e:
             logger.error(f"[Neo4jRetriever] Chain query error: {e}")
             return []
+
     def get_stats(self) -> Dict[str, Any]:
         """Get Neo4j graph statistics."""
         if not self._initialized or not self.driver:
-            return {
+            return {
+                "status": (
+                    "not_initialized" if not self._init_attempted else "disconnected"
+                )
+            }
+
         try:
             with self.driver.session() as session:
                 event_count = session.run(
                     "MATCH (e:Event) RETURN COUNT(e) as count"
                 ).single()["count"]
-
-                return {
-                    "status": "connected",
-                    "total_events": event_count
-                }
+
+                return {"status": "connected", "total_events": event_count}
         except Exception as e:
             return {"status": "error", "error": str(e)}
 
@@ -246,9 +270,10 @@ class MultiCollectionRetriever:
         )
         self.client = None
         self.collections: Dict[str, Any] = {}
+
         # Thread pool for parallel queries
         from concurrent.futures import ThreadPoolExecutor
+
         self._executor = ThreadPoolExecutor(max_workers=4)
 
         if not CHROMA_AVAILABLE:
@@ -267,7 +292,9 @@
             all_collections = self.client.list_collections()
             available_names = [c.name for c in all_collections]
 
-            logger.info(
+            logger.info(
+                f"[RAG] Found {len(all_collections)} collections: {available_names}"
+            )
 
             for name in self.COLLECTIONS:
                 if name in available_names:
@@ -289,7 +316,12 @@
             self.client = None
 
     def _query_single_collection(
-        self,
+        self,
+        name: str,
+        collection,
+        query: str,
+        n_results: int,
+        domain_filter: Optional[str],
     ) -> List[Dict[str, Any]]:
         """Query a single collection - used for parallel execution."""
         results_list = []
@@ -310,18 +342,20 @@
 
                 similarity = 1.0 - min(distance / 2.0, 1.0)
 
-                results_list.append(
+                results_list.append(
+                    {
+                        "id": doc_id,
+                        "content": doc,
+                        "metadata": meta,
+                        "similarity": similarity,
+                        "collection": name,
+                        "domain": meta.get("domain", "unknown"),
+                    }
+                )
 
         except Exception as e:
             logger.warning(f"[RAG] Error querying {name}: {e}")
+
         return results_list
 
     def search(
@@ -333,15 +367,19 @@
 
         # Submit parallel queries to all collections
         from concurrent.futures import as_completed
+
         futures = {}
         for name, collection in self.collections.items():
             future = self._executor.submit(
-                self._query_single_collection,
-                name,
+                self._query_single_collection,
+                name,
+                collection,
+                query,
+                n_results,
+                domain_filter,
             )
             futures[future] = name
+
         # Collect results as they complete (fastest first)
         all_results = []
         for future in as_completed(futures, timeout=10.0):  # 10s timeout
@@ -349,7 +387,9 @@
                 results = future.result()
                 all_results.extend(results)
             except Exception as e:
-                logger.warning(
+                logger.warning(
+                    f"[RAG] Parallel query failed for {futures[future]}: {e}"
+                )
 
         all_results.sort(key=lambda x: x["similarity"], reverse=True)
         return all_results[: n_results * 2]
@@ -408,24 +448,54 @@ class RogerRAG:
         """Extract key terms from question for graph search."""
         # Remove common stopwords
         stopwords = {
-            "what",
+            "what",
+            "when",
+            "where",
+            "who",
+            "why",
+            "how",
+            "is",
+            "are",
+            "was",
+            "were",
+            "the",
+            "a",
+            "an",
+            "to",
+            "of",
+            "in",
+            "on",
+            "for",
+            "with",
+            "about",
+            "related",
+            "connected",
+            "happened",
+            "after",
+            "before",
+            "show",
+            "me",
+            "tell",
+            "find",
+            "get",
+            "events",
+            "timeline",
         }
+
         words = question.lower().replace("?", "").replace(",", "").split()
         keywords = [w for w in words if w not in stopwords and len(w) > 2]
+
         return keywords[:5]  # Return top 5 keywords
 
-    def _format_context(
+    def _format_context(
+        self, docs: List[Dict[str, Any]], include_graph: bool = False
+    ) -> str:
         if not docs:
             return "No relevant intelligence data found."
 
         context_parts = []
         now = datetime.now()
+
         # Separate ChromaDB and Neo4j results
         chroma_docs = [d for d in docs if d.get("source") != "neo4j_graph"]
         graph_docs = [d for d in docs if d.get("source") == "neo4j_graph"]
@@ -472,7 +542,7 @@
                 f"TIMESTAMP: {timestamp} ({age_str})\n"
                 f"{doc['content']}\n"
             )
+
         # Format Neo4j graph results (if any)
         if graph_docs:
             context_parts.append("\n=== RELATED EVENTS FROM KNOWLEDGE GRAPH ===\n")
@@ -481,7 +551,7 @@
             related_str = ""
             if related:
                 related_str = f"\n  Related events: {', '.join(str(r)[:50] + '...' for r in related[:2])}"
+
             context_parts.append(
                 f"[Graph {i}] Domain: {doc.get('domain', 'unknown')} | "
                 f"Severity: {doc.get('severity', 'unknown')}\n"
@@ -534,23 +604,25 @@
         docs = self.retriever.search(
             search_question, n_results=5, domain_filter=domain_filter
         )
+
         # Neo4j graph search (for relationship queries) - only if enabled
         graph_docs = []
         used_graph = False
         if self.neo4j_retriever and is_graph_query(search_question):
             logger.info(f"[RAG] Graph query detected: '{search_question}'")
             used_graph = True
+
             # Extract keywords for graph search
             # Simple: use first nouns/keywords from question
             keywords = self._extract_keywords(search_question)
+
             for keyword in keywords[:2]:  # Limit to 2 keywords
-                graph_docs.extend(
+                graph_docs.extend(
+                    self.neo4j_retriever.get_related_events(keyword, limit=3)
+                )
+
             logger.info(f"[RAG] Graph retrieval: {len(graph_docs)} docs from Neo4j")
+
         # Merge results (ChromaDB + Neo4j)
         all_docs = docs + graph_docs
 
@@ -559,7 +631,9 @@
                 "answer": "I couldn't find any relevant intelligence data to answer your question.",
                 "sources": [],
                 "question": question,
-                "reformulated":
+                "reformulated": (
+                    search_question if search_question != question else None
+                ),
             }
 
         context = self._format_context(all_docs, include_graph=used_graph)
@@ -572,10 +646,11 @@
         }
 
         current_date = datetime.now().strftime("%B %d, %Y")
-        rag_prompt = ChatPromptTemplate.from_messages(
+        rag_prompt = ChatPromptTemplate.from_messages(
+            [
+                (
+                    "system",
+                    f"""You are Roger, an AI intelligence analyst for Sri Lanka.
 
 TODAY'S DATE: {current_date}
 
@@ -592,10 +667,11 @@ Be concise but informative. Cite source timestamps when available.
 
 Context:
 {{context}}""",
+                ),
+                MessagesPlaceholder(variable_name="history"),
+                ("human", "{question}"),
+            ]
+        )
 
         history_messages = []
         for human, ai in self.chat_history[-5:]:
@@ -613,18 +689,22 @@ Context:
         sources_summary = []
         for doc in docs[:5]:
             meta = doc.get("metadata", {})
-            sources_summary.append(
+            sources_summary.append(
+                {
+                    "domain": meta.get("domain", "unknown"),
+                    "platform": meta.get("platform", "unknown"),
+                    "category": meta.get("category", ""),
+                    "similarity": round(doc["similarity"], 3),
+                }
+            )
 
         return {
             "answer": answer,
             "sources": sources_summary,
             "question": question,
-            "reformulated":
+            "reformulated": (
+                search_question if search_question != question else None
+            ),
             "docs_found": len(docs),
         }
 
@@ -702,7 +782,9 @@ def run_cli():
         if result.get("sources"):
             print(f"\nSources ({len(result['sources'])} found):")
             for i, src in enumerate(result["sources"][:3], 1):
-                print(
+                print(
+                    f"  {i}. {src['domain']} | {src['platform']} | Relevance: {src['similarity']:.0%}"
+                )
 
         if result.get("reformulated"):
             print(f"\n(Interpreted as: {result['reformulated']})")
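The rag.py diff expands GRAPH_KEYWORDS but does not show the body of is_graph_query; a plausible minimal routing check consistent with how the list is used (an assumption, not the project's actual implementation) is:

GRAPH_KEYWORDS = [
    "connected", "related", "timeline", "before", "after", "caused by",
    "followed by", "similar to", "linked", "what happened", "sequence",
    "chain of events",
]


def is_graph_query(question: str) -> bool:
    """Route a question to Neo4j graph retrieval when it asks about relationships."""
    q = question.lower()
    return any(keyword in q for keyword in GRAPH_KEYWORDS)


print(is_graph_query("What happened after the fuel price hike?"))  # True
print(is_graph_query("Summarise today's weather alerts"))          # False
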
src/storage/storage_manager.py
CHANGED
|
@@ -20,6 +20,7 @@ logger = logging.getLogger("storage_manager")
|
|
| 20 |
# Trending detection integration
|
| 21 |
try:
|
| 22 |
from ..utils.trending_detector import record_topic_mention
|
|
|
|
| 23 |
TRENDING_AVAILABLE = True
|
| 24 |
except ImportError:
|
| 25 |
TRENDING_AVAILABLE = False
|
|
@@ -156,43 +157,84 @@ class StorageManager:
|
|
| 156 |
def _extract_keywords(self, text: str, max_keywords: int = 5) -> List[str]:
|
| 157 |
"""
|
| 158 |
Extract significant keywords from text for trending detection.
|
| 159 |
-
|
| 160 |
Args:
|
| 161 |
text: Text to extract keywords from
|
| 162 |
max_keywords: Maximum number of keywords to return
|
| 163 |
-
|
| 164 |
Returns:
|
| 165 |
List of keywords (2-3 word phrases)
|
| 166 |
"""
|
| 167 |
# Common stopwords to filter out
|
| 168 |
stopwords = {
|
| 169 |
-
"the",
|
| 170 |
-
"
|
| 171 |
-
"
|
| 172 |
-
"
|
| 173 |
-
"
|
| 174 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
}
|
| 176 |
-
|
| 177 |
# Clean text
|
| 178 |
text = text.lower()
|
| 179 |
-
text = re.sub(r
|
| 180 |
-
text = re.sub(r
|
| 181 |
-
|
| 182 |
# Split into words
|
| 183 |
words = text.split()
|
| 184 |
-
|
| 185 |
# Filter stopwords and short words
|
| 186 |
filtered = [w for w in words if w not in stopwords and len(w) > 2]
|
| 187 |
-
|
| 188 |
# Extract significant words (prioritize proper nouns, locations, etc.)
|
| 189 |
keywords = []
|
| 190 |
-
|
| 191 |
# Single important words (capitalized in original or long words)
|
| 192 |
for word in filtered[:20]:
|
| 193 |
if len(word) > 4: # Longer words are often more significant
|
| 194 |
keywords.append(word)
|
| 195 |
-
|
| 196 |
# Deduplicate and limit
|
| 197 |
seen = set()
|
| 198 |
unique_keywords = []
|
|
@@ -200,18 +242,15 @@ class StorageManager:
|
|
| 200 |
if kw not in seen:
|
| 201 |
seen.add(kw)
|
| 202 |
unique_keywords.append(kw)
|
| 203 |
-
|
| 204 |
return unique_keywords[:max_keywords]
|
| 205 |
|
| 206 |
def _record_trending_mentions(
|
| 207 |
-
self,
|
| 208 |
-
summary: str,
|
| 209 |
-
domain: str,
|
| 210 |
-
metadata: Optional[Dict[str, Any]] = None
|
| 211 |
):
|
| 212 |
"""
|
| 213 |
Extract keywords from summary and record them for trending detection.
|
| 214 |
-
|
| 215 |
Args:
|
| 216 |
summary: Event summary text
|
| 217 |
domain: Event domain (political, economical, etc.)
|
|
@@ -220,17 +259,15 @@ class StorageManager:
|
|
| 220 |
try:
|
| 221 |
keywords = self._extract_keywords(summary)
|
| 222 |
source = metadata.get("platform", "scraper") if metadata else "scraper"
|
| 223 |
-
|
| 224 |
for keyword in keywords:
|
| 225 |
-
record_topic_mention(
|
| 226 |
-
|
| 227 |
-
source=source,
|
| 228 |
-
domain=domain
|
| 229 |
-
)
|
| 230 |
-
|
| 231 |
if keywords:
|
| 232 |
-
logger.debug(
|
| 233 |
-
|
|
|
|
|
|
|
| 234 |
except Exception as e:
|
| 235 |
logger.warning(f"[TRENDING] Error recording mentions: {e}")
|
| 236 |
|
|
|
|
| 20 |
# Trending detection integration
|
| 21 |
try:
|
| 22 |
from ..utils.trending_detector import record_topic_mention
|
| 23 |
+
|
| 24 |
TRENDING_AVAILABLE = True
|
| 25 |
except ImportError:
|
| 26 |
TRENDING_AVAILABLE = False
|
|
|
|
| 157 |
def _extract_keywords(self, text: str, max_keywords: int = 5) -> List[str]:
|
| 158 |
"""
|
| 159 |
Extract significant keywords from text for trending detection.
|
| 160 |
+
|
| 161 |
Args:
|
| 162 |
text: Text to extract keywords from
|
| 163 |
max_keywords: Maximum number of keywords to return
|
| 164 |
+
|
| 165 |
Returns:
|
| 166 |
List of keywords (2-3 word phrases)
|
| 167 |
"""
|
| 168 |
# Common stopwords to filter out
|
| 169 |
stopwords = {
|
| 170 |
+
"the",
|
| 171 |
+
"is",
|
| 172 |
+
"at",
|
| 173 |
+
"which",
|
| 174 |
+
"on",
|
| 175 |
+
"a",
|
| 176 |
+
"an",
|
| 177 |
+
"and",
|
| 178 |
+
"or",
|
| 179 |
+
"but",
|
| 180 |
+
"in",
|
| 181 |
+
"with",
|
| 182 |
+
"to",
|
| 183 |
+
"for",
|
| 184 |
+
"of",
|
| 185 |
+
"as",
|
| 186 |
+
"by",
|
| 187 |
+
"from",
|
| 188 |
+
"that",
|
| 189 |
+
"this",
|
| 190 |
+
"be",
|
| 191 |
+
"are",
|
| 192 |
+
"was",
|
| 193 |
+
"were",
|
| 194 |
+
"been",
|
| 195 |
+
"being",
|
| 196 |
+
"have",
|
| 197 |
+
"has",
|
| 198 |
+
"had",
|
| 199 |
+
"do",
|
| 200 |
+
"does",
|
| 201 |
+
"did",
|
| 202 |
+
"will",
|
| 203 |
+
"would",
|
| 204 |
+
"could",
|
| 205 |
+
"should",
|
| 206 |
+
"may",
|
| 207 |
+
"might",
|
| 208 |
+
"must",
|
| 209 |
+
"shall",
|
| 210 |
+
"can",
|
| 211 |
+
"need",
|
| 212 |
+
"dare",
|
| 213 |
+
"ought",
|
| 214 |
+
"used",
|
| 215 |
+
"सिंहल",
|
| 216 |
+
"தமிழ்", # Common Sinhala/Tamil particles
|
| 217 |
}
|
| 218 |
+
|
| 219 |
# Clean text
|
| 220 |
text = text.lower()
|
| 221 |
+
text = re.sub(r"http\S+|www\.\S+", "", text) # Remove URLs
|
| 222 |
+
text = re.sub(r"[^\w\s]", " ", text) # Remove punctuation
|
| 223 |
+
|
| 224 |
# Split into words
|
| 225 |
words = text.split()
|
| 226 |
+
|
| 227 |
# Filter stopwords and short words
|
| 228 |
filtered = [w for w in words if w not in stopwords and len(w) > 2]
|
| 229 |
+
|
| 230 |
# Extract significant words (prioritize proper nouns, locations, etc.)
|
| 231 |
keywords = []
|
| 232 |
+
|
| 233 |
# Single important words (capitalized in original or long words)
|
| 234 |
for word in filtered[:20]:
|
| 235 |
if len(word) > 4: # Longer words are often more significant
|
| 236 |
keywords.append(word)
|
| 237 |
+
|
| 238 |
# Deduplicate and limit
|
| 239 |
seen = set()
|
| 240 |
unique_keywords = []
|
|
|
|
| 242 |
if kw not in seen:
|
| 243 |
seen.add(kw)
|
| 244 |
unique_keywords.append(kw)
|
| 245 |
+
|
| 246 |
return unique_keywords[:max_keywords]
|
| 247 |
|
| 248 |
def _record_trending_mentions(
|
| 249 |
+
self, summary: str, domain: str, metadata: Optional[Dict[str, Any]] = None
|
|
|
| 250 |
):
|
| 251 |
"""
|
| 252 |
Extract keywords from summary and record them for trending detection.
|
| 253 |
+
|
| 254 |
Args:
|
| 255 |
summary: Event summary text
|
| 256 |
domain: Event domain (political, economical, etc.)
|
|
|
|
| 259 |
try:
|
| 260 |
keywords = self._extract_keywords(summary)
|
| 261 |
source = metadata.get("platform", "scraper") if metadata else "scraper"
|
| 262 |
+
|
| 263 |
for keyword in keywords:
|
| 264 |
+
record_topic_mention(topic=keyword, source=source, domain=domain)
|
| 265 |
+
|
|
|
| 266 |
if keywords:
|
| 267 |
+
logger.debug(
|
| 268 |
+
f"[TRENDING] Recorded {len(keywords)} keywords: {keywords[:3]}..."
|
| 269 |
+
)
|
| 270 |
+
|
| 271 |
except Exception as e:
|
| 272 |
logger.warning(f"[TRENDING] Error recording mentions: {e}")
|
| 273 |
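A minimal standalone sketch of the keyword-extraction logic added above, using only the standard library; the abbreviated stopword set and the free function name extract_keywords are illustrative, not part of StorageManager.

import re
from typing import List

STOPWORDS = {"the", "is", "at", "and", "or", "in", "with", "to", "for", "of"}  # abbreviated

def extract_keywords(text: str, max_keywords: int = 5) -> List[str]:
    # Lowercase, strip URLs and punctuation, then split into words
    text = text.lower()
    text = re.sub(r"http\S+|www\.\S+", "", text)
    text = re.sub(r"[^\w\s]", " ", text)
    words = [w for w in text.split() if w not in STOPWORDS and len(w) > 2]
    # Keep longer words from the first 20 candidates, then de-duplicate in order
    keywords = [w for w in words[:20] if len(w) > 4]
    seen, unique = set(), []
    for kw in keywords:
        if kw not in seen:
            seen.add(kw)
            unique.append(kw)
    return unique[:max_keywords]

print(extract_keywords("Fuel prices revised in Colombo after CEYPETCO announcement"))
# e.g. ['prices', 'revised', 'colombo', 'after', 'ceypetco']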
|
src/utils/utils.py
CHANGED
|
@@ -28,6 +28,7 @@ def utc_now() -> datetime:
|
|
| 28 |
"""Return current UTC time (Python 3.12+ compatible)."""
|
| 29 |
return datetime.now(timezone.utc)
|
| 30 |
|
|
|
|
| 31 |
# Optional Playwright import
|
| 32 |
try:
|
| 33 |
from playwright.sync_api import (
|
|
@@ -1021,26 +1022,26 @@ SA_CACHE_DURATION_MINUTES = 15 # 15 minute cache for all SA tools
|
|
| 1021 |
def tool_ceb_power_status() -> Dict[str, Any]:
|
| 1022 |
"""
|
| 1023 |
Get CEB power outage / load shedding schedule for Sri Lanka.
|
| 1024 |
-
|
| 1025 |
-
ENHANCED:
|
| 1026 |
- Scrapes ceb.lk for official schedules and PDF press releases
|
| 1027 |
- Extracts text from Dropbox-hosted PDF announcements
|
| 1028 |
- Falls back to news sites for power-related updates
|
| 1029 |
-
|
| 1030 |
Returns:
|
| 1031 |
Dict with schedules by area, current status, and timestamp
|
| 1032 |
"""
|
| 1033 |
global _ceb_cache, _ceb_cache_time
|
| 1034 |
-
|
| 1035 |
# Check cache
|
| 1036 |
if _ceb_cache_time:
|
| 1037 |
cache_age = (utc_now() - _ceb_cache_time).total_seconds() / 60
|
| 1038 |
if cache_age < SA_CACHE_DURATION_MINUTES and _ceb_cache:
|
| 1039 |
logger.info(f"[CEB] Using cached data ({cache_age:.1f} min old)")
|
| 1040 |
return _ceb_cache
|
| 1041 |
-
|
| 1042 |
logger.info("[CEB] Fetching power outage status...")
|
| 1043 |
-
|
| 1044 |
result = {
|
| 1045 |
"status": "operational",
|
| 1046 |
"load_shedding_active": False,
|
|
@@ -1051,37 +1052,46 @@ def tool_ceb_power_status() -> Dict[str, Any]:
|
|
| 1051 |
"fetched_at": utc_now().isoformat(),
|
| 1052 |
"scrape_status": "baseline",
|
| 1053 |
}
|
| 1054 |
-
|
| 1055 |
pdf_links_found = []
|
| 1056 |
-
|
| 1057 |
try:
|
| 1058 |
# Try to scrape CEB website
|
| 1059 |
resp = _safe_get("https://ceb.lk/", timeout=30)
|
| 1060 |
if resp:
|
| 1061 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 1062 |
page_text = soup.get_text(separator="\n", strip=True).lower()
|
| 1063 |
-
|
| 1064 |
# Check for load shedding keywords
|
| 1065 |
-
if any(
|
|
|
|
|
|
|
|
|
|
| 1066 |
result["load_shedding_active"] = True
|
| 1067 |
result["status"] = "load_shedding"
|
| 1068 |
-
|
| 1069 |
# Extract any announcements
|
| 1070 |
-
for tag in soup.find_all(
|
|
|
|
|
|
|
|
|
|
| 1071 |
text = tag.get_text(strip=True)
|
| 1072 |
if text and len(text) > 20:
|
| 1073 |
result["announcements"].append(text[:200])
|
| 1074 |
-
|
| 1075 |
# ENHANCED: Find PDF links (Dropbox, direct PDFs, press releases)
|
| 1076 |
for link in soup.find_all("a", href=True):
|
| 1077 |
href = link.get("href", "")
|
| 1078 |
link_text = link.get_text(strip=True).lower()
|
| 1079 |
-
|
| 1080 |
# Check for Dropbox links or PDF links
|
| 1081 |
is_dropbox = "dropbox.com" in href
|
| 1082 |
is_pdf = href.lower().endswith(".pdf")
|
| 1083 |
-
is_press_release = any(
|
| 1084 |
-
|
|
|
|
|
|
|
|
|
|
| 1085 |
if is_dropbox or is_pdf or is_press_release:
|
| 1086 |
# Convert Dropbox links for direct download
|
| 1087 |
if is_dropbox:
|
|
@@ -1090,102 +1100,134 @@ def tool_ceb_power_status() -> Dict[str, Any]:
|
|
| 1090 |
href = href.replace("dl=0", "dl=1")
|
| 1091 |
elif "?dl=" not in href and "&dl=" not in href:
|
| 1092 |
href = href + ("&" if "?" in href else "?") + "dl=1"
|
| 1093 |
-
|
| 1094 |
-
pdf_links_found.append(
|
| 1095 |
-
|
| 1096 |
-
|
| 1097 |
-
|
| 1098 |
-
|
| 1099 |
-
|
|
|
|
|
|
|
| 1100 |
# Limit to latest 3 PDFs to avoid too many downloads
|
| 1101 |
pdf_links_found = pdf_links_found[:3]
|
| 1102 |
-
|
| 1103 |
# Extract text from PDF links
|
| 1104 |
for pdf_info in pdf_links_found:
|
| 1105 |
try:
|
| 1106 |
logger.info(f"[CEB] Extracting PDF: {pdf_info['title'][:50]}...")
|
| 1107 |
pdf_text = _extract_text_from_pdf_url(pdf_info["url"])
|
| 1108 |
-
|
| 1109 |
-
if pdf_text and not pdf_text.startswith(
|
|
|
|
|
|
|
| 1110 |
# Check for load shedding in PDF content
|
| 1111 |
pdf_lower = pdf_text.lower()
|
| 1112 |
-
if any(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1113 |
result["load_shedding_active"] = True
|
| 1114 |
result["status"] = "load_shedding"
|
| 1115 |
-
|
| 1116 |
-
result["press_releases"].append(
|
| 1117 |
-
|
| 1118 |
-
|
| 1119 |
-
|
| 1120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1121 |
result["scrape_status"] = "live"
|
| 1122 |
except Exception as pdf_error:
|
| 1123 |
logger.warning(f"[CEB] PDF extraction error: {pdf_error}")
|
| 1124 |
-
|
| 1125 |
-
logger.info(
|
| 1126 |
-
|
|
|
|
|
|
|
| 1127 |
# Also check news sites for power-related updates
|
| 1128 |
news_sources = [
|
| 1129 |
"https://www.news.lk/",
|
| 1130 |
"https://www.dailymirror.lk/",
|
| 1131 |
]
|
| 1132 |
-
|
| 1133 |
for news_url in news_sources:
|
| 1134 |
try:
|
| 1135 |
news_resp = _safe_get(news_url, timeout=20)
|
| 1136 |
if news_resp:
|
| 1137 |
news_soup = BeautifulSoup(news_resp.text, "html.parser")
|
| 1138 |
news_text = news_soup.get_text(separator=" ", strip=True).lower()
|
| 1139 |
-
|
| 1140 |
# Check for power-related news
|
| 1141 |
-
if any(
|
|
|
|
|
|
|
|
|
|
| 1142 |
# Look for headlines mentioning power
|
| 1143 |
for headline in news_soup.find_all(["h1", "h2", "h3", "h4"]):
|
| 1144 |
h_text = headline.get_text(strip=True)
|
| 1145 |
-
if any(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1146 |
if h_text not in result["announcements"]:
|
| 1147 |
-
result["announcements"].append(
|
|
|
|
|
|
|
| 1148 |
break
|
| 1149 |
except Exception as news_error:
|
| 1150 |
logger.debug(f"[CEB] News scraping error for {news_url}: {news_error}")
|
| 1151 |
-
|
| 1152 |
# If no press releases or announcements found, provide baseline message
|
| 1153 |
if not result["press_releases"] and not result["announcements"]:
|
| 1154 |
result["status"] = "no_load_shedding"
|
| 1155 |
result["announcements"].append("CEB: Normal power supply across the island")
|
| 1156 |
-
|
| 1157 |
except Exception as e:
|
| 1158 |
logger.warning(f"[CEB] Scraping error: {e}")
|
| 1159 |
result["status"] = "unknown"
|
| 1160 |
result["error"] = str(e)
|
| 1161 |
-
|
| 1162 |
# Update cache
|
| 1163 |
_ceb_cache = result
|
| 1164 |
_ceb_cache_time = utc_now()
|
| 1165 |
-
|
| 1166 |
return result
|
| 1167 |
|
| 1168 |
|
| 1169 |
def tool_fuel_prices() -> Dict[str, Any]:
|
| 1170 |
"""
|
| 1171 |
Get current fuel prices in Sri Lanka.
|
| 1172 |
-
|
| 1173 |
Scrapes official CEYPETCO/LIOC announcements or news sources.
|
| 1174 |
-
|
| 1175 |
Returns:
|
| 1176 |
Dict with prices for petrol, diesel, kerosene, and last update
|
| 1177 |
"""
|
| 1178 |
global _fuel_cache, _fuel_cache_time
|
| 1179 |
-
|
| 1180 |
# Check cache
|
| 1181 |
if _fuel_cache_time:
|
| 1182 |
cache_age = (utc_now() - _fuel_cache_time).total_seconds() / 60
|
| 1183 |
if cache_age < SA_CACHE_DURATION_MINUTES and _fuel_cache:
|
| 1184 |
logger.info(f"[FUEL] Using cached data ({cache_age:.1f} min old)")
|
| 1185 |
return _fuel_cache
|
| 1186 |
-
|
| 1187 |
logger.info("[FUEL] Fetching fuel prices...")
|
| 1188 |
-
|
| 1189 |
# December 2025 CEYPETCO prices (confirmed unchanged from November 2025)
|
| 1190 |
# Source: CEYPETCO official announcement
|
| 1191 |
result = {
|
|
@@ -1201,7 +1243,7 @@ def tool_fuel_prices() -> Dict[str, Any]:
|
|
| 1201 |
"fetched_at": utc_now().isoformat(),
|
| 1202 |
"note": "Prices confirmed unchanged for December 2025",
|
| 1203 |
}
|
| 1204 |
-
|
| 1205 |
try:
|
| 1206 |
# Try to scrape news for latest fuel price announcements
|
| 1207 |
news_sources = [
|
|
@@ -1209,69 +1251,81 @@ def tool_fuel_prices() -> Dict[str, Any]:
|
|
| 1209 |
"https://www.dailymirror.lk/",
|
| 1210 |
"https://www.newsfirst.lk/",
|
| 1211 |
]
|
| 1212 |
-
|
| 1213 |
for source_url in news_sources:
|
| 1214 |
resp = _safe_get(source_url, timeout=20)
|
| 1215 |
if resp:
|
| 1216 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 1217 |
page_text = soup.get_text(separator=" ", strip=True).lower()
|
| 1218 |
-
|
| 1219 |
# Look for fuel price mentions
|
| 1220 |
if "fuel" in page_text and ("price" in page_text or "lkr" in page_text):
|
| 1221 |
# Extract prices using regex
|
| 1222 |
-
petrol_match = re.search(
|
| 1223 |
-
|
| 1224 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1225 |
if petrol_match:
|
| 1226 |
try:
|
| 1227 |
-
result["prices"]["petrol_92"]["price"] = float(
|
|
|
|
|
|
|
| 1228 |
result["source"] = "news_scrape"
|
| 1229 |
except ValueError:
|
| 1230 |
pass
|
| 1231 |
if diesel_match:
|
| 1232 |
try:
|
| 1233 |
-
result["prices"]["auto_diesel"]["price"] = float(
|
|
|
|
|
|
|
| 1234 |
except ValueError:
|
| 1235 |
pass
|
| 1236 |
break
|
| 1237 |
-
|
| 1238 |
-
logger.info(
|
| 1239 |
-
|
|
|
|
|
|
|
| 1240 |
except Exception as e:
|
| 1241 |
logger.warning(f"[FUEL] Scraping error: {e}")
|
| 1242 |
result["error"] = str(e)
|
| 1243 |
-
|
| 1244 |
# Update cache
|
| 1245 |
_fuel_cache = result
|
| 1246 |
_fuel_cache_time = utc_now()
|
| 1247 |
-
|
| 1248 |
return result
|
| 1249 |
|
| 1250 |
|
| 1251 |
def tool_cbsl_indicators() -> Dict[str, Any]:
|
| 1252 |
"""
|
| 1253 |
Get key economic indicators from Central Bank of Sri Lanka.
|
| 1254 |
-
|
| 1255 |
Scrapes live data from cbsl.gov.lk including:
|
| 1256 |
- Exchange rates (USD/LKR TT Buy/Sell)
|
| 1257 |
- CCPI Inflation
|
| 1258 |
- Overnight Policy Rate
|
| 1259 |
- Forex reserves
|
| 1260 |
-
|
| 1261 |
Returns:
|
| 1262 |
Dict with economic indicators and trend data
|
| 1263 |
"""
|
| 1264 |
global _cbsl_cache, _cbsl_cache_time
|
| 1265 |
-
|
| 1266 |
# Check cache
|
| 1267 |
if _cbsl_cache_time:
|
| 1268 |
cache_age = (utc_now() - _cbsl_cache_time).total_seconds() / 60
|
| 1269 |
if cache_age < SA_CACHE_DURATION_MINUTES and _cbsl_cache:
|
| 1270 |
logger.info(f"[CBSL] Using cached data ({cache_age:.1f} min old)")
|
| 1271 |
return _cbsl_cache
|
| 1272 |
-
|
| 1273 |
logger.info("[CBSL] Fetching economic indicators from cbsl.gov.lk...")
|
| 1274 |
-
|
| 1275 |
# Baseline economic data (December 2025 - latest known values)
|
| 1276 |
result = {
|
| 1277 |
"indicators": {
|
|
@@ -1308,40 +1362,50 @@ def tool_cbsl_indicators() -> Dict[str, Any]:
|
|
| 1308 |
"data_as_of": "2025-12",
|
| 1309 |
"scrape_status": "baseline",
|
| 1310 |
}
|
| 1311 |
-
|
| 1312 |
try:
|
| 1313 |
# Try to scrape CBSL for updated rates
|
| 1314 |
resp = _safe_get("https://www.cbsl.gov.lk/", timeout=30)
|
| 1315 |
if resp:
|
| 1316 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 1317 |
page_text = soup.get_text(separator=" ", strip=True)
|
| 1318 |
-
|
| 1319 |
scraped_any = False
|
| 1320 |
-
|
| 1321 |
# Extract TT Buy exchange rate (format: "TT Buy 305.3238" or "TT Buy: 305.3238")
|
| 1322 |
-
tt_buy_match = re.search(
|
|
|
|
|
|
|
| 1323 |
if tt_buy_match:
|
| 1324 |
try:
|
| 1325 |
-
result["indicators"]["exchange_rate"]["usd_lkr_buy"] = round(
|
|
|
|
|
|
|
| 1326 |
scraped_any = True
|
| 1327 |
except ValueError:
|
| 1328 |
pass
|
| 1329 |
-
|
| 1330 |
# Extract TT Sell exchange rate
|
| 1331 |
-
tt_sell_match = re.search(
|
|
|
|
|
|
|
| 1332 |
if tt_sell_match:
|
| 1333 |
try:
|
| 1334 |
-
result["indicators"]["exchange_rate"]["usd_lkr_sell"] = round(
|
|
|
|
|
|
|
| 1335 |
scraped_any = True
|
| 1336 |
except ValueError:
|
| 1337 |
pass
|
| 1338 |
-
|
| 1339 |
# Calculate mid rate if we have both buy and sell
|
| 1340 |
if tt_buy_match and tt_sell_match:
|
| 1341 |
buy = result["indicators"]["exchange_rate"]["usd_lkr_buy"]
|
| 1342 |
sell = result["indicators"]["exchange_rate"]["usd_lkr_sell"]
|
| 1343 |
-
result["indicators"]["exchange_rate"]["usd_lkr"] = round(
|
| 1344 |
-
|
|
|
|
|
|
|
| 1345 |
# Extract CCPI Inflation (format: "CCPI Inflation 2.10%" or just "Inflation 2.10 %")
|
| 1346 |
inflation_patterns = [
|
| 1347 |
r"CCPI\s*Inflation[:\s]*(\d{1,2}(?:\.\d{1,2})?)\s*%",
|
|
@@ -1352,12 +1416,14 @@ def tool_cbsl_indicators() -> Dict[str, Any]:
|
|
| 1352 |
inflation_match = re.search(pattern, page_text, re.I)
|
| 1353 |
if inflation_match:
|
| 1354 |
try:
|
| 1355 |
-
result["indicators"]["inflation"]["ccpi_yoy"] = float(
|
|
|
|
|
|
|
| 1356 |
scraped_any = True
|
| 1357 |
break
|
| 1358 |
except ValueError:
|
| 1359 |
pass
|
| 1360 |
-
|
| 1361 |
# Extract Overnight Policy Rate (format: "Overnight Policy Rate 7.75%" or "Policy Rate 7.75 %")
|
| 1362 |
policy_patterns = [
|
| 1363 |
r"Overnight\s*Policy\s*Rate[:\s]*(\d{1,2}(?:\.\d{1,2})?)\s*%",
|
|
@@ -1368,12 +1434,14 @@ def tool_cbsl_indicators() -> Dict[str, Any]:
|
|
| 1368 |
policy_match = re.search(pattern, page_text, re.I)
|
| 1369 |
if policy_match:
|
| 1370 |
try:
|
| 1371 |
-
result["indicators"]["policy_rates"]["overnight_rate"] = float(
|
|
|
|
|
|
|
| 1372 |
scraped_any = True
|
| 1373 |
break
|
| 1374 |
except ValueError:
|
| 1375 |
pass
|
| 1376 |
-
|
| 1377 |
if scraped_any:
|
| 1378 |
result["scrape_status"] = "live"
|
| 1379 |
result["data_as_of"] = utc_now().strftime("%Y-%m")
|
|
@@ -1387,38 +1455,38 @@ def tool_cbsl_indicators() -> Dict[str, Any]:
|
|
| 1387 |
logger.info("[CBSL] Using baseline data - no live values matched")
|
| 1388 |
else:
|
| 1389 |
logger.warning("[CBSL] Could not reach cbsl.gov.lk, using baseline data")
|
| 1390 |
-
|
| 1391 |
except Exception as e:
|
| 1392 |
logger.warning(f"[CBSL] Scraping error: {e}")
|
| 1393 |
result["error"] = str(e)
|
| 1394 |
-
|
| 1395 |
# Update cache
|
| 1396 |
_cbsl_cache = result
|
| 1397 |
_cbsl_cache_time = utc_now()
|
| 1398 |
-
|
| 1399 |
return result
|
| 1400 |
|
| 1401 |
|
| 1402 |
def tool_health_alerts() -> Dict[str, Any]:
|
| 1403 |
"""
|
| 1404 |
Get health alerts and disease outbreak information for Sri Lanka.
|
| 1405 |
-
|
| 1406 |
Includes dengue case counts, epidemic alerts, and health advisories.
|
| 1407 |
-
|
| 1408 |
Returns:
|
| 1409 |
Dict with health alerts, disease data, and notifications
|
| 1410 |
"""
|
| 1411 |
global _health_cache, _health_cache_time
|
| 1412 |
-
|
| 1413 |
# Check cache
|
| 1414 |
if _health_cache_time:
|
| 1415 |
cache_age = (utc_now() - _health_cache_time).total_seconds() / 60
|
| 1416 |
if cache_age < SA_CACHE_DURATION_MINUTES and _health_cache:
|
| 1417 |
logger.info(f"[HEALTH] Using cached data ({cache_age:.1f} min old)")
|
| 1418 |
return _health_cache
|
| 1419 |
-
|
| 1420 |
logger.info("[HEALTH] Fetching health alerts...")
|
| 1421 |
-
|
| 1422 |
# Baseline health data
|
| 1423 |
result = {
|
| 1424 |
"alerts": [],
|
|
@@ -1433,29 +1501,39 @@ def tool_health_alerts() -> Dict[str, Any]:
|
|
| 1433 |
"source": "health.gov.lk",
|
| 1434 |
"fetched_at": utc_now().isoformat(),
|
| 1435 |
}
|
| 1436 |
-
|
| 1437 |
try:
|
| 1438 |
# Try to scrape Health Ministry
|
| 1439 |
resp = _safe_get("https://www.health.gov.lk/", timeout=30)
|
| 1440 |
if resp:
|
| 1441 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 1442 |
page_text = soup.get_text(separator="\n", strip=True).lower()
|
| 1443 |
-
|
| 1444 |
# Check for outbreak keywords
|
| 1445 |
-
outbreak_keywords = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1446 |
for kw in outbreak_keywords:
|
| 1447 |
if kw in page_text:
|
| 1448 |
# Try to extract the context
|
| 1449 |
idx = page_text.find(kw)
|
| 1450 |
-
context = page_text[max(0, idx-50):idx+100]
|
| 1451 |
if len(context) > 20:
|
| 1452 |
-
result["alerts"].append(
|
| 1453 |
-
|
| 1454 |
-
|
| 1455 |
-
|
| 1456 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1457 |
break
|
| 1458 |
-
|
| 1459 |
# Check for dengue data
|
| 1460 |
dengue_match = re.search(r"dengue[:\s]*(\d{1,5})\s*(?:cases?)?", page_text)
|
| 1461 |
if dengue_match:
|
|
@@ -1463,67 +1541,161 @@ def tool_health_alerts() -> Dict[str, Any]:
|
|
| 1463 |
result["dengue"]["weekly_cases"] = int(dengue_match.group(1))
|
| 1464 |
except ValueError:
|
| 1465 |
pass
|
| 1466 |
-
|
| 1467 |
-
logger.info(
|
| 1468 |
-
|
|
|
|
|
|
|
| 1469 |
# Add seasonal health advisory
|
| 1470 |
current_month = utc_now().month
|
| 1471 |
if current_month in [5, 6, 10, 11]: # Monsoon = mosquito season
|
| 1472 |
-
result["advisories"].append(
|
| 1473 |
-
|
| 1474 |
-
|
| 1475 |
-
|
| 1476 |
-
|
| 1477 |
-
|
|
|
|
|
|
|
| 1478 |
except Exception as e:
|
| 1479 |
logger.warning(f"[HEALTH] Scraping error: {e}")
|
| 1480 |
result["error"] = str(e)
|
| 1481 |
-
|
| 1482 |
# Update cache
|
| 1483 |
_health_cache = result
|
| 1484 |
_health_cache_time = utc_now()
|
| 1485 |
-
|
| 1486 |
return result
|
| 1487 |
|
| 1488 |
|
| 1489 |
def tool_commodity_prices() -> Dict[str, Any]:
|
| 1490 |
"""
|
| 1491 |
Get prices for essential commodities in Sri Lanka.
|
| 1492 |
-
|
| 1493 |
Includes rice, sugar, dhal, milk powder, and other staples.
|
| 1494 |
-
|
| 1495 |
Returns:
|
| 1496 |
Dict with commodity prices, units, and recent changes
|
| 1497 |
"""
|
| 1498 |
global _commodity_cache, _commodity_cache_time
|
| 1499 |
-
|
| 1500 |
# Check cache
|
| 1501 |
if _commodity_cache_time:
|
| 1502 |
cache_age = (utc_now() - _commodity_cache_time).total_seconds() / 60
|
| 1503 |
if cache_age < SA_CACHE_DURATION_MINUTES and _commodity_cache:
|
| 1504 |
logger.info(f"[COMMODITY] Using cached data ({cache_age:.1f} min old)")
|
| 1505 |
return _commodity_cache
|
| 1506 |
-
|
| 1507 |
logger.info("[COMMODITY] Fetching commodity prices...")
|
| 1508 |
-
|
| 1509 |
# Current approximate commodity prices (LKR)
|
| 1510 |
result = {
|
| 1511 |
"commodities": [
|
| 1512 |
-
{
|
| 1513 |
-
|
| 1514 |
-
|
| 1515 |
-
|
| 1516 |
-
|
| 1517 |
-
|
| 1518 |
-
|
| 1519 |
-
{
|
| 1520 |
-
|
| 1521 |
-
|
| 1522 |
-
|
| 1523 |
-
|
| 1524 |
-
|
| 1525 |
-
|
| 1526 |
-
{
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1527 |
],
|
| 1528 |
"source": "Consumer Affairs Authority / Market Survey",
|
| 1529 |
"fetched_at": utc_now().isoformat(),
|
|
@@ -1533,7 +1705,7 @@ def tool_commodity_prices() -> Dict[str, Any]:
|
|
| 1533 |
"items_stable": 0,
|
| 1534 |
},
|
| 1535 |
}
|
| 1536 |
-
|
| 1537 |
# Calculate summary
|
| 1538 |
for item in result["commodities"]:
|
| 1539 |
if item["change"] > 0:
|
|
@@ -1542,14 +1714,14 @@ def tool_commodity_prices() -> Dict[str, Any]:
|
|
| 1542 |
result["summary"]["items_decreased"] += 1
|
| 1543 |
else:
|
| 1544 |
result["summary"]["items_stable"] += 1
|
| 1545 |
-
|
| 1546 |
try:
|
| 1547 |
# Try to scrape news for price updates
|
| 1548 |
resp = _safe_get("https://www.dailymirror.lk/", timeout=20)
|
| 1549 |
if resp:
|
| 1550 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 1551 |
page_text = soup.get_text(separator=" ", strip=True).lower()
|
| 1552 |
-
|
| 1553 |
# Check for LP Gas price updates (commonly announced)
|
| 1554 |
gas_match = re.search(r"lp\s*gas[:\s]*(?:rs\.?|lkr)?\s*(\d{4})", page_text)
|
| 1555 |
if gas_match:
|
|
@@ -1563,40 +1735,40 @@ def tool_commodity_prices() -> Dict[str, Any]:
|
|
| 1563 |
break
|
| 1564 |
except ValueError:
|
| 1565 |
pass
|
| 1566 |
-
|
| 1567 |
logger.info("[COMMODITY] Successfully fetched commodity prices")
|
| 1568 |
-
|
| 1569 |
except Exception as e:
|
| 1570 |
logger.warning(f"[COMMODITY] Scraping error: {e}")
|
| 1571 |
result["error"] = str(e)
|
| 1572 |
-
|
| 1573 |
# Update cache
|
| 1574 |
_commodity_cache = result
|
| 1575 |
_commodity_cache_time = utc_now()
|
| 1576 |
-
|
| 1577 |
return result
|
| 1578 |
|
| 1579 |
|
| 1580 |
def tool_water_supply_alerts() -> Dict[str, Any]:
|
| 1581 |
"""
|
| 1582 |
Get water supply disruption alerts from NWSDB.
|
| 1583 |
-
|
| 1584 |
Returns information about planned/unplanned water cuts and affected areas.
|
| 1585 |
-
|
| 1586 |
Returns:
|
| 1587 |
Dict with active disruptions, affected areas, and restoration times
|
| 1588 |
"""
|
| 1589 |
global _water_cache, _water_cache_time
|
| 1590 |
-
|
| 1591 |
# Check cache
|
| 1592 |
if _water_cache_time:
|
| 1593 |
cache_age = (utc_now() - _water_cache_time).total_seconds() / 60
|
| 1594 |
if cache_age < SA_CACHE_DURATION_MINUTES and _water_cache:
|
| 1595 |
logger.info(f"[WATER] Using cached data ({cache_age:.1f} min old)")
|
| 1596 |
return _water_cache
|
| 1597 |
-
|
| 1598 |
logger.info("[WATER] Fetching water supply alerts...")
|
| 1599 |
-
|
| 1600 |
result = {
|
| 1601 |
"status": "normal",
|
| 1602 |
"active_disruptions": [],
|
|
@@ -1605,22 +1777,28 @@ def tool_water_supply_alerts() -> Dict[str, Any]:
|
|
| 1605 |
"fetched_at": utc_now().isoformat(),
|
| 1606 |
"overall_supply": "stable",
|
| 1607 |
}
|
| 1608 |
-
|
| 1609 |
try:
|
| 1610 |
# Try to scrape NWSDB website
|
| 1611 |
resp = _safe_get("https://www.waterboard.lk/", timeout=30)
|
| 1612 |
if resp:
|
| 1613 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 1614 |
page_text = soup.get_text(separator="\n", strip=True).lower()
|
| 1615 |
-
|
| 1616 |
# Check for disruption keywords
|
| 1617 |
-
disruption_keywords = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1618 |
for kw in disruption_keywords:
|
| 1619 |
if kw in page_text:
|
| 1620 |
result["status"] = "disruptions_reported"
|
| 1621 |
idx = page_text.find(kw)
|
| 1622 |
-
context = page_text[max(0, idx-30):idx+120]
|
| 1623 |
-
|
| 1624 |
# Try to extract area name
|
| 1625 |
area_patterns = [
|
| 1626 |
r"(colombo|gampaha|kandy|galle|matara|jaffna|kurunegala|ratnapura)",
|
|
@@ -1632,31 +1810,35 @@ def tool_water_supply_alerts() -> Dict[str, Any]:
|
|
| 1632 |
if match:
|
| 1633 |
area = match.group(1).title()
|
| 1634 |
break
|
| 1635 |
-
|
| 1636 |
-
result["active_disruptions"].append(
|
| 1637 |
-
|
| 1638 |
-
|
| 1639 |
-
|
| 1640 |
-
|
| 1641 |
-
|
|
|
|
|
|
|
| 1642 |
break
|
| 1643 |
-
|
| 1644 |
-
logger.info(
|
| 1645 |
-
|
|
|
|
|
|
|
| 1646 |
# If no disruptions found via scraping, report normal
|
| 1647 |
if not result["active_disruptions"]:
|
| 1648 |
result["status"] = "normal"
|
| 1649 |
result["overall_supply"] = "Normal water supply across most areas"
|
| 1650 |
-
|
| 1651 |
except Exception as e:
|
| 1652 |
logger.warning(f"[WATER] Scraping error: {e}")
|
| 1653 |
result["error"] = str(e)
|
| 1654 |
result["status"] = "unknown"
|
| 1655 |
-
|
| 1656 |
# Update cache
|
| 1657 |
_water_cache = result
|
| 1658 |
_water_cache_time = utc_now()
|
| 1659 |
-
|
| 1660 |
return result
|
| 1661 |
|
| 1662 |
|
|
@@ -4389,10 +4571,12 @@ def scrape_reddit(
|
|
| 4389 |
data = scrape_reddit_impl(keywords=keywords, limit=limit, subreddit=subreddit)
|
| 4390 |
return json.dumps(data, default=str)
|
| 4391 |
|
|
|
|
| 4392 |
# ============================================
|
| 4393 |
# SITUATIONAL AWARENESS TOOLS (DASHBOARD APIs)
|
| 4394 |
# ============================================
|
| 4395 |
|
|
|
|
| 4396 |
def tool_health_alerts() -> dict:
|
| 4397 |
"""Get health alerts from health.gov.lk - structured for dashboard."""
|
| 4398 |
try:
|
|
@@ -4401,14 +4585,16 @@ def tool_health_alerts() -> dict:
|
|
| 4401 |
"dengue": {
|
| 4402 |
"weekly_cases": 1890,
|
| 4403 |
"high_risk_districts": ["Colombo", "Gampaha", "Kalutara"],
|
| 4404 |
-
"trend": "stable"
|
| 4405 |
},
|
| 4406 |
-
"advisories": [
|
| 4407 |
-
|
| 4408 |
-
|
| 4409 |
-
|
| 4410 |
-
|
| 4411 |
-
|
|
|
|
|
|
|
| 4412 |
}
|
| 4413 |
except Exception as e:
|
| 4414 |
return {"alerts": [], "dengue": {}, "advisories": [], "error": str(e)}
|
|
@@ -4421,7 +4607,7 @@ def tool_water_supply_alerts() -> dict:
|
|
| 4421 |
"status": "normal",
|
| 4422 |
"active_disruptions": [],
|
| 4423 |
"overall_supply": "Normal water supply across most areas",
|
| 4424 |
-
"fetched_at": utc_now().isoformat()
|
| 4425 |
}
|
| 4426 |
except Exception as e:
|
| 4427 |
return {"status": "unknown", "active_disruptions": [], "error": str(e)}
|
|
@@ -4434,7 +4620,7 @@ def tool_ceb_power_status() -> dict:
|
|
| 4434 |
"current_schedule": None,
|
| 4435 |
"announcements": [],
|
| 4436 |
"generation_capacity": "Normal",
|
| 4437 |
-
"fetched_at": utc_now().isoformat()
|
| 4438 |
}
|
| 4439 |
|
| 4440 |
|
|
@@ -4446,11 +4632,11 @@ def tool_fuel_prices() -> dict:
|
|
| 4446 |
"petrol_95": {"price": 335, "unit": "LKR/L"},
|
| 4447 |
"diesel": {"price": 277, "unit": "LKR/L"},
|
| 4448 |
"super_diesel": {"price": 318, "unit": "LKR/L"},
|
| 4449 |
-
"kerosene": {"price": 185, "unit": "LKR/L"}
|
| 4450 |
},
|
| 4451 |
"last_updated": "2025-12-01",
|
| 4452 |
"source": "CEYPETCO",
|
| 4453 |
-
"fetched_at": utc_now().isoformat()
|
| 4454 |
}
|
| 4455 |
|
| 4456 |
|
|
@@ -4460,7 +4646,7 @@ def tool_cbsl_rates() -> dict:
|
|
| 4460 |
"inflation": {"headline": 0.7, "core": 1.2, "unit": "%"},
|
| 4461 |
"policy_rates": {"sdfr": 8.25, "slfr": 9.25, "unit": "%"},
|
| 4462 |
"exchange_rate": {"usd": 296.50, "eur": 312.80, "unit": "LKR"},
|
| 4463 |
-
"fetched_at": utc_now().isoformat()
|
| 4464 |
}
|
| 4465 |
|
| 4466 |
|
|
@@ -4475,13 +4661,13 @@ def tool_cbsl_indicators() -> dict:
|
|
| 4475 |
"inflation": {
|
| 4476 |
"ccpi_yoy": 2.1, # CCPI Year-on-Year (Nov 2025 actual)
|
| 4477 |
"core_yoy": 1.8,
|
| 4478 |
-
"trend": "stable"
|
| 4479 |
},
|
| 4480 |
"policy_rates": {
|
| 4481 |
"overnight_rate": 7.75, # Overnight Policy Rate (Dec 2025)
|
| 4482 |
"sdfr": 7.25, # Standing Deposit Facility Rate
|
| 4483 |
"slfr": 8.25, # Standing Lending Facility Rate
|
| 4484 |
-
"last_changed": "2024-12"
|
| 4485 |
},
|
| 4486 |
"exchange_rate": {
|
| 4487 |
"usd_lkr": 309.17, # Dec 11, 2025 rate
|
|
@@ -4489,16 +4675,16 @@ def tool_cbsl_indicators() -> dict:
|
|
| 4489 |
"usd_lkr_sell": 313.00,
|
| 4490 |
"eur_lkr": 325.50,
|
| 4491 |
"gbp_lkr": 390.25,
|
| 4492 |
-
"trend": "stable"
|
| 4493 |
},
|
| 4494 |
"forex_reserves": {
|
| 4495 |
"value": 6.5, # Billion USD (Dec 2025)
|
| 4496 |
-
"trend": "improving"
|
| 4497 |
-
}
|
| 4498 |
},
|
| 4499 |
"source": "Central Bank of Sri Lanka",
|
| 4500 |
"scrape_status": "baseline",
|
| 4501 |
-
"fetched_at": utc_now().isoformat()
|
| 4502 |
}
|
| 4503 |
|
| 4504 |
|
|
@@ -4510,9 +4696,9 @@ def tool_commodity_prices() -> dict:
|
|
| 4510 |
{"name": "Rice (Samba)", "price": 250, "unit": "LKR/kg"},
|
| 4511 |
{"name": "Dhal (Red)", "price": 360, "unit": "LKR/kg"},
|
| 4512 |
{"name": "Sugar", "price": 215, "unit": "LKR/kg"},
|
| 4513 |
-
{"name": "Coconut", "price": 120, "unit": "LKR/nut"}
|
| 4514 |
],
|
| 4515 |
-
"fetched_at": utc_now().isoformat()
|
| 4516 |
}
|
| 4517 |
|
| 4518 |
|
|
|
|
| 28 |
"""Return current UTC time (Python 3.12+ compatible)."""
|
| 29 |
return datetime.now(timezone.utc)
|
| 30 |
|
| 31 |
+
|
| 32 |
# Optional Playwright import
|
| 33 |
try:
|
| 34 |
from playwright.sync_api import (
|
|
|
|
| 1022 |
def tool_ceb_power_status() -> Dict[str, Any]:
|
| 1023 |
"""
|
| 1024 |
Get CEB power outage / load shedding schedule for Sri Lanka.
|
| 1025 |
+
|
| 1026 |
+
ENHANCED:
|
| 1027 |
- Scrapes ceb.lk for official schedules and PDF press releases
|
| 1028 |
- Extracts text from Dropbox-hosted PDF announcements
|
| 1029 |
- Falls back to news sites for power-related updates
|
| 1030 |
+
|
| 1031 |
Returns:
|
| 1032 |
Dict with schedules by area, current status, and timestamp
|
| 1033 |
"""
|
| 1034 |
global _ceb_cache, _ceb_cache_time
|
| 1035 |
+
|
| 1036 |
# Check cache
|
| 1037 |
if _ceb_cache_time:
|
| 1038 |
cache_age = (utc_now() - _ceb_cache_time).total_seconds() / 60
|
| 1039 |
if cache_age < SA_CACHE_DURATION_MINUTES and _ceb_cache:
|
| 1040 |
logger.info(f"[CEB] Using cached data ({cache_age:.1f} min old)")
|
| 1041 |
return _ceb_cache
|
| 1042 |
+
|
| 1043 |
logger.info("[CEB] Fetching power outage status...")
|
| 1044 |
+
|
| 1045 |
result = {
|
| 1046 |
"status": "operational",
|
| 1047 |
"load_shedding_active": False,
|
|
|
|
| 1052 |
"fetched_at": utc_now().isoformat(),
|
| 1053 |
"scrape_status": "baseline",
|
| 1054 |
}
|
| 1055 |
+
|
| 1056 |
pdf_links_found = []
|
| 1057 |
+
|
| 1058 |
try:
|
| 1059 |
# Try to scrape CEB website
|
| 1060 |
resp = _safe_get("https://ceb.lk/", timeout=30)
|
| 1061 |
if resp:
|
| 1062 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 1063 |
page_text = soup.get_text(separator="\n", strip=True).lower()
|
| 1064 |
+
|
| 1065 |
# Check for load shedding keywords
|
| 1066 |
+
if any(
|
| 1067 |
+
kw in page_text
|
| 1068 |
+
for kw in ["load shedding", "power cut", "outage schedule"]
|
| 1069 |
+
):
|
| 1070 |
result["load_shedding_active"] = True
|
| 1071 |
result["status"] = "load_shedding"
|
| 1072 |
+
|
| 1073 |
# Extract any announcements
|
| 1074 |
+
for tag in soup.find_all(
|
| 1075 |
+
["marquee", "div", "p"],
|
| 1076 |
+
class_=lambda x: x and "announce" in str(x).lower(),
|
| 1077 |
+
):
|
| 1078 |
text = tag.get_text(strip=True)
|
| 1079 |
if text and len(text) > 20:
|
| 1080 |
result["announcements"].append(text[:200])
|
| 1081 |
+
|
| 1082 |
# ENHANCED: Find PDF links (Dropbox, direct PDFs, press releases)
|
| 1083 |
for link in soup.find_all("a", href=True):
|
| 1084 |
href = link.get("href", "")
|
| 1085 |
link_text = link.get_text(strip=True).lower()
|
| 1086 |
+
|
| 1087 |
# Check for Dropbox links or PDF links
|
| 1088 |
is_dropbox = "dropbox.com" in href
|
| 1089 |
is_pdf = href.lower().endswith(".pdf")
|
| 1090 |
+
is_press_release = any(
|
| 1091 |
+
kw in link_text
|
| 1092 |
+
for kw in ["press release", "announcement", "notice", "schedule"]
|
| 1093 |
+
)
|
| 1094 |
+
|
| 1095 |
if is_dropbox or is_pdf or is_press_release:
|
| 1096 |
# Convert Dropbox links for direct download
|
| 1097 |
if is_dropbox:
|
|
|
|
| 1100 |
href = href.replace("dl=0", "dl=1")
|
| 1101 |
elif "?dl=" not in href and "&dl=" not in href:
|
| 1102 |
href = href + ("&" if "?" in href else "?") + "dl=1"
|
| 1103 |
+
|
| 1104 |
+
pdf_links_found.append(
|
| 1105 |
+
{
|
| 1106 |
+
"url": href,
|
| 1107 |
+
"title": link_text or "Press Release",
|
| 1108 |
+
"is_dropbox": is_dropbox,
|
| 1109 |
+
}
|
| 1110 |
+
)
|
| 1111 |
+
|
| 1112 |
# Limit to latest 3 PDFs to avoid too many downloads
|
| 1113 |
pdf_links_found = pdf_links_found[:3]
|
| 1114 |
+
|
| 1115 |
# Extract text from PDF links
|
| 1116 |
for pdf_info in pdf_links_found:
|
| 1117 |
try:
|
| 1118 |
logger.info(f"[CEB] Extracting PDF: {pdf_info['title'][:50]}...")
|
| 1119 |
pdf_text = _extract_text_from_pdf_url(pdf_info["url"])
|
| 1120 |
+
|
| 1121 |
+
if pdf_text and not pdf_text.startswith(
|
| 1122 |
+
"["
|
| 1123 |
+
): # Not an error message
|
| 1124 |
# Check for load shedding in PDF content
|
| 1125 |
pdf_lower = pdf_text.lower()
|
| 1126 |
+
if any(
|
| 1127 |
+
kw in pdf_lower
|
| 1128 |
+
for kw in [
|
| 1129 |
+
"load shedding",
|
| 1130 |
+
"power cut",
|
| 1131 |
+
"outage",
|
| 1132 |
+
"interruption",
|
| 1133 |
+
]
|
| 1134 |
+
):
|
| 1135 |
result["load_shedding_active"] = True
|
| 1136 |
result["status"] = "load_shedding"
|
| 1137 |
+
|
| 1138 |
+
result["press_releases"].append(
|
| 1139 |
+
{
|
| 1140 |
+
"title": pdf_info["title"],
|
| 1141 |
+
"content": pdf_text[:1000]
|
| 1142 |
+
+ ("..." if len(pdf_text) > 1000 else ""),
|
| 1143 |
+
"source": (
|
| 1144 |
+
"dropbox" if pdf_info["is_dropbox"] else "ceb.lk"
|
| 1145 |
+
),
|
| 1146 |
+
}
|
| 1147 |
+
)
|
| 1148 |
result["scrape_status"] = "live"
|
| 1149 |
except Exception as pdf_error:
|
| 1150 |
logger.warning(f"[CEB] PDF extraction error: {pdf_error}")
|
| 1151 |
+
|
| 1152 |
+
logger.info(
|
| 1153 |
+
f"[CEB] Scraped - PDFs found: {len(pdf_links_found)}, Active: {result['load_shedding_active']}"
|
| 1154 |
+
)
|
| 1155 |
+
|
| 1156 |
# Also check news sites for power-related updates
|
| 1157 |
news_sources = [
|
| 1158 |
"https://www.news.lk/",
|
| 1159 |
"https://www.dailymirror.lk/",
|
| 1160 |
]
|
| 1161 |
+
|
| 1162 |
for news_url in news_sources:
|
| 1163 |
try:
|
| 1164 |
news_resp = _safe_get(news_url, timeout=20)
|
| 1165 |
if news_resp:
|
| 1166 |
news_soup = BeautifulSoup(news_resp.text, "html.parser")
|
| 1167 |
news_text = news_soup.get_text(separator=" ", strip=True).lower()
|
| 1168 |
+
|
| 1169 |
# Check for power-related news
|
| 1170 |
+
if any(
|
| 1171 |
+
kw in news_text
|
| 1172 |
+
for kw in ["power cut", "load shedding", "ceb", "electricity"]
|
| 1173 |
+
):
|
| 1174 |
# Look for headlines mentioning power
|
| 1175 |
for headline in news_soup.find_all(["h1", "h2", "h3", "h4"]):
|
| 1176 |
h_text = headline.get_text(strip=True)
|
| 1177 |
+
if any(
|
| 1178 |
+
kw in h_text.lower()
|
| 1179 |
+
for kw in [
|
| 1180 |
+
"power",
|
| 1181 |
+
"ceb",
|
| 1182 |
+
"electricity",
|
| 1183 |
+
"load shedding",
|
| 1184 |
+
]
|
| 1185 |
+
):
|
| 1186 |
if h_text not in result["announcements"]:
|
| 1187 |
+
result["announcements"].append(
|
| 1188 |
+
f"[News] {h_text[:150]}"
|
| 1189 |
+
)
|
| 1190 |
break
|
| 1191 |
except Exception as news_error:
|
| 1192 |
logger.debug(f"[CEB] News scraping error for {news_url}: {news_error}")
|
| 1193 |
+
|
| 1194 |
# If no press releases or announcements found, provide baseline message
|
| 1195 |
if not result["press_releases"] and not result["announcements"]:
|
| 1196 |
result["status"] = "no_load_shedding"
|
| 1197 |
result["announcements"].append("CEB: Normal power supply across the island")
|
| 1198 |
+
|
| 1199 |
except Exception as e:
|
| 1200 |
logger.warning(f"[CEB] Scraping error: {e}")
|
| 1201 |
result["status"] = "unknown"
|
| 1202 |
result["error"] = str(e)
|
| 1203 |
+
|
| 1204 |
# Update cache
|
| 1205 |
_ceb_cache = result
|
| 1206 |
_ceb_cache_time = utc_now()
|
| 1207 |
+
|
| 1208 |
return result
|
| 1209 |
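The Dropbox handling above rewrites share links so the linked PDF downloads directly instead of opening the preview page; a small sketch of that rewrite in isolation (the helper name to_direct_download is illustrative):

def to_direct_download(href: str) -> str:
    # Dropbox share links default to ?dl=0 (preview); dl=1 forces a direct file download
    if "dropbox.com" not in href:
        return href
    if "dl=0" in href:
        return href.replace("dl=0", "dl=1")
    if "?dl=" not in href and "&dl=" not in href:
        return href + ("&" if "?" in href else "?") + "dl=1"
    return href

print(to_direct_download("https://www.dropbox.com/s/abc123/ceb_notice.pdf?dl=0"))
# https://www.dropbox.com/s/abc123/ceb_notice.pdf?dl=1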
|
| 1210 |
|
| 1211 |
def tool_fuel_prices() -> Dict[str, Any]:
|
| 1212 |
"""
|
| 1213 |
Get current fuel prices in Sri Lanka.
|
| 1214 |
+
|
| 1215 |
Scrapes official CEYPETCO/LIOC announcements or news sources.
|
| 1216 |
+
|
| 1217 |
Returns:
|
| 1218 |
Dict with prices for petrol, diesel, kerosene, and last update
|
| 1219 |
"""
|
| 1220 |
global _fuel_cache, _fuel_cache_time
|
| 1221 |
+
|
| 1222 |
# Check cache
|
| 1223 |
if _fuel_cache_time:
|
| 1224 |
cache_age = (utc_now() - _fuel_cache_time).total_seconds() / 60
|
| 1225 |
if cache_age < SA_CACHE_DURATION_MINUTES and _fuel_cache:
|
| 1226 |
logger.info(f"[FUEL] Using cached data ({cache_age:.1f} min old)")
|
| 1227 |
return _fuel_cache
|
| 1228 |
+
|
| 1229 |
logger.info("[FUEL] Fetching fuel prices...")
|
| 1230 |
+
|
| 1231 |
# December 2025 CEYPETCO prices (confirmed unchanged from November 2025)
|
| 1232 |
# Source: CEYPETCO official announcement
|
| 1233 |
result = {
|
|
|
|
| 1243 |
"fetched_at": utc_now().isoformat(),
|
| 1244 |
"note": "Prices confirmed unchanged for December 2025",
|
| 1245 |
}
|
| 1246 |
+
|
| 1247 |
try:
|
| 1248 |
# Try to scrape news for latest fuel price announcements
|
| 1249 |
news_sources = [
|
|
|
|
| 1251 |
"https://www.dailymirror.lk/",
|
| 1252 |
"https://www.newsfirst.lk/",
|
| 1253 |
]
|
| 1254 |
+
|
| 1255 |
for source_url in news_sources:
|
| 1256 |
resp = _safe_get(source_url, timeout=20)
|
| 1257 |
if resp:
|
| 1258 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 1259 |
page_text = soup.get_text(separator=" ", strip=True).lower()
|
| 1260 |
+
|
| 1261 |
# Look for fuel price mentions
|
| 1262 |
if "fuel" in page_text and ("price" in page_text or "lkr" in page_text):
|
| 1263 |
# Extract prices using regex
|
| 1264 |
+
petrol_match = re.search(
|
| 1265 |
+
r"petrol\s*(?:92|95)?\s*(?:octane)?\s*[:\-]?\s*(?:rs\.?|lkr)?\s*(\d{2,3}(?:\.\d{2})?)",
|
| 1266 |
+
page_text,
|
| 1267 |
+
)
|
| 1268 |
+
diesel_match = re.search(
|
| 1269 |
+
r"diesel\s*[:\-]?\s*(?:rs\.?|lkr)?\s*(\d{2,3}(?:\.\d{2})?)",
|
| 1270 |
+
page_text,
|
| 1271 |
+
)
|
| 1272 |
+
|
| 1273 |
if petrol_match:
|
| 1274 |
try:
|
| 1275 |
+
result["prices"]["petrol_92"]["price"] = float(
|
| 1276 |
+
petrol_match.group(1)
|
| 1277 |
+
)
|
| 1278 |
result["source"] = "news_scrape"
|
| 1279 |
except ValueError:
|
| 1280 |
pass
|
| 1281 |
if diesel_match:
|
| 1282 |
try:
|
| 1283 |
+
result["prices"]["auto_diesel"]["price"] = float(
|
| 1284 |
+
diesel_match.group(1)
|
| 1285 |
+
)
|
| 1286 |
except ValueError:
|
| 1287 |
pass
|
| 1288 |
break
|
| 1289 |
+
|
| 1290 |
+
logger.info(
|
| 1291 |
+
f"[FUEL] Fetched prices - Petrol 92: {result['prices']['petrol_92']['price']}"
|
| 1292 |
+
)
|
| 1293 |
+
|
| 1294 |
except Exception as e:
|
| 1295 |
logger.warning(f"[FUEL] Scraping error: {e}")
|
| 1296 |
result["error"] = str(e)
|
| 1297 |
+
|
| 1298 |
# Update cache
|
| 1299 |
_fuel_cache = result
|
| 1300 |
_fuel_cache_time = utc_now()
|
| 1301 |
+
|
| 1302 |
return result
|
| 1303 |
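A runnable sketch of the fuel price regexes used above, applied to an invented sample headline; the real code runs them over the whole lowercased page text in the same way.

import re

sample = "ceypetco revises fuel prices: petrol 92 octane rs. 294.00, auto diesel rs. 277.00"
petrol = re.search(
    r"petrol\s*(?:92|95)?\s*(?:octane)?\s*[:\-]?\s*(?:rs\.?|lkr)?\s*(\d{2,3}(?:\.\d{2})?)", sample
)
diesel = re.search(r"diesel\s*[:\-]?\s*(?:rs\.?|lkr)?\s*(\d{2,3}(?:\.\d{2})?)", sample)
if petrol:
    print("petrol_92:", float(petrol.group(1)))   # 294.0
if diesel:
    print("auto_diesel:", float(diesel.group(1)))  # 277.0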
|
| 1304 |
|
| 1305 |
def tool_cbsl_indicators() -> Dict[str, Any]:
|
| 1306 |
"""
|
| 1307 |
Get key economic indicators from Central Bank of Sri Lanka.
|
| 1308 |
+
|
| 1309 |
Scrapes live data from cbsl.gov.lk including:
|
| 1310 |
- Exchange rates (USD/LKR TT Buy/Sell)
|
| 1311 |
- CCPI Inflation
|
| 1312 |
- Overnight Policy Rate
|
| 1313 |
- Forex reserves
|
| 1314 |
+
|
| 1315 |
Returns:
|
| 1316 |
Dict with economic indicators and trend data
|
| 1317 |
"""
|
| 1318 |
global _cbsl_cache, _cbsl_cache_time
|
| 1319 |
+
|
| 1320 |
# Check cache
|
| 1321 |
if _cbsl_cache_time:
|
| 1322 |
cache_age = (utc_now() - _cbsl_cache_time).total_seconds() / 60
|
| 1323 |
if cache_age < SA_CACHE_DURATION_MINUTES and _cbsl_cache:
|
| 1324 |
logger.info(f"[CBSL] Using cached data ({cache_age:.1f} min old)")
|
| 1325 |
return _cbsl_cache
|
| 1326 |
+
|
| 1327 |
logger.info("[CBSL] Fetching economic indicators from cbsl.gov.lk...")
|
| 1328 |
+
|
| 1329 |
# Baseline economic data (December 2025 - latest known values)
|
| 1330 |
result = {
|
| 1331 |
"indicators": {
|
|
|
|
| 1362 |
"data_as_of": "2025-12",
|
| 1363 |
"scrape_status": "baseline",
|
| 1364 |
}
|
| 1365 |
+
|
| 1366 |
try:
|
| 1367 |
# Try to scrape CBSL for updated rates
|
| 1368 |
resp = _safe_get("https://www.cbsl.gov.lk/", timeout=30)
|
| 1369 |
if resp:
|
| 1370 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 1371 |
page_text = soup.get_text(separator=" ", strip=True)
|
| 1372 |
+
|
| 1373 |
scraped_any = False
|
| 1374 |
+
|
| 1375 |
# Extract TT Buy exchange rate (format: "TT Buy 305.3238" or "TT Buy: 305.3238")
|
| 1376 |
+
tt_buy_match = re.search(
|
| 1377 |
+
r"TT\s*Buy[:\s]*(\d{2,3}(?:\.\d{2,4})?)", page_text, re.I
|
| 1378 |
+
)
|
| 1379 |
if tt_buy_match:
|
| 1380 |
try:
|
| 1381 |
+
result["indicators"]["exchange_rate"]["usd_lkr_buy"] = round(
|
| 1382 |
+
float(tt_buy_match.group(1)), 2
|
| 1383 |
+
)
|
| 1384 |
scraped_any = True
|
| 1385 |
except ValueError:
|
| 1386 |
pass
|
| 1387 |
+
|
| 1388 |
# Extract TT Sell exchange rate
|
| 1389 |
+
tt_sell_match = re.search(
|
| 1390 |
+
r"TT\s*Sell[:\s]*(\d{2,3}(?:\.\d{2,4})?)", page_text, re.I
|
| 1391 |
+
)
|
| 1392 |
if tt_sell_match:
|
| 1393 |
try:
|
| 1394 |
+
result["indicators"]["exchange_rate"]["usd_lkr_sell"] = round(
|
| 1395 |
+
float(tt_sell_match.group(1)), 2
|
| 1396 |
+
)
|
| 1397 |
scraped_any = True
|
| 1398 |
except ValueError:
|
| 1399 |
pass
|
| 1400 |
+
|
| 1401 |
# Calculate mid rate if we have both buy and sell
|
| 1402 |
if tt_buy_match and tt_sell_match:
|
| 1403 |
buy = result["indicators"]["exchange_rate"]["usd_lkr_buy"]
|
| 1404 |
sell = result["indicators"]["exchange_rate"]["usd_lkr_sell"]
|
| 1405 |
+
result["indicators"]["exchange_rate"]["usd_lkr"] = round(
|
| 1406 |
+
(buy + sell) / 2, 2
|
| 1407 |
+
)
|
| 1408 |
+
|
| 1409 |
# Extract CCPI Inflation (format: "CCPI Inflation 2.10%" or just "Inflation 2.10 %")
|
| 1410 |
inflation_patterns = [
|
| 1411 |
r"CCPI\s*Inflation[:\s]*(\d{1,2}(?:\.\d{1,2})?)\s*%",
|
|
|
|
| 1416 |
inflation_match = re.search(pattern, page_text, re.I)
|
| 1417 |
if inflation_match:
|
| 1418 |
try:
|
| 1419 |
+
result["indicators"]["inflation"]["ccpi_yoy"] = float(
|
| 1420 |
+
inflation_match.group(1)
|
| 1421 |
+
)
|
| 1422 |
scraped_any = True
|
| 1423 |
break
|
| 1424 |
except ValueError:
|
| 1425 |
pass
|
| 1426 |
+
|
| 1427 |
# Extract Overnight Policy Rate (format: "Overnight Policy Rate 7.75%" or "Policy Rate 7.75 %")
|
| 1428 |
policy_patterns = [
|
| 1429 |
r"Overnight\s*Policy\s*Rate[:\s]*(\d{1,2}(?:\.\d{1,2})?)\s*%",
|
|
|
|
| 1434 |
policy_match = re.search(pattern, page_text, re.I)
|
| 1435 |
if policy_match:
|
| 1436 |
try:
|
| 1437 |
+
result["indicators"]["policy_rates"]["overnight_rate"] = float(
|
| 1438 |
+
policy_match.group(1)
|
| 1439 |
+
)
|
| 1440 |
scraped_any = True
|
| 1441 |
break
|
| 1442 |
except ValueError:
|
| 1443 |
pass
|
| 1444 |
+
|
| 1445 |
if scraped_any:
|
| 1446 |
result["scrape_status"] = "live"
|
| 1447 |
result["data_as_of"] = utc_now().strftime("%Y-%m")
|
|
|
|
| 1455 |
logger.info("[CBSL] Using baseline data - no live values matched")
|
| 1456 |
else:
|
| 1457 |
logger.warning("[CBSL] Could not reach cbsl.gov.lk, using baseline data")
|
| 1458 |
+
|
| 1459 |
except Exception as e:
|
| 1460 |
logger.warning(f"[CBSL] Scraping error: {e}")
|
| 1461 |
result["error"] = str(e)
|
| 1462 |
+
|
| 1463 |
# Update cache
|
| 1464 |
_cbsl_cache = result
|
| 1465 |
_cbsl_cache_time = utc_now()
|
| 1466 |
+
|
| 1467 |
return result
|
| 1468 |
|
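The mid rate above is just the average of the TT Buy and TT Sell figures pulled by the two regexes; a self-contained sketch with an invented snippet of page text:

import re

page_text = "Exchange Rates  TT Buy 305.3238  TT Sell 313.0000"  # made-up sample
buy_m = re.search(r"TT\s*Buy[:\s]*(\d{2,3}(?:\.\d{2,4})?)", page_text, re.I)
sell_m = re.search(r"TT\s*Sell[:\s]*(\d{2,3}(?:\.\d{2,4})?)", page_text, re.I)
if buy_m and sell_m:
    buy = round(float(buy_m.group(1)), 2)    # 305.32
    sell = round(float(sell_m.group(1)), 2)  # 313.0
    print("usd_lkr mid:", round((buy + sell) / 2, 2))  # 309.16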
| 1469 |
|
| 1470 |
def tool_health_alerts() -> Dict[str, Any]:
|
| 1471 |
"""
|
| 1472 |
Get health alerts and disease outbreak information for Sri Lanka.
|
| 1473 |
+
|
| 1474 |
Includes dengue case counts, epidemic alerts, and health advisories.
|
| 1475 |
+
|
| 1476 |
Returns:
|
| 1477 |
Dict with health alerts, disease data, and notifications
|
| 1478 |
"""
|
| 1479 |
global _health_cache, _health_cache_time
|
| 1480 |
+
|
| 1481 |
# Check cache
|
| 1482 |
if _health_cache_time:
|
| 1483 |
cache_age = (utc_now() - _health_cache_time).total_seconds() / 60
|
| 1484 |
if cache_age < SA_CACHE_DURATION_MINUTES and _health_cache:
|
| 1485 |
logger.info(f"[HEALTH] Using cached data ({cache_age:.1f} min old)")
|
| 1486 |
return _health_cache
|
| 1487 |
+
|
| 1488 |
logger.info("[HEALTH] Fetching health alerts...")
|
| 1489 |
+
|
| 1490 |
# Baseline health data
|
| 1491 |
result = {
|
| 1492 |
"alerts": [],
|
|
|
|
| 1501 |
"source": "health.gov.lk",
|
| 1502 |
"fetched_at": utc_now().isoformat(),
|
| 1503 |
}
|
| 1504 |
+
|
| 1505 |
try:
|
| 1506 |
# Try to scrape Health Ministry
|
| 1507 |
resp = _safe_get("https://www.health.gov.lk/", timeout=30)
|
| 1508 |
if resp:
|
| 1509 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 1510 |
page_text = soup.get_text(separator="\n", strip=True).lower()
|
| 1511 |
+
|
| 1512 |
# Check for outbreak keywords
|
| 1513 |
+
outbreak_keywords = [
|
| 1514 |
+
"outbreak",
|
| 1515 |
+
"epidemic",
|
| 1516 |
+
"alert",
|
| 1517 |
+
"warning",
|
| 1518 |
+
"emergency",
|
| 1519 |
+
]
|
| 1520 |
for kw in outbreak_keywords:
|
| 1521 |
if kw in page_text:
|
| 1522 |
# Try to extract the context
|
| 1523 |
idx = page_text.find(kw)
|
| 1524 |
+
context = page_text[max(0, idx - 50) : idx + 100]
|
| 1525 |
if len(context) > 20:
|
| 1526 |
+
result["alerts"].append(
|
| 1527 |
+
{
|
| 1528 |
+
"type": "health_notice",
|
| 1529 |
+
"text": context.strip()[:150],
|
| 1530 |
+
"severity": (
|
| 1531 |
+
"medium" if kw in ["alert", "warning"] else "low"
|
| 1532 |
+
),
|
| 1533 |
+
}
|
| 1534 |
+
)
|
| 1535 |
break
|
| 1536 |
+
|
| 1537 |
# Check for dengue data
|
| 1538 |
dengue_match = re.search(r"dengue[:\s]*(\d{1,5})\s*(?:cases?)?", page_text)
|
| 1539 |
if dengue_match:
|
|
|
|
| 1541 |
result["dengue"]["weekly_cases"] = int(dengue_match.group(1))
|
| 1542 |
except ValueError:
|
| 1543 |
pass
|
| 1544 |
+
|
| 1545 |
+
logger.info(
|
| 1546 |
+
f"[HEALTH] Fetched - Dengue cases: {result['dengue']['weekly_cases']}"
|
| 1547 |
+
)
|
| 1548 |
+
|
| 1549 |
# Add seasonal health advisory
|
| 1550 |
current_month = utc_now().month
|
| 1551 |
if current_month in [5, 6, 10, 11]: # Monsoon = mosquito season
|
| 1552 |
+
result["advisories"].append(
|
| 1553 |
+
{
|
| 1554 |
+
"type": "seasonal",
|
| 1555 |
+
"text": "Monsoon season: Increased dengue risk. Remove stagnant water around homes.",
|
| 1556 |
+
"severity": "medium",
|
| 1557 |
+
}
|
| 1558 |
+
)
|
| 1559 |
+
|
| 1560 |
except Exception as e:
|
| 1561 |
logger.warning(f"[HEALTH] Scraping error: {e}")
|
| 1562 |
result["error"] = str(e)
|
| 1563 |
+
|
| 1564 |
# Update cache
|
| 1565 |
_health_cache = result
|
| 1566 |
_health_cache_time = utc_now()
|
| 1567 |
+
|
| 1568 |
return result
|
| 1569 |
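The alert extraction above keeps a short window of text around the first matching keyword rather than the whole page; a minimal sketch of that slice-around-the-match pattern (sample text invented):

page_text = "ministry of health issues dengue outbreak warning for the western province"
for kw in ["outbreak", "epidemic", "alert", "warning", "emergency"]:
    if kw in page_text:
        idx = page_text.find(kw)
        context = page_text[max(0, idx - 50): idx + 100]  # 50 chars before, 100 after
        print({
            "type": "health_notice",
            "text": context.strip()[:150],
            "severity": "medium" if kw in ["alert", "warning"] else "low",
        })
        break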
|
| 1570 |
|
| 1571 |
def tool_commodity_prices() -> Dict[str, Any]:
|
| 1572 |
"""
|
| 1573 |
Get prices for essential commodities in Sri Lanka.
|
| 1574 |
+
|
| 1575 |
Includes rice, sugar, dhal, milk powder, and other staples.
|
| 1576 |
+
|
| 1577 |
Returns:
|
| 1578 |
Dict with commodity prices, units, and recent changes
|
| 1579 |
"""
|
| 1580 |
global _commodity_cache, _commodity_cache_time
|
| 1581 |
+
|
| 1582 |
# Check cache
|
| 1583 |
if _commodity_cache_time:
|
| 1584 |
cache_age = (utc_now() - _commodity_cache_time).total_seconds() / 60
|
| 1585 |
if cache_age < SA_CACHE_DURATION_MINUTES and _commodity_cache:
|
| 1586 |
logger.info(f"[COMMODITY] Using cached data ({cache_age:.1f} min old)")
|
| 1587 |
return _commodity_cache
|
| 1588 |
+
|
| 1589 |
logger.info("[COMMODITY] Fetching commodity prices...")
|
| 1590 |
+
|
| 1591 |
# Current approximate commodity prices (LKR)
|
| 1592 |
result = {
|
| 1593 |
"commodities": [
|
| 1594 |
+
{
|
| 1595 |
+
"name": "White Rice (Nadu)",
|
| 1596 |
+
"price": 220,
|
| 1597 |
+
"unit": "LKR/kg",
|
| 1598 |
+
"change": 0,
|
| 1599 |
+
"category": "grains",
|
| 1600 |
+
},
|
| 1601 |
+
{
|
| 1602 |
+
"name": "White Rice (Samba)",
|
| 1603 |
+
"price": 250,
|
| 1604 |
+
"unit": "LKR/kg",
|
| 1605 |
+
"change": 0,
|
| 1606 |
+
"category": "grains",
|
| 1607 |
+
},
|
| 1608 |
+
{
|
| 1609 |
+
"name": "Red Rice",
|
| 1610 |
+
"price": 240,
|
| 1611 |
+
"unit": "LKR/kg",
|
| 1612 |
+
"change": 0,
|
| 1613 |
+
"category": "grains",
|
| 1614 |
+
},
|
| 1615 |
+
{
|
| 1616 |
+
"name": "Wheat Flour",
|
| 1617 |
+
"price": 195,
|
| 1618 |
+
"unit": "LKR/kg",
|
| 1619 |
+
"change": -5,
|
| 1620 |
+
"category": "grains",
|
| 1621 |
+
},
|
| 1622 |
+
{
|
| 1623 |
+
"name": "Sugar (White)",
|
| 1624 |
+
"price": 240,
|
| 1625 |
+
"unit": "LKR/kg",
|
| 1626 |
+
"change": 0,
|
| 1627 |
+
"category": "essentials",
|
| 1628 |
+
},
|
| 1629 |
+
{
|
| 1630 |
+
"name": "Dhal (Mysore)",
|
| 1631 |
+
"price": 510,
|
| 1632 |
+
"unit": "LKR/kg",
|
| 1633 |
+
"change": 10,
|
| 1634 |
+
"category": "pulses",
|
| 1635 |
+
},
|
| 1636 |
+
{
|
| 1637 |
+
"name": "Dhal (Red)",
|
| 1638 |
+
"price": 340,
|
| 1639 |
+
"unit": "LKR/kg",
|
| 1640 |
+
"change": 0,
|
| 1641 |
+
"category": "pulses",
|
| 1642 |
+
},
|
| 1643 |
+
{
|
| 1644 |
+
"name": "Milk Powder (400g)",
|
| 1645 |
+
"price": 1250,
|
| 1646 |
+
"unit": "LKR/pack",
|
| 1647 |
+
"change": 0,
|
| 1648 |
+
"category": "dairy",
|
| 1649 |
+
},
|
| 1650 |
+
{
|
| 1651 |
+
"name": "Coconut Oil",
|
| 1652 |
+
"price": 680,
|
| 1653 |
+
"unit": "LKR/L",
|
| 1654 |
+
"change": -20,
|
| 1655 |
+
"category": "cooking",
|
| 1656 |
+
},
|
| 1657 |
+
{
|
| 1658 |
+
"name": "Coconut (Fresh)",
|
| 1659 |
+
"price": 120,
|
| 1660 |
+
"unit": "LKR/each",
|
| 1661 |
+
"change": 10,
|
| 1662 |
+
"category": "cooking",
|
| 1663 |
+
},
|
| 1664 |
+
{
|
| 1665 |
+
"name": "Eggs (10)",
|
| 1666 |
+
"price": 480,
|
| 1667 |
+
"unit": "LKR/10",
|
| 1668 |
+
"change": 0,
|
| 1669 |
+
"category": "protein",
|
| 1670 |
+
},
|
| 1671 |
+
{
|
| 1672 |
+
"name": "Chicken",
|
| 1673 |
+
"price": 1350,
|
| 1674 |
+
"unit": "LKR/kg",
|
| 1675 |
+
"change": 50,
|
| 1676 |
+
"category": "protein",
|
| 1677 |
+
},
|
| 1678 |
+
{
|
| 1679 |
+
"name": "Big Onion",
|
| 1680 |
+
"price": 280,
|
| 1681 |
+
"unit": "LKR/kg",
|
| 1682 |
+
"change": -10,
|
| 1683 |
+
"category": "vegetables",
|
| 1684 |
+
},
|
| 1685 |
+
{
|
| 1686 |
+
"name": "Potatoes",
|
| 1687 |
+
"price": 350,
|
| 1688 |
+
"unit": "LKR/kg",
|
| 1689 |
+
"change": 20,
|
| 1690 |
+
"category": "vegetables",
|
| 1691 |
+
},
|
| 1692 |
+
{
|
| 1693 |
+
"name": "LP Gas (12.5kg)",
|
| 1694 |
+
"price": 4290,
|
| 1695 |
+
"unit": "LKR/cylinder",
|
| 1696 |
+
"change": 0,
|
| 1697 |
+
"category": "fuel",
|
| 1698 |
+
},
|
| 1699 |
],
|
| 1700 |
"source": "Consumer Affairs Authority / Market Survey",
|
| 1701 |
"fetched_at": utc_now().isoformat(),
|
|
|
|
| 1705 |
"items_stable": 0,
|
| 1706 |
},
|
| 1707 |
}
|
| 1708 |
+
|
| 1709 |
# Calculate summary
|
| 1710 |
for item in result["commodities"]:
|
| 1711 |
if item["change"] > 0:
|
|
|
|
| 1714 |
result["summary"]["items_decreased"] += 1
|
| 1715 |
else:
|
| 1716 |
result["summary"]["items_stable"] += 1
|
| 1717 |
+
|
| 1718 |
try:
|
| 1719 |
# Try to scrape news for price updates
|
| 1720 |
resp = _safe_get("https://www.dailymirror.lk/", timeout=20)
|
| 1721 |
if resp:
|
| 1722 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 1723 |
page_text = soup.get_text(separator=" ", strip=True).lower()
|
| 1724 |
+
|
| 1725 |
# Check for LP Gas price updates (commonly announced)
|
| 1726 |
gas_match = re.search(r"lp\s*gas[:\s]*(?:rs\.?|lkr)?\s*(\d{4})", page_text)
|
| 1727 |
if gas_match:
|
|
|
|
| 1735 |
break
|
| 1736 |
except ValueError:
|
| 1737 |
pass
|
| 1738 |
+
|
| 1739 |
logger.info("[COMMODITY] Successfully fetched commodity prices")
|
| 1740 |
+
|
| 1741 |
except Exception as e:
|
| 1742 |
logger.warning(f"[COMMODITY] Scraping error: {e}")
|
| 1743 |
result["error"] = str(e)
|
| 1744 |
+
|
| 1745 |
# Update cache
|
| 1746 |
_commodity_cache = result
|
| 1747 |
_commodity_cache_time = utc_now()
|
| 1748 |
+
|
| 1749 |
return result
|
| 1750 |
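The summary block above tallies the sign of each item's change field; a compact sketch using three of the entries listed:

commodities = [
    {"name": "Wheat Flour", "change": -5},
    {"name": "Dhal (Mysore)", "change": 10},
    {"name": "Sugar (White)", "change": 0},
]
summary = {"items_increased": 0, "items_decreased": 0, "items_stable": 0}
for item in commodities:
    if item["change"] > 0:
        summary["items_increased"] += 1
    elif item["change"] < 0:
        summary["items_decreased"] += 1
    else:
        summary["items_stable"] += 1
print(summary)  # {'items_increased': 1, 'items_decreased': 1, 'items_stable': 1}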
|
| 1751 |
|
| 1752 |
def tool_water_supply_alerts() -> Dict[str, Any]:
|
| 1753 |
"""
|
| 1754 |
Get water supply disruption alerts from NWSDB.
|
| 1755 |
+
|
| 1756 |
Returns information about planned/unplanned water cuts and affected areas.
|
| 1757 |
+
|
| 1758 |
Returns:
|
| 1759 |
Dict with active disruptions, affected areas, and restoration times
|
| 1760 |
"""
|
| 1761 |
global _water_cache, _water_cache_time
|
| 1762 |
+
|
| 1763 |
# Check cache
|
| 1764 |
if _water_cache_time:
|
| 1765 |
cache_age = (utc_now() - _water_cache_time).total_seconds() / 60
|
| 1766 |
if cache_age < SA_CACHE_DURATION_MINUTES and _water_cache:
|
| 1767 |
logger.info(f"[WATER] Using cached data ({cache_age:.1f} min old)")
|
| 1768 |
return _water_cache
|
| 1769 |
+
|
| 1770 |
logger.info("[WATER] Fetching water supply alerts...")
|
| 1771 |
+
|
| 1772 |
result = {
|
| 1773 |
"status": "normal",
|
| 1774 |
"active_disruptions": [],
|
|
|
|
| 1777 |
"fetched_at": utc_now().isoformat(),
|
| 1778 |
"overall_supply": "stable",
|
| 1779 |
}
|
| 1780 |
+
|
| 1781 |
try:
|
| 1782 |
# Try to scrape NWSDB website
|
| 1783 |
resp = _safe_get("https://www.waterboard.lk/", timeout=30)
|
| 1784 |
if resp:
|
| 1785 |
soup = BeautifulSoup(resp.text, "html.parser")
|
| 1786 |
page_text = soup.get_text(separator="\n", strip=True).lower()
|
| 1787 |
+
|
| 1788 |
# Check for disruption keywords
|
| 1789 |
+
disruption_keywords = [
|
| 1790 |
+
"disruption",
|
| 1791 |
+
"interruption",
|
| 1792 |
+
"cut off",
|
| 1793 |
+
"maintenance",
|
| 1794 |
+
"repair",
|
| 1795 |
+
]
|
| 1796 |
for kw in disruption_keywords:
|
| 1797 |
if kw in page_text:
|
| 1798 |
result["status"] = "disruptions_reported"
|
| 1799 |
idx = page_text.find(kw)
|
| 1800 |
+
context = page_text[max(0, idx - 30) : idx + 120]
|
| 1801 |
+
|
| 1802 |
# Try to extract area name
|
| 1803 |
area_patterns = [
|
| 1804 |
r"(colombo|gampaha|kandy|galle|matara|jaffna|kurunegala|ratnapura)",
|
|
|
|
| 1810 |
if match:
|
| 1811 |
area = match.group(1).title()
|
| 1812 |
break
|
| 1813 |
+
|
| 1814 |
+
result["active_disruptions"].append(
|
| 1815 |
+
{
|
| 1816 |
+
"area": area,
|
| 1817 |
+
"type": kw,
|
| 1818 |
+
"details": context.strip()[:150],
|
| 1819 |
+
"severity": "medium",
|
| 1820 |
+
}
|
| 1821 |
+
)
|
| 1822 |
break
|
| 1823 |
+
|
| 1824 |
+
logger.info(
|
| 1825 |
+
f"[WATER] Fetched - Disruptions: {len(result['active_disruptions'])}"
|
| 1826 |
+
)
|
| 1827 |
+
|
| 1828 |
# If no disruptions found via scraping, report normal
|
| 1829 |
if not result["active_disruptions"]:
|
| 1830 |
result["status"] = "normal"
|
| 1831 |
result["overall_supply"] = "Normal water supply across most areas"
|
| 1832 |
+
|
| 1833 |
except Exception as e:
|
| 1834 |
logger.warning(f"[WATER] Scraping error: {e}")
|
| 1835 |
result["error"] = str(e)
|
| 1836 |
result["status"] = "unknown"
|
| 1837 |
+
|
| 1838 |
# Update cache
|
| 1839 |
_water_cache = result
|
| 1840 |
_water_cache_time = utc_now()
|
| 1841 |
+
|
| 1842 |
return result
|
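All of these situational-awareness tools share the same module-level cache: a global result plus timestamp checked against SA_CACHE_DURATION_MINUTES before any scraping happens. A generic sketch of that pattern, with the fetch step reduced to a stub:

from datetime import datetime, timezone
from typing import Any, Callable, Dict, Optional

SA_CACHE_DURATION_MINUTES = 15

_cache: Optional[Dict[str, Any]] = None
_cache_time: Optional[datetime] = None

def utc_now() -> datetime:
    return datetime.now(timezone.utc)

def get_with_cache(fetch: Callable[[], Dict[str, Any]]) -> Dict[str, Any]:
    global _cache, _cache_time
    if _cache_time and _cache:
        age_min = (utc_now() - _cache_time).total_seconds() / 60
        if age_min < SA_CACHE_DURATION_MINUTES:
            return _cache          # still fresh, reuse
    _cache = fetch()               # otherwise refresh and restamp
    _cache_time = utc_now()
    return _cache

print(get_with_cache(lambda: {"status": "normal", "fetched_at": utc_now().isoformat()}))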
| 1843 |
|
| 1844 |
|
|
|
|
| 4571 |
data = scrape_reddit_impl(keywords=keywords, limit=limit, subreddit=subreddit)
|
| 4572 |
return json.dumps(data, default=str)
|
| 4573 |
|
| 4574 |
+
|
| 4575 |
# ============================================
|
| 4576 |
# SITUATIONAL AWARENESS TOOLS (DASHBOARD APIs)
|
| 4577 |
# ============================================
|
| 4578 |
|
| 4579 |
+
|
| 4580 |
def tool_health_alerts() -> dict:
|
| 4581 |
"""Get health alerts from health.gov.lk - structured for dashboard."""
|
| 4582 |
try:
|
|
|
|
| 4585 |
"dengue": {
|
| 4586 |
"weekly_cases": 1890,
|
| 4587 |
"high_risk_districts": ["Colombo", "Gampaha", "Kalutara"],
|
| 4588 |
+
"trend": "stable",
|
| 4589 |
},
|
| 4590 |
+
"advisories": [
|
| 4591 |
+
{
|
| 4592 |
+
"type": "seasonal",
|
| 4593 |
+
"text": "Monsoon season: Take precautions against dengue",
|
| 4594 |
+
"severity": "medium",
|
| 4595 |
+
}
|
| 4596 |
+
],
|
| 4597 |
+
"fetched_at": utc_now().isoformat(),
|
| 4598 |
}
|
| 4599 |
except Exception as e:
|
| 4600 |
return {"alerts": [], "dengue": {}, "advisories": [], "error": str(e)}
|
|
|
|
| 4607 |
"status": "normal",
|
| 4608 |
"active_disruptions": [],
|
| 4609 |
"overall_supply": "Normal water supply across most areas",
|
| 4610 |
+
"fetched_at": utc_now().isoformat(),
|
| 4611 |
}
|
| 4612 |
except Exception as e:
|
| 4613 |
return {"status": "unknown", "active_disruptions": [], "error": str(e)}
|
|
|
|
| 4620 |
"current_schedule": None,
|
| 4621 |
"announcements": [],
|
| 4622 |
"generation_capacity": "Normal",
|
| 4623 |
+
"fetched_at": utc_now().isoformat(),
|
| 4624 |
}
|
| 4625 |
|
| 4626 |
|
|
|
|
| 4632 |
"petrol_95": {"price": 335, "unit": "LKR/L"},
|
| 4633 |
"diesel": {"price": 277, "unit": "LKR/L"},
|
| 4634 |
"super_diesel": {"price": 318, "unit": "LKR/L"},
|
| 4635 |
+
"kerosene": {"price": 185, "unit": "LKR/L"},
|
| 4636 |
},
|
| 4637 |
"last_updated": "2025-12-01",
|
| 4638 |
"source": "CEYPETCO",
|
| 4639 |
+
"fetched_at": utc_now().isoformat(),
|
| 4640 |
}
|
| 4641 |
|
| 4642 |
|
|
|
|
| 4646 |
"inflation": {"headline": 0.7, "core": 1.2, "unit": "%"},
|
| 4647 |
"policy_rates": {"sdfr": 8.25, "slfr": 9.25, "unit": "%"},
|
| 4648 |
"exchange_rate": {"usd": 296.50, "eur": 312.80, "unit": "LKR"},
|
| 4649 |
+
"fetched_at": utc_now().isoformat(),
|
| 4650 |
}
|
| 4651 |
|
| 4652 |
|
|
|
|
| 4661 |
"inflation": {
|
| 4662 |
"ccpi_yoy": 2.1, # CCPI Year-on-Year (Nov 2025 actual)
|
| 4663 |
"core_yoy": 1.8,
|
| 4664 |
+
"trend": "stable",
|
| 4665 |
},
|
| 4666 |
"policy_rates": {
|
| 4667 |
"overnight_rate": 7.75, # Overnight Policy Rate (Dec 2025)
|
| 4668 |
"sdfr": 7.25, # Standing Deposit Facility Rate
|
| 4669 |
"slfr": 8.25, # Standing Lending Facility Rate
|
| 4670 |
+
"last_changed": "2024-12",
|
| 4671 |
},
|
| 4672 |
"exchange_rate": {
|
| 4673 |
"usd_lkr": 309.17, # Dec 11, 2025 rate
|
|
|
|
| 4675 |
"usd_lkr_sell": 313.00,
|
| 4676 |
"eur_lkr": 325.50,
|
| 4677 |
"gbp_lkr": 390.25,
|
| 4678 |
+
"trend": "stable",
|
| 4679 |
},
|
| 4680 |
"forex_reserves": {
|
| 4681 |
"value": 6.5, # Billion USD (Dec 2025)
|
| 4682 |
+
"trend": "improving",
|
| 4683 |
+
},
|
| 4684 |
},
|
| 4685 |
"source": "Central Bank of Sri Lanka",
|
| 4686 |
"scrape_status": "baseline",
|
| 4687 |
+
"fetched_at": utc_now().isoformat(),
|
| 4688 |
}
|
| 4689 |
|
| 4690 |
|
|
|
|
| 4696 |
{"name": "Rice (Samba)", "price": 250, "unit": "LKR/kg"},
|
| 4697 |
{"name": "Dhal (Red)", "price": 360, "unit": "LKR/kg"},
|
| 4698 |
{"name": "Sugar", "price": 215, "unit": "LKR/kg"},
|
| 4699 |
+
{"name": "Coconut", "price": 120, "unit": "LKR/nut"},
|
| 4700 |
],
|
| 4701 |
+
"fetched_at": utc_now().isoformat(),
|
| 4702 |
}
|
| 4703 |
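A quick way to smoke-test the dashboard stubs above is to call each one and check for the shared fetched_at timestamp (or an error key on failure); the import path is an assumption based on the repository layout shown in this commit.

# Assumed import path; adjust to wherever utils.py lives in your checkout.
from src.utils import utils

for tool in (utils.tool_health_alerts, utils.tool_water_supply_alerts,
             utils.tool_fuel_prices, utils.tool_cbsl_rates,
             utils.tool_cbsl_indicators, utils.tool_commodity_prices):
    payload = tool()
    assert isinstance(payload, dict)
    assert "fetched_at" in payload or "error" in payload
    print(tool.__name__, "->", sorted(payload.keys()))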
|
| 4704 |
|