Spaces:

kingabzpro
/

RegRadar

Sleeping

App Files Files Community

Abid Ali Awan committed on Jun 19, 2025

Commit

48e84df

1 Parent(s): ad9dfde

Refactor RegRadarAgent to simplify parameter extraction by removing LLM function calling. Update UIHandler to pass parameters directly to the regulatory query processing function. Enhance memory saving functionality by implementing background threading for improved performance.

Browse files

Files changed (4) hide show

agents/reg_radar.py +23 -122
agents/ui_handler.py +10 -5
tools/llm.py +0 -26
tools/web_tools.py +25 -38

agents/reg_radar.py CHANGED Viewed

@@ -1,7 +1,7 @@
 import json
 from typing import Dict, Tuple
-from tools.llm import call_llm, call_llm_with_function, stream_llm
 from tools.memory_tools import MemoryTools
 from tools.web_tools import WebTools
@@ -29,124 +29,24 @@ class RegRadarAgent:
             return "search", "Regulatory Search"
     def extract_parameters(self, message: str) -> Dict:
-        """Extract industry, region, and keywords from the query using LLM function calling"""
-        function_schema = {
-            "name": "extract_parameters",
-            "description": (
-                "Extract industry, region, and keywords from a user query.\n"
-                "- 'industry': If not explicitly mentioned, infer the most relevant industry from the context (e.g., if the query is about SEC regulations, infer 'fintech' or 'finance').\n"
-                "- 'region': The country or region explicitly mentioned (e.g., US, EU, UK, Asia, Global).\n"
-                "- 'keywords': Only the most important regulatory topics or terms (e.g., 'data privacy', 'GDPR', 'ESG compliance', 'SEC regulations'), not generic words or verbs.\n"
-                "Examples:\n"
-                "- 'Show me the latest SEC regulations for fintech' => industry: 'fintech', region: 'US', keywords: 'SEC regulations'\n"
-                "- 'What are the new data privacy rules in the EU?' => industry: 'General', region: 'EU', keywords: 'data privacy'\n"
-                "- 'Scan for healthcare regulations in the US' => industry: 'healthcare', region: 'US', keywords: 'healthcare regulations'\n"
-                "- 'Any updates on ESG compliance for energy companies?' => industry: 'energy', region: 'US', keywords: 'ESG compliance'\n"
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "industry": {
-                        "type": "string",
-                        "description": "The industry mentioned or implied in the query (e.g., fintech, healthcare, energy, general).",
-                    },
-                    "region": {
-                        "type": "string",
-                        "description": "The region or country explicitly mentioned in the query (e.g., US, EU, UK, Asia).",
-                    },
-                    "keywords": {
-                        "type": "string",
-                        "description": "A concise list of the most important regulatory topics or terms from the query, separated by commas. Do NOT return the full user question, generic words, or verbs.",
-                    },
-                },
-                "required": ["industry", "region", "keywords"],
-            },
-        }
-        params = call_llm_with_function(message, function_schema)
-        # Fallback: context-aware extraction if LLM fails
-        if not params or not all(
-            k in params for k in ("industry", "region", "keywords")
-        ):
-            import re
-            # Infer industry from context
-            industry = "General"
-            industry_map = {
-                "fintech": ["fintech", "finance", "sec", "bank", "investment"],
-                "healthcare": ["healthcare", "medical", "pharma", "hospital"],
-                "energy": ["energy", "oil", "gas", "renewable", "power"],
-                "technology": ["technology", "tech", "ai", "software", "it", "cyber"],
-                "retail": ["retail", "ecommerce", "shopping", "store"],
-                "general": [],
-            }
-            for ind, keywords in industry_map.items():
-                if any(word in message.lower() for word in keywords):
-                    industry = ind
-                    break
-            # Extract region
-            region_match = re.search(
-                r"\b(EU|US|UK|Asia|Europe|America|Canada|Australia|India|China|Japan|Global)\b",
-                message,
-                re.IGNORECASE,
-            )
-            region = region_match.group(1).upper() if region_match else "US"
-            # Extract keywords: regulatory terms and meaningful noun phrases only
-            regulatory_terms = [
-                "regulation",
-                "regulations",
-                "compliance",
-                "GDPR",
-                "data privacy",
-                "SEC",
-                "ESG",
-                "law",
-                "rules",
-                "requirements",
-            ]
-            found_terms = [
-                term for term in regulatory_terms if term.lower() in message.lower()
-            ]
-            # Multi-word capitalized noun phrases (e.g., 'data privacy', 'SEC regulations')
-            noun_phrases = re.findall(r"([A-Z][a-z]+(?: [a-z]+)+)", message)
-            # Remove question words and generic words
-            question_words = {
-                "what",
-                "which",
-                "who",
-                "whom",
-                "whose",
-                "when",
-                "where",
-                "why",
-                "how",
-            }
-            generic_words = {
-                "rules",
-                "regulation",
-                "regulations",
-                "requirement",
-                "requirements",
-                "law",
-                "laws",
-            }
-            filtered_phrases = [
-                phrase
-                for phrase in noun_phrases
-                if phrase.split()[0].lower() not in question_words
-                and phrase.lower() not in generic_words
-            ]
-            # Combine and deduplicate
-            keywords_set = set(found_terms + filtered_phrases)
-            # Remove single generic words
-            keywords_set = {
-                kw
-                for kw in keywords_set
-                if kw.lower() not in question_words and kw.lower() not in generic_words
-            }
-            keywords = ", ".join(keywords_set)
-            if not keywords and found_terms:
-                keywords = found_terms[0]
-            params = {"industry": industry, "region": region, "keywords": keywords}
         return params
     def is_regulatory_query(self, message: str) -> bool:
@@ -160,13 +60,14 @@ class RegRadarAgent:
         intent = call_llm(intent_prompt).strip().lower()
         return not intent.startswith("n")
-    def process_regulatory_query(self, message: str):
         """Process a regulatory query and return results"""
         # Determine the intended tool
         tool_key, tool_name = self.determine_intended_tool(message)
-        # Extract parameters
-        params = self.extract_parameters(message)
         # Execute tool (crawl sites)
         crawl_results = self.web_tools.crawl_regulatory_sites(

 import json
 from typing import Dict, Tuple
+from tools.llm import call_llm, stream_llm
 from tools.memory_tools import MemoryTools
 from tools.web_tools import WebTools
             return "search", "Regulatory Search"
     def extract_parameters(self, message: str) -> Dict:
         """Extract industry, region, and keywords from the query using the LLM.

         The model is asked to reply with a JSON object. The reply is parsed
         defensively: anything around the outermost ``{...}`` span (Markdown
         code fences, explanatory prose) is ignored, and every expected key is
         guaranteed to be present in the result.

         Args:
             message: The raw user query.

         Returns:
             A dict with exactly the keys ``industry``, ``region`` and
             ``keywords``, each a string. A key that is missing, blank, or not
             a string/list in the model's reply falls back to its default
             (``"General"``, ``"US"`` and ``""`` respectively).

         Note:
             Exceptions raised by ``call_llm`` itself are not caught here.
         """
         defaults = {"industry": "General", "region": "US", "keywords": ""}
         prompt = f"""
         Extract the following information from the user query below and return ONLY a valid JSON object with keys: industry, region, keywords.
         - industry: The industry mentioned or implied (e.g., fintech, healthcare, energy, general).
         - region: The region or country explicitly mentioned (e.g., US, EU, UK, Asia, Global).
         - keywords: The most important regulatory topics or terms, separated by commas. Do NOT include generic words or verbs.
         User query: {message}
         Example output:
         {{"industry": "fintech", "region": "US", "keywords": "SEC regulations"}}
         """
         response = call_llm(prompt)
         params = dict(defaults)
         if not isinstance(response, str):
             return params
         # Models frequently wrap the object in ``` fences or add a sentence
         # around it, so parse only the outermost {...} span of the reply.
         start = response.find("{")
         end = response.rfind("}")
         if start == -1 or end <= start:
             return params
         try:
             parsed = json.loads(response[start : end + 1])
         except ValueError:
             # json.JSONDecodeError is a ValueError: unparsable reply -> defaults.
             return params
         for key in defaults:
             value = parsed.get(key)
             if isinstance(value, list):
                 # Tolerate keywords (or any field) returned as a JSON array.
                 value = ", ".join(str(item) for item in value)
             if isinstance(value, str) and value.strip():
                 params[key] = value.strip()
         return params
     def is_regulatory_query(self, message: str) -> bool:
         intent = call_llm(intent_prompt).strip().lower()
         return not intent.startswith("n")
+    def process_regulatory_query(self, message: str, params: dict = None):
         """Process a regulatory query and return results"""
         # Determine the intended tool
         tool_key, tool_name = self.determine_intended_tool(message)
+        # Extract parameters only if not provided
+        if params is None:
+            params = self.extract_parameters(message)
         # Execute tool (crawl sites)
         crawl_results = self.web_tools.crawl_regulatory_sites(

agents/ui_handler.py CHANGED Viewed

@@ -1,3 +1,4 @@
 import time
 import gradio as gr
@@ -81,7 +82,7 @@ class UIHandler:
         yield history, "", gr.update(interactive=False)
         # Process the regulatory query
-        results = self.agent.process_regulatory_query(message)
         crawl_results = results["crawl_results"]
         memory_results = results["memory_results"]
@@ -159,10 +160,7 @@ Found {len(memory_results)} similar past queries in memory.
             history[-1] = ChatMessage(role="assistant", content=streaming_content)
             yield history, "", gr.update(interactive=False)
-        # Save to memory
-        self.agent.memory_tools.save_to_memory("user", message, streaming_content)
-        # Show completion time
         elapsed = time.time() - start_time
         history.append(
             ChatMessage(
@@ -172,6 +170,13 @@ Found {len(memory_results)} similar past queries in memory.
         # Re-enable input box at the end
         yield history, "", gr.update(interactive=True)
     def delayed_clear(self):
         """Sleep briefly, then return the values used to clear the UI.

         Returns:
             A 3-tuple of an empty list, an empty string, and
             ``gr.update(interactive=True)``. Presumably these map to the chat
             history, the input box text, and the input box state; confirm
             against the Gradio event wiring.
         """
         time.sleep(0.1)  # 100ms delay to allow generator cancellation
         return [], "", gr.update(interactive=True)

+import threading
 import time
 import gradio as gr
         yield history, "", gr.update(interactive=False)
         # Process the regulatory query
+        results = self.agent.process_regulatory_query(message, params)
         crawl_results = results["crawl_results"]
         memory_results = results["memory_results"]
             history[-1] = ChatMessage(role="assistant", content=streaming_content)
             yield history, "", gr.update(interactive=False)
+        # Show completion time (before saving to memory)
         elapsed = time.time() - start_time
         history.append(
             ChatMessage(
         # Re-enable input box at the end
         yield history, "", gr.update(interactive=True)
+        # Save to memory in the background
+        threading.Thread(
+            target=self.agent.memory_tools.save_to_memory,
+            args=("user", message, streaming_content),
+            daemon=True,
+        ).start()
     def delayed_clear(self):
         """Sleep briefly, then return the values used to clear the UI.

         Returns:
             A 3-tuple of an empty list, an empty string, and
             ``gr.update(interactive=True)``. Presumably these map to the chat
             history, the input box text, and the input box state; confirm
             against the Gradio event wiring.
         """
         time.sleep(0.1)  # 100ms delay to allow generator cancellation
         return [], "", gr.update(interactive=True)

tools/llm.py CHANGED Viewed

@@ -43,29 +43,3 @@ def stream_llm(prompt: str, temperature: float = DEFAULT_LLM_TEMPERATURE):
                 yield delta
     except Exception as e:
         yield f"Error: {str(e)}"
-def call_llm_with_function(
-    user_message: str,
-    function_schema: dict,
-    temperature: float = DEFAULT_LLM_TEMPERATURE,
-) -> dict:
-    """Call the LLM with function calling and return extracted arguments as a dict."""
-    try:
-        response = client.chat.completions.create(
-            model=DEFAULT_LLM_MODEL,
-            messages=[{"role": "user", "content": user_message}],
-            functions=[function_schema],
-            function_call="auto",
-            temperature=temperature,
-        )
-        function_call = response.choices[0].message.function_call
-        if function_call and hasattr(function_call, "arguments"):
-            import json
-            return json.loads(function_call.arguments)
-        else:
-            return {}
-    except Exception as e:
-        print(f"LLM function call error: {e}")
-        return {}

                 yield delta
     except Exception as e:
         yield f"Error: {str(e)}"

tools/web_tools.py CHANGED Viewed

@@ -4,11 +4,18 @@ from typing import Dict
 from tavily import TavilyClient
 from config.settings import REGULATORY_SOURCES, TAVILY_API_KEY
 # Initialize Tavily client
 tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
 class WebTools:
     def __init__(self):
         self.cached_searches = {}
@@ -78,43 +85,23 @@ class WebTools:
         return results
     def extract_parameters(self, message: str) -> Dict:
-        """Extract industry, region, and keywords from the query using LLM function calling only"""
-        function_schema = {
-            "name": "extract_parameters",
-            "description": (
-                "Extract industry, region, and keywords from a user query.\n"
-                "- 'industry': The main industry mentioned or implied (e.g., fintech, healthcare, energy, general).\n"
-                "- 'region': The country or region explicitly mentioned (e.g., US, EU, UK, Asia).\n"
-                "- 'keywords': Only the most important regulatory topics or terms (e.g., 'data privacy', 'GDPR', 'ESG compliance', 'SEC regulations'), not generic words or verbs.\n"
-                "Examples:\n"
-                "- 'Show me the latest SEC regulations for fintech' => industry: 'fintech', region: 'US', keywords: 'SEC regulations'\n"
-                "- 'What are the new data privacy rules in the EU?' => industry: 'General', region: 'EU', keywords: 'data privacy'\n"
-                "- 'Scan for healthcare regulations in the US' => industry: 'healthcare', region: 'US', keywords: 'healthcare regulations'\n"
-                "- 'Any updates on ESG compliance for energy companies?' => industry: 'energy', region: 'US', keywords: 'ESG compliance'\n"
-            ),
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "industry": {
-                        "type": "string",
-                        "description": "The industry mentioned or implied in the query (e.g., fintech, healthcare, energy, general).",
-                    },
-                    "region": {
-                        "type": "string",
-                        "description": "The region or country explicitly mentioned in the query (e.g., US, EU, UK, Asia).",
-                    },
-                    "keywords": {
-                        "type": "string",
-                        "description": "A concise list of the most important regulatory topics or terms from the query, separated by commas. Do NOT return the full user question, generic words, or verbs.",
-                    },
-                },
-                "required": ["industry", "region", "keywords"],
-            },
-        }
-        params = call_llm_with_function(message, function_schema)
-        # Optionally, you can add a minimal fallback if params is None or missing keys
-        if not params or not all(
-            k in params for k in ("industry", "region", "keywords")
-        ):
             params = {"industry": "", "region": "", "keywords": ""}
         return params

 from tavily import TavilyClient
 from config.settings import REGULATORY_SOURCES, TAVILY_API_KEY
+from tools.llm import call_llm
 # Initialize Tavily client
 tavily_client = TavilyClient(api_key=TAVILY_API_KEY)
+class ChatMessage:
+    def __init__(self, role, content):
+        self.role = role
+        self.content = content
 class WebTools:
     def __init__(self):
         self.cached_searches = {}
         return results
     def extract_parameters(self, message: str) -> Dict:
         """Extract industry, region, and keywords from the query using the LLM.

         The model is asked to reply with a JSON object. The reply is parsed
         defensively: anything around the outermost ``{...}`` span (Markdown
         code fences, explanatory prose) is ignored, and every expected key is
         guaranteed to be present in the result.

         Args:
             message: The raw user query.

         Returns:
             A dict with exactly the keys ``industry``, ``region`` and
             ``keywords``, each a string. A key that is missing, blank, or not
             a string/list in the model's reply is returned as ``""``.

         Note:
             Exceptions raised by ``call_llm`` itself are not caught here.
         """
         import json

         defaults = {"industry": "", "region": "", "keywords": ""}
         prompt = f"""
         Extract the following information from the user query below and return ONLY a valid JSON object with keys: industry, region, keywords.
         - industry: The industry mentioned or implied (e.g., fintech, healthcare, energy, general).
         - region: The region or country explicitly mentioned (e.g., US, EU, UK, Asia, Global).
         - keywords: The most important regulatory topics or terms, separated by commas. Do NOT include generic words or verbs.
         User query: {message}
         Example output:
         {{"industry": "fintech", "region": "US", "keywords": "SEC regulations"}}
         """
         response = call_llm(prompt)
         params = dict(defaults)
         if not isinstance(response, str):
             return params
         # Models frequently wrap the object in ``` fences or add a sentence
         # around it, so parse only the outermost {...} span of the reply.
         start = response.find("{")
         end = response.rfind("}")
         if start == -1 or end <= start:
             return params
         try:
             parsed = json.loads(response[start : end + 1])
         except ValueError:
             # json.JSONDecodeError is a ValueError: unparsable reply -> defaults.
             return params
         for key in defaults:
             value = parsed.get(key)
             if isinstance(value, list):
                 # Tolerate keywords (or any field) returned as a JSON array.
                 value = ", ".join(str(item) for item in value)
             if isinstance(value, str) and value.strip():
                 params[key] = value.strip()
         return params