Added Pinecone Session Storage
- .gitignore +3 -0
- requirements.txt +1 -1
- src/agents.py +70 -66
- src/app.py +4 -1
- src/config.py +3 -1
- src/demo issue.json +0 -0
- src/evalset169426.evalset.json +0 -0
- src/memory.py +34 -8
- src/test1.evalset.json +0 -0
- src/tools.py +169 -137
- web_app.py +21 -24
.gitignore
CHANGED
@@ -47,6 +47,9 @@ env/
 # Logs
 *.log

+#json
+*.evalset.json
+
 # OS
 .DS_Store
 Thumbs.db
requirements.txt
CHANGED
@@ -6,5 +6,5 @@ nest_asyncio
 python-dotenv
 certifi
 litellm
-pinecone
+pinecone
 sentence-transformers
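The dependency change itself is small: the `pinecone` client joins `sentence-transformers`, which the new memory service pairs for local embeddings. A quick sanity check (a minimal sketch, not part of the commit) confirms both imports resolve and that the embedding model used by src/memory.py produces 384-dimensional vectors, the dimension the Pinecone index must be created with:

# Minimal sanity check for the new dependencies (not part of the commit).
from pinecone import Pinecone  # client added by this commit; no API key needed just to import
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")  # same model src/memory.py loads
vector = model.encode("tensorflow requires numpy<2.0")
print(len(vector))  # 384 -> the dimension the Pinecone index must use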
src/agents.py
CHANGED
@@ -2,12 +2,20 @@
 Agent definitions for the AI-Powered Package Conflict Resolver.
 Defines Query Creator, Web Search, Web Crawl, and CodeSurgeon agents.
 """
+import sys
+import asyncio
+import json
+
+# Fix for Playwright on Windows (NotImplementedError in subprocess)
+if sys.platform == 'win32':
+    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
+
 from google.adk import Agent
-from google.adk.agents import SequentialAgent, ParallelAgent, LoopAgent
-from google.adk.events import Event, EventActions
+from google.adk.agents import SequentialAgent, ParallelAgent
+# from google.adk.events import Event, EventActions  # Unused after removing loop
 from google.adk.tools import google_search, load_memory
 from .config import get_model, get_gemini_model
-from .tools import batch_tool, adaptive_tool, save_context_tool, retrieve_context_tool, submit_queries_tool, validate_tool
+from .tools import batch_tool, adaptive_tool, save_context_tool, retrieve_context_tool, submit_queries_tool, validate_tool, retrieve_memory_tool
 from .utils import logger


@@ -19,12 +27,12 @@ def create_query_creator_agent():
     agent = Agent(
         name="Query_Creator_Agent",
         model=get_gemini_model(),
-        tools=[google_search,
+        tools=[google_search, retrieve_memory_tool],  # Added retrieve_memory_tool
         description="Dependency Detective specialized in diagnosing Python environment conflicts",
         instruction="""
         You are the "Dependency Detective," an expert AI agent specialized in diagnosing Python environment conflicts, legacy code rot, and version mismatch errors.
         Use Google Search Tool if You don't Know about those issue or packages.
-        Use `
+        Use `retrieve_memory` to recall details from previous conversations if the user refers to "last time" or "previous error".

         YOUR GOAL:
         1. Analyze the input to identify the specific packages involved (e.g., "tensorflow", "numpy").
@@ -114,6 +122,35 @@ def create_community_search_agent():
     logger.info("✅ Community Search agent created")
     return agent

+def create_context_search_agent():
+    """
+    Creates the Context Search agent (General Context).
+    """
+    agent = Agent(
+        name="Context_Search_Agent",
+        model=get_gemini_model(),
+        tools=[google_search],
+        description="Search agent focused on general context and main URL",
+        instruction="""
+        You are the "Context Researcher".
+
+        YOUR GOAL:
+        1. Analyze the input search queries to identify the "Main Topic" or "Core Library/Framework" (e.g., if input is "numpy float error", main topic is "numpy").
+        2. Search for the Home Page, Main Documentation Hub, or Wikipedia page for this Main Topic.
+        3. Provide the top 3-4 most authoritative URLs for this topic.
+
+        INPUT: List of search queries.
+        OUTPUT: Top 3-4 most relevant URLs.
+
+        OUTPUT FORMAT:
+        **Model: Gemini 2.5 Pro**
+        ## Context Results
+        {"top_urls": ["url1", "url2", "url3"]}
+        """
+    )
+    logger.info("✅ Context Search agent created")
+    return agent
+

 class WebCrawlAgent(Agent):
     """
@@ -147,16 +184,22 @@ class WebCrawlAgent(Agent):
         batch_result = await batch_crawl_tool.func(urls)

         # 2. Analyze Result (Simple Heuristic)
+        # Check if we got valid content
+        content = batch_result.get("combined_content", "")
+
         # If result contains many "Error" or is very short, we might need adaptive
-        if "Error" not in
-        return f"**Model: Custom Logic**\n## Crawled Content Analysis\n\n{
+        if "Error" not in content and len(content) > 500:
+            return f"**Model: Custom Logic**\n## Crawled Content Analysis\n\n{content}"

         # 3. Fallback to Adaptive (if batch failed significantly)
         logger.info("⚠️ Batch crawl had issues. Falling back to Adaptive Crawl for first URL...")
         # For simplicity in this custom agent, we just try the first URL adaptively as a fallback
         adaptive_result = await adaptive_tool.func(urls[0], query="dependency conflicts version requirements")

+        # Format adaptive result (it's a dict)
+        formatted_adaptive = json.dumps(adaptive_result, indent=2) if isinstance(adaptive_result, dict) else str(adaptive_result)
+
+        return f"**Model: Custom Logic (Adaptive Fallback)**\n## Crawled Content Analysis\n\n{formatted_adaptive}"

 def create_web_crawl_agent():
     """
@@ -202,64 +245,29 @@ def create_code_surgeon_agent():
     - Clear explanation of the issue
     - Updated requirements.txt content
     - Migration notes (if breaking changes exist)
-    """
-    )
-    logger.info("✅ Code Surgeon agent created")
-    return agent
-
-
-def create_verification_agent():
-    """
-    Creates the Verification agent that checks the Code Surgeon's work.
-    """
-    agent = Agent(
-        name="Verification_Agent",
-        model=get_model(),
-        tools=[validate_tool, save_context_tool],
-        description="Quality Assurance specialist for dependency files",
-        instruction="""
-        You are the "Quality Assurance Specialist".

-        3. If the tool returns "SUCCESS":
-           - Call `save_context('verification_status', 'SUCCESS')`.
-           - Respond with "Verification Passed".
-        4. If the tool returns errors:
-           - Call `save_context('verification_status', 'FAILED')`.
-           - Explain the errors to the Code Surgeon so they can fix it.
+    IMPORTANT:
+    - Call `save_context('solution', 'YOUR_SOLUTION_SUMMARY')` to store the final resolution.
+    - Call `save_context('requirements', 'YOUR_REQUIREMENTS_CONTENT')` to store the file content.
     """
     )
-    logger.info("✅
+    logger.info("✅ Code Surgeon agent created")
     return agent


-    """
-    async def _run_async_impl(self, ctx):
-        # Retrieve status from session state
-        status = ctx.session.state.get("verification_status", "FAILED")
-        logger.info(f"🔍 StopChecker: Status is {status}")
-
-        should_stop = (status == "SUCCESS")
-        if should_stop:
-            logger.info("🛑 StopChecker: Escalating to stop loop.")
-
-        # Yield an event with escalate=True if we should stop
-        yield Event(author=self.name, actions=EventActions(escalate=should_stop))
-
+# ===== MEMORY SERVICE =====
+from .config import get_memory_service
+global_memory_service = get_memory_service()

 # ===== MEMORY CALLBACK =====
 async def auto_save_to_memory(callback_context):
     """Automatically save session to memory after each agent turn."""
     try:
+        # Use global memory service instead of context-bound one
+        await global_memory_service.add_session_to_memory(
             callback_context._invocation_context.session
         )
-        logger.info("💾 Session automatically saved to memory.")
+        logger.info("💾 Session automatically saved to memory (Global Service).")
     except Exception as e:
         logger.error(f"❌ Failed to auto-save session: {e}")

@@ -274,12 +282,13 @@ def create_root_agent():

     docs_search = create_docs_search_agent()
     community_search = create_community_search_agent()
+    context_search = create_context_search_agent()

     # Parallel Research
     parallel_search = ParallelAgent(
         name="Parallel_Search_Team",
-        sub_agents=[docs_search, community_search],
-        description="Parallel search for official and community resources"
+        sub_agents=[docs_search, community_search, context_search],
+        description="Parallel search for official, community, and general context resources"
     )

     # Group Research Team
@@ -292,22 +301,13 @@ def create_root_agent():
     web_crawl = create_web_crawl_agent()
     web_crawl = create_web_crawl_agent()

-    # Code Surgeon Loop
+    # Code Surgeon (No Loop)
     code_surgeon = create_code_surgeon_agent()
-    verification = create_verification_agent()
-    stop_checker = StopCheckerAgent(name="Stop_Checker")
-
-    code_surgeon_team = LoopAgent(
-        name="Code_Surgeon_Team",
-        sub_agents=[code_surgeon, verification, stop_checker],
-        max_iterations=3,
-        description="Self-correcting dependency resolution team"
-    )

     # Create the sequential agent
     agent = SequentialAgent(
         name="Package_Conflict_Resolver_Root_Agent",
-        sub_agents=[web_research_team, web_crawl, code_surgeon_team],
+        sub_agents=[web_research_team, web_crawl, code_surgeon],
         description="Root agent managing the dependency resolution pipeline",
         after_agent_callback=auto_save_to_memory # Auto-save history
     )
@@ -317,3 +317,7 @@ def create_root_agent():

 # ===== MODULE-LEVEL INITIALIZATION FOR ADK WEB =====
 root_agent = create_root_agent()
+
+# Removed App definition to avoid ImportError.
+# Memory is handled via global_memory_service in callback.
+agent = root_agent
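The auto-save callback above only reads two attribute paths: `callback_context._invocation_context.session`, and a session exposing an `id` plus `events` (or `turns`). A minimal sketch that exercises `auto_save_to_memory` offline with stand-in objects; the SimpleNamespace fakes are invented for this test, only their attribute shapes mirror what the code reads:

# Hypothetical offline exercise of auto_save_to_memory (not part of the commit).
import asyncio
from types import SimpleNamespace
from src.agents import auto_save_to_memory  # importing src.agents also builds root_agent

fake_session = SimpleNamespace(
    id="demo-session-1",
    events=[SimpleNamespace(author="user", content="numpy 2.0 breaks tensorflow 2.15")],
)
fake_ctx = SimpleNamespace(_invocation_context=SimpleNamespace(session=fake_session))

# With PINECONE_API_KEY set this embeds and upserts the fake event text;
# otherwise it uses whatever fallback get_memory_service() returned, or just logs the failure.
asyncio.run(auto_save_to_memory(fake_ctx))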
src/app.py
CHANGED
@@ -6,11 +6,14 @@ from google.adk import App
 from google.adk.types import EventsCompactionConfig
 from .agents import root_agent
 from .utils import logger
+from .config import get_memory_service, get_session_service

-# Define the App with Events Compaction
+# Define the App with Events Compaction and Custom Services
 package_conflict_resolver_app = App(
     name="Package_Conflict_Resolver_App",
     root_agent=root_agent,
+    memory_service=get_memory_service(),
+    session_service=get_session_service(),
     events_compaction_config=EventsCompactionConfig(
         compaction_interval=3, # Trigger compaction every 3 invocations
         overlap_size=1, # Keep 1 previous turn for context
src/config.py
CHANGED
@@ -57,7 +57,8 @@ def get_session_service(db_url=None):
     """
     # Prioritize argument, then env var, then local default
     if not db_url:
+        # Use legacy_solver.db as it contains the existing sessions
+        db_url = os.getenv("DATABASE_URL", "sqlite+aiosqlite:///legacy_solver.db")

     session_service = DatabaseSessionService(db_url=db_url)
     logger.info(f"✅ Session service initialized: {db_url.split('://')[0]}://...") # Log safe URL
@@ -74,6 +75,7 @@ def get_memory_service():
     Uses Pinecone if PINECONE_API_KEY is set, otherwise InMemory.
     """
     pinecone_key = os.getenv("PINECONE_API_KEY")
+    logger.info(f"🔍 Checking PINECONE_API_KEY: {'Found' if pinecone_key else 'Missing'}")

     if pinecone_key:
         try:
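The hunk above only shows the top of `get_memory_service()`; the Pinecone/InMemory branch itself sits outside the diff. A plausible sketch of how that fallback continues, assuming it returns the `PineconeMemoryService` from src/memory.py when a key is present and otherwise falls back to ADK's in-memory service (the `InMemoryMemoryService` name is an assumption, not shown in this diff):

# Hypothetical continuation of get_memory_service(); the actual branch is not in this diff.
import os
from .utils import logger

def get_memory_service():
    pinecone_key = os.getenv("PINECONE_API_KEY")
    logger.info(f"🔍 Checking PINECONE_API_KEY: {'Found' if pinecone_key else 'Missing'}")

    if pinecone_key:
        try:
            from .memory import PineconeMemoryService  # defined in src/memory.py below
            return PineconeMemoryService()
        except Exception as e:
            logger.error(f"❌ Pinecone init failed, falling back to in-memory: {e}")

    from google.adk.memory import InMemoryMemoryService  # assumed ADK fallback
    return InMemoryMemoryService()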
src/demo issue.json
ADDED
The diff for this file is too large to render. See raw diff.

src/evalset169426.evalset.json
CHANGED
The diff for this file is too large to render. See raw diff.
src/memory.py
CHANGED
@@ -1,12 +1,13 @@
 import os
 import uuid
 from typing import List, Dict, Any
-from google.adk.memory import MemoryService
+from typing import List, Dict, Any
+# from google.adk.memory import MemoryService  # Not available in this version
 from pinecone import Pinecone, ServerlessSpec
 from sentence_transformers import SentenceTransformer
 from .utils import logger

-class PineconeMemoryService(MemoryService):
+class PineconeMemoryService:  # Removed inheritance to avoid ImportError
     """
     Custom Memory Service using Pinecone for long-term vector storage.
     Uses 'all-MiniLM-L6-v2' for local embedding generation.
@@ -32,8 +33,10 @@ class PineconeMemoryService(MemoryService):
         self.index = self.pc.Index(self.index_name)

         # Initialize Embedding Model
-        logger.info("🧠 Loading embedding model: all-MiniLM-L6-v2...")
+        logger.info("🧠 Loading embedding model: all-MiniLM-L6-v2... (This may take a while if downloading)")
+        print("DEBUG: Starting SentenceTransformer load...")
         self.model = SentenceTransformer('all-MiniLM-L6-v2')
+        print("DEBUG: SentenceTransformer loaded.")
         logger.info("✅ Pinecone Memory Service initialized")

     async def add_session_to_memory(self, session: Any):
@@ -41,14 +44,37 @@ class PineconeMemoryService(MemoryService):
         Embeds the session history and saves it to Pinecone.
         """
         try:
+            # Get session ID safely (ADK sessions usually use .id)
+            session_id = getattr(session, 'id', getattr(session, 'session_id', 'UNKNOWN'))
+
+            logger.info(f"💾 Attempting to save session to Pinecone. Session ID: {session_id}")
+            # Debug session structure
+            # logger.info(f"Session dir: {dir(session)}")
+
             # 1. Convert session to text
             # Assuming session has a 'history' or we can iterate turns
             # We'll construct a simplified text representation
             text_content = ""
+
+            # Check for 'turns' or 'events'
+            if hasattr(session, 'turns'):
+                turns = session.turns
+                logger.info(f"Found {len(turns)} turns.")
+                for turn in turns:
+                    text_content += f"{turn.role}: {turn.content}\n"
+            elif hasattr(session, 'events'):
+                events = session.events
+                logger.info(f"Found {len(events)} events.")
+                for event in events:
+                    # Event structure might vary
+                    author = getattr(event, 'author', 'unknown')
+                    content = getattr(event, 'content', getattr(event, 'text', ''))
+                    text_content += f"{author}: {content}\n"
+            else:
+                logger.warning("⚠️ Session has no 'turns' or 'events' attribute.")

             if not text_content.strip():
+                logger.warning("⚠️ Session content is empty. Skipping Pinecone save.")
                 return

             # 2. Generate Embedding
@@ -56,15 +82,15 @@ class PineconeMemoryService(MemoryService):

             # 3. Create Metadata
             metadata = {
-                "session_id":
+                "session_id": session_id,
                 "text": text_content[:1000], # Store snippet (limit size)
                 "timestamp": str(session.created_at) if hasattr(session, 'created_at') else ""
             }

             # 4. Upsert to Pinecone
             # Use session_id as vector ID
-            self.index.upsert(vectors=[(
-            logger.info(f"💾 Saved session {
+            self.index.upsert(vectors=[(session_id, vector, metadata)])
+            logger.info(f"💾 Saved session {session_id} to Pinecone")

         except Exception as e:
             logger.error(f"❌ Failed to save to Pinecone: {e}")
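src/tools.py (next file) calls `memory_service.search_memory(query)`, but the matching query method of `PineconeMemoryService` falls outside the rendered hunks. A sketch of what such a method could look like, assuming it mirrors `add_session_to_memory` (same local embedding model, same metadata snippet); only the method name and the list-of-strings return shape are taken from how `retrieve_memory` consumes it:

# Hypothetical search_memory() for PineconeMemoryService; not shown in this diff.
async def search_memory(self, query: str, top_k: int = 3) -> List[str]:
    # Embed the query with the same local model used when saving sessions
    vector = self.model.encode(query).tolist()
    response = self.index.query(vector=vector, top_k=top_k, include_metadata=True)
    matches = getattr(response, "matches", []) or []
    # Return the stored text snippets so the agent can quote past sessions
    return [(getattr(m, "metadata", None) or {}).get("text", "") for m in matches]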
src/test1.evalset.json
CHANGED
The diff for this file is too large to render. See raw diff.
src/tools.py
CHANGED
@@ -2,161 +2,189 @@
 Tool definitions for the Legacy Dependency Solver.
 Includes Crawl4AI batch crawler for efficient multi-URL processing.
 """
-from typing import List
+from typing import List, Dict, Any
+import json
+import sys
+import asyncio
+import concurrent.futures
+from pydantic import BaseModel, Field

 from google.adk.tools import FunctionTool
 from .utils import logger
+from .config import get_memory_service  # Import memory service factory

-async def batch_crawl_tool(urls: List[str]) -> str:
-    # Configure browser with headless mode and disable SSL verification
-    browser_config = BrowserConfig(
-        headless=True,
-        verbose=True
-    )
-
-    # Configure crawler to bypass cache
-    run_config = CrawlerRunConfig(
-        cache_mode=CacheMode.BYPASS,
-        word_count_threshold=10,
-    )
-
-    results = []
-
-    async with AsyncWebCrawler(config=browser_config) as crawler:
-        for url in urls:
-            logger.info(f" 🔍 Crawling: {url}")
-            try:
-                result = await crawler.arun(url=url, config=run_config)
-                if result.success:
-                    results.append(f"# Content from {url}\n\n{result.markdown}\n\n")
-                    logger.info(f" ✅ Success: {url}")
-                else:
-                    error_msg = f"Error crawling {url}: {result.error_message}"
-                    results.append(f"# {error_msg}\n\n")
-                    logger.warning(f" ❌ Failed: {url} - {result.error_message}")
-            except Exception as e:
-                error_msg = f"Exception crawling {url}: {str(e)}"
-                results.append(f"# {error_msg}\n\n")
-                logger.error(f" ⚠️ Exception: {url} - {e}")
-
-    combined = "\n".join(results)
-    logger.info(f"✅ Batch crawl completed: {len(results)} results")
-    return combined
-
-
-# ===== ADAPTIVE CRAWLING (COMMENTED OUT - NOT CURRENTLY USED) =====
-# Keeping this code for potential future use
-#
-#
-async def adaptive_crawl_tool(url: str, query: str) -> str:
-    """
-    Args:
-        url: The URL to crawl
-        query: The specific query/topic to look for
-    """
-    logger.info(f"🔍 Adaptive crawling: {url} for '{query}'")
-
-    browser_config = BrowserConfig(
-        headless=True,
-        verbose=True,
-        ignore_https_errors=True,
-        extra_args=["--ignore-certificate-errors", "--ignore-ssl-errors"]
-    )
-
-    # Adaptive config for discovery
-    adaptive_config = AdaptiveConfig(
-        max_pages=3,
-        confidence_threshold=0.7,
-        top_k_links=2,
-    )
-
-    async with AsyncWebCrawler(config=browser_config) as crawler:
-        # We need to use the adaptive crawler wrapper or logic if available in this version of crawl4ai
-        # Based on reference code, it uses AdaptiveCrawler
-        from crawl4ai import AdaptiveCrawler
-
-        # Discovery
-
-        top_content = adaptive.get_relevant_content(top_k=1)
-        if not top_content:
-            return "No relevant content found via adaptive crawling."
-
-        best_url = top_content[0]['url']
-        logger.info(f" ✅ Best source found: {best_url}")
-
-            cache_mode=CacheMode.BYPASS,
-            word_count_threshold=
-        )
-
-        return
-
+# --- 1. Define Schema (Module level for pickling) ---
+class SearchResult(BaseModel):
+    relevant_facts: List[str] = Field(..., description="Specific facts/numbers found.")
+    summary: str = Field(..., description="Concise summary related to the query.")
+    confidence: str = Field(..., description="Confidence level (High/Medium/Low).")
+
+# --- 2. Worker Functions (Run in Subprocess) ---
+
+def _run_batch_crawl_worker(urls: List[str]) -> Dict[str, Any]:
+    """
+    Worker function to run batch crawl in a separate process.
+    """
+    # Enforce ProactorEventLoop on Windows for Playwright
+    if sys.platform == 'win32':
+        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
+
+    async def _async_logic():
+        from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
+
+        # Shared Config
+        browser_config = BrowserConfig(
+            headless=True,
+            ignore_https_errors=True,
+            extra_args=["--ignore-certificate-errors", "--ignore-ssl-errors"]
+        )
+        run_config = CrawlerRunConfig(
+            cache_mode=CacheMode.BYPASS,
+            word_count_threshold=10,
+        )
+
+        results = []
+        # limit to top 3
+        target_urls = urls[:3]
+
+        async with AsyncWebCrawler(config=browser_config) as crawler:
+            for url in target_urls:
+                try:
+                    crawl_result = await crawler.arun(url=url, config=run_config)
+                    if crawl_result.success:
+                        results.append(f"--- SOURCE: {url} ---\n{crawl_result.markdown[:15000]}\n")
+                    else:
+                        results.append(f"--- SOURCE: {url} ---\n[Error: Failed to crawl]\n")
+                except Exception as e:
+                    results.append(f"--- SOURCE: {url} ---\n[Exception: {str(e)}]\n")
+
+        return {
+            "combined_content": "\n".join(results),
+            "status": "completed"
+        }
+
+    return asyncio.run(_async_logic())
+
+
+def _run_adaptive_crawl_worker(start_url: str, user_query: str) -> Dict[str, Any]:
+    """
+    Worker function to run adaptive crawl in a separate process.
+    """
+    if sys.platform == 'win32':
+        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
+
+    async def _async_logic():
+        from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, AdaptiveConfig, LLMConfig
+        from crawl4ai.extraction_strategy import LLMExtractionStrategy
+
+        browser_config = BrowserConfig(
+            headless=True,
+            verbose=True,
+            ignore_https_errors=True,
+            extra_args=["--ignore-certificate-errors", "--ignore-ssl-errors"]
+        )
+
+        async with AsyncWebCrawler(config=browser_config) as crawler:
+            # Phase 1: Discovery
+            adaptive_config = AdaptiveConfig(
+                max_pages=3,
+                confidence_threshold=0.7,
+                top_k_links=2,
+            )
+
+            # Import inside function to avoid top-level import issues in subprocess if needed
+            from crawl4ai import AdaptiveCrawler
+            adaptive = AdaptiveCrawler(crawler, config=adaptive_config)
+
+            try:
+                await adaptive.digest(start_url=start_url, query=user_query)
+            except Exception as e:
+                return {"error": f"Crawl failed during discovery: {str(e)}"}
+
+            top_content = adaptive.get_relevant_content(top_k=1)
+            if not top_content:
+                return {"error": "No relevant content found via adaptive crawling."}
+
+            best_url = top_content[0]['url']
+
+            # Phase 2: Extraction
+            dynamic_instruction = f"""
+            Extract ONLY information matching this request: '{user_query}'.
+            If not found, state that in the summary. Do not hallucinate.
+            """
+
+            extraction_config = CrawlerRunConfig(
+                cache_mode=CacheMode.BYPASS,
+                word_count_threshold=1,
+                page_timeout=60000,
+                extraction_strategy=LLMExtractionStrategy(
+                    llm_config=LLMConfig(provider="ollama/qwen2.5:7b", api_token="ollama"),
+                    schema=SearchResult.model_json_schema(),
+                    extraction_type="schema",
+                    instruction=dynamic_instruction,
+                ),
+            )
+
+            try:
+                result = await crawler.arun(url=best_url, config=extraction_config)
+                if result.extracted_content:
+                    return json.loads(result.extracted_content)
+                return {"error": "Extraction returned empty content."}
+            except json.JSONDecodeError:
+                return {"raw_output": result.extracted_content}
+            except Exception as e:
+                return {"error": f"Extraction failed: {str(e)}"}
+
+    return asyncio.run(_async_logic())
+
+
+# --- 3. Main Tools (Async Wrappers) ---
+
+async def batch_crawl_tool(urls: List[str]) -> Dict[str, Any]:
+    """
+    Crawls a LIST of URLs in one go using a subprocess to ensure correct event loop.
+    """
+    logger.info(f"🚀 Batch Tool Triggered: Processing {len(urls)} URLs...")
+
+    loop = asyncio.get_running_loop()
+    with concurrent.futures.ProcessPoolExecutor() as pool:
+        try:
+            result = await loop.run_in_executor(pool, _run_batch_crawl_worker, urls)
+            return result
+        except Exception as e:
+            logger.error(f"❌ Batch crawl subprocess failed: {e}")
+            return {"combined_content": f"Error: {str(e)}", "status": "failed"}
+
+async def adaptive_crawl_tool(start_url: str, user_query: str) -> Dict[str, Any]:
+    """
+    Performs adaptive crawl using a subprocess.
+    """
+    logger.info(f"🛠️ Tool Triggered: Adaptive Crawl on {start_url}")
+
+    loop = asyncio.get_running_loop()
+    with concurrent.futures.ProcessPoolExecutor() as pool:
+        try:
+            result = await loop.run_in_executor(pool, _run_adaptive_crawl_worker, start_url, user_query)
+            return result
+        except Exception as e:
+            logger.error(f"❌ Adaptive crawl subprocess failed: {e}")
+            return {"error": f"Subprocess failed: {str(e)}"}

-#
+# Convert to ADK Tools
 batch_tool = FunctionTool(batch_crawl_tool)
+adaptive_tool = FunctionTool(adaptive_crawl_tool)


 # ===== STATE MANAGEMENT TOOLS =====
-from typing import Dict, Any
 from google.adk.tools import ToolContext

 def save_context(tool_context: ToolContext, key: str, value: str) -> str:
-    """
-    Saves a key-value pair to the session state.
-    Useful for remembering packages, versions, or decisions across agents.
-
-    Args:
-        key: The key to store (e.g., 'packages', 'versions')
-        value: The value to store
-    """
     tool_context.state[key] = value
     logger.info(f"💾 State Saved: {key} = {value}")
     return f"Saved {key} to state."

 def retrieve_context(tool_context: ToolContext, key: str) -> str:
-    """
-    Retrieves a value from the session state.
-
-    Args:
-        key: The key to retrieve
-    """
     value = tool_context.state.get(key, "Not found")
     logger.info(f"🔍 State Retrieved: {key} = {value}")
     return str(value)
@@ -165,12 +193,6 @@ save_context_tool = FunctionTool(save_context)
 retrieve_context_tool = FunctionTool(retrieve_context)

 def submit_queries(tool_context: ToolContext, queries: List[str]) -> str:
-    """
-    Submits the generated search queries to the shared session state.
-
-    Args:
-        queries: The list of search queries to submit.
-    """
     tool_context.state['search_queries'] = queries
     logger.info(f"🔍 Queries Submitted: {queries}")
     return "Queries submitted successfully."
@@ -178,35 +200,45 @@ def submit_queries(tool_context: ToolContext, queries: List[str]) -> str:
 submit_queries_tool = FunctionTool(submit_queries)

 def validate_requirements(tool_context: ToolContext, requirements_content: str) -> str:
-    """
-    Validates the generated requirements.txt content.
-    Checks for basic syntax and conflicting versions (mocked logic).
-
-    Args:
-        requirements_content: The content of the requirements.txt file.
-    """
     if not requirements_content:
         return "Error: Empty requirements content."
-
     lines = requirements_content.strip().split('\n')
     errors = []
-
     for line in lines:
         line = line.strip()
         if not line or line.startswith('#'):
             continue
-
-        # Basic syntax check (package==version)
         import re
         if not re.match(r'^[a-zA-Z0-9_\-]+[=<>!~]+[0-9a-zA-Z\.]+', line):
-            # Allow simple package names too, but warn
             if not re.match(r'^[a-zA-Z0-9_\-]+$', line):
                 errors.append(f"Invalid syntax: {line}")
-
     if errors:
         return f"Validation Failed: {'; '.join(errors)}"
-
     logger.info("✅ Requirements validation passed.")
     return "SUCCESS"

 validate_tool = FunctionTool(validate_requirements)
+
+# ===== MEMORY RETRIEVAL TOOL =====
+async def retrieve_memory(query: str) -> str:
+    """
+    Searches long-term memory (Pinecone) for relevant past sessions.
+    Use this to recall details from previous conversations.
+    """
+    logger.info(f"🧠 Searching Memory for: {query}")
+    try:
+        # Initialize service on demand (or use singleton if configured)
+        memory_service = get_memory_service()
+        results = await memory_service.search_memory(query)
+
+        if not results:
+            return "No relevant memories found."
+
+        formatted_results = "\n---\n".join(results)
+        return f"Found relevant memories:\n{formatted_results}"
+
+    except Exception as e:
+        logger.error(f"❌ Memory retrieval failed: {e}")
+        return f"Error retrieving memory: {str(e)}"
+
+retrieve_memory_tool = FunctionTool(retrieve_memory)
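Because both crawl tools now round-trip through a ProcessPoolExecutor, they can be smoke-tested without the agent stack. A minimal local run (a sketch: the URL is arbitrary, and crawl4ai with an installed Playwright browser is assumed):

# Minimal local smoke test for the subprocess-backed batch crawler (not part of the commit).
import asyncio
from src.tools import batch_crawl_tool

async def main():
    result = await batch_crawl_tool(["https://example.com"])  # arbitrary test URL
    print(result["status"])
    print(result["combined_content"][:500])  # first 500 chars of crawled markdown

if __name__ == "__main__":  # required: the worker function is pickled into a child process
    asyncio.run(main())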
web_app.py
CHANGED
@@ -1,32 +1,29 @@
 """
-Run with: adk web web_app.py --no-reload
+Inspection script for Runner source.
 """
 import nest_asyncio
 from google.adk import Runner
-
-from src.agents import create_root_agent
-from src.utils import logger
+import inspect

-# Apply nest_asyncio to handle event loop conflicts in the web server
 nest_asyncio.apply()

-logger.info("✅ Web Interface Ready. Run 'adk web web_app.py --no-reload' to start.")
+print("Source of Runner.__init__:")
+try:
+    print(inspect.getsource(Runner.__init__))
+except Exception as e:
+    print(f"Error getting source: {e}")

+print("\nSource of Runner properties:")
+# Check if app is a property or attribute
+if hasattr(Runner, 'app'):
+    attr = getattr(Runner, 'app')
+    if isinstance(attr, property):
+        print("Found 'app' property.")
+        try:
+            print(inspect.getsource(attr.fget))
+        except:
+            print("Could not get source of fget")
+    else:
+        print(f"'app' is {type(attr)}")
+else:
+    print("'app' not found in Runner class dict")