Added Pinecone Session Storage
- .gitignore +3 -0
- requirements.txt +1 -1
- src/agents.py +70 -66
- src/app.py +4 -1
- src/config.py +3 -1
- src/demo issue.json +0 -0
- src/evalset169426.evalset.json +0 -0
- src/memory.py +34 -8
- src/test1.evalset.json +0 -0
- src/tools.py +169 -137
- web_app.py +21 -24
.gitignore
CHANGED
@@ -47,6 +47,9 @@ env/
 # Logs
 *.log

+#json
+*.evalset.json
+
 # OS
 .DS_Store
 Thumbs.db
requirements.txt
CHANGED
@@ -6,5 +6,5 @@ nest_asyncio
 python-dotenv
 certifi
 litellm
-pinecone
+pinecone
 sentence-transformers
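The dependency change itself is small: the `pinecone` client joins `sentence-transformers`, which the new memory service pairs for local embeddings. A quick sanity check (a minimal sketch, not part of the commit) confirms both imports resolve and that the embedding model used by src/memory.py produces 384-dimensional vectors, the dimension the Pinecone index must be created with:

# Minimal sanity check for the new dependencies (not part of the commit).
from pinecone import Pinecone  # client added by this commit; no API key needed just to import
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("all-MiniLM-L6-v2")  # same model src/memory.py loads
vector = model.encode("tensorflow requires numpy<2.0")
print(len(vector))  # 384 -> the dimension the Pinecone index must use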
src/agents.py
CHANGED
@@ -2,12 +2,20 @@
 Agent definitions for the AI-Powered Package Conflict Resolver.
 Defines Query Creator, Web Search, Web Crawl, and CodeSurgeon agents.
 """
+import sys
+import asyncio
+import json
+
+# Fix for Playwright on Windows (NotImplementedError in subprocess)
+if sys.platform == 'win32':
+    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
+
 from google.adk import Agent
-from google.adk.agents import SequentialAgent, ParallelAgent, LoopAgent
-from google.adk.events import Event, EventActions
+from google.adk.agents import SequentialAgent, ParallelAgent
+# from google.adk.events import Event, EventActions  # Unused after removing loop
 from google.adk.tools import google_search, load_memory
 from .config import get_model, get_gemini_model
-from .tools import batch_tool, adaptive_tool, save_context_tool, retrieve_context_tool, submit_queries_tool, validate_tool
+from .tools import batch_tool, adaptive_tool, save_context_tool, retrieve_context_tool, submit_queries_tool, validate_tool, retrieve_memory_tool
 from .utils import logger


@@ -19,12 +27,12 @@ def create_query_creator_agent():
     agent = Agent(
         name="Query_Creator_Agent",
         model=get_gemini_model(),
-        tools=[google_search,
+        tools=[google_search, retrieve_memory_tool],  # Added retrieve_memory_tool
         description="Dependency Detective specialized in diagnosing Python environment conflicts",
         instruction="""
         You are the "Dependency Detective," an expert AI agent specialized in diagnosing Python environment conflicts, legacy code rot, and version mismatch errors.
         Use Google Search Tool if You don't Know about those issue or packages.
-        Use `
+        Use `retrieve_memory` to recall details from previous conversations if the user refers to "last time" or "previous error".

         YOUR GOAL:
         1. Analyze the input to identify the specific packages involved (e.g., "tensorflow", "numpy").
@@ -114,6 +122,35 @@ def create_community_search_agent():
     logger.info("✅ Community Search agent created")
     return agent

+def create_context_search_agent():
+    """
+    Creates the Context Search agent (General Context).
+    """
+    agent = Agent(
+        name="Context_Search_Agent",
+        model=get_gemini_model(),
+        tools=[google_search],
+        description="Search agent focused on general context and main URL",
+        instruction="""
+        You are the "Context Researcher".
+
+        YOUR GOAL:
+        1. Analyze the input search queries to identify the "Main Topic" or "Core Library/Framework" (e.g., if input is "numpy float error", main topic is "numpy").
+        2. Search for the Home Page, Main Documentation Hub, or Wikipedia page for this Main Topic.
+        3. Provide the top 3-4 most authoritative URLs for this topic.
+
+        INPUT: List of search queries.
+        OUTPUT: Top 3-4 most relevant URLs.
+
+        OUTPUT FORMAT:
+        **Model: Gemini 2.5 Pro**
+        ## Context Results
+        {"top_urls": ["url1", "url2", "url3"]}
+        """
+    )
+    logger.info("✅ Context Search agent created")
+    return agent
+

 class WebCrawlAgent(Agent):
     """
@@ -147,16 +184,22 @@ class WebCrawlAgent(Agent):
         batch_result = await batch_crawl_tool.func(urls)

         # 2. Analyze Result (Simple Heuristic)
+        # Check if we got valid content
+        content = batch_result.get("combined_content", "")
+
         # If result contains many "Error" or is very short, we might need adaptive
-        if "Error" not in
-        return f"**Model: Custom Logic**\n## Crawled Content Analysis\n\n{
+        if "Error" not in content and len(content) > 500:
+            return f"**Model: Custom Logic**\n## Crawled Content Analysis\n\n{content}"

         # 3. Fallback to Adaptive (if batch failed significantly)
         logger.info("⚠️ Batch crawl had issues. Falling back to Adaptive Crawl for first URL...")
         # For simplicity in this custom agent, we just try the first URL adaptively as a fallback
         adaptive_result = await adaptive_tool.func(urls[0], query="dependency conflicts version requirements")

+        # Format adaptive result (it's a dict)
+        formatted_adaptive = json.dumps(adaptive_result, indent=2) if isinstance(adaptive_result, dict) else str(adaptive_result)
+
+        return f"**Model: Custom Logic (Adaptive Fallback)**\n## Crawled Content Analysis\n\n{formatted_adaptive}"

 def create_web_crawl_agent():
     """
@@ -202,64 +245,29 @@ def create_code_surgeon_agent():
     - Clear explanation of the issue
     - Updated requirements.txt content
     - Migration notes (if breaking changes exist)
-    """
-    )
-    logger.info("✅ Code Surgeon agent created")
-    return agent
-
-
-def create_verification_agent():
-    """
-    Creates the Verification agent that checks the Code Surgeon's work.
-    """
-    agent = Agent(
-        name="Verification_Agent",
-        model=get_model(),
-        tools=[validate_tool, save_context_tool],
-        description="Quality Assurance specialist for dependency files",
-        instruction="""
-        You are the "Quality Assurance Specialist".

-        3. If the tool returns "SUCCESS":
-           - Call `save_context('verification_status', 'SUCCESS')`.
-           - Respond with "Verification Passed".
-        4. If the tool returns errors:
-           - Call `save_context('verification_status', 'FAILED')`.
-           - Explain the errors to the Code Surgeon so they can fix it.
+    IMPORTANT:
+    - Call `save_context('solution', 'YOUR_SOLUTION_SUMMARY')` to store the final resolution.
+    - Call `save_context('requirements', 'YOUR_REQUIREMENTS_CONTENT')` to store the file content.
     """
     )
-    logger.info("✅
+    logger.info("✅ Code Surgeon agent created")
     return agent


-    """
-    async def _run_async_impl(self, ctx):
-        # Retrieve status from session state
-        status = ctx.session.state.get("verification_status", "FAILED")
-        logger.info(f"🔍 StopChecker: Status is {status}")
-
-        should_stop = (status == "SUCCESS")
-        if should_stop:
-            logger.info("🛑 StopChecker: Escalating to stop loop.")
-
-        # Yield an event with escalate=True if we should stop
-        yield Event(author=self.name, actions=EventActions(escalate=should_stop))
-
+# ===== MEMORY SERVICE =====
+from .config import get_memory_service
+global_memory_service = get_memory_service()

 # ===== MEMORY CALLBACK =====
 async def auto_save_to_memory(callback_context):
     """Automatically save session to memory after each agent turn."""
     try:
+        # Use global memory service instead of context-bound one
+        await global_memory_service.add_session_to_memory(
             callback_context._invocation_context.session
         )
-        logger.info("💾 Session automatically saved to memory.")
+        logger.info("💾 Session automatically saved to memory (Global Service).")
     except Exception as e:
         logger.error(f"❌ Failed to auto-save session: {e}")

@@ -274,12 +282,13 @@ def create_root_agent():

     docs_search = create_docs_search_agent()
     community_search = create_community_search_agent()
+    context_search = create_context_search_agent()

     # Parallel Research
     parallel_search = ParallelAgent(
         name="Parallel_Search_Team",
-        sub_agents=[docs_search, community_search],
-        description="Parallel search for official and community resources"
+        sub_agents=[docs_search, community_search, context_search],
+        description="Parallel search for official, community, and general context resources"
     )

     # Group Research Team
@@ -292,22 +301,13 @@ def create_root_agent():
     web_crawl = create_web_crawl_agent()
     web_crawl = create_web_crawl_agent()

-    # Code Surgeon Loop
+    # Code Surgeon (No Loop)
     code_surgeon = create_code_surgeon_agent()
-    verification = create_verification_agent()
-    stop_checker = StopCheckerAgent(name="Stop_Checker")
-
-    code_surgeon_team = LoopAgent(
-        name="Code_Surgeon_Team",
-        sub_agents=[code_surgeon, verification, stop_checker],
-        max_iterations=3,
-        description="Self-correcting dependency resolution team"
-    )

     # Create the sequential agent
     agent = SequentialAgent(
         name="Package_Conflict_Resolver_Root_Agent",
-        sub_agents=[web_research_team, web_crawl, code_surgeon_team],
+        sub_agents=[web_research_team, web_crawl, code_surgeon],
         description="Root agent managing the dependency resolution pipeline",
         after_agent_callback=auto_save_to_memory # Auto-save history
     )
@@ -317,3 +317,7 @@ def create_root_agent():

 # ===== MODULE-LEVEL INITIALIZATION FOR ADK WEB =====
 root_agent = create_root_agent()
+
+# Removed App definition to avoid ImportError.
+# Memory is handled via global_memory_service in callback.
+agent = root_agent
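The auto-save callback above only reads two attribute paths: `callback_context._invocation_context.session`, and a session exposing an `id` plus `events` (or `turns`). A minimal sketch that exercises `auto_save_to_memory` offline with stand-in objects; the SimpleNamespace fakes are invented for this test, only their attribute shapes mirror what the code reads:

# Hypothetical offline exercise of auto_save_to_memory (not part of the commit).
import asyncio
from types import SimpleNamespace
from src.agents import auto_save_to_memory  # importing src.agents also builds root_agent

fake_session = SimpleNamespace(
    id="demo-session-1",
    events=[SimpleNamespace(author="user", content="numpy 2.0 breaks tensorflow 2.15")],
)
fake_ctx = SimpleNamespace(_invocation_context=SimpleNamespace(session=fake_session))

# With PINECONE_API_KEY set this embeds and upserts the fake event text;
# otherwise it uses whatever fallback get_memory_service() returned, or just logs the failure.
asyncio.run(auto_save_to_memory(fake_ctx))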
src/app.py
CHANGED
@@ -6,11 +6,14 @@ from google.adk import App
 from google.adk.types import EventsCompactionConfig
 from .agents import root_agent
 from .utils import logger
+from .config import get_memory_service, get_session_service

-# Define the App with Events Compaction
+# Define the App with Events Compaction and Custom Services
 package_conflict_resolver_app = App(
     name="Package_Conflict_Resolver_App",
     root_agent=root_agent,
+    memory_service=get_memory_service(),
+    session_service=get_session_service(),
     events_compaction_config=EventsCompactionConfig(
         compaction_interval=3, # Trigger compaction every 3 invocations
         overlap_size=1, # Keep 1 previous turn for context
src/config.py
CHANGED
@@ -57,7 +57,8 @@ def get_session_service(db_url=None):
     """
     # Prioritize argument, then env var, then local default
     if not db_url:
+        # Use legacy_solver.db as it contains the existing sessions
+        db_url = os.getenv("DATABASE_URL", "sqlite+aiosqlite:///legacy_solver.db")

     session_service = DatabaseSessionService(db_url=db_url)
     logger.info(f"✅ Session service initialized: {db_url.split('://')[0]}://...") # Log safe URL
@@ -74,6 +75,7 @@ def get_memory_service():
     Uses Pinecone if PINECONE_API_KEY is set, otherwise InMemory.
     """
     pinecone_key = os.getenv("PINECONE_API_KEY")
+    logger.info(f"🔍 Checking PINECONE_API_KEY: {'Found' if pinecone_key else 'Missing'}")

     if pinecone_key:
         try:
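The hunk above only shows the top of `get_memory_service()`; the Pinecone/InMemory branch itself sits outside the diff. A plausible sketch of how that fallback continues, assuming it returns the `PineconeMemoryService` from src/memory.py when a key is present and otherwise falls back to ADK's in-memory service (the `InMemoryMemoryService` name is an assumption, not shown in this diff):

# Hypothetical continuation of get_memory_service(); the actual branch is not in this diff.
import os
from .utils import logger

def get_memory_service():
    pinecone_key = os.getenv("PINECONE_API_KEY")
    logger.info(f"🔍 Checking PINECONE_API_KEY: {'Found' if pinecone_key else 'Missing'}")

    if pinecone_key:
        try:
            from .memory import PineconeMemoryService  # defined in src/memory.py below
            return PineconeMemoryService()
        except Exception as e:
            logger.error(f"❌ Pinecone init failed, falling back to in-memory: {e}")

    from google.adk.memory import InMemoryMemoryService  # assumed ADK fallback
    return InMemoryMemoryService()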
src/demo issue.json
ADDED
The diff for this file is too large to render. See raw diff.

src/evalset169426.evalset.json
CHANGED
The diff for this file is too large to render. See raw diff.
src/memory.py
CHANGED
@@ -1,12 +1,13 @@
 import os
 import uuid
 from typing import List, Dict, Any
-from google.adk.memory import MemoryService
+from typing import List, Dict, Any
+# from google.adk.memory import MemoryService  # Not available in this version
 from pinecone import Pinecone, ServerlessSpec
 from sentence_transformers import SentenceTransformer
 from .utils import logger

-class PineconeMemoryService(MemoryService):
+class PineconeMemoryService:  # Removed inheritance to avoid ImportError
     """
     Custom Memory Service using Pinecone for long-term vector storage.
     Uses 'all-MiniLM-L6-v2' for local embedding generation.
@@ -32,8 +33,10 @@ class PineconeMemoryService(MemoryService):
         self.index = self.pc.Index(self.index_name)

         # Initialize Embedding Model
-        logger.info("🧠 Loading embedding model: all-MiniLM-L6-v2...")
+        logger.info("🧠 Loading embedding model: all-MiniLM-L6-v2... (This may take a while if downloading)")
+        print("DEBUG: Starting SentenceTransformer load...")
         self.model = SentenceTransformer('all-MiniLM-L6-v2')
+        print("DEBUG: SentenceTransformer loaded.")
         logger.info("✅ Pinecone Memory Service initialized")

     async def add_session_to_memory(self, session: Any):
@@ -41,14 +44,37 @@ class PineconeMemoryService(MemoryService):
         Embeds the session history and saves it to Pinecone.
         """
         try:
+            # Get session ID safely (ADK sessions usually use .id)
+            session_id = getattr(session, 'id', getattr(session, 'session_id', 'UNKNOWN'))
+
+            logger.info(f"💾 Attempting to save session to Pinecone. Session ID: {session_id}")
+            # Debug session structure
+            # logger.info(f"Session dir: {dir(session)}")
+
             # 1. Convert session to text
             # Assuming session has a 'history' or we can iterate turns
             # We'll construct a simplified text representation
             text_content = ""
+
+            # Check for 'turns' or 'events'
+            if hasattr(session, 'turns'):
+                turns = session.turns
+                logger.info(f"Found {len(turns)} turns.")
+                for turn in turns:
+                    text_content += f"{turn.role}: {turn.content}\n"
+            elif hasattr(session, 'events'):
+                events = session.events
+                logger.info(f"Found {len(events)} events.")
+                for event in events:
+                    # Event structure might vary
+                    author = getattr(event, 'author', 'unknown')
+                    content = getattr(event, 'content', getattr(event, 'text', ''))
+                    text_content += f"{author}: {content}\n"
+            else:
+                logger.warning("⚠️ Session has no 'turns' or 'events' attribute.")

             if not text_content.strip():
+                logger.warning("⚠️ Session content is empty. Skipping Pinecone save.")
                 return

             # 2. Generate Embedding
@@ -56,15 +82,15 @@ class PineconeMemoryService(MemoryService):

             # 3. Create Metadata
             metadata = {
-                "session_id":
+                "session_id": session_id,
                 "text": text_content[:1000], # Store snippet (limit size)
                 "timestamp": str(session.created_at) if hasattr(session, 'created_at') else ""
             }

             # 4. Upsert to Pinecone
             # Use session_id as vector ID
-            self.index.upsert(vectors=[(
-            logger.info(f"💾 Saved session {
+            self.index.upsert(vectors=[(session_id, vector, metadata)])
+            logger.info(f"💾 Saved session {session_id} to Pinecone")

         except Exception as e:
             logger.error(f"❌ Failed to save to Pinecone: {e}")
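src/tools.py (next file) calls `memory_service.search_memory(query)`, but the matching query method of `PineconeMemoryService` falls outside the rendered hunks. A sketch of what such a method could look like, assuming it mirrors `add_session_to_memory` (same local embedding model, same metadata snippet); only the method name and the list-of-strings return shape are taken from how `retrieve_memory` consumes it:

# Hypothetical search_memory() for PineconeMemoryService; not shown in this diff.
async def search_memory(self, query: str, top_k: int = 3) -> List[str]:
    # Embed the query with the same local model used when saving sessions
    vector = self.model.encode(query).tolist()
    response = self.index.query(vector=vector, top_k=top_k, include_metadata=True)
    matches = getattr(response, "matches", []) or []
    # Return the stored text snippets so the agent can quote past sessions
    return [(getattr(m, "metadata", None) or {}).get("text", "") for m in matches]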
src/test1.evalset.json
CHANGED
The diff for this file is too large to render. See raw diff.
src/tools.py
CHANGED
@@ -2,161 +2,189 @@
 Tool definitions for the Legacy Dependency Solver.
 Includes Crawl4AI batch crawler for efficient multi-URL processing.
 """
-from typing import List
+from typing import List, Dict, Any
+import json
+import sys
+import asyncio
+import concurrent.futures
+from pydantic import BaseModel, Field

 from google.adk.tools import FunctionTool
 from .utils import logger
+from .config import get_memory_service  # Import memory service factory

-async def batch_crawl_tool(urls: List[str]) -> str:
-    # Configure browser with headless mode and disable SSL verification
-    browser_config = BrowserConfig(
-        headless=True,
-        verbose=True
-    )
-
-    # Configure crawler to bypass cache
-    run_config = CrawlerRunConfig(
-        cache_mode=CacheMode.BYPASS,
-        word_count_threshold=10,
-    )
-
-    results = []
-
-    async with AsyncWebCrawler(config=browser_config) as crawler:
-        for url in urls:
-            logger.info(f" 🔍 Crawling: {url}")
-            try:
-                result = await crawler.arun(url=url, config=run_config)
-                if result.success:
-                    results.append(f"# Content from {url}\n\n{result.markdown}\n\n")
-                    logger.info(f" ✅ Success: {url}")
-                else:
-                    error_msg = f"Error crawling {url}: {result.error_message}"
-                    results.append(f"# {error_msg}\n\n")
-                    logger.warning(f" ❌ Failed: {url} - {result.error_message}")
-            except Exception as e:
-                error_msg = f"Exception crawling {url}: {str(e)}"
-                results.append(f"# {error_msg}\n\n")
-                logger.error(f" ⚠️ Exception: {url} - {e}")
-
-    combined = "\n".join(results)
-    logger.info(f"✅ Batch crawl completed: {len(results)} results")
-    return combined
-
-
-# ===== ADAPTIVE CRAWLING (COMMENTED OUT - NOT CURRENTLY USED) =====
-# Keeping this code for potential future use
-#
-#
-async def adaptive_crawl_tool(url: str, query: str) -> str:
-    """
-    Args:
-        url: The URL to crawl
-        query: The specific query/topic to look for
-    """
-    logger.info(f"🔍 Adaptive crawling: {url} for '{query}'")
-
-    browser_config = BrowserConfig(
-        headless=True,
-        verbose=True,
-        ignore_https_errors=True,
-        extra_args=["--ignore-certificate-errors", "--ignore-ssl-errors"]
-    )
-
-    # Adaptive config for discovery
-    adaptive_config = AdaptiveConfig(
-        max_pages=3,
-        confidence_threshold=0.7,
-        top_k_links=2,
-    )
-
-    async with AsyncWebCrawler(config=browser_config) as crawler:
-        # We need to use the adaptive crawler wrapper or logic if available in this version of crawl4ai
-        # Based on reference code, it uses AdaptiveCrawler
-        from crawl4ai import AdaptiveCrawler
-
-        # Discovery
-
-        top_content = adaptive.get_relevant_content(top_k=1)
-        if not top_content:
-            return "No relevant content found via adaptive crawling."
-
-        best_url = top_content[0]['url']
-        logger.info(f" ✅ Best source found: {best_url}")
-
-            cache_mode=CacheMode.BYPASS,
-            word_count_threshold=
-        )
-
-        return
-
+# --- 1. Define Schema (Module level for pickling) ---
+class SearchResult(BaseModel):
+    relevant_facts: List[str] = Field(..., description="Specific facts/numbers found.")
+    summary: str = Field(..., description="Concise summary related to the query.")
+    confidence: str = Field(..., description="Confidence level (High/Medium/Low).")
+
+# --- 2. Worker Functions (Run in Subprocess) ---
+
+def _run_batch_crawl_worker(urls: List[str]) -> Dict[str, Any]:
+    """
+    Worker function to run batch crawl in a separate process.
+    """
+    # Enforce ProactorEventLoop on Windows for Playwright
+    if sys.platform == 'win32':
+        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
+
+    async def _async_logic():
+        from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
+
+        # Shared Config
+        browser_config = BrowserConfig(
+            headless=True,
+            ignore_https_errors=True,
+            extra_args=["--ignore-certificate-errors", "--ignore-ssl-errors"]
+        )
+        run_config = CrawlerRunConfig(
+            cache_mode=CacheMode.BYPASS,
+            word_count_threshold=10,
+        )
+
+        results = []
+        # limit to top 3
+        target_urls = urls[:3]
+
+        async with AsyncWebCrawler(config=browser_config) as crawler:
+            for url in target_urls:
+                try:
+                    crawl_result = await crawler.arun(url=url, config=run_config)
+                    if crawl_result.success:
+                        results.append(f"--- SOURCE: {url} ---\n{crawl_result.markdown[:15000]}\n")
+                    else:
+                        results.append(f"--- SOURCE: {url} ---\n[Error: Failed to crawl]\n")
+                except Exception as e:
+                    results.append(f"--- SOURCE: {url} ---\n[Exception: {str(e)}]\n")
+
+        return {
+            "combined_content": "\n".join(results),
+            "status": "completed"
+        }
+
+    return asyncio.run(_async_logic())
+
+
+def _run_adaptive_crawl_worker(start_url: str, user_query: str) -> Dict[str, Any]:
+    """
+    Worker function to run adaptive crawl in a separate process.
+    """
+    if sys.platform == 'win32':
+        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
+
+    async def _async_logic():
+        from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, AdaptiveConfig, LLMConfig
+        from crawl4ai.extraction_strategy import LLMExtractionStrategy
+
+        browser_config = BrowserConfig(
+            headless=True,
+            verbose=True,
+            ignore_https_errors=True,
+            extra_args=["--ignore-certificate-errors", "--ignore-ssl-errors"]
+        )
+
+        async with AsyncWebCrawler(config=browser_config) as crawler:
+            # Phase 1: Discovery
+            adaptive_config = AdaptiveConfig(
+                max_pages=3,
+                confidence_threshold=0.7,
+                top_k_links=2,
+            )
+
+            # Import inside function to avoid top-level import issues in subprocess if needed
+            from crawl4ai import AdaptiveCrawler
+            adaptive = AdaptiveCrawler(crawler, config=adaptive_config)
+
+            try:
+                await adaptive.digest(start_url=start_url, query=user_query)
+            except Exception as e:
+                return {"error": f"Crawl failed during discovery: {str(e)}"}
+
+            top_content = adaptive.get_relevant_content(top_k=1)
+            if not top_content:
+                return {"error": "No relevant content found via adaptive crawling."}
+
+            best_url = top_content[0]['url']
+
+            # Phase 2: Extraction
+            dynamic_instruction = f"""
+            Extract ONLY information matching this request: '{user_query}'.
+            If not found, state that in the summary. Do not hallucinate.
+            """
+
+            extraction_config = CrawlerRunConfig(
+                cache_mode=CacheMode.BYPASS,
+                word_count_threshold=1,
+                page_timeout=60000,
+                extraction_strategy=LLMExtractionStrategy(
+                    llm_config=LLMConfig(provider="ollama/qwen2.5:7b", api_token="ollama"),
+                    schema=SearchResult.model_json_schema(),
+                    extraction_type="schema",
+                    instruction=dynamic_instruction,
+                ),
+            )
+
+            try:
+                result = await crawler.arun(url=best_url, config=extraction_config)
+                if result.extracted_content:
+                    return json.loads(result.extracted_content)
+                return {"error": "Extraction returned empty content."}
+            except json.JSONDecodeError:
+                return {"raw_output": result.extracted_content}
+            except Exception as e:
+                return {"error": f"Extraction failed: {str(e)}"}
+
+    return asyncio.run(_async_logic())
+
+
+# --- 3. Main Tools (Async Wrappers) ---
+
+async def batch_crawl_tool(urls: List[str]) -> Dict[str, Any]:
+    """
+    Crawls a LIST of URLs in one go using a subprocess to ensure correct event loop.
+    """
+    logger.info(f"🚀 Batch Tool Triggered: Processing {len(urls)} URLs...")
+
+    loop = asyncio.get_running_loop()
+    with concurrent.futures.ProcessPoolExecutor() as pool:
+        try:
+            result = await loop.run_in_executor(pool, _run_batch_crawl_worker, urls)
+            return result
+        except Exception as e:
+            logger.error(f"❌ Batch crawl subprocess failed: {e}")
+            return {"combined_content": f"Error: {str(e)}", "status": "failed"}
+
+async def adaptive_crawl_tool(start_url: str, user_query: str) -> Dict[str, Any]:
+    """
+    Performs adaptive crawl using a subprocess.
+    """
+    logger.info(f"🛠️ Tool Triggered: Adaptive Crawl on {start_url}")
+
+    loop = asyncio.get_running_loop()
+    with concurrent.futures.ProcessPoolExecutor() as pool:
+        try:
+            result = await loop.run_in_executor(pool, _run_adaptive_crawl_worker, start_url, user_query)
+            return result
+        except Exception as e:
+            logger.error(f"❌ Adaptive crawl subprocess failed: {e}")
+            return {"error": f"Subprocess failed: {str(e)}"}

-#
+# Convert to ADK Tools
 batch_tool = FunctionTool(batch_crawl_tool)
+adaptive_tool = FunctionTool(adaptive_crawl_tool)


 # ===== STATE MANAGEMENT TOOLS =====
-from typing import Dict, Any
 from google.adk.tools import ToolContext

 def save_context(tool_context: ToolContext, key: str, value: str) -> str:
-    """
-    Saves a key-value pair to the session state.
-    Useful for remembering packages, versions, or decisions across agents.
-
-    Args:
-        key: The key to store (e.g., 'packages', 'versions')
-        value: The value to store
-    """
     tool_context.state[key] = value
     logger.info(f"💾 State Saved: {key} = {value}")
     return f"Saved {key} to state."

 def retrieve_context(tool_context: ToolContext, key: str) -> str:
-    """
-    Retrieves a value from the session state.
-
-    Args:
-        key: The key to retrieve
-    """
     value = tool_context.state.get(key, "Not found")
     logger.info(f"🔍 State Retrieved: {key} = {value}")
     return str(value)
@@ -165,12 +193,6 @@ save_context_tool = FunctionTool(save_context)
 retrieve_context_tool = FunctionTool(retrieve_context)

 def submit_queries(tool_context: ToolContext, queries: List[str]) -> str:
-    """
-    Submits the generated search queries to the shared session state.
-
-    Args:
-        queries: The list of search queries to submit.
-    """
     tool_context.state['search_queries'] = queries
     logger.info(f"🔍 Queries Submitted: {queries}")
     return "Queries submitted successfully."
@@ -178,35 +200,45 @@ def submit_queries(tool_context: ToolContext, queries: List[str]) -> str:
 submit_queries_tool = FunctionTool(submit_queries)

 def validate_requirements(tool_context: ToolContext, requirements_content: str) -> str:
-    """
-    Validates the generated requirements.txt content.
-    Checks for basic syntax and conflicting versions (mocked logic).
-
-    Args:
-        requirements_content: The content of the requirements.txt file.
-    """
     if not requirements_content:
         return "Error: Empty requirements content."
-
     lines = requirements_content.strip().split('\n')
     errors = []
-
     for line in lines:
         line = line.strip()
         if not line or line.startswith('#'):
             continue
-
-        # Basic syntax check (package==version)
         import re
         if not re.match(r'^[a-zA-Z0-9_\-]+[=<>!~]+[0-9a-zA-Z\.]+', line):
-            # Allow simple package names too, but warn
             if not re.match(r'^[a-zA-Z0-9_\-]+$', line):
                 errors.append(f"Invalid syntax: {line}")
-
     if errors:
         return f"Validation Failed: {'; '.join(errors)}"
-
     logger.info("✅ Requirements validation passed.")
     return "SUCCESS"

 validate_tool = FunctionTool(validate_requirements)
+
+# ===== MEMORY RETRIEVAL TOOL =====
+async def retrieve_memory(query: str) -> str:
+    """
+    Searches long-term memory (Pinecone) for relevant past sessions.
+    Use this to recall details from previous conversations.
+    """
+    logger.info(f"🧠 Searching Memory for: {query}")
+    try:
+        # Initialize service on demand (or use singleton if configured)
+        memory_service = get_memory_service()
+        results = await memory_service.search_memory(query)
+
+        if not results:
+            return "No relevant memories found."
+
+        formatted_results = "\n---\n".join(results)
+        return f"Found relevant memories:\n{formatted_results}"
+
+    except Exception as e:
+        logger.error(f"❌ Memory retrieval failed: {e}")
+        return f"Error retrieving memory: {str(e)}"
+
+retrieve_memory_tool = FunctionTool(retrieve_memory)
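Because both crawl tools now round-trip through a ProcessPoolExecutor, they can be smoke-tested without the agent stack. A minimal local run (a sketch: the URL is arbitrary, and crawl4ai with an installed Playwright browser is assumed):

# Minimal local smoke test for the subprocess-backed batch crawler (not part of the commit).
import asyncio
from src.tools import batch_crawl_tool

async def main():
    result = await batch_crawl_tool(["https://example.com"])  # arbitrary test URL
    print(result["status"])
    print(result["combined_content"][:500])  # first 500 chars of crawled markdown

if __name__ == "__main__":  # required: the worker function is pickled into a child process
    asyncio.run(main())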
web_app.py
CHANGED
@@ -1,32 +1,29 @@
 """
-Run with: adk web web_app.py --no-reload
+Inspection script for Runner source.
 """
 import nest_asyncio
 from google.adk import Runner
-
-from src.agents import create_root_agent
-from src.utils import logger
+import inspect

-# Apply nest_asyncio to handle event loop conflicts in the web server
 nest_asyncio.apply()

-logger.info("✅ Web Interface Ready. Run 'adk web web_app.py --no-reload' to start.")
+print("Source of Runner.__init__:")
+try:
+    print(inspect.getsource(Runner.__init__))
+except Exception as e:
+    print(f"Error getting source: {e}")

+print("\nSource of Runner properties:")
+# Check if app is a property or attribute
+if hasattr(Runner, 'app'):
+    attr = getattr(Runner, 'app')
+    if isinstance(attr, property):
+        print("Found 'app' property.")
+        try:
+            print(inspect.getsource(attr.fget))
+        except:
+            print("Could not get source of fget")
+    else:
+        print(f"'app' is {type(attr)}")
+else:
+    print("'app' not found in Runner class dict")