Yash030 committed
Commit 5d3c73d · Parent: f488bb6

Added Pinecone Session Storage

.gitignore CHANGED
@@ -47,6 +47,9 @@ env/
 # Logs
 *.log
 
+#json
+*.evalset.json
+
 # OS
 .DS_Store
 Thumbs.db
requirements.txt CHANGED
@@ -6,5 +6,5 @@ nest_asyncio
 python-dotenv
 certifi
 litellm
-pinecone-client
+pinecone
 sentence-transformers
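
The dependency swap reflects Pinecone's packaging change: the `pinecone-client` distribution is deprecated and the SDK is now published as `pinecone`, which is the import that `src/memory.py` relies on (`from pinecone import Pinecone, ServerlessSpec`). A minimal sketch of the client setup this dependency enables; the index name, cloud, and region below are illustrative and not taken from this commit:

    import os
    from pinecone import Pinecone, ServerlessSpec

    pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

    index_name = "session-memory"  # hypothetical name; src/memory.py defines its own
    if index_name not in pc.list_indexes().names():
        pc.create_index(
            name=index_name,
            dimension=384,  # all-MiniLM-L6-v2 produces 384-dimensional embeddings
            metric="cosine",
            spec=ServerlessSpec(cloud="aws", region="us-east-1"),
        )
    index = pc.Index(index_name)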
src/agents.py CHANGED
@@ -2,12 +2,20 @@
 Agent definitions for the AI-Powered Package Conflict Resolver.
 Defines Query Creator, Web Search, Web Crawl, and CodeSurgeon agents.
 """
+import sys
+import asyncio
+import json
+
+# Fix for Playwright on Windows (NotImplementedError in subprocess)
+if sys.platform == 'win32':
+    asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
+
 from google.adk import Agent
-from google.adk.agents import SequentialAgent, LoopAgent, BaseAgent, ParallelAgent
-from google.adk.events import Event, EventActions
+from google.adk.agents import SequentialAgent, ParallelAgent
+# from google.adk.events import Event, EventActions  # Unused after removing loop
 from google.adk.tools import google_search, load_memory
 from .config import get_model, get_gemini_model
-from .tools import batch_tool, adaptive_tool, save_context_tool, retrieve_context_tool, submit_queries_tool, validate_tool
+from .tools import batch_tool, adaptive_tool, save_context_tool, retrieve_context_tool, submit_queries_tool, validate_tool, retrieve_memory_tool
 from .utils import logger
 
 
@@ -19,12 +27,12 @@ def create_query_creator_agent():
     agent = Agent(
         name="Query_Creator_Agent",
         model=get_gemini_model(),
-        tools=[google_search, save_context_tool, load_memory],  # Added load_memory
+        tools=[google_search, retrieve_memory_tool],  # Added retrieve_memory_tool
         description="Dependency Detective specialized in diagnosing Python environment conflicts",
         instruction="""
         You are the "Dependency Detective," an expert AI agent specialized in diagnosing Python environment conflicts, legacy code rot, and version mismatch errors.
         Use Google Search Tool if You don't Know about those issue or packages.
-        Use `load_memory` to recall details from previous conversations if the user refers to "last time" or "previous error".
+        Use `retrieve_memory` to recall details from previous conversations if the user refers to "last time" or "previous error".
 
         YOUR GOAL:
         1. Analyze the input to identify the specific packages involved (e.g., "tensorflow", "numpy").
@@ -114,6 +122,35 @@ def create_community_search_agent():
     logger.info("✅ Community Search agent created")
     return agent
 
+def create_context_search_agent():
+    """
+    Creates the Context Search agent (General Context).
+    """
+    agent = Agent(
+        name="Context_Search_Agent",
+        model=get_gemini_model(),
+        tools=[google_search],
+        description="Search agent focused on general context and main URL",
+        instruction="""
+        You are the "Context Researcher".
+
+        YOUR GOAL:
+        1. Analyze the input search queries to identify the "Main Topic" or "Core Library/Framework" (e.g., if input is "numpy float error", main topic is "numpy").
+        2. Search for the Home Page, Main Documentation Hub, or Wikipedia page for this Main Topic.
+        3. Provide the top 3-4 most authoritative URLs for this topic.
+
+        INPUT: List of search queries.
+        OUTPUT: Top 3-4 most relevant URLs.
+
+        OUTPUT FORMAT:
+        **Model: Gemini 2.5 Pro**
+        ## Context Results
+        {"top_urls": ["url1", "url2", "url3"]}
+        """
+    )
+    logger.info("✅ Context Search agent created")
+    return agent
+
 
 class WebCrawlAgent(Agent):
     """
@@ -147,16 +184,22 @@ class WebCrawlAgent(Agent):
         batch_result = await batch_crawl_tool.func(urls)
 
         # 2. Analyze Result (Simple Heuristic)
+        # Check if we got valid content
+        content = batch_result.get("combined_content", "")
+
         # If result contains many "Error" or is very short, we might need adaptive
-        if "Error" not in batch_result and len(batch_result) > 500:
-            return f"**Model: Custom Logic**\n## Crawled Content Analysis\n\n{batch_result}"
+        if "Error" not in content and len(content) > 500:
+            return f"**Model: Custom Logic**\n## Crawled Content Analysis\n\n{content}"
 
         # 3. Fallback to Adaptive (if batch failed significantly)
         logger.info("⚠️ Batch crawl had issues. Falling back to Adaptive Crawl for first URL...")
         # For simplicity in this custom agent, we just try the first URL adaptively as a fallback
         adaptive_result = await adaptive_tool.func(urls[0], query="dependency conflicts version requirements")
 
-        return f"**Model: Custom Logic (Adaptive Fallback)**\n## Crawled Content Analysis\n\n{adaptive_result}"
+        # Format adaptive result (it's a dict)
+        formatted_adaptive = json.dumps(adaptive_result, indent=2) if isinstance(adaptive_result, dict) else str(adaptive_result)
+
+        return f"**Model: Custom Logic (Adaptive Fallback)**\n## Crawled Content Analysis\n\n{formatted_adaptive}"
 
 def create_web_crawl_agent():
     """
@@ -202,64 +245,29 @@ def create_code_surgeon_agent():
         - Clear explanation of the issue
        - Updated requirements.txt content
         - Migration notes (if breaking changes exist)
-        """
-    )
-    logger.info("✅ Code Surgeon agent created")
-    return agent
-
-
-def create_verification_agent():
-    """
-    Creates the Verification agent that checks the Code Surgeon's work.
-    """
-    agent = Agent(
-        name="Verification_Agent",
-        model=get_model(),
-        tools=[validate_tool, save_context_tool],
-        description="Quality Assurance specialist for dependency files",
-        instruction="""
-        You are the "Quality Assurance Specialist".
 
-        YOUR TASK:
-        1. Review the 'requirements.txt' content generated by the Code Surgeon.
-        2. Use the `validate_requirements` tool to check for syntax errors.
-        3. If the tool returns "SUCCESS":
-            - Call `save_context('verification_status', 'SUCCESS')`.
-            - Respond with "Verification Passed".
-        4. If the tool returns errors:
-            - Call `save_context('verification_status', 'FAILED')`.
-            - Explain the errors to the Code Surgeon so they can fix it.
+        IMPORTANT:
+        - Call `save_context('solution', 'YOUR_SOLUTION_SUMMARY')` to store the final resolution.
+        - Call `save_context('requirements', 'YOUR_REQUIREMENTS_CONTENT')` to store the file content.
         """
     )
-    logger.info("✅ Verification agent created")
+    logger.info("✅ Code Surgeon agent created")
     return agent
 
 
-class StopCheckerAgent(BaseAgent):
-    """
-    Agent that checks the verification status and stops the loop if successful.
-    """
-    async def _run_async_impl(self, ctx):
-        # Retrieve status from session state
-        status = ctx.session.state.get("verification_status", "FAILED")
-        logger.info(f"🛑 StopChecker: Status is {status}")
-
-        should_stop = (status == "SUCCESS")
-        if should_stop:
-            logger.info("🛑 StopChecker: Escalating to stop loop.")
-
-        # Yield an event with escalate=True if we should stop
-        yield Event(author=self.name, actions=EventActions(escalate=should_stop))
-
+# ===== MEMORY SERVICE =====
+from .config import get_memory_service
+global_memory_service = get_memory_service()
 
 # ===== MEMORY CALLBACK =====
 async def auto_save_to_memory(callback_context):
     """Automatically save session to memory after each agent turn."""
     try:
-        await callback_context._invocation_context.memory_service.add_session_to_memory(
+        # Use global memory service instead of context-bound one
+        await global_memory_service.add_session_to_memory(
             callback_context._invocation_context.session
         )
-        logger.info("💾 Session automatically saved to memory.")
+        logger.info("💾 Session automatically saved to memory (Global Service).")
     except Exception as e:
         logger.error(f"❌ Failed to auto-save session: {e}")
 
@@ -274,12 +282,13 @@ def create_root_agent():
 
     docs_search = create_docs_search_agent()
     community_search = create_community_search_agent()
+    context_search = create_context_search_agent()
 
     # Parallel Research
     parallel_search = ParallelAgent(
         name="Parallel_Search_Team",
-        sub_agents=[docs_search, community_search],
-        description="Parallel search for official and community resources"
+        sub_agents=[docs_search, community_search, context_search],
+        description="Parallel search for official, community, and general context resources"
     )
 
     # Group Research Team
@@ -292,22 +301,13 @@ def create_root_agent():
     web_crawl = create_web_crawl_agent()
     web_crawl = create_web_crawl_agent()
 
-    # Code Surgeon Loop
+    # Code Surgeon (No Loop)
     code_surgeon = create_code_surgeon_agent()
-    verification = create_verification_agent()
-    stop_checker = StopCheckerAgent(name="Stop_Checker")
-
-    code_surgeon_team = LoopAgent(
-        name="Code_Surgeon_Team",
-        sub_agents=[code_surgeon, verification, stop_checker],
-        max_iterations=3,
-        description="Self-correcting dependency resolution team"
-    )
 
     # Create the sequential agent
     agent = SequentialAgent(
         name="Package_Conflict_Resolver_Root_Agent",
-        sub_agents=[web_research_team, web_crawl, code_surgeon_team],
+        sub_agents=[web_research_team, web_crawl, code_surgeon],
         description="Root agent managing the dependency resolution pipeline",
         after_agent_callback=auto_save_to_memory  # Auto-save history
     )
@@ -317,3 +317,7 @@ def create_root_agent():
 
 # ===== MODULE-LEVEL INITIALIZATION FOR ADK WEB =====
 root_agent = create_root_agent()
+
+# Removed App definition to avoid ImportError.
+# Memory is handled via global_memory_service in callback.
+agent = root_agent
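
The new module-level guard in `src/agents.py` addresses a Windows-specific failure: Playwright (used by Crawl4AI) spawns browser subprocesses through asyncio, and asyncio subprocess support raises `NotImplementedError` under the selector event loop. A self-contained illustration of the pattern, separate from the repo code:

    import sys
    import asyncio

    async def spawn_child() -> None:
        # Subprocess creation requires a Proactor loop on Windows; a
        # SelectorEventLoop raises NotImplementedError here.
        proc = await asyncio.create_subprocess_exec(sys.executable, "-c", "print('ok')")
        await proc.wait()

    if sys.platform == "win32":
        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

    asyncio.run(spawn_child())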
src/app.py CHANGED
@@ -6,11 +6,14 @@ from google.adk import App
 from google.adk.types import EventsCompactionConfig
 from .agents import root_agent
 from .utils import logger
+from .config import get_memory_service, get_session_service
 
-# Define the App with Events Compaction
+# Define the App with Events Compaction and Custom Services
 package_conflict_resolver_app = App(
     name="Package_Conflict_Resolver_App",
     root_agent=root_agent,
+    memory_service=get_memory_service(),
+    session_service=get_session_service(),
     events_compaction_config=EventsCompactionConfig(
         compaction_interval=3,  # Trigger compaction every 3 invocations
         overlap_size=1,  # Keep 1 previous turn for context
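
Note that `get_memory_service()` is now called both here and at module level in `src/agents.py`, and each call builds a fresh service (including loading the SentenceTransformer model). If that double initialization becomes a problem, one option, sketched here as an assumption rather than something in the commit, is to memoize the factory:

    # Hypothetical sketch: cache the factory so the embedding model loads once.
    from functools import lru_cache
    from src.config import get_memory_service

    @lru_cache(maxsize=1)
    def get_memory_service_cached():
        return get_memory_service()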
src/config.py CHANGED
@@ -57,7 +57,8 @@ def get_session_service(db_url=None):
     """
     # Prioritize argument, then env var, then local default
     if not db_url:
-        db_url = os.getenv("DATABASE_URL", "sqlite+aiosqlite:///package_conflict_resolver.db")
+        # Use legacy_solver.db as it contains the existing sessions
+        db_url = os.getenv("DATABASE_URL", "sqlite+aiosqlite:///legacy_solver.db")
 
     session_service = DatabaseSessionService(db_url=db_url)
     logger.info(f"✅ Session service initialized: {db_url.split('://')[0]}://...")  # Log safe URL
@@ -74,6 +75,7 @@ def get_memory_service():
     Uses Pinecone if PINECONE_API_KEY is set, otherwise InMemory.
     """
     pinecone_key = os.getenv("PINECONE_API_KEY")
+    logger.info(f"🔍 Checking PINECONE_API_KEY: {'Found' if pinecone_key else 'Missing'}")
 
     if pinecone_key:
         try:
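
Only the changed lines of `get_memory_service()` appear in this hunk; per its docstring it picks Pinecone when `PINECONE_API_KEY` is set and falls back to an in-memory service otherwise. A rough sketch of that shape, with the fallback import and the no-argument constructor assumed rather than taken from the diff:

    # Illustrative only; the real function lives in src/config.py.
    import os

    def get_memory_service_sketch():
        pinecone_key = os.getenv("PINECONE_API_KEY")
        if pinecone_key:
            try:
                from src.memory import PineconeMemoryService
                return PineconeMemoryService()  # assumed no-arg constructor
            except Exception:
                pass  # fall through to the in-memory fallback
        from google.adk.memory import InMemoryMemoryService  # assumption about the ADK import
        return InMemoryMemoryService()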
src/demo issue.json ADDED
The diff for this file is too large to render. See raw diff
 
src/evalset169426.evalset.json CHANGED
The diff for this file is too large to render. See raw diff
 
src/memory.py CHANGED
@@ -1,12 +1,13 @@
 import os
 import uuid
 from typing import List, Dict, Any
-from google.adk.memory import MemoryService
+from typing import List, Dict, Any
+# from google.adk.memory import MemoryService  # Not available in this version
 from pinecone import Pinecone, ServerlessSpec
 from sentence_transformers import SentenceTransformer
 from .utils import logger
 
-class PineconeMemoryService(MemoryService):
+class PineconeMemoryService:  # Removed inheritance to avoid ImportError
     """
     Custom Memory Service using Pinecone for long-term vector storage.
     Uses 'all-MiniLM-L6-v2' for local embedding generation.
@@ -32,8 +33,10 @@ class PineconeMemoryService:
         self.index = self.pc.Index(self.index_name)
 
         # Initialize Embedding Model
-        logger.info("🧠 Loading embedding model: all-MiniLM-L6-v2...")
+        logger.info("🧠 Loading embedding model: all-MiniLM-L6-v2... (This may take a while if downloading)")
+        print("DEBUG: Starting SentenceTransformer load...")
         self.model = SentenceTransformer('all-MiniLM-L6-v2')
+        print("DEBUG: SentenceTransformer loaded.")
         logger.info("✅ Pinecone Memory Service initialized")
 
     async def add_session_to_memory(self, session: Any):
@@ -41,14 +44,37 @@ class PineconeMemoryService:
         Embeds the session history and saves it to Pinecone.
         """
         try:
+            # Get session ID safely (ADK sessions usually use .id)
+            session_id = getattr(session, 'id', getattr(session, 'session_id', 'UNKNOWN'))
+
+            logger.info(f"💾 Attempting to save session to Pinecone. Session ID: {session_id}")
+            # Debug session structure
+            # logger.info(f"Session dir: {dir(session)}")
+
             # 1. Convert session to text
             # Assuming session has a 'history' or we can iterate turns
             # We'll construct a simplified text representation
             text_content = ""
-            for turn in session.turns:
-                text_content += f"{turn.role}: {turn.content}\n"
+
+            # Check for 'turns' or 'events'
+            if hasattr(session, 'turns'):
+                turns = session.turns
+                logger.info(f"Found {len(turns)} turns.")
+                for turn in turns:
+                    text_content += f"{turn.role}: {turn.content}\n"
+            elif hasattr(session, 'events'):
+                events = session.events
+                logger.info(f"Found {len(events)} events.")
+                for event in events:
+                    # Event structure might vary
+                    author = getattr(event, 'author', 'unknown')
+                    content = getattr(event, 'content', getattr(event, 'text', ''))
+                    text_content += f"{author}: {content}\n"
+            else:
+                logger.warning("⚠️ Session has no 'turns' or 'events' attribute.")
 
             if not text_content.strip():
+                logger.warning("⚠️ Session content is empty. Skipping Pinecone save.")
                 return
 
             # 2. Generate Embedding
@@ -56,15 +82,15 @@ class PineconeMemoryService:
 
             # 3. Create Metadata
             metadata = {
-                "session_id": session.session_id,
+                "session_id": session_id,
                 "text": text_content[:1000],  # Store snippet (limit size)
                 "timestamp": str(session.created_at) if hasattr(session, 'created_at') else ""
             }
 
             # 4. Upsert to Pinecone
             # Use session_id as vector ID
-            self.index.upsert(vectors=[(session.session_id, vector, metadata)])
-            logger.info(f"💾 Saved session {session.session_id} to Pinecone")
+            self.index.upsert(vectors=[(session_id, vector, metadata)])
+            logger.info(f"💾 Saved session {session_id} to Pinecone")
 
         except Exception as e:
             logger.error(f"❌ Failed to save to Pinecone: {e}")
src/test1.evalset.json CHANGED
The diff for this file is too large to render. See raw diff
 
src/tools.py CHANGED
@@ -2,161 +2,189 @@
 Tool definitions for the Legacy Dependency Solver.
 Includes Crawl4AI batch crawler for efficient multi-URL processing.
 """
-from typing import List
-from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, AdaptiveConfig
+from typing import List, Dict, Any
+import json
+import sys
+import asyncio
+import concurrent.futures
+from pydantic import BaseModel, Field
 
 from google.adk.tools import FunctionTool
 from .utils import logger
+from .config import get_memory_service  # Import memory service factory
 
+# --- 1. Define Schema (Module level for pickling) ---
+class SearchResult(BaseModel):
+    relevant_facts: List[str] = Field(..., description="Specific facts/numbers found.")
+    summary: str = Field(..., description="Concise summary related to the query.")
+    confidence: str = Field(..., description="Confidence level (High/Medium/Low).")
 
-async def batch_crawl_tool(urls: List[str]) -> str:
+# --- 2. Worker Functions (Run in Subprocess) ---
+
+def _run_batch_crawl_worker(urls: List[str]) -> Dict[str, Any]:
     """
-    Batch crawls multiple URLs to extract technical documentation.
-
-    Args:
-        urls: List of URLs to crawl
-
-    Returns:
-        Combined markdown content from all URLs
+    Worker function to run batch crawl in a separate process.
     """
-    logger.info(f"🕷️ Batch crawling {len(urls)} URLs...")
-
-    # Configure browser with headless mode and disable SSL verification
-    browser_config = BrowserConfig(
-        headless=True,
-        verbose=True
-    )
-
-    # Configure crawler to bypass cache
-    run_config = CrawlerRunConfig(
-        cache_mode=CacheMode.BYPASS,
-        word_count_threshold=10,
-    )
-
-    results = []
-
-    async with AsyncWebCrawler(config=browser_config) as crawler:
-        for url in urls:
-            logger.info(f"  📄 Crawling: {url}")
-            try:
-                result = await crawler.arun(url=url, config=run_config)
-                if result.success:
-                    results.append(f"# Content from {url}\n\n{result.markdown}\n\n")
-                    logger.info(f"  ✅ Success: {url}")
-                else:
-                    error_msg = f"Error crawling {url}: {result.error_message}"
-                    results.append(f"# {error_msg}\n\n")
-                    logger.warning(f"  ❌ Failed: {url} - {result.error_message}")
-            except Exception as e:
-                error_msg = f"Exception crawling {url}: {str(e)}"
-                results.append(f"# {error_msg}\n\n")
-                logger.error(f"  ⚠️ Exception: {url} - {e}")
-
-    combined = "\n".join(results)
-    logger.info(f"✅ Batch crawl completed: {len(results)} results")
-    return combined
-
-
-# ===== ADAPTIVE CRAWLING (COMMENTED OUT - NOT CURRENTLY USED) =====
-# Keeping this code for potential future use
-#
-#
-async def adaptive_crawl_tool(url: str, query: str) -> str:
+    # Enforce ProactorEventLoop on Windows for Playwright
+    if sys.platform == 'win32':
+        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
+
+    async def _async_logic():
+        from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode
+
+        # Shared Config
+        browser_config = BrowserConfig(
+            headless=True,
+            ignore_https_errors=True,
+            extra_args=["--ignore-certificate-errors", "--ignore-ssl-errors"]
+        )
+        run_config = CrawlerRunConfig(
+            cache_mode=CacheMode.BYPASS,
+            word_count_threshold=10,
+        )
+
+        results = []
+        # limit to top 3
+        target_urls = urls[:3]
+
+        async with AsyncWebCrawler(config=browser_config) as crawler:
+            for url in target_urls:
+                try:
+                    crawl_result = await crawler.arun(url=url, config=run_config)
+                    if crawl_result.success:
+                        results.append(f"--- SOURCE: {url} ---\n{crawl_result.markdown[:15000]}\n")
+                    else:
+                        results.append(f"--- SOURCE: {url} ---\n[Error: Failed to crawl]\n")
+                except Exception as e:
+                    results.append(f"--- SOURCE: {url} ---\n[Exception: {str(e)}]\n")
+
+        return {
+            "combined_content": "\n".join(results),
+            "status": "completed"
+        }
+
+    return asyncio.run(_async_logic())
+
+
+def _run_adaptive_crawl_worker(start_url: str, user_query: str) -> Dict[str, Any]:
     """
-    Adaptive crawl for single URLs when batch crawl needs deeper analysis.
-
-    Args:
-        url: The URL to crawl
-        query: The specific query/topic to look for
-
-    Returns:
-        Extracted markdown content or error message
+    Worker function to run adaptive crawl in a separate process.
     """
-    logger.info(f"🔍 Adaptive crawling: {url} for '{query}'")
-
-    browser_config = BrowserConfig(
-        headless=True,
-        verbose=True,
-        ignore_https_errors=True,
-        extra_args=["--ignore-certificate-errors", "--ignore-ssl-errors"]
-    )
-
-    # Adaptive config for discovery
-    adaptive_config = AdaptiveConfig(
-        max_pages=3,
-        confidence_threshold=0.7,
-        top_k_links=2,
-    )
-
-    async with AsyncWebCrawler(config=browser_config) as crawler:
-        # We need to use the adaptive crawler wrapper or logic if available in this version of crawl4ai
-        # Based on reference code, it uses AdaptiveCrawler
-        from crawl4ai import AdaptiveCrawler
-
-        adaptive = AdaptiveCrawler(crawler, config=adaptive_config)
-
-        try:
-            # Discovery
-            await adaptive.digest(start_url=url, query=query)
+    if sys.platform == 'win32':
+        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
+
+    async def _async_logic():
+        from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, AdaptiveConfig, LLMConfig
+        from crawl4ai.extraction_strategy import LLMExtractionStrategy
+
+        browser_config = BrowserConfig(
+            headless=True,
+            verbose=True,
+            ignore_https_errors=True,
+            extra_args=["--ignore-certificate-errors", "--ignore-ssl-errors"]
+        )
+
+        async with AsyncWebCrawler(config=browser_config) as crawler:
+            # Phase 1: Discovery
+            adaptive_config = AdaptiveConfig(
+                max_pages=3,
+                confidence_threshold=0.7,
+                top_k_links=2,
+            )
+
+            # Import inside function to avoid top-level import issues in subprocess if needed
+            from crawl4ai import AdaptiveCrawler
+            adaptive = AdaptiveCrawler(crawler, config=adaptive_config)
+
+            try:
+                await adaptive.digest(start_url=start_url, query=user_query)
+            except Exception as e:
+                return {"error": f"Crawl failed during discovery: {str(e)}"}
 
             top_content = adaptive.get_relevant_content(top_k=1)
             if not top_content:
-                return "No relevant content found via adaptive crawling."
+                return {"error": "No relevant content found via adaptive crawling."}
 
             best_url = top_content[0]['url']
-            logger.info(f"  ✅ Best source found: {best_url}")
 
-            # Extraction (simplified to just return markdown for now to avoid complex LLM config in tool)
-            # The reference uses LLMExtractionStrategy, but we can rely on the Agent to parse the markdown.
-            # We'll just crawl the best URL found.
+            # Phase 2: Extraction
+            dynamic_instruction = f"""
+            Extract ONLY information matching this request: '{user_query}'.
+            If not found, state that in the summary. Do not hallucinate.
+            """
 
-            run_config = CrawlerRunConfig(
+            extraction_config = CrawlerRunConfig(
                 cache_mode=CacheMode.BYPASS,
-                word_count_threshold=10,
+                word_count_threshold=1,
+                page_timeout=60000,
+                extraction_strategy=LLMExtractionStrategy(
+                    llm_config=LLMConfig(provider="ollama/qwen2.5:7b", api_token="ollama"),
+                    schema=SearchResult.model_json_schema(),
+                    extraction_type="schema",
+                    instruction=dynamic_instruction,
+                ),
             )
 
-            result = await crawler.arun(url=best_url, config=run_config)
-            if result.success:
-                return f"# Adaptive Result from {best_url}\n\n{result.markdown}"
-            else:
-                return f"Error crawling best url {best_url}: {result.error_message}"
-
-        except Exception as e:
-            logger.error(f"  ❌ Adaptive crawl failed: {e}")
-            return f"Adaptive crawl failed: {str(e)}"
+            try:
+                result = await crawler.arun(url=best_url, config=extraction_config)
+                if result.extracted_content:
+                    return json.loads(result.extracted_content)
+                return {"error": "Extraction returned empty content."}
+            except json.JSONDecodeError:
+                return {"raw_output": result.extracted_content}
+            except Exception as e:
+                return {"error": f"Extraction failed: {str(e)}"}
 
-adaptive_tool = FunctionTool(adaptive_crawl_tool)
+    return asyncio.run(_async_logic())
 
 
-# Wrap tool for ADK Agent usage
+# --- 3. Main Tools (Async Wrappers) ---
+
+async def batch_crawl_tool(urls: List[str]) -> Dict[str, Any]:
+    """
+    Crawls a LIST of URLs in one go using a subprocess to ensure correct event loop.
+    """
+    logger.info(f"🚀 Batch Tool Triggered: Processing {len(urls)} URLs...")
+
+    loop = asyncio.get_running_loop()
+    with concurrent.futures.ProcessPoolExecutor() as pool:
+        try:
+            result = await loop.run_in_executor(pool, _run_batch_crawl_worker, urls)
+            return result
+        except Exception as e:
+            logger.error(f"❌ Batch crawl subprocess failed: {e}")
+            return {"combined_content": f"Error: {str(e)}", "status": "failed"}
+
+async def adaptive_crawl_tool(start_url: str, user_query: str) -> Dict[str, Any]:
+    """
+    Performs adaptive crawl using a subprocess.
+    """
+    logger.info(f"🛠️ Tool Triggered: Adaptive Crawl on {start_url}")
+
+    loop = asyncio.get_running_loop()
+    with concurrent.futures.ProcessPoolExecutor() as pool:
+        try:
+            result = await loop.run_in_executor(pool, _run_adaptive_crawl_worker, start_url, user_query)
+            return result
+        except Exception as e:
+            logger.error(f"❌ Adaptive crawl subprocess failed: {e}")
+            return {"error": f"Subprocess failed: {str(e)}"}
+
+
+# Convert to ADK Tools
 batch_tool = FunctionTool(batch_crawl_tool)
+adaptive_tool = FunctionTool(adaptive_crawl_tool)
 
 
 # ===== STATE MANAGEMENT TOOLS =====
-from typing import Dict, Any
 from google.adk.tools import ToolContext
 
 def save_context(tool_context: ToolContext, key: str, value: str) -> str:
-    """
-    Saves a key-value pair to the session state.
-    Useful for remembering packages, versions, or decisions across agents.
-
-    Args:
-        key: The key to store (e.g., 'packages', 'versions')
-        value: The value to store
-    """
     tool_context.state[key] = value
     logger.info(f"💾 State Saved: {key} = {value}")
     return f"Saved {key} to state."
 
 def retrieve_context(tool_context: ToolContext, key: str) -> str:
-    """
-    Retrieves a value from the session state.
-
-    Args:
-        key: The key to retrieve
-    """
     value = tool_context.state.get(key, "Not found")
     logger.info(f"📂 State Retrieved: {key} = {value}")
     return str(value)
@@ -165,12 +193,6 @@ save_context_tool = FunctionTool(save_context)
 retrieve_context_tool = FunctionTool(retrieve_context)
 
 def submit_queries(tool_context: ToolContext, queries: List[str]) -> str:
-    """
-    Submits the generated search queries to the shared session state.
-
-    Args:
-        queries: The list of search queries to submit.
-    """
     tool_context.state['search_queries'] = queries
     logger.info(f"🚀 Queries Submitted: {queries}")
     return "Queries submitted successfully."
@@ -178,35 +200,45 @@ def submit_queries(tool_context: ToolContext, queries: List[str]) -> str:
 submit_queries_tool = FunctionTool(submit_queries)
 
 def validate_requirements(tool_context: ToolContext, requirements_content: str) -> str:
-    """
-    Validates the generated requirements.txt content.
-    Checks for basic syntax and conflicting versions (mocked logic).
-
-    Args:
-        requirements_content: The content of the requirements.txt file.
-    """
     if not requirements_content:
         return "Error: Empty requirements content."
-
     lines = requirements_content.strip().split('\n')
     errors = []
-
     for line in lines:
         line = line.strip()
         if not line or line.startswith('#'):
            continue
-
-        # Basic syntax check (package==version)
         import re
         if not re.match(r'^[a-zA-Z0-9_\-]+[=<>!~]+[0-9a-zA-Z\.]+', line):
-            # Allow simple package names too, but warn
            if not re.match(r'^[a-zA-Z0-9_\-]+$', line):
                 errors.append(f"Invalid syntax: {line}")
-
     if errors:
         return f"Validation Failed: {'; '.join(errors)}"
-
     logger.info("✅ Requirements validation passed.")
     return "SUCCESS"
 
 validate_tool = FunctionTool(validate_requirements)
+
+# ===== MEMORY RETRIEVAL TOOL =====
+async def retrieve_memory(query: str) -> str:
+    """
+    Searches long-term memory (Pinecone) for relevant past sessions.
+    Use this to recall details from previous conversations.
+    """
+    logger.info(f"🧠 Searching Memory for: {query}")
+    try:
+        # Initialize service on demand (or use singleton if configured)
+        memory_service = get_memory_service()
+        results = await memory_service.search_memory(query)
+
+        if not results:
+            return "No relevant memories found."
+
+        formatted_results = "\n---\n".join(results)
+        return f"Found relevant memories:\n{formatted_results}"
+
+    except Exception as e:
+        logger.error(f"❌ Memory retrieval failed: {e}")
+        return f"Error retrieving memory: {str(e)}"
+
+retrieve_memory_tool = FunctionTool(retrieve_memory)
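
The crawl tools now run Playwright work in a separate process, which is why the workers are plain module-level functions: the callable and its arguments must be picklable to cross the process boundary, and each child runs its own event loop via `asyncio.run`. A stripped-down version of the pattern, generic rather than tied to Crawl4AI:

    import asyncio
    import concurrent.futures

    def blocking_worker(url: str) -> dict:
        # Module-level so it can be pickled; the child process gets its own loop.
        async def _logic() -> dict:
            await asyncio.sleep(0.1)  # stand-in for the real crawl
            return {"url": url, "status": "completed"}
        return asyncio.run(_logic())

    async def run_in_subprocess(url: str) -> dict:
        loop = asyncio.get_running_loop()
        with concurrent.futures.ProcessPoolExecutor() as pool:
            return await loop.run_in_executor(pool, blocking_worker, url)

    if __name__ == "__main__":
        print(asyncio.run(run_in_subprocess("https://example.com")))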
web_app.py CHANGED
@@ -1,32 +1,29 @@
 """
-Web Interface Entry Point for ADK Web UI.
-Run with: adk web web_app.py --no-reload
+Inspection script for Runner source.
 """
 import nest_asyncio
 from google.adk import Runner
-from src.config import get_session_service
-from src.agents import create_root_agent
-from src.utils import logger
+import inspect
 
-# Apply nest_asyncio to handle event loop conflicts in the web server
 nest_asyncio.apply()
 
-logger.info("🌐 Initializing Web Interface...")
+print("Source of Runner.__init__:")
+try:
+    print(inspect.getsource(Runner.__init__))
+except Exception as e:
+    print(f"Error getting source: {e}")
 
-# Initialize Session Service
-# We use the same SQLite database as the CLI
-session_service = get_session_service()
-
-# Initialize the Root Agent
-# This is the agent that will process the web queries
-agent = create_root_agent()
-
-# Initialize Runner
-# The ADK Web UI looks for a 'runner' or 'agent' instance
-runner = Runner(
-    agent=agent,
-    app_name="package_conflict_resolver_web",
-    session_service=session_service
-)
-
-logger.info("✅ Web Interface Ready. Run 'adk web web_app.py --no-reload' to start.")
+print("\nSource of Runner properties:")
+# Check if app is a property or attribute
+if hasattr(Runner, 'app'):
+    attr = getattr(Runner, 'app')
+    if isinstance(attr, property):
+        print("Found 'app' property.")
+        try:
+            print(inspect.getsource(attr.fget))
+        except:
+            print("Could not get source of fget")
+    else:
+        print(f"'app' is {type(attr)}")
+else:
+    print("'app' not found in Runner class dict")