Yash030 commited on
Commit
dff68cb
·
1 Parent(s): 3e9d77d

Initial Files

Browse files
Files changed (19) hide show
  1. .env.example +5 -0
  2. .gitignore +76 -0
  3. CONTRIBUTING.md +52 -0
  4. Dockerfile +37 -0
  5. KAGGLE_CAPSTONE_WRITEUP.md +121 -0
  6. LICENSE +21 -0
  7. README.md +117 -10
  8. main.py +143 -0
  9. requirements.txt +11 -0
  10. src/__init__.py +10 -0
  11. src/agent.py +1 -0
  12. src/agents.py +323 -0
  13. src/app.py +23 -0
  14. src/config.py +91 -0
  15. src/demo issue.json +0 -0
  16. src/memory.py +123 -0
  17. src/tools.py +244 -0
  18. src/utils.py +25 -0
  19. web_app.py +91 -0
.env.example ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Google API Key
2
+ # Get your key from: https://aistudio.google.com/apikey
3
+ GOOGLE_API_KEY=your_google_api_key_here
4
+ OPENROUTER_API_KEY=your_openrouter_api_key_here
5
+ PINECONE_API_KEY=your_pinecone_api_key_here
.gitignore ADDED
@@ -0,0 +1,76 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ env/
8
+ venv/
9
+ ENV/
10
+ build/
11
+ develop-eggs/
12
+ dist/
13
+ downloads/
14
+ eggs/
15
+ .eggs/
16
+ lib/
17
+ lib64/
18
+ parts/
19
+ sdist/
20
+ var/
21
+ wheels/
22
+ *.egg-info/
23
+ .installed.cfg
24
+ *.egg
25
+
26
+ # Virtual Environments
27
+ venv/
28
+ ENV/
29
+ env/
30
+ .venv
31
+
32
+ # IDE
33
+ .vscode/
34
+ .idea/
35
+ *.swp
36
+ *.swo
37
+ *~
38
+
39
+ # Environment Variables
40
+ .env
41
+
42
+ # Database
43
+ *.db
44
+ *.sqlite
45
+ *.sqlite3
46
+
47
+ # Logs
48
+ *.log
49
+
50
+ #json
51
+ *.evalset.json
52
+
53
+ # OS
54
+ .DS_Store
55
+ Thumbs.db
56
+
57
+ # Playwright browsers cache (optional - can be removed if needed)
58
+ # ms-playwright/
59
+
60
+ # Crawl4AI cache
61
+ .crawl4ai/
62
+
63
+ # Testing
64
+ .pytest_cache/
65
+ .coverage
66
+ htmlcov/
67
+
68
+ # Development/Debug files
69
+ verify_*.py
70
+ debug_*.py
71
+ inspect_*.py
72
+ test_*.py
73
+ PROJECT_RENAME_SUMMARY.md
74
+
75
+ # Old database files
76
+ legacy_solver.db
CONTRIBUTING.md ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributing to AI-Powered Package Conflict Resolver
2
+
3
+ Thank you for your interest in contributing! 🎉
4
+
5
+ ## Getting Started
6
+
7
+ 1. Fork the repository
8
+ 2. Clone your fork: `git clone https://github.com/your-username/package_conflict_resolver.git`
9
+ 3. Create a feature branch: `git checkout -b feature/amazing-feature`
10
+ 4. Make your changes
11
+ 5. Commit your changes: `git commit -m 'Add amazing feature'`
12
+ 6. Push to the branch: `git push origin feature/amazing-feature`
13
+ 7. Open a Pull Request
14
+
15
+ ## Development Setup
16
+
17
+ ```bash
18
+ # Install dependencies
19
+ pip install -r requirements.txt
20
+
21
+ # Install browsers
22
+ crawl4ai-setup
23
+
24
+ # Set up environment
25
+ cp .env.example .env
26
+ # Add your GOOGLE_API_KEY
27
+ ```
28
+
29
+ ## Code Style
30
+
31
+ - Follow PEP 8 guidelines
32
+ - Use type hints where appropriate
33
+ - Add docstrings to functions and classes
34
+ - Keep functions focused and modular
35
+
36
+ ## Testing
37
+
38
+ Before submitting a PR:
39
+ 1. Test your changes with `python main.py`
40
+ 2. Ensure no errors in the web interface: `adk web web_app.py --no-reload`
41
+
42
+ ## Reporting Issues
43
+
44
+ When reporting issues, please include:
45
+ - Python version
46
+ - Operating system
47
+ - Error message (full stack trace)
48
+ - Steps to reproduce
49
+
50
+ ## Questions?
51
+
52
+ Feel free to open an issue for any questions or discussions!
Dockerfile ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use Python 3.11 to avoid the deprecation warning
2
+ FROM python:3.11-slim
3
+
4
+ # Set working directory
5
+ WORKDIR /app
6
+
7
+ # Set environment variables
8
+ ENV PYTHONDONTWRITEBYTECODE=1 \
9
+ PYTHONUNBUFFERED=1 \
10
+ PORT=7860
11
+
12
+ # Install system dependencies (including those for Playwright/Crawl4AI if needed)
13
+ # We install basic build tools and libraries often needed by python packages
14
+ RUN apt-get update && apt-get install -y \
15
+ build-essential \
16
+ curl \
17
+ software-properties-common \
18
+ && rm -rf /var/lib/apt/lists/*
19
+
20
+ # Copy requirements first to leverage Docker cache
21
+ COPY requirements.txt .
22
+
23
+ # Install Python dependencies
24
+ RUN pip install --no-cache-dir -r requirements.txt
25
+
26
+ # Install Playwright browsers (required by crawl4ai)
27
+ RUN playwright install --with-deps chromium
28
+
29
+ # Copy the rest of the application
30
+ COPY . .
31
+
32
+ # Expose the port (Hugging Face Spaces use 7860)
33
+ EXPOSE 7860
34
+
35
+ # Run the application
36
+ # We point to 'src' because we created src/agent.py
37
+ CMD ["adk", "web", "--host", "0.0.0.0", "--port", "7860", "src"]
KAGGLE_CAPSTONE_WRITEUP.md ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Package Conflict Identifier 📦🔍
2
+ **Why "Chatting with AI" Isn't Enough for Modern Debugging**
3
+
4
+ ---
5
+
6
+ ## The Problem: "Lazy AI" vs. Real-World Bugs
7
+
8
+ We've all been there. You paste a cryptic error message into ChatGPT or Gemini, and it gives you a confident, generic answer: *"Check your syntax"* or *"Ensure your JSON is formatted correctly."*
9
+
10
+ But what if your syntax is fine? What if the error isn't in *your* code, but deep inside a library you just installed? What if it's a brand-new bug reported on GitHub only 48 hours ago?
11
+
12
+ **Static LLMs fail here because they are frozen in time.** They don't know about the bug report filed yesterday. They don't know that `library-v2.1` broke compatibility with `framework-v3.0`. They guess based on general patterns, often leading you down a rabbit hole of useless "fixes."
13
+
14
+ I built the **Package Conflict Identifier** to solve this. It doesn't just "guess"—it **investigates**.
15
+
16
+ ---
17
+
18
+ ## The "Real Web" Advantage
19
+
20
+ This isn't just a chatbot. It's an autonomous research team. When it sees an error, it doesn't rely solely on its training data. It:
21
+ 1. **Diagnoses** the specific package causing the issue.
22
+ 2. **Searches** the live web for that specific error string.
23
+ 3. **Crawls** GitHub Issues, StackOverflow, and official documentation.
24
+ 4. **Synthesizes** a solution based on *current* reality, not 2023 data.
25
+
26
+ ### Case Study: The "Ollama/LiteLLM" Bug
27
+
28
+ During development, I encountered a nasty error while trying to chain agents using **LiteLLM** and **Ollama**:
29
+
30
+ ```text
31
+ litellm.APIConnectionError: Ollama_chatException - {"error":"json: cannot unmarshal array into Go struct field ChatRequest.messages.content of type string"}
32
+ ```
33
+
34
+ #### ❌ The Generic AI Answer (ChatGPT/Gemini)
35
+ When I pasted this into a standard LLM, it said:
36
+ > *"You are sending an array instead of a string in your JSON request. Change your code to send a string."*
37
+
38
+ This was **useless**. I wasn't writing the raw JSON request; the `litellm` library was. I couldn't "just change my code."
39
+
40
+ #### ✅ The Agent's Answer
41
+ My **Package Conflict Identifier** took a different approach.
42
+ 1. **Query Creator** generated search terms: `"LiteLLM Ollama json unmarshal array error"`.
43
+ 2. **Docs Search Agent** found a specific GitHub Issue: **`BerriAI/litellm#11148`**.
44
+ 3. **Web Crawl Agent** read the issue thread and found the root cause:
45
+ > *"LiteLLM sends content as an array/object (OpenAI-style), but Ollama expects a simple string. This is a known incompatibility in LiteLLM v1.66+."*
46
+
47
+ **The Result:** Instead of wasting hours debugging my own code, the agent told me: *"This is a bug in the library. Downgrade LiteLLM or apply this specific patch."*
48
+
49
+ **This is the difference between a chatbot and an engineer.**
50
+
51
+ ---
52
+
53
+ ## System Architecture
54
+
55
+ How does it work? It uses a multi-agent pipeline to mimic a senior engineer's debugging workflow.
56
+
57
+ ```text
58
+ User Input (Error Log)
59
+ |
60
+ V
61
+ +----------------------------------+
62
+ | PHASE 1: DIAGNOSIS |
63
+ | Query Creator Agent |
64
+ | (Consults Pinecone Memory) |
65
+ +----------------------------------+
66
+ |
67
+ V
68
+ +----------------------------------+
69
+ | PHASE 2: RESEARCH |
70
+ | Parallel Research Team: |
71
+ | 1. Docs Search Agent |
72
+ | 2. Community Search Agent |
73
+ | 3. Web Crawl Agent (Firecrawl) |
74
+ +----------------------------------+
75
+ |
76
+ V
77
+ +----------------------------------+
78
+ | PHASE 3: REPAIR |
79
+ | Code Surgeon Team: |
80
+ | [Surgeon] -> [Verify] -> [Fix] |
81
+ +----------------------------------+
82
+ |
83
+ V
84
+ Output (Fixed requirements.txt)
85
+ ```
86
+
87
+ ### Detailed Component Breakdown
88
+
89
+ #### 1. Phase 1: Contextual Diagnosis (The Detective)
90
+ The entry point is the **Query Creator Agent**, powered by **Gemini 2.0 Flash Lite**. We chose Flash Lite for its speed and low latency. This agent also has access to **Pinecone Vector Memory**. Before searching the web, it queries the vector database: *"Have we seen this error before?"* This "Long-Term Memory" allows the system to get smarter over time, instantly recalling fixes for recurring issues without re-doing the research.
91
+
92
+ #### 2. Phase 2: The Parallel Research Engine (The Researchers)
93
+ Research is time-consuming. To optimize this, we use the **ParallelAgent** pattern. Two agents run simultaneously:
94
+ * **Docs Search Agent**: Uses Google Search API restricted to domains like `readthedocs.io`, `docs.python.org`, and `pypi.org`. It looks for the "official" way things should work.
95
+ * **Community Search Agent**: Restricted to `stackoverflow.com` and `github.com/issues`. It looks for the "hacky" workarounds and bug reports.
96
+
97
+ #### 3. Phase 3: Deep Web Extraction (The Crawler)
98
+ Standard search tools only give you snippets. To truly understand a bug, you need to read the code. We integrated **Firecrawl**, a specialized tool for turning websites into LLM-ready markdown. When the researchers find a promising URL (like a GitHub commit diff), the **Web Crawl Agent** (powered by **Grok** via OpenRouter) visits the page, renders the JavaScript, and extracts the raw text. Grok was chosen here for its massive context window (128k+ tokens), allowing it to ingest entire documentation pages in one go.
99
+
100
+ #### 4. Phase 4: The Self-Correcting Loop (The Surgeon)
101
+ The final phase is the **Code Surgeon**. It proposes a fix (e.g., a new `requirements.txt`). But instead of just outputting it, it enters a **Validation Loop**.
102
+ 1. **Surgeon** generates the file.
103
+ 2. **Verification Agent** (a separate model instance) acts as a "Linter." It checks: *Does this version exist? Are there obvious conflicts?*
104
+ 3. If the check fails, the Surgeon is reprimanded and forced to try again.
105
+ This "System 2 Thinking" loop significantly reduces the rate of hallucinated package versions.
106
+
107
+ ---
108
+
109
+ ## Technology Stack
110
+
111
+ * **Orchestration**: Google Agent Development Kit (ADK)
112
+ * **Reasoning**: Google Gemini 2.0 Flash Lite (Speed) & Grok (Context)
113
+ * **Web Intelligence**: Firecrawl (Deep Scraping) & Google Search API
114
+ * **Memory**: Pinecone (Long-term Vector Storage) & SQLite (Session History)
115
+
116
+ ## Conclusion
117
+
118
+ The future of coding isn't just "auto-complete." It's **auto-debug**. By giving LLMs access to the live web and structuring them into specialized agents, we can solve the complex, library-internal bugs that generic chatbots simply can't touch.
119
+
120
+ **GitHub**: [github.com/Yashwant00CR7/AI-Powered-Package-Conflict-Resolver](https://github.com/Yashwant00CR7/AI-Powered-Package-Conflict-Resolver)
121
+ **Built with**: Google ADK, Gemini, Grok, Firecrawl
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Legacy Dependency Solver Contributors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,10 +1,117 @@
1
- ---
2
- title: AI Package Doctor
3
- emoji: 🐠
4
- colorFrom: pink
5
- colorTo: green
6
- sdk: docker
7
- pinned: false
8
- ---
9
-
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Package Conflict Identifier 📦🔍
2
+
3
+ [![Python 3.10+](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
+ [![Google ADK](https://img.shields.io/badge/Google-ADK-4285F4.svg)](https://github.com/google/adk)
6
+
7
+ > AI-powered package conflict identifier and resolver using Google's Agent Development Kit (ADK). It leverages a multi-agent architecture with Google Gemini and OpenRouter (Grok) models to diagnose dependency issues, research solutions, and generate fixed configuration files.
8
+
9
+ ## 🎯 Features
10
+
11
+ - **Advanced Multi-Agent Architecture**:
12
+ - **Context Search Agent**: Retrieves insights from past sessions using Pinecone vector memory.
13
+ - **Parallel Research Team**: Concurrent searching of Official Docs and Community forums.
14
+ - **Web Crawl Agent**: Uses **Firecrawl** (via OpenRouter) for deep web scraping of documentation.
15
+ - **Code Surgeon**: Generates and validates `requirements.txt` fixes.
16
+ - **Hybrid Model Intelligence**:
17
+ - **Google Gemini 2.0 Flash Lite**: For high-speed reasoning and orchestration.
18
+ - **Grok 4.1 Fast (via OpenRouter)**: For specialized web crawling and context analysis.
19
+ - **Persistent Memory**:
20
+ - **Short-Term**: SQLite/PostgreSQL session storage.
21
+ - **Long-Term**: Pinecone Vector Database for recalling past solutions.
22
+ - **Intelligent Tooling**:
23
+ - `retrieve_memory`: Semantic search of previous conversations.
24
+ - `google_search`: Live web search.
25
+ - `firecrawl`: Advanced web scraping.
26
+
27
+ ## 📁 Project Structure
28
+
29
+ ```
30
+ package_conflict_resolver/
31
+ ├── .env # Environment variables (API Keys)
32
+ ├── requirements.txt # Dependencies
33
+ ├── main.py # CLI Entry Point
34
+ ├── web_app.py # Web UI Entry Point (ADK Web Server)
35
+ └── src/
36
+ ├── __init__.py
37
+ ├── config.py # Configuration & Service Initialization
38
+ ├── tools.py # Custom Tools (Search, Memory, Validation)
39
+ ├── agents.py # Agent Definitions & Workflow
40
+ └── utils.py # Logging & Helpers
41
+ ```
42
+
43
+ ## 🚀 Quick Start
44
+
45
+ ### 1. Clone & Install
46
+ ```bash
47
+ git clone <your-repo-url>
48
+ cd package_conflict_resolver
49
+ pip install -r requirements.txt
50
+ ```
51
+
52
+ ### 2. Configure Environment
53
+ Create a `.env` file with your API keys:
54
+ ```env
55
+ GOOGLE_API_KEY=your_gemini_key
56
+ OPENROUTER_API_KEY=your_openrouter_key
57
+ PINECONE_API_KEY=your_pinecone_key
58
+ DATABASE_URL=sqlite+aiosqlite:///legacy_solver.db
59
+ ```
60
+
61
+ ### 3. Run the Agent
62
+
63
+ **Option A: CLI Mode (Recommended for quick tasks)**
64
+ ```bash
65
+ python main.py
66
+ ```
67
+
68
+ **Option B: Web UI (Full Experience)**
69
+ ```bash
70
+ adk web --no-reload
71
+ ```
72
+ Open [http://127.0.0.1:8000/dev-ui/](http://127.0.0.1:8000/dev-ui/) to interact with the agent visually and view chat history.
73
+
74
+ ## 🤖 Agent Workflow
75
+
76
+ 1. **Query Creator Agent**:
77
+ - Analyzes the user's error message.
78
+ - Uses `retrieve_memory` to check if this issue was solved before.
79
+ - Generates search queries for the research team.
80
+
81
+ 2. **Context Search Agent**:
82
+ - Specifically looks for relevant context in the project's long-term memory.
83
+
84
+ 3. **Parallel Research Team**:
85
+ - **Docs Search Agent**: Searches official documentation.
86
+ - **Community Search Agent**: Searches StackOverflow/GitHub.
87
+ - **Web Crawl Agent**: Deep crawls specific documentation pages using Firecrawl.
88
+
89
+ 4. **Code Surgeon**:
90
+ - Synthesizes all gathered information.
91
+ - Generates a corrected `requirements.txt` or solution plan.
92
+
93
+ ## ☁️ Deployment & Persistence
94
+
95
+ ### Database
96
+ For production (e.g., Hugging Face Spaces), use a PostgreSQL database:
97
+ ```env
98
+ DATABASE_URL=postgresql+asyncpg://user:password@host/dbname
99
+ ```
100
+
101
+ ### Long-Term Memory (Pinecone)
102
+ To enable persistent memory across restarts:
103
+ 1. Get a free API key from [Pinecone.io](https://www.pinecone.io).
104
+ 2. Set `PINECONE_API_KEY` in `.env`.
105
+ 3. The agent will automatically index and retrieve past sessions.
106
+
107
+ ## 📝 License
108
+
109
+ MIT License.
110
+
111
+ ## 🙏 Credits
112
+
113
+ Built with:
114
+ - [Google Agent Development Kit (ADK)](https://github.com/google/adk)
115
+ - [Google Gemini](https://deepmind.google/technologies/gemini/)
116
+ - [OpenRouter](https://openrouter.ai/)
117
+ - [Pinecone](https://www.pinecone.io/)
main.py ADDED
@@ -0,0 +1,143 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Main entry point for the AI-Powered Package Conflict Resolver.
3
+ Initializes and runs the agent with a test query.
4
+ """
5
+ import asyncio
6
+ import os
7
+ import nest_asyncio
8
+ from google.adk import Runner
9
+ from google.genai import types
10
+ from src.config import get_session_service
11
+ from src.agents import create_root_agent
12
+ from src.utils import logger
13
+
14
+ # Apply nest_asyncio to handle event loop conflicts
15
+ nest_asyncio.apply()
16
+
17
+
18
+ async def run_session(runner, user_input: str, session_id: str):
19
+ """
20
+ Runs an agent session with the given input.
21
+
22
+ Args:
23
+ runner: The Runner instance
24
+ user_input: User's query/request
25
+ session_id: Session identifier for state tracking
26
+ """
27
+ logger.info(f"🚀 Starting session: {session_id}")
28
+ logger.info(f"📝 User input: {user_input}")
29
+
30
+ # Create structured message
31
+ user_msg = types.Content(
32
+ role="user",
33
+ parts=[types.Part.from_text(text=user_input)]
34
+ )
35
+
36
+ # Run the agent
37
+ response_generator = runner.run(
38
+ session_id=session_id,
39
+ user_id="default_user",
40
+ new_message=user_msg
41
+ )
42
+
43
+ # Collect and display response
44
+ full_response = ""
45
+ print("\n🤖 Agent Response:\n")
46
+ for event in response_generator:
47
+ # ADK events have .content.parts structure
48
+ if hasattr(event, 'content') and event.content and hasattr(event.content, 'parts'):
49
+ if event.content.parts:
50
+ text = event.content.parts[0].text
51
+ # Filter out empty or "None" responses
52
+ if text and text != "None":
53
+ print(text, end='', flush=True)
54
+ full_response += text
55
+ # Fallback for simple text
56
+ elif hasattr(event, 'text'):
57
+ text = event.text
58
+ print(text, end='', flush=True)
59
+ full_response += text
60
+ elif isinstance(event, str):
61
+ print(event, end='', flush=True)
62
+ full_response += event
63
+
64
+ print("\n")
65
+ logger.info(f"✅ Session completed: {session_id}")
66
+ return full_response
67
+
68
+
69
+ async def main():
70
+ """Main execution function."""
71
+ logger.info("=" * 60)
72
+ logger.info("🤖 AI-Powered Package Conflict Resolver - Starting...")
73
+ logger.info("=" * 60)
74
+
75
+ # Initialize session service
76
+ session_service = get_session_service()
77
+
78
+ # Create root agent
79
+ root_agent = create_root_agent()
80
+
81
+ # Initialize runner
82
+ runner = Runner(
83
+ agent=root_agent,
84
+ app_name="package_conflict_resolver",
85
+ session_service=session_service
86
+ )
87
+ logger.info("✅ Runner initialized")
88
+
89
+ # Test query
90
+ test_query = """
91
+ I have a legacy Python project with the following dependencies in requirements.txt:
92
+
93
+ pydantic==1.10.2
94
+ fastapi==0.95.0
95
+
96
+ I'm getting deprecation warnings about regex patterns in Pydantic.
97
+ Can you help me fix this and update to compatible versions?
98
+ """
99
+
100
+ logger.info("\n" + "=" * 60)
101
+ logger.info("🧪 Running test query...")
102
+ logger.info("=" * 60 + "\n")
103
+
104
+ # Explicitly create the session first to avoid "Session not found" error
105
+ # Delete existing DB to ensure clean state
106
+ if os.path.exists("package_conflict_resolver.db"):
107
+ try:
108
+ os.remove("package_conflict_resolver.db")
109
+ logger.info("🗑️ Removed existing database file")
110
+ except Exception as e:
111
+ logger.warning(f"⚠️ Could not remove DB: {e}")
112
+
113
+ session_id = "test_session_001"
114
+ try:
115
+ # Pass app_name to ensure Runner finds it
116
+ await session_service.create_session(
117
+ session_id=session_id,
118
+ user_id="default_user",
119
+ app_name="package_conflict_resolver"
120
+ )
121
+ logger.info(f"✅ Created new session: {session_id}")
122
+ except Exception as e:
123
+ logger.warning(f"⚠️ Session creation note: {e}")
124
+
125
+ # Run the session
126
+ response = await run_session(
127
+ runner=runner,
128
+ user_input=test_query,
129
+ session_id=session_id
130
+ )
131
+
132
+ logger.info("\n" + "=" * 60)
133
+ logger.info("🎉 Test completed successfully!")
134
+ logger.info("=" * 60)
135
+
136
+
137
+ if __name__ == "__main__":
138
+ try:
139
+ asyncio.run(main())
140
+ except KeyboardInterrupt:
141
+ logger.info("\n👋 Interrupted by user")
142
+ except Exception as e:
143
+ logger.error(f"❌ Error: {e}", exc_info=True)
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ google-adk
2
+ crawl4ai
3
+ aiosqlite
4
+ sqlalchemy
5
+ nest_asyncio
6
+ python-dotenv
7
+ certifi
8
+ litellm
9
+ pinecone
10
+ sentence-transformers
11
+ uvicorn
src/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ """Legacy Dependency Solver - Modular Package"""
2
+
3
+ from .agents import root_agent
4
+ from .config import get_session_service, get_memory_service
5
+
6
+ # Initialize services for ADK to discover
7
+ session_service = get_session_service()
8
+ memory_service = get_memory_service()
9
+
10
+ __all__ = ["root_agent", "session_service", "memory_service"]
src/agent.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .agents import agent
src/agents.py ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agent definitions for the AI-Powered Package Conflict Resolver.
3
+ Defines Query Creator, Web Search, Web Crawl, and CodeSurgeon agents.
4
+ """
5
+ import sys
6
+ import asyncio
7
+ import json
8
+
9
+ # Fix for Playwright on Windows (NotImplementedError in subprocess)
10
+ if sys.platform == 'win32':
11
+ asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
12
+
13
+ from google.adk import Agent
14
+ from google.adk.agents import SequentialAgent, ParallelAgent
15
+ # from google.adk.events import Event, EventActions # Unused after removing loop
16
+ from google.adk.tools import google_search, load_memory
17
+ from .config import get_model, get_gemini_model
18
+ from .tools import batch_tool, adaptive_tool, save_context_tool, retrieve_context_tool, submit_queries_tool, validate_tool, retrieve_memory_tool
19
+ from .utils import logger
20
+
21
+
22
+ def create_query_creator_agent():
23
+ """
24
+ Creates the Query Creator agent (Dependency Detective).
25
+ Generates search queries based on the user's problem.
26
+ """
27
+ agent = Agent(
28
+ name="Query_Creator_Agent",
29
+ model=get_gemini_model(),
30
+ tools=[google_search, retrieve_memory_tool], # Added retrieve_memory_tool
31
+ description="Dependency Detective specialized in diagnosing Python environment conflicts",
32
+ instruction="""
33
+ You are the "Dependency Detective," an expert AI agent specialized in diagnosing Python environment conflicts, legacy code rot, and version mismatch errors.
34
+ Use Google Search Tool if You don't Know about those issue or packages.
35
+ Use `retrieve_memory` to recall details from previous conversations if the user refers to "last time" or "previous error".
36
+
37
+ YOUR GOAL:
38
+ 1. Analyze the input to identify the specific packages involved (e.g., "tensorflow", "numpy").
39
+ 2. Save these package names to the session state using `save_context('packages', 'package1, package2')`.
40
+ 3. Generate a list of targeted, technical search queries that will help a downstream "Web Crawler" find the exact solution.
41
+
42
+ INPUT YOU WILL RECEIVE:
43
+ 1. A list of packages (e.g., "tensorflow, keras, numpy").
44
+ 2. An error log or description (e.g., "int32 and float mismatch").
45
+
46
+ YOUR ANALYSIS PROCESS:
47
+ 1. Extract the package names and versions from the input.
48
+ 2. Call `save_context('packages', 'extracted_package_list')`.
49
+ 3. Analyze the Error: Is it a syntax error or a compatibility error? Look for keywords like "deprecated", "mismatch", "attribute error".
50
+ 4. Analyze the Stack: Look at the libraries involved.
51
+ 5. Hypothesize Conflicts: Generate search queries that target:
52
+ - "Breaking changes" in the libraries mentioned.
53
+ - "Migration guides" for the specific error.
54
+ - "Compatibility matrices" for the package combinations.
55
+
56
+ OUTPUT FORMAT:
57
+ Start your response with:
58
+ **Model: Gemini 2.0 Flash Lite**
59
+ ## Search Queries
60
+
61
+ Return a raw JSON list of strings in your text response.
62
+ Example: ["numpy.float deprecated version", "tensorflow 2.x keras version incompatibility"]
63
+ """
64
+ )
65
+ logger.info("✅ Query Creator agent created")
66
+ return agent
67
+
68
+
69
+ def create_docs_search_agent():
70
+ """
71
+ Creates the Docs Search agent (Official Documentation).
72
+ """
73
+ agent = Agent(
74
+ name="Docs_Search_Agent",
75
+ model=get_gemini_model(),
76
+ tools=[google_search],
77
+ description="Search agent focused on official documentation",
78
+ instruction="""
79
+ You are the "Official Docs Researcher".
80
+
81
+ YOUR GOAL:
82
+ Search for official documentation, API references, and migration guides.
83
+ Focus on domains like *.org, *.io, *.dev, and official GitHub repositories.
84
+
85
+ INPUT: List of search queries.
86
+ OUTPUT: Top 4 most relevant OFFICIAL URLs.
87
+
88
+ OUTPUT FORMAT:
89
+ **Model: Gemini 2.5 Pro**
90
+ ## Official Docs Results
91
+ {"top_urls": ["url1", "url2", ...]}
92
+ """
93
+ )
94
+ logger.info("✅ Docs Search agent created")
95
+ return agent
96
+
97
+ def create_community_search_agent():
98
+ """
99
+ Creates the Community Search agent (StackOverflow, GitHub Issues).
100
+ """
101
+ agent = Agent(
102
+ name="Community_Search_Agent",
103
+ model=get_gemini_model(),
104
+ tools=[google_search],
105
+ description="Search agent focused on community discussions",
106
+ instruction="""
107
+ You are the "Community Researcher".
108
+
109
+ YOUR GOAL:
110
+ Search for community discussions, bug reports, and stackoverflow threads.
111
+ Focus on sites like stackoverflow.com, github.com/issues, reddit.com.
112
+
113
+ INPUT: List of search queries.
114
+ OUTPUT: Top 4 most relevant COMMUNITY URLs.
115
+
116
+ OUTPUT FORMAT:
117
+ **Model: Gemini 2.5 Pro**
118
+ ## Community Results
119
+ {"top_urls": ["url1", "url2", ...]}
120
+ """
121
+ )
122
+ logger.info("✅ Community Search agent created")
123
+ return agent
124
+
125
+ def create_context_search_agent():
126
+ """
127
+ Creates the Context Search agent (General Context).
128
+ """
129
+ agent = Agent(
130
+ name="Context_Search_Agent",
131
+ model=get_gemini_model(),
132
+ tools=[google_search],
133
+ description="Search agent focused on general context and main URL",
134
+ instruction="""
135
+ You are the "Context Researcher".
136
+
137
+ YOUR GOAL:
138
+ 1. Analyze the input search queries to identify the "Main Topic" or "Core Library/Framework" (e.g., if input is "numpy float error", main topic is "numpy").
139
+ 2. Search for the Home Page, Main Documentation Hub, or Wikipedia page for this Main Topic.
140
+ 3. Provide the top 3-4 most authoritative URLs for this topic.
141
+
142
+ INPUT: List of search queries.
143
+ OUTPUT: Top 3-4 most relevant URLs.
144
+
145
+ OUTPUT FORMAT:
146
+ **Model: Gemini 2.5 Pro**
147
+ ## Context Results
148
+ {"top_urls": ["url1", "url2", "url3"]}
149
+ """
150
+ )
151
+ logger.info("✅ Context Search agent created")
152
+ return agent
153
+
154
+
155
class WebCrawlAgent(Agent):
    """
    Custom Agent for Web Crawling that deterministically tries batch crawl first,
    then falls back to adaptive crawl if needed.

    The crawl strategy lives in code (overridden `run`) rather than in the
    LLM's tool-choice, so the batch-then-adaptive behavior is reproducible.
    """
    def __init__(self, model, tools, **kwargs):
        # Plain pass-through to the ADK Agent constructor; no extra state kept.
        super().__init__(model=model, tools=tools, **kwargs)

    async def run(self, input_str: str, **kwargs):
        """
        Custom run logic:
        1. Parse input to extract URLs (regex heuristic).
        2. Try the batch crawler.
        3. If the batch result looks poor, fall back to adaptive crawl on the first URL.

        Returns a markdown-formatted string with crawled content or an error note.
        """
        logger.info(f"🕷️ WebCrawlAgent received input: {input_str}")

        # Simple heuristic to extract URLs from possibly unstructured upstream
        # output (the previous agent emits JSON-ish text containing URLs).
        import re
        urls = re.findall(r'https?://[^\s<>"]+|www\.[^\s<>"]+', input_str)

        if not urls:
            return "No URLs found to crawl."

        # 1. Try Batch Crawl.
        # Fix: `batch_crawl_tool` is the raw coroutine (see tools.py); the
        # previous `.func` attribute access only exists on FunctionTool wrappers
        # and raised AttributeError. Call the coroutine directly.
        logger.info(f"🕷️ Attempting Batch Crawl for {len(urls)} URLs")
        batch_result = await batch_crawl_tool(urls)

        # 2. Analyze result with a simple heuristic: accept when there is a
        # reasonable amount of content and no embedded "Error" marker.
        content = batch_result.get("combined_content", "")
        if "Error" not in content and len(content) > 500:
            return f"**Model: Custom Logic**\n## Crawled Content Analysis\n\n{content}"

        # 3. Fallback to Adaptive (if batch failed significantly).
        # Only the first URL is tried adaptively to keep cost bounded.
        logger.info("⚠️ Batch crawl had issues. Falling back to Adaptive Crawl for first URL...")
        # Fix: the underlying function signature is (start_url, user_query);
        # the previous `query=` keyword raised TypeError.
        adaptive_result = await adaptive_tool.func(urls[0], user_query="dependency conflicts version requirements")

        # Adaptive results are dicts; render as JSON for the downstream agent.
        formatted_adaptive = json.dumps(adaptive_result, indent=2) if isinstance(adaptive_result, dict) else str(adaptive_result)

        return f"**Model: Custom Logic (Adaptive Fallback)**\n## Crawled Content Analysis\n\n{formatted_adaptive}"
203
+
204
def create_web_crawl_agent():
    """
    Build the Web Crawl agent (Technical Content Extractor).

    Returns a WebCrawlAgent whose overridden run() drives the
    batch-then-adaptive crawl strategy deterministically.
    """
    crawl_agent = WebCrawlAgent(
        name="Web_Crawl_Agent",
        model=get_model(),
        tools=[batch_tool, adaptive_tool],
        description="Technical Content Extractor using Deterministic Logic",
        instruction="""
    You are the "Technical Content Extractor".

    (Note: This instruction is less critical now as the custom run method handles the logic,
    but kept for metadata purposes).
    """
    )
    logger.info("✅ Web Crawl agent created (Custom Class)")
    return crawl_agent
223
+
224
+
225
def create_code_surgeon_agent():
    """
    Creates the CodeSurgeon agent that fixes dependency issues.

    Final stage of the pipeline: reads state written by earlier agents and
    emits the resolved requirements plus an explanation.
    """
    agent = Agent(
        name="Code_Surgeon_Agent",
        model=get_model(),
        tools=[retrieve_context_tool, save_context_tool],
        description="Expert Python developer specialized in dependency resolution",
        # Fix: the task list previously numbered two different steps "3.",
        # which can confuse instruction-following models; renumbered 1-5.
        instruction="""
    You are the "Code Surgeon".

    YOUR TASK:
    1. Use 'retrieve_context' to get the 'packages' and 'versions' stored by the Query Creator.
    2. Analyze the dependency conflicts provided by the user.
    3. Based on the research findings from the Web Crawl Agent, determine the correct versions.
    4. Generate a clean requirements.txt with resolved dependencies.
    5. Provide an explanation of what was fixed and why.

    OUTPUT FORMAT:
    - Clear explanation of the issue
    - Updated requirements.txt content
    - Migration notes (if breaking changes exist)

    IMPORTANT:
    - Call `save_context('solution', 'YOUR_SOLUTION_SUMMARY')` to store the final resolution.
    - Call `save_context('requirements', 'YOUR_REQUIREMENTS_CONTENT')` to store the file content.
    """
    )
    logger.info("✅ Code Surgeon agent created")
    return agent
256
+
257
+
258
# ===== MEMORY SERVICE =====
# Module-level singleton so the after-agent callback does not depend on a
# context-bound service instance (see auto_save_to_memory below for usage).
from .config import get_memory_service
global_memory_service = get_memory_service()
261
+
262
# ===== MEMORY CALLBACK =====
async def auto_save_to_memory(callback_context):
    """Persist the current session to long-term memory after an agent turn.

    Registered as an ``after_agent_callback`` on the root agent. Failures are
    logged and swallowed so memory persistence never breaks the pipeline.
    """
    try:
        # Reach into the invocation context for the live session object and
        # hand it to the module-level service (not a context-bound one).
        session = callback_context._invocation_context.session
        await global_memory_service.add_session_to_memory(session)
        logger.info("💾 Session automatically saved to memory (Global Service).")
    except Exception as exc:
        logger.error(f"❌ Failed to auto-save session: {exc}")
273
+
274
+
275
def create_root_agent():
    """
    Creates the root agent that orchestrates the sub-agents.

    Pipeline (strictly sequential):
        1. Web_Research_Team — query creation, then parallel searches
           (official docs, community, general context).
        2. Web_Crawl_Agent — deterministic batch/adaptive content extraction.
        3. Code_Surgeon_Agent — produces the resolved requirements.txt.

    Returns:
        SequentialAgent: fully wired root agent whose after_agent_callback
        persists each turn to long-term memory.
    """
    # Create sub-agents
    query_creator = create_query_creator_agent()
    # load_memory removed due to model limitations

    docs_search = create_docs_search_agent()
    community_search = create_community_search_agent()
    context_search = create_context_search_agent()

    # Parallel Research
    parallel_search = ParallelAgent(
        name="Parallel_Search_Team",
        sub_agents=[docs_search, community_search, context_search],
        description="Parallel search for official, community, and general context resources"
    )

    # Group Research Team
    web_research_team = SequentialAgent(
        name="Web_Research_Team",
        sub_agents=[query_creator, parallel_search],
        description="Team responsible for researching dependency issues"
    )

    # Fix: this agent was previously constructed twice in a row; one
    # instance is sufficient (the duplicate was discarded but wasteful).
    web_crawl = create_web_crawl_agent()

    # Code Surgeon (No Loop)
    code_surgeon = create_code_surgeon_agent()

    # Create the sequential agent
    agent = SequentialAgent(
        name="Package_Conflict_Resolver_Root_Agent",
        sub_agents=[web_research_team, web_crawl, code_surgeon],
        description="Root agent managing the dependency resolution pipeline",
        after_agent_callback=auto_save_to_memory  # Auto-save history
    )
    logger.info("✅ Root agent created with sequential flow (Research Team -> Crawl -> Surgeon)")
    return agent
316
+
317
+
318
# ===== MODULE-LEVEL INITIALIZATION FOR ADK WEB =====
# Built at import time so `adk web` can discover `root_agent` directly.
root_agent = create_root_agent()

# Removed App definition to avoid ImportError.
# Memory is handled via global_memory_service in callback.
# NOTE(review): `agent` is presumably an alias some loader expects — confirm
# before removing.
agent = root_agent
src/app.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ App definition for the AI-Powered Package Conflict Resolver.
3
+ Includes Events Compaction configuration.
4
+ """
5
+ from google.adk import App
6
+ from google.adk.types import EventsCompactionConfig
7
+ from .agents import root_agent
8
+ from .utils import logger
9
+ from .config import get_memory_service, get_session_service
10
+
11
+ # Define the App with Events Compaction and Custom Services
12
+ package_conflict_resolver_app = App(
13
+ name="Package_Conflict_Resolver_App",
14
+ root_agent=root_agent,
15
+ memory_service=get_memory_service(),
16
+ session_service=get_session_service(),
17
+ events_compaction_config=EventsCompactionConfig(
18
+ compaction_interval=3, # Trigger compaction every 3 invocations
19
+ overlap_size=1, # Keep 1 previous turn for context
20
+ ),
21
+ )
22
+
23
+ logger.info("✅ Package Conflict Resolver App created with Events Compaction (Interval: 3, Overlap: 1)")
src/config.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Configuration module for model initialization and environment setup.
3
+ CRITICAL: Includes Ollama integration fix for Google ADK.
4
+ """
5
+ import os
6
+ from dotenv import load_dotenv
7
+ from google.adk.models.lite_llm import LiteLlm
8
+ from google.adk.sessions import DatabaseSessionService
9
+ from google.genai import types
10
+ from .utils import logger
11
+
12
+ # Load environment variables
13
+ load_dotenv()
14
+
15
+ # ===== SSL CONFIGURATION =====
16
+ # Fix for SSL certificate errors on Windows
17
+ import certifi
18
+ os.environ['SSL_CERT_FILE'] = certifi.where()
19
+ logger.info(f"🔐 SSL Cert File configured: {os.environ['SSL_CERT_FILE']}")
20
+
21
+ # ===== MODEL INITIALIZATION =====
22
+ # Using OpenRouter (Grok) via LiteLLM
23
def get_model():
    """Return a LiteLlm model instance routed through OpenRouter (Grok).

    Raises:
        RuntimeError: if OPENROUTER_API_KEY is not configured. (Previously a
        missing key crashed with an opaque TypeError when assigning None into
        os.environ.)
    """
    api_key = os.getenv("OPENROUTER_API_KEY")
    if not api_key:
        raise RuntimeError(
            "OPENROUTER_API_KEY is not set. Add it to your environment or .env file."
        )

    # Configure the OpenRouter OpenAI-compatible endpoint for LiteLLM.
    os.environ["OPENAI_API_BASE"] = "https://openrouter.ai/api/v1"
    os.environ["OPENAI_API_KEY"] = api_key

    # LiteLLM uses the 'openai/' prefix for OpenAI-compatible endpoints.
    model = LiteLlm(model="openai/x-ai/grok-4.1-fast:free")

    logger.info("✅ Model initialized: x-ai/grok-4.1-fast:free via OpenRouter")
    return model
35
+
36
+
37
# ===== GEMINI MODEL INITIALIZATION =====
# Using Google Gemini for Search Agents
from google.adk.models.google_llm import Gemini
# Module-level model name.
# NOTE(review): PascalCase for a constant — consider GEMINI_MODEL_NAME; kept
# as-is since other modules may import `Model` by this name.
Model="gemini-2.0-flash-lite"
def get_gemini_model():
    """Returns a configured Gemini model instance (used by the search agents)."""
    model = Gemini(model=Model)
    logger.info(f"✅ Model initialized: {Model}")
    return model
46
+
47
+
48
+ # ===== SESSION SERVICE INITIALIZATION =====
49
+ # Using DatabaseSessionService with SQLite + AsyncIO driver
50
def get_session_service(db_url=None):
    """
    Create a DatabaseSessionService (SQLite with async driver by default).

    Args:
        db_url: Optional explicit connection string. Falls back to the
            DATABASE_URL environment variable, then to a local SQLite file.
    """
    # Resolution order: explicit argument > env var > bundled default.
    # legacy_solver.db is the default because it already holds existing sessions.
    resolved_url = db_url or os.getenv("DATABASE_URL", "sqlite+aiosqlite:///legacy_solver.db")

    service = DatabaseSessionService(db_url=resolved_url)
    # Only log the scheme so credentials embedded in the URL never reach logs.
    logger.info(f"✅ Session service initialized: {resolved_url.split('://')[0]}://...")
    return service
66
+
67
+
68
+ # ===== MEMORY SERVICE INITIALIZATION =====
69
+ # Using InMemoryMemoryService for simplicity (DatabaseMemoryService not available in this ADK version)
70
+ from google.adk.memory import InMemoryMemoryService
71
+
72
def get_memory_service():
    """
    Return the best available MemoryService.

    Prefers the Pinecone-backed long-term vector store when PINECONE_API_KEY
    is configured; otherwise (or on any Pinecone initialization failure)
    falls back to the ephemeral in-memory implementation.
    """
    pinecone_key = os.getenv("PINECONE_API_KEY")
    logger.info(f"🔍 Checking PINECONE_API_KEY: {'Found' if pinecone_key else 'Missing'}")

    if pinecone_key:
        try:
            # Imported lazily so the pinecone/sentence-transformers stack is
            # only required when a key is actually present.
            from .memory import PineconeMemoryService
            service = PineconeMemoryService(api_key=pinecone_key)
            logger.info("✅ Memory service initialized: Pinecone (Long-Term Vector Store)")
            return service
        except Exception as exc:
            logger.error(f"❌ Failed to init Pinecone, falling back to InMemory: {exc}")

    fallback = InMemoryMemoryService()
    logger.info("✅ Memory service initialized: InMemory (Ephemeral)")
    return fallback
src/demo issue.json ADDED
The diff for this file is too large to render. See raw diff
 
src/memory.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import uuid
3
+ from typing import List, Dict, Any
4
+ from typing import List, Dict, Any
5
+ # from google.adk.memory import MemoryService # Not available in this version
6
+ from pinecone import Pinecone, ServerlessSpec
7
+ from sentence_transformers import SentenceTransformer
8
+ from .utils import logger
9
+
10
class PineconeMemoryService:  # No base class: MemoryService is not importable in this ADK version
    """
    Custom Memory Service using Pinecone for long-term vector storage.
    Uses 'all-MiniLM-L6-v2' for local embedding generation (384-dim vectors).

    Duck-types the two methods the rest of the app relies on:
    `add_session_to_memory` and `search_memory`.
    """
    def __init__(self, api_key: str, index_name: str = "adk-memory", dimension: int = 384):
        """
        Args:
            api_key: Pinecone API key.
            index_name: Serverless index name; created on first use if missing.
            dimension: Embedding dimension — must match the encoder
                (384 for all-MiniLM-L6-v2).
        """
        self.api_key = api_key
        self.index_name = index_name
        self.dimension = dimension

        # Initialize Pinecone
        self.pc = Pinecone(api_key=self.api_key)

        # Create index if not exists
        if self.index_name not in self.pc.list_indexes().names():
            logger.info(f"🌲 Creating Pinecone index: {self.index_name}")
            self.pc.create_index(
                name=self.index_name,
                dimension=self.dimension,
                metric="cosine",
                spec=ServerlessSpec(cloud="aws", region="us-east-1")  # Default free tier region
            )

        self.index = self.pc.Index(self.index_name)

        # Initialize Embedding Model.
        # Fix: removed leftover debug print() calls so all output goes through
        # the configured logging setup.
        logger.info("🧠 Loading embedding model: all-MiniLM-L6-v2... (This may take a while if downloading)")
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        logger.info("✅ Pinecone Memory Service initialized")

    async def add_session_to_memory(self, session: Any):
        """
        Embeds the session history and saves it to Pinecone.

        Best-effort: any failure is logged and swallowed so memory
        persistence never interrupts the agent pipeline.
        """
        try:
            # Get session ID safely (ADK sessions usually use .id)
            session_id = getattr(session, 'id', getattr(session, 'session_id', 'UNKNOWN'))

            logger.info(f"💾 Attempting to save session to Pinecone. Session ID: {session_id}")

            # 1. Convert session to text — support both 'turns' and 'events'
            # shapes since the session structure varies across ADK versions.
            text_content = ""

            if hasattr(session, 'turns'):
                turns = session.turns
                logger.info(f"Found {len(turns)} turns.")
                for turn in turns:
                    text_content += f"{turn.role}: {turn.content}\n"
            elif hasattr(session, 'events'):
                events = session.events
                logger.info(f"Found {len(events)} events.")
                for event in events:
                    # Event structure might vary
                    author = getattr(event, 'author', 'unknown')
                    content = getattr(event, 'content', getattr(event, 'text', ''))
                    text_content += f"{author}: {content}\n"
            else:
                logger.warning("⚠️ Session has no 'turns' or 'events' attribute.")

            if not text_content.strip():
                logger.warning("⚠️ Session content is empty. Skipping Pinecone save.")
                return

            # 2. Generate Embedding
            vector = self.model.encode(text_content).tolist()

            # 3. Create Metadata
            metadata = {
                "session_id": session_id,
                "text": text_content[:1000],  # Store snippet (limit size)
                "timestamp": str(session.created_at) if hasattr(session, 'created_at') else ""
            }

            # 4. Upsert to Pinecone — session_id doubles as the vector ID, so
            # re-saving the same session overwrites its previous vector.
            self.index.upsert(vectors=[(session_id, vector, metadata)])
            logger.info(f"💾 Saved session {session_id} to Pinecone")

        except Exception as e:
            logger.error(f"❌ Failed to save to Pinecone: {e}")

    async def search_memory(self, query: str, limit: int = 3) -> List[str]:
        """
        Searches Pinecone for relevant past sessions.

        Returns up to `limit` stored text snippets whose cosine score exceeds
        0.5; returns [] on any failure.
        """
        try:
            # 1. Embed Query
            query_vector = self.model.encode(query).tolist()

            # 2. Search Pinecone
            results = self.index.query(
                vector=query_vector,
                top_k=limit,
                include_metadata=True
            )

            # 3. Format Results — keep only matches above the relevance threshold
            memories = []
            for match in results['matches']:
                if match['score'] > 0.5:  # Relevance threshold
                    memories.append(match['metadata']['text'])

            return memories

        except Exception as e:
            logger.error(f"❌ Failed to search Pinecone: {e}")
            return []
src/tools.py ADDED
@@ -0,0 +1,244 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Tool definitions for the Legacy Dependency Solver.
3
+ Includes Crawl4AI batch crawler for efficient multi-URL processing.
4
+ """
5
+ from typing import List, Dict, Any
6
+ import json
7
+ import sys
8
+ import asyncio
9
+ import concurrent.futures
10
+ from pydantic import BaseModel, Field
11
+
12
+ from google.adk.tools import FunctionTool
13
+ from .utils import logger
14
+ from .config import get_memory_service # Import memory service factory
15
+
16
# --- 1. Define Schema (Module level for pickling) ---
class SearchResult(BaseModel):
    """Structured result schema for the adaptive-crawl LLM extraction step."""
    # Specific facts/numbers pulled from the page.
    relevant_facts: List[str] = Field(..., description="Specific facts/numbers found.")
    # Short summary scoped to the user's query.
    summary: str = Field(..., description="Concise summary related to the query.")
    # Free-form confidence label produced by the extraction model.
    confidence: str = Field(..., description="Confidence level (High/Medium/Low).")
21
+
22
+ # --- 2. Worker Functions (Run in Subprocess) ---
23
+
24
def _run_batch_crawl_worker(urls: List[str]) -> Dict[str, Any]:
    """
    Worker function to run batch crawl in a separate process.

    Sequentially crawls at most the first three URLs with crawl4ai and returns
    {"combined_content": <per-source markdown joined>, "status": "completed"}.
    Per-URL failures are embedded in the content as markers, never raised.
    """
    # Enforce ProactorEventLoop on Windows for Playwright
    if sys.platform == 'win32':
        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

    async def _async_logic():
        # Imported here so crawl4ai is only loaded inside the worker process.
        from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode

        # Shared Config — SSL errors ignored (targets include sites with
        # imperfect certificates).
        browser_config = BrowserConfig(
            headless=True,
            ignore_https_errors=True,
            extra_args=["--ignore-certificate-errors", "--ignore-ssl-errors"]
        )
        run_config = CrawlerRunConfig(
            cache_mode=CacheMode.BYPASS,
            word_count_threshold=10,
        )

        results = []
        # limit to top 3 URLs to bound crawl time
        target_urls = urls[:3]

        async with AsyncWebCrawler(config=browser_config) as crawler:
            for url in target_urls:
                try:
                    crawl_result = await crawler.arun(url=url, config=run_config)
                    if crawl_result.success:
                        # Cap each page at 15k chars to keep prompt size bounded.
                        results.append(f"--- SOURCE: {url} ---\n{crawl_result.markdown[:15000]}\n")
                    else:
                        results.append(f"--- SOURCE: {url} ---\n[Error: Failed to crawl]\n")
                except Exception as e:
                    results.append(f"--- SOURCE: {url} ---\n[Exception: {str(e)}]\n")

        return {
            "combined_content": "\n".join(results),
            "status": "completed"
        }

    return asyncio.run(_async_logic())
67
+
68
+
69
def _run_adaptive_crawl_worker(start_url: str, user_query: str) -> Dict[str, Any]:
    """
    Worker function to run adaptive crawl in a separate process.

    Phase 1 discovers the most relevant page starting from `start_url` via
    crawl4ai's AdaptiveCrawler; phase 2 runs a schema-guided LLM extraction on
    that page. Returns the extracted dict, or a dict with an "error" /
    "raw_output" key on failure.
    """
    if sys.platform == 'win32':
        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())

    async def _async_logic():
        from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig, CacheMode, AdaptiveConfig, LLMConfig
        from crawl4ai.extraction_strategy import LLMExtractionStrategy

        browser_config = BrowserConfig(
            headless=True,
            verbose=True,
            ignore_https_errors=True,
            extra_args=["--ignore-certificate-errors", "--ignore-ssl-errors"]
        )

        async with AsyncWebCrawler(config=browser_config) as crawler:
            # Phase 1: Discovery — follow at most 3 pages / 2 links per page.
            adaptive_config = AdaptiveConfig(
                max_pages=3,
                confidence_threshold=0.7,
                top_k_links=2,
            )

            # Import inside function to avoid top-level import issues in subprocess if needed
            from crawl4ai import AdaptiveCrawler
            adaptive = AdaptiveCrawler(crawler, config=adaptive_config)

            try:
                await adaptive.digest(start_url=start_url, query=user_query)
            except Exception as e:
                return {"error": f"Crawl failed during discovery: {str(e)}"}

            top_content = adaptive.get_relevant_content(top_k=1)
            if not top_content:
                return {"error": "No relevant content found via adaptive crawling."}

            best_url = top_content[0]['url']

            # Phase 2: Extraction against the single best page found above.
            dynamic_instruction = f"""
            Extract ONLY information matching this request: '{user_query}'.
            If not found, state that in the summary. Do not hallucinate.
            """

            extraction_config = CrawlerRunConfig(
                cache_mode=CacheMode.BYPASS,
                word_count_threshold=1,
                page_timeout=60000,
                # NOTE(review): assumes a local Ollama server with qwen2.5:7b
                # pulled — confirm availability in deployment environments.
                extraction_strategy=LLMExtractionStrategy(
                    llm_config=LLMConfig(provider="ollama/qwen2.5:7b", api_token="ollama"),
                    schema=SearchResult.model_json_schema(),
                    extraction_type="schema",
                    instruction=dynamic_instruction,
                ),
            )

            try:
                result = await crawler.arun(url=best_url, config=extraction_config)
                if result.extracted_content:
                    return json.loads(result.extracted_content)
                return {"error": "Extraction returned empty content."}
            except json.JSONDecodeError:
                # json.loads failed: hand back the raw LLM output for inspection.
                return {"raw_output": result.extracted_content}
            except Exception as e:
                return {"error": f"Extraction failed: {str(e)}"}

    return asyncio.run(_async_logic())
139
+
140
+
141
+ # --- 3. Main Tools (Async Wrappers) ---
142
+
143
async def batch_crawl_tool(urls: List[str]) -> Dict[str, Any]:
    """
    Crawls a LIST of URLs in one go using a subprocess to ensure correct event loop.
    """
    logger.info(f"🚀 Batch Tool Triggered: Processing {len(urls)} URLs...")

    # Run the Playwright-based worker in a separate process so its event-loop
    # policy cannot clash with the host loop; await completion without blocking.
    loop = asyncio.get_running_loop()
    with concurrent.futures.ProcessPoolExecutor() as pool:
        try:
            return await loop.run_in_executor(pool, _run_batch_crawl_worker, urls)
        except Exception as exc:
            logger.error(f"❌ Batch crawl subprocess failed: {exc}")
            return {"combined_content": f"Error: {str(exc)}", "status": "failed"}
157
+
158
async def adaptive_crawl_tool(start_url: str, user_query: str) -> Dict[str, Any]:
    """
    Performs adaptive crawl using a subprocess.
    """
    logger.info(f"🛠️ Tool Triggered: Adaptive Crawl on {start_url}")

    # Same subprocess isolation pattern as batch_crawl_tool: the worker owns
    # its own event loop so Playwright runs cleanly on every platform.
    loop = asyncio.get_running_loop()
    with concurrent.futures.ProcessPoolExecutor() as pool:
        try:
            return await loop.run_in_executor(pool, _run_adaptive_crawl_worker, start_url, user_query)
        except Exception as exc:
            logger.error(f"❌ Adaptive crawl subprocess failed: {exc}")
            return {"error": f"Subprocess failed: {str(exc)}"}
172
+
173
+
174
# Convert to ADK Tools.
# The wrapped tools are what agents invoke via the LLM; the unwrapped
# coroutines above remain importable for direct deterministic calls.
batch_tool = FunctionTool(batch_crawl_tool)
adaptive_tool = FunctionTool(adaptive_crawl_tool)
177
+
178
+
179
+ # ===== STATE MANAGEMENT TOOLS =====
180
+ from google.adk.tools import ToolContext
181
+
182
def save_context(tool_context: ToolContext, key: str, value: str) -> str:
    """Persist a key/value pair into the shared ADK session state.

    Returns a short confirmation string for the calling agent.
    """
    state = tool_context.state
    state[key] = value
    logger.info(f"💾 State Saved: {key} = {value}")
    return f"Saved {key} to state."
186
+
187
def retrieve_context(tool_context: ToolContext, key: str) -> str:
    """Read a value from the shared ADK session state.

    Returns the stored value as a string, or "Not found" when absent.
    """
    stored = tool_context.state.get(key, "Not found")
    logger.info(f"📂 State Retrieved: {key} = {stored}")
    return str(stored)
191
+
192
# ADK wrappers for the two state tools above.
save_context_tool = FunctionTool(save_context)
retrieve_context_tool = FunctionTool(retrieve_context)

def submit_queries(tool_context: ToolContext, queries: List[str]) -> str:
    """Store the generated search queries in session state under 'search_queries'."""
    tool_context.state['search_queries'] = queries
    logger.info(f"🚀 Queries Submitted: {queries}")
    return "Queries submitted successfully."

submit_queries_tool = FunctionTool(submit_queries)
201
+
202
def validate_requirements(tool_context: ToolContext, requirements_content: str) -> str:
    """
    Validate that a requirements.txt body is syntactically plausible.

    Accepts pinned/bounded specs (e.g. "numpy>=1.21") and bare package names;
    blank lines and '#' comments are ignored.

    Returns:
        "SUCCESS" when every line parses; otherwise an error string listing
        the offending lines (or an error message for empty input).
    """
    if not requirements_content:
        return "Error: Empty requirements content."

    # Fix: `import re` previously ran inside the per-line loop; hoisted and
    # the two patterns precompiled once.
    import re
    # package name followed by a version operator and a version token
    spec_pattern = re.compile(r'^[a-zA-Z0-9_\-]+[=<>!~]+[0-9a-zA-Z\.]+')
    # bare package name with no version constraint
    name_pattern = re.compile(r'^[a-zA-Z0-9_\-]+$')

    errors = []
    for line in requirements_content.strip().split('\n'):
        line = line.strip()
        if not line or line.startswith('#'):
            continue
        if not spec_pattern.match(line) and not name_pattern.match(line):
            errors.append(f"Invalid syntax: {line}")

    if errors:
        return f"Validation Failed: {'; '.join(errors)}"
    logger.info("✅ Requirements validation passed.")
    return "SUCCESS"
219
+
220
# ADK wrapper so agents can call requirements validation directly.
validate_tool = FunctionTool(validate_requirements)
221
+
222
+ # ===== MEMORY RETRIEVAL TOOL =====
223
async def retrieve_memory(query: str) -> str:
    """
    Searches long-term memory (Pinecone) for relevant past sessions.
    Use this to recall details from previous conversations.
    """
    logger.info(f"🧠 Searching Memory for: {query}")
    try:
        # Build the service on demand; get_memory_service selects Pinecone or
        # the in-memory fallback based on configuration.
        service = get_memory_service()
        memories = await service.search_memory(query)

        if not memories:
            return "No relevant memories found."

        joined = "\n---\n".join(memories)
        return f"Found relevant memories:\n{joined}"

    except Exception as exc:
        logger.error(f"❌ Memory retrieval failed: {exc}")
        return f"Error retrieving memory: {str(exc)}"
243
+
244
# ADK wrapper exposing long-term memory search to agents.
retrieve_memory_tool = FunctionTool(retrieve_memory)
src/utils.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Utility functions for logging and helpers.
3
+ """
4
+ import logging
5
+ import sys
6
+
7
+
8
def setup_logging(level=logging.INFO):
    """
    Configure root logging to stream to stdout and return a module logger.

    Args:
        level: Logging level (default: INFO)
    """
    stdout_handler = logging.StreamHandler(sys.stdout)
    logging.basicConfig(
        level=level,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        handlers=[stdout_handler],
    )
    return logging.getLogger(__name__)


# Shared module logger imported throughout the project.
logger = setup_logging()
web_app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Web Interface Entry Point for ADK Web UI.
3
+ Run with: python web_app.py
4
+ """
5
+ import os
6
+ import nest_asyncio
7
+ import uvicorn
8
+ from typing import Optional, Any
9
+ from google.adk.cli.adk_web_server import (
10
+ AdkWebServer, BaseAgentLoader, EvalSetsManager, EvalSetResultsManager,
11
+ BaseCredentialService
12
+ )
13
+ from google.adk.artifacts import FileArtifactService
14
+ from src.config import get_session_service, get_memory_service
15
+ from src.agents import create_root_agent
16
+ from src.utils import logger
17
+
18
+ # Apply nest_asyncio to handle event loop conflicts
19
+ nest_asyncio.apply()
20
+
21
class SingleAgentLoader(BaseAgentLoader):
    """Agent loader exposing exactly one pre-built root agent to the ADK web UI."""

    def __init__(self, agent):
        self.agent = agent
        # Fixed public name shown in the dev UI's agent picker.
        self.agent_name = "package_conflict_resolver"

    def list_agents(self) -> list[str]:
        """Return the single available agent name."""
        return [self.agent_name]

    def load_agent(self, agent_name: str):
        """Return the wrapped agent; raise ValueError for any other name."""
        if agent_name != self.agent_name:
            raise ValueError(f"Agent {agent_name} not found")
        return self.agent
34
+
35
class LocalCredentialService(BaseCredentialService):
    """No-op credential service satisfying the ADK abstract interface.

    Nothing is persisted; it only guarantees its storage directory exists.
    """

    def __init__(self, base_dir: str):
        self.base_dir = base_dir
        # Create the directory eagerly so future persistence code has a home.
        os.makedirs(base_dir, exist_ok=True)

    def load_credential(self, auth_config: Any, callback_context: Any) -> Optional[Any]:
        """Always report "no stored credential" — safe since nothing is saved."""
        return None

    def save_credential(self, auth_config: Any, callback_context: Any) -> None:
        """Intentionally discard the credential (no persistence)."""
        return None
49
+
50
+ if __name__ == "__main__":
51
+ logger.info("🌐 Initializing ADK Web Server...")
52
+
53
+ # 1. Initialize Services
54
+ session_service = get_session_service()
55
+ memory_service = get_memory_service()
56
+
57
+ data_dir = os.path.abspath("data")
58
+ os.makedirs(data_dir, exist_ok=True)
59
+
60
+ # Corrected: use root_dir instead of base_dir
61
+ artifact_service = FileArtifactService(root_dir=os.path.join(data_dir, "artifacts"))
62
+
63
+ # Use custom LocalCredentialService with implemented abstract methods
64
+ credential_service = LocalCredentialService(base_dir=os.path.join(data_dir, "credentials"))
65
+
66
+ eval_sets_manager = EvalSetsManager(base_dir=os.path.join(data_dir, "eval_sets"))
67
+ eval_set_results_manager = EvalSetResultsManager(base_dir=os.path.join(data_dir, "eval_results"))
68
+
69
+ # 2. Create Agent
70
+ root_agent = create_root_agent()
71
+ agent_loader = SingleAgentLoader(root_agent)
72
+
73
+ # 3. Initialize Web Server
74
+ server = AdkWebServer(
75
+ agent_loader=agent_loader,
76
+ session_service=session_service,
77
+ memory_service=memory_service,
78
+ artifact_service=artifact_service,
79
+ credential_service=credential_service,
80
+ eval_sets_manager=eval_sets_manager,
81
+ eval_set_results_manager=eval_set_results_manager,
82
+ agents_dir=os.path.abspath("src")
83
+ )
84
+
85
+ # 4. Get FastAPI App
86
+ app = server.get_fast_api_app()
87
+
88
+ logger.info("🚀 Starting Server...")
89
+ logger.info("👉 Open: http://127.0.0.1:8000/dev-ui/")
90
+
91
+ uvicorn.run(app, host="127.0.0.1", port=8000)