GreymanT commited on
Commit
8bf4d58
·
verified ·
1 Parent(s): ca9a435

Upload 80 files

Browse files
This view is limited to 50 files because the commit contains too many changes. See the raw diff for the full change set.
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. api/__init__.py +2 -0
  3. api/__pycache__/__init__.cpython-311.pyc +0 -0
  4. api/__pycache__/main.cpython-311.pyc +0 -0
  5. api/__pycache__/routes.cpython-311.pyc +0 -0
  6. api/main.py +66 -0
  7. api/routes.py +126 -0
  8. data/chroma_db/12c6a58a-a370-4695-a9d6-a858314de1c1/data_level0.bin +3 -0
  9. data/chroma_db/12c6a58a-a370-4695-a9d6-a858314de1c1/header.bin +3 -0
  10. data/chroma_db/12c6a58a-a370-4695-a9d6-a858314de1c1/length.bin +3 -0
  11. data/chroma_db/12c6a58a-a370-4695-a9d6-a858314de1c1/link_lists.bin +3 -0
  12. data/chroma_db/chroma.sqlite3 +3 -0
  13. requirements.txt +43 -3
  14. scripts/__pycache__/add_documents.cpython-311.pyc +0 -0
  15. scripts/add_documents.py +214 -0
  16. scripts/add_sample_documents.py +67 -0
  17. scripts/start_api.sh +12 -0
  18. scripts/start_ui.sh +12 -0
  19. src/__init__.py +4 -0
  20. src/__pycache__/__init__.cpython-311.pyc +0 -0
  21. src/agents/__init__.py +2 -0
  22. src/agents/__pycache__/__init__.cpython-311.pyc +0 -0
  23. src/agents/__pycache__/aggregator_agent.cpython-311.pyc +0 -0
  24. src/agents/__pycache__/base_agent.cpython-311.pyc +0 -0
  25. src/agents/__pycache__/cloud_agent.cpython-311.pyc +0 -0
  26. src/agents/__pycache__/local_data_agent.cpython-311.pyc +0 -0
  27. src/agents/__pycache__/search_agent.cpython-311.pyc +0 -0
  28. src/agents/__pycache__/snowflake_agent.cpython-311.pyc +0 -0
  29. src/agents/aggregator_agent.py +266 -0
  30. src/agents/base_agent.py +305 -0
  31. src/agents/cloud_agent.py +162 -0
  32. src/agents/local_data_agent.py +86 -0
  33. src/agents/search_agent.py +101 -0
  34. src/agents/snowflake_agent.py +245 -0
  35. src/core/__init__.py +2 -0
  36. src/core/__pycache__/__init__.cpython-311.pyc +0 -0
  37. src/core/__pycache__/config.cpython-311.pyc +0 -0
  38. src/core/__pycache__/orchestrator.cpython-311.pyc +0 -0
  39. src/core/config.py +220 -0
  40. src/core/orchestrator.py +332 -0
  41. src/mcp/__init__.py +2 -0
  42. src/mcp/__pycache__/__init__.cpython-311.pyc +0 -0
  43. src/mcp/__pycache__/mcp_server.cpython-311.pyc +0 -0
  44. src/mcp/__pycache__/snowflake_server.cpython-311.pyc +0 -0
  45. src/mcp/cloud_server.py +156 -0
  46. src/mcp/local_server.py +122 -0
  47. src/mcp/mcp_server.py +78 -0
  48. src/mcp/search_server.py +62 -0
  49. src/mcp/snowflake_server.py +185 -0
  50. src/memory/__init__.py +2 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ data/chroma_db/chroma.sqlite3 filter=lfs diff=lfs merge=lfs -text
api/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """API layer."""
2
+
api/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (182 Bytes). View file
 
api/__pycache__/main.cpython-311.pyc ADDED
Binary file (2.79 kB). View file
 
api/__pycache__/routes.cpython-311.pyc ADDED
Binary file (6.59 kB). View file
 
api/main.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""FastAPI application entry point for the Agentic RAG system."""

import sys
from pathlib import Path

# Add parent directory to path to allow imports from src when run as a script.
parent_dir = Path(__file__).parent.parent
if str(parent_dir) not in sys.path:
    sys.path.insert(0, str(parent_dir))

import logging
from contextlib import asynccontextmanager

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

from src.core.config import get_settings
from api.routes import router

# Configure logging once for the whole process.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

settings = get_settings()


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan handler.

    Replaces the deprecated ``@app.on_event("startup")`` /
    ``@app.on_event("shutdown")`` hooks (deprecated since FastAPI 0.93):
    code before ``yield`` runs at startup, code after it at shutdown.
    """
    logger.info("Starting Agentic RAG System API")
    logger.info(f"API running on {settings.api_host}:{settings.api_port}")
    yield
    logger.info("Shutting down Agentic RAG System API")


# Initialize FastAPI app
app = FastAPI(
    title="Agentic RAG System API",
    description="Production-ready Agentic RAG system with multiple agents and MCP servers",
    version="1.0.0",
    lifespan=lifespan,
)

# Configure CORS.
# NOTE(review): wildcard origins together with allow_credentials=True only
# works because Starlette echoes the request Origin header in that case;
# in production, list explicit allowed origins instead of "*".
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # In production, specify allowed origins
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Include routes
app.include_router(router)


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(
        "main:app",
        host=settings.api_host,
        port=settings.api_port,
        reload=settings.api_debug,
    )
api/routes.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""API routes for the Agentic RAG system."""

import logging
from typing import Optional

from fastapi import APIRouter, HTTPException
from pydantic import BaseModel

from src.core.orchestrator import get_orchestrator
from src.memory.long_term_memory import LongTermMemory

logger = logging.getLogger(__name__)

router = APIRouter()


# ---------------------------------------------------------------------------
# Request/Response models
# ---------------------------------------------------------------------------
class QueryRequest(BaseModel):
    """Query request model."""
    query: str
    tier: str = "basic"  # "basic", "agent", or "advanced"
    session_id: Optional[str] = None


class QueryResponse(BaseModel):
    """Query response model."""
    success: bool
    answer: Optional[str] = None
    tier: str
    error: Optional[str] = None
    sources: Optional[list] = None
    model: Optional[str] = None
    agent: Optional[str] = None


class HealthResponse(BaseModel):
    """Health check response."""
    status: str
    version: str


@router.get("/health", response_model=HealthResponse)
async def health_check():
    """Health check endpoint."""
    return {
        "status": "healthy",
        "version": "1.0.0",
    }


@router.post("/query", response_model=QueryResponse)
async def query(request: QueryRequest):
    """
    Main query endpoint supporting all tiers.

    - **basic**: Simple RAG (retrieval + generation)
    - **agent**: Agent with tools (calculator, web search, database)
    - **advanced**: Multi-agent system with MCP servers
    """
    try:
        orchestrator = get_orchestrator()
        response = await orchestrator.process_query(
            query=request.query,
            tier=request.tier,
            session_id=request.session_id,
        )
        return QueryResponse(**response)
    except Exception as e:
        # logger.exception records the full traceback; `from e` preserves
        # the original cause on the re-raised HTTPException.
        logger.exception("Error processing query")
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get("/agents")
async def get_agents():
    """Get status of all agents."""
    try:
        orchestrator = get_orchestrator()
        return orchestrator.get_agent_status()
    except Exception as e:
        logger.exception("Error getting agent status")
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get("/system")
async def get_system_info():
    """Get system information."""
    try:
        orchestrator = get_orchestrator()
        return orchestrator.get_system_info()
    except Exception as e:
        logger.exception("Error getting system info")
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.get("/memory/{session_id}")
async def get_memory(session_id: str):
    """Get memory for a session."""
    try:
        # A fresh LongTermMemory is created per request; assumed cheap —
        # TODO(review): confirm, or hoist to an app-level dependency.
        long_term_memory = LongTermMemory()
        memories = long_term_memory.get_session_memories(session_id, limit=50)
        return {
            "session_id": session_id,
            "memories": memories,
            "count": len(memories),
        }
    except Exception as e:
        logger.exception("Error getting memory")
        raise HTTPException(status_code=500, detail=str(e)) from e


@router.delete("/memory/{session_id}")
async def delete_memory(session_id: str):
    """Delete memory for a session."""
    try:
        long_term_memory = LongTermMemory()
        deleted_count = long_term_memory.delete_session_memories(session_id)
        return {
            "session_id": session_id,
            "deleted": deleted_count,
        }
    except Exception as e:
        logger.exception("Error deleting memory")
        raise HTTPException(status_code=500, detail=str(e)) from e
data/chroma_db/12c6a58a-a370-4695-a9d6-a858314de1c1/data_level0.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:065a5aa61390e7ff9c4d37dbb028fd9a866fd618df83adeb7b41c957a09d4dc0
3
+ size 628400
data/chroma_db/12c6a58a-a370-4695-a9d6-a858314de1c1/header.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b081be2c2276a57e995075c7de2f3cb25e903798aac36d98042045533ab28f7d
3
+ size 100
data/chroma_db/12c6a58a-a370-4695-a9d6-a858314de1c1/length.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a12e561363385e9dfeeab326368731c030ed4b374e7f5897ac819159d2884c5
3
+ size 400
data/chroma_db/12c6a58a-a370-4695-a9d6-a858314de1c1/link_lists.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
3
+ size 0
data/chroma_db/chroma.sqlite3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1235fee08e11e0ecfb47ccd075b737c7eec7d2c316a571f5512adc721b2110d
3
+ size 1687552
requirements.txt CHANGED
@@ -1,3 +1,43 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ openai>=1.12.0
3
+ chromadb>=0.4.22
4
+ pydantic>=2.5.0
5
+ pydantic-settings>=2.1.0
6
+ python-dotenv>=1.0.0
7
+
8
+ # MCP SDK
9
+ mcp>=0.9.0
10
+
11
+ # API framework
12
+ fastapi>=0.109.0
13
+ uvicorn[standard]>=0.27.0
14
+ httpx>=0.26.0
15
+
16
+ # UI framework
17
+ streamlit>=1.31.0
18
+
19
+ # Utilities
20
+ tiktoken>=0.5.2
21
+ numpy>=1.26.0
22
+ aiofiles>=23.2.1
23
+ nest-asyncio>=1.6.0 # For async handling in Streamlit
24
+
25
+ # Testing
26
+ pytest>=7.4.4
27
+ pytest-asyncio>=0.23.3
28
+ pytest-mock>=3.12.0
29
+
30
+ # Optional: Web search providers
31
+ tavily-python>=0.3.0
32
+
33
+ # Optional: Database support
34
+ sqlalchemy>=2.0.25
35
+
36
+ # Optional: Cloud storage
37
+ boto3>=1.34.0 # AWS S3
38
+ google-cloud-storage>=2.14.0 # GCS
39
+
40
+ # Optional: Snowflake
41
+ snowflake-connector-python>=3.7.0
42
+ pandas>=2.0.0 # For Snowflake data operations
43
+
scripts/__pycache__/add_documents.cpython-311.pyc ADDED
Binary file (10.6 kB). View file
 
scripts/add_documents.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Script to add documents to the vector store from files or text."""

import sys
import os
from pathlib import Path
from typing import List, Dict, Optional

# Add parent directory to path so `src` is importable when run as a script.
try:
    parent_dir = Path(__file__).parent.parent
    sys.path.insert(0, str(parent_dir))
except (NameError, AttributeError):
    # __file__ is not available (e.g. interactive execution); fall back to CWD.
    # (os is already imported at module top; no need to re-import here.)
    parent_dir = Path(os.getcwd())
    if str(parent_dir) not in sys.path:
        sys.path.insert(0, str(parent_dir))

# Lazy import - only import when functions are actually called.
# This prevents import errors when the module is scanned but not used.
_vector_store = None
_vector_store_error = None


def _get_vector_store():
    """Lazily import and cache the vector store singleton.

    Raises:
        ImportError: if the vector store dependencies (e.g. chromadb) are
            missing. The error is cached so repeated calls fail fast.
    """
    global _vector_store, _vector_store_error
    if _vector_store_error is not None:
        raise _vector_store_error
    if _vector_store is None:
        try:
            from src.retrieval.vector_store import get_vector_store
            _vector_store = get_vector_store()
        except ImportError as e:
            _vector_store_error = ImportError(
                f"Failed to import vector store. Make sure all dependencies (including chromadb) are installed. "
                f"Run: pip install -r requirements.txt\n"
                f"Original error: {e}"
            )
            # Chain to the original ImportError for full diagnostics.
            raise _vector_store_error from e
    return _vector_store


def add_text_documents(texts: List[str], metadatas: Optional[List[Dict]] = None):
    """
    Add text documents to the vector store.

    Args:
        texts: List of document texts
        metadatas: Optional list of metadata dictionaries

    Returns:
        List of document IDs assigned by the vector store.
    """
    vector_store = _get_vector_store()

    if metadatas is None:
        metadatas = [{}] * len(texts)

    ids = vector_store.add_documents(texts, metadatas)
    print(f"✅ Added {len(ids)} documents to vector store")
    return ids


def add_file_documents(file_paths: List[str], chunk_size: int = 1000):
    """
    Add documents from text files to the vector store.

    Args:
        file_paths: List of file paths to read
        chunk_size: Size of text chunks (characters) for splitting large documents

    Returns:
        List of document IDs, or [] when no readable files were found.
    """
    all_documents = []
    all_metadatas = []

    for file_path in file_paths:
        file_path = Path(file_path)
        if not file_path.exists():
            print(f"⚠️ Warning: File not found: {file_path}")
            continue

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            # Split large documents into fixed-size chunks (no overlap).
            if len(content) > chunk_size:
                chunks = [content[i:i+chunk_size] for i in range(0, len(content), chunk_size)]
                for i, chunk in enumerate(chunks):
                    all_documents.append(chunk)
                    all_metadatas.append({
                        "source": str(file_path.name),
                        "chunk": i + 1,
                        "type": "file"
                    })
            else:
                all_documents.append(content)
                all_metadatas.append({
                    "source": str(file_path.name),
                    "type": "file"
                })

            print(f"✅ Loaded: {file_path.name}")
        except Exception as e:
            print(f"❌ Error reading {file_path}: {e}")

    if all_documents:
        return add_text_documents(all_documents, all_metadatas)
    print("⚠️ No documents to add")
    return []


def add_from_directory(directory: str, extensions: Optional[List[str]] = None):
    """
    Add all text files from a directory (recursively).

    Args:
        directory: Directory path
        extensions: List of file extensions to include
            (default: ['.txt', '.md', '.py', '.json'])

    Returns:
        List of document IDs, or [] when the directory is missing or empty.
    """
    if extensions is None:
        extensions = ['.txt', '.md', '.py', '.json']

    directory = Path(directory)
    if not directory.exists():
        print(f"❌ Directory not found: {directory}")
        return []

    file_paths = []
    for ext in extensions:
        file_paths.extend(directory.glob(f"**/*{ext}"))

    if not file_paths:
        print(f"⚠️ No files found with extensions {extensions} in {directory}")
        return []

    print(f"📁 Found {len(file_paths)} files in {directory}")
    return add_file_documents([str(f) for f in file_paths])


if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(description="Add documents to the vector store")
    parser.add_argument("--text", nargs="+", help="Add text documents directly")
    parser.add_argument("--file", nargs="+", help="Add documents from files")
    parser.add_argument("--directory", help="Add all documents from a directory")
    parser.add_argument("--sample-docs", action="store_true", help="Add sample documents")

    args = parser.parse_args()

    if args.sample_docs:
        # Add sample documents
        sample_docs = [
            {
                "text": """
                Oracle Exadata is a database machine that combines hardware and software
                to provide high-performance database solutions. When migrating Exadata
                workloads to the cloud, it's important to consider compatibility,
                performance, and feature parity.
                """,
                "metadata": {"source": "exadata_migration_guide", "type": "documentation"},
            },
            {
                "text": """
                Cloud migration strategies for Oracle Exadata include:
                1. Lift and shift - moving workloads with minimal changes
                2. Replatforming - adapting to cloud-native services
                3. Refactoring - redesigning for cloud architecture

                Each approach has different trade-offs in terms of effort, cost, and feature availability.
                """,
                "metadata": {"source": "migration_strategies", "type": "guide"},
            },
            {
                "text": """
                Oracle Cloud Infrastructure (OCI) provides Exadata Cloud Service which
                maintains full feature compatibility with on-premises Exadata. This
                service offers the same architecture and capabilities, making it ideal
                for migrations requiring minimal changes.
                """,
                "metadata": {"source": "oci_exadata", "type": "cloud_service"},
            },
            {
                "text": """
                Oracle AI Database services on AWS provide customers with a simplified path
                to migrate Oracle Exadata workloads. These services run on AWS infrastructure
                and offer managed database solutions that maintain Oracle compatibility while
                leveraging AWS cloud capabilities. The services include automated migration tools,
                performance optimization, and seamless integration with AWS services.
                """,
                "metadata": {"source": "oracle_aws_services", "type": "cloud_service"},
            },
        ]

        documents = [doc["text"] for doc in sample_docs]
        metadatas = [doc["metadata"] for doc in sample_docs]
        add_text_documents(documents, metadatas)

    elif args.text:
        add_text_documents(args.text)

    elif args.file:
        add_file_documents(args.file)

    elif args.directory:
        add_from_directory(args.directory)

    else:
        print("Please specify --text, --file, --directory, or --sample-docs")
        print("\nExamples:")
        print("  python scripts/add_documents.py --sample-docs")
        print("  python scripts/add_documents.py --file doc1.txt doc2.txt")
        print("  python scripts/add_documents.py --directory data/sample_documents")
        print("  python scripts/add_documents.py --text 'Your document text here'")
scripts/add_sample_documents.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Script to add sample documents to the vector store."""
2
+
3
+ import sys
4
+ from pathlib import Path
5
+
6
+ # Add parent directory to path
7
+ parent_dir = Path(__file__).parent.parent
8
+ sys.path.insert(0, str(parent_dir))
9
+
10
+ # Lazy import to avoid issues when module is scanned but not used
11
+ def _get_vector_store():
12
+ """Lazy import of vector store."""
13
+ try:
14
+ from src.retrieval.vector_store import get_vector_store
15
+ return get_vector_store()
16
+ except ImportError as e:
17
+ raise ImportError(
18
+ f"Failed to import vector store. Make sure all dependencies are installed. "
19
+ f"Original error: {e}"
20
+ )
21
+
22
+ def add_sample_documents():
23
+ """Add sample documents to the vector store."""
24
+ vector_store = _get_vector_store()
25
+
26
+ sample_docs = [
27
+ {
28
+ "text": """
29
+ Oracle Exadata is a database machine that combines hardware and software
30
+ to provide high-performance database solutions. When migrating Exadata
31
+ workloads to the cloud, it's important to consider compatibility,
32
+ performance, and feature parity.
33
+ """,
34
+ "metadata": {"source": "exadata_migration_guide", "type": "documentation"},
35
+ },
36
+ {
37
+ "text": """
38
+ Cloud migration strategies for Oracle Exadata include:
39
+ 1. Lift and shift - moving workloads with minimal changes
40
+ 2. Replatforming - adapting to cloud-native services
41
+ 3. Refactoring - redesigning for cloud architecture
42
+
43
+ Each approach has different trade-offs in terms of effort, cost, and feature availability.
44
+ """,
45
+ "metadata": {"source": "migration_strategies", "type": "guide"},
46
+ },
47
+ {
48
+ "text": """
49
+ Oracle Cloud Infrastructure (OCI) provides Exadata Cloud Service which
50
+ maintains full feature compatibility with on-premises Exadata. This
51
+ service offers the same architecture and capabilities, making it ideal
52
+ for migrations requiring minimal changes.
53
+ """,
54
+ "metadata": {"source": "oci_exadata", "type": "cloud_service"},
55
+ },
56
+ ]
57
+
58
+ documents = [doc["text"] for doc in sample_docs]
59
+ metadatas = [doc["metadata"] for doc in sample_docs]
60
+
61
+ ids = vector_store.add_documents(documents, metadatas)
62
+ print(f"Added {len(ids)} sample documents to vector store")
63
+ print(f"Document IDs: {ids}")
64
+
65
+ if __name__ == "__main__":
66
+ add_sample_documents()
67
+
scripts/start_api.sh ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Script to start the API server.
# Fail fast: abort on any error (e.g. if the cd fails we must not start
# uvicorn from the wrong directory), on unset variables, and on pipe failures.
set -euo pipefail

# Run from the repository root regardless of the caller's CWD.
cd "$(dirname "$0")/.."

echo "Starting Agentic RAG API server..."
echo "API will be available at http://localhost:8000"
echo "Press Ctrl+C to stop the server"
echo ""

uvicorn api.main:app --reload --host 0.0.0.0 --port 8000
scripts/start_ui.sh ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
# Script to start the Streamlit UI.
# Fail fast: abort on any error (e.g. if the cd fails we must not launch
# streamlit from the wrong directory), on unset variables, and on pipe failures.
set -euo pipefail

# Run from the repository root regardless of the caller's CWD.
cd "$(dirname "$0")/.."

echo "Starting Agentic RAG Streamlit UI..."
echo "UI will be available at http://localhost:8501"
echo "Press Ctrl+C to stop the server"
echo ""

streamlit run ui/streamlit_app.py
src/__init__.py ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ """Agentic RAG System - Main package."""
2
+
3
+ __version__ = "1.0.0"
4
+
src/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (233 Bytes). View file
 
src/agents/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Agent implementations."""
2
+
src/agents/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (201 Bytes). View file
 
src/agents/__pycache__/aggregator_agent.cpython-311.pyc ADDED
Binary file (11.9 kB). View file
 
src/agents/__pycache__/base_agent.cpython-311.pyc ADDED
Binary file (12.7 kB). View file
 
src/agents/__pycache__/cloud_agent.cpython-311.pyc ADDED
Binary file (8.68 kB). View file
 
src/agents/__pycache__/local_data_agent.cpython-311.pyc ADDED
Binary file (4.22 kB). View file
 
src/agents/__pycache__/search_agent.cpython-311.pyc ADDED
Binary file (5.08 kB). View file
 
src/agents/__pycache__/snowflake_agent.cpython-311.pyc ADDED
Binary file (12.3 kB). View file
 
src/agents/aggregator_agent.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Aggregator agent that coordinates multiple specialized agents."""

import logging
from typing import List, Dict, Any, Optional
from openai import OpenAI
from src.agents.base_agent import BaseAgent
from src.agents.local_data_agent import LocalDataAgent
from src.agents.search_agent import SearchAgent
from src.agents.cloud_agent import CloudAgent
from src.core.config import get_settings

logger = logging.getLogger(__name__)


class AggregatorAgent(BaseAgent):
    """Agent that coordinates multiple specialized agents and aggregates responses."""

    def __init__(self, use_planning: bool = True):
        """Initialize aggregator agent.

        Args:
            use_planning: Whether the aggregator itself uses CoT planning.
        """
        super().__init__(
            name="aggregator_agent",
            description=(
                "You are an aggregator agent that coordinates multiple specialized agents "
                "to answer complex questions. You route queries to appropriate agents and "
                "synthesize their responses into a comprehensive answer."
            ),
            use_memory=True,
            use_planning=use_planning,
            planning_type="cot",
        )

        # Initialize specialized agents
        self.local_agent = LocalDataAgent(use_planning=False)
        self.search_agent = SearchAgent(use_planning=True)
        self.cloud_agent = CloudAgent(use_planning=False)

        # Initialize Snowflake agent only when credentials are configured.
        # (get_settings is already imported at module level; the original
        # redundantly re-imported it here.)
        self.snowflake_agent = None
        settings = get_settings()
        if settings.has_snowflake():
            # Imported lazily so the optional snowflake dependency is only
            # required when Snowflake is actually configured.
            from src.agents.snowflake_agent import SnowflakeAgent
            snowflake_config = settings.get_snowflake_config()
            self.snowflake_agent = SnowflakeAgent(
                snowflake_config=snowflake_config,
                use_planning=False
            )

        # Registry of all available agents, keyed by routing name.
        self.agents = {
            "local": self.local_agent,
            "search": self.search_agent,
            "cloud": self.cloud_agent,
        }
        if self.snowflake_agent:
            self.agents["snowflake"] = self.snowflake_agent

    async def retrieve_context(self, query: str) -> str:
        """
        Retrieve context by querying relevant agents.

        Args:
            query: User query

        Returns:
            Aggregated context string
        """
        # Determine which agents to query based on query content
        agents_to_query = self._select_agents(query)

        # Query selected agents one at a time; a failure in one agent is
        # recorded and does not prevent the others from contributing.
        results = {}
        for agent_name, agent in agents_to_query.items():
            try:
                results[agent_name] = await agent.retrieve_context(query)
            except Exception as e:
                # logger.exception keeps the traceback for debugging.
                logger.exception(f"Error querying {agent_name} agent")
                results[agent_name] = f"Error: {str(e)}"

        # Combine results into one labeled context block.
        context_parts = ["Context from specialized agents:"]
        for agent_name, context in results.items():
            context_parts.append(f"\n--- {agent_name.upper()} AGENT ---")
            context_parts.append(context)

        return "\n".join(context_parts)

    def _select_agents(self, query: str) -> Dict[str, BaseAgent]:
        """
        Select which agents to query based on the query content.

        Routing is keyword-based; an agent is included when any of its
        trigger keywords appears in the lowercased query.

        Args:
            query: User query

        Returns:
            Dictionary of agent names to agents
        """
        query_lower = query.lower()
        selected = {}

        # Local agent for document/file queries.
        if any(keyword in query_lower for keyword in ["document", "file", "local", "data"]):
            selected["local"] = self.local_agent

        # Search agent for current information or web queries.
        if any(keyword in query_lower for keyword in [
            "current", "latest", "recent", "news", "web", "internet", "online", "search"
        ]):
            selected["search"] = self.search_agent

        # Cloud agent for cloud-storage-related queries.
        if any(keyword in query_lower for keyword in ["cloud", "s3", "gcs", "storage", "remote"]):
            selected["cloud"] = self.cloud_agent

        # Snowflake agent for database/data warehouse queries (if configured).
        if self.snowflake_agent and any(keyword in query_lower for keyword in [
            "snowflake", "data warehouse", "sql", "database", "query", "table", "schema"
        ]):
            selected["snowflake"] = self.snowflake_agent

        # If no specific match, use local and search by default.
        if not selected:
            selected["local"] = self.local_agent
            selected["search"] = self.search_agent

        return selected

    async def process(
        self,
        query: str,
        session_id: Optional[str] = None,
        context: Optional[str] = None,
    ) -> dict:
        """
        Process query by coordinating multiple agents.

        Args:
            query: User query
            session_id: Optional session ID
            context: Optional additional context

        Returns:
            Aggregated response dictionary
        """
        # Select agents to query
        agents_to_query = self._select_agents(query)

        # Get responses from selected agents; per-agent errors are captured
        # so one failing agent does not abort the whole request.
        agent_responses = {}
        for agent_name, agent in agents_to_query.items():
            try:
                agent_responses[agent_name] = await agent.process(query, session_id, context)
            except Exception as e:
                logger.exception(f"Error processing with {agent_name} agent")
                agent_responses[agent_name] = {
                    "success": False,
                    "error": str(e),
                }

        # Synthesize responses
        return await self._synthesize_responses(
            query=query,
            agent_responses=agent_responses,
            session_id=session_id,
        )

    async def _synthesize_responses(
        self,
        query: str,
        agent_responses: Dict[str, dict],
        session_id: Optional[str],
    ) -> dict:
        """
        Synthesize responses from multiple agents.

        Args:
            query: Original query
            agent_responses: Dictionary of agent responses
            session_id: Optional session ID

        Returns:
            Synthesized response
        """
        # Collect successful responses
        successful_responses = {
            name: resp for name, resp in agent_responses.items()
            if resp.get("success", False)
        }

        if not successful_responses:
            # No agent succeeded: surface every agent's error message.
            error_messages = []
            for name, resp in agent_responses.items():
                error_msg = resp.get("error", "Unknown error")
                error_messages.append(f"{name}: {error_msg}")

            return {
                "success": False,
                "error": f"No agents provided successful responses. Errors: {'; '.join(error_messages)}",
                "agent_responses": agent_responses,
            }

        # If only one agent responded, return its response unchanged.
        if len(successful_responses) == 1:
            response = next(iter(successful_responses.values()))
            response["aggregated_by"] = "single_agent"
            return response

        # Multiple responses - synthesize using the LLM.
        try:
            # Build synthesis prompt
            synthesis_parts = [
                "You are synthesizing responses from multiple specialized agents.",
                f"Original question: {query}",
                "",
                "Agent responses:",
            ]

            for agent_name, response in successful_responses.items():
                answer = response.get("answer", "No answer provided")
                synthesis_parts.append(f"\n{agent_name.upper()} Agent:")
                synthesis_parts.append(answer)

            synthesis_parts.extend([
                "",
                "Synthesize these responses into a comprehensive, coherent answer.",
                "If there are conflicts, note them. If information is complementary, combine it.",
            ])

            synthesis_prompt = "\n".join(synthesis_parts)

            # Call LLM for synthesis (client/model come from BaseAgent).
            messages = [
                {"role": "system", "content": self.description},
                {"role": "user", "content": synthesis_prompt},
            ]

            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=0.7,
            )

            synthesized_answer = response.choices[0].message.content

            return {
                "success": True,
                "answer": synthesized_answer,
                "agent": self.name,
                "aggregated_by": "multiple_agents",
                "source_agents": list(successful_responses.keys()),
                "agent_responses": successful_responses,
                "model": self.model,
            }

        except Exception as e:
            logger.exception("Error synthesizing responses")
            # Fallback: return the first successful response unmodified.
            first_response = next(iter(successful_responses.values()))
            first_response["aggregated_by"] = "fallback"
            return first_response
src/agents/base_agent.py ADDED
@@ -0,0 +1,305 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Base agent class with common functionality."""
2
+
3
+ import logging
4
+ from abc import ABC, abstractmethod
5
+ from typing import List, Dict, Any, Optional, Callable
6
+ from openai import OpenAI
7
+ from src.core.config import get_settings
8
+ from src.memory.short_term_memory import ShortTermMemory
9
+ from src.memory.long_term_memory import LongTermMemory
10
+ from src.planning.react_planner import ReActPlanner
11
+ from src.planning.cot_planner import CoTPlanner
12
+
13
+ logger = logging.getLogger(__name__)
14
+
15
+
16
class BaseAgent(ABC):
    """Base class for all agents.

    Provides the plumbing shared by every concrete agent:

    * an OpenAI-compatible chat client configured from application settings
      (may target OpenRouter or any compatible endpoint),
    * optional short-term and long-term memory,
    * optional planning (ReAct or chain-of-thought),
    * a tool registry supporting both sync and async tool functions.

    Subclasses must implement :meth:`retrieve_context`.
    """

    def __init__(
        self,
        name: str,
        description: str,
        tools: Optional[List[Dict[str, Any]]] = None,
        use_memory: bool = True,
        use_planning: bool = False,
        planning_type: str = "react",  # "react" or "cot"
    ):
        """
        Initialize base agent.

        Args:
            name: Agent name
            description: Agent description; also used verbatim as the
                system prompt for every LLM call this agent makes.
            tools: List of available tool schemas
            use_memory: Whether to use memory
            use_planning: Whether to use planning
            planning_type: Type of planning ("react" or "cot")
        """
        self.name = name
        self.description = description
        self.settings = get_settings()

        # Initialize OpenAI client (endpoint/model are taken from settings)
        self.client = OpenAI(**self.settings.get_openai_client_kwargs())
        self.model = self.settings.openai_model

        # Initialize memory: either both stores exist, or neither does
        self.use_memory = use_memory
        self.short_term_memory: Optional[ShortTermMemory] = None
        self.long_term_memory: Optional[LongTermMemory] = None
        if use_memory:
            self.short_term_memory = ShortTermMemory()
            self.long_term_memory = LongTermMemory()

        # Initialize planning. An unrecognised planning_type leaves
        # self.planner as None, which silently degrades process() to the
        # direct (non-planning) path.
        self.use_planning = use_planning
        self.planning_type = planning_type
        self.planner: Optional[ReActPlanner | CoTPlanner] = None
        if use_planning:
            if planning_type == "react":
                self.planner = ReActPlanner(tools=tools or [])
            elif planning_type == "cot":
                self.planner = CoTPlanner()
            else:
                logger.warning(f"Unknown planning type: {planning_type}")

        # Tools: the schema list plus a name -> callable registry that
        # add_tool() fills in
        self.tools = tools or []
        self.tool_functions: Dict[str, Callable] = {}

    def add_tool(self, tool: Dict[str, Any], tool_function: Callable) -> None:
        """
        Add a tool to the agent.

        The schema is appended to ``self.tools`` and, when a ReAct planner
        is active, also registered with the planner. NOTE(review): no
        de-duplication is performed -- registering a schema that was already
        passed to ``__init__`` keeps both copies.

        Args:
            tool: Tool schema (must contain a "name" key)
            tool_function: Function to execute the tool (sync or async)
        """
        self.tools.append(tool)
        self.tool_functions[tool["name"]] = tool_function
        if self.planner and isinstance(self.planner, ReActPlanner):
            self.planner.add_tool(tool)

    async def process(
        self,
        query: str,
        session_id: Optional[str] = None,
        context: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Process a query using the agent.

        Flow: record the query in short-term memory, pull related memories
        from long-term storage, answer either directly or via the configured
        planner, then persist the conversation. Any exception is caught and
        converted into a ``{"success": False, ...}`` dictionary rather than
        being raised to the caller.

        Args:
            query: User query
            session_id: Optional session ID for memory
            context: Optional additional context

        Returns:
            Response dictionary; always contains "success" and "agent", and
            on success also "answer" and "model".
        """
        try:
            # Add user message to memory. _process_direct replays the last
            # few memory messages, which is how the query reaches the LLM
            # when memory is enabled.
            if self.short_term_memory:
                self.short_term_memory.add_message("user", query)

            # Load long-term memory if available
            long_term_context = ""
            if self.long_term_memory and session_id:
                memories = self.long_term_memory.search_memories(query, session_id, n_results=3)
                if memories:
                    long_term_context = "\n".join([
                        m["content"] for m in memories
                    ])

            # Combine contexts (long-term memory + caller-supplied context)
            full_context = self._build_context(context, long_term_context)

            # Use planning if enabled
            if self.use_planning and self.planner:
                response = await self._process_with_planning(query, full_context, session_id)
            else:
                response = await self._process_direct(query, full_context, session_id)

            # Add assistant response to memory
            if self.short_term_memory and "answer" in response:
                self.short_term_memory.add_message("assistant", response["answer"])

            # Store in long-term memory. This persists the entire running
            # short-term transcript for the session, not just this turn.
            if self.long_term_memory and session_id:
                messages = self.short_term_memory.get_messages() if self.short_term_memory else []
                self.long_term_memory.store_conversation(session_id, messages)

            return response

        except Exception as e:
            logger.error(f"Error processing query in {self.name}: {e}")
            return {
                "success": False,
                "error": str(e),
                "agent": self.name,
            }

    async def _process_direct(
        self,
        query: str,
        context: str,
        session_id: Optional[str],
    ) -> Dict[str, Any]:
        """Process query directly with a single chat completion (no planning).

        Raises:
            Exception: LLM errors are re-raised; quota (429) and auth (401)
                failures are translated into actionable messages first.
        """
        # Build messages: the agent description is the system prompt, with
        # any retrieved context appended to it.
        messages = []
        if context:
            messages.append({
                "role": "system",
                "content": f"{self.description}\n\nContext: {context}",
            })
        else:
            messages.append({
                "role": "system",
                "content": self.description,
            })

        # Add conversation history. When memory is enabled the current query
        # is already the most recent history entry (added by process());
        # without memory it must be appended here explicitly.
        if self.short_term_memory:
            history = self.short_term_memory.get_messages(format_for_llm=True)
            messages.extend(history[-5:])  # Last 5 messages
        else:
            messages.append({
                "role": "user",
                "content": query,
            })

        # Call LLM
        try:
            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=0.7,
            )

            answer = response.choices[0].message.content

            return {
                "success": True,
                "answer": answer,
                "agent": self.name,
                "model": self.model,
            }
        except Exception as e:
            # Map the most common API failures to clearer messages; anything
            # else propagates unchanged for process() to wrap.
            error_msg = str(e)
            if "quota" in error_msg.lower() or "429" in error_msg:
                logger.error(f"OpenAI API quota exceeded: {e}")
                raise Exception("OpenAI API quota exceeded. Please check your billing and plan details.")
            elif "api key" in error_msg.lower() or "401" in error_msg:
                logger.error(f"Invalid OpenAI API key: {e}")
                raise Exception("Invalid OpenAI API key. Please check your .env file.")
            else:
                logger.error(f"Error calling LLM: {e}")
                raise

    async def _process_with_planning(
        self,
        query: str,
        context: str,
        session_id: Optional[str],
    ) -> Dict[str, Any]:
        """Process query using the configured planner.

        Falls back to :meth:`_process_direct` when no planner was created
        (e.g. an unknown planning_type was given).
        """
        if not self.planner:
            return await self._process_direct(query, context, session_id)

        # Create sync LLM call function (planner expects sync)
        def llm_call(prompt: str) -> str:
            messages = [
                {"role": "system", "content": self.description},
                {"role": "user", "content": prompt},
            ]
            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=0.7,
            )
            return response.choices[0].message.content

        # Generate plan (planner methods are sync). Both branches make the
        # same call today; kept separate in case the planner APIs diverge.
        if isinstance(self.planner, ReActPlanner):
            plan = self.planner.plan(
                query=query,
                context=context,
                llm_call=llm_call,
            )
        else:  # CoT planner
            plan = self.planner.plan(
                query=query,
                context=context,
                llm_call=llm_call,
            )

        # Extract final answer: ReAct plans expose "final_answer", CoT plans
        # expose "conclusion"
        if isinstance(self.planner, ReActPlanner):
            answer = plan.get("final_answer", "I couldn't find a complete answer.")
        else:
            answer = plan.get("conclusion", "I couldn't find a complete answer.")

        return {
            "success": True,
            "answer": answer,
            "agent": self.name,
            "plan": plan,
            "model": self.model,
        }

    def _build_context(
        self,
        additional_context: Optional[str],
        long_term_context: str,
    ) -> str:
        """Build full context string (empty string when both parts are empty)."""
        parts = []
        if long_term_context:
            parts.append(f"Relevant past conversations:\n{long_term_context}")
        if additional_context:
            parts.append(f"Additional context:\n{additional_context}")
        return "\n\n".join(parts)

    async def _execute_tool(
        self,
        tool_name: str,
        **kwargs,
    ) -> Any:
        """Execute a registered tool by name (supports sync and async tools).

        Raises:
            ValueError: if no tool with ``tool_name`` has been registered.
        """
        if tool_name not in self.tool_functions:
            raise ValueError(f"Tool '{tool_name}' not found")

        tool_func = self.tool_functions[tool_name]
        # Check if tool is async; await coroutines, call plain functions
        import asyncio
        if asyncio.iscoroutinefunction(tool_func):
            return await tool_func(**kwargs)
        else:
            return tool_func(**kwargs)

    @abstractmethod
    async def retrieve_context(self, query: str) -> str:
        """
        Retrieve relevant context for the query.

        Subclasses implement this to pull domain-specific context (local
        documents, web results, warehouse data, ...).

        Args:
            query: User query

        Returns:
            Context string
        """
        pass

    def get_status(self) -> Dict[str, Any]:
        """Get agent status as a plain, JSON-serializable dictionary."""
        return {
            "name": self.name,
            "description": self.description,
            "tools": [t["name"] for t in self.tools],
            "memory_enabled": self.use_memory,
            "planning_enabled": self.use_planning,
            "planning_type": self.planning_type if self.use_planning else None,
        }
src/agents/cloud_agent.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cloud storage agent for remote data access."""
2
+
3
+ import logging
4
+ import os
5
+ from typing import Optional
6
+ from src.agents.base_agent import BaseAgent
7
+ from src.core.config import get_settings
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class CloudAgent(BaseAgent):
    """Agent specialized in accessing cloud storage and remote data.

    Supports AWS S3 (via boto3) and Google Cloud Storage, selected from the
    application settings. The storage client lives on ``self.cloud_client``;
    ``self.client`` remains the OpenAI chat client owned by BaseAgent.
    """

    def __init__(self, use_planning: bool = False):
        """Initialize cloud agent.

        Args:
            use_planning: Whether to enable planning in the base agent.
        """
        super().__init__(
            name="cloud_agent",
            description=(
                "You are a specialized agent for accessing cloud storage and remote data. "
                "You can retrieve documents and information from cloud storage services "
                "like AWS S3 or Google Cloud Storage."
            ),
            use_memory=True,
            use_planning=use_planning,
        )
        # BaseAgent.__init__ already populated self.settings from get_settings().
        self._init_cloud_client()

    def _init_cloud_client(self):
        """Initialize the cloud storage client based on configuration.

        BUGFIX: the storage client is stored on ``self.cloud_client`` instead
        of ``self.client``. Previously it overwrote the OpenAI chat client
        that BaseAgent.__init__ assigned to ``self.client``, which broke every
        subsequent ``self.client.chat.completions.create(...)`` call made by
        the base class.
        """
        self.cloud_type = None
        self.cloud_client = None

        # Prefer AWS S3 when credentials and a bucket are configured
        if self.settings.aws_access_key_id and self.settings.aws_s3_bucket:
            try:
                import boto3
                self.cloud_client = boto3.client(
                    "s3",
                    aws_access_key_id=self.settings.aws_access_key_id,
                    aws_secret_access_key=self.settings.aws_secret_access_key,
                    region_name=self.settings.aws_region,
                )
                self.cloud_type = "s3"
                self.bucket_name = self.settings.aws_s3_bucket
                logger.info("Initialized AWS S3 client")
            except ImportError:
                logger.warning("boto3 not installed, AWS S3 unavailable")
            except Exception as e:
                logger.error(f"Error initializing S3 client: {e}")

        # Otherwise fall back to Google Cloud Storage
        elif self.settings.google_application_credentials and self.settings.gcs_bucket_name:
            try:
                from google.cloud import storage
                # The GCS SDK reads credentials from this environment variable
                os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.settings.google_application_credentials
                self.cloud_client = storage.Client()
                self.cloud_type = "gcs"
                self.bucket_name = self.settings.gcs_bucket_name
                logger.info("Initialized Google Cloud Storage client")
            except ImportError:
                logger.warning("google-cloud-storage not installed, GCS unavailable")
            except Exception as e:
                logger.error(f"Error initializing GCS client: {e}")

        if not self.cloud_client:
            logger.warning("No cloud storage configured")

    async def retrieve_context(self, query: str) -> str:
        """
        Retrieve relevant context from cloud storage.

        Args:
            query: User query

        Returns:
            Context string from cloud documents; a human-readable error
            message when storage is unconfigured or the provider call fails.
        """
        if not self.cloud_client:
            return "Cloud storage is not configured."

        try:
            if self.cloud_type == "s3":
                return await self._retrieve_from_s3(query)
            elif self.cloud_type == "gcs":
                return await self._retrieve_from_gcs(query)
            else:
                return "Unknown cloud storage type."
        except Exception as e:
            logger.error(f"Error retrieving cloud context: {e}")
            return f"Error retrieving from cloud storage: {str(e)}"

    async def _retrieve_from_s3(self, query: str) -> str:
        """List a sample of objects from the configured S3 bucket.

        NOTE(review): the query is currently unused -- this just lists
        objects (in production, a vector search over object contents would
        filter by relevance).
        """
        try:
            response = self.cloud_client.list_objects_v2(
                Bucket=self.bucket_name,
                MaxKeys=10,
            )

            # boto3 omits "Contents" entirely when the bucket is empty
            if "Contents" not in response:
                return "No documents found in S3 bucket."

            context_parts = [f"Documents in S3 bucket '{self.bucket_name}':"]
            for obj in response["Contents"][:5]:  # Limit to 5
                key = obj["Key"]
                size = obj["Size"]
                context_parts.append(f"- {key} ({size} bytes)")

            return "\n".join(context_parts)
        except Exception as e:
            logger.error(f"Error listing S3 objects: {e}")
            return f"Error accessing S3: {str(e)}"

    async def _retrieve_from_gcs(self, query: str) -> str:
        """List a sample of blobs from the configured GCS bucket.

        NOTE(review): like the S3 path, the query is not used for filtering.
        """
        try:
            bucket = self.cloud_client.bucket(self.bucket_name)
            blobs = list(bucket.list_blobs(max_results=10))

            if not blobs:
                return "No documents found in GCS bucket."

            context_parts = [f"Documents in GCS bucket '{self.bucket_name}':"]
            for blob in blobs[:5]:  # Limit to 5
                context_parts.append(f"- {blob.name} ({blob.size} bytes)")

            return "\n".join(context_parts)
        except Exception as e:
            logger.error(f"Error listing GCS objects: {e}")
            return f"Error accessing GCS: {str(e)}"

    async def process(
        self,
        query: str,
        session_id: Optional[str] = None,
        context: Optional[str] = None,
    ) -> dict:
        """
        Process query with cloud storage access.

        Args:
            query: User query
            session_id: Optional session ID
            context: Optional additional context

        Returns:
            Response dictionary from the base agent.
        """
        # Retrieve cloud context
        cloud_context = await self.retrieve_context(query)

        # Combine with provided context
        full_context = cloud_context
        if context:
            full_context = f"{context}\n\n{cloud_context}"

        # Process using base agent (LLM call via the untouched self.client)
        return await super().process(query, session_id, full_context)
src/agents/local_data_agent.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Local data agent for document queries."""
2
+
3
+ import logging
4
+ from typing import Optional
5
+ from src.agents.base_agent import BaseAgent
6
+ from src.retrieval.vector_store import get_vector_store
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
class LocalDataAgent(BaseAgent):
    """Agent specialized in querying local documents and data.

    Wraps the shared vector store: relevant chunks are fetched per query and
    handed to the base agent as additional context.
    """

    def __init__(self, use_planning: bool = False):
        """Initialize the local data agent and attach the shared vector store."""
        super().__init__(
            name="local_data_agent",
            description=(
                "You are a specialized agent for querying local documents and data. "
                "You have access to a vector store of local documents and can retrieve "
                "relevant information to answer questions."
            ),
            use_memory=True,
            use_planning=use_planning,
        )
        self.vector_store = get_vector_store()

    async def retrieve_context(self, query: str) -> str:
        """
        Retrieve relevant context from local documents.

        Args:
            query: User query

        Returns:
            Context string from retrieved documents, or a human-readable
            message when nothing matches or the lookup fails.
        """
        try:
            hits = self.vector_store.search(query=query, n_results=5)

            if not hits["documents"]:
                return "No relevant documents found in local data."

            # Render each hit as a numbered entry with its source
            lines = ["Relevant documents from local data:"]
            for rank, (doc, meta) in enumerate(
                zip(hits["documents"], hits["metadatas"]), start=1
            ):
                origin = meta.get("source", "Unknown")
                lines.append(f"\n[{rank}] Source: {origin}")
                # Clip long documents to keep the prompt small
                lines.append(f"Content: {doc[:500]}...")

            return "\n".join(lines)
        except Exception as exc:
            logger.error(f"Error retrieving local context: {exc}")
            return f"Error retrieving local documents: {str(exc)}"

    async def process(
        self,
        query: str,
        session_id: Optional[str] = None,
        context: Optional[str] = None,
    ) -> dict:
        """
        Process query with local document retrieval.

        Args:
            query: User query
            session_id: Optional session ID
            context: Optional additional context

        Returns:
            Response dictionary from the base agent.
        """
        # Fetch local context, prepend any caller-supplied context, then
        # delegate to the base agent
        local_context = await self.retrieve_context(query)
        merged = f"{context}\n\n{local_context}" if context else local_context
        return await super().process(query, session_id, merged)
src/agents/search_agent.py ADDED
@@ -0,0 +1,101 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Web search agent for online information."""
2
+
3
+ import logging
4
+ from typing import Optional
5
+ from src.agents.base_agent import BaseAgent
6
+ from src.tools.web_search import get_web_search
7
+
8
+ logger = logging.getLogger(__name__)
9
+
10
+
11
class SearchAgent(BaseAgent):
    """Agent specialized in web search and online information.

    Uses a ReAct planner (when planning is enabled) together with a single
    registered web-search tool.
    """

    def __init__(self, use_planning: bool = True):
        """Initialize search agent.

        Args:
            use_planning: Whether to enable ReAct planning (default True).
        """
        web_search = get_web_search()

        # BUGFIX: the web-search tool used to be registered twice -- once via
        # the ``tools=`` argument to super().__init__() (which also seeds the
        # ReAct planner) and again via add_tool(), leaving duplicate schemas
        # in both self.tools and the planner. It is now registered exactly
        # once, through add_tool().
        super().__init__(
            name="search_agent",
            description=(
                "You are a specialized agent for searching the web and finding "
                "online information. You can search the internet to answer questions "
                "that require current or external information."
            ),
            use_memory=True,
            use_planning=use_planning,
            planning_type="react",
        )

        # Async wrapper so the tool registry can await the search call
        async def web_search_tool(query: str, max_results: int = 5):
            return await web_search.search(query, max_results)

        self.add_tool(
            tool=web_search.get_tool_schema(),
            tool_function=web_search_tool,
        )
        self.web_search = web_search

    async def retrieve_context(self, query: str) -> str:
        """
        Retrieve relevant context from web search.

        Args:
            query: User query

        Returns:
            Context string built from web search results, or a human-readable
            message when the search fails or returns nothing.
        """
        try:
            # Perform web search
            search_results = await self.web_search.search(query, max_results=5)

            if not search_results.get("success") or not search_results.get("results"):
                return "No relevant information found from web search."

            # Format results as numbered entries with title, URL and snippet
            context_parts = ["Web search results:"]
            for i, result in enumerate(search_results["results"], 1):
                title = result.get("title", "No title")
                url = result.get("url", "")
                content = result.get("content", "")[:300]  # Truncate snippet
                context_parts.append(f"\n[{i}] {title}")
                context_parts.append(f"URL: {url}")
                context_parts.append(f"Content: {content}...")

            return "\n".join(context_parts)
        except Exception as e:
            logger.error(f"Error retrieving web context: {e}")
            return f"Error performing web search: {str(e)}"

    async def process(
        self,
        query: str,
        session_id: Optional[str] = None,
        context: Optional[str] = None,
    ) -> dict:
        """
        Process query with web search.

        Args:
            query: User query
            session_id: Optional session ID
            context: Optional additional context

        Returns:
            Response dictionary from the base agent.
        """
        # Retrieve web context
        web_context = await self.retrieve_context(query)

        # Combine with provided context
        full_context = web_context
        if context:
            full_context = f"{context}\n\n{web_context}"

        # Process using base agent (which will use planning if enabled)
        return await super().process(query, session_id, full_context)
src/agents/snowflake_agent.py ADDED
@@ -0,0 +1,245 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Snowflake data warehouse agent."""
2
+
3
+ import logging
4
+ from typing import Dict, List, Optional
5
+ import json
6
+ from src.agents.base_agent import BaseAgent
7
+ from src.mcp.snowflake_server import SnowflakeMCPServer
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
class SnowflakeAgent(BaseAgent):
    """Agent specialized in querying Snowflake data warehouse.

    Translates natural-language questions into Snowflake SQL with the LLM,
    executes them through the Snowflake MCP server, and lets the base agent
    explain the results.
    """

    def __init__(self, snowflake_config: Optional[Dict] = None, use_planning: bool = False):
        """Initialize Snowflake agent.

        Args:
            snowflake_config: Optional connection config passed through to
                the MCP server.
            use_planning: Whether to enable planning in the base agent.
        """
        super().__init__(
            name="snowflake_agent",
            description=(
                "You are a specialized agent for querying Snowflake data warehouse. "
                "You can convert natural language queries to SQL and execute them "
                "on Snowflake databases."
            ),
            use_memory=True,
            use_planning=use_planning,
        )

        # Initialize Snowflake MCP server
        self.snowflake_server = SnowflakeMCPServer(config=snowflake_config)
        self.tables_cache: Optional[List[str]] = None

    def get_available_tables(self) -> List[str]:
        """Cache and return available tables.

        An empty result (including after an error) is not cached, so the
        list is re-fetched on the next call -- this acts as a retry.
        """
        if not self.tables_cache:
            try:
                self.tables_cache = self.snowflake_server.get_tables()
            except Exception as e:
                logger.error(f"Error getting tables: {e}")
                self.tables_cache = []
        return self.tables_cache

    def get_context_for_query(self, user_query: str) -> str:
        """Build a textual description of available tables and their schemas.

        Limited to the first 10 tables and 5 columns each to avoid blowing
        the prompt token budget.
        """
        try:
            tables = self.get_available_tables()

            if not tables:
                return "No tables available in Snowflake database."

            context = "Available Snowflake tables:\n\n"
            for table in tables[:10]:  # Limit to first 10 tables
                try:
                    schema = self.snowflake_server.get_table_schema(table)
                    context += f"Table: {table}\n"
                    if schema:
                        context += "Columns: " + ", ".join([
                            f"{col.get('COLUMN_NAME', 'unknown')} ({col.get('DATA_TYPE', 'unknown')})"
                            for col in schema[:5]  # First 5 columns
                        ]) + "\n\n"
                    else:
                        context += "Columns: (schema not available)\n\n"
                except Exception as e:
                    logger.warning(f"Error getting schema for {table}: {e}")
                    context += f"Table: {table}\nColumns: (error retrieving schema)\n\n"

            return context
        except Exception as e:
            logger.error(f"Error building context: {e}")
            return f"Error building context: {str(e)}"

    def natural_language_to_sql(self, user_query: str) -> str:
        """Convert a natural language query to Snowflake SQL using the LLM.

        Returns:
            The generated SQL string, with any markdown fencing stripped.

        Raises:
            Exception: propagated from the LLM call on failure.
        """
        context = self.get_context_for_query(user_query)

        prompt = f"""You are a Snowflake SQL expert. Convert this natural language query to SQL.

Database context:
{context}

User query: {user_query}

Requirements:
1. Generate ONLY valid Snowflake SQL
2. Use proper table and column names from the context
3. Include appropriate filters and aggregations
4. Limit results to 100 rows for safety
5. Return ONLY the SQL query, no explanation

SQL Query:"""

        try:
            messages = [
                {
                    "role": "system",
                    "content": "You are a Snowflake SQL expert. Generate only valid SQL queries.",
                },
                {
                    "role": "user",
                    "content": prompt,
                },
            ]

            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=0.3,  # Lower temperature for more deterministic SQL
            )

            sql = response.choices[0].message.content.strip()

            # Clean up any markdown code blocks the model may have added
            sql = sql.replace("```sql", "").replace("```", "").strip()

            return sql
        except Exception as e:
            logger.error(f"Error generating SQL: {e}")
            raise

    async def retrieve_context(self, query: str) -> str:
        """
        Retrieve relevant context from Snowflake.

        Args:
            query: User query

        Returns:
            Context string: the table/schema overview, plus a sample of query
            results when the question looks like a data request.
        """
        try:
            # Get available tables context
            context = self.get_context_for_query(query)

            # If query seems to be asking for data, try to generate and execute SQL
            if any(keyword in query.lower() for keyword in ['show', 'list', 'get', 'find', 'select']):
                try:
                    sql = self.natural_language_to_sql(query)
                    results = self.snowflake_server.query(sql)

                    # BUGFIX: previously any row merely *containing* the
                    # substring "error" suppressed the results. Only a
                    # structured error payload (first row is a dict with an
                    # 'error' key) is treated as a failure now.
                    failed = bool(results) and isinstance(results[0], dict) and 'error' in results[0]
                    if results and not failed:
                        # Format a sample of results for context
                        context += f"\n\nQuery Results:\n"
                        context += json.dumps(results[:5], indent=2)  # First 5 rows
                except Exception as e:
                    logger.warning(f"Could not execute query for context: {e}")

            return context
        except Exception as e:
            logger.error(f"Error retrieving Snowflake context: {e}")
            return f"Error retrieving Snowflake context: {str(e)}"

    async def process(
        self,
        query: str,
        session_id: Optional[str] = None,
        context: Optional[str] = None,
    ) -> dict:
        """
        Process query against the Snowflake data warehouse.

        Args:
            query: User query
            session_id: Optional session ID
            context: Optional additional context

        Returns:
            Response dictionary; on failure includes "error" and, when SQL
            was generated, the offending "sql_query".
        """
        try:
            # Convert natural language to SQL
            sql_query = self.natural_language_to_sql(query)

            logger.info(f"Generated SQL: {sql_query}")

            # Execute query
            results = self.snowflake_server.query(sql_query)

            # Check for a structured error payload from the MCP server
            if results and isinstance(results, list) and len(results) > 0:
                if isinstance(results[0], dict) and 'error' in results[0]:
                    return {
                        "success": False,
                        "error": results[0].get('error', 'Unknown error'),
                        "sql_query": sql_query,
                        "agent": self.name,
                    }

            # BUGFIX: a _summarize_results() call used to run here with its
            # return value discarded -- a full LLM round-trip wasted on every
            # query. The base agent below already summarizes the raw results,
            # so the redundant call is removed.

            # Build full context with the SQL and a sample of the results
            snowflake_context = f"SQL Query: {sql_query}\n\nResults ({len(results)} rows):\n{json.dumps(results[:10], indent=2)}"
            full_context = f"{context}\n\n{snowflake_context}" if context else snowflake_context

            # Process using base agent
            return await super().process(query, session_id, full_context)

        except Exception as e:
            logger.error(f"Error processing Snowflake query: {e}")
            return {
                "success": False,
                "error": str(e),
                "agent": self.name,
            }

    async def _summarize_results(self, query: str, results: List[Dict]) -> str:
        """Use the LLM to summarize query results.

        Kept as a public-ish helper for callers that want a standalone
        summary of a result set.
        """
        if not results:
            return "No results found."

        # Convert a sample of the results to a readable format
        results_text = json.dumps(results[:10], indent=2)

        prompt = f"""Summarize these Snowflake query results for the user.

Original question: {query}
Number of results: {len(results)}

Sample data:
{results_text}

Provide a clear, concise summary of the findings."""

        try:
            messages = [
                {
                    "role": "system",
                    "content": "You are a helpful assistant that summarizes database query results.",
                },
                {
                    "role": "user",
                    "content": prompt,
                },
            ]

            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=0.7,
            )

            return response.choices[0].message.content
        except Exception as e:
            logger.error(f"Error summarizing results: {e}")
            return f"Found {len(results)} results. (Summary generation failed: {str(e)})"
src/core/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Core orchestration and configuration."""
2
+
src/core/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (214 Bytes). View file
 
src/core/__pycache__/config.cpython-311.pyc ADDED
Binary file (8.94 kB). View file
 
src/core/__pycache__/orchestrator.cpython-311.pyc ADDED
Binary file (14.9 kB). View file
 
src/core/config.py ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Configuration management using pydantic-settings."""
2
+
3
+ import os
4
+ from typing import Optional, Dict, Any
5
+ from pydantic import Field
6
+ from pydantic_settings import BaseSettings, SettingsConfigDict
7
+
8
+
9
class Settings(BaseSettings):
    """Application settings, populated from the environment / .env file."""

    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        case_sensitive=False,
        extra="ignore",
    )

    # --- OpenAI / OpenRouter ---
    openai_api_key: str = Field(default="", description="OpenAI or OpenRouter API key")
    openai_base_url: Optional[str] = Field(default=None, description="OpenAI/OpenRouter base URL (e.g., https://openrouter.ai/api/v1)")
    openai_model: str = Field(default="gpt-4-turbo-preview", description="Model to use (OpenAI or OpenRouter model name)")
    openai_embedding_model: str = Field(default="text-embedding-3-small", description="Embedding model to use")
    # Optional OpenRouter-specific request headers.
    openrouter_http_referer: Optional[str] = Field(default=None, description="HTTP-Referer header for OpenRouter (optional)")
    openrouter_title: Optional[str] = Field(default=None, description="X-Title header for OpenRouter (optional)")

    # --- ChromaDB ---
    chroma_db_path: str = Field(default="./data/chroma_db", description="Path to ChromaDB database")
    chroma_collection_name: str = Field(default="documents", description="ChromaDB collection name")

    # --- MCP server ---
    mcp_server_host: str = Field(default="localhost", description="MCP server host")
    mcp_server_port: int = Field(default=8001, description="MCP server port")

    # --- Memory ---
    short_term_memory_size: int = Field(default=10, description="Number of recent messages to keep in short-term memory")
    long_term_memory_enabled: bool = Field(default=True, description="Enable long-term memory")
    max_context_tokens: int = Field(default=4000, description="Maximum context tokens for LLM")

    # --- API server ---
    api_host: str = Field(default="0.0.0.0", description="API server host")
    api_port: int = Field(default=8000, description="API server port")
    api_debug: bool = Field(default=False, description="Enable API debug mode")

    # --- Web search (optional) ---
    tavily_api_key: Optional[str] = Field(default=None, description="Tavily API key for web search")
    serper_api_key: Optional[str] = Field(default=None, description="Serper API key for web search")

    # --- Database (optional) ---
    database_url: Optional[str] = Field(default="sqlite:///./data/app.db", description="Database connection URL")

    # --- AWS (optional) ---
    aws_access_key_id: Optional[str] = Field(default=None, description="AWS access key ID")
    aws_secret_access_key: Optional[str] = Field(default=None, description="AWS secret access key")
    aws_region: str = Field(default="us-east-1", description="AWS region")
    aws_s3_bucket: Optional[str] = Field(default=None, description="AWS S3 bucket name")

    # --- GCS (optional) ---
    google_application_credentials: Optional[str] = Field(default=None, description="Path to GCS service account JSON")
    gcs_bucket_name: Optional[str] = Field(default=None, description="GCS bucket name")

    # --- Snowflake (optional) ---
    snowflake_account: Optional[str] = Field(default=None, description="Snowflake account identifier")
    snowflake_user: Optional[str] = Field(default=None, description="Snowflake username")
    snowflake_password: Optional[str] = Field(default=None, description="Snowflake password")
    snowflake_warehouse: Optional[str] = Field(default=None, description="Snowflake warehouse name")
    snowflake_database: Optional[str] = Field(default=None, description="Snowflake database name")
    snowflake_schema: Optional[str] = Field(default="PUBLIC", description="Snowflake schema name")
    snowflake_role: Optional[str] = Field(default="ACCOUNTADMIN", description="Snowflake role")

    # --- Logging ---
    log_level: str = Field(default="INFO", description="Logging level")

    def get_openai_client_kwargs(self) -> dict:
        """Build kwargs for OpenAI client construction (OpenRouter-compatible)."""
        kwargs: Dict[str, Any] = {"api_key": self.openai_api_key}

        # A custom base URL switches the client to OpenRouter / a proxy.
        if self.openai_base_url:
            kwargs["base_url"] = self.openai_base_url

        # Attach optional OpenRouter attribution headers, if configured.
        headers = {
            header: value
            for header, value in (
                ("HTTP-Referer", self.openrouter_http_referer),
                ("X-Title", self.openrouter_title),
            )
            if value
        }
        if headers:
            kwargs["default_headers"] = headers

        return kwargs

    def get_chroma_client_kwargs(self) -> dict:
        """Build kwargs for ChromaDB client construction."""
        return {"path": self.chroma_db_path}

    def has_web_search(self) -> bool:
        """True when at least one web-search provider key is configured."""
        return bool(self.tavily_api_key or self.serper_api_key)

    def has_cloud_storage(self) -> bool:
        """True when either S3 or GCS credentials + bucket are configured."""
        s3_ready = self.aws_access_key_id and self.aws_s3_bucket
        gcs_ready = self.google_application_credentials and self.gcs_bucket_name
        return bool(s3_ready or gcs_ready)

    def has_snowflake(self) -> bool:
        """True when all mandatory Snowflake connection fields are set."""
        return all(
            (
                self.snowflake_account,
                self.snowflake_user,
                self.snowflake_password,
                self.snowflake_warehouse,
                self.snowflake_database,
            )
        )

    def get_snowflake_config(self) -> Optional[Dict[str, Any]]:
        """Return the Snowflake connection dict, or None when unconfigured."""
        if not self.has_snowflake():
            return None

        return {
            "account": self.snowflake_account,
            "user": self.snowflake_user,
            "password": self.snowflake_password,
            "warehouse": self.snowflake_warehouse,
            "database": self.snowflake_database,
            "schema": self.snowflake_schema,
            "role": self.snowflake_role,
        }
202
+
203
+
204
# Process-wide Settings singleton, created lazily by get_settings().
_settings: Optional[Settings] = None


def get_settings() -> Settings:
    """Return the shared Settings instance, building it on first use."""
    global _settings
    if _settings is None:
        _settings = Settings()
    return _settings


def reset_settings() -> None:
    """Discard the cached Settings so the next get_settings() rebuilds it.

    Primarily a testing aid: lets tests re-read the environment.
    """
    global _settings
    _settings = None
220
+
src/core/orchestrator.py ADDED
@@ -0,0 +1,332 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Main orchestrator for coordinating all components."""
2
+
3
+ import logging
4
+ from typing import Dict, Any, Optional
5
+ from enum import Enum
6
+ from src.core.config import get_settings
7
+ from src.retrieval.vector_store import get_vector_store
8
+ from src.agents.local_data_agent import LocalDataAgent
9
+ from src.agents.search_agent import SearchAgent
10
+ from src.agents.cloud_agent import CloudAgent
11
+ from src.agents.aggregator_agent import AggregatorAgent
12
+ from src.agents.snowflake_agent import SnowflakeAgent
13
+ from src.tools.calculator import get_calculator
14
+ from src.tools.web_search import get_web_search
15
+ from src.tools.database_query import get_database_query
16
+ from openai import OpenAI
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class Tier(Enum):
    """The three operating modes of the RAG system.

    The enum value is the string accepted by the public API's ``tier``
    parameter (see Orchestrator.process_query).
    """

    BASIC_RAG = "basic"
    AGENT_WITH_TOOLS = "agent"
    ADVANCED_AGENTIC = "advanced"
+
27
+
28
class Orchestrator:
    """Main orchestrator for the RAG system.

    Routes each query to one of three tiers:
      * ``basic``    - plain RAG: vector retrieval + LLM generation
      * ``agent``    - a single agent with tool calling
      * ``advanced`` - multi-agent aggregation
    """

    def __init__(self):
        """Initialize clients, the vector store, lazy agents, and tools."""
        self.settings = get_settings()
        self.client = OpenAI(**self.settings.get_openai_client_kwargs())
        self.model = self.settings.openai_model

        # Shared retrieval component.
        self.vector_store = get_vector_store()

        # Agents are created lazily, on the first query of each tier.
        self._local_agent: Optional[LocalDataAgent] = None
        self._search_agent: Optional[SearchAgent] = None
        self._cloud_agent: Optional[CloudAgent] = None
        self._snowflake_agent: Optional[SnowflakeAgent] = None
        self._aggregator_agent: Optional[AggregatorAgent] = None

        # Tools (always constructed; some are inert if unconfigured).
        self.calculator = get_calculator()
        self.web_search = get_web_search()
        self.database_query = get_database_query()

    async def process_query(
        self,
        query: str,
        tier: str = "basic",
        session_id: Optional[str] = None,
    ) -> Dict[str, Any]:
        """
        Process a query using the specified tier.

        Args:
            query: User query
            tier: System tier ("basic", "agent", or "advanced")
            session_id: Optional session ID for memory

        Returns:
            Response dictionary (always contains a "success" key).
        """
        try:
            tier_enum = Tier(tier.lower())

            if tier_enum == Tier.BASIC_RAG:
                return await self._process_basic_rag(query, session_id)
            elif tier_enum == Tier.AGENT_WITH_TOOLS:
                return await self._process_agent_with_tools(query, session_id)
            elif tier_enum == Tier.ADVANCED_AGENTIC:
                return await self._process_advanced_agentic(query, session_id)
            else:
                raise ValueError(f"Unknown tier: {tier}")

        except ValueError as e:
            # Tier(...) raises ValueError for unrecognized tier strings.
            logger.error(f"Invalid tier: {e}")
            return {
                "success": False,
                "error": f"Invalid tier: {tier}",
            }
        except Exception as e:
            logger.error(f"Error processing query: {e}")
            return {
                "success": False,
                "error": str(e),
            }

    async def _process_basic_rag(
        self,
        query: str,
        session_id: Optional[str],
    ) -> Dict[str, Any]:
        """Process query using basic RAG (retrieval + generation)."""
        try:
            # Fail fast with an actionable message if no API key is set.
            if not self.settings.openai_api_key:
                return {
                    "success": False,
                    "error": "OpenAI API key not configured. Please set OPENAI_API_KEY in your .env file.",
                    "tier": "basic",
                }

            # Retrieve relevant documents from the vector store.
            results = self.vector_store.search(query=query, n_results=5)

            # Build context from retrieved documents; fall back to an
            # explicit "nothing found" context so the LLM does not hallucinate.
            if results["documents"]:
                context_parts = ["Retrieved documents:"]
                for i, (doc, metadata) in enumerate(
                    zip(results["documents"], results["metadatas"]), 1
                ):
                    source = metadata.get("source", "Unknown")
                    context_parts.append(f"\n[{i}] Source: {source}")
                    # Ensure doc is a string; truncate to keep the prompt small.
                    doc_str = str(doc) if doc else ""
                    context_parts.append(f"Content: {doc_str[:500]}...")
                context = "\n".join(context_parts)
                sources = [
                    {"id": id, "metadata": meta}
                    for id, meta in zip(results["ids"], results["metadatas"])
                ]
            else:
                context = "No relevant documents found in the knowledge base."
                sources = []

            # Generate the answer from the assembled context.
            messages = [
                {
                    "role": "system",
                    "content": "You are a helpful assistant that answers questions based on the provided context.",
                },
                {
                    "role": "user",
                    "content": f"Context:\n{context}\n\nQuestion: {query}",
                },
            ]

            try:
                response = self.client.chat.completions.create(
                    model=self.model,
                    messages=messages,
                    temperature=0.7,
                )

                answer = response.choices[0].message.content
            except Exception as api_error:
                # Translate common provider failures into actionable messages,
                # chaining the original error for debuggability.
                error_msg = str(api_error)
                if "quota" in error_msg.lower() or "429" in error_msg:
                    raise Exception(
                        "OpenAI API quota exceeded. Please check your billing and plan details."
                    ) from api_error
                elif "api key" in error_msg.lower() or "401" in error_msg:
                    raise Exception(
                        "Invalid OpenAI API key. Please check your .env file."
                    ) from api_error
                else:
                    raise Exception(f"OpenAI API error: {error_msg}") from api_error

            return {
                "success": True,
                "answer": answer,
                "tier": "basic",
                "sources": sources,
                "model": self.model,
            }

        except Exception as e:
            logger.error(f"Error in basic RAG: {e}", exc_info=True)
            return {
                "success": False,
                "error": f"Error processing query: {str(e)}",
                "tier": "basic",
            }

    async def _process_agent_with_tools(
        self,
        query: str,
        session_id: Optional[str],
    ) -> Dict[str, Any]:
        """Process query using a single agent with tool calling."""
        try:
            if not self.settings.openai_api_key:
                return {
                    "success": False,
                    "error": "OpenAI API key not configured. Please set OPENAI_API_KEY in your .env file.",
                    "tier": "agent",
                }

            # Create the agent once and register its tools exactly once.
            # (Previously tools were re-added on every call, so the cached
            # agent accumulated duplicate registrations per query.)
            if not self._local_agent:
                self._local_agent = LocalDataAgent(use_planning=True)
                self._register_local_agent_tools()

            # Process query
            response = await self._local_agent.process(query, session_id)

            return {
                **response,
                "tier": "agent",
            }

        except Exception as e:
            logger.error(f"Error in agent with tools: {e}", exc_info=True)
            return {
                "success": False,
                "error": f"Error processing query: {str(e)}",
                "tier": "agent",
            }

    def _register_local_agent_tools(self) -> None:
        """Attach calculator, web-search, and database tools to the local agent."""
        # Calculator is always available.
        self._local_agent.add_tool(
            tool=self.calculator.get_tool_schema(),
            tool_function=lambda expression: self.calculator.calculate(expression),
        )

        # Web search only when a provider key is configured.
        if self.settings.has_web_search():
            async def web_search_tool(query: str, max_results: int = 5):
                return await self.web_search.search(query, max_results)

            self._local_agent.add_tool(
                tool=self.web_search.get_tool_schema(),
                tool_function=web_search_tool,
            )

        # SQL tool only when a database URL is configured.
        if self.settings.database_url:
            def db_query_tool(sql: str, limit: int = 100):
                return self.database_query.query(sql, limit)

            self._local_agent.add_tool(
                tool=self.database_query.get_tool_schema(),
                tool_function=db_query_tool,
            )

    async def _process_advanced_agentic(
        self,
        query: str,
        session_id: Optional[str],
    ) -> Dict[str, Any]:
        """Process query using advanced agentic RAG with multiple agents."""
        try:
            if not self.settings.openai_api_key:
                return {
                    "success": False,
                    "error": "OpenAI API key not configured. Please set OPENAI_API_KEY in your .env file.",
                    "tier": "advanced",
                }

            # Aggregator agent is created lazily.
            if not self._aggregator_agent:
                self._aggregator_agent = AggregatorAgent(use_planning=True)

            # Instantiate the Snowflake agent once, if configured.
            if self.settings.has_snowflake() and not self._snowflake_agent:
                snowflake_config = self.settings.get_snowflake_config()
                self._snowflake_agent = SnowflakeAgent(
                    snowflake_config=snowflake_config,
                    use_planning=False
                )
                # NOTE(review): the agent is never passed to the aggregator
                # here; the original comment claims AggregatorAgent discovers
                # it via its agent-selection logic -- verify against
                # AggregatorAgent's implementation.

            # Process query
            response = await self._aggregator_agent.process(query, session_id)

            return {
                **response,
                "tier": "advanced",
            }

        except Exception as e:
            logger.error(f"Error in advanced agentic: {e}", exc_info=True)
            return {
                "success": False,
                "error": f"Error processing query: {str(e)}",
                "tier": "advanced",
            }

    def get_agent_status(self) -> Dict[str, Any]:
        """Report status for every agent that has been instantiated so far."""
        status = {
            "tiers_available": ["basic", "agent", "advanced"],
            "agents": {},
        }

        if self._local_agent:
            status["agents"]["local"] = self._local_agent.get_status()
        if self._search_agent:
            status["agents"]["search"] = self._search_agent.get_status()
        if self._cloud_agent:
            status["agents"]["cloud"] = self._cloud_agent.get_status()
        if self._snowflake_agent:
            status["agents"]["snowflake"] = self._snowflake_agent.get_status()
        if self._aggregator_agent:
            status["agents"]["aggregator"] = self._aggregator_agent.get_status()

        return status

    def get_system_info(self) -> Dict[str, Any]:
        """Summarize vector store, tool availability, memory, and model config."""
        return {
            "vector_store": {
                "document_count": self.vector_store.count(),
                "collection_name": self.settings.chroma_collection_name,
            },
            "tools": {
                "calculator": True,
                "web_search": self.settings.has_web_search(),
                "database": bool(self.settings.database_url),
                "snowflake": self.settings.has_snowflake(),
            },
            "memory": {
                "short_term_enabled": True,
                "long_term_enabled": self.settings.long_term_memory_enabled,
            },
            "model": self.model,
        }
+ }
320
+
321
+
322
# Process-wide Orchestrator singleton, built lazily by get_orchestrator().
_orchestrator: Optional[Orchestrator] = None


def get_orchestrator() -> Orchestrator:
    """Return the shared Orchestrator, constructing it on first use."""
    global _orchestrator
    if _orchestrator is None:
        _orchestrator = Orchestrator()
    return _orchestrator
+ return _orchestrator
332
+
src/mcp/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """MCP server implementations."""
2
+
src/mcp/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (203 Bytes). View file
 
src/mcp/__pycache__/mcp_server.cpython-311.pyc ADDED
Binary file (5.44 kB). View file
 
src/mcp/__pycache__/snowflake_server.cpython-311.pyc ADDED
Binary file (10 kB). View file
 
src/mcp/cloud_server.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Cloud storage MCP server."""
2
+
3
+ import logging
4
+ from typing import Any, Dict
5
+
6
+ try:
7
+ from mcp.types import Tool
8
+ MCP_AVAILABLE = True
9
+ except ImportError:
10
+ MCP_AVAILABLE = False
11
+ class Tool:
12
+ def __init__(self, **kwargs):
13
+ pass
14
+
15
+ from src.mcp.mcp_server import BaseMCPServer
16
+ from src.core.config import get_settings
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class CloudMCPServer(BaseMCPServer):
    """MCP server exposing cloud storage (AWS S3 or Google Cloud Storage)."""

    def __init__(self):
        """Initialize cloud MCP server."""
        super().__init__("cloud_storage_server")
        self.settings = get_settings()
        self._init_cloud_client()
        self._register_tools()

    def _init_cloud_client(self):
        """Select and initialize a storage backend: S3 first, then GCS."""
        self.cloud_type = None
        self.client = None

        if self.settings.aws_access_key_id and self.settings.aws_s3_bucket:
            # AWS S3 takes precedence when both backends are configured.
            try:
                import boto3
                self.client = boto3.client(
                    "s3",
                    aws_access_key_id=self.settings.aws_access_key_id,
                    aws_secret_access_key=self.settings.aws_secret_access_key,
                    region_name=self.settings.aws_region,
                )
                self.cloud_type = "s3"
                self.bucket_name = self.settings.aws_s3_bucket
            except Exception as e:
                logger.error(f"Error initializing S3: {e}")
        elif self.settings.google_application_credentials and self.settings.gcs_bucket_name:
            try:
                from google.cloud import storage
                import os
                os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = self.settings.google_application_credentials
                self.client = storage.Client()
                self.cloud_type = "gcs"
                self.bucket_name = self.settings.gcs_bucket_name
            except Exception as e:
                logger.error(f"Error initializing GCS: {e}")

    def _register_tools(self):
        """Register cloud storage tools."""
        if not self.client:
            logger.warning("No cloud storage configured, skipping tool registration")
            return

        tool_specs = [
            {
                "name": "list_cloud_objects",
                "description": "List objects in cloud storage",
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "prefix": {
                            "type": "string",
                            "description": "Object key prefix to filter",
                        },
                        "max_keys": {
                            "type": "integer",
                            "description": "Maximum number of objects to return",
                            "default": 10,
                        },
                    },
                },
            },
            {
                "name": "get_cloud_object",
                "description": "Get an object from cloud storage",
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "key": {
                            "type": "string",
                            "description": "Object key",
                        },
                    },
                    "required": ["key"],
                },
            },
        ]
        for spec in tool_specs:
            self.register_tool(Tool(**spec))

    async def _execute_tool(self, name: str, arguments: Dict[str, Any]) -> Any:
        """Dispatch a cloud storage tool call by name."""
        if not self.client:
            return {"error": "Cloud storage not configured"}

        if name == "list_cloud_objects":
            prefix = arguments.get("prefix", "")
            max_keys = arguments.get("max_keys", 10)
            return self._list_objects(prefix, max_keys)
        elif name == "get_cloud_object":
            return self._get_object(arguments.get("key"))
        else:
            raise ValueError(f"Unknown tool: {name}")

    def _list_objects(self, prefix: str, max_keys: int) -> Any:
        """List up to max_keys objects under prefix on the active backend."""
        if self.cloud_type == "s3":
            response = self.client.list_objects_v2(
                Bucket=self.bucket_name,
                Prefix=prefix,
                MaxKeys=max_keys,
            )
            objects = [
                {"key": obj["Key"], "size": obj["Size"]}
                for obj in response.get("Contents", [])
            ]
            return {"objects": objects, "count": len(objects)}

        if self.cloud_type == "gcs":
            bucket = self.client.bucket(self.bucket_name)
            blobs = list(bucket.list_blobs(prefix=prefix, max_results=max_keys))
            objects = [{"key": blob.name, "size": blob.size} for blob in blobs]
            return {"objects": objects, "count": len(objects)}

        return None  # no recognized backend (should be unreachable with a client)

    def _get_object(self, key: Any) -> Any:
        """Fetch one object's text content; errors are returned, not raised."""
        if self.cloud_type == "s3":
            try:
                response = self.client.get_object(Bucket=self.bucket_name, Key=key)
                content = response["Body"].read().decode("utf-8")
                return {"key": key, "content": content}
            except Exception as e:
                return {"error": str(e)}

        if self.cloud_type == "gcs":
            try:
                blob = self.client.bucket(self.bucket_name).blob(key)
                return {"key": key, "content": blob.download_as_text()}
            except Exception as e:
                return {"error": str(e)}

        return None
156
+
src/mcp/local_server.py ADDED
@@ -0,0 +1,122 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Local data MCP server."""
2
+
3
+ import logging
4
+ from typing import Any, Dict
5
+
6
+ try:
7
+ from mcp.types import Tool
8
+ MCP_AVAILABLE = True
9
+ except ImportError:
10
+ MCP_AVAILABLE = False
11
+ # Create a mock Tool class for type hints
12
+ class Tool:
13
+ def __init__(self, **kwargs):
14
+ pass
15
+
16
+ from src.mcp.mcp_server import BaseMCPServer
17
+ from src.retrieval.vector_store import get_vector_store
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
class LocalMCPServer(BaseMCPServer):
    """MCP server for local document operations backed by the vector store."""

    def __init__(self):
        """Initialize local MCP server."""
        super().__init__("local_data_server")
        self.vector_store = get_vector_store()
        self._register_tools()

    def _register_tools(self):
        """Register the search / get / list document tools."""
        tool_specs = [
            {
                "name": "search_local_documents",
                "description": "Search local documents in the vector store",
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "query": {
                            "type": "string",
                            "description": "Search query",
                        },
                        "n_results": {
                            "type": "integer",
                            "description": "Number of results to return",
                            "default": 5,
                        },
                    },
                    "required": ["query"],
                },
            },
            {
                "name": "get_local_document",
                "description": "Get a document by its ID",
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "document_id": {
                            "type": "string",
                            "description": "Document ID",
                        },
                    },
                    "required": ["document_id"],
                },
            },
            {
                "name": "list_local_documents",
                "description": "List all documents in the vector store",
                "inputSchema": {
                    "type": "object",
                    "properties": {
                        "limit": {
                            "type": "integer",
                            "description": "Maximum number of documents to return",
                            "default": 10,
                        },
                    },
                },
            },
        ]
        for spec in tool_specs:
            self.register_tool(Tool(**spec))

    async def _execute_tool(self, name: str, arguments: Dict[str, Any]) -> Any:
        """Dispatch a local data tool call by name."""
        if name == "search_local_documents":
            results = self.vector_store.search(
                query=arguments.get("query", ""),
                n_results=arguments.get("n_results", 5),
            )
            return {
                "documents": results["documents"],
                "ids": results["ids"],
                "metadatas": results["metadatas"],
            }

        if name == "get_local_document":
            hits = self.vector_store.get_by_ids([arguments.get("document_id")])
            if not hits["documents"]:
                return {"error": "Document not found"}
            metadatas = hits["metadatas"]
            return {
                "document": hits["documents"][0],
                "metadata": metadatas[0] if metadatas else {},
            }

        if name == "list_local_documents":
            # Only the total count is reported; the limit is echoed back.
            return {
                "total_documents": self.vector_store.count(),
                "limit": arguments.get("limit", 10),
            }

        raise ValueError(f"Unknown tool: {name}")
+ raise ValueError(f"Unknown tool: {name}")
122
+
src/mcp/mcp_server.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Base MCP server implementation."""
2
+
3
+ import logging
4
+ from typing import Any, Dict, List, Optional
5
+ import asyncio
6
+
7
# Module logger must exist BEFORE the import guard below: the except branch
# logs a warning, and defining the logger afterwards (as before) raised a
# NameError at import time whenever the MCP SDK was missing.
logger = logging.getLogger(__name__)

# Try to import MCP SDK - adjust imports based on actual SDK version
try:
    from mcp.server import Server
    from mcp.server.stdio import stdio_server
    from mcp.types import Tool, TextContent
    MCP_AVAILABLE = True
except ImportError:
    # Fallback if MCP SDK structure is different
    MCP_AVAILABLE = False
    logger.warning("MCP SDK not available - MCP servers will not function")
19
+
20
+
21
class BaseMCPServer:
    """Shared plumbing for all MCP servers: tool registry + stdio transport."""

    def __init__(self, name: str):
        """Initialize base MCP server."""
        self.name = name
        self.tools: List[Any] = []

        if not MCP_AVAILABLE:
            # Degrade to an inert server when the SDK is missing.
            logger.warning(f"MCP SDK not available - {name} server cannot be initialized")
            self.server = None
            return

        self.server = Server(name)
        self._setup_handlers()

    def _setup_handlers(self):
        """Wire the list_tools / call_tool callbacks into the MCP server."""
        if not self.server:
            return

        @self.server.list_tools()
        async def list_tools() -> List[Any]:
            """List available tools."""
            return self.tools

        @self.server.call_tool()
        async def call_tool(name: str, arguments: Dict[str, Any]) -> List[Any]:
            """Call a tool by name, returning errors as text content."""
            try:
                result = await self._execute_tool(name, arguments)
            except Exception as e:
                logger.error(f"Error executing tool {name}: {e}")
                return [TextContent(type="text", text=f"Error: {str(e)}")]
            return [TextContent(type="text", text=str(result))]

    async def _execute_tool(self, name: str, arguments: Dict[str, Any]) -> Any:
        """Execute a tool - to be overridden by subclasses."""
        raise NotImplementedError("Subclasses must implement _execute_tool")

    def register_tool(self, tool: Any):
        """Register a tool with the server."""
        self.tools.append(tool)
        tool_name = tool.name if hasattr(tool, "name") else "unknown"
        logger.info(f"Registered tool: {tool_name}")

    async def run(self):
        """Run the MCP server over stdio until the stream closes."""
        if not (self.server and MCP_AVAILABLE):
            logger.error("Cannot run MCP server - SDK not available")
            return

        async with stdio_server() as (read_stream, write_stream):
            await self.server.run(
                read_stream,
                write_stream,
                self.server.create_initialization_options(),
            )
78
+
src/mcp/search_server.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Web search MCP server."""
2
+
3
+ import logging
4
+ from typing import Any, Dict
5
+
6
+ try:
7
+ from mcp.types import Tool
8
+ MCP_AVAILABLE = True
9
+ except ImportError:
10
+ MCP_AVAILABLE = False
11
+ class Tool:
12
+ def __init__(self, **kwargs):
13
+ pass
14
+
15
+ from src.mcp.mcp_server import BaseMCPServer
16
+ from src.tools.web_search import get_web_search
17
+
18
+ logger = logging.getLogger(__name__)
19
+
20
+
21
class SearchMCPServer(BaseMCPServer):
    """MCP server for web search operations."""

    def __init__(self):
        """Initialize search MCP server."""
        super().__init__("web_search_server")
        self.web_search = get_web_search()
        self._register_tools()

    def _register_tools(self):
        """Register the single web_search tool."""
        schema = {
            "type": "object",
            "properties": {
                "query": {
                    "type": "string",
                    "description": "Search query",
                },
                "max_results": {
                    "type": "integer",
                    "description": "Maximum number of results",
                    "default": 5,
                },
            },
            "required": ["query"],
        }
        self.register_tool(
            Tool(
                name="web_search",
                description="Search the web for information",
                inputSchema=schema,
            )
        )

    async def _execute_tool(self, name: str, arguments: Dict[str, Any]) -> Any:
        """Execute a web search tool call."""
        if name != "web_search":
            raise ValueError(f"Unknown tool: {name}")
        query = arguments.get("query", "")
        max_results = arguments.get("max_results", 5)
        return await self.web_search.search(query, max_results)
+ raise ValueError(f"Unknown tool: {name}")
62
+
src/mcp/snowflake_server.py ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """MCP Server for Snowflake data warehouse."""
2
+
3
+ import logging
4
+ from typing import Any, Dict, List, Optional
5
+
6
+ try:
7
+ from mcp.types import Tool
8
+ MCP_AVAILABLE = True
9
+ except ImportError:
10
+ MCP_AVAILABLE = False
11
+ class Tool:
12
+ def __init__(self, **kwargs):
13
+ pass
14
+
15
+ from src.mcp.mcp_server import BaseMCPServer
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+ try:
20
+ import snowflake.connector
21
+ import pandas as pd
22
+ SNOWFLAKE_AVAILABLE = True
23
+ except ImportError:
24
+ SNOWFLAKE_AVAILABLE = False
25
+ logger.warning("snowflake-connector-python not installed")
26
+
27
+
28
class SnowflakeMCPServer(BaseMCPServer):
    """MCP Server for Snowflake data warehouse operations.

    Exposes three tools: raw SQL execution (`snowflake_query`), table listing
    (`snowflake_list_tables`), and table-schema inspection
    (`snowflake_get_schema`). Connection settings come from the `config` dict
    (account, user, password, warehouse, database, schema, role).
    """

    def __init__(self, config: Optional[Dict] = None):
        """Initialize Snowflake MCP server.

        Args:
            config: Snowflake connection settings. May be omitted, in which
                case tool calls return error payloads instead of connecting.
        """
        super().__init__("snowflake_server")
        self.config = config or {}
        self.connection = None
        self.cursor = None
        if SNOWFLAKE_AVAILABLE:
            self._register_tools()

    def _register_tools(self):
        """Register Snowflake tools with the MCP server."""
        if not SNOWFLAKE_AVAILABLE:
            logger.warning("Snowflake connector not available, skipping tool registration")
            return

        # Raw SQL execution tool (arbitrary SQL by design — callers of this
        # tool are trusted; values they embed are their responsibility).
        query_tool = Tool(
            name="snowflake_query",
            description="Execute SQL query on Snowflake data warehouse",
            inputSchema={
                "type": "object",
                "properties": {
                    "sql": {
                        "type": "string",
                        "description": "SQL query to execute",
                    },
                },
                "required": ["sql"],
            },
        )
        self.register_tool(query_tool)

        # Table listing tool (no arguments).
        list_tables_tool = Tool(
            name="snowflake_list_tables",
            description="List all tables in the current schema",
            inputSchema={"type": "object", "properties": {}},
        )
        self.register_tool(list_tables_tool)

        # Table schema inspection tool.
        schema_tool = Tool(
            name="snowflake_get_schema",
            description="Get schema information for a table",
            inputSchema={
                "type": "object",
                "properties": {
                    "table_name": {
                        "type": "string",
                        "description": "Name of the table",
                    },
                },
                "required": ["table_name"],
            },
        )
        self.register_tool(schema_tool)

    @staticmethod
    def _is_safe_identifier(name: Optional[str]) -> bool:
        """Return True when `name` is a plain unquoted SQL identifier.

        Allows only letters, digits, underscore and dollar sign — the
        characters of standard (unquoted) Snowflake identifiers — so a value
        interpolated into SQL text cannot carry quotes or statement breaks.
        """
        return bool(name) and all(c.isalnum() or c in "_$" for c in name)

    def connect(self):
        """Establish a connection to Snowflake.

        Returns:
            True on success, False if the connector is missing or the
            connection attempt fails.
        """
        if not SNOWFLAKE_AVAILABLE:
            return False

        try:
            self.connection = snowflake.connector.connect(
                account=self.config.get('account'),
                user=self.config.get('user'),
                password=self.config.get('password'),
                warehouse=self.config.get('warehouse'),
                database=self.config.get('database'),
                schema=self.config.get('schema'),
                role=self.config.get('role', 'ACCOUNTADMIN'),
            )
            self.cursor = self.connection.cursor()
            logger.info(f"Connected to Snowflake account: {self.config.get('account')}")
            return True
        except Exception as e:
            logger.error(f"Snowflake connection failed: {e}")
            return False

    def query(self, sql_query: str, params: Optional[tuple] = None) -> List[Dict]:
        """Execute a SQL query on Snowflake and return rows as dicts.

        Args:
            sql_query: SQL text. Use %s placeholders together with `params`
                for data values (the connector's default pyformat binding).
            params: Optional bind parameters; None preserves the original
                unparameterized behavior.

        Returns:
            A list of row dicts keyed by column name, or a single-element
            list containing an "error" dict on failure.
        """
        if not SNOWFLAKE_AVAILABLE:
            return [{"error": "Snowflake connector not available"}]

        # Lazily connect on first use.
        if not self.connection:
            if not self.connect():
                return [{"error": "Failed to connect to Snowflake"}]

        try:
            self.cursor.execute(sql_query, params)
            columns = [desc[0] for desc in self.cursor.description]
            results = self.cursor.fetchall()
            return [dict(zip(columns, row)) for row in results]
        except Exception as e:
            logger.error(f"Query error: {e}")
            return [{"error": str(e), "query": sql_query}]

    def get_tables(self) -> List[str]:
        """List all tables in the configured database/schema.

        Returns an empty list when database/schema are unconfigured or the
        database name is not a safe identifier.
        """
        database = self.config.get('database')
        schema = self.config.get('schema')
        if not database or not schema:
            return []

        # The database name is interpolated as an identifier (identifiers
        # cannot be bound), so validate it to prevent SQL injection; the
        # schema is a data value and is bound as a parameter.
        if not self._is_safe_identifier(database):
            logger.error(f"Rejected unsafe database identifier: {database!r}")
            return []

        sql = (
            f"SELECT TABLE_NAME FROM {database}.INFORMATION_SCHEMA.TABLES "
            "WHERE TABLE_SCHEMA = %s"
        )
        results = self.query(sql, (schema,))
        return [row['TABLE_NAME'] for row in results if 'TABLE_NAME' in row]

    def get_table_schema(self, table_name: str) -> List[Dict]:
        """Get column name/type/nullability for a table.

        Args:
            table_name: Table to describe; bound as a query parameter, so
                untrusted values (this arrives from tool arguments) are safe.
        """
        database = self.config.get('database')
        schema = self.config.get('schema')
        if not database or not schema:
            return []

        # Same identifier-vs-value split as get_tables().
        if not self._is_safe_identifier(database):
            logger.error(f"Rejected unsafe database identifier: {database!r}")
            return []

        sql = (
            "SELECT COLUMN_NAME, DATA_TYPE, IS_NULLABLE "
            f"FROM {database}.INFORMATION_SCHEMA.COLUMNS "
            "WHERE TABLE_SCHEMA = %s AND TABLE_NAME = %s"
        )
        return self.query(sql, (schema, table_name))

    async def _execute_tool(self, name: str, arguments: Dict[str, Any]) -> Any:
        """Execute a Snowflake tool.

        Raises:
            ValueError: if `name` is not a registered tool.
        """
        if not self.config:
            return {"error": "Snowflake configuration not provided"}

        if name == "snowflake_query":
            sql = arguments.get("sql", "")
            return {"results": self.query(sql)}

        elif name == "snowflake_list_tables":
            return {"tables": self.get_tables()}

        elif name == "snowflake_get_schema":
            table_name = arguments.get("table_name")
            if not table_name:
                return {"error": "table_name is required"}
            return {"schema": self.get_table_schema(table_name)}

        else:
            raise ValueError(f"Unknown tool: {name}")

    def close(self):
        """Close cursor and connection; idempotent and swallows close errors."""
        if self.cursor:
            try:
                self.cursor.close()
            except Exception:
                pass
            self.cursor = None
        if self.connection:
            try:
                self.connection.close()
            except Exception:
                pass
            self.connection = None

    def __del__(self):
        """Best-effort cleanup; must never raise during interpreter teardown."""
        try:
            self.close()
        except Exception:
            pass
185
+
src/memory/__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Memory management system."""
2
+