Spaces:
Sleeping
Sleeping
firepenguindisopanda committed on
Commit ·
1a608b5
1
Parent(s): 4da2f57
Refactor code structure for improved readability and maintainability
Browse files- app/core/llm_factory.py +4 -4
- app/core/mongodb_rag.py +378 -0
- app/core/orchestrator.py +97 -30
- app/core/rag.py +220 -121
- app/core/schemas.py +1 -10
- app/prompts/product_owner.md +23 -2
- app/routers/health.py +71 -37
- corpus_rag/RAG_INDEX_DECISION.md +64 -0
- corpus_rag/api_designer/role_playbook.txt +18 -0
- corpus_rag/api_designer/standards_quickref.txt +10 -0
- corpus_rag/business_analyst/role_playbook.txt +19 -0
- corpus_rag/business_analyst/standards_quickref.txt +10 -0
- corpus_rag/data_architect/role_playbook.txt +18 -0
- corpus_rag/data_architect/standards_quickref.txt +10 -0
- corpus_rag/devops_architect/role_playbook.txt +18 -0
- corpus_rag/devops_architect/standards_quickref.txt +10 -0
- corpus_rag/environment_engineer/role_playbook.txt +18 -0
- corpus_rag/environment_engineer/standards_quickref.txt +10 -0
- corpus_rag/product_owner/role_playbook.txt +22 -0
- corpus_rag/product_owner/standards_quickref.txt +10 -0
- corpus_rag/qa_strategist/role_playbook.txt +18 -0
- corpus_rag/qa_strategist/standards_quickref.txt +10 -0
- corpus_rag/security_analyst/role_playbook.txt +18 -0
- corpus_rag/security_analyst/standards_quickref.txt +10 -0
- corpus_rag/solution_architect/role_playbook.txt +19 -0
- corpus_rag/solution_architect/standards_quickref.txt +10 -0
- corpus_rag/technical_writer/role_playbook.txt +18 -0
- corpus_rag/technical_writer/standards_quickref.txt +10 -0
- corpus_rag/ux_designer/role_playbook.txt +18 -0
- corpus_rag/ux_designer/standards_quickref.txt +10 -0
- pyproject.toml +3 -3
- requirements.txt +60 -46
- scripts/seed_rag_data.py +316 -0
- scripts/setup_mongodb_indexes.py +237 -0
- uv.lock +136 -54
app/core/llm_factory.py
CHANGED
|
@@ -31,23 +31,23 @@ DEFAULT_EMBEDDING_MODEL = "nvidia/nv-embedqa-e5-v5"
|
|
| 31 |
AGENT_CONFIGS: dict[TeamRole, dict[str, Any]] = {
|
| 32 |
# Phase 1
|
| 33 |
TeamRole.PROJECT_REFINER: {"temperature": 0.3, "max_tokens": 2048},
|
| 34 |
-
TeamRole.PRODUCT_OWNER: {"temperature": 0.5, "max_tokens":
|
| 35 |
# Phase 2
|
| 36 |
TeamRole.BUSINESS_ANALYST: {"temperature": 0.3, "max_tokens": 3072},
|
| 37 |
TeamRole.SOLUTION_ARCHITECT: {"temperature": 0.4, "max_tokens": 3072},
|
| 38 |
-
TeamRole.DATA_ARCHITECT: {"temperature": 0.3, "max_tokens":
|
| 39 |
TeamRole.SECURITY_ANALYST: {"temperature": 0.2, "max_tokens": 2048},
|
| 40 |
# Phase 3
|
| 41 |
TeamRole.UX_DESIGNER: {"temperature": 0.8, "max_tokens": 2048},
|
| 42 |
TeamRole.API_DESIGNER: {"temperature": 0.2, "max_tokens": 4096},
|
| 43 |
-
TeamRole.QA_STRATEGIST: {"temperature": 0.3, "max_tokens":
|
| 44 |
TeamRole.DEVOPS_ARCHITECT: {"temperature": 0.3, "max_tokens": 2048},
|
| 45 |
# Phase 4
|
| 46 |
TeamRole.ENVIRONMENT_ENGINEER: {"temperature": 0.3, "max_tokens": 2048},
|
| 47 |
TeamRole.TECHNICAL_WRITER: {"temperature": 0.5, "max_tokens": 3072},
|
| 48 |
# Phase 5 / Judge
|
| 49 |
TeamRole.SPEC_COORDINATOR: {"temperature": 0.3, "max_tokens": 4096},
|
| 50 |
-
TeamRole.JUDGE: {"temperature": 0.1, "max_tokens":
|
| 51 |
}
|
| 52 |
|
| 53 |
# Default configuration for unknown roles
|
|
|
|
| 31 |
AGENT_CONFIGS: dict[TeamRole, dict[str, Any]] = {
|
| 32 |
# Phase 1
|
| 33 |
TeamRole.PROJECT_REFINER: {"temperature": 0.3, "max_tokens": 2048},
|
| 34 |
+
TeamRole.PRODUCT_OWNER: {"temperature": 0.5, "max_tokens": 4096},
|
| 35 |
# Phase 2
|
| 36 |
TeamRole.BUSINESS_ANALYST: {"temperature": 0.3, "max_tokens": 3072},
|
| 37 |
TeamRole.SOLUTION_ARCHITECT: {"temperature": 0.4, "max_tokens": 3072},
|
| 38 |
+
TeamRole.DATA_ARCHITECT: {"temperature": 0.3, "max_tokens": 4096},
|
| 39 |
TeamRole.SECURITY_ANALYST: {"temperature": 0.2, "max_tokens": 2048},
|
| 40 |
# Phase 3
|
| 41 |
TeamRole.UX_DESIGNER: {"temperature": 0.8, "max_tokens": 2048},
|
| 42 |
TeamRole.API_DESIGNER: {"temperature": 0.2, "max_tokens": 4096},
|
| 43 |
+
TeamRole.QA_STRATEGIST: {"temperature": 0.3, "max_tokens": 4096},
|
| 44 |
TeamRole.DEVOPS_ARCHITECT: {"temperature": 0.3, "max_tokens": 2048},
|
| 45 |
# Phase 4
|
| 46 |
TeamRole.ENVIRONMENT_ENGINEER: {"temperature": 0.3, "max_tokens": 2048},
|
| 47 |
TeamRole.TECHNICAL_WRITER: {"temperature": 0.5, "max_tokens": 3072},
|
| 48 |
# Phase 5 / Judge
|
| 49 |
TeamRole.SPEC_COORDINATOR: {"temperature": 0.3, "max_tokens": 4096},
|
| 50 |
+
TeamRole.JUDGE: {"temperature": 0.1, "max_tokens": 2048},
|
| 51 |
}
|
| 52 |
|
| 53 |
# Default configuration for unknown roles
|
app/core/mongodb_rag.py
ADDED
|
@@ -0,0 +1,378 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
MongoDB Atlas Vector Search RAG Service with agent-specific collections.
|
| 3 |
+
|
| 4 |
+
Each agent role has its own collection with specialized examples:
|
| 5 |
+
- Product Owner: PRDs, user stories, acceptance criteria
|
| 6 |
+
- Business Analyst: BRDs, process flows
|
| 7 |
+
- Solution Architect: System designs, ADRs
|
| 8 |
+
- etc.
|
| 9 |
+
|
| 10 |
+
This enables more relevant RAG retrieval per agent specialty.
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
import os
|
| 14 |
+
from typing import Any
|
| 15 |
+
|
| 16 |
+
from langchain_core.documents import Document
|
| 17 |
+
from langchain_core.retrievers import BaseRetriever
|
| 18 |
+
from langchain_core.vectorstores import VectorStore
|
| 19 |
+
|
| 20 |
+
from .llm_factory import get_embeddings_model
|
| 21 |
+
from .observability import get_logger
|
| 22 |
+
from .schemas import TeamRole
|
| 23 |
+
|
| 24 |
+
logger = get_logger("mongodb_rag")
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
# Mapping from TeamRole to MongoDB collection name
|
| 28 |
+
# Coordinator roles (PROJECT_REFINER, SPEC_COORDINATOR, JUDGE) don't need RAG
|
| 29 |
+
# as they synthesize from other agent outputs
|
| 30 |
+
ROLE_COLLECTION_MAP: dict[TeamRole, str | None] = {
|
| 31 |
+
TeamRole.PRODUCT_OWNER: "rag_product_owner",
|
| 32 |
+
TeamRole.BUSINESS_ANALYST: "rag_business_analyst",
|
| 33 |
+
TeamRole.SOLUTION_ARCHITECT: "rag_solution_architect",
|
| 34 |
+
TeamRole.DATA_ARCHITECT: "rag_data_architect",
|
| 35 |
+
TeamRole.SECURITY_ANALYST: "rag_security_analyst",
|
| 36 |
+
TeamRole.UX_DESIGNER: "rag_ux_designer",
|
| 37 |
+
TeamRole.API_DESIGNER: "rag_api_designer",
|
| 38 |
+
TeamRole.QA_STRATEGIST: "rag_qa_strategist",
|
| 39 |
+
TeamRole.DEVOPS_ARCHITECT: "rag_devops_architect",
|
| 40 |
+
TeamRole.ENVIRONMENT_ENGINEER: "rag_environment_engineer",
|
| 41 |
+
TeamRole.TECHNICAL_WRITER: "rag_technical_writer",
|
| 42 |
+
# Coordinator roles - no RAG needed
|
| 43 |
+
TeamRole.PROJECT_REFINER: None,
|
| 44 |
+
TeamRole.SPEC_COORDINATOR: None,
|
| 45 |
+
TeamRole.JUDGE: None,
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
# All collection names for setup/seeding scripts
|
| 49 |
+
ALL_RAG_COLLECTIONS = [
|
| 50 |
+
name for name in ROLE_COLLECTION_MAP.values() if name is not None
|
| 51 |
+
]
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class MongoDBRAGService:
|
| 55 |
+
"""
|
| 56 |
+
RAG Service with MongoDB Atlas Vector Search for agent-specific retrieval.
|
| 57 |
+
|
| 58 |
+
Features:
|
| 59 |
+
- Agent-specific collections for specialized examples
|
| 60 |
+
- Lazy initialization of vector stores
|
| 61 |
+
- Health check for connection monitoring
|
| 62 |
+
- Document management (add/delete)
|
| 63 |
+
"""
|
| 64 |
+
|
| 65 |
+
def __init__(self):
|
| 66 |
+
self.embeddings = get_embeddings_model()
|
| 67 |
+
self._client: Any | None = None # MongoClient
|
| 68 |
+
self._db_name: str = os.getenv("MONGODB_DATABASE", "specs_before_code")
|
| 69 |
+
self._index_name: str = os.getenv("MONGODB_INDEX_NAME", "vector_index")
|
| 70 |
+
self._vector_stores: dict[TeamRole, VectorStore] = {}
|
| 71 |
+
self._initialized = False
|
| 72 |
+
self._initialize_connection()
|
| 73 |
+
|
| 74 |
+
def _initialize_connection(self) -> None:
|
| 75 |
+
"""Initialize MongoDB connection."""
|
| 76 |
+
uri = os.getenv("MONGODB_URI")
|
| 77 |
+
if not uri:
|
| 78 |
+
logger.warning("MONGODB_URI not set - MongoDB RAG disabled")
|
| 79 |
+
return
|
| 80 |
+
|
| 81 |
+
try:
|
| 82 |
+
from pymongo import MongoClient
|
| 83 |
+
|
| 84 |
+
self._client = MongoClient(
|
| 85 |
+
uri,
|
| 86 |
+
maxPoolSize=10,
|
| 87 |
+
minPoolSize=2,
|
| 88 |
+
maxIdleTimeMS=30000,
|
| 89 |
+
serverSelectionTimeoutMS=5000,
|
| 90 |
+
)
|
| 91 |
+
# Verify connection
|
| 92 |
+
self._client.admin.command("ping")
|
| 93 |
+
self._initialized = True
|
| 94 |
+
logger.info(
|
| 95 |
+
"Connected to MongoDB Atlas",
|
| 96 |
+
data={"database": self._db_name},
|
| 97 |
+
)
|
| 98 |
+
except ImportError:
|
| 99 |
+
logger.error("pymongo not installed - run: uv add pymongo")
|
| 100 |
+
self._client = None
|
| 101 |
+
except Exception as e:
|
| 102 |
+
logger.error(f"Failed to connect to MongoDB: {e}")
|
| 103 |
+
self._client = None
|
| 104 |
+
|
| 105 |
+
def _get_collection(self, role: TeamRole) -> Any | None:
|
| 106 |
+
"""Get MongoDB collection for a specific agent role."""
|
| 107 |
+
if not self._client:
|
| 108 |
+
return None
|
| 109 |
+
|
| 110 |
+
collection_name = ROLE_COLLECTION_MAP.get(role)
|
| 111 |
+
if not collection_name:
|
| 112 |
+
logger.debug(f"No RAG collection mapped for role: {role.value}")
|
| 113 |
+
return None
|
| 114 |
+
|
| 115 |
+
return self._client[self._db_name][collection_name]
|
| 116 |
+
|
| 117 |
+
def _get_vector_store(self, role: TeamRole) -> VectorStore | None:
|
| 118 |
+
"""Get or create vector store for a specific agent role (lazy init)."""
|
| 119 |
+
# Return cached store if available
|
| 120 |
+
if role in self._vector_stores:
|
| 121 |
+
return self._vector_stores[role]
|
| 122 |
+
|
| 123 |
+
collection = self._get_collection(role)
|
| 124 |
+
if collection is None:
|
| 125 |
+
return None
|
| 126 |
+
|
| 127 |
+
try:
|
| 128 |
+
from langchain_mongodb import MongoDBAtlasVectorSearch
|
| 129 |
+
|
| 130 |
+
vector_store = MongoDBAtlasVectorSearch(
|
| 131 |
+
collection=collection,
|
| 132 |
+
embedding=self.embeddings,
|
| 133 |
+
index_name=self._index_name,
|
| 134 |
+
text_key="content",
|
| 135 |
+
embedding_key="embedding",
|
| 136 |
+
)
|
| 137 |
+
|
| 138 |
+
self._vector_stores[role] = vector_store
|
| 139 |
+
logger.debug(f"Initialized vector store for {role.value}")
|
| 140 |
+
return vector_store
|
| 141 |
+
|
| 142 |
+
except ImportError:
|
| 143 |
+
logger.error(
|
| 144 |
+
"langchain-mongodb not installed - run: uv add langchain-mongodb"
|
| 145 |
+
)
|
| 146 |
+
return None
|
| 147 |
+
except Exception as e:
|
| 148 |
+
logger.error(f"Error creating vector store for {role.value}: {e}")
|
| 149 |
+
return None
|
| 150 |
+
|
| 151 |
+
def retrieve(
|
| 152 |
+
self,
|
| 153 |
+
query: str,
|
| 154 |
+
role: TeamRole,
|
| 155 |
+
k: int = 3,
|
| 156 |
+
) -> list[Document]:
|
| 157 |
+
"""
|
| 158 |
+
Retrieve relevant documents for a specific agent role.
|
| 159 |
+
|
| 160 |
+
Args:
|
| 161 |
+
query: The search query (usually project description or context)
|
| 162 |
+
role: The agent role to retrieve examples for
|
| 163 |
+
k: Number of documents to retrieve (default: 3)
|
| 164 |
+
|
| 165 |
+
Returns:
|
| 166 |
+
List of relevant Document objects, empty if no matches or error
|
| 167 |
+
"""
|
| 168 |
+
vector_store = self._get_vector_store(role)
|
| 169 |
+
if not vector_store:
|
| 170 |
+
logger.debug(f"No vector store available for role {role.value}")
|
| 171 |
+
return []
|
| 172 |
+
|
| 173 |
+
try:
|
| 174 |
+
docs = vector_store.similarity_search(query, k=k)
|
| 175 |
+
logger.info(
|
| 176 |
+
f"Retrieved {len(docs)} docs for {role.value}",
|
| 177 |
+
data={"role": role.value, "count": len(docs)},
|
| 178 |
+
)
|
| 179 |
+
return docs
|
| 180 |
+
except Exception as e:
|
| 181 |
+
logger.error(f"RAG retrieval error for {role.value}: {e}")
|
| 182 |
+
return []
|
| 183 |
+
|
| 184 |
+
def get_retriever(
|
| 185 |
+
self,
|
| 186 |
+
role: TeamRole,
|
| 187 |
+
k: int = 3,
|
| 188 |
+
search_type: str = "similarity",
|
| 189 |
+
) -> BaseRetriever | None:
|
| 190 |
+
"""
|
| 191 |
+
Get a LangChain retriever for a specific agent role.
|
| 192 |
+
|
| 193 |
+
Useful for LCEL chain composition with RunnablePassthrough.
|
| 194 |
+
|
| 195 |
+
Args:
|
| 196 |
+
role: The agent role for role-specific retrieval
|
| 197 |
+
k: Number of documents to retrieve
|
| 198 |
+
search_type: Type of search ("similarity" or "mmr")
|
| 199 |
+
|
| 200 |
+
Returns:
|
| 201 |
+
LangChain BaseRetriever or None if unavailable
|
| 202 |
+
"""
|
| 203 |
+
vector_store = self._get_vector_store(role)
|
| 204 |
+
if not vector_store:
|
| 205 |
+
return None
|
| 206 |
+
|
| 207 |
+
return vector_store.as_retriever(
|
| 208 |
+
search_type=search_type,
|
| 209 |
+
search_kwargs={"k": k},
|
| 210 |
+
)
|
| 211 |
+
|
| 212 |
+
def format_docs(self, docs: list[Document]) -> str:
|
| 213 |
+
"""
|
| 214 |
+
Format retrieved documents for prompt injection.
|
| 215 |
+
|
| 216 |
+
Args:
|
| 217 |
+
docs: List of retrieved documents
|
| 218 |
+
|
| 219 |
+
Returns:
|
| 220 |
+
Formatted string with examples, or message if empty
|
| 221 |
+
"""
|
| 222 |
+
if not docs:
|
| 223 |
+
return "No relevant examples found in knowledge base."
|
| 224 |
+
|
| 225 |
+
formatted = []
|
| 226 |
+
for i, doc in enumerate(docs, 1):
|
| 227 |
+
source = doc.metadata.get("source", "Unknown")
|
| 228 |
+
role = doc.metadata.get("role", "")
|
| 229 |
+
|
| 230 |
+
header = f"### Example {i}"
|
| 231 |
+
if source != "Unknown":
|
| 232 |
+
header += f" (Source: {source})"
|
| 233 |
+
if role:
|
| 234 |
+
header += f" [{role}]"
|
| 235 |
+
|
| 236 |
+
formatted.append(f"{header}\n{doc.page_content}")
|
| 237 |
+
|
| 238 |
+
return "\n\n---\n\n".join(formatted)
|
| 239 |
+
|
| 240 |
+
async def add_documents(
|
| 241 |
+
self,
|
| 242 |
+
documents: list[Document],
|
| 243 |
+
role: TeamRole,
|
| 244 |
+
) -> list[str]:
|
| 245 |
+
"""
|
| 246 |
+
Add documents to an agent's RAG collection.
|
| 247 |
+
|
| 248 |
+
Args:
|
| 249 |
+
documents: Documents to add (will be embedded)
|
| 250 |
+
role: Target agent role (determines collection)
|
| 251 |
+
|
| 252 |
+
Returns:
|
| 253 |
+
List of inserted document IDs
|
| 254 |
+
|
| 255 |
+
Raises:
|
| 256 |
+
ValueError: If no collection exists for the role
|
| 257 |
+
"""
|
| 258 |
+
vector_store = self._get_vector_store(role)
|
| 259 |
+
if not vector_store:
|
| 260 |
+
raise ValueError(f"No RAG collection for role {role.value}")
|
| 261 |
+
|
| 262 |
+
# Add role metadata to all documents
|
| 263 |
+
for doc in documents:
|
| 264 |
+
doc.metadata["role"] = role.value
|
| 265 |
+
|
| 266 |
+
try:
|
| 267 |
+
ids = await vector_store.aadd_documents(documents)
|
| 268 |
+
logger.info(
|
| 269 |
+
f"Added {len(ids)} documents to {role.value}",
|
| 270 |
+
data={"role": role.value, "count": len(ids)},
|
| 271 |
+
)
|
| 272 |
+
return ids
|
| 273 |
+
except Exception as e:
|
| 274 |
+
logger.error(f"Error adding documents for {role.value}: {e}")
|
| 275 |
+
raise
|
| 276 |
+
|
| 277 |
+
async def delete_documents(
|
| 278 |
+
self,
|
| 279 |
+
ids: list[str],
|
| 280 |
+
role: TeamRole,
|
| 281 |
+
) -> bool:
|
| 282 |
+
"""
|
| 283 |
+
Delete documents from an agent's collection by ID.
|
| 284 |
+
|
| 285 |
+
Args:
|
| 286 |
+
ids: Document IDs to delete
|
| 287 |
+
role: Agent role (determines collection)
|
| 288 |
+
|
| 289 |
+
Returns:
|
| 290 |
+
True if successful, False otherwise
|
| 291 |
+
"""
|
| 292 |
+
vector_store = self._get_vector_store(role)
|
| 293 |
+
if not vector_store:
|
| 294 |
+
return False
|
| 295 |
+
|
| 296 |
+
try:
|
| 297 |
+
if hasattr(vector_store, "adelete"):
|
| 298 |
+
await vector_store.adelete(ids=ids)
|
| 299 |
+
elif hasattr(vector_store, "delete"):
|
| 300 |
+
vector_store.delete(ids=ids)
|
| 301 |
+
else:
|
| 302 |
+
logger.warning("Vector store does not support deletion")
|
| 303 |
+
return False
|
| 304 |
+
return True
|
| 305 |
+
except Exception as e:
|
| 306 |
+
logger.error(f"Error deleting documents for {role.value}: {e}")
|
| 307 |
+
return False
|
| 308 |
+
|
| 309 |
+
def health_check(self) -> dict[str, Any]:
|
| 310 |
+
"""
|
| 311 |
+
Return health status of MongoDB connection.
|
| 312 |
+
|
| 313 |
+
Returns:
|
| 314 |
+
Dict with status, database name, and collection info
|
| 315 |
+
"""
|
| 316 |
+
if not self._client:
|
| 317 |
+
return {
|
| 318 |
+
"status": "disconnected",
|
| 319 |
+
"message": "MONGODB_URI not configured or connection failed",
|
| 320 |
+
}
|
| 321 |
+
|
| 322 |
+
try:
|
| 323 |
+
self._client.admin.command("ping")
|
| 324 |
+
|
| 325 |
+
# Get collection stats
|
| 326 |
+
db = self._client[self._db_name]
|
| 327 |
+
existing_collections = set(db.list_collection_names())
|
| 328 |
+
configured_collections = [c for c in ALL_RAG_COLLECTIONS if c]
|
| 329 |
+
|
| 330 |
+
return {
|
| 331 |
+
"status": "connected",
|
| 332 |
+
"database": self._db_name,
|
| 333 |
+
"index_name": self._index_name,
|
| 334 |
+
"configured_collections": configured_collections,
|
| 335 |
+
"existing_collections": [
|
| 336 |
+
c for c in configured_collections if c in existing_collections
|
| 337 |
+
],
|
| 338 |
+
"missing_collections": [
|
| 339 |
+
c for c in configured_collections if c not in existing_collections
|
| 340 |
+
],
|
| 341 |
+
}
|
| 342 |
+
except Exception as e:
|
| 343 |
+
return {
|
| 344 |
+
"status": "error",
|
| 345 |
+
"message": str(e),
|
| 346 |
+
}
|
| 347 |
+
|
| 348 |
+
def is_available(self) -> bool:
|
| 349 |
+
"""Check if MongoDB RAG is available and connected."""
|
| 350 |
+
return self._initialized and self._client is not None
|
| 351 |
+
|
| 352 |
+
def get_roles_with_rag(self) -> list[TeamRole]:
|
| 353 |
+
"""Get list of roles that have RAG collections configured."""
|
| 354 |
+
return [role for role, coll in ROLE_COLLECTION_MAP.items() if coll is not None]
|
| 355 |
+
|
| 356 |
+
|
| 357 |
+
# Singleton instance
|
| 358 |
+
_mongodb_rag_service: MongoDBRAGService | None = None
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
def get_mongodb_rag_service() -> MongoDBRAGService:
|
| 362 |
+
"""
|
| 363 |
+
Get singleton instance of MongoDB RAG service.
|
| 364 |
+
|
| 365 |
+
Uses module-level singleton for connection reuse.
|
| 366 |
+
"""
|
| 367 |
+
global _mongodb_rag_service
|
| 368 |
+
if _mongodb_rag_service is None:
|
| 369 |
+
_mongodb_rag_service = MongoDBRAGService()
|
| 370 |
+
return _mongodb_rag_service
|
| 371 |
+
|
| 372 |
+
|
| 373 |
+
def reset_mongodb_rag_service() -> None:
|
| 374 |
+
"""Reset the singleton (useful for testing)."""
|
| 375 |
+
global _mongodb_rag_service
|
| 376 |
+
if _mongodb_rag_service and _mongodb_rag_service._client:
|
| 377 |
+
_mongodb_rag_service._client.close()
|
| 378 |
+
_mongodb_rag_service = None
|
app/core/orchestrator.py
CHANGED
|
@@ -102,40 +102,112 @@ class Orchestrator:
|
|
| 102 |
) -> AsyncIterator[dict[str, Any]]:
|
| 103 |
"""
|
| 104 |
Run the pipeline with streaming output for each agent.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
"""
|
| 106 |
-
# Build initial context similarly
|
| 107 |
initial_context = f"Project Description: {project_request.description}"
|
| 108 |
|
| 109 |
-
# Gather RAG context first
|
| 110 |
yield {"type": "status", "message": "Starting Multi-Agent Pipeline..."}
|
| 111 |
|
|
|
|
|
|
|
|
|
|
| 112 |
yield {
|
| 113 |
-
"type": "
|
| 114 |
-
"message": "
|
|
|
|
| 115 |
}
|
| 116 |
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
}
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
def _build_graph(self):
|
| 141 |
graph = StateGraph(AgentState)
|
|
@@ -256,11 +328,6 @@ class Orchestrator:
|
|
| 256 |
graph.add_edge(source, judge)
|
| 257 |
|
| 258 |
# Determine success destination
|
| 259 |
-
# If multiple next_nodes, we assume they are handled by a single 'gate' or we fan out.
|
| 260 |
-
# Here we assume the input `next_nodes` leads to a single logical step (like a Gate)
|
| 261 |
-
# OR we are just passing the first one if logic dictates.
|
| 262 |
-
# In our graph, we route to Gates mostly.
|
| 263 |
-
|
| 264 |
success_dest = get_dest_value(next_nodes[0])
|
| 265 |
|
| 266 |
graph.add_conditional_edges(
|
|
|
|
| 102 |
) -> AsyncIterator[dict[str, Any]]:
|
| 103 |
"""
|
| 104 |
Run the pipeline with streaming output for each agent.
|
| 105 |
+
|
| 106 |
+
Emits SSE events:
|
| 107 |
+
- status: General status message
|
| 108 |
+
- context_ready: RAG context has been gathered
|
| 109 |
+
- agent_start: An agent has started processing
|
| 110 |
+
- chunk: A chunk of content from streaming
|
| 111 |
+
- agent_complete: An agent has finished
|
| 112 |
+
- judge_start: Judge evaluation started
|
| 113 |
+
- judge_complete: Judge evaluation finished
|
| 114 |
+
- pipeline_complete: Full pipeline finished with results
|
| 115 |
+
- error: Error occurred
|
| 116 |
"""
|
|
|
|
| 117 |
initial_context = f"Project Description: {project_request.description}"
|
| 118 |
|
|
|
|
| 119 |
yield {"type": "status", "message": "Starting Multi-Agent Pipeline..."}
|
| 120 |
|
| 121 |
+
query = initial_context
|
| 122 |
+
docs = self.rag_service.retrieve(query, k=3)
|
| 123 |
+
retrieval_context = self.rag_service.format_docs(docs)
|
| 124 |
yield {
|
| 125 |
+
"type": "context_ready",
|
| 126 |
+
"message": f"Retrieved {len(docs)} context documents",
|
| 127 |
+
"docs_count": len(docs),
|
| 128 |
}
|
| 129 |
|
| 130 |
+
all_outputs: dict[str, str] = {}
|
| 131 |
+
all_judge_results: dict[str, dict[str, Any]] = {}
|
| 132 |
+
history: list[AgentResponse] = []
|
| 133 |
+
|
| 134 |
+
execution_order = [
|
| 135 |
+
TeamRole.PROJECT_REFINER,
|
| 136 |
+
TeamRole.PRODUCT_OWNER,
|
| 137 |
+
TeamRole.BUSINESS_ANALYST,
|
| 138 |
+
TeamRole.SOLUTION_ARCHITECT,
|
| 139 |
+
TeamRole.DATA_ARCHITECT,
|
| 140 |
+
TeamRole.SECURITY_ANALYST,
|
| 141 |
+
TeamRole.UX_DESIGNER,
|
| 142 |
+
TeamRole.API_DESIGNER,
|
| 143 |
+
TeamRole.QA_STRATEGIST,
|
| 144 |
+
TeamRole.DEVOPS_ARCHITECT,
|
| 145 |
+
TeamRole.ENVIRONMENT_ENGINEER,
|
| 146 |
+
TeamRole.TECHNICAL_WRITER,
|
| 147 |
+
TeamRole.SPEC_COORDINATOR,
|
| 148 |
+
]
|
| 149 |
+
|
| 150 |
+
for role in execution_order:
|
| 151 |
+
yield {"type": "agent_start", "role": role.value}
|
| 152 |
+
|
| 153 |
+
deps = AGENT_DEPENDENCIES.get(role, [])
|
| 154 |
+
filtered_history = []
|
| 155 |
+
|
| 156 |
+
if deps == ["*"]:
|
| 157 |
+
filtered_history = history
|
| 158 |
+
else:
|
| 159 |
+
core_roles = [TeamRole.PROJECT_REFINER, TeamRole.PRODUCT_OWNER]
|
| 160 |
+
allowed_roles = set(
|
| 161 |
+
[d.value for d in deps] + [c.value for c in core_roles]
|
| 162 |
+
)
|
| 163 |
+
for msg in history:
|
| 164 |
+
if msg.role.value in allowed_roles:
|
| 165 |
+
filtered_history.append(msg)
|
| 166 |
+
|
| 167 |
+
response = await self.agent_system.process_step(
|
| 168 |
+
role=role,
|
| 169 |
+
context=initial_context,
|
| 170 |
+
previous_outputs=filtered_history,
|
| 171 |
+
feedback="",
|
| 172 |
+
retrieval_context=retrieval_context,
|
| 173 |
+
)
|
| 174 |
+
|
| 175 |
+
history.append(response)
|
| 176 |
+
all_outputs[role.value] = response.content
|
| 177 |
+
|
| 178 |
+
yield {
|
| 179 |
+
"type": "agent_complete",
|
| 180 |
+
"role": role.value,
|
| 181 |
+
"content_length": len(response.content),
|
| 182 |
}
|
| 183 |
+
|
| 184 |
+
if role in self.judged_roles:
|
| 185 |
+
yield {"type": "judge_start", "role": role.value}
|
| 186 |
+
|
| 187 |
+
judge_output = await self.agent_system.evaluate_step(
|
| 188 |
+
role=role, content=response.content, context=initial_context
|
| 189 |
+
)
|
| 190 |
+
|
| 191 |
+
all_judge_results[role.value] = {
|
| 192 |
+
"is_approved": judge_output.is_approved,
|
| 193 |
+
"score": judge_output.score,
|
| 194 |
+
"issues_count": len(judge_output.issues),
|
| 195 |
+
"recommended_action": judge_output.recommended_action,
|
| 196 |
+
"feedback": judge_output.feedback,
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
yield {
|
| 200 |
+
"type": "judge_complete",
|
| 201 |
+
"role": role.value,
|
| 202 |
+
"is_approved": judge_output.is_approved,
|
| 203 |
+
"score": judge_output.score,
|
| 204 |
+
}
|
| 205 |
+
|
| 206 |
+
yield {
|
| 207 |
+
"type": "pipeline_complete",
|
| 208 |
+
"markdown_outputs": all_outputs,
|
| 209 |
+
"judge_results": all_judge_results,
|
| 210 |
+
}
|
| 211 |
|
| 212 |
def _build_graph(self):
|
| 213 |
graph = StateGraph(AgentState)
|
|
|
|
| 328 |
graph.add_edge(source, judge)
|
| 329 |
|
| 330 |
# Determine success destination
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 331 |
success_dest = get_dest_value(next_nodes[0])
|
| 332 |
|
| 333 |
graph.add_conditional_edges(
|
app/core/rag.py
CHANGED
|
@@ -1,10 +1,14 @@
|
|
| 1 |
"""
|
| 2 |
RAG (Retrieval-Augmented Generation) Service.
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
Features:
|
| 4 |
-
-
|
|
|
|
| 5 |
- LangChain Retriever interface for RAG chains
|
| 6 |
- Document ingestion from corpus directory
|
| 7 |
-
- Embedding caching via Redis (optional)
|
| 8 |
"""
|
| 9 |
|
| 10 |
import os
|
|
@@ -18,95 +22,78 @@ from langchain_core.vectorstores import InMemoryVectorStore, VectorStore
|
|
| 18 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 19 |
|
| 20 |
from .llm_factory import get_embeddings_model
|
|
|
|
| 21 |
from .observability import get_logger
|
|
|
|
| 22 |
|
| 23 |
load_dotenv()
|
| 24 |
logger = get_logger("rag")
|
|
|
|
| 25 |
# Define paths
|
| 26 |
BASE_DIR = Path(__file__).resolve().parents[2]
|
| 27 |
CORPUS_DIR = BASE_DIR / "corpus_rag"
|
| 28 |
VECTOR_STORE_PATH = BASE_DIR / "public" / "vector_store"
|
|
|
|
| 29 |
# Error messages
|
| 30 |
ERR_VECTOR_STORE_NOT_INIT = "Vector store not initialized"
|
| 31 |
|
| 32 |
|
| 33 |
class RAGService:
|
| 34 |
"""
|
| 35 |
-
RAG Service with
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
"""
|
| 41 |
|
| 42 |
def __init__(self):
|
| 43 |
self.embeddings = get_embeddings_model()
|
| 44 |
-
self.
|
| 45 |
-
self.
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
if
|
| 52 |
-
self.
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
# Get the index
|
| 66 |
-
index = pc.Index(index_name)
|
| 67 |
-
# Create LangChain vectorstore
|
| 68 |
-
self.vector_store = PineconeVectorStore(
|
| 69 |
-
index=index,
|
| 70 |
-
embedding=self.embeddings,
|
| 71 |
-
text_key="page_content",
|
| 72 |
-
)
|
| 73 |
-
logger.info("Pinecone vectorstore initialized successfully")
|
| 74 |
-
except ImportError as e:
|
| 75 |
-
logger.warning(f"Pinecone not installed: {e}. Using fallback.")
|
| 76 |
-
self._init_fallback()
|
| 77 |
-
except Exception as e:
|
| 78 |
-
logger.error(f"Failed to initialize Pinecone: {e}. Using fallback.")
|
| 79 |
-
self._init_fallback()
|
| 80 |
-
|
| 81 |
-
def _init_fallback(self):
|
| 82 |
-
"""Initialize fallback in-memory vectorstore."""
|
| 83 |
-
logger.info("Using in-memory vectorstore (development mode)")
|
| 84 |
-
# In-memory store doesn't persist to disk in this simplified version
|
| 85 |
-
# to avoid dependency on custom pickling logic from rag_simple.
|
| 86 |
-
# It simply rebuilds from corpus on startup.
|
| 87 |
-
self._build_vector_store()
|
| 88 |
-
|
| 89 |
-
def _build_vector_store(self):
|
| 90 |
-
"""Build vectorstore from corpus documents."""
|
| 91 |
if not CORPUS_DIR.exists():
|
| 92 |
logger.warning(f"Corpus directory not found: {CORPUS_DIR}")
|
| 93 |
-
self.
|
| 94 |
return
|
| 95 |
|
| 96 |
-
# Load documents
|
| 97 |
documents = self._load_documents()
|
| 98 |
-
|
| 99 |
if not documents:
|
| 100 |
-
logger.warning("No documents found
|
| 101 |
-
self.
|
| 102 |
return
|
| 103 |
|
| 104 |
-
# Split documents into chunks
|
| 105 |
chunks = self._split_documents(documents)
|
| 106 |
logger.info(f"Created {len(chunks)} chunks from {len(documents)} documents")
|
| 107 |
|
| 108 |
-
|
| 109 |
-
|
|
|
|
|
|
|
|
|
|
| 110 |
|
| 111 |
def _load_documents(self) -> list[Document]:
|
| 112 |
"""Load documents from corpus directory."""
|
|
@@ -139,60 +126,91 @@ class RAGService:
|
|
| 139 |
)
|
| 140 |
return text_splitter.split_documents(documents)
|
| 141 |
|
| 142 |
-
def
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
documents
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
def get_retriever(
|
| 156 |
self,
|
|
|
|
| 157 |
k: int = 3,
|
| 158 |
filter: dict[str, Any] | None = None,
|
| 159 |
search_type: str = "similarity",
|
| 160 |
-
) -> BaseRetriever:
|
| 161 |
"""
|
| 162 |
Get a LangChain Retriever for RAG chains.
|
| 163 |
-
"""
|
| 164 |
-
if not self.vector_store:
|
| 165 |
-
raise RuntimeError(ERR_VECTOR_STORE_NOT_INIT)
|
| 166 |
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
|
|
|
|
|
|
| 170 |
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
search_kwargs=search_kwargs,
|
| 174 |
-
)
|
| 175 |
-
|
| 176 |
-
def retrieve(self, query: str, k: int = 3) -> list[Document]:
|
| 177 |
-
"""
|
| 178 |
-
Retrieve relevant documents for a query.
|
| 179 |
"""
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 191 |
|
| 192 |
def format_docs(self, docs: list[Document]) -> str:
|
| 193 |
"""
|
| 194 |
Format retrieved documents into a string for context injection.
|
|
|
|
|
|
|
|
|
|
| 195 |
"""
|
|
|
|
|
|
|
|
|
|
| 196 |
if not docs:
|
| 197 |
return "No relevant context found."
|
| 198 |
|
|
@@ -205,36 +223,117 @@ class RAGService:
|
|
| 205 |
async def add_documents(
|
| 206 |
self,
|
| 207 |
documents: list[Document],
|
|
|
|
| 208 |
ids: list[str] | None = None,
|
| 209 |
) -> list[str]:
|
| 210 |
"""
|
| 211 |
Add documents to the vectorstore.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 212 |
"""
|
| 213 |
-
if
|
| 214 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 215 |
|
| 216 |
-
|
| 217 |
-
if hasattr(self.vector_store, "aadd_documents"):
|
| 218 |
-
return await self.vector_store.aadd_documents(documents, ids=ids)
|
| 219 |
-
else:
|
| 220 |
-
return self.vector_store.add_documents(documents, ids=ids)
|
| 221 |
|
| 222 |
-
async def delete_documents(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
"""
|
| 224 |
Delete documents from the vectorstore by ID.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
"""
|
| 226 |
-
if
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
self.
|
| 234 |
-
|
| 235 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 236 |
return False
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
"""
|
| 2 |
RAG (Retrieval-Augmented Generation) Service.
|
| 3 |
+
|
| 4 |
+
Unified RAG interface with MongoDB Atlas as primary and in-memory fallback.
|
| 5 |
+
Supports both role-specific retrieval (via MongoDB) and generic retrieval.
|
| 6 |
+
|
| 7 |
Features:
|
| 8 |
+
- MongoDB Atlas Vector Search for production (agent-specific collections)
|
| 9 |
+
- In-memory vectorstore fallback for local development
|
| 10 |
- LangChain Retriever interface for RAG chains
|
| 11 |
- Document ingestion from corpus directory
|
|
|
|
| 12 |
"""
|
| 13 |
|
| 14 |
import os
|
|
|
|
| 22 |
from langchain_text_splitters import RecursiveCharacterTextSplitter
|
| 23 |
|
| 24 |
from .llm_factory import get_embeddings_model
|
| 25 |
+
from .mongodb_rag import MongoDBRAGService, get_mongodb_rag_service
|
| 26 |
from .observability import get_logger
|
| 27 |
+
from .schemas import TeamRole
|
| 28 |
|
| 29 |
load_dotenv()
|
| 30 |
logger = get_logger("rag")
|
| 31 |
+
|
| 32 |
# Define paths
|
| 33 |
BASE_DIR = Path(__file__).resolve().parents[2]
|
| 34 |
CORPUS_DIR = BASE_DIR / "corpus_rag"
|
| 35 |
VECTOR_STORE_PATH = BASE_DIR / "public" / "vector_store"
|
| 36 |
+
|
| 37 |
# Error messages
|
| 38 |
ERR_VECTOR_STORE_NOT_INIT = "Vector store not initialized"
|
| 39 |
|
| 40 |
|
| 41 |
class RAGService:
|
| 42 |
"""
|
| 43 |
+
Unified RAG Service with MongoDB primary and in-memory fallback.
|
| 44 |
+
|
| 45 |
+
Priority order:
|
| 46 |
+
1. MongoDB Atlas Vector Search (if MONGODB_URI configured)
|
| 47 |
+
2. In-memory vectorstore (development fallback)
|
| 48 |
+
|
| 49 |
+
For role-specific retrieval, use the `role` parameter in retrieve/get_retriever.
|
| 50 |
+
When role is provided and MongoDB is available, retrieval is from agent-specific
|
| 51 |
+
collections for more relevant examples.
|
| 52 |
"""
|
| 53 |
|
| 54 |
def __init__(self):
|
| 55 |
self.embeddings = get_embeddings_model()
|
| 56 |
+
self._mongodb_service: MongoDBRAGService | None = None
|
| 57 |
+
self._fallback_store: VectorStore | None = None
|
| 58 |
+
self._initialize()
|
| 59 |
+
|
| 60 |
+
def _initialize(self) -> None:
|
| 61 |
+
"""Initialize RAG backends in priority order."""
|
| 62 |
+
# Try MongoDB first
|
| 63 |
+
if os.getenv("MONGODB_URI"):
|
| 64 |
+
self._mongodb_service = get_mongodb_rag_service()
|
| 65 |
+
if self._mongodb_service.is_available():
|
| 66 |
+
logger.info("Using MongoDB Atlas for RAG (primary)")
|
| 67 |
+
# Still initialize fallback for non-role-specific queries
|
| 68 |
+
self._init_fallback_store()
|
| 69 |
+
return
|
| 70 |
+
|
| 71 |
+
# Fallback to in-memory only
|
| 72 |
+
logger.info("Using in-memory vector store only (MongoDB unavailable)")
|
| 73 |
+
self._init_fallback_store()
|
| 74 |
+
|
| 75 |
+
def _init_fallback_store(self) -> None:
|
| 76 |
+
"""Initialize fallback in-memory vectorstore from corpus."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 77 |
if not CORPUS_DIR.exists():
|
| 78 |
logger.warning(f"Corpus directory not found: {CORPUS_DIR}")
|
| 79 |
+
self._fallback_store = InMemoryVectorStore(embedding=self.embeddings)
|
| 80 |
return
|
| 81 |
|
| 82 |
+
# Load and split documents
|
| 83 |
documents = self._load_documents()
|
|
|
|
| 84 |
if not documents:
|
| 85 |
+
logger.warning("No documents found for fallback store")
|
| 86 |
+
self._fallback_store = InMemoryVectorStore(embedding=self.embeddings)
|
| 87 |
return
|
| 88 |
|
|
|
|
| 89 |
chunks = self._split_documents(documents)
|
| 90 |
logger.info(f"Created {len(chunks)} chunks from {len(documents)} documents")
|
| 91 |
|
| 92 |
+
self._fallback_store = InMemoryVectorStore.from_documents(
|
| 93 |
+
documents=chunks,
|
| 94 |
+
embedding=self.embeddings,
|
| 95 |
+
)
|
| 96 |
+
logger.info("Fallback in-memory vectorstore initialized")
|
| 97 |
|
| 98 |
def _load_documents(self) -> list[Document]:
|
| 99 |
"""Load documents from corpus directory."""
|
|
|
|
| 126 |
)
|
| 127 |
return text_splitter.split_documents(documents)
|
| 128 |
|
| 129 |
+
def retrieve(
|
| 130 |
+
self,
|
| 131 |
+
query: str,
|
| 132 |
+
role: TeamRole | None = None,
|
| 133 |
+
k: int = 3,
|
| 134 |
+
) -> list[Document]:
|
| 135 |
+
"""
|
| 136 |
+
Retrieve relevant documents for a query.
|
| 137 |
|
| 138 |
+
Args:
|
| 139 |
+
query: Search query (project description or context)
|
| 140 |
+
role: Optional agent role for role-specific retrieval (MongoDB only)
|
| 141 |
+
k: Number of documents to retrieve
|
| 142 |
+
|
| 143 |
+
Returns:
|
| 144 |
+
List of relevant documents
|
| 145 |
+
"""
|
| 146 |
+
# Use MongoDB for role-specific retrieval if available
|
| 147 |
+
if self._mongodb_service and self._mongodb_service.is_available() and role:
|
| 148 |
+
docs = self._mongodb_service.retrieve(query, role, k)
|
| 149 |
+
if docs:
|
| 150 |
+
return docs
|
| 151 |
+
# Fall through to fallback if no MongoDB results
|
| 152 |
+
|
| 153 |
+
# Fallback retrieval
|
| 154 |
+
if self._fallback_store:
|
| 155 |
+
try:
|
| 156 |
+
docs = self._fallback_store.similarity_search(query, k=k)
|
| 157 |
+
logger.debug(f"Retrieved {len(docs)} documents from fallback store")
|
| 158 |
+
return docs
|
| 159 |
+
except Exception as e:
|
| 160 |
+
logger.error(f"Error during fallback retrieval: {e}")
|
| 161 |
+
return []
|
| 162 |
+
|
| 163 |
+
logger.warning("No vector store available for retrieval")
|
| 164 |
+
return []
|
| 165 |
|
| 166 |
def get_retriever(
|
| 167 |
self,
|
| 168 |
+
role: TeamRole | None = None,
|
| 169 |
k: int = 3,
|
| 170 |
filter: dict[str, Any] | None = None,
|
| 171 |
search_type: str = "similarity",
|
| 172 |
+
) -> BaseRetriever | None:
|
| 173 |
"""
|
| 174 |
Get a LangChain Retriever for RAG chains.
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
+
Args:
|
| 177 |
+
role: Optional agent role for role-specific retrieval
|
| 178 |
+
k: Number of documents to retrieve
|
| 179 |
+
filter: Optional filter dict (fallback store only)
|
| 180 |
+
search_type: "similarity" or "mmr"
|
| 181 |
|
| 182 |
+
Returns:
|
| 183 |
+
LangChain BaseRetriever or None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 184 |
"""
|
| 185 |
+
# Use MongoDB for role-specific retrieval if available
|
| 186 |
+
if self._mongodb_service and self._mongodb_service.is_available() and role:
|
| 187 |
+
retriever = self._mongodb_service.get_retriever(role, k, search_type)
|
| 188 |
+
if retriever:
|
| 189 |
+
return retriever
|
| 190 |
+
|
| 191 |
+
# Fallback retriever
|
| 192 |
+
if self._fallback_store:
|
| 193 |
+
search_kwargs: dict[str, Any] = {"k": k}
|
| 194 |
+
if filter:
|
| 195 |
+
search_kwargs["filter"] = filter
|
| 196 |
+
|
| 197 |
+
return self._fallback_store.as_retriever(
|
| 198 |
+
search_type=search_type,
|
| 199 |
+
search_kwargs=search_kwargs,
|
| 200 |
+
)
|
| 201 |
+
|
| 202 |
+
return None
|
| 203 |
|
| 204 |
def format_docs(self, docs: list[Document]) -> str:
|
| 205 |
"""
|
| 206 |
Format retrieved documents into a string for context injection.
|
| 207 |
+
|
| 208 |
+
Uses MongoDB service formatter if available (includes role metadata),
|
| 209 |
+
otherwise uses simple formatting.
|
| 210 |
"""
|
| 211 |
+
if self._mongodb_service and self._mongodb_service.is_available():
|
| 212 |
+
return self._mongodb_service.format_docs(docs)
|
| 213 |
+
|
| 214 |
if not docs:
|
| 215 |
return "No relevant context found."
|
| 216 |
|
|
|
|
| 223 |
async def add_documents(
|
| 224 |
self,
|
| 225 |
documents: list[Document],
|
| 226 |
+
role: TeamRole | None = None,
|
| 227 |
ids: list[str] | None = None,
|
| 228 |
) -> list[str]:
|
| 229 |
"""
|
| 230 |
Add documents to the vectorstore.
|
| 231 |
+
|
| 232 |
+
Args:
|
| 233 |
+
documents: Documents to add
|
| 234 |
+
role: Agent role (required for MongoDB, determines collection)
|
| 235 |
+
ids: Optional document IDs
|
| 236 |
+
|
| 237 |
+
Returns:
|
| 238 |
+
List of document IDs
|
| 239 |
"""
|
| 240 |
+
# Add to MongoDB if role specified and available
|
| 241 |
+
if self._mongodb_service and self._mongodb_service.is_available() and role:
|
| 242 |
+
return await self._mongodb_service.add_documents(documents, role)
|
| 243 |
+
|
| 244 |
+
# Add to fallback store
|
| 245 |
+
if self._fallback_store:
|
| 246 |
+
if hasattr(self._fallback_store, "aadd_documents"):
|
| 247 |
+
return await self._fallback_store.aadd_documents(documents, ids=ids)
|
| 248 |
+
else:
|
| 249 |
+
return self._fallback_store.add_documents(documents, ids=ids)
|
| 250 |
|
| 251 |
+
raise RuntimeError(ERR_VECTOR_STORE_NOT_INIT)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 252 |
|
| 253 |
+
async def delete_documents(
|
| 254 |
+
self,
|
| 255 |
+
ids: list[str],
|
| 256 |
+
role: TeamRole | None = None,
|
| 257 |
+
) -> bool:
|
| 258 |
"""
|
| 259 |
Delete documents from the vectorstore by ID.
|
| 260 |
+
|
| 261 |
+
Args:
|
| 262 |
+
ids: Document IDs to delete
|
| 263 |
+
role: Agent role (required for MongoDB deletion)
|
| 264 |
+
|
| 265 |
+
Returns:
|
| 266 |
+
True if successful
|
| 267 |
"""
|
| 268 |
+
# Delete from MongoDB if role specified and available
|
| 269 |
+
if self._mongodb_service and self._mongodb_service.is_available() and role:
|
| 270 |
+
return await self._mongodb_service.delete_documents(ids, role)
|
| 271 |
+
|
| 272 |
+
# Delete from fallback store
|
| 273 |
+
if self._fallback_store:
|
| 274 |
+
try:
|
| 275 |
+
if hasattr(self._fallback_store, "adelete"):
|
| 276 |
+
await self._fallback_store.adelete(ids=ids)
|
| 277 |
+
elif hasattr(self._fallback_store, "delete"):
|
| 278 |
+
self._fallback_store.delete(ids=ids)
|
| 279 |
+
else:
|
| 280 |
+
logger.warning("Fallback store does not support deletion")
|
| 281 |
+
return False
|
| 282 |
+
return True
|
| 283 |
+
except Exception as e:
|
| 284 |
+
logger.error(f"Error deleting documents: {e}")
|
| 285 |
return False
|
| 286 |
+
|
| 287 |
+
return False
|
| 288 |
+
|
| 289 |
+
def health_check(self) -> dict[str, Any]:
|
| 290 |
+
"""
|
| 291 |
+
Return health status of RAG service.
|
| 292 |
+
|
| 293 |
+
Returns:
|
| 294 |
+
Dict with status and backend information
|
| 295 |
+
"""
|
| 296 |
+
result: dict[str, Any] = {
|
| 297 |
+
"fallback_store_initialized": self._fallback_store is not None,
|
| 298 |
+
}
|
| 299 |
+
|
| 300 |
+
if self._mongodb_service:
|
| 301 |
+
result["mongodb"] = self._mongodb_service.health_check()
|
| 302 |
+
result["primary_backend"] = (
|
| 303 |
+
"mongodb" if self._mongodb_service.is_available() else "fallback"
|
| 304 |
+
)
|
| 305 |
+
else:
|
| 306 |
+
result["mongodb"] = {"status": "not_configured"}
|
| 307 |
+
result["primary_backend"] = "fallback"
|
| 308 |
+
|
| 309 |
+
return result
|
| 310 |
+
|
| 311 |
+
def is_mongodb_available(self) -> bool:
|
| 312 |
+
"""Check if MongoDB RAG backend is available."""
|
| 313 |
+
return (
|
| 314 |
+
self._mongodb_service is not None and self._mongodb_service.is_available()
|
| 315 |
+
)
|
| 316 |
+
|
| 317 |
+
def get_roles_with_rag(self) -> list[TeamRole]:
|
| 318 |
+
"""Get list of agent roles that have RAG collections configured."""
|
| 319 |
+
if self._mongodb_service:
|
| 320 |
+
return self._mongodb_service.get_roles_with_rag()
|
| 321 |
+
return []
|
| 322 |
+
|
| 323 |
+
|
| 324 |
+
# Module-level singleton
|
| 325 |
+
_rag_service: RAGService | None = None
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
def get_rag_service() -> RAGService:
|
| 329 |
+
"""Get singleton RAG service instance."""
|
| 330 |
+
global _rag_service
|
| 331 |
+
if _rag_service is None:
|
| 332 |
+
_rag_service = RAGService()
|
| 333 |
+
return _rag_service
|
| 334 |
+
|
| 335 |
+
|
| 336 |
+
def reset_rag_service() -> None:
|
| 337 |
+
"""Reset the RAG service singleton (for testing)."""
|
| 338 |
+
global _rag_service
|
| 339 |
+
_rag_service = None
|
app/core/schemas.py
CHANGED
|
@@ -44,10 +44,6 @@ class TeamRole(str, Enum):
|
|
| 44 |
JUDGE = "judge"
|
| 45 |
|
| 46 |
|
| 47 |
-
# ─────────────────────────────────────────────────────────────────────────────
|
| 48 |
-
# Enhanced Judge Output Schema
|
| 49 |
-
# ─────────────────────────────────────────────────────────────────────────────
|
| 50 |
-
|
| 51 |
|
| 52 |
class JudgeIssue(BaseModel):
|
| 53 |
id: str = Field(..., description="Issue ID or related FR/NFR ID")
|
|
@@ -69,11 +65,6 @@ class JudgeOutput(BaseModel):
|
|
| 69 |
reasoning: str
|
| 70 |
|
| 71 |
|
| 72 |
-
# ─────────────────────────────────────────────────────────────────────────────
|
| 73 |
-
# Existing Schemas (unchanged structure, enhanced docs)
|
| 74 |
-
# ─────────────────────────────────────────────────────────────────────────────
|
| 75 |
-
|
| 76 |
-
|
| 77 |
class AgentMessage(BaseModel):
|
| 78 |
role: TeamRole
|
| 79 |
content: str
|
|
@@ -130,7 +121,7 @@ class TokenData(BaseModel):
|
|
| 130 |
class ProjectBase(BaseModel):
|
| 131 |
title: str
|
| 132 |
description: str | None = None
|
| 133 |
-
artifacts: dict[str, Any]
|
| 134 |
|
| 135 |
|
| 136 |
class ProjectCreate(ProjectBase):
|
|
|
|
| 44 |
JUDGE = "judge"
|
| 45 |
|
| 46 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
|
| 48 |
class JudgeIssue(BaseModel):
|
| 49 |
id: str = Field(..., description="Issue ID or related FR/NFR ID")
|
|
|
|
| 65 |
reasoning: str
|
| 66 |
|
| 67 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 68 |
class AgentMessage(BaseModel):
|
| 69 |
role: TeamRole
|
| 70 |
content: str
|
|
|
|
| 121 |
class ProjectBase(BaseModel):
|
| 122 |
title: str
|
| 123 |
description: str | None = None
|
| 124 |
+
artifacts: dict[str, Any]
|
| 125 |
|
| 126 |
|
| 127 |
class ProjectCreate(ProjectBase):
|
app/prompts/product_owner.md
CHANGED
|
@@ -13,10 +13,12 @@ Great product ownership means translating user needs and business goals into a c
|
|
| 13 |
1. Analyze input from the Project Refiner.
|
| 14 |
2. Define a concise product vision aligned with strategic objectives.
|
| 15 |
3. Identify and prioritize key features using MoSCoW (Must, Should, Could, Won't).
|
| 16 |
-
4. Write user stories that capture real user goals and benefits (INVEST criteria).
|
| 17 |
5. Establish clear, testable acceptance criteria for each feature.
|
| 18 |
6. Document assumptions and open questions.
|
| 19 |
|
|
|
|
|
|
|
| 20 |
**Output Structure:**
|
| 21 |
## MARKDOWN
|
| 22 |
|
|
@@ -27,20 +29,39 @@ Great product ownership means translating user needs and business goals into a c
|
|
| 27 |
### Must Have (MVP)
|
| 28 |
- **F1:** [Title] - [Brief description]
|
| 29 |
- **F2:** [Title] - [Brief description]
|
|
|
|
|
|
|
| 30 |
|
| 31 |
### Should Have (Post-MVP)
|
| 32 |
-
- **
|
|
|
|
| 33 |
|
| 34 |
## User Stories
|
| 35 |
1. **US1:** As a [user type], I want [goal] so that [benefit]
|
| 36 |
- **Acceptance Criteria:**
|
| 37 |
- [Criterion 1]
|
| 38 |
- [Criterion 2]
|
|
|
|
|
|
|
| 39 |
2. **US2:** As a [user type], I want [goal] so that [benefit]
|
| 40 |
- **Acceptance Criteria:**
|
| 41 |
- [Criterion 1]
|
| 42 |
- [Criterion 2]
|
| 43 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
## Assumptions & Constraints
|
| 45 |
[List any assumptions made about the project scope or constraints]
|
| 46 |
|
|
|
|
| 13 |
1. Analyze input from the Project Refiner.
|
| 14 |
2. Define a concise product vision aligned with strategic objectives.
|
| 15 |
3. Identify and prioritize key features using MoSCoW (Must, Should, Could, Won't).
|
| 16 |
+
4. Write **AT LEAST 4-6 user stories** that capture real user goals and benefits (INVEST criteria).
|
| 17 |
5. Establish clear, testable acceptance criteria for each feature.
|
| 18 |
6. Document assumptions and open questions.
|
| 19 |
|
| 20 |
+
**IMPORTANT:** Generate AT LEAST 4 user stories (US1, US2, US3, US4 minimum). For MVP scope, aim for 4-6 user stories to provide adequate coverage.
|
| 21 |
+
|
| 22 |
**Output Structure:**
|
| 23 |
## MARKDOWN
|
| 24 |
|
|
|
|
| 29 |
### Must Have (MVP)
|
| 30 |
- **F1:** [Title] - [Brief description]
|
| 31 |
- **F2:** [Title] - [Brief description]
|
| 32 |
+
- **F3:** [Title] - [Brief description]
|
| 33 |
+
- **F4:** [Title] - [Brief description]
|
| 34 |
|
| 35 |
### Should Have (Post-MVP)
|
| 36 |
+
- **F5:** [Title] - [Brief description]
|
| 37 |
+
- **F6:** [Title] - [Brief description]
|
| 38 |
|
| 39 |
## User Stories
|
| 40 |
1. **US1:** As a [user type], I want [goal] so that [benefit]
|
| 41 |
- **Acceptance Criteria:**
|
| 42 |
- [Criterion 1]
|
| 43 |
- [Criterion 2]
|
| 44 |
+
- [Criterion 3]
|
| 45 |
+
|
| 46 |
2. **US2:** As a [user type], I want [goal] so that [benefit]
|
| 47 |
- **Acceptance Criteria:**
|
| 48 |
- [Criterion 1]
|
| 49 |
- [Criterion 2]
|
| 50 |
|
| 51 |
+
3. **US3:** As a [user type], I want [goal] so that [benefit]
|
| 52 |
+
- **Acceptance Criteria:**
|
| 53 |
+
- [Criterion 1]
|
| 54 |
+
- [Criterion 2]
|
| 55 |
+
|
| 56 |
+
4. **US4:** As a [user type], I want [goal] so that [benefit]
|
| 57 |
+
- **Acceptance Criteria:**
|
| 58 |
+
- [Criterion 1]
|
| 59 |
+
- [Criterion 2]
|
| 60 |
+
|
| 61 |
+
5. **US5:** As a [user type], I want [goal] so that [benefit]
|
| 62 |
+
- **Acceptance Criteria:**
|
| 63 |
+
- [Criterion 1]
|
| 64 |
+
|
| 65 |
## Assumptions & Constraints
|
| 66 |
[List any assumptions made about the project scope or constraints]
|
| 67 |
|
app/routers/health.py
CHANGED
|
@@ -26,6 +26,14 @@ from app.core.resilience import get_all_circuit_breakers, get_all_request_queues
|
|
| 26 |
|
| 27 |
load_dotenv()
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
router = APIRouter(prefix="/health", tags=["Health"])
|
| 30 |
|
| 31 |
|
|
@@ -60,15 +68,17 @@ async def readiness_probe():
|
|
| 60 |
if not any(check in checks for check in critical_checks):
|
| 61 |
is_ready = True
|
| 62 |
|
| 63 |
-
status_code =
|
|
|
|
|
|
|
| 64 |
|
| 65 |
return JSONResponse(
|
| 66 |
status_code=status_code,
|
| 67 |
content={
|
| 68 |
"status": "ready" if is_ready else "not_ready",
|
| 69 |
"checks": checks,
|
| 70 |
-
"timestamp": datetime.now(UTC).isoformat()
|
| 71 |
-
}
|
| 72 |
)
|
| 73 |
|
| 74 |
|
|
@@ -87,24 +97,19 @@ async def detailed_health_check():
|
|
| 87 |
|
| 88 |
# Get circuit breaker status
|
| 89 |
circuit_breakers = {
|
| 90 |
-
name: cb.get_status()
|
| 91 |
-
for name, cb in get_all_circuit_breakers().items()
|
| 92 |
}
|
| 93 |
|
| 94 |
# Get request queue status
|
| 95 |
request_queues = {
|
| 96 |
-
name: queue.get_status()
|
| 97 |
-
for name, queue in get_all_request_queues().items()
|
| 98 |
}
|
| 99 |
|
| 100 |
# Get provider status
|
| 101 |
provider_status = get_provider_manager().get_provider_status()
|
| 102 |
|
| 103 |
# Overall status
|
| 104 |
-
all_healthy = all(
|
| 105 |
-
check.get("status") == "healthy"
|
| 106 |
-
for check in checks.values()
|
| 107 |
-
)
|
| 108 |
|
| 109 |
return {
|
| 110 |
"status": "healthy" if all_healthy else "degraded",
|
|
@@ -116,15 +121,9 @@ async def detailed_health_check():
|
|
| 116 |
"circuit_breakers": circuit_breakers,
|
| 117 |
"request_queues": request_queues,
|
| 118 |
"llm_providers": provider_status,
|
| 119 |
-
"performance": {
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
},
|
| 123 |
-
"errors": {
|
| 124 |
-
"window_seconds": 3600,
|
| 125 |
-
"summary": error_summary
|
| 126 |
-
},
|
| 127 |
-
"cost": cost_stats
|
| 128 |
}
|
| 129 |
|
| 130 |
|
|
@@ -140,8 +139,7 @@ async def get_metrics():
|
|
| 140 |
# Format as Prometheus-style metrics (simplified)
|
| 141 |
metrics = {
|
| 142 |
"specsbeforecode_requests_total": sum(
|
| 143 |
-
stats.get("count", 0)
|
| 144 |
-
for stats in performance_stats.values()
|
| 145 |
),
|
| 146 |
"specsbeforecode_tokens_used_monthly": cost_stats.get("monthly_tokens_used", 0),
|
| 147 |
"specsbeforecode_budget_remaining": cost_stats.get("budget_remaining", 0),
|
|
@@ -152,12 +150,54 @@ async def get_metrics():
|
|
| 152 |
for op, stats in performance_stats.items():
|
| 153 |
safe_op = op.replace(".", "_").replace("-", "_")
|
| 154 |
metrics[f"specsbeforecode_op_{safe_op}_count"] = stats.get("count", 0)
|
| 155 |
-
metrics[f"specsbeforecode_op_{safe_op}_avg_duration_ms"] = stats.get(
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
return metrics
|
| 159 |
|
| 160 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
async def _run_health_checks() -> dict[str, dict[str, Any]]:
|
| 162 |
"""Run all health checks and return results.
|
| 163 |
|
|
@@ -191,14 +231,11 @@ def _check_database() -> dict[str, Any]:
|
|
| 191 |
with engine.connect() as conn:
|
| 192 |
conn.execute(text("SELECT 1"))
|
| 193 |
|
| 194 |
-
return {
|
| 195 |
-
"status": "healthy",
|
| 196 |
-
"message": "Database connection successful"
|
| 197 |
-
}
|
| 198 |
except Exception as e:
|
| 199 |
return {
|
| 200 |
"status": "unhealthy",
|
| 201 |
-
"message": f"Database connection failed: {str(e)}"
|
| 202 |
}
|
| 203 |
|
| 204 |
|
|
@@ -208,10 +245,7 @@ def _check_nvidia_api() -> dict[str, Any]:
|
|
| 208 |
base_url = os.getenv("NVIDIA_BASE_URL")
|
| 209 |
|
| 210 |
if not api_key:
|
| 211 |
-
return {
|
| 212 |
-
"status": "unhealthy",
|
| 213 |
-
"message": "NVIDIA_API_KEY not configured"
|
| 214 |
-
}
|
| 215 |
|
| 216 |
# Check circuit breaker status instead of making actual API call
|
| 217 |
circuit_breakers = get_all_circuit_breakers()
|
|
@@ -220,13 +254,13 @@ def _check_nvidia_api() -> dict[str, Any]:
|
|
| 220 |
if nvidia_cb and nvidia_cb.state.value == "open":
|
| 221 |
return {
|
| 222 |
"status": "degraded",
|
| 223 |
-
"message": "Circuit breaker is open - service may be experiencing issues"
|
| 224 |
}
|
| 225 |
|
| 226 |
return {
|
| 227 |
"status": "healthy",
|
| 228 |
"message": "NVIDIA API configured",
|
| 229 |
-
"base_url": base_url
|
| 230 |
}
|
| 231 |
|
| 232 |
|
|
@@ -238,12 +272,12 @@ def _check_langsmith() -> dict[str, Any]:
|
|
| 238 |
if not api_key:
|
| 239 |
return {
|
| 240 |
"status": "degraded",
|
| 241 |
-
"message": "LangSmith API key not configured - observability limited"
|
| 242 |
}
|
| 243 |
|
| 244 |
return {
|
| 245 |
"status": "healthy",
|
| 246 |
"message": "LangSmith configured",
|
| 247 |
"tracing_enabled": tracing,
|
| 248 |
-
"project": os.getenv("LANGSMITH_PROJECT", "default")
|
| 249 |
}
|
|
|
|
| 26 |
|
| 27 |
load_dotenv()
|
| 28 |
|
| 29 |
+
|
| 30 |
+
# Lazy import for RAG to avoid circular imports
|
| 31 |
+
def _get_rag_service():
|
| 32 |
+
from app.core.rag import get_rag_service
|
| 33 |
+
|
| 34 |
+
return get_rag_service()
|
| 35 |
+
|
| 36 |
+
|
| 37 |
router = APIRouter(prefix="/health", tags=["Health"])
|
| 38 |
|
| 39 |
|
|
|
|
| 68 |
if not any(check in checks for check in critical_checks):
|
| 69 |
is_ready = True
|
| 70 |
|
| 71 |
+
status_code = (
|
| 72 |
+
status.HTTP_200_OK if is_ready else status.HTTP_503_SERVICE_UNAVAILABLE
|
| 73 |
+
)
|
| 74 |
|
| 75 |
return JSONResponse(
|
| 76 |
status_code=status_code,
|
| 77 |
content={
|
| 78 |
"status": "ready" if is_ready else "not_ready",
|
| 79 |
"checks": checks,
|
| 80 |
+
"timestamp": datetime.now(UTC).isoformat(),
|
| 81 |
+
},
|
| 82 |
)
|
| 83 |
|
| 84 |
|
|
|
|
| 97 |
|
| 98 |
# Get circuit breaker status
|
| 99 |
circuit_breakers = {
|
| 100 |
+
name: cb.get_status() for name, cb in get_all_circuit_breakers().items()
|
|
|
|
| 101 |
}
|
| 102 |
|
| 103 |
# Get request queue status
|
| 104 |
request_queues = {
|
| 105 |
+
name: queue.get_status() for name, queue in get_all_request_queues().items()
|
|
|
|
| 106 |
}
|
| 107 |
|
| 108 |
# Get provider status
|
| 109 |
provider_status = get_provider_manager().get_provider_status()
|
| 110 |
|
| 111 |
# Overall status
|
| 112 |
+
all_healthy = all(check.get("status") == "healthy" for check in checks.values())
|
|
|
|
|
|
|
|
|
|
| 113 |
|
| 114 |
return {
|
| 115 |
"status": "healthy" if all_healthy else "degraded",
|
|
|
|
| 121 |
"circuit_breakers": circuit_breakers,
|
| 122 |
"request_queues": request_queues,
|
| 123 |
"llm_providers": provider_status,
|
| 124 |
+
"performance": {"window_seconds": 300, "operations": performance_stats},
|
| 125 |
+
"errors": {"window_seconds": 3600, "summary": error_summary},
|
| 126 |
+
"cost": cost_stats,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
}
|
| 128 |
|
| 129 |
|
|
|
|
| 139 |
# Format as Prometheus-style metrics (simplified)
|
| 140 |
metrics = {
|
| 141 |
"specsbeforecode_requests_total": sum(
|
| 142 |
+
stats.get("count", 0) for stats in performance_stats.values()
|
|
|
|
| 143 |
),
|
| 144 |
"specsbeforecode_tokens_used_monthly": cost_stats.get("monthly_tokens_used", 0),
|
| 145 |
"specsbeforecode_budget_remaining": cost_stats.get("budget_remaining", 0),
|
|
|
|
| 150 |
for op, stats in performance_stats.items():
|
| 151 |
safe_op = op.replace(".", "_").replace("-", "_")
|
| 152 |
metrics[f"specsbeforecode_op_{safe_op}_count"] = stats.get("count", 0)
|
| 153 |
+
metrics[f"specsbeforecode_op_{safe_op}_avg_duration_ms"] = stats.get(
|
| 154 |
+
"avg_duration_ms", 0
|
| 155 |
+
)
|
| 156 |
+
metrics[f"specsbeforecode_op_{safe_op}_success_rate"] = stats.get(
|
| 157 |
+
"success_rate", 0
|
| 158 |
+
)
|
| 159 |
|
| 160 |
return metrics
|
| 161 |
|
| 162 |
|
| 163 |
+
@router.get("/rag")
|
| 164 |
+
async def rag_health_check():
|
| 165 |
+
"""
|
| 166 |
+
Check RAG (Retrieval-Augmented Generation) service health.
|
| 167 |
+
|
| 168 |
+
Returns:
|
| 169 |
+
- MongoDB connection status
|
| 170 |
+
- Configured collections
|
| 171 |
+
- Missing collections that need setup
|
| 172 |
+
- Fallback store status
|
| 173 |
+
"""
|
| 174 |
+
try:
|
| 175 |
+
rag_service = _get_rag_service()
|
| 176 |
+
health = rag_service.health_check()
|
| 177 |
+
|
| 178 |
+
# Add role information
|
| 179 |
+
roles_with_rag = rag_service.get_roles_with_rag()
|
| 180 |
+
health["roles_with_rag"] = [role.value for role in roles_with_rag]
|
| 181 |
+
health["mongodb_available"] = rag_service.is_mongodb_available()
|
| 182 |
+
|
| 183 |
+
# Determine overall status
|
| 184 |
+
if health.get("mongodb", {}).get("status") == "connected":
|
| 185 |
+
health["status"] = "healthy"
|
| 186 |
+
elif health.get("fallback_store_initialized"):
|
| 187 |
+
health["status"] = "degraded"
|
| 188 |
+
health["message"] = "Using in-memory fallback - MongoDB not available"
|
| 189 |
+
else:
|
| 190 |
+
health["status"] = "unhealthy"
|
| 191 |
+
health["message"] = "No RAG backend available"
|
| 192 |
+
|
| 193 |
+
return health
|
| 194 |
+
except Exception as e:
|
| 195 |
+
return {
|
| 196 |
+
"status": "error",
|
| 197 |
+
"message": str(e),
|
| 198 |
+
}
|
| 199 |
+
|
| 200 |
+
|
| 201 |
async def _run_health_checks() -> dict[str, dict[str, Any]]:
|
| 202 |
"""Run all health checks and return results.
|
| 203 |
|
|
|
|
| 231 |
with engine.connect() as conn:
|
| 232 |
conn.execute(text("SELECT 1"))
|
| 233 |
|
| 234 |
+
return {"status": "healthy", "message": "Database connection successful"}
|
|
|
|
|
|
|
|
|
|
| 235 |
except Exception as e:
|
| 236 |
return {
|
| 237 |
"status": "unhealthy",
|
| 238 |
+
"message": f"Database connection failed: {str(e)}",
|
| 239 |
}
|
| 240 |
|
| 241 |
|
|
|
|
| 245 |
base_url = os.getenv("NVIDIA_BASE_URL")
|
| 246 |
|
| 247 |
if not api_key:
|
| 248 |
+
return {"status": "unhealthy", "message": "NVIDIA_API_KEY not configured"}
|
|
|
|
|
|
|
|
|
|
| 249 |
|
| 250 |
# Check circuit breaker status instead of making actual API call
|
| 251 |
circuit_breakers = get_all_circuit_breakers()
|
|
|
|
| 254 |
if nvidia_cb and nvidia_cb.state.value == "open":
|
| 255 |
return {
|
| 256 |
"status": "degraded",
|
| 257 |
+
"message": "Circuit breaker is open - service may be experiencing issues",
|
| 258 |
}
|
| 259 |
|
| 260 |
return {
|
| 261 |
"status": "healthy",
|
| 262 |
"message": "NVIDIA API configured",
|
| 263 |
+
"base_url": base_url,
|
| 264 |
}
|
| 265 |
|
| 266 |
|
|
|
|
| 272 |
if not api_key:
|
| 273 |
return {
|
| 274 |
"status": "degraded",
|
| 275 |
+
"message": "LangSmith API key not configured - observability limited",
|
| 276 |
}
|
| 277 |
|
| 278 |
return {
|
| 279 |
"status": "healthy",
|
| 280 |
"message": "LangSmith configured",
|
| 281 |
"tracing_enabled": tracing,
|
| 282 |
+
"project": os.getenv("LANGSMITH_PROJECT", "default"),
|
| 283 |
}
|
corpus_rag/RAG_INDEX_DECISION.md
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# RAG Index Decision Log
|
| 2 |
+
|
| 3 |
+
**Date:** February 6, 2026
|
| 4 |
+
**Context:** Testing on MongoDB Atlas Free Tier (M0)
|
| 5 |
+
|
| 6 |
+
## Decision
|
| 7 |
+
|
| 8 |
+
Due to the free tier's limitation of **3 Atlas Search (FTS) indexes maximum**, only 3 agent collections have active vector search indexes.
|
| 9 |
+
|
| 10 |
+
### Selected Agents for Indexing
|
| 11 |
+
|
| 12 |
+
| Rank | Agent Role | Rationale |
|
| 13 |
+
|------|------------|-----------|
|
| 14 |
+
| 1 | **Product Owner** | Foundation of all downstream artifacts. PRD quality affects every subsequent phase. |
|
| 15 |
+
| 2 | **Solution Architect** | Critical technical decisions impact feasibility, scalability, and integration across all phases. |
|
| 16 |
+
| 3 | **Technical Writer** | Documentation quality directly affects user-facing output and project success metrics. |
|
| 17 |
+
|
| 18 |
+
### Excluded Agents
|
| 19 |
+
|
| 20 |
+
| Agent Role | Reason |
|
| 21 |
+
|------------|--------|
|
| 22 |
+
| Business Analyst | Lower priority for MVP testing; business rules can be derived from PO output |
|
| 23 |
+
| Security Analyst | Security patterns can use generic guidance; threat modeling less critical for prototypes |
|
| 24 |
+
| UX Designer | UI patterns are more intuitive; prototyping benefits less from RAG |
|
| 25 |
+
| API Designer | API contracts can be derived from architecture |
|
| 26 |
+
| QA Strategist | Testing approaches are relatively standardized |
|
| 27 |
+
| DevOps Architect | Deployment patterns less critical for initial spec generation |
|
| 28 |
+
| Environment Engineer | Setup guides are procedural and template-based |
|
| 29 |
+
| Data Architect | Data models can be derived from architecture and requirements |
|
| 30 |
+
| API Designer | (already covered) |
|
| 31 |
+
|
| 32 |
+
## Future Upgrade Path
|
| 33 |
+
|
| 34 |
+
When upgrading to M10+ cluster:
|
| 35 |
+
1. Enable indexes on all 11 collections
|
| 36 |
+
2. Update `scripts/setup_mongodb_indexes.py` to remove the index limit logic
|
| 37 |
+
3. Run seed script again if needed
|
| 38 |
+
|
| 39 |
+
## Current Index Status
|
| 40 |
+
|
| 41 |
+
```
|
| 42 |
+
Active Indexes (3):
|
| 43 |
+
- rag_product_owner
|
| 44 |
+
- rag_solution_architect
|
| 45 |
+
- rag_technical_writer
|
| 46 |
+
|
| 47 |
+
Collections without Indexes (8):
|
| 48 |
+
- rag_business_analyst
|
| 49 |
+
- rag_data_architect
|
| 50 |
+
- rag_security_analyst
|
| 51 |
+
- rag_ux_designer
|
| 52 |
+
- rag_api_designer
|
| 53 |
+
- rag_qa_strategist
|
| 54 |
+
- rag_devops_architect
|
| 55 |
+
- rag_environment_engineer
|
| 56 |
+
```
|
| 57 |
+
|
| 58 |
+
## Impact on Agents
|
| 59 |
+
|
| 60 |
+
Agents without RAG indexes will fall back to:
|
| 61 |
+
1. In-memory vector store (if documents exist)
|
| 62 |
+
2. No retrieval (empty context)
|
| 63 |
+
|
| 64 |
+
For production, all agents should have their own RAG collections indexed.
|
corpus_rag/api_designer/role_playbook.txt
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Role: API Designer
|
| 2 |
+
Purpose: Define API contracts, endpoints, and data shapes for services.
|
| 3 |
+
|
| 4 |
+
Core responsibilities
|
| 5 |
+
- Design REST/GraphQL endpoints and schemas.
|
| 6 |
+
- Specify request/response formats and error handling.
|
| 7 |
+
- Align with data models and business requirements.
|
| 8 |
+
|
| 9 |
+
Required sections in output
|
| 10 |
+
- API overview and base URL
|
| 11 |
+
- Endpoints with methods and payloads
|
| 12 |
+
- Error model and status codes
|
| 13 |
+
- Authentication and rate limiting notes
|
| 14 |
+
|
| 15 |
+
Quality gates
|
| 16 |
+
- Consistency: aligns with data models and UX needs
|
| 17 |
+
- Completeness: covers all workflows
|
| 18 |
+
- Clarity: unambiguous request/response schemas
|
corpus_rag/api_designer/standards_quickref.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Standards and frameworks (brief summaries)
|
| 2 |
+
|
| 3 |
+
OpenAPI Specification (OAS)
|
| 4 |
+
Standard for describing REST APIs. Use to produce a machine-readable contract and validation.
|
| 5 |
+
|
| 6 |
+
Microsoft REST API Guidelines
|
| 7 |
+
Conventions for resource naming, filtering, pagination, and errors. Use to standardize endpoints.
|
| 8 |
+
|
| 9 |
+
Google API Design Guide
|
| 10 |
+
Best practices for consistency and long-term evolution. Use for naming and error handling guidance.
|
corpus_rag/business_analyst/role_playbook.txt
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Role: Business Analyst
|
| 2 |
+
Purpose: Translate product goals into detailed business requirements, workflows, and business rules.
|
| 3 |
+
|
| 4 |
+
Core responsibilities
|
| 5 |
+
- Document workflows and process flows.
|
| 6 |
+
- Capture business rules, exceptions, and edge cases.
|
| 7 |
+
- Clarify functional requirements and constraints.
|
| 8 |
+
|
| 9 |
+
Required sections in output
|
| 10 |
+
- Business objectives and stakeholders
|
| 11 |
+
- Process flows (happy path and exceptions)
|
| 12 |
+
- Business rules and validations
|
| 13 |
+
- Functional requirements list
|
| 14 |
+
- Assumptions, risks, and dependencies
|
| 15 |
+
|
| 16 |
+
Quality gates
|
| 17 |
+
- Completeness: all major workflows covered
|
| 18 |
+
- Consistency: aligns with PRD and data models
|
| 19 |
+
- Traceability: each requirement ties to a goal or story
|
corpus_rag/business_analyst/standards_quickref.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Standards and frameworks (brief summaries)
|
| 2 |
+
|
| 3 |
+
BABOK (Business Analysis Body of Knowledge)
|
| 4 |
+
Industry standard for eliciting, analyzing, and managing requirements. Use it to structure stakeholder analysis and requirement validation.
|
| 5 |
+
|
| 6 |
+
BPMN 2.0 (Business Process Model and Notation)
|
| 7 |
+
Standard visual language for process flows. Use to represent end-to-end workflows, decision points, and exceptions.
|
| 8 |
+
|
| 9 |
+
ISO/IEC/IEEE 29148
|
| 10 |
+
Defines good requirements characteristics. Use to validate clarity, consistency, and testability.
|
corpus_rag/data_architect/role_playbook.txt
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Role: Data Architect
|
| 2 |
+
Purpose: Define data models, entities, relationships, and data governance concerns.
|
| 3 |
+
|
| 4 |
+
Core responsibilities
|
| 5 |
+
- Produce entity-relationship model and data dictionary.
|
| 6 |
+
- Define data validation rules and lifecycle.
|
| 7 |
+
- Ensure alignment with API contracts and business rules.
|
| 8 |
+
|
| 9 |
+
Required sections in output
|
| 10 |
+
- Entities and relationships
|
| 11 |
+
- Key fields and constraints
|
| 12 |
+
- Data dictionary (field definitions)
|
| 13 |
+
- Data lifecycle and retention notes
|
| 14 |
+
|
| 15 |
+
Quality gates
|
| 16 |
+
- Consistency: matches API and business requirements
|
| 17 |
+
- Completeness: covers all core domain entities
|
| 18 |
+
- Integrity: constraints and validation rules defined
|
corpus_rag/data_architect/standards_quickref.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Standards and frameworks (brief summaries)
|
| 2 |
+
|
| 3 |
+
DAMA-DMBOK
|
| 4 |
+
Comprehensive data management guide. Use for governance, data quality, and stewardship considerations.
|
| 5 |
+
|
| 6 |
+
UML for data models
|
| 7 |
+
Standard notation for entities and relationships. Use to document ERDs with clear cardinalities.
|
| 8 |
+
|
| 9 |
+
Database Answers patterns
|
| 10 |
+
Collection of common data model patterns. Use for inspiration and validation of schema design.
|
corpus_rag/devops_architect/role_playbook.txt
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Role: DevOps Architect
|
| 2 |
+
Purpose: Define CI/CD, deployment, and operational reliability plan.
|
| 3 |
+
|
| 4 |
+
Core responsibilities
|
| 5 |
+
- Design pipelines, environments, and monitoring.
|
| 6 |
+
- Define infrastructure requirements and scaling strategy.
|
| 7 |
+
- Align with security and architecture constraints.
|
| 8 |
+
|
| 9 |
+
Required sections in output
|
| 10 |
+
- CI/CD pipeline overview
|
| 11 |
+
- Environments and deployment strategy
|
| 12 |
+
- Monitoring, logging, and alerting
|
| 13 |
+
- Backup and rollback strategy
|
| 14 |
+
|
| 15 |
+
Quality gates
|
| 16 |
+
- Reliability: clear rollback and monitoring
|
| 17 |
+
- Security: secrets management and least privilege
|
| 18 |
+
- Consistency: aligns with architecture choices
|
corpus_rag/devops_architect/standards_quickref.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Standards and frameworks (brief summaries)
|
| 2 |
+
|
| 3 |
+
Google SRE Book
|
| 4 |
+
Reliability principles: SLIs/SLOs, error budgets, incident response. Use to set operational targets.
|
| 5 |
+
|
| 6 |
+
12-Factor App
|
| 7 |
+
Guides cloud-native deployment (stateless, config via env). Use to shape deployment and scaling approach.
|
| 8 |
+
|
| 9 |
+
Docker best practices
|
| 10 |
+
Image hygiene, minimal layers, and security hardening. Use for containerized build guidance.
|
corpus_rag/environment_engineer/role_playbook.txt
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Role: Environment Engineer
|
| 2 |
+
Purpose: Define local setup, developer tooling, and environment configuration.
|
| 3 |
+
|
| 4 |
+
Core responsibilities
|
| 5 |
+
- Provide step-by-step local setup and prerequisites.
|
| 6 |
+
- Document environment variables and tooling versions.
|
| 7 |
+
- Ensure developer onboarding is fast and repeatable.
|
| 8 |
+
|
| 9 |
+
Required sections in output
|
| 10 |
+
- Prerequisites and tooling versions
|
| 11 |
+
- Setup steps (install, configure, run)
|
| 12 |
+
- Environment variable reference
|
| 13 |
+
- Troubleshooting tips
|
| 14 |
+
|
| 15 |
+
Quality gates
|
| 16 |
+
- Reproducibility: steps work from clean machine
|
| 17 |
+
- Clarity: copy/paste friendly commands
|
| 18 |
+
- Consistency: matches DevOps and architecture choices
|
corpus_rag/environment_engineer/standards_quickref.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Standards and frameworks (brief summaries)
|
| 2 |
+
|
| 3 |
+
12-Factor App
|
| 4 |
+
Configuration and portability guidance. Use to structure environment variables and secrets.
|
| 5 |
+
|
| 6 |
+
Official tool docs (Node, Python, Docker, Git)
|
| 7 |
+
Use vendor-recommended install paths and versions to avoid inconsistencies.
|
| 8 |
+
|
| 9 |
+
Node/Python best practices
|
| 10 |
+
Use virtual envs, lock files, and deterministic builds. Apply to keep setup stable.
|
corpus_rag/product_owner/role_playbook.txt
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Role: Product Owner
|
| 2 |
+
Purpose: Translate refined project brief into a clear product requirements document (PRD) and prioritized user stories.
|
| 3 |
+
|
| 4 |
+
Core responsibilities
|
| 5 |
+
- Define product goals, scope, success metrics, and non-goals.
|
| 6 |
+
- Produce a PRD with features, user stories, acceptance criteria, and priorities.
|
| 7 |
+
- Resolve ambiguities from the project refiner output.
|
| 8 |
+
|
| 9 |
+
Required sections in output
|
| 10 |
+
- Product vision and target users
|
| 11 |
+
- Problem statement and goals
|
| 12 |
+
- Scope and non-goals
|
| 13 |
+
- Feature list with priority (MVP vs later)
|
| 14 |
+
- User stories with acceptance criteria
|
| 15 |
+
- Constraints, assumptions, and dependencies
|
| 16 |
+
- Success metrics and KPIs
|
| 17 |
+
|
| 18 |
+
Quality gates
|
| 19 |
+
- Completeness: all required sections present
|
| 20 |
+
- Clarity: unambiguous, testable acceptance criteria
|
| 21 |
+
- Feasibility: within constraints and timeline
|
| 22 |
+
- Consistency: aligns with project brief and later phase inputs
|
corpus_rag/product_owner/standards_quickref.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Standards and frameworks (brief summaries)
|
| 2 |
+
|
| 3 |
+
IEEE 29148 (Requirements Engineering)
|
| 4 |
+
Defines best practices for requirements quality: complete, consistent, unambiguous, verifiable, and feasible. Use it to structure the PRD and validate acceptance criteria.
|
| 5 |
+
|
| 6 |
+
User Story Mapping (Jeff Patton)
|
| 7 |
+
Organizes stories along user activities to keep scope visible. Apply to ensure MVP coverage and reveal gaps in flows.
|
| 8 |
+
|
| 9 |
+
Atlassian PRD guidance
|
| 10 |
+
Practical PRD structure: problem, goals, scope, user stories, risks. Use as a template for readable stakeholder documents.
|
corpus_rag/qa_strategist/role_playbook.txt
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Role: QA Strategist
|
| 2 |
+
Purpose: Define test strategy, coverage, and quality assurance approach.
|
| 3 |
+
|
| 4 |
+
Core responsibilities
|
| 5 |
+
- Create test plan across unit, integration, and E2E layers.
|
| 6 |
+
- Define risk-based testing priorities.
|
| 7 |
+
- Provide quality gates and acceptance criteria.
|
| 8 |
+
|
| 9 |
+
Required sections in output
|
| 10 |
+
- Test strategy and pyramid distribution
|
| 11 |
+
- Key test types and tooling
|
| 12 |
+
- Risk areas and regression scope
|
| 13 |
+
- Release quality gates
|
| 14 |
+
|
| 15 |
+
Quality gates
|
| 16 |
+
- Coverage: critical flows fully tested
|
| 17 |
+
- Balance: unit/integration/E2E ratios reasonable
|
| 18 |
+
- Traceability: tests map to requirements
|
corpus_rag/qa_strategist/standards_quickref.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Standards and frameworks (brief summaries)
|
| 2 |
+
|
| 3 |
+
ISO/IEC 25010
|
| 4 |
+
Software quality model (reliability, security, usability, etc.). Use to define quality attributes and test focus.
|
| 5 |
+
|
| 6 |
+
ISTQB Foundation Syllabus
|
| 7 |
+
Standard testing terminology and practices. Use for test design and reporting consistency.
|
| 8 |
+
|
| 9 |
+
Test Pyramid
|
| 10 |
+
Guides proportion of tests (more unit, fewer E2E). Use to keep suite fast and reliable.
|
corpus_rag/security_analyst/role_playbook.txt
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Role: Security Analyst
|
| 2 |
+
Purpose: Identify threats, define security requirements, and recommend mitigations.
|
| 3 |
+
|
| 4 |
+
Core responsibilities
|
| 5 |
+
- Produce threat model and security requirements.
|
| 6 |
+
- Specify authN/authZ approach, data protection, and logging.
|
| 7 |
+
- Highlight OWASP risks and mitigations.
|
| 8 |
+
|
| 9 |
+
Required sections in output
|
| 10 |
+
- Threat model (assets, threats, mitigations)
|
| 11 |
+
- Security requirements and controls
|
| 12 |
+
- Authentication and authorization strategy
|
| 13 |
+
- Data protection and privacy considerations
|
| 14 |
+
|
| 15 |
+
Quality gates
|
| 16 |
+
- Coverage: top web threats addressed
|
| 17 |
+
- Consistency: aligns with architecture and data model
|
| 18 |
+
- Practicality: mitigations are implementable
|
corpus_rag/security_analyst/standards_quickref.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Standards and frameworks (brief summaries)
|
| 2 |
+
|
| 3 |
+
OWASP Top 10
|
| 4 |
+
Top web app security risks (e.g., injection, auth failures). Use as a baseline checklist for threats and mitigations.
|
| 5 |
+
|
| 6 |
+
OWASP ASVS
|
| 7 |
+
Verification standard with security control requirements. Use to derive testable security requirements by level.
|
| 8 |
+
|
| 9 |
+
NIST Cybersecurity Framework
|
| 10 |
+
High-level framework (Identify, Protect, Detect, Respond, Recover). Use to ensure broad security posture.
|
corpus_rag/solution_architect/role_playbook.txt
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Role: Solution Architect
|
| 2 |
+
Purpose: Define system architecture, tech stack decisions, and high-level design patterns.
|
| 3 |
+
|
| 4 |
+
Core responsibilities
|
| 5 |
+
- Choose frontend/backend architecture (SPA/SSR), hosting approach, and integrations.
|
| 6 |
+
- Produce system design (components, data flow, deployment).
|
| 7 |
+
- Capture trade-offs and rationale (ADRs).
|
| 8 |
+
|
| 9 |
+
Required sections in output
|
| 10 |
+
- Architecture overview and constraints
|
| 11 |
+
- Key components and interfaces
|
| 12 |
+
- Tech stack choices with rationale
|
| 13 |
+
- Deployment topology and integration points
|
| 14 |
+
- Non-functional requirements (scalability, reliability)
|
| 15 |
+
|
| 16 |
+
Quality gates
|
| 17 |
+
- Feasibility: implementable with chosen stack
|
| 18 |
+
- Consistency: aligns with security, data, and API design
|
| 19 |
+
- Clarity: diagrams or structured description of components
|
corpus_rag/solution_architect/standards_quickref.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Standards and frameworks (brief summaries)
|
| 2 |
+
|
| 3 |
+
C4 Model
|
| 4 |
+
Simple, layered diagrams (Context, Container, Component, Code). Use to communicate architecture to different audiences.
|
| 5 |
+
|
| 6 |
+
12-Factor App
|
| 7 |
+
Best practices for cloud-native apps: config in env, stateless processes, logs as streams. Use to guide backend design and deployment.
|
| 8 |
+
|
| 9 |
+
AWS Well-Architected Framework
|
| 10 |
+
Five pillars (operational excellence, security, reliability, performance, cost). Use as a checklist for architecture decisions.
|
corpus_rag/technical_writer/role_playbook.txt
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Role: Technical Writer
|
| 2 |
+
Purpose: Create clear, structured documentation using consistent style and information architecture.
|
| 3 |
+
|
| 4 |
+
Core responsibilities
|
| 5 |
+
- Define doc set structure and templates.
|
| 6 |
+
- Standardize tone, terminology, and formatting.
|
| 7 |
+
- Ensure documentation is complete and user-focused.
|
| 8 |
+
|
| 9 |
+
Required sections in output
|
| 10 |
+
- Documentation structure (overview, how-to, reference)
|
| 11 |
+
- Writing style guidelines
|
| 12 |
+
- Template examples
|
| 13 |
+
- Update and maintenance guidance
|
| 14 |
+
|
| 15 |
+
Quality gates
|
| 16 |
+
- Clarity: readable and actionable content
|
| 17 |
+
- Consistency: uniform structure across docs
|
| 18 |
+
- Completeness: covers user journeys and references
|
corpus_rag/technical_writer/standards_quickref.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Standards and frameworks (brief summaries)
|
| 2 |
+
|
| 3 |
+
Google Developer Documentation Style Guide
|
| 4 |
+
Best practices for clear, consistent technical writing. Use to standardize tone and terminology.
|
| 5 |
+
|
| 6 |
+
Microsoft Writing Style Guide
|
| 7 |
+
Guidelines for UI labels, procedures, and accessibility. Use for consistent phrasing.
|
| 8 |
+
|
| 9 |
+
Diátaxis Framework
|
| 10 |
+
Documentation types: tutorials, how-to guides, reference, explanation. Use to structure the doc set.
|
corpus_rag/ux_designer/role_playbook.txt
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Role: UX Designer
|
| 2 |
+
Purpose: Define user flows, interaction patterns, and usability guidelines.
|
| 3 |
+
|
| 4 |
+
Core responsibilities
|
| 5 |
+
- Create user journeys and key screens/components.
|
| 6 |
+
- Ensure accessibility and usability best practices.
|
| 7 |
+
- Align UX with business workflows and data requirements.
|
| 8 |
+
|
| 9 |
+
Required sections in output
|
| 10 |
+
- User personas and primary tasks
|
| 11 |
+
- User flows (happy path and exceptions)
|
| 12 |
+
- Key UI components and layout guidelines
|
| 13 |
+
- Accessibility and usability notes
|
| 14 |
+
|
| 15 |
+
Quality gates
|
| 16 |
+
- Usability: clear flows and minimal friction
|
| 17 |
+
- Accessibility: meets WCAG 2.1 AA basics
|
| 18 |
+
- Consistency: aligns with business processes
|
corpus_rag/ux_designer/standards_quickref.txt
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Standards and frameworks (brief summaries)
|
| 2 |
+
|
| 3 |
+
Nielsen Norman Heuristics
|
| 4 |
+
10 usability principles (visibility, consistency, error prevention). Use to evaluate interaction design.
|
| 5 |
+
|
| 6 |
+
WCAG 2.1
|
| 7 |
+
Accessibility guidelines (perceivable, operable, understandable, robust). Use to ensure keyboard access and contrast.
|
| 8 |
+
|
| 9 |
+
Material Design (web UI patterns)
|
| 10 |
+
UI guidelines for consistency and components. Use as reference for layout and interaction patterns.
|
pyproject.toml
CHANGED
|
@@ -1,5 +1,3 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
[tool.ruff.lint]
|
| 4 |
exclude = ["tests", "tests/*", "*/tests/*", "*/tests.py"]
|
| 5 |
|
|
@@ -39,7 +37,9 @@ dependencies = [
|
|
| 39 |
# Database
|
| 40 |
"sqlalchemy>=2.0.0",
|
| 41 |
"psycopg2-binary>=2.9.9",
|
| 42 |
-
|
|
|
|
|
|
|
| 43 |
"hydra-core>=1.3.2",
|
| 44 |
"lightning>=2.6.0",
|
| 45 |
"fiddle>=0.3.0",
|
|
|
|
|
|
|
|
|
|
| 1 |
[tool.ruff.lint]
|
| 2 |
exclude = ["tests", "tests/*", "*/tests/*", "*/tests.py"]
|
| 3 |
|
|
|
|
| 37 |
# Database
|
| 38 |
"sqlalchemy>=2.0.0",
|
| 39 |
"psycopg2-binary>=2.9.9",
|
| 40 |
+
# MongoDB Vector Store
|
| 41 |
+
"pymongo>=4.6.0",
|
| 42 |
+
"langchain-mongodb>=0.1.0",
|
| 43 |
"hydra-core>=1.3.2",
|
| 44 |
"lightning>=2.6.0",
|
| 45 |
"fiddle>=0.3.0",
|
requirements.txt
CHANGED
|
@@ -1,5 +1,5 @@
|
|
| 1 |
# This file was autogenerated by uv via the following command:
|
| 2 |
-
# uv pip compile pyproject.toml -
|
| 3 |
absl-py==2.3.1
|
| 4 |
# via fiddle
|
| 5 |
aiohappyeyeballs==2.6.1
|
|
@@ -32,7 +32,6 @@ certifi==2025.11.12
|
|
| 32 |
# via
|
| 33 |
# httpcore
|
| 34 |
# httpx
|
| 35 |
-
# pinecone
|
| 36 |
# requests
|
| 37 |
cffi==2.0.0
|
| 38 |
# via cryptography
|
|
@@ -50,12 +49,14 @@ cryptography==46.0.3
|
|
| 50 |
# via python-jose
|
| 51 |
defusedxml==0.7.1
|
| 52 |
# via fpdf2
|
|
|
|
|
|
|
| 53 |
ecdsa==0.19.1
|
| 54 |
# via python-jose
|
| 55 |
fastapi==0.123.5
|
| 56 |
-
# via
|
| 57 |
fiddle==0.3.0
|
| 58 |
-
# via
|
| 59 |
filelock==3.20.2
|
| 60 |
# via torch
|
| 61 |
filetype==1.2.0
|
|
@@ -63,7 +64,7 @@ filetype==1.2.0
|
|
| 63 |
fonttools==4.61.0
|
| 64 |
# via fpdf2
|
| 65 |
fpdf2==2.8.5
|
| 66 |
-
# via
|
| 67 |
frozenlist==1.8.0
|
| 68 |
# via
|
| 69 |
# aiohttp
|
|
@@ -76,7 +77,7 @@ fsspec==2025.12.0
|
|
| 76 |
google-api-core==2.28.1
|
| 77 |
# via google-api-python-client
|
| 78 |
google-api-python-client==2.187.0
|
| 79 |
-
# via
|
| 80 |
google-auth==2.41.1
|
| 81 |
# via
|
| 82 |
# google-api-core
|
|
@@ -86,7 +87,7 @@ google-auth==2.41.1
|
|
| 86 |
google-auth-httplib2==0.2.1
|
| 87 |
# via google-api-python-client
|
| 88 |
google-auth-oauthlib==1.2.3
|
| 89 |
-
# via
|
| 90 |
googleapis-common-protos==1.72.0
|
| 91 |
# via google-api-core
|
| 92 |
graphviz==0.21
|
|
@@ -109,7 +110,7 @@ httpx==0.28.1
|
|
| 109 |
# langsmith
|
| 110 |
# upstash-redis
|
| 111 |
hydra-core==1.3.2
|
| 112 |
-
# via
|
| 113 |
idna==3.11
|
| 114 |
# via
|
| 115 |
# anyio
|
|
@@ -124,20 +125,34 @@ jsonpatch==1.33
|
|
| 124 |
# via langchain-core
|
| 125 |
jsonpointer==3.0.0
|
| 126 |
# via jsonpatch
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
langchain-core==1.1.0
|
| 128 |
# via
|
| 129 |
-
#
|
|
|
|
|
|
|
|
|
|
| 130 |
# langchain-nvidia-ai-endpoints
|
| 131 |
# langchain-text-splitters
|
| 132 |
# langgraph
|
| 133 |
# langgraph-checkpoint
|
| 134 |
# langgraph-prebuilt
|
|
|
|
|
|
|
| 135 |
langchain-nvidia-ai-endpoints==1.0.0
|
| 136 |
-
# via
|
| 137 |
langchain-text-splitters==1.0.0
|
| 138 |
-
# via
|
|
|
|
|
|
|
|
|
|
| 139 |
langgraph==1.0.4
|
| 140 |
-
# via
|
|
|
|
|
|
|
| 141 |
langgraph-checkpoint==3.0.1
|
| 142 |
# via
|
| 143 |
# langgraph
|
|
@@ -148,12 +163,15 @@ langgraph-sdk==0.2.12
|
|
| 148 |
# via langgraph
|
| 149 |
langsmith==0.4.53
|
| 150 |
# via
|
| 151 |
-
#
|
|
|
|
| 152 |
# langchain-core
|
|
|
|
|
|
|
| 153 |
libcst==1.8.6
|
| 154 |
# via fiddle
|
| 155 |
lightning==2.6.0
|
| 156 |
-
# via
|
| 157 |
lightning-utilities==0.15.2
|
| 158 |
# via
|
| 159 |
# lightning
|
|
@@ -171,7 +189,8 @@ networkx==3.6.1
|
|
| 171 |
# via torch
|
| 172 |
numpy==2.3.5
|
| 173 |
# via
|
| 174 |
-
#
|
|
|
|
| 175 |
# torchmetrics
|
| 176 |
oauthlib==3.3.1
|
| 177 |
# via requests-oauthlib
|
|
@@ -181,7 +200,6 @@ orjson==3.11.4
|
|
| 181 |
# via
|
| 182 |
# langgraph-sdk
|
| 183 |
# langsmith
|
| 184 |
-
# pinecone
|
| 185 |
ormsgpack==1.12.0
|
| 186 |
# via langgraph-checkpoint
|
| 187 |
packaging==24.2
|
|
@@ -191,19 +209,12 @@ packaging==24.2
|
|
| 191 |
# langsmith
|
| 192 |
# lightning
|
| 193 |
# lightning-utilities
|
| 194 |
-
# pinecone-plugin-assistant
|
| 195 |
# pytorch-lightning
|
| 196 |
# torchmetrics
|
| 197 |
passlib==1.7.4
|
| 198 |
-
# via
|
| 199 |
pillow==12.0.0
|
| 200 |
# via fpdf2
|
| 201 |
-
pinecone==8.0.0
|
| 202 |
-
# via ideasprinter-api (pyproject.toml)
|
| 203 |
-
pinecone-plugin-assistant==3.0.1
|
| 204 |
-
# via pinecone
|
| 205 |
-
pinecone-plugin-interface==0.0.7
|
| 206 |
-
# via pinecone
|
| 207 |
propcache==0.4.1
|
| 208 |
# via
|
| 209 |
# aiohttp
|
|
@@ -216,7 +227,7 @@ protobuf==6.33.2
|
|
| 216 |
# googleapis-common-protos
|
| 217 |
# proto-plus
|
| 218 |
psycopg2-binary==2.9.11
|
| 219 |
-
# via
|
| 220 |
pyasn1==0.6.1
|
| 221 |
# via
|
| 222 |
# pyasn1-modules
|
|
@@ -228,37 +239,46 @@ pycparser==2.23
|
|
| 228 |
# via cffi
|
| 229 |
pydantic==2.12.4
|
| 230 |
# via
|
| 231 |
-
#
|
| 232 |
# fastapi
|
|
|
|
|
|
|
| 233 |
# langchain-core
|
| 234 |
# langgraph
|
| 235 |
# langsmith
|
| 236 |
pydantic-core==2.41.5
|
| 237 |
# via pydantic
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 238 |
pyparsing==3.2.5
|
| 239 |
# via httplib2
|
| 240 |
-
python-dateutil==2.9.0.post0
|
| 241 |
-
# via pinecone
|
| 242 |
python-dotenv==1.2.1
|
| 243 |
-
# via
|
| 244 |
python-jose==3.5.0
|
| 245 |
-
# via
|
| 246 |
python-multipart==0.0.20
|
| 247 |
-
# via
|
| 248 |
pytorch-lightning==2.6.0
|
| 249 |
# via lightning
|
| 250 |
pyyaml==6.0.3
|
| 251 |
# via
|
|
|
|
| 252 |
# langchain-core
|
| 253 |
-
# libcst
|
| 254 |
# lightning
|
| 255 |
# omegaconf
|
| 256 |
# pytorch-lightning
|
|
|
|
|
|
|
| 257 |
requests==2.32.5
|
| 258 |
# via
|
| 259 |
# google-api-core
|
|
|
|
| 260 |
# langsmith
|
| 261 |
-
# pinecone-plugin-assistant
|
| 262 |
# requests-oauthlib
|
| 263 |
# requests-toolbelt
|
| 264 |
requests-oauthlib==2.0.0
|
|
@@ -274,11 +294,11 @@ setuptools==80.9.0
|
|
| 274 |
# lightning-utilities
|
| 275 |
# torch
|
| 276 |
six==1.17.0
|
| 277 |
-
# via
|
| 278 |
-
# ecdsa
|
| 279 |
-
# python-dateutil
|
| 280 |
sqlalchemy==2.0.45
|
| 281 |
-
# via
|
|
|
|
|
|
|
| 282 |
starlette==0.50.0
|
| 283 |
# via fastapi
|
| 284 |
sympy==1.14.0
|
|
@@ -300,35 +320,29 @@ tqdm==4.67.1
|
|
| 300 |
# pytorch-lightning
|
| 301 |
typing-extensions==4.15.0
|
| 302 |
# via
|
| 303 |
-
# aiosignal
|
| 304 |
-
# anyio
|
| 305 |
# fastapi
|
| 306 |
# fiddle
|
| 307 |
# langchain-core
|
| 308 |
# lightning
|
| 309 |
# lightning-utilities
|
| 310 |
-
# pinecone
|
| 311 |
# pydantic
|
| 312 |
# pydantic-core
|
| 313 |
# pytorch-lightning
|
| 314 |
# sqlalchemy
|
| 315 |
-
# starlette
|
| 316 |
# torch
|
| 317 |
# typing-inspection
|
| 318 |
typing-inspection==0.4.2
|
| 319 |
# via pydantic
|
| 320 |
upstash-redis==1.5.0
|
| 321 |
-
# via
|
| 322 |
uritemplate==4.2.0
|
| 323 |
# via google-api-python-client
|
| 324 |
urllib3==2.5.0
|
| 325 |
-
# via
|
| 326 |
-
# pinecone
|
| 327 |
-
# requests
|
| 328 |
uuid-utils==0.12.0
|
| 329 |
# via langsmith
|
| 330 |
uvicorn==0.38.0
|
| 331 |
-
# via
|
| 332 |
xxhash==3.6.0
|
| 333 |
# via langgraph
|
| 334 |
yarl==1.22.0
|
|
|
|
| 1 |
# This file was autogenerated by uv via the following command:
|
| 2 |
+
# uv pip compile pyproject.toml --output-file requirements.txt
|
| 3 |
absl-py==2.3.1
|
| 4 |
# via fiddle
|
| 5 |
aiohappyeyeballs==2.6.1
|
|
|
|
| 32 |
# via
|
| 33 |
# httpcore
|
| 34 |
# httpx
|
|
|
|
| 35 |
# requests
|
| 36 |
cffi==2.0.0
|
| 37 |
# via cryptography
|
|
|
|
| 49 |
# via python-jose
|
| 50 |
defusedxml==0.7.1
|
| 51 |
# via fpdf2
|
| 52 |
+
dnspython==2.8.0
|
| 53 |
+
# via pymongo
|
| 54 |
ecdsa==0.19.1
|
| 55 |
# via python-jose
|
| 56 |
fastapi==0.123.5
|
| 57 |
+
# via specs-before-code-api (pyproject.toml)
|
| 58 |
fiddle==0.3.0
|
| 59 |
+
# via specs-before-code-api (pyproject.toml)
|
| 60 |
filelock==3.20.2
|
| 61 |
# via torch
|
| 62 |
filetype==1.2.0
|
|
|
|
| 64 |
fonttools==4.61.0
|
| 65 |
# via fpdf2
|
| 66 |
fpdf2==2.8.5
|
| 67 |
+
# via specs-before-code-api (pyproject.toml)
|
| 68 |
frozenlist==1.8.0
|
| 69 |
# via
|
| 70 |
# aiohttp
|
|
|
|
| 77 |
google-api-core==2.28.1
|
| 78 |
# via google-api-python-client
|
| 79 |
google-api-python-client==2.187.0
|
| 80 |
+
# via specs-before-code-api (pyproject.toml)
|
| 81 |
google-auth==2.41.1
|
| 82 |
# via
|
| 83 |
# google-api-core
|
|
|
|
| 87 |
google-auth-httplib2==0.2.1
|
| 88 |
# via google-api-python-client
|
| 89 |
google-auth-oauthlib==1.2.3
|
| 90 |
+
# via specs-before-code-api (pyproject.toml)
|
| 91 |
googleapis-common-protos==1.72.0
|
| 92 |
# via google-api-core
|
| 93 |
graphviz==0.21
|
|
|
|
| 110 |
# langsmith
|
| 111 |
# upstash-redis
|
| 112 |
hydra-core==1.3.2
|
| 113 |
+
# via specs-before-code-api (pyproject.toml)
|
| 114 |
idna==3.11
|
| 115 |
# via
|
| 116 |
# anyio
|
|
|
|
| 125 |
# via langchain-core
|
| 126 |
jsonpointer==3.0.0
|
| 127 |
# via jsonpatch
|
| 128 |
+
langchain==1.1.2
|
| 129 |
+
# via langchain-mongodb
|
| 130 |
+
langchain-classic==1.0.0
|
| 131 |
+
# via langchain-mongodb
|
| 132 |
langchain-core==1.1.0
|
| 133 |
# via
|
| 134 |
+
# specs-before-code-api (pyproject.toml)
|
| 135 |
+
# langchain
|
| 136 |
+
# langchain-classic
|
| 137 |
+
# langchain-mongodb
|
| 138 |
# langchain-nvidia-ai-endpoints
|
| 139 |
# langchain-text-splitters
|
| 140 |
# langgraph
|
| 141 |
# langgraph-checkpoint
|
| 142 |
# langgraph-prebuilt
|
| 143 |
+
langchain-mongodb==0.9.0
|
| 144 |
+
# via specs-before-code-api (pyproject.toml)
|
| 145 |
langchain-nvidia-ai-endpoints==1.0.0
|
| 146 |
+
# via specs-before-code-api (pyproject.toml)
|
| 147 |
langchain-text-splitters==1.0.0
|
| 148 |
+
# via
|
| 149 |
+
# specs-before-code-api (pyproject.toml)
|
| 150 |
+
# langchain-classic
|
| 151 |
+
# langchain-mongodb
|
| 152 |
langgraph==1.0.4
|
| 153 |
+
# via
|
| 154 |
+
# specs-before-code-api (pyproject.toml)
|
| 155 |
+
# langchain
|
| 156 |
langgraph-checkpoint==3.0.1
|
| 157 |
# via
|
| 158 |
# langgraph
|
|
|
|
| 163 |
# via langgraph
|
| 164 |
langsmith==0.4.53
|
| 165 |
# via
|
| 166 |
+
# specs-before-code-api (pyproject.toml)
|
| 167 |
+
# langchain-classic
|
| 168 |
# langchain-core
|
| 169 |
+
lark==1.3.1
|
| 170 |
+
# via langchain-mongodb
|
| 171 |
libcst==1.8.6
|
| 172 |
# via fiddle
|
| 173 |
lightning==2.6.0
|
| 174 |
+
# via specs-before-code-api (pyproject.toml)
|
| 175 |
lightning-utilities==0.15.2
|
| 176 |
# via
|
| 177 |
# lightning
|
|
|
|
| 189 |
# via torch
|
| 190 |
numpy==2.3.5
|
| 191 |
# via
|
| 192 |
+
# specs-before-code-api (pyproject.toml)
|
| 193 |
+
# langchain-mongodb
|
| 194 |
# torchmetrics
|
| 195 |
oauthlib==3.3.1
|
| 196 |
# via requests-oauthlib
|
|
|
|
| 200 |
# via
|
| 201 |
# langgraph-sdk
|
| 202 |
# langsmith
|
|
|
|
| 203 |
ormsgpack==1.12.0
|
| 204 |
# via langgraph-checkpoint
|
| 205 |
packaging==24.2
|
|
|
|
| 209 |
# langsmith
|
| 210 |
# lightning
|
| 211 |
# lightning-utilities
|
|
|
|
| 212 |
# pytorch-lightning
|
| 213 |
# torchmetrics
|
| 214 |
passlib==1.7.4
|
| 215 |
+
# via specs-before-code-api (pyproject.toml)
|
| 216 |
pillow==12.0.0
|
| 217 |
# via fpdf2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 218 |
propcache==0.4.1
|
| 219 |
# via
|
| 220 |
# aiohttp
|
|
|
|
| 227 |
# googleapis-common-protos
|
| 228 |
# proto-plus
|
| 229 |
psycopg2-binary==2.9.11
|
| 230 |
+
# via specs-before-code-api (pyproject.toml)
|
| 231 |
pyasn1==0.6.1
|
| 232 |
# via
|
| 233 |
# pyasn1-modules
|
|
|
|
| 239 |
# via cffi
|
| 240 |
pydantic==2.12.4
|
| 241 |
# via
|
| 242 |
+
# specs-before-code-api (pyproject.toml)
|
| 243 |
# fastapi
|
| 244 |
+
# langchain
|
| 245 |
+
# langchain-classic
|
| 246 |
# langchain-core
|
| 247 |
# langgraph
|
| 248 |
# langsmith
|
| 249 |
pydantic-core==2.41.5
|
| 250 |
# via pydantic
|
| 251 |
+
pymongo==4.16.0
|
| 252 |
+
# via
|
| 253 |
+
# specs-before-code-api (pyproject.toml)
|
| 254 |
+
# langchain-mongodb
|
| 255 |
+
# pymongo-search-utils
|
| 256 |
+
pymongo-search-utils==0.3.0
|
| 257 |
+
# via langchain-mongodb
|
| 258 |
pyparsing==3.2.5
|
| 259 |
# via httplib2
|
|
|
|
|
|
|
| 260 |
python-dotenv==1.2.1
|
| 261 |
+
# via specs-before-code-api (pyproject.toml)
|
| 262 |
python-jose==3.5.0
|
| 263 |
+
# via specs-before-code-api (pyproject.toml)
|
| 264 |
python-multipart==0.0.20
|
| 265 |
+
# via specs-before-code-api (pyproject.toml)
|
| 266 |
pytorch-lightning==2.6.0
|
| 267 |
# via lightning
|
| 268 |
pyyaml==6.0.3
|
| 269 |
# via
|
| 270 |
+
# langchain-classic
|
| 271 |
# langchain-core
|
|
|
|
| 272 |
# lightning
|
| 273 |
# omegaconf
|
| 274 |
# pytorch-lightning
|
| 275 |
+
pyyaml-ft==8.0.0
|
| 276 |
+
# via libcst
|
| 277 |
requests==2.32.5
|
| 278 |
# via
|
| 279 |
# google-api-core
|
| 280 |
+
# langchain-classic
|
| 281 |
# langsmith
|
|
|
|
| 282 |
# requests-oauthlib
|
| 283 |
# requests-toolbelt
|
| 284 |
requests-oauthlib==2.0.0
|
|
|
|
| 294 |
# lightning-utilities
|
| 295 |
# torch
|
| 296 |
six==1.17.0
|
| 297 |
+
# via ecdsa
|
|
|
|
|
|
|
| 298 |
sqlalchemy==2.0.45
|
| 299 |
+
# via
|
| 300 |
+
# specs-before-code-api (pyproject.toml)
|
| 301 |
+
# langchain-classic
|
| 302 |
starlette==0.50.0
|
| 303 |
# via fastapi
|
| 304 |
sympy==1.14.0
|
|
|
|
| 320 |
# pytorch-lightning
|
| 321 |
typing-extensions==4.15.0
|
| 322 |
# via
|
|
|
|
|
|
|
| 323 |
# fastapi
|
| 324 |
# fiddle
|
| 325 |
# langchain-core
|
| 326 |
# lightning
|
| 327 |
# lightning-utilities
|
|
|
|
| 328 |
# pydantic
|
| 329 |
# pydantic-core
|
| 330 |
# pytorch-lightning
|
| 331 |
# sqlalchemy
|
|
|
|
| 332 |
# torch
|
| 333 |
# typing-inspection
|
| 334 |
typing-inspection==0.4.2
|
| 335 |
# via pydantic
|
| 336 |
upstash-redis==1.5.0
|
| 337 |
+
# via specs-before-code-api (pyproject.toml)
|
| 338 |
uritemplate==4.2.0
|
| 339 |
# via google-api-python-client
|
| 340 |
urllib3==2.5.0
|
| 341 |
+
# via requests
|
|
|
|
|
|
|
| 342 |
uuid-utils==0.12.0
|
| 343 |
# via langsmith
|
| 344 |
uvicorn==0.38.0
|
| 345 |
+
# via specs-before-code-api (pyproject.toml)
|
| 346 |
xxhash==3.6.0
|
| 347 |
# via langgraph
|
| 348 |
yarl==1.22.0
|
scripts/seed_rag_data.py
ADDED
|
@@ -0,0 +1,316 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Seed MongoDB collections with example documents for each agent role.
|
| 4 |
+
|
| 5 |
+
This script:
|
| 6 |
+
1. Reads documents from corpus_rag/<agent_role>/ directories
|
| 7 |
+
2. Splits them into chunks
|
| 8 |
+
3. Embeds and stores them in the appropriate MongoDB collection
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
Usage:
|
| 12 |
+
uv run python scripts/seed_rag_data.py # Seed all collections
|
| 13 |
+
uv run python scripts/seed_rag_data.py --role product_owner # Seed specific role
|
| 14 |
+
uv run python scripts/seed_rag_data.py --dry-run # Preview without inserting
|
| 15 |
+
|
| 16 |
+
Environment Variables:
|
| 17 |
+
MONGODB_URI - MongoDB connection string
|
| 18 |
+
MONGODB_DATABASE - Database name (default: specs_before_code)
|
| 19 |
+
NVIDIA_API_KEY - Required for generating embeddings
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
import argparse
|
| 23 |
+
import asyncio
|
| 24 |
+
import os
|
| 25 |
+
import sys
|
| 26 |
+
from pathlib import Path
|
| 27 |
+
|
| 28 |
+
# Add parent directory to path for imports
|
| 29 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 30 |
+
|
| 31 |
+
from dotenv import load_dotenv
|
| 32 |
+
|
| 33 |
+
load_dotenv()
|
| 34 |
+
|
| 35 |
+
# Mapping from TeamRole enum values to directory names
|
| 36 |
+
ROLE_DIRECTORIES = {
|
| 37 |
+
"product_owner": "product_owner",
|
| 38 |
+
"business_analyst": "business_analyst",
|
| 39 |
+
"solution_architect": "solution_architect",
|
| 40 |
+
"data_architect": "data_architect",
|
| 41 |
+
"security_analyst": "security_analyst",
|
| 42 |
+
"ux_designer": "ux_designer",
|
| 43 |
+
"api_designer": "api_designer",
|
| 44 |
+
"qa_strategist": "qa_strategist",
|
| 45 |
+
"devops_architect": "devops_architect",
|
| 46 |
+
"environment_engineer": "environment_engineer",
|
| 47 |
+
"technical_writer": "technical_writer",
|
| 48 |
+
}
|
| 49 |
+
|
| 50 |
+
# Base directory for corpus files
|
| 51 |
+
CORPUS_DIR = Path(__file__).parent.parent / "corpus_rag"
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
async def seed_collection(
    role_name: str,
    directory_name: str,
    dry_run: bool = False,
) -> dict:
    """
    Seed a single agent's collection with documents from directory.

    Reads every supported file under ``corpus_rag/<directory_name>/``,
    splits it into overlapping chunks, and stores the chunks (with
    embeddings) in the MongoDB collection for the given role.

    Args:
        role_name: The TeamRole enum value (e.g., "product_owner")
        directory_name: The subdirectory under corpus_rag/
        dry_run: If True, only count documents without inserting

    Returns:
        Dict with stats: role, files_found, chunks_found,
        chunks_inserted, and a list of error strings.
    """
    # Imported lazily so informational CLI modes work without the full stack.
    from langchain_core.documents import Document
    from langchain_text_splitters import RecursiveCharacterTextSplitter

    from app.core.mongodb_rag import get_mongodb_rag_service
    from app.core.schemas import TeamRole

    result = {
        "role": role_name,
        "files_found": 0,
        "chunks_found": 0,
        "chunks_inserted": 0,
        "errors": [],
    }

    # Validate the role before doing any file work.
    try:
        role = TeamRole(role_name)
    except ValueError:
        result["errors"].append(f"Invalid role: {role_name}")
        return result

    # Check directory
    dir_path = CORPUS_DIR / directory_name
    if not dir_path.exists():
        result["errors"].append(f"Directory not found: {dir_path}")
        return result

    # Overlapping chunks preserve context across chunk boundaries.
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True,
        separators=["\n\n", "\n", ". ", " ", ""],
    )

    # Find and process documents
    documents = []
    # frozenset gives O(1) membership tests for the suffix filter.
    supported_extensions = frozenset({".md", ".txt", ".yaml", ".yml"})

    for file_path in dir_path.glob("**/*"):
        # Structural filters first: skip directories, unsupported suffixes,
        # and the placeholder READMEs written by create_corpus_directories()
        # (they hold instructions for humans, not corpus content worth
        # embedding).
        if file_path.is_dir():
            continue
        if file_path.suffix.lower() not in supported_extensions:
            continue
        if file_path.name.lower() == "readme.md":
            continue

        result["files_found"] += 1

        try:
            content = file_path.read_text(encoding="utf-8")
            if not content.strip():
                continue

            # Split into chunks
            chunks = splitter.split_text(content)

            for i, chunk in enumerate(chunks):
                documents.append(
                    Document(
                        page_content=chunk,
                        metadata={
                            "source": file_path.name,
                            "chunk_index": i,
                            "total_chunks": len(chunks),
                            "role": role_name,
                            "file_path": str(file_path.relative_to(CORPUS_DIR)),
                        },
                    )
                )
        except Exception as e:
            # One unreadable file must not abort the whole seeding run.
            result["errors"].append(f"Error reading {file_path.name}: {e}")

    result["chunks_found"] = len(documents)

    if not documents:
        result["errors"].append("No documents found to seed")
        return result

    # Dry run - just report what would be done
    if dry_run:
        print(f" [DRY RUN] Would insert {len(documents)} chunks")
        return result

    # Insert documents
    try:
        rag_service = get_mongodb_rag_service()
        if not rag_service.is_available():
            result["errors"].append("MongoDB not available")
            return result

        ids = await rag_service.add_documents(documents, role)
        result["chunks_inserted"] = len(ids)
    except Exception as e:
        result["errors"].append(f"Error inserting documents: {e}")

    return result
| 165 |
+
|
| 166 |
+
|
| 167 |
+
async def seed_all(
    roles: list[str] | None = None,
    dry_run: bool = False,
) -> None:
    """
    Seed every agent collection, or only a chosen subset.

    Args:
        roles: Role names to seed; None means all known roles.
        dry_run: When True, report what would be inserted without writing.
    """
    # Resolve which role -> directory pairs to process.
    targets = ROLE_DIRECTORIES
    if roles:
        targets = {name: d for name, d in ROLE_DIRECTORIES.items() if name in roles}
        if not targets:
            print(f"Error: No valid roles in {roles}")
            print(f"Valid roles: {list(ROLE_DIRECTORIES.keys())}")
            return

    print(f"Seeding RAG collections{' [DRY RUN]' if dry_run else ''}...")
    print(f"Corpus directory: {CORPUS_DIR}")
    print()

    grand_found = 0
    grand_inserted = 0
    grand_errors = 0

    for name, subdir in targets.items():
        print(f"Processing: {name}")

        stats = await seed_collection(name, subdir, dry_run)

        grand_found += stats["chunks_found"]
        grand_inserted += stats["chunks_inserted"]

        errs = stats["errors"]
        if errs:
            grand_errors += len(errs)
            for err in errs:
                print(f" {err}")
        elif stats["chunks_inserted"] > 0:
            print(
                f" Inserted {stats['chunks_inserted']} chunks from {stats['files_found']} files"
            )
        elif stats["chunks_found"] > 0 and dry_run:
            print(
                f" - Found {stats['chunks_found']} chunks from {stats['files_found']} files"
            )
        else:
            print(f" - No documents found in corpus_rag/{subdir}/")

    # Final report across all processed roles.
    print("\n" + "=" * 50)
    print("SEEDING COMPLETE")
    print("=" * 50)
    print(f"Total chunks found: {grand_found}")
    if not dry_run:
        print(f"Total chunks inserted: {grand_inserted}")
    if grand_errors:
        print(f"Total errors: {grand_errors}")
    print()
|
| 229 |
+
|
| 230 |
+
|
| 231 |
+
def create_corpus_directories() -> None:
    """Create the corpus_rag directory tree with placeholder READMEs."""
    print(f"Creating corpus directories in: {CORPUS_DIR}")
    print()

    CORPUS_DIR.mkdir(exist_ok=True)

    for role, subdir in ROLE_DIRECTORIES.items():
        target = CORPUS_DIR / subdir
        target.mkdir(exist_ok=True)

        # Drop a README explaining what belongs here; never overwrite one.
        readme = target / "README.md"
        if not readme.exists():
            placeholder = (
                f"# {role.replace('_', ' ').title()} Examples\n\n"
                f"Place example documents for the {role} agent here.\n\n"
                f"## Supported Formats\n"
                f"- `.md` (Markdown)\n"
                f"- `.txt` (Plain text)\n"
                f"- `.yaml` / `.yml` (YAML)\n\n"
                f"## Content Guidelines\n"
                f"Add high-quality examples that the {role} agent can learn from.\n"
            )
            readme.write_text(placeholder, encoding="utf-8")

        print(f" {subdir}/")

    print("\nDone! Add your example documents to the directories above.")
|
| 260 |
+
|
| 261 |
+
|
| 262 |
+
def main():
    """Parse CLI arguments and dispatch to the requested seeding action."""
    parser = argparse.ArgumentParser(
        description="Seed MongoDB RAG collections with example documents"
    )
    parser.add_argument(
        "--role",
        type=str,
        help="Specific role to seed (e.g., product_owner)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview what would be seeded without inserting",
    )
    parser.add_argument(
        "--create-dirs",
        action="store_true",
        help="Create corpus_rag directory structure",
    )
    parser.add_argument(
        "--list-roles",
        action="store_true",
        help="List available role names",
    )
    args = parser.parse_args()

    # Informational modes need no database or API access.
    if args.list_roles:
        print("Available roles:")
        for role in ROLE_DIRECTORIES:
            print(f" - {role}")
        return

    if args.create_dirs:
        create_corpus_directories()
        return

    # A real seeding run needs the database connection string...
    if not args.dry_run and not os.getenv("MONGODB_URI"):
        print("Error: MONGODB_URI environment variable not set")
        print("Set it in your .env file or use --dry-run to preview")
        sys.exit(1)

    # ...and the embedding API key.
    if not args.dry_run and not os.getenv("NVIDIA_API_KEY"):
        print("Error: NVIDIA_API_KEY environment variable not set")
        print("Required for generating embeddings")
        sys.exit(1)

    selected = [args.role] if args.role else None
    asyncio.run(seed_all(roles=selected, dry_run=args.dry_run))
|
| 313 |
+
|
| 314 |
+
|
| 315 |
+
# Script entry point: parse CLI arguments and run the seeding workflow.
if __name__ == "__main__":
    main()
|
scripts/setup_mongodb_indexes.py
ADDED
|
@@ -0,0 +1,237 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Setup MongoDB Atlas Vector Search indexes for all agent RAG collections.
|
| 4 |
+
|
| 5 |
+
This script creates:
|
| 6 |
+
1. Collections for each agent role (if they don't exist)
|
| 7 |
+
2. Vector search indexes on each collection
|
| 8 |
+
|
| 9 |
+
Prerequisites:
|
| 10 |
+
- MongoDB Atlas M10+ cluster (Vector Search requires dedicated cluster)
|
| 11 |
+
- MONGODB_URI environment variable set
|
| 12 |
+
- Database user with dbAdmin permissions
|
| 13 |
+
|
| 14 |
+
Usage:
|
| 15 |
+
uv run python scripts/setup_mongodb_indexes.py
|
| 16 |
+
|
| 17 |
+
Environment Variables:
|
| 18 |
+
MONGODB_URI - MongoDB connection string
|
| 19 |
+
MONGODB_DATABASE - Database name (default: specs_before_code)
|
| 20 |
+
MONGODB_INDEX_NAME - Vector index name (default: vector_index)
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
import os
|
| 24 |
+
import sys
|
| 25 |
+
from pathlib import Path
|
| 26 |
+
|
| 27 |
+
# Add parent directory to path for imports
|
| 28 |
+
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 29 |
+
|
| 30 |
+
from dotenv import load_dotenv
|
| 31 |
+
|
| 32 |
+
load_dotenv()
|
| 33 |
+
|
| 34 |
+
# Collection names for all agent roles with RAG
|
| 35 |
+
RAG_COLLECTIONS = [
|
| 36 |
+
"rag_product_owner",
|
| 37 |
+
"rag_business_analyst",
|
| 38 |
+
"rag_solution_architect",
|
| 39 |
+
"rag_data_architect",
|
| 40 |
+
"rag_security_analyst",
|
| 41 |
+
"rag_ux_designer",
|
| 42 |
+
"rag_api_designer",
|
| 43 |
+
"rag_qa_strategist",
|
| 44 |
+
"rag_devops_architect",
|
| 45 |
+
"rag_environment_engineer",
|
| 46 |
+
"rag_technical_writer",
|
| 47 |
+
]
|
| 48 |
+
|
| 49 |
+
# Vector search index definition (Atlas "vectorSearch" index).
# Configured for nvidia/nv-embed-v1 which outputs 4096-dimensional embeddings.
# NOTE(review): numDimensions MUST match the embedding model the RAG service
# actually uses — llm_factory's default embedding model appears to be
# nvidia/nv-embedqa-e5-v5, a different model; confirm its output dimension
# before relying on 4096, otherwise vector inserts/queries will fail.
VECTOR_INDEX_DEFINITION = {
    "fields": [
        {
            "type": "vector",
            "path": "embedding",  # document field holding the embedding vector
            "numDimensions": 4096,  # nv-embed-v1 output dimension
            "similarity": "cosine",
        },
        # Filter fields for metadata queries
        {
            "type": "filter",
            "path": "metadata.source",
        },
        {
            "type": "filter",
            "path": "metadata.role",
        },
    ]
}
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def setup_indexes() -> None:
    """Create vector search indexes for all agent collections.

    Connects to MongoDB Atlas, creates each collection in RAG_COLLECTIONS
    if missing, and creates the vector search index described by
    VECTOR_INDEX_DEFINITION on each one. Exits the process with status 1
    on missing configuration or connection failure.
    """
    # pymongo is imported lazily so the module itself imports cleanly
    # even when the driver is not installed.
    try:
        from pymongo import MongoClient
        from pymongo.operations import SearchIndexModel
    except ImportError:
        print("Error: pymongo not installed. Run: uv add pymongo")
        sys.exit(1)

    # Get configuration
    uri = os.getenv("MONGODB_URI")
    db_name = os.getenv("MONGODB_DATABASE", "specs_before_code")
    index_name = os.getenv("MONGODB_INDEX_NAME", "vector_index")

    if not uri:
        print("Error: MONGODB_URI environment variable not set")
        print("\nSet it in your .env file:")
        print('MONGODB_URI="mongodb+srv://<user>:<pass>@<cluster>.mongodb.net/"')
        sys.exit(1)

    print("Connecting to MongoDB Atlas...")
    print(f"Database: {db_name}")
    print(f"Index name: {index_name}")
    print()

    try:
        client = MongoClient(uri, serverSelectionTimeoutMS=5000)
        # Ping so we fail fast with a clear message if the cluster is
        # unreachable instead of timing out inside the first real call.
        client.admin.command("ping")
        print("Connected successfully!\n")
    except Exception as e:
        print(f"Error: Failed to connect to MongoDB: {e}")
        sys.exit(1)

    # try/finally guarantees the client is closed even when per-collection
    # setup raises midway (the original leaked the connection in that case).
    try:
        db = client[db_name]
        existing_collections = set(db.list_collection_names())

        created_collections = []
        created_indexes = []
        existing_indexes = []
        errors = []

        for collection_name in RAG_COLLECTIONS:
            print(f"Setting up: {collection_name}")

            # Create collection if it doesn't exist
            if collection_name not in existing_collections:
                try:
                    db.create_collection(collection_name)
                    created_collections.append(collection_name)
                    print(" Created collection")
                except Exception as e:
                    errors.append(f" Failed to create collection: {e}")
                    print(errors[-1])
                    continue
            else:
                print(" - Collection exists")

            collection = db[collection_name]

            # Skip index creation if one with the same name is present.
            try:
                existing = list(collection.list_search_indexes())
                index_exists = any(idx.get("name") == index_name for idx in existing)

                if index_exists:
                    print(f" - Index '{index_name}' already exists")
                    existing_indexes.append(collection_name)
                    continue
            except Exception:
                # list_search_indexes might fail if no indexes exist
                pass

            # Create vector search index
            try:
                search_index = SearchIndexModel(
                    definition=VECTOR_INDEX_DEFINITION,
                    name=index_name,
                    type="vectorSearch",
                )
                collection.create_search_index(model=search_index)
                created_indexes.append(collection_name)
                print(f" Created vector index '{index_name}'")
            except Exception as e:
                error_msg = str(e).lower()
                # Races with a concurrent setup run surface as a duplicate
                # error rather than a failure worth reporting.
                if "already exists" in error_msg:
                    print(f" - Index '{index_name}' already exists")
                    existing_indexes.append(collection_name)
                else:
                    errors.append(f" Failed to create index: {e}")
                    print(errors[-1])

        # Summary
        print("\n" + "=" * 50)
        print("SETUP COMPLETE")
        print("=" * 50)

        if created_collections:
            print(f"\nCollections created ({len(created_collections)}):")
            for name in created_collections:
                print(f" - {name}")

        if created_indexes:
            print(f"\nIndexes created ({len(created_indexes)}):")
            for name in created_indexes:
                print(f" - {name}")
            print("\nNote: Vector search indexes take 1-2 minutes to become active.")
            print("Check status in Atlas UI: Database > Atlas Search")

        if existing_indexes:
            print(f"\nExisting indexes ({len(existing_indexes)}):")
            for name in existing_indexes:
                print(f" - {name}")

        if errors:
            print(f"\nErrors ({len(errors)}):")
            for err in errors:
                print(err)

        print()
    finally:
        client.close()
|
| 193 |
+
|
| 194 |
+
|
| 195 |
+
def check_index_status() -> None:
    """Report the vector-index status for every agent RAG collection."""
    try:
        from pymongo import MongoClient
    except ImportError:
        print("Error: pymongo not installed")
        return

    uri = os.getenv("MONGODB_URI")
    db_name = os.getenv("MONGODB_DATABASE", "specs_before_code")
    index_name = os.getenv("MONGODB_INDEX_NAME", "vector_index")

    if not uri:
        print("Error: MONGODB_URI not set")
        return

    client = MongoClient(uri)
    db = client[db_name]

    print(f"Checking index status in {db_name}...\n")

    for coll_name in RAG_COLLECTIONS:
        coll = db[coll_name]
        try:
            # First search index whose name matches, or None.
            match = next(
                (ix for ix in coll.list_search_indexes() if ix.get("name") == index_name),
                None,
            )
            if match is not None:
                print(f"{coll_name}: {match.get('status', 'unknown')}")
            else:
                print(f"{coll_name}: no index")
        except Exception as e:
            print(f"{coll_name}: error - {e}")

    client.close()
|
| 231 |
+
|
| 232 |
+
|
| 233 |
+
# Script entry point: `--status` reports index readiness for each
# collection; the default action creates collections and vector indexes.
if __name__ == "__main__":
    if len(sys.argv) > 1 and sys.argv[1] == "--status":
        check_index_status()
    else:
        setup_indexes()
|
uv.lock
CHANGED
|
@@ -568,6 +568,15 @@ wheels = [
|
|
| 568 |
{ url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" },
|
| 569 |
]
|
| 570 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 571 |
[[package]]
|
| 572 |
name = "ecdsa"
|
| 573 |
version = "0.19.1"
|
|
@@ -1032,6 +1041,38 @@ wheels = [
|
|
| 1032 |
{ url = "https://files.pythonhosted.org/packages/71/92/5e77f98553e9e75130c78900d000368476aed74276eb8ae8796f65f00918/jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942", size = 7595, upload-time = "2024-06-10T19:24:40.698Z" },
|
| 1033 |
]
|
| 1034 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1035 |
[[package]]
|
| 1036 |
name = "langchain-core"
|
| 1037 |
version = "1.2.5"
|
|
@@ -1051,6 +1092,25 @@ wheels = [
|
|
| 1051 |
{ url = "https://files.pythonhosted.org/packages/83/bd/9df897cbc98290bf71140104ee5b9777cf5291afb80333aa7da5a497339b/langchain_core-1.2.5-py3-none-any.whl", hash = "sha256:3255944ef4e21b2551facb319bfc426057a40247c0a05de5bd6f2fc021fbfa34", size = 484851, upload-time = "2025-12-22T23:45:30.525Z" },
|
| 1052 |
]
|
| 1053 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1054 |
[[package]]
|
| 1055 |
name = "langchain-nvidia-ai-endpoints"
|
| 1056 |
version = "1.0.0"
|
|
@@ -1152,6 +1212,15 @@ wheels = [
|
|
| 1152 |
{ url = "https://files.pythonhosted.org/packages/ed/d8/91a8b483b30e0708a8911df10b4ce04ebf2b4b8dde8d020c124aec77380a/langsmith-0.5.2-py3-none-any.whl", hash = "sha256:42f8b853a18dd4d5f7fa38c8ff29e38da065a727022da410d91b3e13819aacc1", size = 283311, upload-time = "2025-12-30T13:41:33.915Z" },
|
| 1153 |
]
|
| 1154 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1155 |
[[package]]
|
| 1156 |
name = "libcst"
|
| 1157 |
version = "1.8.6"
|
|
@@ -1921,46 +1990,6 @@ wheels = [
|
|
| 1921 |
{ url = "https://files.pythonhosted.org/packages/c1/70/6b41bdcddf541b437bbb9f47f94d2db5d9ddef6c37ccab8c9107743748a4/pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7", size = 2525630, upload-time = "2025-10-15T18:23:57.149Z" },
|
| 1922 |
]
|
| 1923 |
|
| 1924 |
-
[[package]]
|
| 1925 |
-
name = "pinecone"
|
| 1926 |
-
version = "8.0.0"
|
| 1927 |
-
source = { registry = "https://pypi.org/simple" }
|
| 1928 |
-
dependencies = [
|
| 1929 |
-
{ name = "certifi" },
|
| 1930 |
-
{ name = "orjson" },
|
| 1931 |
-
{ name = "pinecone-plugin-assistant" },
|
| 1932 |
-
{ name = "pinecone-plugin-interface" },
|
| 1933 |
-
{ name = "python-dateutil" },
|
| 1934 |
-
{ name = "typing-extensions" },
|
| 1935 |
-
{ name = "urllib3" },
|
| 1936 |
-
]
|
| 1937 |
-
sdist = { url = "https://files.pythonhosted.org/packages/33/13/f4c481a6a93dab92132d6d863b70a0e6c903f62940389435b31cf0c7d7d2/pinecone-8.0.0.tar.gz", hash = "sha256:feca7ff607706c09ffbd127ec93fa3b7110896b30c0d7a57672da73c69698d53", size = 1092653, upload-time = "2025-11-18T18:21:30.584Z" }
|
| 1938 |
-
wheels = [
|
| 1939 |
-
{ url = "https://files.pythonhosted.org/packages/7b/6c/1d870a9211eb8f0bf60214182de001b480f94590eca9d6164a5d6d7de031/pinecone-8.0.0-py3-none-any.whl", hash = "sha256:95f714a496a91d80f3405165aedfea76ca8ac16e51e618df0434241838e353f8", size = 745902, upload-time = "2025-11-18T18:21:25.584Z" },
|
| 1940 |
-
]
|
| 1941 |
-
|
| 1942 |
-
[[package]]
|
| 1943 |
-
name = "pinecone-plugin-assistant"
|
| 1944 |
-
version = "3.0.1"
|
| 1945 |
-
source = { registry = "https://pypi.org/simple" }
|
| 1946 |
-
dependencies = [
|
| 1947 |
-
{ name = "packaging" },
|
| 1948 |
-
{ name = "requests" },
|
| 1949 |
-
]
|
| 1950 |
-
sdist = { url = "https://files.pythonhosted.org/packages/08/1a/33249870c9e8c774dafc038419b48aa63b380b461e9a1c1cb042db31be49/pinecone_plugin_assistant-3.0.1.tar.gz", hash = "sha256:6b00e94ef1bf55ed601d2316ee6f71f96f93bf2155277a826638395e1090dde3", size = 152060, upload-time = "2025-11-11T07:45:07.224Z" }
|
| 1951 |
-
wheels = [
|
| 1952 |
-
{ url = "https://files.pythonhosted.org/packages/06/88/4b801675b4d58c5f8acd96bfd4847e6d7bc1a93ee4ff916e913dd6bda2de/pinecone_plugin_assistant-3.0.1-py3-none-any.whl", hash = "sha256:cd86ca5c98137221170e90fe81e03bbe71999992096da68c77f4af3503017622", size = 280865, upload-time = "2025-11-11T07:45:06.055Z" },
|
| 1953 |
-
]
|
| 1954 |
-
|
| 1955 |
-
[[package]]
|
| 1956 |
-
name = "pinecone-plugin-interface"
|
| 1957 |
-
version = "0.0.7"
|
| 1958 |
-
source = { registry = "https://pypi.org/simple" }
|
| 1959 |
-
sdist = { url = "https://files.pythonhosted.org/packages/f4/fb/e8a4063264953ead9e2b24d9b390152c60f042c951c47f4592e9996e57ff/pinecone_plugin_interface-0.0.7.tar.gz", hash = "sha256:b8e6675e41847333aa13923cc44daa3f85676d7157324682dc1640588a982846", size = 3370, upload-time = "2024-06-05T01:57:52.093Z" }
|
| 1960 |
-
wheels = [
|
| 1961 |
-
{ url = "https://files.pythonhosted.org/packages/3b/1d/a21fdfcd6d022cb64cef5c2a29ee6691c6c103c4566b41646b080b7536a5/pinecone_plugin_interface-0.0.7-py3-none-any.whl", hash = "sha256:875857ad9c9fc8bbc074dbe780d187a2afd21f5bfe0f3b08601924a61ef1bba8", size = 6249, upload-time = "2024-06-05T01:57:50.583Z" },
|
| 1962 |
-
]
|
| 1963 |
-
|
| 1964 |
[[package]]
|
| 1965 |
name = "platformdirs"
|
| 1966 |
version = "4.5.1"
|
|
@@ -2256,6 +2285,69 @@ wheels = [
|
|
| 2256 |
{ url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
|
| 2257 |
]
|
| 2258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2259 |
[[package]]
|
| 2260 |
name = "pyparsing"
|
| 2261 |
version = "3.3.1"
|
|
@@ -2308,18 +2400,6 @@ wheels = [
|
|
| 2308 |
{ url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
|
| 2309 |
]
|
| 2310 |
|
| 2311 |
-
[[package]]
|
| 2312 |
-
name = "python-dateutil"
|
| 2313 |
-
version = "2.9.0.post0"
|
| 2314 |
-
source = { registry = "https://pypi.org/simple" }
|
| 2315 |
-
dependencies = [
|
| 2316 |
-
{ name = "six" },
|
| 2317 |
-
]
|
| 2318 |
-
sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" }
|
| 2319 |
-
wheels = [
|
| 2320 |
-
{ url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
|
| 2321 |
-
]
|
| 2322 |
-
|
| 2323 |
[[package]]
|
| 2324 |
name = "python-dotenv"
|
| 2325 |
version = "1.2.1"
|
|
@@ -2565,6 +2645,7 @@ dependencies = [
|
|
| 2565 |
{ name = "hydra-core" },
|
| 2566 |
{ name = "jinja2" },
|
| 2567 |
{ name = "langchain-core" },
|
|
|
|
| 2568 |
{ name = "langchain-nvidia-ai-endpoints" },
|
| 2569 |
{ name = "langchain-text-splitters" },
|
| 2570 |
{ name = "langgraph" },
|
|
@@ -2572,9 +2653,9 @@ dependencies = [
|
|
| 2572 |
{ name = "lightning" },
|
| 2573 |
{ name = "numpy" },
|
| 2574 |
{ name = "passlib", extra = ["bcrypt"] },
|
| 2575 |
-
{ name = "pinecone" },
|
| 2576 |
{ name = "psycopg2-binary" },
|
| 2577 |
{ name = "pydantic" },
|
|
|
|
| 2578 |
{ name = "python-dotenv" },
|
| 2579 |
{ name = "python-jose", extra = ["cryptography"] },
|
| 2580 |
{ name = "python-multipart" },
|
|
@@ -2618,6 +2699,7 @@ requires-dist = [
|
|
| 2618 |
{ name = "hydra-core", specifier = ">=1.3.2" },
|
| 2619 |
{ name = "jinja2", specifier = ">=3.1.6" },
|
| 2620 |
{ name = "langchain-core", specifier = ">=0.1.0" },
|
|
|
|
| 2621 |
{ name = "langchain-nvidia-ai-endpoints", specifier = ">=1.0.0" },
|
| 2622 |
{ name = "langchain-text-splitters", specifier = ">=0.0.1" },
|
| 2623 |
{ name = "langgraph", specifier = ">=0.2.39" },
|
|
@@ -2626,9 +2708,9 @@ requires-dist = [
|
|
| 2626 |
{ name = "mypy", marker = "extra == 'dev'", specifier = ">=1.10.0" },
|
| 2627 |
{ name = "numpy", specifier = ">=1.26.0" },
|
| 2628 |
{ name = "passlib", extras = ["bcrypt"], specifier = ">=1.7.4" },
|
| 2629 |
-
{ name = "pinecone" },
|
| 2630 |
{ name = "psycopg2-binary", specifier = ">=2.9.9" },
|
| 2631 |
{ name = "pydantic", specifier = ">=2.7.0" },
|
|
|
|
| 2632 |
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" },
|
| 2633 |
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23.0" },
|
| 2634 |
{ name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0.0" },
|
|
|
|
| 568 |
{ url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" },
|
| 569 |
]
|
| 570 |
|
| 571 |
+
[[package]]
|
| 572 |
+
name = "dnspython"
|
| 573 |
+
version = "2.8.0"
|
| 574 |
+
source = { registry = "https://pypi.org/simple" }
|
| 575 |
+
sdist = { url = "https://files.pythonhosted.org/packages/8c/8b/57666417c0f90f08bcafa776861060426765fdb422eb10212086fb811d26/dnspython-2.8.0.tar.gz", hash = "sha256:181d3c6996452cb1189c4046c61599b84a5a86e099562ffde77d26984ff26d0f", size = 368251, upload-time = "2025-09-07T18:58:00.022Z" }
|
| 576 |
+
wheels = [
|
| 577 |
+
{ url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" },
|
| 578 |
+
]
|
| 579 |
+
|
| 580 |
[[package]]
|
| 581 |
name = "ecdsa"
|
| 582 |
version = "0.19.1"
|
|
|
|
| 1041 |
{ url = "https://files.pythonhosted.org/packages/71/92/5e77f98553e9e75130c78900d000368476aed74276eb8ae8796f65f00918/jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942", size = 7595, upload-time = "2024-06-10T19:24:40.698Z" },
|
| 1042 |
]
|
| 1043 |
|
| 1044 |
+
[[package]]
|
| 1045 |
+
name = "langchain"
|
| 1046 |
+
version = "1.2.4"
|
| 1047 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1048 |
+
dependencies = [
|
| 1049 |
+
{ name = "langchain-core" },
|
| 1050 |
+
{ name = "langgraph" },
|
| 1051 |
+
{ name = "pydantic" },
|
| 1052 |
+
]
|
| 1053 |
+
sdist = { url = "https://files.pythonhosted.org/packages/ed/3f/371267e88c153500a75c0e9daf9645a69955cfe6f85699955241ac0fa6e2/langchain-1.2.4.tar.gz", hash = "sha256:65119ff1c2ac8cc2410739b0fb2773f8fbfbe83357df9bab8a5fceafb9e04aa1", size = 552340, upload-time = "2026-01-14T19:35:26.556Z" }
|
| 1054 |
+
wheels = [
|
| 1055 |
+
{ url = "https://files.pythonhosted.org/packages/0c/71/667887579bb3cf3c2db88224849f7362a8c3f118666e426a51058ee43d9c/langchain-1.2.4-py3-none-any.whl", hash = "sha256:182ac9f3c4559c5a6477e00d60ff8a56212ec4db6f101a4957492818dc3ce3e9", size = 107949, upload-time = "2026-01-14T19:35:24.7Z" },
|
| 1056 |
+
]
|
| 1057 |
+
|
| 1058 |
+
[[package]]
|
| 1059 |
+
name = "langchain-classic"
|
| 1060 |
+
version = "1.0.1"
|
| 1061 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1062 |
+
dependencies = [
|
| 1063 |
+
{ name = "langchain-core" },
|
| 1064 |
+
{ name = "langchain-text-splitters" },
|
| 1065 |
+
{ name = "langsmith" },
|
| 1066 |
+
{ name = "pydantic" },
|
| 1067 |
+
{ name = "pyyaml" },
|
| 1068 |
+
{ name = "requests" },
|
| 1069 |
+
{ name = "sqlalchemy" },
|
| 1070 |
+
]
|
| 1071 |
+
sdist = { url = "https://files.pythonhosted.org/packages/7c/4b/bd03518418ece4c13192a504449b58c28afee915dc4a6f4b02622458cb1b/langchain_classic-1.0.1.tar.gz", hash = "sha256:40a499684df36b005a1213735dc7f8dca8f5eb67978d6ec763e7a49780864fdc", size = 10516020, upload-time = "2025-12-23T22:55:22.615Z" }
|
| 1072 |
+
wheels = [
|
| 1073 |
+
{ url = "https://files.pythonhosted.org/packages/83/0f/eab87f017d7fe28e8c11fff614f4cdbfae32baadb77d0f79e9f922af1df2/langchain_classic-1.0.1-py3-none-any.whl", hash = "sha256:131d83a02bb80044c68fedc1ab4ae885d5b8f8c2c742d8ab9e7534ad9cda8e80", size = 1040666, upload-time = "2025-12-23T22:55:21.025Z" },
|
| 1074 |
+
]
|
| 1075 |
+
|
| 1076 |
[[package]]
|
| 1077 |
name = "langchain-core"
|
| 1078 |
version = "1.2.5"
|
|
|
|
| 1092 |
{ url = "https://files.pythonhosted.org/packages/83/bd/9df897cbc98290bf71140104ee5b9777cf5291afb80333aa7da5a497339b/langchain_core-1.2.5-py3-none-any.whl", hash = "sha256:3255944ef4e21b2551facb319bfc426057a40247c0a05de5bd6f2fc021fbfa34", size = 484851, upload-time = "2025-12-22T23:45:30.525Z" },
|
| 1093 |
]
|
| 1094 |
|
| 1095 |
+
[[package]]
|
| 1096 |
+
name = "langchain-mongodb"
|
| 1097 |
+
version = "0.11.0"
|
| 1098 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1099 |
+
dependencies = [
|
| 1100 |
+
{ name = "langchain" },
|
| 1101 |
+
{ name = "langchain-classic" },
|
| 1102 |
+
{ name = "langchain-core" },
|
| 1103 |
+
{ name = "langchain-text-splitters" },
|
| 1104 |
+
{ name = "lark" },
|
| 1105 |
+
{ name = "numpy" },
|
| 1106 |
+
{ name = "pymongo" },
|
| 1107 |
+
{ name = "pymongo-search-utils" },
|
| 1108 |
+
]
|
| 1109 |
+
sdist = { url = "https://files.pythonhosted.org/packages/ba/0e/03027bbf0ae3ee71d00e32f5c64395cbee05393e6e5dc56e2d88320db542/langchain_mongodb-0.11.0.tar.gz", hash = "sha256:db483f12e8a4fdbbcfb0594881962fd1f0afcb38a3d42ee0d5fe8a2be20e1e86", size = 356447, upload-time = "2026-01-15T17:00:37.102Z" }
|
| 1110 |
+
wheels = [
|
| 1111 |
+
{ url = "https://files.pythonhosted.org/packages/1e/a1/a4ef0c7027166540a4aced056b1fd7194e4519932d2a846fd2cfd9f057cb/langchain_mongodb-0.11.0-py3-none-any.whl", hash = "sha256:7e1f43684c907d1f1fee4dbc480dd4909b3ebf03b5d3dad105ed9f4a4280d49f", size = 62037, upload-time = "2026-01-15T17:00:36.258Z" },
|
| 1112 |
+
]
|
| 1113 |
+
|
| 1114 |
[[package]]
|
| 1115 |
name = "langchain-nvidia-ai-endpoints"
|
| 1116 |
version = "1.0.0"
|
|
|
|
| 1212 |
{ url = "https://files.pythonhosted.org/packages/ed/d8/91a8b483b30e0708a8911df10b4ce04ebf2b4b8dde8d020c124aec77380a/langsmith-0.5.2-py3-none-any.whl", hash = "sha256:42f8b853a18dd4d5f7fa38c8ff29e38da065a727022da410d91b3e13819aacc1", size = 283311, upload-time = "2025-12-30T13:41:33.915Z" },
|
| 1213 |
]
|
| 1214 |
|
| 1215 |
+
[[package]]
|
| 1216 |
+
name = "lark"
|
| 1217 |
+
version = "1.3.1"
|
| 1218 |
+
source = { registry = "https://pypi.org/simple" }
|
| 1219 |
+
sdist = { url = "https://files.pythonhosted.org/packages/da/34/28fff3ab31ccff1fd4f6c7c7b0ceb2b6968d8ea4950663eadcb5720591a0/lark-1.3.1.tar.gz", hash = "sha256:b426a7a6d6d53189d318f2b6236ab5d6429eaf09259f1ca33eb716eed10d2905", size = 382732, upload-time = "2025-10-27T18:25:56.653Z" }
|
| 1220 |
+
wheels = [
|
| 1221 |
+
{ url = "https://files.pythonhosted.org/packages/82/3d/14ce75ef66813643812f3093ab17e46d3a206942ce7376d31ec2d36229e7/lark-1.3.1-py3-none-any.whl", hash = "sha256:c629b661023a014c37da873b4ff58a817398d12635d3bbb2c5a03be7fe5d1e12", size = 113151, upload-time = "2025-10-27T18:25:54.882Z" },
|
| 1222 |
+
]
|
| 1223 |
+
|
| 1224 |
[[package]]
|
| 1225 |
name = "libcst"
|
| 1226 |
version = "1.8.6"
|
|
|
|
| 1990 |
{ url = "https://files.pythonhosted.org/packages/c1/70/6b41bdcddf541b437bbb9f47f94d2db5d9ddef6c37ccab8c9107743748a4/pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7", size = 2525630, upload-time = "2025-10-15T18:23:57.149Z" },
|
| 1991 |
]
|
| 1992 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1993 |
[[package]]
|
| 1994 |
name = "platformdirs"
|
| 1995 |
version = "4.5.1"
|
|
|
|
| 2285 |
{ url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
|
| 2286 |
]
|
| 2287 |
|
| 2288 |
+
[[package]]
|
| 2289 |
+
name = "pymongo"
|
| 2290 |
+
version = "4.16.0"
|
| 2291 |
+
source = { registry = "https://pypi.org/simple" }
|
| 2292 |
+
dependencies = [
|
| 2293 |
+
{ name = "dnspython" },
|
| 2294 |
+
]
|
| 2295 |
+
sdist = { url = "https://files.pythonhosted.org/packages/65/9c/a4895c4b785fc9865a84a56e14b5bd21ca75aadc3dab79c14187cdca189b/pymongo-4.16.0.tar.gz", hash = "sha256:8ba8405065f6e258a6f872fe62d797a28f383a12178c7153c01ed04e845c600c", size = 2495323, upload-time = "2026-01-07T18:05:48.107Z" }
|
| 2296 |
+
wheels = [
|
| 2297 |
+
{ url = "https://files.pythonhosted.org/packages/6a/03/6dd7c53cbde98de469a3e6fb893af896dca644c476beb0f0c6342bcc368b/pymongo-4.16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bd4911c40a43a821dfd93038ac824b756b6e703e26e951718522d29f6eb166a8", size = 917619, upload-time = "2026-01-07T18:04:19.173Z" },
|
| 2298 |
+
{ url = "https://files.pythonhosted.org/packages/73/e1/328915f2734ea1f355dc9b0e98505ff670f5fab8be5e951d6ed70971c6aa/pymongo-4.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:25a6b03a68f9907ea6ec8bc7cf4c58a1b51a18e23394f962a6402f8e46d41211", size = 917364, upload-time = "2026-01-07T18:04:20.861Z" },
|
| 2299 |
+
{ url = "https://files.pythonhosted.org/packages/41/fe/4769874dd9812a1bc2880a9785e61eba5340da966af888dd430392790ae0/pymongo-4.16.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:91ac0cb0fe2bf17616c2039dac88d7c9a5088f5cb5829b27c9d250e053664d31", size = 1686901, upload-time = "2026-01-07T18:04:22.219Z" },
|
| 2300 |
+
{ url = "https://files.pythonhosted.org/packages/fa/8d/15707b9669fdc517bbc552ac60da7124dafe7ac1552819b51e97ed4038b4/pymongo-4.16.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cf0ec79e8ca7077f455d14d915d629385153b6a11abc0b93283ed73a8013e376", size = 1723034, upload-time = "2026-01-07T18:04:24.055Z" },
|
| 2301 |
+
{ url = "https://files.pythonhosted.org/packages/5b/af/3d5d16ff11d447d40c1472da1b366a31c7380d7ea2922a449c7f7f495567/pymongo-4.16.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2d0082631a7510318befc2b4fdab140481eb4b9dd62d9245e042157085da2a70", size = 1797161, upload-time = "2026-01-07T18:04:25.964Z" },
|
| 2302 |
+
{ url = "https://files.pythonhosted.org/packages/fb/04/725ab8664eeec73ec125b5a873448d80f5d8cf2750aaaf804cbc538a50a5/pymongo-4.16.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:85dc2f3444c346ea019a371e321ac868a4fab513b7a55fe368f0cc78de8177cc", size = 1780938, upload-time = "2026-01-07T18:04:28.745Z" },
|
| 2303 |
+
{ url = "https://files.pythonhosted.org/packages/22/50/dd7e9095e1ca35f93c3c844c92eb6eb0bc491caeb2c9bff3b32fe3c9b18f/pymongo-4.16.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dabbf3c14de75a20cc3c30bf0c6527157224a93dfb605838eabb1a2ee3be008d", size = 1714342, upload-time = "2026-01-07T18:04:30.331Z" },
|
| 2304 |
+
{ url = "https://files.pythonhosted.org/packages/03/c9/542776987d5c31ae8e93e92680ea2b6e5a2295f398b25756234cabf38a39/pymongo-4.16.0-cp312-cp312-win32.whl", hash = "sha256:60307bb91e0ab44e560fe3a211087748b2b5f3e31f403baf41f5b7b0a70bd104", size = 887868, upload-time = "2026-01-07T18:04:32.124Z" },
|
| 2305 |
+
{ url = "https://files.pythonhosted.org/packages/2e/d4/b4045a7ccc5680fb496d01edf749c7a9367cc8762fbdf7516cf807ef679b/pymongo-4.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:f513b2c6c0d5c491f478422f6b5b5c27ac1af06a54c93ef8631806f7231bd92e", size = 907554, upload-time = "2026-01-07T18:04:33.685Z" },
|
| 2306 |
+
{ url = "https://files.pythonhosted.org/packages/60/4c/33f75713d50d5247f2258405142c0318ff32c6f8976171c4fcae87a9dbdf/pymongo-4.16.0-cp312-cp312-win_arm64.whl", hash = "sha256:dfc320f08ea9a7ec5b2403dc4e8150636f0d6150f4b9792faaae539c88e7db3b", size = 892971, upload-time = "2026-01-07T18:04:35.594Z" },
|
| 2307 |
+
{ url = "https://files.pythonhosted.org/packages/47/84/148d8b5da8260f4679d6665196ae04ab14ffdf06f5fe670b0ab11942951f/pymongo-4.16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d15f060bc6d0964a8bb70aba8f0cb6d11ae99715438f640cff11bbcf172eb0e8", size = 972009, upload-time = "2026-01-07T18:04:38.303Z" },
|
| 2308 |
+
{ url = "https://files.pythonhosted.org/packages/1e/5e/9f3a8daf583d0adaaa033a3e3e58194d2282737dc164014ff33c7a081103/pymongo-4.16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a19ea46a0fe71248965305a020bc076a163311aefbaa1d83e47d06fa30ac747", size = 971784, upload-time = "2026-01-07T18:04:39.669Z" },
|
| 2309 |
+
{ url = "https://files.pythonhosted.org/packages/ad/f2/b6c24361fcde24946198573c0176406bfd5f7b8538335f3d939487055322/pymongo-4.16.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:311d4549d6bf1f8c61d025965aebb5ba29d1481dc6471693ab91610aaffbc0eb", size = 1947174, upload-time = "2026-01-07T18:04:41.368Z" },
|
| 2310 |
+
{ url = "https://files.pythonhosted.org/packages/47/1a/8634192f98cf740b3d174e1018dd0350018607d5bd8ac35a666dc49c732b/pymongo-4.16.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:46ffb728d92dd5b09fc034ed91acf5595657c7ca17d4cf3751322cd554153c17", size = 1991727, upload-time = "2026-01-07T18:04:42.965Z" },
|
| 2311 |
+
{ url = "https://files.pythonhosted.org/packages/5a/2f/0c47ac84572b28e23028a23a3798a1f725e1c23b0cf1c1424678d16aff42/pymongo-4.16.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:acda193f440dd88c2023cb00aa8bd7b93a9df59978306d14d87a8b12fe426b05", size = 2082497, upload-time = "2026-01-07T18:04:44.652Z" },
|
| 2312 |
+
{ url = "https://files.pythonhosted.org/packages/ba/57/9f46ef9c862b2f0cf5ce798f3541c201c574128d31ded407ba4b3918d7b6/pymongo-4.16.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5d9fdb386cf958e6ef6ff537d6149be7edb76c3268cd6833e6c36aa447e4443f", size = 2064947, upload-time = "2026-01-07T18:04:46.228Z" },
|
| 2313 |
+
{ url = "https://files.pythonhosted.org/packages/b8/56/5421c0998f38e32288100a07f6cb2f5f9f352522157c901910cb2927e211/pymongo-4.16.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:91899dd7fb9a8c50f09c3c1cf0cb73bfbe2737f511f641f19b9650deb61c00ca", size = 1980478, upload-time = "2026-01-07T18:04:48.017Z" },
|
| 2314 |
+
{ url = "https://files.pythonhosted.org/packages/92/93/bfc448d025e12313a937d6e1e0101b50cc9751636b4b170e600fe3203063/pymongo-4.16.0-cp313-cp313-win32.whl", hash = "sha256:2cd60cd1e05de7f01927f8e25ca26b3ea2c09de8723241e5d3bcfdc70eaff76b", size = 934672, upload-time = "2026-01-07T18:04:49.538Z" },
|
| 2315 |
+
{ url = "https://files.pythonhosted.org/packages/96/10/12710a5e01218d50c3dd165fd72c5ed2699285f77348a3b1a119a191d826/pymongo-4.16.0-cp313-cp313-win_amd64.whl", hash = "sha256:3ead8a0050c53eaa55935895d6919d393d0328ec24b2b9115bdbe881aa222673", size = 959237, upload-time = "2026-01-07T18:04:51.382Z" },
|
| 2316 |
+
{ url = "https://files.pythonhosted.org/packages/0c/56/d288bcd1d05bc17ec69df1d0b1d67bc710c7c5dbef86033a5a4d2e2b08e6/pymongo-4.16.0-cp313-cp313-win_arm64.whl", hash = "sha256:dbbc5b254c36c37d10abb50e899bc3939bbb7ab1e7c659614409af99bd3e7675", size = 940909, upload-time = "2026-01-07T18:04:52.904Z" },
|
| 2317 |
+
{ url = "https://files.pythonhosted.org/packages/30/9e/4d343f8d0512002fce17915a89477b9f916bda1205729e042d8f23acf194/pymongo-4.16.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:8a254d49a9ffe9d7f888e3c677eed3729b14ce85abb08cd74732cead6ccc3c66", size = 1026634, upload-time = "2026-01-07T18:04:54.359Z" },
|
| 2318 |
+
{ url = "https://files.pythonhosted.org/packages/c3/e3/341f88c5535df40c0450fda915f582757bb7d988cdfc92990a5e27c4c324/pymongo-4.16.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a1bf44e13cf2d44d2ea2e928a8140d5d667304abe1a61c4d55b4906f389fbe64", size = 1026252, upload-time = "2026-01-07T18:04:56.642Z" },
|
| 2319 |
+
{ url = "https://files.pythonhosted.org/packages/af/64/9471b22eb98f0a2ca0b8e09393de048502111b2b5b14ab1bd9e39708aab5/pymongo-4.16.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f1c5f1f818b669875d191323a48912d3fcd2e4906410e8297bb09ac50c4d5ccc", size = 2207399, upload-time = "2026-01-07T18:04:58.255Z" },
|
| 2320 |
+
{ url = "https://files.pythonhosted.org/packages/87/ac/47c4d50b25a02f21764f140295a2efaa583ee7f17992a5e5fa542b3a690f/pymongo-4.16.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77cfd37a43a53b02b7bd930457c7994c924ad8bbe8dff91817904bcbf291b371", size = 2260595, upload-time = "2026-01-07T18:04:59.788Z" },
|
| 2321 |
+
{ url = "https://files.pythonhosted.org/packages/ee/1b/0ce1ce9dd036417646b2fe6f63b58127acff3cf96eeb630c34ec9cd675ff/pymongo-4.16.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:36ef2fee50eee669587d742fb456e349634b4fcf8926208766078b089054b24b", size = 2366958, upload-time = "2026-01-07T18:05:01.942Z" },
|
| 2322 |
+
{ url = "https://files.pythonhosted.org/packages/3e/3c/a5a17c0d413aa9d6c17bc35c2b472e9e79cda8068ba8e93433b5f43028e9/pymongo-4.16.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:55f8d5a6fe2fa0b823674db2293f92d74cd5f970bc0360f409a1fc21003862d3", size = 2346081, upload-time = "2026-01-07T18:05:03.576Z" },
|
| 2323 |
+
{ url = "https://files.pythonhosted.org/packages/65/19/f815533d1a88fb8a3b6c6e895bb085ffdae68ccb1e6ed7102202a307f8e2/pymongo-4.16.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9caacac0dd105e2555521002e2d17afc08665187017b466b5753e84c016628e6", size = 2246053, upload-time = "2026-01-07T18:05:05.459Z" },
|
| 2324 |
+
{ url = "https://files.pythonhosted.org/packages/c6/88/4be3ec78828dc64b212c123114bd6ae8db5b7676085a7b43cc75d0131bd2/pymongo-4.16.0-cp314-cp314-win32.whl", hash = "sha256:c789236366525c3ee3cd6e4e450a9ff629a7d1f4d88b8e18a0aea0615fd7ecf8", size = 989461, upload-time = "2026-01-07T18:05:07.018Z" },
|
| 2325 |
+
{ url = "https://files.pythonhosted.org/packages/af/5a/ab8d5af76421b34db483c9c8ebc3a2199fb80ae63dc7e18f4cf1df46306a/pymongo-4.16.0-cp314-cp314-win_amd64.whl", hash = "sha256:2b0714d7764efb29bf9d3c51c964aed7c4c7237b341f9346f15ceaf8321fdb35", size = 1017803, upload-time = "2026-01-07T18:05:08.499Z" },
|
| 2326 |
+
{ url = "https://files.pythonhosted.org/packages/f6/f4/98d68020728ac6423cf02d17cfd8226bf6cce5690b163d30d3f705e8297e/pymongo-4.16.0-cp314-cp314-win_arm64.whl", hash = "sha256:12762e7cc0f8374a8cae3b9f9ed8dabb5d438c7b33329232dd9b7de783454033", size = 997184, upload-time = "2026-01-07T18:05:09.944Z" },
|
| 2327 |
+
{ url = "https://files.pythonhosted.org/packages/50/00/dc3a271daf06401825b9c1f4f76f018182c7738281ea54b9762aea0560c1/pymongo-4.16.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1c01e8a7cd0ea66baf64a118005535ab5bf9f9eb63a1b50ac3935dccf9a54abe", size = 1083303, upload-time = "2026-01-07T18:05:11.702Z" },
|
| 2328 |
+
{ url = "https://files.pythonhosted.org/packages/b8/4b/b5375ee21d12eababe46215011ebc63801c0d2c5ffdf203849d0d79f9852/pymongo-4.16.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4c4872299ebe315a79f7f922051061634a64fda95b6b17677ba57ef00b2ba2a4", size = 1083233, upload-time = "2026-01-07T18:05:13.182Z" },
|
| 2329 |
+
{ url = "https://files.pythonhosted.org/packages/ee/e3/52efa3ca900622c7dcb56c5e70f15c906816d98905c22d2ee1f84d9a7b60/pymongo-4.16.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:78037d02389745e247fe5ab0bcad5d1ab30726eaac3ad79219c7d6bbb07eec53", size = 2527438, upload-time = "2026-01-07T18:05:14.981Z" },
|
| 2330 |
+
{ url = "https://files.pythonhosted.org/packages/cb/96/43b1be151c734e7766c725444bcbfa1de6b60cc66bfb406203746839dd25/pymongo-4.16.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c126fb72be2518395cc0465d4bae03125119136462e1945aea19840e45d89cfc", size = 2600399, upload-time = "2026-01-07T18:05:16.794Z" },
|
| 2331 |
+
{ url = "https://files.pythonhosted.org/packages/e7/62/fa64a5045dfe3a1cd9217232c848256e7bc0136cffb7da4735c5e0d30e40/pymongo-4.16.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f3867dc225d9423c245a51eaac2cfcd53dde8e0a8d8090bb6aed6e31bd6c2d4f", size = 2720960, upload-time = "2026-01-07T18:05:18.498Z" },
|
| 2332 |
+
{ url = "https://files.pythonhosted.org/packages/54/7b/01577eb97e605502821273a5bc16ce0fb0be5c978fe03acdbff471471202/pymongo-4.16.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f25001a955073b80510c0c3db0e043dbbc36904fd69e511c74e3d8640b8a5111", size = 2699344, upload-time = "2026-01-07T18:05:20.073Z" },
|
| 2333 |
+
{ url = "https://files.pythonhosted.org/packages/55/68/6ef6372d516f703479c3b6cbbc45a5afd307173b1cbaccd724e23919bb1a/pymongo-4.16.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d9885aad05f82fd7ea0c9ca505d60939746b39263fa273d0125170da8f59098", size = 2577133, upload-time = "2026-01-07T18:05:22.052Z" },
|
| 2334 |
+
{ url = "https://files.pythonhosted.org/packages/15/c7/b5337093bb01da852f945802328665f85f8109dbe91d81ea2afe5ff059b9/pymongo-4.16.0-cp314-cp314t-win32.whl", hash = "sha256:948152b30eddeae8355495f9943a3bf66b708295c0b9b6f467de1c620f215487", size = 1040560, upload-time = "2026-01-07T18:05:23.888Z" },
|
| 2335 |
+
{ url = "https://files.pythonhosted.org/packages/96/8c/5b448cd1b103f3889d5713dda37304c81020ff88e38a826e8a75ddff4610/pymongo-4.16.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f6e42c1bc985d9beee884780ae6048790eb4cd565c46251932906bdb1630034a", size = 1075081, upload-time = "2026-01-07T18:05:26.874Z" },
|
| 2336 |
+
{ url = "https://files.pythonhosted.org/packages/32/cd/ddc794cdc8500f6f28c119c624252fb6dfb19481c6d7ed150f13cf468a6d/pymongo-4.16.0-cp314-cp314t-win_arm64.whl", hash = "sha256:6b2a20edb5452ac8daa395890eeb076c570790dfce6b7a44d788af74c2f8cf96", size = 1047725, upload-time = "2026-01-07T18:05:28.47Z" },
|
| 2337 |
+
]
|
| 2338 |
+
|
| 2339 |
+
[[package]]
|
| 2340 |
+
name = "pymongo-search-utils"
|
| 2341 |
+
version = "0.3.0"
|
| 2342 |
+
source = { registry = "https://pypi.org/simple" }
|
| 2343 |
+
dependencies = [
|
| 2344 |
+
{ name = "pymongo" },
|
| 2345 |
+
]
|
| 2346 |
+
sdist = { url = "https://files.pythonhosted.org/packages/d9/aa/3eb266ffc74ec52bbf6dd92d311ab4fc3225c2ac8f1a2e6abe98f7288867/pymongo_search_utils-0.3.0.tar.gz", hash = "sha256:56148987ce9ff191eb1cd0f56c01d3dae497a3cb6d7b7db75ec894a9afcbe418", size = 13728, upload-time = "2026-02-03T22:18:24.481Z" }
|
| 2347 |
+
wheels = [
|
| 2348 |
+
{ url = "https://files.pythonhosted.org/packages/55/ed/87d3ed0e45b9230bacb9edcb913d515e6756bc2df3384e5f192662c38ce8/pymongo_search_utils-0.3.0-py3-none-any.whl", hash = "sha256:9b9ef8dfbd57da530ce7c2bde10aec8f462605080a9ed4e9a41679170c8742bf", size = 19467, upload-time = "2026-02-03T22:18:23.398Z" },
|
| 2349 |
+
]
|
| 2350 |
+
|
| 2351 |
[[package]]
|
| 2352 |
name = "pyparsing"
|
| 2353 |
version = "3.3.1"
|
|
|
|
| 2400 |
{ url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
|
| 2401 |
]
|
| 2402 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2403 |
[[package]]
|
| 2404 |
name = "python-dotenv"
|
| 2405 |
version = "1.2.1"
|
|
|
|
| 2645 |
{ name = "hydra-core" },
|
| 2646 |
{ name = "jinja2" },
|
| 2647 |
{ name = "langchain-core" },
|
| 2648 |
+
{ name = "langchain-mongodb" },
|
| 2649 |
{ name = "langchain-nvidia-ai-endpoints" },
|
| 2650 |
{ name = "langchain-text-splitters" },
|
| 2651 |
{ name = "langgraph" },
|
|
|
|
| 2653 |
{ name = "lightning" },
|
| 2654 |
{ name = "numpy" },
|
| 2655 |
{ name = "passlib", extra = ["bcrypt"] },
|
|
|
|
| 2656 |
{ name = "psycopg2-binary" },
|
| 2657 |
{ name = "pydantic" },
|
| 2658 |
+
{ name = "pymongo" },
|
| 2659 |
{ name = "python-dotenv" },
|
| 2660 |
{ name = "python-jose", extra = ["cryptography"] },
|
| 2661 |
{ name = "python-multipart" },
|
|
|
|
| 2699 |
{ name = "hydra-core", specifier = ">=1.3.2" },
|
| 2700 |
{ name = "jinja2", specifier = ">=3.1.6" },
|
| 2701 |
{ name = "langchain-core", specifier = ">=0.1.0" },
|
| 2702 |
+
{ name = "langchain-mongodb", specifier = ">=0.1.0" },
|
| 2703 |
{ name = "langchain-nvidia-ai-endpoints", specifier = ">=1.0.0" },
|
| 2704 |
{ name = "langchain-text-splitters", specifier = ">=0.0.1" },
|
| 2705 |
{ name = "langgraph", specifier = ">=0.2.39" },
|
|
|
|
| 2708 |
{ name = "mypy", marker = "extra == 'dev'", specifier = ">=1.10.0" },
|
| 2709 |
{ name = "numpy", specifier = ">=1.26.0" },
|
| 2710 |
{ name = "passlib", extras = ["bcrypt"], specifier = ">=1.7.4" },
|
|
|
|
| 2711 |
{ name = "psycopg2-binary", specifier = ">=2.9.9" },
|
| 2712 |
{ name = "pydantic", specifier = ">=2.7.0" },
|
| 2713 |
+
{ name = "pymongo", specifier = ">=4.6.0" },
|
| 2714 |
{ name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" },
|
| 2715 |
{ name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23.0" },
|
| 2716 |
{ name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0.0" },
|