firepenguindisopanda committed on
Commit
1a608b5
·
1 Parent(s): 4da2f57

Refactor code structure for improved readability and maintainability

Browse files
Files changed (35) hide show
  1. app/core/llm_factory.py +4 -4
  2. app/core/mongodb_rag.py +378 -0
  3. app/core/orchestrator.py +97 -30
  4. app/core/rag.py +220 -121
  5. app/core/schemas.py +1 -10
  6. app/prompts/product_owner.md +23 -2
  7. app/routers/health.py +71 -37
  8. corpus_rag/RAG_INDEX_DECISION.md +64 -0
  9. corpus_rag/api_designer/role_playbook.txt +18 -0
  10. corpus_rag/api_designer/standards_quickref.txt +10 -0
  11. corpus_rag/business_analyst/role_playbook.txt +19 -0
  12. corpus_rag/business_analyst/standards_quickref.txt +10 -0
  13. corpus_rag/data_architect/role_playbook.txt +18 -0
  14. corpus_rag/data_architect/standards_quickref.txt +10 -0
  15. corpus_rag/devops_architect/role_playbook.txt +18 -0
  16. corpus_rag/devops_architect/standards_quickref.txt +10 -0
  17. corpus_rag/environment_engineer/role_playbook.txt +18 -0
  18. corpus_rag/environment_engineer/standards_quickref.txt +10 -0
  19. corpus_rag/product_owner/role_playbook.txt +22 -0
  20. corpus_rag/product_owner/standards_quickref.txt +10 -0
  21. corpus_rag/qa_strategist/role_playbook.txt +18 -0
  22. corpus_rag/qa_strategist/standards_quickref.txt +10 -0
  23. corpus_rag/security_analyst/role_playbook.txt +18 -0
  24. corpus_rag/security_analyst/standards_quickref.txt +10 -0
  25. corpus_rag/solution_architect/role_playbook.txt +19 -0
  26. corpus_rag/solution_architect/standards_quickref.txt +10 -0
  27. corpus_rag/technical_writer/role_playbook.txt +18 -0
  28. corpus_rag/technical_writer/standards_quickref.txt +10 -0
  29. corpus_rag/ux_designer/role_playbook.txt +18 -0
  30. corpus_rag/ux_designer/standards_quickref.txt +10 -0
  31. pyproject.toml +3 -3
  32. requirements.txt +60 -46
  33. scripts/seed_rag_data.py +316 -0
  34. scripts/setup_mongodb_indexes.py +237 -0
  35. uv.lock +136 -54
app/core/llm_factory.py CHANGED
@@ -31,23 +31,23 @@ DEFAULT_EMBEDDING_MODEL = "nvidia/nv-embedqa-e5-v5"
31
  AGENT_CONFIGS: dict[TeamRole, dict[str, Any]] = {
32
  # Phase 1
33
  TeamRole.PROJECT_REFINER: {"temperature": 0.3, "max_tokens": 2048},
34
- TeamRole.PRODUCT_OWNER: {"temperature": 0.5, "max_tokens": 2048},
35
  # Phase 2
36
  TeamRole.BUSINESS_ANALYST: {"temperature": 0.3, "max_tokens": 3072},
37
  TeamRole.SOLUTION_ARCHITECT: {"temperature": 0.4, "max_tokens": 3072},
38
- TeamRole.DATA_ARCHITECT: {"temperature": 0.3, "max_tokens": 2048},
39
  TeamRole.SECURITY_ANALYST: {"temperature": 0.2, "max_tokens": 2048},
40
  # Phase 3
41
  TeamRole.UX_DESIGNER: {"temperature": 0.8, "max_tokens": 2048},
42
  TeamRole.API_DESIGNER: {"temperature": 0.2, "max_tokens": 4096},
43
- TeamRole.QA_STRATEGIST: {"temperature": 0.3, "max_tokens": 2048},
44
  TeamRole.DEVOPS_ARCHITECT: {"temperature": 0.3, "max_tokens": 2048},
45
  # Phase 4
46
  TeamRole.ENVIRONMENT_ENGINEER: {"temperature": 0.3, "max_tokens": 2048},
47
  TeamRole.TECHNICAL_WRITER: {"temperature": 0.5, "max_tokens": 3072},
48
  # Phase 5 / Judge
49
  TeamRole.SPEC_COORDINATOR: {"temperature": 0.3, "max_tokens": 4096},
50
- TeamRole.JUDGE: {"temperature": 0.1, "max_tokens": 1024},
51
  }
52
 
53
  # Default configuration for unknown roles
 
31
  AGENT_CONFIGS: dict[TeamRole, dict[str, Any]] = {
32
  # Phase 1
33
  TeamRole.PROJECT_REFINER: {"temperature": 0.3, "max_tokens": 2048},
34
+ TeamRole.PRODUCT_OWNER: {"temperature": 0.5, "max_tokens": 4096},
35
  # Phase 2
36
  TeamRole.BUSINESS_ANALYST: {"temperature": 0.3, "max_tokens": 3072},
37
  TeamRole.SOLUTION_ARCHITECT: {"temperature": 0.4, "max_tokens": 3072},
38
+ TeamRole.DATA_ARCHITECT: {"temperature": 0.3, "max_tokens": 4096},
39
  TeamRole.SECURITY_ANALYST: {"temperature": 0.2, "max_tokens": 2048},
40
  # Phase 3
41
  TeamRole.UX_DESIGNER: {"temperature": 0.8, "max_tokens": 2048},
42
  TeamRole.API_DESIGNER: {"temperature": 0.2, "max_tokens": 4096},
43
+ TeamRole.QA_STRATEGIST: {"temperature": 0.3, "max_tokens": 4096},
44
  TeamRole.DEVOPS_ARCHITECT: {"temperature": 0.3, "max_tokens": 2048},
45
  # Phase 4
46
  TeamRole.ENVIRONMENT_ENGINEER: {"temperature": 0.3, "max_tokens": 2048},
47
  TeamRole.TECHNICAL_WRITER: {"temperature": 0.5, "max_tokens": 3072},
48
  # Phase 5 / Judge
49
  TeamRole.SPEC_COORDINATOR: {"temperature": 0.3, "max_tokens": 4096},
50
+ TeamRole.JUDGE: {"temperature": 0.1, "max_tokens": 2048},
51
  }
52
 
53
  # Default configuration for unknown roles
app/core/mongodb_rag.py ADDED
@@ -0,0 +1,378 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MongoDB Atlas Vector Search RAG Service with agent-specific collections.
3
+
4
+ Each agent role has its own collection with specialized examples:
5
+ - Product Owner: PRDs, user stories, acceptance criteria
6
+ - Business Analyst: BRDs, process flows
7
+ - Solution Architect: System designs, ADRs
8
+ - etc.
9
+
10
+ This enables more relevant RAG retrieval per agent specialty.
11
+ """
12
+
13
+ import os
14
+ from typing import Any
15
+
16
+ from langchain_core.documents import Document
17
+ from langchain_core.retrievers import BaseRetriever
18
+ from langchain_core.vectorstores import VectorStore
19
+
20
+ from .llm_factory import get_embeddings_model
21
+ from .observability import get_logger
22
+ from .schemas import TeamRole
23
+
24
+ logger = get_logger("mongodb_rag")
25
+
26
+
27
+ # Mapping from TeamRole to MongoDB collection name
28
+ # Coordinator roles (PROJECT_REFINER, SPEC_COORDINATOR, JUDGE) don't need RAG
29
+ # as they synthesize from other agent outputs
30
+ ROLE_COLLECTION_MAP: dict[TeamRole, str | None] = {
31
+ TeamRole.PRODUCT_OWNER: "rag_product_owner",
32
+ TeamRole.BUSINESS_ANALYST: "rag_business_analyst",
33
+ TeamRole.SOLUTION_ARCHITECT: "rag_solution_architect",
34
+ TeamRole.DATA_ARCHITECT: "rag_data_architect",
35
+ TeamRole.SECURITY_ANALYST: "rag_security_analyst",
36
+ TeamRole.UX_DESIGNER: "rag_ux_designer",
37
+ TeamRole.API_DESIGNER: "rag_api_designer",
38
+ TeamRole.QA_STRATEGIST: "rag_qa_strategist",
39
+ TeamRole.DEVOPS_ARCHITECT: "rag_devops_architect",
40
+ TeamRole.ENVIRONMENT_ENGINEER: "rag_environment_engineer",
41
+ TeamRole.TECHNICAL_WRITER: "rag_technical_writer",
42
+ # Coordinator roles - no RAG needed
43
+ TeamRole.PROJECT_REFINER: None,
44
+ TeamRole.SPEC_COORDINATOR: None,
45
+ TeamRole.JUDGE: None,
46
+ }
47
+
48
+ # All collection names for setup/seeding scripts
49
+ ALL_RAG_COLLECTIONS = [
50
+ name for name in ROLE_COLLECTION_MAP.values() if name is not None
51
+ ]
52
+
53
+
54
+ class MongoDBRAGService:
55
+ """
56
+ RAG Service with MongoDB Atlas Vector Search for agent-specific retrieval.
57
+
58
+ Features:
59
+ - Agent-specific collections for specialized examples
60
+ - Lazy initialization of vector stores
61
+ - Health check for connection monitoring
62
+ - Document management (add/delete)
63
+ """
64
+
65
+ def __init__(self):
66
+ self.embeddings = get_embeddings_model()
67
+ self._client: Any | None = None # MongoClient
68
+ self._db_name: str = os.getenv("MONGODB_DATABASE", "specs_before_code")
69
+ self._index_name: str = os.getenv("MONGODB_INDEX_NAME", "vector_index")
70
+ self._vector_stores: dict[TeamRole, VectorStore] = {}
71
+ self._initialized = False
72
+ self._initialize_connection()
73
+
74
+ def _initialize_connection(self) -> None:
75
+ """Initialize MongoDB connection."""
76
+ uri = os.getenv("MONGODB_URI")
77
+ if not uri:
78
+ logger.warning("MONGODB_URI not set - MongoDB RAG disabled")
79
+ return
80
+
81
+ try:
82
+ from pymongo import MongoClient
83
+
84
+ self._client = MongoClient(
85
+ uri,
86
+ maxPoolSize=10,
87
+ minPoolSize=2,
88
+ maxIdleTimeMS=30000,
89
+ serverSelectionTimeoutMS=5000,
90
+ )
91
+ # Verify connection
92
+ self._client.admin.command("ping")
93
+ self._initialized = True
94
+ logger.info(
95
+ "Connected to MongoDB Atlas",
96
+ data={"database": self._db_name},
97
+ )
98
+ except ImportError:
99
+ logger.error("pymongo not installed - run: uv add pymongo")
100
+ self._client = None
101
+ except Exception as e:
102
+ logger.error(f"Failed to connect to MongoDB: {e}")
103
+ self._client = None
104
+
105
+ def _get_collection(self, role: TeamRole) -> Any | None:
106
+ """Get MongoDB collection for a specific agent role."""
107
+ if not self._client:
108
+ return None
109
+
110
+ collection_name = ROLE_COLLECTION_MAP.get(role)
111
+ if not collection_name:
112
+ logger.debug(f"No RAG collection mapped for role: {role.value}")
113
+ return None
114
+
115
+ return self._client[self._db_name][collection_name]
116
+
117
+ def _get_vector_store(self, role: TeamRole) -> VectorStore | None:
118
+ """Get or create vector store for a specific agent role (lazy init)."""
119
+ # Return cached store if available
120
+ if role in self._vector_stores:
121
+ return self._vector_stores[role]
122
+
123
+ collection = self._get_collection(role)
124
+ if collection is None:
125
+ return None
126
+
127
+ try:
128
+ from langchain_mongodb import MongoDBAtlasVectorSearch
129
+
130
+ vector_store = MongoDBAtlasVectorSearch(
131
+ collection=collection,
132
+ embedding=self.embeddings,
133
+ index_name=self._index_name,
134
+ text_key="content",
135
+ embedding_key="embedding",
136
+ )
137
+
138
+ self._vector_stores[role] = vector_store
139
+ logger.debug(f"Initialized vector store for {role.value}")
140
+ return vector_store
141
+
142
+ except ImportError:
143
+ logger.error(
144
+ "langchain-mongodb not installed - run: uv add langchain-mongodb"
145
+ )
146
+ return None
147
+ except Exception as e:
148
+ logger.error(f"Error creating vector store for {role.value}: {e}")
149
+ return None
150
+
151
+ def retrieve(
152
+ self,
153
+ query: str,
154
+ role: TeamRole,
155
+ k: int = 3,
156
+ ) -> list[Document]:
157
+ """
158
+ Retrieve relevant documents for a specific agent role.
159
+
160
+ Args:
161
+ query: The search query (usually project description or context)
162
+ role: The agent role to retrieve examples for
163
+ k: Number of documents to retrieve (default: 3)
164
+
165
+ Returns:
166
+ List of relevant Document objects, empty if no matches or error
167
+ """
168
+ vector_store = self._get_vector_store(role)
169
+ if not vector_store:
170
+ logger.debug(f"No vector store available for role {role.value}")
171
+ return []
172
+
173
+ try:
174
+ docs = vector_store.similarity_search(query, k=k)
175
+ logger.info(
176
+ f"Retrieved {len(docs)} docs for {role.value}",
177
+ data={"role": role.value, "count": len(docs)},
178
+ )
179
+ return docs
180
+ except Exception as e:
181
+ logger.error(f"RAG retrieval error for {role.value}: {e}")
182
+ return []
183
+
184
+ def get_retriever(
185
+ self,
186
+ role: TeamRole,
187
+ k: int = 3,
188
+ search_type: str = "similarity",
189
+ ) -> BaseRetriever | None:
190
+ """
191
+ Get a LangChain retriever for a specific agent role.
192
+
193
+ Useful for LCEL chain composition with RunnablePassthrough.
194
+
195
+ Args:
196
+ role: The agent role for role-specific retrieval
197
+ k: Number of documents to retrieve
198
+ search_type: Type of search ("similarity" or "mmr")
199
+
200
+ Returns:
201
+ LangChain BaseRetriever or None if unavailable
202
+ """
203
+ vector_store = self._get_vector_store(role)
204
+ if not vector_store:
205
+ return None
206
+
207
+ return vector_store.as_retriever(
208
+ search_type=search_type,
209
+ search_kwargs={"k": k},
210
+ )
211
+
212
+ def format_docs(self, docs: list[Document]) -> str:
213
+ """
214
+ Format retrieved documents for prompt injection.
215
+
216
+ Args:
217
+ docs: List of retrieved documents
218
+
219
+ Returns:
220
+ Formatted string with examples, or message if empty
221
+ """
222
+ if not docs:
223
+ return "No relevant examples found in knowledge base."
224
+
225
+ formatted = []
226
+ for i, doc in enumerate(docs, 1):
227
+ source = doc.metadata.get("source", "Unknown")
228
+ role = doc.metadata.get("role", "")
229
+
230
+ header = f"### Example {i}"
231
+ if source != "Unknown":
232
+ header += f" (Source: {source})"
233
+ if role:
234
+ header += f" [{role}]"
235
+
236
+ formatted.append(f"{header}\n{doc.page_content}")
237
+
238
+ return "\n\n---\n\n".join(formatted)
239
+
240
+ async def add_documents(
241
+ self,
242
+ documents: list[Document],
243
+ role: TeamRole,
244
+ ) -> list[str]:
245
+ """
246
+ Add documents to an agent's RAG collection.
247
+
248
+ Args:
249
+ documents: Documents to add (will be embedded)
250
+ role: Target agent role (determines collection)
251
+
252
+ Returns:
253
+ List of inserted document IDs
254
+
255
+ Raises:
256
+ ValueError: If no collection exists for the role
257
+ """
258
+ vector_store = self._get_vector_store(role)
259
+ if not vector_store:
260
+ raise ValueError(f"No RAG collection for role {role.value}")
261
+
262
+ # Add role metadata to all documents
263
+ for doc in documents:
264
+ doc.metadata["role"] = role.value
265
+
266
+ try:
267
+ ids = await vector_store.aadd_documents(documents)
268
+ logger.info(
269
+ f"Added {len(ids)} documents to {role.value}",
270
+ data={"role": role.value, "count": len(ids)},
271
+ )
272
+ return ids
273
+ except Exception as e:
274
+ logger.error(f"Error adding documents for {role.value}: {e}")
275
+ raise
276
+
277
+ async def delete_documents(
278
+ self,
279
+ ids: list[str],
280
+ role: TeamRole,
281
+ ) -> bool:
282
+ """
283
+ Delete documents from an agent's collection by ID.
284
+
285
+ Args:
286
+ ids: Document IDs to delete
287
+ role: Agent role (determines collection)
288
+
289
+ Returns:
290
+ True if successful, False otherwise
291
+ """
292
+ vector_store = self._get_vector_store(role)
293
+ if not vector_store:
294
+ return False
295
+
296
+ try:
297
+ if hasattr(vector_store, "adelete"):
298
+ await vector_store.adelete(ids=ids)
299
+ elif hasattr(vector_store, "delete"):
300
+ vector_store.delete(ids=ids)
301
+ else:
302
+ logger.warning("Vector store does not support deletion")
303
+ return False
304
+ return True
305
+ except Exception as e:
306
+ logger.error(f"Error deleting documents for {role.value}: {e}")
307
+ return False
308
+
309
+ def health_check(self) -> dict[str, Any]:
310
+ """
311
+ Return health status of MongoDB connection.
312
+
313
+ Returns:
314
+ Dict with status, database name, and collection info
315
+ """
316
+ if not self._client:
317
+ return {
318
+ "status": "disconnected",
319
+ "message": "MONGODB_URI not configured or connection failed",
320
+ }
321
+
322
+ try:
323
+ self._client.admin.command("ping")
324
+
325
+ # Get collection stats
326
+ db = self._client[self._db_name]
327
+ existing_collections = set(db.list_collection_names())
328
+ configured_collections = [c for c in ALL_RAG_COLLECTIONS if c]
329
+
330
+ return {
331
+ "status": "connected",
332
+ "database": self._db_name,
333
+ "index_name": self._index_name,
334
+ "configured_collections": configured_collections,
335
+ "existing_collections": [
336
+ c for c in configured_collections if c in existing_collections
337
+ ],
338
+ "missing_collections": [
339
+ c for c in configured_collections if c not in existing_collections
340
+ ],
341
+ }
342
+ except Exception as e:
343
+ return {
344
+ "status": "error",
345
+ "message": str(e),
346
+ }
347
+
348
+ def is_available(self) -> bool:
349
+ """Check if MongoDB RAG is available and connected."""
350
+ return self._initialized and self._client is not None
351
+
352
+ def get_roles_with_rag(self) -> list[TeamRole]:
353
+ """Get list of roles that have RAG collections configured."""
354
+ return [role for role, coll in ROLE_COLLECTION_MAP.items() if coll is not None]
355
+
356
+
357
+ # Singleton instance
358
+ _mongodb_rag_service: MongoDBRAGService | None = None
359
+
360
+
361
+ def get_mongodb_rag_service() -> MongoDBRAGService:
362
+ """
363
+ Get singleton instance of MongoDB RAG service.
364
+
365
+ Uses module-level singleton for connection reuse.
366
+ """
367
+ global _mongodb_rag_service
368
+ if _mongodb_rag_service is None:
369
+ _mongodb_rag_service = MongoDBRAGService()
370
+ return _mongodb_rag_service
371
+
372
+
373
+ def reset_mongodb_rag_service() -> None:
374
+ """Reset the singleton (useful for testing)."""
375
+ global _mongodb_rag_service
376
+ if _mongodb_rag_service and _mongodb_rag_service._client:
377
+ _mongodb_rag_service._client.close()
378
+ _mongodb_rag_service = None
app/core/orchestrator.py CHANGED
@@ -102,40 +102,112 @@ class Orchestrator:
102
  ) -> AsyncIterator[dict[str, Any]]:
103
  """
104
  Run the pipeline with streaming output for each agent.
 
 
 
 
 
 
 
 
 
 
 
105
  """
106
- # Build initial context similarly
107
  initial_context = f"Project Description: {project_request.description}"
108
 
109
- # Gather RAG context first
110
  yield {"type": "status", "message": "Starting Multi-Agent Pipeline..."}
111
 
 
 
 
112
  yield {
113
- "type": "status",
114
- "message": "Pipeline execution started. Please wait for agents...",
 
115
  }
116
 
117
- async for event in self.graph.astream(
118
- {
119
- "context": initial_context,
120
- "retrieval_context": "",
121
- "history": [],
122
- "outputs": {},
123
- "current_role": "",
124
- "feedback": "",
125
- "retry_count": 0,
126
- "judge_results": {},
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  }
128
- ):
129
- # Inspect event to see which node finished
130
- for key, value in event.items():
131
- if key in [r.value for r in TeamRole]:
132
- yield {
133
- "type": "agent_complete",
134
- "role": key,
135
- "content_length": len(value.get("outputs", {}).get(key, "")),
136
- }
137
-
138
- yield {"type": "pipeline_complete", "markdown_outputs": {}, "judge_results": {}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
139
 
140
  def _build_graph(self):
141
  graph = StateGraph(AgentState)
@@ -256,11 +328,6 @@ class Orchestrator:
256
  graph.add_edge(source, judge)
257
 
258
  # Determine success destination
259
- # If multiple next_nodes, we assume they are handled by a single 'gate' or we fan out.
260
- # Here we assume the input `next_nodes` leads to a single logical step (like a Gate)
261
- # OR we are just passing the first one if logic dictates.
262
- # In our graph, we route to Gates mostly.
263
-
264
  success_dest = get_dest_value(next_nodes[0])
265
 
266
  graph.add_conditional_edges(
 
102
  ) -> AsyncIterator[dict[str, Any]]:
103
  """
104
  Run the pipeline with streaming output for each agent.
105
+
106
+ Emits SSE events:
107
+ - status: General status message
108
+ - context_ready: RAG context has been gathered
109
+ - agent_start: An agent has started processing
110
+ - chunk: A chunk of content from streaming
111
+ - agent_complete: An agent has finished
112
+ - judge_start: Judge evaluation started
113
+ - judge_complete: Judge evaluation finished
114
+ - pipeline_complete: Full pipeline finished with results
115
+ - error: Error occurred
116
  """
 
117
  initial_context = f"Project Description: {project_request.description}"
118
 
 
119
  yield {"type": "status", "message": "Starting Multi-Agent Pipeline..."}
120
 
121
+ query = initial_context
122
+ docs = self.rag_service.retrieve(query, k=3)
123
+ retrieval_context = self.rag_service.format_docs(docs)
124
  yield {
125
+ "type": "context_ready",
126
+ "message": f"Retrieved {len(docs)} context documents",
127
+ "docs_count": len(docs),
128
  }
129
 
130
+ all_outputs: dict[str, str] = {}
131
+ all_judge_results: dict[str, dict[str, Any]] = {}
132
+ history: list[AgentResponse] = []
133
+
134
+ execution_order = [
135
+ TeamRole.PROJECT_REFINER,
136
+ TeamRole.PRODUCT_OWNER,
137
+ TeamRole.BUSINESS_ANALYST,
138
+ TeamRole.SOLUTION_ARCHITECT,
139
+ TeamRole.DATA_ARCHITECT,
140
+ TeamRole.SECURITY_ANALYST,
141
+ TeamRole.UX_DESIGNER,
142
+ TeamRole.API_DESIGNER,
143
+ TeamRole.QA_STRATEGIST,
144
+ TeamRole.DEVOPS_ARCHITECT,
145
+ TeamRole.ENVIRONMENT_ENGINEER,
146
+ TeamRole.TECHNICAL_WRITER,
147
+ TeamRole.SPEC_COORDINATOR,
148
+ ]
149
+
150
+ for role in execution_order:
151
+ yield {"type": "agent_start", "role": role.value}
152
+
153
+ deps = AGENT_DEPENDENCIES.get(role, [])
154
+ filtered_history = []
155
+
156
+ if deps == ["*"]:
157
+ filtered_history = history
158
+ else:
159
+ core_roles = [TeamRole.PROJECT_REFINER, TeamRole.PRODUCT_OWNER]
160
+ allowed_roles = set(
161
+ [d.value for d in deps] + [c.value for c in core_roles]
162
+ )
163
+ for msg in history:
164
+ if msg.role.value in allowed_roles:
165
+ filtered_history.append(msg)
166
+
167
+ response = await self.agent_system.process_step(
168
+ role=role,
169
+ context=initial_context,
170
+ previous_outputs=filtered_history,
171
+ feedback="",
172
+ retrieval_context=retrieval_context,
173
+ )
174
+
175
+ history.append(response)
176
+ all_outputs[role.value] = response.content
177
+
178
+ yield {
179
+ "type": "agent_complete",
180
+ "role": role.value,
181
+ "content_length": len(response.content),
182
  }
183
+
184
+ if role in self.judged_roles:
185
+ yield {"type": "judge_start", "role": role.value}
186
+
187
+ judge_output = await self.agent_system.evaluate_step(
188
+ role=role, content=response.content, context=initial_context
189
+ )
190
+
191
+ all_judge_results[role.value] = {
192
+ "is_approved": judge_output.is_approved,
193
+ "score": judge_output.score,
194
+ "issues_count": len(judge_output.issues),
195
+ "recommended_action": judge_output.recommended_action,
196
+ "feedback": judge_output.feedback,
197
+ }
198
+
199
+ yield {
200
+ "type": "judge_complete",
201
+ "role": role.value,
202
+ "is_approved": judge_output.is_approved,
203
+ "score": judge_output.score,
204
+ }
205
+
206
+ yield {
207
+ "type": "pipeline_complete",
208
+ "markdown_outputs": all_outputs,
209
+ "judge_results": all_judge_results,
210
+ }
211
 
212
  def _build_graph(self):
213
  graph = StateGraph(AgentState)
 
328
  graph.add_edge(source, judge)
329
 
330
  # Determine success destination
 
 
 
 
 
331
  success_dest = get_dest_value(next_nodes[0])
332
 
333
  graph.add_conditional_edges(
app/core/rag.py CHANGED
@@ -1,10 +1,14 @@
1
  """
2
  RAG (Retrieval-Augmented Generation) Service.
 
 
 
 
3
  Features:
4
- - Pinecone vectorstore for production (with fallback to in-memory)
 
5
  - LangChain Retriever interface for RAG chains
6
  - Document ingestion from corpus directory
7
- - Embedding caching via Redis (optional)
8
  """
9
 
10
  import os
@@ -18,95 +22,78 @@ from langchain_core.vectorstores import InMemoryVectorStore, VectorStore
18
  from langchain_text_splitters import RecursiveCharacterTextSplitter
19
 
20
  from .llm_factory import get_embeddings_model
 
21
  from .observability import get_logger
 
22
 
23
  load_dotenv()
24
  logger = get_logger("rag")
 
25
  # Define paths
26
  BASE_DIR = Path(__file__).resolve().parents[2]
27
  CORPUS_DIR = BASE_DIR / "corpus_rag"
28
  VECTOR_STORE_PATH = BASE_DIR / "public" / "vector_store"
 
29
  # Error messages
30
  ERR_VECTOR_STORE_NOT_INIT = "Vector store not initialized"
31
 
32
 
33
  class RAGService:
34
  """
35
- RAG Service with Pinecone vectorstore integration.
36
- Supports:
37
- - Pinecone for production (requires PINECONE_API_KEY and PINECONE_INDEX)
38
- - In-memory vectorstore for local development
39
- - LangChain Retriever interface for LCEL chains
 
 
 
 
40
  """
41
 
42
  def __init__(self):
43
  self.embeddings = get_embeddings_model()
44
- self.vector_store: VectorStore | None = None
45
- self._initialize_vector_store()
46
-
47
- def _initialize_vector_store(self):
48
- """Initialize vectorstore with Pinecone or fallback."""
49
- pinecone_api_key = os.getenv("PINECONE_API_KEY")
50
- pinecone_index = os.getenv("PINECONE_INDEX")
51
- if pinecone_api_key and pinecone_index:
52
- self._init_pinecone(pinecone_api_key, pinecone_index)
53
- else:
54
- self._init_fallback()
55
-
56
- def _init_pinecone(self, api_key: str, index_name: str):
57
- """Initialize Pinecone vectorstore."""
58
- try:
59
- from langchain_pinecone import PineconeVectorStore
60
- from pinecone import Pinecone
61
-
62
- logger.info("Initializing Pinecone vectorstore", data={"index": index_name})
63
- # Initialize Pinecone client
64
- pc = Pinecone(api_key=api_key)
65
- # Get the index
66
- index = pc.Index(index_name)
67
- # Create LangChain vectorstore
68
- self.vector_store = PineconeVectorStore(
69
- index=index,
70
- embedding=self.embeddings,
71
- text_key="page_content",
72
- )
73
- logger.info("Pinecone vectorstore initialized successfully")
74
- except ImportError as e:
75
- logger.warning(f"Pinecone not installed: {e}. Using fallback.")
76
- self._init_fallback()
77
- except Exception as e:
78
- logger.error(f"Failed to initialize Pinecone: {e}. Using fallback.")
79
- self._init_fallback()
80
-
81
- def _init_fallback(self):
82
- """Initialize fallback in-memory vectorstore."""
83
- logger.info("Using in-memory vectorstore (development mode)")
84
- # In-memory store doesn't persist to disk in this simplified version
85
- # to avoid dependency on custom pickling logic from rag_simple.
86
- # It simply rebuilds from corpus on startup.
87
- self._build_vector_store()
88
-
89
- def _build_vector_store(self):
90
- """Build vectorstore from corpus documents."""
91
  if not CORPUS_DIR.exists():
92
  logger.warning(f"Corpus directory not found: {CORPUS_DIR}")
93
- self._create_empty_store()
94
  return
95
 
96
- # Load documents
97
  documents = self._load_documents()
98
-
99
  if not documents:
100
- logger.warning("No documents found to ingest")
101
- self._create_empty_store()
102
  return
103
 
104
- # Split documents into chunks
105
  chunks = self._split_documents(documents)
106
  logger.info(f"Created {len(chunks)} chunks from {len(documents)} documents")
107
 
108
- # Create vector store
109
- self._create_store_from_documents(chunks)
 
 
 
110
 
111
  def _load_documents(self) -> list[Document]:
112
  """Load documents from corpus directory."""
@@ -139,60 +126,91 @@ class RAGService:
139
  )
140
  return text_splitter.split_documents(documents)
141
 
142
- def _create_empty_store(self):
143
- """Create an empty in-memory vectorstore."""
144
- self.vector_store = InMemoryVectorStore(embedding=self.embeddings)
145
- logger.info("Created empty in-memory vectorstore")
 
 
 
 
146
 
147
- def _create_store_from_documents(self, documents: list[Document]):
148
- """Create vectorstore from documents."""
149
- self.vector_store = InMemoryVectorStore.from_documents(
150
- documents=documents,
151
- embedding=self.embeddings,
152
- )
153
- logger.info("Created in-memory vectorstore from documents")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
154
 
155
  def get_retriever(
156
  self,
 
157
  k: int = 3,
158
  filter: dict[str, Any] | None = None,
159
  search_type: str = "similarity",
160
- ) -> BaseRetriever:
161
  """
162
  Get a LangChain Retriever for RAG chains.
163
- """
164
- if not self.vector_store:
165
- raise RuntimeError(ERR_VECTOR_STORE_NOT_INIT)
166
 
167
- search_kwargs = {"k": k}
168
- if filter:
169
- search_kwargs["filter"] = filter
 
 
170
 
171
- return self.vector_store.as_retriever(
172
- search_type=search_type,
173
- search_kwargs=search_kwargs,
174
- )
175
-
176
- def retrieve(self, query: str, k: int = 3) -> list[Document]:
177
- """
178
- Retrieve relevant documents for a query.
179
  """
180
- if not self.vector_store:
181
- logger.warning("Vector store not initialized")
182
- return []
183
-
184
- try:
185
- docs = self.vector_store.similarity_search(query, k=k)
186
- logger.debug(f"Retrieved {len(docs)} documents for query")
187
- return docs
188
- except Exception as e:
189
- logger.error(f"Error during retrieval: {e}")
190
- return []
 
 
 
 
 
 
 
191
 
192
  def format_docs(self, docs: list[Document]) -> str:
193
  """
194
  Format retrieved documents into a string for context injection.
 
 
 
195
  """
 
 
 
196
  if not docs:
197
  return "No relevant context found."
198
 
@@ -205,36 +223,117 @@ class RAGService:
205
  async def add_documents(
206
  self,
207
  documents: list[Document],
 
208
  ids: list[str] | None = None,
209
  ) -> list[str]:
210
  """
211
  Add documents to the vectorstore.
 
 
 
 
 
 
 
 
212
  """
213
- if not self.vector_store:
214
- raise RuntimeError(ERR_VECTOR_STORE_NOT_INIT)
 
 
 
 
 
 
 
 
215
 
216
- # Use add_documents if available (async support varies by store)
217
- if hasattr(self.vector_store, "aadd_documents"):
218
- return await self.vector_store.aadd_documents(documents, ids=ids)
219
- else:
220
- return self.vector_store.add_documents(documents, ids=ids)
221
 
222
- async def delete_documents(self, ids: list[str]) -> bool:
 
 
 
 
223
  """
224
  Delete documents from the vectorstore by ID.
 
 
 
 
 
 
 
225
  """
226
- if not self.vector_store:
227
- raise RuntimeError(ERR_VECTOR_STORE_NOT_INIT)
228
-
229
- try:
230
- if hasattr(self.vector_store, "adelete"):
231
- await self.vector_store.adelete(ids=ids)
232
- elif hasattr(self.vector_store, "delete"):
233
- self.vector_store.delete(ids=ids)
234
- else:
235
- logger.warning("Vectorstore does not support deletion")
 
 
 
 
 
 
 
236
  return False
237
- return True
238
- except Exception as e:
239
- logger.error(f"Error deleting documents: {e}")
240
- return False
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  """
2
  RAG (Retrieval-Augmented Generation) Service.
3
+
4
+ Unified RAG interface with MongoDB Atlas as primary and in-memory fallback.
5
+ Supports both role-specific retrieval (via MongoDB) and generic retrieval.
6
+
7
  Features:
8
+ - MongoDB Atlas Vector Search for production (agent-specific collections)
9
+ - In-memory vectorstore fallback for local development
10
  - LangChain Retriever interface for RAG chains
11
  - Document ingestion from corpus directory
 
12
  """
13
 
14
  import os
 
22
  from langchain_text_splitters import RecursiveCharacterTextSplitter
23
 
24
  from .llm_factory import get_embeddings_model
25
+ from .mongodb_rag import MongoDBRAGService, get_mongodb_rag_service
26
  from .observability import get_logger
27
+ from .schemas import TeamRole
28
 
29
  load_dotenv()
30
  logger = get_logger("rag")
31
+
32
  # Define paths
33
  BASE_DIR = Path(__file__).resolve().parents[2]
34
  CORPUS_DIR = BASE_DIR / "corpus_rag"
35
  VECTOR_STORE_PATH = BASE_DIR / "public" / "vector_store"
36
+
37
  # Error messages
38
  ERR_VECTOR_STORE_NOT_INIT = "Vector store not initialized"
39
 
40
 
41
  class RAGService:
42
  """
43
+ Unified RAG Service with MongoDB primary and in-memory fallback.
44
+
45
+ Priority order:
46
+ 1. MongoDB Atlas Vector Search (if MONGODB_URI configured)
47
+ 2. In-memory vectorstore (development fallback)
48
+
49
+ For role-specific retrieval, use the `role` parameter in retrieve/get_retriever.
50
+ When role is provided and MongoDB is available, retrieval is from agent-specific
51
+ collections for more relevant examples.
52
  """
53
 
54
  def __init__(self):
55
  self.embeddings = get_embeddings_model()
56
+ self._mongodb_service: MongoDBRAGService | None = None
57
+ self._fallback_store: VectorStore | None = None
58
+ self._initialize()
59
+
60
+ def _initialize(self) -> None:
61
+ """Initialize RAG backends in priority order."""
62
+ # Try MongoDB first
63
+ if os.getenv("MONGODB_URI"):
64
+ self._mongodb_service = get_mongodb_rag_service()
65
+ if self._mongodb_service.is_available():
66
+ logger.info("Using MongoDB Atlas for RAG (primary)")
67
+ # Still initialize fallback for non-role-specific queries
68
+ self._init_fallback_store()
69
+ return
70
+
71
+ # Fallback to in-memory only
72
+ logger.info("Using in-memory vector store only (MongoDB unavailable)")
73
+ self._init_fallback_store()
74
+
75
+ def _init_fallback_store(self) -> None:
76
+ """Initialize fallback in-memory vectorstore from corpus."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  if not CORPUS_DIR.exists():
78
  logger.warning(f"Corpus directory not found: {CORPUS_DIR}")
79
+ self._fallback_store = InMemoryVectorStore(embedding=self.embeddings)
80
  return
81
 
82
+ # Load and split documents
83
  documents = self._load_documents()
 
84
  if not documents:
85
+ logger.warning("No documents found for fallback store")
86
+ self._fallback_store = InMemoryVectorStore(embedding=self.embeddings)
87
  return
88
 
 
89
  chunks = self._split_documents(documents)
90
  logger.info(f"Created {len(chunks)} chunks from {len(documents)} documents")
91
 
92
+ self._fallback_store = InMemoryVectorStore.from_documents(
93
+ documents=chunks,
94
+ embedding=self.embeddings,
95
+ )
96
+ logger.info("Fallback in-memory vectorstore initialized")
97
 
98
  def _load_documents(self) -> list[Document]:
99
  """Load documents from corpus directory."""
 
126
  )
127
  return text_splitter.split_documents(documents)
128
 
129
+ def retrieve(
130
+ self,
131
+ query: str,
132
+ role: TeamRole | None = None,
133
+ k: int = 3,
134
+ ) -> list[Document]:
135
+ """
136
+ Retrieve relevant documents for a query.
137
 
138
+ Args:
139
+ query: Search query (project description or context)
140
+ role: Optional agent role for role-specific retrieval (MongoDB only)
141
+ k: Number of documents to retrieve
142
+
143
+ Returns:
144
+ List of relevant documents
145
+ """
146
+ # Use MongoDB for role-specific retrieval if available
147
+ if self._mongodb_service and self._mongodb_service.is_available() and role:
148
+ docs = self._mongodb_service.retrieve(query, role, k)
149
+ if docs:
150
+ return docs
151
+ # Fall through to fallback if no MongoDB results
152
+
153
+ # Fallback retrieval
154
+ if self._fallback_store:
155
+ try:
156
+ docs = self._fallback_store.similarity_search(query, k=k)
157
+ logger.debug(f"Retrieved {len(docs)} documents from fallback store")
158
+ return docs
159
+ except Exception as e:
160
+ logger.error(f"Error during fallback retrieval: {e}")
161
+ return []
162
+
163
+ logger.warning("No vector store available for retrieval")
164
+ return []
165
 
166
  def get_retriever(
167
  self,
168
+ role: TeamRole | None = None,
169
  k: int = 3,
170
  filter: dict[str, Any] | None = None,
171
  search_type: str = "similarity",
172
+ ) -> BaseRetriever | None:
173
  """
174
  Get a LangChain Retriever for RAG chains.
 
 
 
175
 
176
+ Args:
177
+ role: Optional agent role for role-specific retrieval
178
+ k: Number of documents to retrieve
179
+ filter: Optional filter dict (fallback store only)
180
+ search_type: "similarity" or "mmr"
181
 
182
+ Returns:
183
+ LangChain BaseRetriever or None
 
 
 
 
 
 
184
  """
185
+ # Use MongoDB for role-specific retrieval if available
186
+ if self._mongodb_service and self._mongodb_service.is_available() and role:
187
+ retriever = self._mongodb_service.get_retriever(role, k, search_type)
188
+ if retriever:
189
+ return retriever
190
+
191
+ # Fallback retriever
192
+ if self._fallback_store:
193
+ search_kwargs: dict[str, Any] = {"k": k}
194
+ if filter:
195
+ search_kwargs["filter"] = filter
196
+
197
+ return self._fallback_store.as_retriever(
198
+ search_type=search_type,
199
+ search_kwargs=search_kwargs,
200
+ )
201
+
202
+ return None
203
 
204
  def format_docs(self, docs: list[Document]) -> str:
205
  """
206
  Format retrieved documents into a string for context injection.
207
+
208
+ Uses MongoDB service formatter if available (includes role metadata),
209
+ otherwise uses simple formatting.
210
  """
211
+ if self._mongodb_service and self._mongodb_service.is_available():
212
+ return self._mongodb_service.format_docs(docs)
213
+
214
  if not docs:
215
  return "No relevant context found."
216
 
 
223
  async def add_documents(
224
  self,
225
  documents: list[Document],
226
+ role: TeamRole | None = None,
227
  ids: list[str] | None = None,
228
  ) -> list[str]:
229
  """
230
  Add documents to the vectorstore.
231
+
232
+ Args:
233
+ documents: Documents to add
234
+ role: Agent role (required for MongoDB, determines collection)
235
+ ids: Optional document IDs
236
+
237
+ Returns:
238
+ List of document IDs
239
  """
240
+ # Add to MongoDB if role specified and available
241
+ if self._mongodb_service and self._mongodb_service.is_available() and role:
242
+ return await self._mongodb_service.add_documents(documents, role)
243
+
244
+ # Add to fallback store
245
+ if self._fallback_store:
246
+ if hasattr(self._fallback_store, "aadd_documents"):
247
+ return await self._fallback_store.aadd_documents(documents, ids=ids)
248
+ else:
249
+ return self._fallback_store.add_documents(documents, ids=ids)
250
 
251
+ raise RuntimeError(ERR_VECTOR_STORE_NOT_INIT)
 
 
 
 
252
 
253
+ async def delete_documents(
254
+ self,
255
+ ids: list[str],
256
+ role: TeamRole | None = None,
257
+ ) -> bool:
258
  """
259
  Delete documents from the vectorstore by ID.
260
+
261
+ Args:
262
+ ids: Document IDs to delete
263
+ role: Agent role (required for MongoDB deletion)
264
+
265
+ Returns:
266
+ True if successful
267
  """
268
+ # Delete from MongoDB if role specified and available
269
+ if self._mongodb_service and self._mongodb_service.is_available() and role:
270
+ return await self._mongodb_service.delete_documents(ids, role)
271
+
272
+ # Delete from fallback store
273
+ if self._fallback_store:
274
+ try:
275
+ if hasattr(self._fallback_store, "adelete"):
276
+ await self._fallback_store.adelete(ids=ids)
277
+ elif hasattr(self._fallback_store, "delete"):
278
+ self._fallback_store.delete(ids=ids)
279
+ else:
280
+ logger.warning("Fallback store does not support deletion")
281
+ return False
282
+ return True
283
+ except Exception as e:
284
+ logger.error(f"Error deleting documents: {e}")
285
  return False
286
+
287
+ return False
288
+
289
+ def health_check(self) -> dict[str, Any]:
290
+ """
291
+ Return health status of RAG service.
292
+
293
+ Returns:
294
+ Dict with status and backend information
295
+ """
296
+ result: dict[str, Any] = {
297
+ "fallback_store_initialized": self._fallback_store is not None,
298
+ }
299
+
300
+ if self._mongodb_service:
301
+ result["mongodb"] = self._mongodb_service.health_check()
302
+ result["primary_backend"] = (
303
+ "mongodb" if self._mongodb_service.is_available() else "fallback"
304
+ )
305
+ else:
306
+ result["mongodb"] = {"status": "not_configured"}
307
+ result["primary_backend"] = "fallback"
308
+
309
+ return result
310
+
311
+ def is_mongodb_available(self) -> bool:
312
+ """Check if MongoDB RAG backend is available."""
313
+ return (
314
+ self._mongodb_service is not None and self._mongodb_service.is_available()
315
+ )
316
+
317
+ def get_roles_with_rag(self) -> list[TeamRole]:
318
+ """Get list of agent roles that have RAG collections configured."""
319
+ if self._mongodb_service:
320
+ return self._mongodb_service.get_roles_with_rag()
321
+ return []
322
+
323
+
324
+ # Module-level singleton
325
+ _rag_service: RAGService | None = None
326
+
327
+
328
+ def get_rag_service() -> RAGService:
329
+ """Get singleton RAG service instance."""
330
+ global _rag_service
331
+ if _rag_service is None:
332
+ _rag_service = RAGService()
333
+ return _rag_service
334
+
335
+
336
+ def reset_rag_service() -> None:
337
+ """Reset the RAG service singleton (for testing)."""
338
+ global _rag_service
339
+ _rag_service = None
app/core/schemas.py CHANGED
@@ -44,10 +44,6 @@ class TeamRole(str, Enum):
44
  JUDGE = "judge"
45
 
46
 
47
- # ─────────────────────────────────────────────────────────────────────────────
48
- # Enhanced Judge Output Schema
49
- # ─────────────────────────────────────────────────────────────────────────────
50
-
51
 
52
  class JudgeIssue(BaseModel):
53
  id: str = Field(..., description="Issue ID or related FR/NFR ID")
@@ -69,11 +65,6 @@ class JudgeOutput(BaseModel):
69
  reasoning: str
70
 
71
 
72
- # ─────────────────────────────────────────────────────────────────────────────
73
- # Existing Schemas (unchanged structure, enhanced docs)
74
- # ─────────────────────────────────────────────────────────────────────────────
75
-
76
-
77
  class AgentMessage(BaseModel):
78
  role: TeamRole
79
  content: str
@@ -130,7 +121,7 @@ class TokenData(BaseModel):
130
  class ProjectBase(BaseModel):
131
  title: str
132
  description: str | None = None
133
- artifacts: dict[str, Any] # JSON content
134
 
135
 
136
  class ProjectCreate(ProjectBase):
 
44
  JUDGE = "judge"
45
 
46
 
 
 
 
 
47
 
48
  class JudgeIssue(BaseModel):
49
  id: str = Field(..., description="Issue ID or related FR/NFR ID")
 
65
  reasoning: str
66
 
67
 
 
 
 
 
 
68
  class AgentMessage(BaseModel):
69
  role: TeamRole
70
  content: str
 
121
  class ProjectBase(BaseModel):
122
  title: str
123
  description: str | None = None
124
+ artifacts: dict[str, Any]
125
 
126
 
127
  class ProjectCreate(ProjectBase):
app/prompts/product_owner.md CHANGED
@@ -13,10 +13,12 @@ Great product ownership means translating user needs and business goals into a c
13
  1. Analyze input from the Project Refiner.
14
  2. Define a concise product vision aligned with strategic objectives.
15
  3. Identify and prioritize key features using MoSCoW (Must, Should, Could, Won't).
16
- 4. Write user stories that capture real user goals and benefits (INVEST criteria).
17
  5. Establish clear, testable acceptance criteria for each feature.
18
  6. Document assumptions and open questions.
19
 
 
 
20
  **Output Structure:**
21
  ## MARKDOWN
22
 
@@ -27,20 +29,39 @@ Great product ownership means translating user needs and business goals into a c
27
  ### Must Have (MVP)
28
  - **F1:** [Title] - [Brief description]
29
  - **F2:** [Title] - [Brief description]
 
 
30
 
31
  ### Should Have (Post-MVP)
32
- - **F3:** [Title] - [Brief description]
 
33
 
34
  ## User Stories
35
  1. **US1:** As a [user type], I want [goal] so that [benefit]
36
  - **Acceptance Criteria:**
37
  - [Criterion 1]
38
  - [Criterion 2]
 
 
39
  2. **US2:** As a [user type], I want [goal] so that [benefit]
40
  - **Acceptance Criteria:**
41
  - [Criterion 1]
42
  - [Criterion 2]
43
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  ## Assumptions & Constraints
45
  [List any assumptions made about the project scope or constraints]
46
 
 
13
  1. Analyze input from the Project Refiner.
14
  2. Define a concise product vision aligned with strategic objectives.
15
  3. Identify and prioritize key features using MoSCoW (Must, Should, Could, Won't).
16
+ 4. Write **AT LEAST 4-6 user stories** that capture real user goals and benefits (INVEST criteria).
17
  5. Establish clear, testable acceptance criteria for each feature.
18
  6. Document assumptions and open questions.
19
 
20
+ **IMPORTANT:** Generate AT LEAST 4 user stories (US1, US2, US3, US4 minimum). For MVP scope, aim for 4-6 user stories to provide adequate coverage.
21
+
22
  **Output Structure:**
23
  ## MARKDOWN
24
 
 
29
  ### Must Have (MVP)
30
  - **F1:** [Title] - [Brief description]
31
  - **F2:** [Title] - [Brief description]
32
+ - **F3:** [Title] - [Brief description]
33
+ - **F4:** [Title] - [Brief description]
34
 
35
  ### Should Have (Post-MVP)
36
+ - **F5:** [Title] - [Brief description]
37
+ - **F6:** [Title] - [Brief description]
38
 
39
  ## User Stories
40
  1. **US1:** As a [user type], I want [goal] so that [benefit]
41
  - **Acceptance Criteria:**
42
  - [Criterion 1]
43
  - [Criterion 2]
44
+ - [Criterion 3]
45
+
46
  2. **US2:** As a [user type], I want [goal] so that [benefit]
47
  - **Acceptance Criteria:**
48
  - [Criterion 1]
49
  - [Criterion 2]
50
 
51
+ 3. **US3:** As a [user type], I want [goal] so that [benefit]
52
+ - **Acceptance Criteria:**
53
+ - [Criterion 1]
54
+ - [Criterion 2]
55
+
56
+ 4. **US4:** As a [user type], I want [goal] so that [benefit]
57
+ - **Acceptance Criteria:**
58
+ - [Criterion 1]
59
+ - [Criterion 2]
60
+
61
+ 5. **US5:** As a [user type], I want [goal] so that [benefit]
62
+ - **Acceptance Criteria:**
63
+ - [Criterion 1]
64
+
65
  ## Assumptions & Constraints
66
  [List any assumptions made about the project scope or constraints]
67
 
app/routers/health.py CHANGED
@@ -26,6 +26,14 @@ from app.core.resilience import get_all_circuit_breakers, get_all_request_queues
26
 
27
  load_dotenv()
28
 
 
 
 
 
 
 
 
 
29
  router = APIRouter(prefix="/health", tags=["Health"])
30
 
31
 
@@ -60,15 +68,17 @@ async def readiness_probe():
60
  if not any(check in checks for check in critical_checks):
61
  is_ready = True
62
 
63
- status_code = status.HTTP_200_OK if is_ready else status.HTTP_503_SERVICE_UNAVAILABLE
 
 
64
 
65
  return JSONResponse(
66
  status_code=status_code,
67
  content={
68
  "status": "ready" if is_ready else "not_ready",
69
  "checks": checks,
70
- "timestamp": datetime.now(UTC).isoformat()
71
- }
72
  )
73
 
74
 
@@ -87,24 +97,19 @@ async def detailed_health_check():
87
 
88
  # Get circuit breaker status
89
  circuit_breakers = {
90
- name: cb.get_status()
91
- for name, cb in get_all_circuit_breakers().items()
92
  }
93
 
94
  # Get request queue status
95
  request_queues = {
96
- name: queue.get_status()
97
- for name, queue in get_all_request_queues().items()
98
  }
99
 
100
  # Get provider status
101
  provider_status = get_provider_manager().get_provider_status()
102
 
103
  # Overall status
104
- all_healthy = all(
105
- check.get("status") == "healthy"
106
- for check in checks.values()
107
- )
108
 
109
  return {
110
  "status": "healthy" if all_healthy else "degraded",
@@ -116,15 +121,9 @@ async def detailed_health_check():
116
  "circuit_breakers": circuit_breakers,
117
  "request_queues": request_queues,
118
  "llm_providers": provider_status,
119
- "performance": {
120
- "window_seconds": 300,
121
- "operations": performance_stats
122
- },
123
- "errors": {
124
- "window_seconds": 3600,
125
- "summary": error_summary
126
- },
127
- "cost": cost_stats
128
  }
129
 
130
 
@@ -140,8 +139,7 @@ async def get_metrics():
140
  # Format as Prometheus-style metrics (simplified)
141
  metrics = {
142
  "specsbeforecode_requests_total": sum(
143
- stats.get("count", 0)
144
- for stats in performance_stats.values()
145
  ),
146
  "specsbeforecode_tokens_used_monthly": cost_stats.get("monthly_tokens_used", 0),
147
  "specsbeforecode_budget_remaining": cost_stats.get("budget_remaining", 0),
@@ -152,12 +150,54 @@ async def get_metrics():
152
  for op, stats in performance_stats.items():
153
  safe_op = op.replace(".", "_").replace("-", "_")
154
  metrics[f"specsbeforecode_op_{safe_op}_count"] = stats.get("count", 0)
155
- metrics[f"specsbeforecode_op_{safe_op}_avg_duration_ms"] = stats.get("avg_duration_ms", 0)
156
- metrics[f"specsbeforecode_op_{safe_op}_success_rate"] = stats.get("success_rate", 0)
 
 
 
 
157
 
158
  return metrics
159
 
160
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
161
  async def _run_health_checks() -> dict[str, dict[str, Any]]:
162
  """Run all health checks and return results.
163
 
@@ -191,14 +231,11 @@ def _check_database() -> dict[str, Any]:
191
  with engine.connect() as conn:
192
  conn.execute(text("SELECT 1"))
193
 
194
- return {
195
- "status": "healthy",
196
- "message": "Database connection successful"
197
- }
198
  except Exception as e:
199
  return {
200
  "status": "unhealthy",
201
- "message": f"Database connection failed: {str(e)}"
202
  }
203
 
204
 
@@ -208,10 +245,7 @@ def _check_nvidia_api() -> dict[str, Any]:
208
  base_url = os.getenv("NVIDIA_BASE_URL")
209
 
210
  if not api_key:
211
- return {
212
- "status": "unhealthy",
213
- "message": "NVIDIA_API_KEY not configured"
214
- }
215
 
216
  # Check circuit breaker status instead of making actual API call
217
  circuit_breakers = get_all_circuit_breakers()
@@ -220,13 +254,13 @@ def _check_nvidia_api() -> dict[str, Any]:
220
  if nvidia_cb and nvidia_cb.state.value == "open":
221
  return {
222
  "status": "degraded",
223
- "message": "Circuit breaker is open - service may be experiencing issues"
224
  }
225
 
226
  return {
227
  "status": "healthy",
228
  "message": "NVIDIA API configured",
229
- "base_url": base_url
230
  }
231
 
232
 
@@ -238,12 +272,12 @@ def _check_langsmith() -> dict[str, Any]:
238
  if not api_key:
239
  return {
240
  "status": "degraded",
241
- "message": "LangSmith API key not configured - observability limited"
242
  }
243
 
244
  return {
245
  "status": "healthy",
246
  "message": "LangSmith configured",
247
  "tracing_enabled": tracing,
248
- "project": os.getenv("LANGSMITH_PROJECT", "default")
249
  }
 
26
 
27
  load_dotenv()
28
 
29
+
30
+ # Lazy import for RAG to avoid circular imports
31
+ def _get_rag_service():
32
+ from app.core.rag import get_rag_service
33
+
34
+ return get_rag_service()
35
+
36
+
37
  router = APIRouter(prefix="/health", tags=["Health"])
38
 
39
 
 
68
  if not any(check in checks for check in critical_checks):
69
  is_ready = True
70
 
71
+ status_code = (
72
+ status.HTTP_200_OK if is_ready else status.HTTP_503_SERVICE_UNAVAILABLE
73
+ )
74
 
75
  return JSONResponse(
76
  status_code=status_code,
77
  content={
78
  "status": "ready" if is_ready else "not_ready",
79
  "checks": checks,
80
+ "timestamp": datetime.now(UTC).isoformat(),
81
+ },
82
  )
83
 
84
 
 
97
 
98
  # Get circuit breaker status
99
  circuit_breakers = {
100
+ name: cb.get_status() for name, cb in get_all_circuit_breakers().items()
 
101
  }
102
 
103
  # Get request queue status
104
  request_queues = {
105
+ name: queue.get_status() for name, queue in get_all_request_queues().items()
 
106
  }
107
 
108
  # Get provider status
109
  provider_status = get_provider_manager().get_provider_status()
110
 
111
  # Overall status
112
+ all_healthy = all(check.get("status") == "healthy" for check in checks.values())
 
 
 
113
 
114
  return {
115
  "status": "healthy" if all_healthy else "degraded",
 
121
  "circuit_breakers": circuit_breakers,
122
  "request_queues": request_queues,
123
  "llm_providers": provider_status,
124
+ "performance": {"window_seconds": 300, "operations": performance_stats},
125
+ "errors": {"window_seconds": 3600, "summary": error_summary},
126
+ "cost": cost_stats,
 
 
 
 
 
 
127
  }
128
 
129
 
 
139
  # Format as Prometheus-style metrics (simplified)
140
  metrics = {
141
  "specsbeforecode_requests_total": sum(
142
+ stats.get("count", 0) for stats in performance_stats.values()
 
143
  ),
144
  "specsbeforecode_tokens_used_monthly": cost_stats.get("monthly_tokens_used", 0),
145
  "specsbeforecode_budget_remaining": cost_stats.get("budget_remaining", 0),
 
150
  for op, stats in performance_stats.items():
151
  safe_op = op.replace(".", "_").replace("-", "_")
152
  metrics[f"specsbeforecode_op_{safe_op}_count"] = stats.get("count", 0)
153
+ metrics[f"specsbeforecode_op_{safe_op}_avg_duration_ms"] = stats.get(
154
+ "avg_duration_ms", 0
155
+ )
156
+ metrics[f"specsbeforecode_op_{safe_op}_success_rate"] = stats.get(
157
+ "success_rate", 0
158
+ )
159
 
160
  return metrics
161
 
162
 
163
+ @router.get("/rag")
164
+ async def rag_health_check():
165
+ """
166
+ Check RAG (Retrieval-Augmented Generation) service health.
167
+
168
+ Returns:
169
+ - MongoDB connection status
170
+ - Configured collections
171
+ - Missing collections that need setup
172
+ - Fallback store status
173
+ """
174
+ try:
175
+ rag_service = _get_rag_service()
176
+ health = rag_service.health_check()
177
+
178
+ # Add role information
179
+ roles_with_rag = rag_service.get_roles_with_rag()
180
+ health["roles_with_rag"] = [role.value for role in roles_with_rag]
181
+ health["mongodb_available"] = rag_service.is_mongodb_available()
182
+
183
+ # Determine overall status
184
+ if health.get("mongodb", {}).get("status") == "connected":
185
+ health["status"] = "healthy"
186
+ elif health.get("fallback_store_initialized"):
187
+ health["status"] = "degraded"
188
+ health["message"] = "Using in-memory fallback - MongoDB not available"
189
+ else:
190
+ health["status"] = "unhealthy"
191
+ health["message"] = "No RAG backend available"
192
+
193
+ return health
194
+ except Exception as e:
195
+ return {
196
+ "status": "error",
197
+ "message": str(e),
198
+ }
199
+
200
+
201
  async def _run_health_checks() -> dict[str, dict[str, Any]]:
202
  """Run all health checks and return results.
203
 
 
231
  with engine.connect() as conn:
232
  conn.execute(text("SELECT 1"))
233
 
234
+ return {"status": "healthy", "message": "Database connection successful"}
 
 
 
235
  except Exception as e:
236
  return {
237
  "status": "unhealthy",
238
+ "message": f"Database connection failed: {str(e)}",
239
  }
240
 
241
 
 
245
  base_url = os.getenv("NVIDIA_BASE_URL")
246
 
247
  if not api_key:
248
+ return {"status": "unhealthy", "message": "NVIDIA_API_KEY not configured"}
 
 
 
249
 
250
  # Check circuit breaker status instead of making actual API call
251
  circuit_breakers = get_all_circuit_breakers()
 
254
  if nvidia_cb and nvidia_cb.state.value == "open":
255
  return {
256
  "status": "degraded",
257
+ "message": "Circuit breaker is open - service may be experiencing issues",
258
  }
259
 
260
  return {
261
  "status": "healthy",
262
  "message": "NVIDIA API configured",
263
+ "base_url": base_url,
264
  }
265
 
266
 
 
272
  if not api_key:
273
  return {
274
  "status": "degraded",
275
+ "message": "LangSmith API key not configured - observability limited",
276
  }
277
 
278
  return {
279
  "status": "healthy",
280
  "message": "LangSmith configured",
281
  "tracing_enabled": tracing,
282
+ "project": os.getenv("LANGSMITH_PROJECT", "default"),
283
  }
corpus_rag/RAG_INDEX_DECISION.md ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # RAG Index Decision Log
2
+
3
+ **Date:** February 6, 2026
4
+ **Context:** Testing on MongoDB Atlas Free Tier (M0)
5
+
6
+ ## Decision
7
+
8
+ Due to the free tier's limitation of **3 Atlas Search (FTS) indexes maximum**, only 3 agent collections have active vector search indexes.
9
+
10
+ ### Selected Agents for Indexing
11
+
12
+ | Rank | Agent Role | Rationale |
13
+ |------|------------|-----------|
14
+ | 1 | **Product Owner** | Foundation of all downstream artifacts. PRD quality affects every subsequent phase. |
15
+ | 2 | **Solution Architect** | Critical technical decisions impact feasibility, scalability, and integration across all phases. |
16
+ | 3 | **Technical Writer** | Documentation quality directly affects user-facing output and project success metrics. |
17
+
18
+ ### Excluded Agents
19
+
20
+ | Agent Role | Reason |
21
+ |------------|--------|
22
+ | Business Analyst | Lower priority for MVP testing; business rules can be derived from PO output |
23
+ | Security Analyst | Security patterns can use generic guidance; threat modeling less critical for prototypes |
24
+ | UX Designer | UI patterns are more intuitive; prototyping benefits less from RAG |
25
+ | API Designer | API contracts can be derived from architecture |
26
+ | QA Strategist | Testing approaches are relatively standardized |
27
+ | DevOps Architect | Deployment patterns less critical for initial spec generation |
28
+ | Environment Engineer | Setup guides are procedural and template-based |
29
+ | Data Architect | Data models can be derived from architecture and requirements |
30
31
+
32
+ ## Future Upgrade Path
33
+
34
+ When upgrading to M10+ cluster:
35
+ 1. Enable indexes on all 11 collections
36
+ 2. Update `scripts/setup_mongodb_indexes.py` to remove the index limit logic
37
+ 3. Run seed script again if needed
38
+
39
+ ## Current Index Status
40
+
41
+ ```
42
+ Active Indexes (3):
43
+ - rag_product_owner
44
+ - rag_solution_architect
45
+ - rag_technical_writer
46
+
47
+ Collections without Indexes (8):
48
+ - rag_business_analyst
49
+ - rag_data_architect
50
+ - rag_security_analyst
51
+ - rag_ux_designer
52
+ - rag_api_designer
53
+ - rag_qa_strategist
54
+ - rag_devops_architect
55
+ - rag_environment_engineer
56
+ ```
57
+
58
+ ## Impact on Agents
59
+
60
+ Agents without RAG indexes will fall back to:
61
+ 1. In-memory vector store (if documents exist)
62
+ 2. No retrieval (empty context)
63
+
64
+ For production, all agents should have their own RAG collections indexed.
corpus_rag/api_designer/role_playbook.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Role: API Designer
2
+ Purpose: Define API contracts, endpoints, and data shapes for services.
3
+
4
+ Core responsibilities
5
+ - Design REST/GraphQL endpoints and schemas.
6
+ - Specify request/response formats and error handling.
7
+ - Align with data models and business requirements.
8
+
9
+ Required sections in output
10
+ - API overview and base URL
11
+ - Endpoints with methods and payloads
12
+ - Error model and status codes
13
+ - Authentication and rate limiting notes
14
+
15
+ Quality gates
16
+ - Consistency: aligns with data models and UX needs
17
+ - Completeness: covers all workflows
18
+ - Clarity: unambiguous request/response schemas
corpus_rag/api_designer/standards_quickref.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Standards and frameworks (brief summaries)
2
+
3
+ OpenAPI Specification (OAS)
4
+ Standard for describing REST APIs. Use to produce a machine-readable contract and validation.
5
+
6
+ Microsoft REST API Guidelines
7
+ Conventions for resource naming, filtering, pagination, and errors. Use to standardize endpoints.
8
+
9
+ Google API Design Guide
10
+ Best practices for consistency and long-term evolution. Use for naming and error handling guidance.
corpus_rag/business_analyst/role_playbook.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Role: Business Analyst
2
+ Purpose: Translate product goals into detailed business requirements, workflows, and business rules.
3
+
4
+ Core responsibilities
5
+ - Document workflows and process flows.
6
+ - Capture business rules, exceptions, and edge cases.
7
+ - Clarify functional requirements and constraints.
8
+
9
+ Required sections in output
10
+ - Business objectives and stakeholders
11
+ - Process flows (happy path and exceptions)
12
+ - Business rules and validations
13
+ - Functional requirements list
14
+ - Assumptions, risks, and dependencies
15
+
16
+ Quality gates
17
+ - Completeness: all major workflows covered
18
+ - Consistency: aligns with PRD and data models
19
+ - Traceability: each requirement ties to a goal or story
corpus_rag/business_analyst/standards_quickref.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Standards and frameworks (brief summaries)
2
+
3
+ BABOK (Business Analysis Body of Knowledge)
4
+ Industry standard for eliciting, analyzing, and managing requirements. Use it to structure stakeholder analysis and requirement validation.
5
+
6
+ BPMN 2.0 (Business Process Model and Notation)
7
+ Standard visual language for process flows. Use to represent end-to-end workflows, decision points, and exceptions.
8
+
9
+ ISO/IEC/IEEE 29148
10
+ Defines good requirements characteristics. Use to validate clarity, consistency, and testability.
corpus_rag/data_architect/role_playbook.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Role: Data Architect
2
+ Purpose: Define data models, entities, relationships, and data governance concerns.
3
+
4
+ Core responsibilities
5
+ - Produce entity-relationship model and data dictionary.
6
+ - Define data validation rules and lifecycle.
7
+ - Ensure alignment with API contracts and business rules.
8
+
9
+ Required sections in output
10
+ - Entities and relationships
11
+ - Key fields and constraints
12
+ - Data dictionary (field definitions)
13
+ - Data lifecycle and retention notes
14
+
15
+ Quality gates
16
+ - Consistency: matches API and business requirements
17
+ - Completeness: covers all core domain entities
18
+ - Integrity: constraints and validation rules defined
corpus_rag/data_architect/standards_quickref.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Standards and frameworks (brief summaries)
2
+
3
+ DAMA-DMBOK
4
+ Comprehensive data management guide. Use for governance, data quality, and stewardship considerations.
5
+
6
+ UML for data models
7
+ Standard notation for entities and relationships. Use to document ERDs with clear cardinalities.
8
+
9
+ Database Answers patterns
10
+ Collection of common data model patterns. Use for inspiration and validation of schema design.
corpus_rag/devops_architect/role_playbook.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Role: DevOps Architect
2
+ Purpose: Define CI/CD, deployment, and operational reliability plan.
3
+
4
+ Core responsibilities
5
+ - Design pipelines, environments, and monitoring.
6
+ - Define infrastructure requirements and scaling strategy.
7
+ - Align with security and architecture constraints.
8
+
9
+ Required sections in output
10
+ - CI/CD pipeline overview
11
+ - Environments and deployment strategy
12
+ - Monitoring, logging, and alerting
13
+ - Backup and rollback strategy
14
+
15
+ Quality gates
16
+ - Reliability: clear rollback and monitoring
17
+ - Security: secrets management and least privilege
18
+ - Consistency: aligns with architecture choices
corpus_rag/devops_architect/standards_quickref.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Standards and frameworks (brief summaries)
2
+
3
+ Google SRE Book
4
+ Reliability principles: SLIs/SLOs, error budgets, incident response. Use to set operational targets.
5
+
6
+ 12-Factor App
7
+ Guides cloud-native deployment (stateless, config via env). Use to shape deployment and scaling approach.
8
+
9
+ Docker best practices
10
+ Image hygiene, minimal layers, and security hardening. Use for containerized build guidance.
corpus_rag/environment_engineer/role_playbook.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Role: Environment Engineer
2
+ Purpose: Define local setup, developer tooling, and environment configuration.
3
+
4
+ Core responsibilities
5
+ - Provide step-by-step local setup and prerequisites.
6
+ - Document environment variables and tooling versions.
7
+ - Ensure developer onboarding is fast and repeatable.
8
+
9
+ Required sections in output
10
+ - Prerequisites and tooling versions
11
+ - Setup steps (install, configure, run)
12
+ - Environment variable reference
13
+ - Troubleshooting tips
14
+
15
+ Quality gates
16
+ - Reproducibility: steps work from clean machine
17
+ - Clarity: copy/paste friendly commands
18
+ - Consistency: matches DevOps and architecture choices
corpus_rag/environment_engineer/standards_quickref.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Standards and frameworks (brief summaries)
2
+
3
+ 12-Factor App
4
+ Configuration and portability guidance. Use to structure environment variables and secrets.
5
+
6
+ Official tool docs (Node, Python, Docker, Git)
7
+ Use vendor-recommended install paths and versions to avoid inconsistencies.
8
+
9
+ Node/Python best practices
10
+ Use virtual envs, lock files, and deterministic builds. Apply to keep setup stable.
corpus_rag/product_owner/role_playbook.txt ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Role: Product Owner
2
+ Purpose: Translate refined project brief into a clear product requirements document (PRD) and prioritized user stories.
3
+
4
+ Core responsibilities
5
+ - Define product goals, scope, success metrics, and non-goals.
6
+ - Produce a PRD with features, user stories, acceptance criteria, and priorities.
7
+ - Resolve ambiguities from the project refiner output.
8
+
9
+ Required sections in output
10
+ - Product vision and target users
11
+ - Problem statement and goals
12
+ - Scope and non-goals
13
+ - Feature list with priority (MVP vs later)
14
+ - User stories with acceptance criteria
15
+ - Constraints, assumptions, and dependencies
16
+ - Success metrics and KPIs
17
+
18
+ Quality gates
19
+ - Completeness: all required sections present
20
+ - Clarity: unambiguous, testable acceptance criteria
21
+ - Feasibility: within constraints and timeline
22
+ - Consistency: aligns with project brief and later phase inputs
corpus_rag/product_owner/standards_quickref.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Standards and frameworks (brief summaries)
2
+
3
+ IEEE 29148 (Requirements Engineering)
4
+ Defines best practices for requirements quality: complete, consistent, unambiguous, verifiable, and feasible. Use it to structure the PRD and validate acceptance criteria.
5
+
6
+ User Story Mapping (Jeff Patton)
7
+ Organizes stories along user activities to keep scope visible. Apply to ensure MVP coverage and reveal gaps in flows.
8
+
9
+ Atlassian PRD guidance
10
+ Practical PRD structure: problem, goals, scope, user stories, risks. Use as a template for readable stakeholder documents.
corpus_rag/qa_strategist/role_playbook.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Role: QA Strategist
2
+ Purpose: Define test strategy, coverage, and quality assurance approach.
3
+
4
+ Core responsibilities
5
+ - Create test plan across unit, integration, and E2E layers.
6
+ - Define risk-based testing priorities.
7
+ - Provide quality gates and acceptance criteria.
8
+
9
+ Required sections in output
10
+ - Test strategy and pyramid distribution
11
+ - Key test types and tooling
12
+ - Risk areas and regression scope
13
+ - Release quality gates
14
+
15
+ Quality gates
16
+ - Coverage: critical flows fully tested
17
+ - Balance: unit/integration/E2E ratios reasonable
18
+ - Traceability: tests map to requirements
corpus_rag/qa_strategist/standards_quickref.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Standards and frameworks (brief summaries)
2
+
3
+ ISO/IEC 25010
4
+ Software quality model (reliability, security, usability, etc.). Use to define quality attributes and test focus.
5
+
6
+ ISTQB Foundation Syllabus
7
+ Standard testing terminology and practices. Use for test design and reporting consistency.
8
+
9
+ Test Pyramid
10
+ Guides proportion of tests (more unit, fewer E2E). Use to keep suite fast and reliable.
corpus_rag/security_analyst/role_playbook.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Role: Security Analyst
2
+ Purpose: Identify threats, define security requirements, and recommend mitigations.
3
+
4
+ Core responsibilities
5
+ - Produce threat model and security requirements.
6
+ - Specify authN/authZ approach, data protection, and logging.
7
+ - Highlight OWASP risks and mitigations.
8
+
9
+ Required sections in output
10
+ - Threat model (assets, threats, mitigations)
11
+ - Security requirements and controls
12
+ - Authentication and authorization strategy
13
+ - Data protection and privacy considerations
14
+
15
+ Quality gates
16
+ - Coverage: top web threats addressed
17
+ - Consistency: aligns with architecture and data model
18
+ - Practicality: mitigations are implementable
corpus_rag/security_analyst/standards_quickref.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Standards and frameworks (brief summaries)
2
+
3
+ OWASP Top 10
4
+ Top web app security risks (e.g., injection, auth failures). Use as a baseline checklist for threats and mitigations.
5
+
6
+ OWASP ASVS
7
+ Verification standard with security control requirements. Use to derive testable security requirements by level.
8
+
9
+ NIST Cybersecurity Framework
10
+ High-level framework (Identify, Protect, Detect, Respond, Recover). Use to ensure broad security posture.
corpus_rag/solution_architect/role_playbook.txt ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Role: Solution Architect
2
+ Purpose: Define system architecture, tech stack decisions, and high-level design patterns.
3
+
4
+ Core responsibilities
5
+ - Choose frontend/backend architecture (SPA/SSR), hosting approach, and integrations.
6
+ - Produce system design (components, data flow, deployment).
7
+ - Capture trade-offs and rationale (ADRs).
8
+
9
+ Required sections in output
10
+ - Architecture overview and constraints
11
+ - Key components and interfaces
12
+ - Tech stack choices with rationale
13
+ - Deployment topology and integration points
14
+ - Non-functional requirements (scalability, reliability)
15
+
16
+ Quality gates
17
+ - Feasibility: implementable with chosen stack
18
+ - Consistency: aligns with security, data, and API design
19
+ - Clarity: diagrams or structured description of components
corpus_rag/solution_architect/standards_quickref.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Standards and frameworks (brief summaries)
2
+
3
+ C4 Model
4
+ Simple, layered diagrams (Context, Container, Component, Code). Use to communicate architecture to different audiences.
5
+
6
+ 12-Factor App
7
+ Best practices for cloud-native apps: config in env, stateless processes, logs as streams. Use to guide backend design and deployment.
8
+
9
+ AWS Well-Architected Framework
10
+ Six pillars (operational excellence, security, reliability, performance efficiency, cost optimization, sustainability). Use as a checklist for architecture decisions.
corpus_rag/technical_writer/role_playbook.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Role: Technical Writer
2
+ Purpose: Create clear, structured documentation using consistent style and information architecture.
3
+
4
+ Core responsibilities
5
+ - Define doc set structure and templates.
6
+ - Standardize tone, terminology, and formatting.
7
+ - Ensure documentation is complete and user-focused.
8
+
9
+ Required sections in output
10
+ - Documentation structure (overview, how-to, reference)
11
+ - Writing style guidelines
12
+ - Template examples
13
+ - Update and maintenance guidance
14
+
15
+ Quality gates
16
+ - Clarity: readable and actionable content
17
+ - Consistency: uniform structure across docs
18
+ - Completeness: covers user journeys and references
corpus_rag/technical_writer/standards_quickref.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Standards and frameworks (brief summaries)
2
+
3
+ Google Developer Documentation Style Guide
4
+ Best practices for clear, consistent technical writing. Use to standardize tone and terminology.
5
+
6
+ Microsoft Writing Style Guide
7
+ Guidelines for UI labels, procedures, and accessibility. Use for consistent phrasing.
8
+
9
+ Diátaxis Framework
10
+ Documentation types: tutorials, how-to guides, reference, explanation. Use to structure the doc set.
corpus_rag/ux_designer/role_playbook.txt ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Role: UX Designer
2
+ Purpose: Define user flows, interaction patterns, and usability guidelines.
3
+
4
+ Core responsibilities
5
+ - Create user journeys and key screens/components.
6
+ - Ensure accessibility and usability best practices.
7
+ - Align UX with business workflows and data requirements.
8
+
9
+ Required sections in output
10
+ - User personas and primary tasks
11
+ - User flows (happy path and exceptions)
12
+ - Key UI components and layout guidelines
13
+ - Accessibility and usability notes
14
+
15
+ Quality gates
16
+ - Usability: clear flows and minimal friction
17
+ - Accessibility: meets WCAG 2.1 AA basics
18
+ - Consistency: aligns with business processes
corpus_rag/ux_designer/standards_quickref.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Standards and frameworks (brief summaries)
2
+
3
+ Nielsen Norman Heuristics
4
+ 10 usability principles (visibility, consistency, error prevention). Use to evaluate interaction design.
5
+
6
+ WCAG 2.1
7
+ Accessibility guidelines (perceivable, operable, understandable, robust). Use to ensure keyboard access and contrast.
8
+
9
+ Material Design (web UI patterns)
10
+ UI guidelines for consistency and components. Use as reference for layout and interaction patterns.
pyproject.toml CHANGED
@@ -1,5 +1,3 @@
1
-
2
-
3
  [tool.ruff.lint]
4
  exclude = ["tests", "tests/*", "*/tests/*", "*/tests.py"]
5
 
@@ -39,7 +37,9 @@ dependencies = [
39
  # Database
40
  "sqlalchemy>=2.0.0",
41
  "psycopg2-binary>=2.9.9",
42
- "pinecone",
 
 
43
  "hydra-core>=1.3.2",
44
  "lightning>=2.6.0",
45
  "fiddle>=0.3.0",
 
 
 
1
  [tool.ruff.lint]
2
  exclude = ["tests", "tests/*", "*/tests/*", "*/tests.py"]
3
 
 
37
  # Database
38
  "sqlalchemy>=2.0.0",
39
  "psycopg2-binary>=2.9.9",
40
+ # MongoDB Vector Store
41
+ "pymongo>=4.6.0",
42
+ "langchain-mongodb>=0.1.0",
43
  "hydra-core>=1.3.2",
44
  "lightning>=2.6.0",
45
  "fiddle>=0.3.0",
requirements.txt CHANGED
@@ -1,5 +1,5 @@
1
  # This file was autogenerated by uv via the following command:
2
- # uv pip compile pyproject.toml -o requirements.txt --python-version 3.12
3
  absl-py==2.3.1
4
  # via fiddle
5
  aiohappyeyeballs==2.6.1
@@ -32,7 +32,6 @@ certifi==2025.11.12
32
  # via
33
  # httpcore
34
  # httpx
35
- # pinecone
36
  # requests
37
  cffi==2.0.0
38
  # via cryptography
@@ -50,12 +49,14 @@ cryptography==46.0.3
50
  # via python-jose
51
  defusedxml==0.7.1
52
  # via fpdf2
 
 
53
  ecdsa==0.19.1
54
  # via python-jose
55
  fastapi==0.123.5
56
- # via ideasprinter-api (pyproject.toml)
57
  fiddle==0.3.0
58
- # via ideasprinter-api (pyproject.toml)
59
  filelock==3.20.2
60
  # via torch
61
  filetype==1.2.0
@@ -63,7 +64,7 @@ filetype==1.2.0
63
  fonttools==4.61.0
64
  # via fpdf2
65
  fpdf2==2.8.5
66
- # via ideasprinter-api (pyproject.toml)
67
  frozenlist==1.8.0
68
  # via
69
  # aiohttp
@@ -76,7 +77,7 @@ fsspec==2025.12.0
76
  google-api-core==2.28.1
77
  # via google-api-python-client
78
  google-api-python-client==2.187.0
79
- # via ideasprinter-api (pyproject.toml)
80
  google-auth==2.41.1
81
  # via
82
  # google-api-core
@@ -86,7 +87,7 @@ google-auth==2.41.1
86
  google-auth-httplib2==0.2.1
87
  # via google-api-python-client
88
  google-auth-oauthlib==1.2.3
89
- # via ideasprinter-api (pyproject.toml)
90
  googleapis-common-protos==1.72.0
91
  # via google-api-core
92
  graphviz==0.21
@@ -109,7 +110,7 @@ httpx==0.28.1
109
  # langsmith
110
  # upstash-redis
111
  hydra-core==1.3.2
112
- # via ideasprinter-api (pyproject.toml)
113
  idna==3.11
114
  # via
115
  # anyio
@@ -124,20 +125,34 @@ jsonpatch==1.33
124
  # via langchain-core
125
  jsonpointer==3.0.0
126
  # via jsonpatch
 
 
 
 
127
  langchain-core==1.1.0
128
  # via
129
- # ideasprinter-api (pyproject.toml)
 
 
 
130
  # langchain-nvidia-ai-endpoints
131
  # langchain-text-splitters
132
  # langgraph
133
  # langgraph-checkpoint
134
  # langgraph-prebuilt
 
 
135
  langchain-nvidia-ai-endpoints==1.0.0
136
- # via ideasprinter-api (pyproject.toml)
137
  langchain-text-splitters==1.0.0
138
- # via ideasprinter-api (pyproject.toml)
 
 
 
139
  langgraph==1.0.4
140
- # via ideasprinter-api (pyproject.toml)
 
 
141
  langgraph-checkpoint==3.0.1
142
  # via
143
  # langgraph
@@ -148,12 +163,15 @@ langgraph-sdk==0.2.12
148
  # via langgraph
149
  langsmith==0.4.53
150
  # via
151
- # ideasprinter-api (pyproject.toml)
 
152
  # langchain-core
 
 
153
  libcst==1.8.6
154
  # via fiddle
155
  lightning==2.6.0
156
- # via ideasprinter-api (pyproject.toml)
157
  lightning-utilities==0.15.2
158
  # via
159
  # lightning
@@ -171,7 +189,8 @@ networkx==3.6.1
171
  # via torch
172
  numpy==2.3.5
173
  # via
174
- # ideasprinter-api (pyproject.toml)
 
175
  # torchmetrics
176
  oauthlib==3.3.1
177
  # via requests-oauthlib
@@ -181,7 +200,6 @@ orjson==3.11.4
181
  # via
182
  # langgraph-sdk
183
  # langsmith
184
- # pinecone
185
  ormsgpack==1.12.0
186
  # via langgraph-checkpoint
187
  packaging==24.2
@@ -191,19 +209,12 @@ packaging==24.2
191
  # langsmith
192
  # lightning
193
  # lightning-utilities
194
- # pinecone-plugin-assistant
195
  # pytorch-lightning
196
  # torchmetrics
197
  passlib==1.7.4
198
- # via ideasprinter-api (pyproject.toml)
199
  pillow==12.0.0
200
  # via fpdf2
201
- pinecone==8.0.0
202
- # via ideasprinter-api (pyproject.toml)
203
- pinecone-plugin-assistant==3.0.1
204
- # via pinecone
205
- pinecone-plugin-interface==0.0.7
206
- # via pinecone
207
  propcache==0.4.1
208
  # via
209
  # aiohttp
@@ -216,7 +227,7 @@ protobuf==6.33.2
216
  # googleapis-common-protos
217
  # proto-plus
218
  psycopg2-binary==2.9.11
219
- # via ideasprinter-api (pyproject.toml)
220
  pyasn1==0.6.1
221
  # via
222
  # pyasn1-modules
@@ -228,37 +239,46 @@ pycparser==2.23
228
  # via cffi
229
  pydantic==2.12.4
230
  # via
231
- # ideasprinter-api (pyproject.toml)
232
  # fastapi
 
 
233
  # langchain-core
234
  # langgraph
235
  # langsmith
236
  pydantic-core==2.41.5
237
  # via pydantic
 
 
 
 
 
 
 
238
  pyparsing==3.2.5
239
  # via httplib2
240
- python-dateutil==2.9.0.post0
241
- # via pinecone
242
  python-dotenv==1.2.1
243
- # via ideasprinter-api (pyproject.toml)
244
  python-jose==3.5.0
245
- # via ideasprinter-api (pyproject.toml)
246
  python-multipart==0.0.20
247
- # via ideasprinter-api (pyproject.toml)
248
  pytorch-lightning==2.6.0
249
  # via lightning
250
  pyyaml==6.0.3
251
  # via
 
252
  # langchain-core
253
- # libcst
254
  # lightning
255
  # omegaconf
256
  # pytorch-lightning
 
 
257
  requests==2.32.5
258
  # via
259
  # google-api-core
 
260
  # langsmith
261
- # pinecone-plugin-assistant
262
  # requests-oauthlib
263
  # requests-toolbelt
264
  requests-oauthlib==2.0.0
@@ -274,11 +294,11 @@ setuptools==80.9.0
274
  # lightning-utilities
275
  # torch
276
  six==1.17.0
277
- # via
278
- # ecdsa
279
- # python-dateutil
280
  sqlalchemy==2.0.45
281
- # via ideasprinter-api (pyproject.toml)
 
 
282
  starlette==0.50.0
283
  # via fastapi
284
  sympy==1.14.0
@@ -300,35 +320,29 @@ tqdm==4.67.1
300
  # pytorch-lightning
301
  typing-extensions==4.15.0
302
  # via
303
- # aiosignal
304
- # anyio
305
  # fastapi
306
  # fiddle
307
  # langchain-core
308
  # lightning
309
  # lightning-utilities
310
- # pinecone
311
  # pydantic
312
  # pydantic-core
313
  # pytorch-lightning
314
  # sqlalchemy
315
- # starlette
316
  # torch
317
  # typing-inspection
318
  typing-inspection==0.4.2
319
  # via pydantic
320
  upstash-redis==1.5.0
321
- # via ideasprinter-api (pyproject.toml)
322
  uritemplate==4.2.0
323
  # via google-api-python-client
324
  urllib3==2.5.0
325
- # via
326
- # pinecone
327
- # requests
328
  uuid-utils==0.12.0
329
  # via langsmith
330
  uvicorn==0.38.0
331
- # via ideasprinter-api (pyproject.toml)
332
  xxhash==3.6.0
333
  # via langgraph
334
  yarl==1.22.0
 
1
  # This file was autogenerated by uv via the following command:
2
+ # uv pip compile pyproject.toml --output-file requirements.txt
3
  absl-py==2.3.1
4
  # via fiddle
5
  aiohappyeyeballs==2.6.1
 
32
  # via
33
  # httpcore
34
  # httpx
 
35
  # requests
36
  cffi==2.0.0
37
  # via cryptography
 
49
  # via python-jose
50
  defusedxml==0.7.1
51
  # via fpdf2
52
+ dnspython==2.8.0
53
+ # via pymongo
54
  ecdsa==0.19.1
55
  # via python-jose
56
  fastapi==0.123.5
57
+ # via specs-before-code-api (pyproject.toml)
58
  fiddle==0.3.0
59
+ # via specs-before-code-api (pyproject.toml)
60
  filelock==3.20.2
61
  # via torch
62
  filetype==1.2.0
 
64
  fonttools==4.61.0
65
  # via fpdf2
66
  fpdf2==2.8.5
67
+ # via specs-before-code-api (pyproject.toml)
68
  frozenlist==1.8.0
69
  # via
70
  # aiohttp
 
77
  google-api-core==2.28.1
78
  # via google-api-python-client
79
  google-api-python-client==2.187.0
80
+ # via specs-before-code-api (pyproject.toml)
81
  google-auth==2.41.1
82
  # via
83
  # google-api-core
 
87
  google-auth-httplib2==0.2.1
88
  # via google-api-python-client
89
  google-auth-oauthlib==1.2.3
90
+ # via specs-before-code-api (pyproject.toml)
91
  googleapis-common-protos==1.72.0
92
  # via google-api-core
93
  graphviz==0.21
 
110
  # langsmith
111
  # upstash-redis
112
  hydra-core==1.3.2
113
+ # via specs-before-code-api (pyproject.toml)
114
  idna==3.11
115
  # via
116
  # anyio
 
125
  # via langchain-core
126
  jsonpointer==3.0.0
127
  # via jsonpatch
128
+ langchain==1.1.2
129
+ # via langchain-mongodb
130
+ langchain-classic==1.0.0
131
+ # via langchain-mongodb
132
  langchain-core==1.1.0
133
  # via
134
+ # specs-before-code-api (pyproject.toml)
135
+ # langchain
136
+ # langchain-classic
137
+ # langchain-mongodb
138
  # langchain-nvidia-ai-endpoints
139
  # langchain-text-splitters
140
  # langgraph
141
  # langgraph-checkpoint
142
  # langgraph-prebuilt
143
+ langchain-mongodb==0.9.0
144
+ # via specs-before-code-api (pyproject.toml)
145
  langchain-nvidia-ai-endpoints==1.0.0
146
+ # via specs-before-code-api (pyproject.toml)
147
  langchain-text-splitters==1.0.0
148
+ # via
149
+ # specs-before-code-api (pyproject.toml)
150
+ # langchain-classic
151
+ # langchain-mongodb
152
  langgraph==1.0.4
153
+ # via
154
+ # specs-before-code-api (pyproject.toml)
155
+ # langchain
156
  langgraph-checkpoint==3.0.1
157
  # via
158
  # langgraph
 
163
  # via langgraph
164
  langsmith==0.4.53
165
  # via
166
+ # specs-before-code-api (pyproject.toml)
167
+ # langchain-classic
168
  # langchain-core
169
+ lark==1.3.1
170
+ # via langchain-mongodb
171
  libcst==1.8.6
172
  # via fiddle
173
  lightning==2.6.0
174
+ # via specs-before-code-api (pyproject.toml)
175
  lightning-utilities==0.15.2
176
  # via
177
  # lightning
 
189
  # via torch
190
  numpy==2.3.5
191
  # via
192
+ # specs-before-code-api (pyproject.toml)
193
+ # langchain-mongodb
194
  # torchmetrics
195
  oauthlib==3.3.1
196
  # via requests-oauthlib
 
200
  # via
201
  # langgraph-sdk
202
  # langsmith
 
203
  ormsgpack==1.12.0
204
  # via langgraph-checkpoint
205
  packaging==24.2
 
209
  # langsmith
210
  # lightning
211
  # lightning-utilities
 
212
  # pytorch-lightning
213
  # torchmetrics
214
  passlib==1.7.4
215
+ # via specs-before-code-api (pyproject.toml)
216
  pillow==12.0.0
217
  # via fpdf2
 
 
 
 
 
 
218
  propcache==0.4.1
219
  # via
220
  # aiohttp
 
227
  # googleapis-common-protos
228
  # proto-plus
229
  psycopg2-binary==2.9.11
230
+ # via specs-before-code-api (pyproject.toml)
231
  pyasn1==0.6.1
232
  # via
233
  # pyasn1-modules
 
239
  # via cffi
240
  pydantic==2.12.4
241
  # via
242
+ # specs-before-code-api (pyproject.toml)
243
  # fastapi
244
+ # langchain
245
+ # langchain-classic
246
  # langchain-core
247
  # langgraph
248
  # langsmith
249
  pydantic-core==2.41.5
250
  # via pydantic
251
+ pymongo==4.16.0
252
+ # via
253
+ # specs-before-code-api (pyproject.toml)
254
+ # langchain-mongodb
255
+ # pymongo-search-utils
256
+ pymongo-search-utils==0.3.0
257
+ # via langchain-mongodb
258
  pyparsing==3.2.5
259
  # via httplib2
 
 
260
  python-dotenv==1.2.1
261
+ # via specs-before-code-api (pyproject.toml)
262
  python-jose==3.5.0
263
+ # via specs-before-code-api (pyproject.toml)
264
  python-multipart==0.0.20
265
+ # via specs-before-code-api (pyproject.toml)
266
  pytorch-lightning==2.6.0
267
  # via lightning
268
  pyyaml==6.0.3
269
  # via
270
+ # langchain-classic
271
  # langchain-core
 
272
  # lightning
273
  # omegaconf
274
  # pytorch-lightning
275
+ pyyaml-ft==8.0.0
276
+ # via libcst
277
  requests==2.32.5
278
  # via
279
  # google-api-core
280
+ # langchain-classic
281
  # langsmith
 
282
  # requests-oauthlib
283
  # requests-toolbelt
284
  requests-oauthlib==2.0.0
 
294
  # lightning-utilities
295
  # torch
296
  six==1.17.0
297
+ # via ecdsa
 
 
298
  sqlalchemy==2.0.45
299
+ # via
300
+ # specs-before-code-api (pyproject.toml)
301
+ # langchain-classic
302
  starlette==0.50.0
303
  # via fastapi
304
  sympy==1.14.0
 
320
  # pytorch-lightning
321
  typing-extensions==4.15.0
322
  # via
 
 
323
  # fastapi
324
  # fiddle
325
  # langchain-core
326
  # lightning
327
  # lightning-utilities
 
328
  # pydantic
329
  # pydantic-core
330
  # pytorch-lightning
331
  # sqlalchemy
 
332
  # torch
333
  # typing-inspection
334
  typing-inspection==0.4.2
335
  # via pydantic
336
  upstash-redis==1.5.0
337
+ # via specs-before-code-api (pyproject.toml)
338
  uritemplate==4.2.0
339
  # via google-api-python-client
340
  urllib3==2.5.0
341
+ # via requests
 
 
342
  uuid-utils==0.12.0
343
  # via langsmith
344
  uvicorn==0.38.0
345
+ # via specs-before-code-api (pyproject.toml)
346
  xxhash==3.6.0
347
  # via langgraph
348
  yarl==1.22.0
scripts/seed_rag_data.py ADDED
@@ -0,0 +1,316 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Seed MongoDB collections with example documents for each agent role.
4
+
5
+ This script:
6
+ 1. Reads documents from corpus_rag/<agent_role>/ directories
7
+ 2. Splits them into chunks
8
+ 3. Embeds and stores them in the appropriate MongoDB collection
9
+
10
+
11
+ Usage:
12
+ uv run python scripts/seed_rag_data.py # Seed all collections
13
+ uv run python scripts/seed_rag_data.py --role product_owner # Seed specific role
14
+ uv run python scripts/seed_rag_data.py --dry-run # Preview without inserting
15
+
16
+ Environment Variables:
17
+ MONGODB_URI - MongoDB connection string
18
+ MONGODB_DATABASE - Database name (default: specs_before_code)
19
+ NVIDIA_API_KEY - Required for generating embeddings
20
+ """
21
+
22
+ import argparse
23
+ import asyncio
24
+ import os
25
+ import sys
26
+ from pathlib import Path
27
+
28
+ # Add parent directory to path for imports
29
+ sys.path.insert(0, str(Path(__file__).parent.parent))
30
+
31
+ from dotenv import load_dotenv
32
+
33
+ load_dotenv()
34
+
35
# Mapping from TeamRole enum values to directory names.
# By convention each role identifier doubles as its corpus subdirectory
# name, so the mapping is derived from a single tuple of role names.
_AGENT_ROLE_NAMES = (
    "product_owner",
    "business_analyst",
    "solution_architect",
    "data_architect",
    "security_analyst",
    "ux_designer",
    "api_designer",
    "qa_strategist",
    "devops_architect",
    "environment_engineer",
    "technical_writer",
)
ROLE_DIRECTORIES = {name: name for name in _AGENT_ROLE_NAMES}

# Base directory for corpus files
CORPUS_DIR = Path(__file__).parent.parent / "corpus_rag"
+
53
+
54
async def seed_collection(
    role_name: str,
    directory_name: str,
    dry_run: bool = False,
) -> dict:
    """
    Seed a single agent's collection with documents from its corpus directory.

    Args:
        role_name: The TeamRole enum value (e.g., "product_owner")
        directory_name: The subdirectory under corpus_rag/
        dry_run: If True, only count documents without inserting

    Returns:
        Dict with stats: role, files_found, chunks_found, chunks_inserted, errors
    """
    # Imports are deferred so `--dry-run` / `--list-roles` work without the
    # heavier app dependencies being importable.
    from langchain_core.documents import Document
    from langchain_text_splitters import RecursiveCharacterTextSplitter

    from app.core.mongodb_rag import get_mongodb_rag_service
    from app.core.schemas import TeamRole

    stats = {
        "role": role_name,
        "files_found": 0,
        "chunks_found": 0,
        "chunks_inserted": 0,
        "errors": [],
    }

    # Validate the role name against the TeamRole enum before touching disk.
    try:
        role = TeamRole(role_name)
    except ValueError:
        stats["errors"].append(f"Invalid role: {role_name}")
        return stats

    dir_path = CORPUS_DIR / directory_name
    if not dir_path.exists():
        stats["errors"].append(f"Directory not found: {dir_path}")
        return stats

    # ~1000-char chunks with 200-char overlap keep related sentences together
    # across chunk boundaries; start indexes are recorded in metadata.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        add_start_index=True,
        separators=["\n\n", "\n", ". ", " ", ""],
    )

    chunk_docs = []
    allowed_suffixes = {".md", ".txt", ".yaml", ".yml"}

    # Walk the corpus tree, chunk every supported text file, and tag each
    # chunk with provenance metadata for later filtering and debugging.
    for file_path in dir_path.glob("**/*"):
        if file_path.is_dir() or file_path.suffix.lower() not in allowed_suffixes:
            continue

        stats["files_found"] += 1

        try:
            text = file_path.read_text(encoding="utf-8")
            if not text.strip():
                continue

            pieces = text_splitter.split_text(text)
            chunk_docs.extend(
                Document(
                    page_content=piece,
                    metadata={
                        "source": file_path.name,
                        "chunk_index": idx,
                        "total_chunks": len(pieces),
                        "role": role_name,
                        "file_path": str(file_path.relative_to(CORPUS_DIR)),
                    },
                )
                for idx, piece in enumerate(pieces)
            )
        except Exception as e:
            stats["errors"].append(f"Error reading {file_path.name}: {e}")

    stats["chunks_found"] = len(chunk_docs)

    if not chunk_docs:
        stats["errors"].append("No documents found to seed")
        return stats

    # Dry run: report what would happen without writing to MongoDB.
    if dry_run:
        print(f"  [DRY RUN] Would insert {len(chunk_docs)} chunks")
        return stats

    try:
        rag_service = get_mongodb_rag_service()
        if not rag_service.is_available():
            stats["errors"].append("MongoDB not available")
            return stats

        inserted_ids = await rag_service.add_documents(chunk_docs, role)
        stats["chunks_inserted"] = len(inserted_ids)
    except Exception as e:
        stats["errors"].append(f"Error inserting documents: {e}")

    return stats
+
166
+
167
+ async def seed_all(
168
+ roles: list[str] | None = None,
169
+ dry_run: bool = False,
170
+ ) -> None:
171
+ """
172
+ Seed all (or specified) agent collections.
173
+
174
+ Args:
175
+ roles: List of role names to seed, or None for all
176
+ dry_run: If True, only count documents without inserting
177
+ """
178
+ if roles:
179
+ # Filter to only requested roles
180
+ to_seed = {k: v for k, v in ROLE_DIRECTORIES.items() if k in roles}
181
+ if not to_seed:
182
+ print(f"Error: No valid roles in {roles}")
183
+ print(f"Valid roles: {list(ROLE_DIRECTORIES.keys())}")
184
+ return
185
+ else:
186
+ to_seed = ROLE_DIRECTORIES
187
+
188
+ print(f"Seeding RAG collections{' [DRY RUN]' if dry_run else ''}...")
189
+ print(f"Corpus directory: {CORPUS_DIR}")
190
+ print()
191
+
192
+ total_chunks = 0
193
+ total_inserted = 0
194
+ total_errors = 0
195
+
196
+ for role_name, directory in to_seed.items():
197
+ print(f"Processing: {role_name}")
198
+
199
+ result = await seed_collection(role_name, directory, dry_run)
200
+
201
+ total_chunks += result["chunks_found"]
202
+ total_inserted += result["chunks_inserted"]
203
+
204
+ if result["errors"]:
205
+ total_errors += len(result["errors"])
206
+ for err in result["errors"]:
207
+ print(f" {err}")
208
+ elif result["chunks_inserted"] > 0:
209
+ print(
210
+ f" Inserted {result['chunks_inserted']} chunks from {result['files_found']} files"
211
+ )
212
+ elif result["chunks_found"] > 0 and dry_run:
213
+ print(
214
+ f" - Found {result['chunks_found']} chunks from {result['files_found']} files"
215
+ )
216
+ else:
217
+ print(f" - No documents found in corpus_rag/{directory}/")
218
+
219
+ # Summary
220
+ print("\n" + "=" * 50)
221
+ print("SEEDING COMPLETE")
222
+ print("=" * 50)
223
+ print(f"Total chunks found: {total_chunks}")
224
+ if not dry_run:
225
+ print(f"Total chunks inserted: {total_inserted}")
226
+ if total_errors:
227
+ print(f"Total errors: {total_errors}")
228
+ print()
229
+
230
+
231
+ def create_corpus_directories() -> None:
232
+ """Create the corpus_rag directory structure."""
233
+ print(f"Creating corpus directories in: {CORPUS_DIR}")
234
+ print()
235
+
236
+ CORPUS_DIR.mkdir(exist_ok=True)
237
+
238
+ for role_name, directory in ROLE_DIRECTORIES.items():
239
+ dir_path = CORPUS_DIR / directory
240
+ dir_path.mkdir(exist_ok=True)
241
+
242
+ # Create a README placeholder
243
+ readme_path = dir_path / "README.md"
244
+ if not readme_path.exists():
245
+ readme_path.write_text(
246
+ f"# {role_name.replace('_', ' ').title()} Examples\n\n"
247
+ f"Place example documents for the {role_name} agent here.\n\n"
248
+ f"## Supported Formats\n"
249
+ f"- `.md` (Markdown)\n"
250
+ f"- `.txt` (Plain text)\n"
251
+ f"- `.yaml` / `.yml` (YAML)\n\n"
252
+ f"## Content Guidelines\n"
253
+ f"Add high-quality examples that the {role_name} agent can learn from.\n",
254
+ encoding="utf-8",
255
+ )
256
+
257
+ print(f" {directory}/")
258
+
259
+ print("\nDone! Add your example documents to the directories above.")
260
+
261
+
262
def main():
    """CLI entry point: parse arguments and dispatch to the requested action."""
    parser = argparse.ArgumentParser(
        description="Seed MongoDB RAG collections with example documents"
    )
    parser.add_argument(
        "--role",
        type=str,
        help="Specific role to seed (e.g., product_owner)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Preview what would be seeded without inserting",
    )
    parser.add_argument(
        "--create-dirs",
        action="store_true",
        help="Create corpus_rag directory structure",
    )
    parser.add_argument(
        "--list-roles",
        action="store_true",
        help="List available role names",
    )

    args = parser.parse_args()

    # Informational actions short-circuit before any environment checks.
    if args.list_roles:
        print("Available roles:")
        for role in ROLE_DIRECTORIES:
            print(f"  - {role}")
        return

    if args.create_dirs:
        create_corpus_directories()
        return

    # A real seeding run needs both a database and an embedding provider;
    # --dry-run skips both checks since nothing is embedded or written.
    if not os.getenv("MONGODB_URI") and not args.dry_run:
        print("Error: MONGODB_URI environment variable not set")
        print("Set it in your .env file or use --dry-run to preview")
        sys.exit(1)

    if not os.getenv("NVIDIA_API_KEY") and not args.dry_run:
        print("Error: NVIDIA_API_KEY environment variable not set")
        print("Required for generating embeddings")
        sys.exit(1)

    roles = [args.role] if args.role else None
    asyncio.run(seed_all(roles=roles, dry_run=args.dry_run))


if __name__ == "__main__":
    main()
scripts/setup_mongodb_indexes.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
Setup MongoDB Atlas Vector Search indexes for all agent RAG collections.

This script creates:
1. Collections for each agent role (if they don't exist)
2. Vector search indexes on each collection

Prerequisites:
- MongoDB Atlas M10+ cluster (Vector Search requires dedicated cluster)
- MONGODB_URI environment variable set
- Database user with dbAdmin permissions

Usage:
    uv run python scripts/setup_mongodb_indexes.py

Environment Variables:
    MONGODB_URI - MongoDB connection string
    MONGODB_DATABASE - Database name (default: specs_before_code)
    MONGODB_INDEX_NAME - Vector index name (default: vector_index)
"""

import os
import sys
from pathlib import Path

# Add parent directory to path for imports
# (lets the script import project modules when run from scripts/)
sys.path.insert(0, str(Path(__file__).parent.parent))

from dotenv import load_dotenv

# Pull MONGODB_* settings from a local .env file, if present.
load_dotenv()

# Collection names for all agent roles with RAG.
# Keep this list in sync with the per-role corpus_rag/ directories.
RAG_COLLECTIONS = [
    "rag_product_owner",
    "rag_business_analyst",
    "rag_solution_architect",
    "rag_data_architect",
    "rag_security_analyst",
    "rag_ux_designer",
    "rag_api_designer",
    "rag_qa_strategist",
    "rag_devops_architect",
    "rag_environment_engineer",
    "rag_technical_writer",
]

# Vector search index definition
# Configured for nvidia/nv-embed-v1 which outputs 4096-dimensional embeddings.
# Applied identically to every collection in RAG_COLLECTIONS.
VECTOR_INDEX_DEFINITION = {
    "fields": [
        {
            "type": "vector",
            "path": "embedding",
            "numDimensions": 4096,  # nv-embed-v1 output dimension
            "similarity": "cosine",
        },
        # Filter fields for metadata queries
        {
            "type": "filter",
            "path": "metadata.source",
        },
        {
            "type": "filter",
            "path": "metadata.role",
        },
    ]
}
70
+
71
+
72
def setup_indexes() -> None:
    """Create vector search indexes for all agent collections.

    Ensures each collection in RAG_COLLECTIONS exists in the configured
    database and carries the vector search index defined by
    VECTOR_INDEX_DEFINITION, then prints a summary of created/existing
    collections, created/existing indexes, and any errors.

    Exits with status 1 when pymongo is missing, MONGODB_URI is unset,
    or the initial connection fails. Per-collection failures are
    collected and reported rather than aborting the run.
    """
    # Import lazily so a missing dependency yields a friendly hint
    # instead of a traceback at module import time.
    try:
        from pymongo import MongoClient
        from pymongo.operations import SearchIndexModel
    except ImportError:
        print("Error: pymongo not installed. Run: uv add pymongo")
        sys.exit(1)

    # Get configuration
    uri = os.getenv("MONGODB_URI")
    db_name = os.getenv("MONGODB_DATABASE", "specs_before_code")
    index_name = os.getenv("MONGODB_INDEX_NAME", "vector_index")

    if not uri:
        print("Error: MONGODB_URI environment variable not set")
        print("\nSet it in your .env file:")
        print('MONGODB_URI="mongodb+srv://<user>:<pass>@<cluster>.mongodb.net/"')
        sys.exit(1)

    print("Connecting to MongoDB Atlas...")
    print(f"Database: {db_name}")
    print(f"Index name: {index_name}")
    print()

    try:
        client = MongoClient(uri, serverSelectionTimeoutMS=5000)
        # Test connection
        client.admin.command("ping")
        print("Connected successfully!\n")
    except Exception as e:
        print(f"Error: Failed to connect to MongoDB: {e}")
        sys.exit(1)

    # Close the client even if an unexpected error escapes the loop below.
    try:
        db = client[db_name]
        existing_collections = set(db.list_collection_names())

        created_collections = []
        created_indexes = []
        existing_indexes = []
        errors = []

        for collection_name in RAG_COLLECTIONS:
            print(f"Setting up: {collection_name}")

            # Create collection if it doesn't exist
            if collection_name not in existing_collections:
                try:
                    db.create_collection(collection_name)
                    created_collections.append(collection_name)
                    print(" Created collection")
                except Exception as e:
                    errors.append(f" Failed to create collection: {e}")
                    print(errors[-1])
                    # Without the collection there is nothing to index.
                    continue
            else:
                print(" - Collection exists")

            collection = db[collection_name]

            # Skip index creation when the index is already present.
            try:
                existing = list(collection.list_search_indexes())
                index_exists = any(idx.get("name") == index_name for idx in existing)

                if index_exists:
                    print(f" - Index '{index_name}' already exists")
                    existing_indexes.append(collection_name)
                    continue
            except Exception:
                # list_search_indexes might fail if no indexes exist;
                # fall through and attempt creation.
                pass

            # Create vector search index
            try:
                search_index = SearchIndexModel(
                    definition=VECTOR_INDEX_DEFINITION,
                    name=index_name,
                    type="vectorSearch",
                )
                collection.create_search_index(model=search_index)
                created_indexes.append(collection_name)
                print(f" Created vector index '{index_name}'")
            except Exception as e:
                # Creation can race with a concurrent run; treat
                # "already exists" as success rather than an error.
                error_msg = str(e).lower()
                if "already exists" in error_msg:
                    print(f" - Index '{index_name}' already exists")
                    existing_indexes.append(collection_name)
                else:
                    errors.append(f" Failed to create index: {e}")
                    print(errors[-1])

        # Summary
        print("\n" + "=" * 50)
        print("SETUP COMPLETE")
        print("=" * 50)

        if created_collections:
            print(f"\nCollections created ({len(created_collections)}):")
            for name in created_collections:
                print(f" - {name}")

        if created_indexes:
            print(f"\nIndexes created ({len(created_indexes)}):")
            for name in created_indexes:
                print(f" - {name}")
            print("\nNote: Vector search indexes take 1-2 minutes to become active.")
            print("Check status in Atlas UI: Database > Atlas Search")

        if existing_indexes:
            print(f"\nExisting indexes ({len(existing_indexes)}):")
            for name in existing_indexes:
                print(f" - {name}")

        if errors:
            print(f"\nErrors ({len(errors)}):")
            for err in errors:
                print(err)

        print()
    finally:
        client.close()
193
+
194
+
195
def check_index_status() -> None:
    """Check the status of vector search indexes.

    Prints one line per collection in RAG_COLLECTIONS with the status
    reported by Atlas for the configured index name, "no index" when
    the index is absent, or the error encountered while listing.
    """
    try:
        from pymongo import MongoClient
    except ImportError:
        print("Error: pymongo not installed")
        return

    uri = os.getenv("MONGODB_URI")
    db_name = os.getenv("MONGODB_DATABASE", "specs_before_code")
    index_name = os.getenv("MONGODB_INDEX_NAME", "vector_index")

    if not uri:
        print("Error: MONGODB_URI not set")
        return

    # Bound server selection so an unreachable cluster fails fast,
    # matching the timeout used in setup_indexes().
    client = MongoClient(uri, serverSelectionTimeoutMS=5000)
    try:
        db = client[db_name]

        print(f"Checking index status in {db_name}...\n")

        for collection_name in RAG_COLLECTIONS:
            collection = db[collection_name]
            try:
                indexes = list(collection.list_search_indexes())
                for idx in indexes:
                    if idx.get("name") == index_name:
                        status = idx.get("status", "unknown")
                        print(f"{collection_name}: {status}")
                        break
                else:
                    # for/else: no index with the configured name was found.
                    print(f"{collection_name}: no index")
            except Exception as e:
                print(f"{collection_name}: error - {e}")
    finally:
        # Always release the connection, even on unexpected errors.
        client.close()
231
+
232
+
233
+ if __name__ == "__main__":
234
+ if len(sys.argv) > 1 and sys.argv[1] == "--status":
235
+ check_index_status()
236
+ else:
237
+ setup_indexes()
uv.lock CHANGED
@@ -568,6 +568,15 @@ wheels = [
568
  { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" },
569
  ]
570
 
 
 
 
 
 
 
 
 
 
571
  [[package]]
572
  name = "ecdsa"
573
  version = "0.19.1"
@@ -1032,6 +1041,38 @@ wheels = [
1032
  { url = "https://files.pythonhosted.org/packages/71/92/5e77f98553e9e75130c78900d000368476aed74276eb8ae8796f65f00918/jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942", size = 7595, upload-time = "2024-06-10T19:24:40.698Z" },
1033
  ]
1034
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1035
  [[package]]
1036
  name = "langchain-core"
1037
  version = "1.2.5"
@@ -1051,6 +1092,25 @@ wheels = [
1051
  { url = "https://files.pythonhosted.org/packages/83/bd/9df897cbc98290bf71140104ee5b9777cf5291afb80333aa7da5a497339b/langchain_core-1.2.5-py3-none-any.whl", hash = "sha256:3255944ef4e21b2551facb319bfc426057a40247c0a05de5bd6f2fc021fbfa34", size = 484851, upload-time = "2025-12-22T23:45:30.525Z" },
1052
  ]
1053
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1054
  [[package]]
1055
  name = "langchain-nvidia-ai-endpoints"
1056
  version = "1.0.0"
@@ -1152,6 +1212,15 @@ wheels = [
1152
  { url = "https://files.pythonhosted.org/packages/ed/d8/91a8b483b30e0708a8911df10b4ce04ebf2b4b8dde8d020c124aec77380a/langsmith-0.5.2-py3-none-any.whl", hash = "sha256:42f8b853a18dd4d5f7fa38c8ff29e38da065a727022da410d91b3e13819aacc1", size = 283311, upload-time = "2025-12-30T13:41:33.915Z" },
1153
  ]
1154
 
 
 
 
 
 
 
 
 
 
1155
  [[package]]
1156
  name = "libcst"
1157
  version = "1.8.6"
@@ -1921,46 +1990,6 @@ wheels = [
1921
  { url = "https://files.pythonhosted.org/packages/c1/70/6b41bdcddf541b437bbb9f47f94d2db5d9ddef6c37ccab8c9107743748a4/pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7", size = 2525630, upload-time = "2025-10-15T18:23:57.149Z" },
1922
  ]
1923
 
1924
- [[package]]
1925
- name = "pinecone"
1926
- version = "8.0.0"
1927
- source = { registry = "https://pypi.org/simple" }
1928
- dependencies = [
1929
- { name = "certifi" },
1930
- { name = "orjson" },
1931
- { name = "pinecone-plugin-assistant" },
1932
- { name = "pinecone-plugin-interface" },
1933
- { name = "python-dateutil" },
1934
- { name = "typing-extensions" },
1935
- { name = "urllib3" },
1936
- ]
1937
- sdist = { url = "https://files.pythonhosted.org/packages/33/13/f4c481a6a93dab92132d6d863b70a0e6c903f62940389435b31cf0c7d7d2/pinecone-8.0.0.tar.gz", hash = "sha256:feca7ff607706c09ffbd127ec93fa3b7110896b30c0d7a57672da73c69698d53", size = 1092653, upload-time = "2025-11-18T18:21:30.584Z" }
1938
- wheels = [
1939
- { url = "https://files.pythonhosted.org/packages/7b/6c/1d870a9211eb8f0bf60214182de001b480f94590eca9d6164a5d6d7de031/pinecone-8.0.0-py3-none-any.whl", hash = "sha256:95f714a496a91d80f3405165aedfea76ca8ac16e51e618df0434241838e353f8", size = 745902, upload-time = "2025-11-18T18:21:25.584Z" },
1940
- ]
1941
-
1942
- [[package]]
1943
- name = "pinecone-plugin-assistant"
1944
- version = "3.0.1"
1945
- source = { registry = "https://pypi.org/simple" }
1946
- dependencies = [
1947
- { name = "packaging" },
1948
- { name = "requests" },
1949
- ]
1950
- sdist = { url = "https://files.pythonhosted.org/packages/08/1a/33249870c9e8c774dafc038419b48aa63b380b461e9a1c1cb042db31be49/pinecone_plugin_assistant-3.0.1.tar.gz", hash = "sha256:6b00e94ef1bf55ed601d2316ee6f71f96f93bf2155277a826638395e1090dde3", size = 152060, upload-time = "2025-11-11T07:45:07.224Z" }
1951
- wheels = [
1952
- { url = "https://files.pythonhosted.org/packages/06/88/4b801675b4d58c5f8acd96bfd4847e6d7bc1a93ee4ff916e913dd6bda2de/pinecone_plugin_assistant-3.0.1-py3-none-any.whl", hash = "sha256:cd86ca5c98137221170e90fe81e03bbe71999992096da68c77f4af3503017622", size = 280865, upload-time = "2025-11-11T07:45:06.055Z" },
1953
- ]
1954
-
1955
- [[package]]
1956
- name = "pinecone-plugin-interface"
1957
- version = "0.0.7"
1958
- source = { registry = "https://pypi.org/simple" }
1959
- sdist = { url = "https://files.pythonhosted.org/packages/f4/fb/e8a4063264953ead9e2b24d9b390152c60f042c951c47f4592e9996e57ff/pinecone_plugin_interface-0.0.7.tar.gz", hash = "sha256:b8e6675e41847333aa13923cc44daa3f85676d7157324682dc1640588a982846", size = 3370, upload-time = "2024-06-05T01:57:52.093Z" }
1960
- wheels = [
1961
- { url = "https://files.pythonhosted.org/packages/3b/1d/a21fdfcd6d022cb64cef5c2a29ee6691c6c103c4566b41646b080b7536a5/pinecone_plugin_interface-0.0.7-py3-none-any.whl", hash = "sha256:875857ad9c9fc8bbc074dbe780d187a2afd21f5bfe0f3b08601924a61ef1bba8", size = 6249, upload-time = "2024-06-05T01:57:50.583Z" },
1962
- ]
1963
-
1964
  [[package]]
1965
  name = "platformdirs"
1966
  version = "4.5.1"
@@ -2256,6 +2285,69 @@ wheels = [
2256
  { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
2257
  ]
2258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2259
  [[package]]
2260
  name = "pyparsing"
2261
  version = "3.3.1"
@@ -2308,18 +2400,6 @@ wheels = [
2308
  { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
2309
  ]
2310
 
2311
- [[package]]
2312
- name = "python-dateutil"
2313
- version = "2.9.0.post0"
2314
- source = { registry = "https://pypi.org/simple" }
2315
- dependencies = [
2316
- { name = "six" },
2317
- ]
2318
- sdist = { url = "https://files.pythonhosted.org/packages/66/c0/0c8b6ad9f17a802ee498c46e004a0eb49bc148f2fd230864601a86dcf6db/python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3", size = 342432, upload-time = "2024-03-01T18:36:20.211Z" }
2319
- wheels = [
2320
- { url = "https://files.pythonhosted.org/packages/ec/57/56b9bcc3c9c6a792fcbaf139543cee77261f3651ca9da0c93f5c1221264b/python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427", size = 229892, upload-time = "2024-03-01T18:36:18.57Z" },
2321
- ]
2322
-
2323
  [[package]]
2324
  name = "python-dotenv"
2325
  version = "1.2.1"
@@ -2565,6 +2645,7 @@ dependencies = [
2565
  { name = "hydra-core" },
2566
  { name = "jinja2" },
2567
  { name = "langchain-core" },
 
2568
  { name = "langchain-nvidia-ai-endpoints" },
2569
  { name = "langchain-text-splitters" },
2570
  { name = "langgraph" },
@@ -2572,9 +2653,9 @@ dependencies = [
2572
  { name = "lightning" },
2573
  { name = "numpy" },
2574
  { name = "passlib", extra = ["bcrypt"] },
2575
- { name = "pinecone" },
2576
  { name = "psycopg2-binary" },
2577
  { name = "pydantic" },
 
2578
  { name = "python-dotenv" },
2579
  { name = "python-jose", extra = ["cryptography"] },
2580
  { name = "python-multipart" },
@@ -2618,6 +2699,7 @@ requires-dist = [
2618
  { name = "hydra-core", specifier = ">=1.3.2" },
2619
  { name = "jinja2", specifier = ">=3.1.6" },
2620
  { name = "langchain-core", specifier = ">=0.1.0" },
 
2621
  { name = "langchain-nvidia-ai-endpoints", specifier = ">=1.0.0" },
2622
  { name = "langchain-text-splitters", specifier = ">=0.0.1" },
2623
  { name = "langgraph", specifier = ">=0.2.39" },
@@ -2626,9 +2708,9 @@ requires-dist = [
2626
  { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.10.0" },
2627
  { name = "numpy", specifier = ">=1.26.0" },
2628
  { name = "passlib", extras = ["bcrypt"], specifier = ">=1.7.4" },
2629
- { name = "pinecone" },
2630
  { name = "psycopg2-binary", specifier = ">=2.9.9" },
2631
  { name = "pydantic", specifier = ">=2.7.0" },
 
2632
  { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" },
2633
  { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23.0" },
2634
  { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0.0" },
 
568
  { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" },
569
  ]
570
 
571
+ [[package]]
572
+ name = "dnspython"
573
+ version = "2.8.0"
574
+ source = { registry = "https://pypi.org/simple" }
575
+ sdist = { url = "https://files.pythonhosted.org/packages/8c/8b/57666417c0f90f08bcafa776861060426765fdb422eb10212086fb811d26/dnspython-2.8.0.tar.gz", hash = "sha256:181d3c6996452cb1189c4046c61599b84a5a86e099562ffde77d26984ff26d0f", size = 368251, upload-time = "2025-09-07T18:58:00.022Z" }
576
+ wheels = [
577
+ { url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" },
578
+ ]
579
+
580
  [[package]]
581
  name = "ecdsa"
582
  version = "0.19.1"
 
1041
  { url = "https://files.pythonhosted.org/packages/71/92/5e77f98553e9e75130c78900d000368476aed74276eb8ae8796f65f00918/jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942", size = 7595, upload-time = "2024-06-10T19:24:40.698Z" },
1042
  ]
1043
 
1044
+ [[package]]
1045
+ name = "langchain"
1046
+ version = "1.2.4"
1047
+ source = { registry = "https://pypi.org/simple" }
1048
+ dependencies = [
1049
+ { name = "langchain-core" },
1050
+ { name = "langgraph" },
1051
+ { name = "pydantic" },
1052
+ ]
1053
+ sdist = { url = "https://files.pythonhosted.org/packages/ed/3f/371267e88c153500a75c0e9daf9645a69955cfe6f85699955241ac0fa6e2/langchain-1.2.4.tar.gz", hash = "sha256:65119ff1c2ac8cc2410739b0fb2773f8fbfbe83357df9bab8a5fceafb9e04aa1", size = 552340, upload-time = "2026-01-14T19:35:26.556Z" }
1054
+ wheels = [
1055
+ { url = "https://files.pythonhosted.org/packages/0c/71/667887579bb3cf3c2db88224849f7362a8c3f118666e426a51058ee43d9c/langchain-1.2.4-py3-none-any.whl", hash = "sha256:182ac9f3c4559c5a6477e00d60ff8a56212ec4db6f101a4957492818dc3ce3e9", size = 107949, upload-time = "2026-01-14T19:35:24.7Z" },
1056
+ ]
1057
+
1058
+ [[package]]
1059
+ name = "langchain-classic"
1060
+ version = "1.0.1"
1061
+ source = { registry = "https://pypi.org/simple" }
1062
+ dependencies = [
1063
+ { name = "langchain-core" },
1064
+ { name = "langchain-text-splitters" },
1065
+ { name = "langsmith" },
1066
+ { name = "pydantic" },
1067
+ { name = "pyyaml" },
1068
+ { name = "requests" },
1069
+ { name = "sqlalchemy" },
1070
+ ]
1071
+ sdist = { url = "https://files.pythonhosted.org/packages/7c/4b/bd03518418ece4c13192a504449b58c28afee915dc4a6f4b02622458cb1b/langchain_classic-1.0.1.tar.gz", hash = "sha256:40a499684df36b005a1213735dc7f8dca8f5eb67978d6ec763e7a49780864fdc", size = 10516020, upload-time = "2025-12-23T22:55:22.615Z" }
1072
+ wheels = [
1073
+ { url = "https://files.pythonhosted.org/packages/83/0f/eab87f017d7fe28e8c11fff614f4cdbfae32baadb77d0f79e9f922af1df2/langchain_classic-1.0.1-py3-none-any.whl", hash = "sha256:131d83a02bb80044c68fedc1ab4ae885d5b8f8c2c742d8ab9e7534ad9cda8e80", size = 1040666, upload-time = "2025-12-23T22:55:21.025Z" },
1074
+ ]
1075
+
1076
  [[package]]
1077
  name = "langchain-core"
1078
  version = "1.2.5"
 
1092
  { url = "https://files.pythonhosted.org/packages/83/bd/9df897cbc98290bf71140104ee5b9777cf5291afb80333aa7da5a497339b/langchain_core-1.2.5-py3-none-any.whl", hash = "sha256:3255944ef4e21b2551facb319bfc426057a40247c0a05de5bd6f2fc021fbfa34", size = 484851, upload-time = "2025-12-22T23:45:30.525Z" },
1093
  ]
1094
 
1095
+ [[package]]
1096
+ name = "langchain-mongodb"
1097
+ version = "0.11.0"
1098
+ source = { registry = "https://pypi.org/simple" }
1099
+ dependencies = [
1100
+ { name = "langchain" },
1101
+ { name = "langchain-classic" },
1102
+ { name = "langchain-core" },
1103
+ { name = "langchain-text-splitters" },
1104
+ { name = "lark" },
1105
+ { name = "numpy" },
1106
+ { name = "pymongo" },
1107
+ { name = "pymongo-search-utils" },
1108
+ ]
1109
+ sdist = { url = "https://files.pythonhosted.org/packages/ba/0e/03027bbf0ae3ee71d00e32f5c64395cbee05393e6e5dc56e2d88320db542/langchain_mongodb-0.11.0.tar.gz", hash = "sha256:db483f12e8a4fdbbcfb0594881962fd1f0afcb38a3d42ee0d5fe8a2be20e1e86", size = 356447, upload-time = "2026-01-15T17:00:37.102Z" }
1110
+ wheels = [
1111
+ { url = "https://files.pythonhosted.org/packages/1e/a1/a4ef0c7027166540a4aced056b1fd7194e4519932d2a846fd2cfd9f057cb/langchain_mongodb-0.11.0-py3-none-any.whl", hash = "sha256:7e1f43684c907d1f1fee4dbc480dd4909b3ebf03b5d3dad105ed9f4a4280d49f", size = 62037, upload-time = "2026-01-15T17:00:36.258Z" },
1112
+ ]
1113
+
1114
  [[package]]
1115
  name = "langchain-nvidia-ai-endpoints"
1116
  version = "1.0.0"
 
1212
  { url = "https://files.pythonhosted.org/packages/ed/d8/91a8b483b30e0708a8911df10b4ce04ebf2b4b8dde8d020c124aec77380a/langsmith-0.5.2-py3-none-any.whl", hash = "sha256:42f8b853a18dd4d5f7fa38c8ff29e38da065a727022da410d91b3e13819aacc1", size = 283311, upload-time = "2025-12-30T13:41:33.915Z" },
1213
  ]
1214
 
1215
+ [[package]]
1216
+ name = "lark"
1217
+ version = "1.3.1"
1218
+ source = { registry = "https://pypi.org/simple" }
1219
+ sdist = { url = "https://files.pythonhosted.org/packages/da/34/28fff3ab31ccff1fd4f6c7c7b0ceb2b6968d8ea4950663eadcb5720591a0/lark-1.3.1.tar.gz", hash = "sha256:b426a7a6d6d53189d318f2b6236ab5d6429eaf09259f1ca33eb716eed10d2905", size = 382732, upload-time = "2025-10-27T18:25:56.653Z" }
1220
+ wheels = [
1221
+ { url = "https://files.pythonhosted.org/packages/82/3d/14ce75ef66813643812f3093ab17e46d3a206942ce7376d31ec2d36229e7/lark-1.3.1-py3-none-any.whl", hash = "sha256:c629b661023a014c37da873b4ff58a817398d12635d3bbb2c5a03be7fe5d1e12", size = 113151, upload-time = "2025-10-27T18:25:54.882Z" },
1222
+ ]
1223
+
1224
  [[package]]
1225
  name = "libcst"
1226
  version = "1.8.6"
 
1990
  { url = "https://files.pythonhosted.org/packages/c1/70/6b41bdcddf541b437bbb9f47f94d2db5d9ddef6c37ccab8c9107743748a4/pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7", size = 2525630, upload-time = "2025-10-15T18:23:57.149Z" },
1991
  ]
1992
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1993
  [[package]]
1994
  name = "platformdirs"
1995
  version = "4.5.1"
 
2285
  { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
2286
  ]
2287
 
2288
+ [[package]]
2289
+ name = "pymongo"
2290
+ version = "4.16.0"
2291
+ source = { registry = "https://pypi.org/simple" }
2292
+ dependencies = [
2293
+ { name = "dnspython" },
2294
+ ]
2295
+ sdist = { url = "https://files.pythonhosted.org/packages/65/9c/a4895c4b785fc9865a84a56e14b5bd21ca75aadc3dab79c14187cdca189b/pymongo-4.16.0.tar.gz", hash = "sha256:8ba8405065f6e258a6f872fe62d797a28f383a12178c7153c01ed04e845c600c", size = 2495323, upload-time = "2026-01-07T18:05:48.107Z" }
2296
+ wheels = [
2297
+ { url = "https://files.pythonhosted.org/packages/6a/03/6dd7c53cbde98de469a3e6fb893af896dca644c476beb0f0c6342bcc368b/pymongo-4.16.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:bd4911c40a43a821dfd93038ac824b756b6e703e26e951718522d29f6eb166a8", size = 917619, upload-time = "2026-01-07T18:04:19.173Z" },
2298
+ { url = "https://files.pythonhosted.org/packages/73/e1/328915f2734ea1f355dc9b0e98505ff670f5fab8be5e951d6ed70971c6aa/pymongo-4.16.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:25a6b03a68f9907ea6ec8bc7cf4c58a1b51a18e23394f962a6402f8e46d41211", size = 917364, upload-time = "2026-01-07T18:04:20.861Z" },
2299
+ { url = "https://files.pythonhosted.org/packages/41/fe/4769874dd9812a1bc2880a9785e61eba5340da966af888dd430392790ae0/pymongo-4.16.0-cp312-cp312-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:91ac0cb0fe2bf17616c2039dac88d7c9a5088f5cb5829b27c9d250e053664d31", size = 1686901, upload-time = "2026-01-07T18:04:22.219Z" },
2300
+ { url = "https://files.pythonhosted.org/packages/fa/8d/15707b9669fdc517bbc552ac60da7124dafe7ac1552819b51e97ed4038b4/pymongo-4.16.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:cf0ec79e8ca7077f455d14d915d629385153b6a11abc0b93283ed73a8013e376", size = 1723034, upload-time = "2026-01-07T18:04:24.055Z" },
2301
+ { url = "https://files.pythonhosted.org/packages/5b/af/3d5d16ff11d447d40c1472da1b366a31c7380d7ea2922a449c7f7f495567/pymongo-4.16.0-cp312-cp312-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:2d0082631a7510318befc2b4fdab140481eb4b9dd62d9245e042157085da2a70", size = 1797161, upload-time = "2026-01-07T18:04:25.964Z" },
2302
+ { url = "https://files.pythonhosted.org/packages/fb/04/725ab8664eeec73ec125b5a873448d80f5d8cf2750aaaf804cbc538a50a5/pymongo-4.16.0-cp312-cp312-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:85dc2f3444c346ea019a371e321ac868a4fab513b7a55fe368f0cc78de8177cc", size = 1780938, upload-time = "2026-01-07T18:04:28.745Z" },
2303
+ { url = "https://files.pythonhosted.org/packages/22/50/dd7e9095e1ca35f93c3c844c92eb6eb0bc491caeb2c9bff3b32fe3c9b18f/pymongo-4.16.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dabbf3c14de75a20cc3c30bf0c6527157224a93dfb605838eabb1a2ee3be008d", size = 1714342, upload-time = "2026-01-07T18:04:30.331Z" },
2304
+ { url = "https://files.pythonhosted.org/packages/03/c9/542776987d5c31ae8e93e92680ea2b6e5a2295f398b25756234cabf38a39/pymongo-4.16.0-cp312-cp312-win32.whl", hash = "sha256:60307bb91e0ab44e560fe3a211087748b2b5f3e31f403baf41f5b7b0a70bd104", size = 887868, upload-time = "2026-01-07T18:04:32.124Z" },
2305
+ { url = "https://files.pythonhosted.org/packages/2e/d4/b4045a7ccc5680fb496d01edf749c7a9367cc8762fbdf7516cf807ef679b/pymongo-4.16.0-cp312-cp312-win_amd64.whl", hash = "sha256:f513b2c6c0d5c491f478422f6b5b5c27ac1af06a54c93ef8631806f7231bd92e", size = 907554, upload-time = "2026-01-07T18:04:33.685Z" },
2306
+ { url = "https://files.pythonhosted.org/packages/60/4c/33f75713d50d5247f2258405142c0318ff32c6f8976171c4fcae87a9dbdf/pymongo-4.16.0-cp312-cp312-win_arm64.whl", hash = "sha256:dfc320f08ea9a7ec5b2403dc4e8150636f0d6150f4b9792faaae539c88e7db3b", size = 892971, upload-time = "2026-01-07T18:04:35.594Z" },
2307
+ { url = "https://files.pythonhosted.org/packages/47/84/148d8b5da8260f4679d6665196ae04ab14ffdf06f5fe670b0ab11942951f/pymongo-4.16.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:d15f060bc6d0964a8bb70aba8f0cb6d11ae99715438f640cff11bbcf172eb0e8", size = 972009, upload-time = "2026-01-07T18:04:38.303Z" },
2308
+ { url = "https://files.pythonhosted.org/packages/1e/5e/9f3a8daf583d0adaaa033a3e3e58194d2282737dc164014ff33c7a081103/pymongo-4.16.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:4a19ea46a0fe71248965305a020bc076a163311aefbaa1d83e47d06fa30ac747", size = 971784, upload-time = "2026-01-07T18:04:39.669Z" },
2309
+ { url = "https://files.pythonhosted.org/packages/ad/f2/b6c24361fcde24946198573c0176406bfd5f7b8538335f3d939487055322/pymongo-4.16.0-cp313-cp313-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:311d4549d6bf1f8c61d025965aebb5ba29d1481dc6471693ab91610aaffbc0eb", size = 1947174, upload-time = "2026-01-07T18:04:41.368Z" },
2310
+ { url = "https://files.pythonhosted.org/packages/47/1a/8634192f98cf740b3d174e1018dd0350018607d5bd8ac35a666dc49c732b/pymongo-4.16.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:46ffb728d92dd5b09fc034ed91acf5595657c7ca17d4cf3751322cd554153c17", size = 1991727, upload-time = "2026-01-07T18:04:42.965Z" },
2311
+ { url = "https://files.pythonhosted.org/packages/5a/2f/0c47ac84572b28e23028a23a3798a1f725e1c23b0cf1c1424678d16aff42/pymongo-4.16.0-cp313-cp313-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:acda193f440dd88c2023cb00aa8bd7b93a9df59978306d14d87a8b12fe426b05", size = 2082497, upload-time = "2026-01-07T18:04:44.652Z" },
2312
+ { url = "https://files.pythonhosted.org/packages/ba/57/9f46ef9c862b2f0cf5ce798f3541c201c574128d31ded407ba4b3918d7b6/pymongo-4.16.0-cp313-cp313-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:5d9fdb386cf958e6ef6ff537d6149be7edb76c3268cd6833e6c36aa447e4443f", size = 2064947, upload-time = "2026-01-07T18:04:46.228Z" },
2313
+ { url = "https://files.pythonhosted.org/packages/b8/56/5421c0998f38e32288100a07f6cb2f5f9f352522157c901910cb2927e211/pymongo-4.16.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:91899dd7fb9a8c50f09c3c1cf0cb73bfbe2737f511f641f19b9650deb61c00ca", size = 1980478, upload-time = "2026-01-07T18:04:48.017Z" },
2314
+ { url = "https://files.pythonhosted.org/packages/92/93/bfc448d025e12313a937d6e1e0101b50cc9751636b4b170e600fe3203063/pymongo-4.16.0-cp313-cp313-win32.whl", hash = "sha256:2cd60cd1e05de7f01927f8e25ca26b3ea2c09de8723241e5d3bcfdc70eaff76b", size = 934672, upload-time = "2026-01-07T18:04:49.538Z" },
2315
+ { url = "https://files.pythonhosted.org/packages/96/10/12710a5e01218d50c3dd165fd72c5ed2699285f77348a3b1a119a191d826/pymongo-4.16.0-cp313-cp313-win_amd64.whl", hash = "sha256:3ead8a0050c53eaa55935895d6919d393d0328ec24b2b9115bdbe881aa222673", size = 959237, upload-time = "2026-01-07T18:04:51.382Z" },
2316
+ { url = "https://files.pythonhosted.org/packages/0c/56/d288bcd1d05bc17ec69df1d0b1d67bc710c7c5dbef86033a5a4d2e2b08e6/pymongo-4.16.0-cp313-cp313-win_arm64.whl", hash = "sha256:dbbc5b254c36c37d10abb50e899bc3939bbb7ab1e7c659614409af99bd3e7675", size = 940909, upload-time = "2026-01-07T18:04:52.904Z" },
2317
+ { url = "https://files.pythonhosted.org/packages/30/9e/4d343f8d0512002fce17915a89477b9f916bda1205729e042d8f23acf194/pymongo-4.16.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:8a254d49a9ffe9d7f888e3c677eed3729b14ce85abb08cd74732cead6ccc3c66", size = 1026634, upload-time = "2026-01-07T18:04:54.359Z" },
2318
+ { url = "https://files.pythonhosted.org/packages/c3/e3/341f88c5535df40c0450fda915f582757bb7d988cdfc92990a5e27c4c324/pymongo-4.16.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a1bf44e13cf2d44d2ea2e928a8140d5d667304abe1a61c4d55b4906f389fbe64", size = 1026252, upload-time = "2026-01-07T18:04:56.642Z" },
2319
+ { url = "https://files.pythonhosted.org/packages/af/64/9471b22eb98f0a2ca0b8e09393de048502111b2b5b14ab1bd9e39708aab5/pymongo-4.16.0-cp314-cp314-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:f1c5f1f818b669875d191323a48912d3fcd2e4906410e8297bb09ac50c4d5ccc", size = 2207399, upload-time = "2026-01-07T18:04:58.255Z" },
2320
+ { url = "https://files.pythonhosted.org/packages/87/ac/47c4d50b25a02f21764f140295a2efaa583ee7f17992a5e5fa542b3a690f/pymongo-4.16.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:77cfd37a43a53b02b7bd930457c7994c924ad8bbe8dff91817904bcbf291b371", size = 2260595, upload-time = "2026-01-07T18:04:59.788Z" },
2321
+ { url = "https://files.pythonhosted.org/packages/ee/1b/0ce1ce9dd036417646b2fe6f63b58127acff3cf96eeb630c34ec9cd675ff/pymongo-4.16.0-cp314-cp314-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:36ef2fee50eee669587d742fb456e349634b4fcf8926208766078b089054b24b", size = 2366958, upload-time = "2026-01-07T18:05:01.942Z" },
2322
+ { url = "https://files.pythonhosted.org/packages/3e/3c/a5a17c0d413aa9d6c17bc35c2b472e9e79cda8068ba8e93433b5f43028e9/pymongo-4.16.0-cp314-cp314-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:55f8d5a6fe2fa0b823674db2293f92d74cd5f970bc0360f409a1fc21003862d3", size = 2346081, upload-time = "2026-01-07T18:05:03.576Z" },
2323
+ { url = "https://files.pythonhosted.org/packages/65/19/f815533d1a88fb8a3b6c6e895bb085ffdae68ccb1e6ed7102202a307f8e2/pymongo-4.16.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9caacac0dd105e2555521002e2d17afc08665187017b466b5753e84c016628e6", size = 2246053, upload-time = "2026-01-07T18:05:05.459Z" },
2324
+ { url = "https://files.pythonhosted.org/packages/c6/88/4be3ec78828dc64b212c123114bd6ae8db5b7676085a7b43cc75d0131bd2/pymongo-4.16.0-cp314-cp314-win32.whl", hash = "sha256:c789236366525c3ee3cd6e4e450a9ff629a7d1f4d88b8e18a0aea0615fd7ecf8", size = 989461, upload-time = "2026-01-07T18:05:07.018Z" },
2325
+ { url = "https://files.pythonhosted.org/packages/af/5a/ab8d5af76421b34db483c9c8ebc3a2199fb80ae63dc7e18f4cf1df46306a/pymongo-4.16.0-cp314-cp314-win_amd64.whl", hash = "sha256:2b0714d7764efb29bf9d3c51c964aed7c4c7237b341f9346f15ceaf8321fdb35", size = 1017803, upload-time = "2026-01-07T18:05:08.499Z" },
2326
+ { url = "https://files.pythonhosted.org/packages/f6/f4/98d68020728ac6423cf02d17cfd8226bf6cce5690b163d30d3f705e8297e/pymongo-4.16.0-cp314-cp314-win_arm64.whl", hash = "sha256:12762e7cc0f8374a8cae3b9f9ed8dabb5d438c7b33329232dd9b7de783454033", size = 997184, upload-time = "2026-01-07T18:05:09.944Z" },
2327
+ { url = "https://files.pythonhosted.org/packages/50/00/dc3a271daf06401825b9c1f4f76f018182c7738281ea54b9762aea0560c1/pymongo-4.16.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1c01e8a7cd0ea66baf64a118005535ab5bf9f9eb63a1b50ac3935dccf9a54abe", size = 1083303, upload-time = "2026-01-07T18:05:11.702Z" },
2328
+ { url = "https://files.pythonhosted.org/packages/b8/4b/b5375ee21d12eababe46215011ebc63801c0d2c5ffdf203849d0d79f9852/pymongo-4.16.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:4c4872299ebe315a79f7f922051061634a64fda95b6b17677ba57ef00b2ba2a4", size = 1083233, upload-time = "2026-01-07T18:05:13.182Z" },
2329
+ { url = "https://files.pythonhosted.org/packages/ee/e3/52efa3ca900622c7dcb56c5e70f15c906816d98905c22d2ee1f84d9a7b60/pymongo-4.16.0-cp314-cp314t-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:78037d02389745e247fe5ab0bcad5d1ab30726eaac3ad79219c7d6bbb07eec53", size = 2527438, upload-time = "2026-01-07T18:05:14.981Z" },
2330
+ { url = "https://files.pythonhosted.org/packages/cb/96/43b1be151c734e7766c725444bcbfa1de6b60cc66bfb406203746839dd25/pymongo-4.16.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c126fb72be2518395cc0465d4bae03125119136462e1945aea19840e45d89cfc", size = 2600399, upload-time = "2026-01-07T18:05:16.794Z" },
2331
+ { url = "https://files.pythonhosted.org/packages/e7/62/fa64a5045dfe3a1cd9217232c848256e7bc0136cffb7da4735c5e0d30e40/pymongo-4.16.0-cp314-cp314t-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f3867dc225d9423c245a51eaac2cfcd53dde8e0a8d8090bb6aed6e31bd6c2d4f", size = 2720960, upload-time = "2026-01-07T18:05:18.498Z" },
2332
+ { url = "https://files.pythonhosted.org/packages/54/7b/01577eb97e605502821273a5bc16ce0fb0be5c978fe03acdbff471471202/pymongo-4.16.0-cp314-cp314t-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:f25001a955073b80510c0c3db0e043dbbc36904fd69e511c74e3d8640b8a5111", size = 2699344, upload-time = "2026-01-07T18:05:20.073Z" },
2333
+ { url = "https://files.pythonhosted.org/packages/55/68/6ef6372d516f703479c3b6cbbc45a5afd307173b1cbaccd724e23919bb1a/pymongo-4.16.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d9885aad05f82fd7ea0c9ca505d60939746b39263fa273d0125170da8f59098", size = 2577133, upload-time = "2026-01-07T18:05:22.052Z" },
2334
+ { url = "https://files.pythonhosted.org/packages/15/c7/b5337093bb01da852f945802328665f85f8109dbe91d81ea2afe5ff059b9/pymongo-4.16.0-cp314-cp314t-win32.whl", hash = "sha256:948152b30eddeae8355495f9943a3bf66b708295c0b9b6f467de1c620f215487", size = 1040560, upload-time = "2026-01-07T18:05:23.888Z" },
2335
+ { url = "https://files.pythonhosted.org/packages/96/8c/5b448cd1b103f3889d5713dda37304c81020ff88e38a826e8a75ddff4610/pymongo-4.16.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f6e42c1bc985d9beee884780ae6048790eb4cd565c46251932906bdb1630034a", size = 1075081, upload-time = "2026-01-07T18:05:26.874Z" },
2336
+ { url = "https://files.pythonhosted.org/packages/32/cd/ddc794cdc8500f6f28c119c624252fb6dfb19481c6d7ed150f13cf468a6d/pymongo-4.16.0-cp314-cp314t-win_arm64.whl", hash = "sha256:6b2a20edb5452ac8daa395890eeb076c570790dfce6b7a44d788af74c2f8cf96", size = 1047725, upload-time = "2026-01-07T18:05:28.47Z" },
2337
+ ]
2338
+
2339
+ [[package]]
2340
+ name = "pymongo-search-utils"
2341
+ version = "0.3.0"
2342
+ source = { registry = "https://pypi.org/simple" }
2343
+ dependencies = [
2344
+ { name = "pymongo" },
2345
+ ]
2346
+ sdist = { url = "https://files.pythonhosted.org/packages/d9/aa/3eb266ffc74ec52bbf6dd92d311ab4fc3225c2ac8f1a2e6abe98f7288867/pymongo_search_utils-0.3.0.tar.gz", hash = "sha256:56148987ce9ff191eb1cd0f56c01d3dae497a3cb6d7b7db75ec894a9afcbe418", size = 13728, upload-time = "2026-02-03T22:18:24.481Z" }
2347
+ wheels = [
2348
+ { url = "https://files.pythonhosted.org/packages/55/ed/87d3ed0e45b9230bacb9edcb913d515e6756bc2df3384e5f192662c38ce8/pymongo_search_utils-0.3.0-py3-none-any.whl", hash = "sha256:9b9ef8dfbd57da530ce7c2bde10aec8f462605080a9ed4e9a41679170c8742bf", size = 19467, upload-time = "2026-02-03T22:18:23.398Z" },
2349
+ ]
2350
+
2351
  [[package]]
2352
  name = "pyparsing"
2353
  version = "3.3.1"
 
2400
  { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" },
2401
  ]
2402
 
 
 
 
 
 
 
 
 
 
 
 
 
2403
  [[package]]
2404
  name = "python-dotenv"
2405
  version = "1.2.1"
 
2645
  { name = "hydra-core" },
2646
  { name = "jinja2" },
2647
  { name = "langchain-core" },
2648
+ { name = "langchain-mongodb" },
2649
  { name = "langchain-nvidia-ai-endpoints" },
2650
  { name = "langchain-text-splitters" },
2651
  { name = "langgraph" },
 
2653
  { name = "lightning" },
2654
  { name = "numpy" },
2655
  { name = "passlib", extra = ["bcrypt"] },
 
2656
  { name = "psycopg2-binary" },
2657
  { name = "pydantic" },
2658
+ { name = "pymongo" },
2659
  { name = "python-dotenv" },
2660
  { name = "python-jose", extra = ["cryptography"] },
2661
  { name = "python-multipart" },
 
2699
  { name = "hydra-core", specifier = ">=1.3.2" },
2700
  { name = "jinja2", specifier = ">=3.1.6" },
2701
  { name = "langchain-core", specifier = ">=0.1.0" },
2702
+ { name = "langchain-mongodb", specifier = ">=0.1.0" },
2703
  { name = "langchain-nvidia-ai-endpoints", specifier = ">=1.0.0" },
2704
  { name = "langchain-text-splitters", specifier = ">=0.0.1" },
2705
  { name = "langgraph", specifier = ">=0.2.39" },
 
2708
  { name = "mypy", marker = "extra == 'dev'", specifier = ">=1.10.0" },
2709
  { name = "numpy", specifier = ">=1.26.0" },
2710
  { name = "passlib", extras = ["bcrypt"], specifier = ">=1.7.4" },
 
2711
  { name = "psycopg2-binary", specifier = ">=2.9.9" },
2712
  { name = "pydantic", specifier = ">=2.7.0" },
2713
+ { name = "pymongo", specifier = ">=4.6.0" },
2714
  { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" },
2715
  { name = "pytest-asyncio", marker = "extra == 'dev'", specifier = ">=0.23.0" },
2716
  { name = "pytest-cov", marker = "extra == 'dev'", specifier = ">=4.0.0" },