diff --git a/.agents/skills/api-docs-generator/SKILL.md b/.agents/skills/api-docs-generator/SKILL.md
new file mode 100644
index 0000000000000000000000000000000000000000..db222b38dec944f674704a0907b1d277f0c350e4
--- /dev/null
+++ b/.agents/skills/api-docs-generator/SKILL.md
@@ -0,0 +1,946 @@
+---
+name: api-docs-generator
+description: Generates API documentation using OpenAPI/Swagger specifications with interactive documentation, code examples, and SDK generation. Use when users request "API documentation", "OpenAPI spec", "Swagger docs", "document API endpoints", or "generate API reference".
+---
+
+# API Docs Generator
+
+Create comprehensive API documentation with OpenAPI specifications and interactive documentation.
+
+## Core Workflow
+
+1. **Analyze API endpoints**: Review routes, methods, parameters
+2. **Define OpenAPI spec**: Create specification in YAML/JSON
+3. **Add schemas**: Define request/response models
+4. **Include examples**: Add realistic example values
+5. **Generate documentation**: Deploy interactive docs
+6. **Create SDK**: Optional client library generation
+
+## OpenAPI Specification Structure
+
+```yaml
+# openapi.yaml
+openapi: 3.1.0
+
+info:
+ title: My API
+ version: 1.0.0
+ description: |
+ API description with **Markdown** support.
+
+ ## Authentication
+ All endpoints require Bearer token authentication.
+ contact:
+ name: API Support
+ email: api@example.com
+ url: https://docs.example.com
+ license:
+ name: MIT
+ url: https://opensource.org/licenses/MIT
+
+servers:
+ - url: https://api.example.com/v1
+ description: Production
+ - url: https://staging-api.example.com/v1
+ description: Staging
+ - url: http://localhost:3000/v1
+ description: Development
+
+tags:
+ - name: Users
+ description: User management endpoints
+ - name: Products
+ description: Product catalog endpoints
+ - name: Orders
+ description: Order processing endpoints
+
+paths:
+ # Endpoints defined here
+
+components:
+ # Reusable schemas, security, etc.
+```
+
+## Path Definitions
+
+### Basic CRUD Endpoints
+
+```yaml
+paths:
+ /users:
+ get:
+ tags:
+ - Users
+ summary: List all users
+ description: Retrieve a paginated list of users
+ operationId: listUsers
+ parameters:
+ - $ref: '#/components/parameters/PageParam'
+ - $ref: '#/components/parameters/LimitParam'
+ - name: role
+ in: query
+ description: Filter by user role
+ schema:
+ type: string
+ enum: [admin, user, guest]
+ responses:
+ '200':
+ description: Successful response
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/UserList'
+ example:
+ data:
+ - id: "usr_123"
+ email: "john@example.com"
+ name: "John Doe"
+ role: "admin"
+ createdAt: "2024-01-15T10:30:00Z"
+ pagination:
+ page: 1
+ limit: 20
+ total: 150
+ '401':
+ $ref: '#/components/responses/Unauthorized'
+ '500':
+ $ref: '#/components/responses/InternalError'
+
+ post:
+ tags:
+ - Users
+ summary: Create a new user
+ description: Create a new user account
+ operationId: createUser
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/CreateUserRequest'
+ example:
+ email: "newuser@example.com"
+ name: "New User"
+ password: "securePassword123"
+ role: "user"
+ responses:
+ '201':
+ description: User created successfully
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/User'
+ '400':
+ $ref: '#/components/responses/BadRequest'
+ '409':
+ description: User already exists
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/Error'
+ example:
+ code: "USER_EXISTS"
+ message: "A user with this email already exists"
+ '422':
+ $ref: '#/components/responses/ValidationError'
+
+ /users/{userId}:
+ parameters:
+ - $ref: '#/components/parameters/UserId'
+
+ get:
+ tags:
+ - Users
+ summary: Get user by ID
+ description: Retrieve a specific user by their ID
+ operationId: getUserById
+ responses:
+ '200':
+ description: Successful response
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/User'
+ '404':
+ $ref: '#/components/responses/NotFound'
+
+ patch:
+ tags:
+ - Users
+ summary: Update user
+ description: Update an existing user's information
+ operationId: updateUser
+ requestBody:
+ required: true
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/UpdateUserRequest'
+ responses:
+ '200':
+ description: User updated successfully
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/User'
+ '404':
+ $ref: '#/components/responses/NotFound'
+ '422':
+ $ref: '#/components/responses/ValidationError'
+
+ delete:
+ tags:
+ - Users
+ summary: Delete user
+ description: Permanently delete a user
+ operationId: deleteUser
+ responses:
+ '204':
+ description: User deleted successfully
+ '404':
+ $ref: '#/components/responses/NotFound'
+```
+
+## Component Schemas
+
+### Data Models
+
+```yaml
+components:
+ schemas:
+ # Base User Schema
+ User:
+ type: object
+ properties:
+ id:
+ type: string
+ format: uuid
+ description: Unique user identifier
+ example: "usr_123abc"
+ readOnly: true
+ email:
+ type: string
+ format: email
+ description: User's email address
+ example: "john@example.com"
+ name:
+ type: string
+ minLength: 1
+ maxLength: 100
+ description: User's full name
+ example: "John Doe"
+ role:
+ $ref: '#/components/schemas/UserRole'
+ avatar:
+        # OpenAPI 3.1: the "nullable" keyword was removed; use a type array
+        type: ["string", "null"]
+        format: uri
+ description: URL to user's avatar image
+ example: "https://cdn.example.com/avatars/123.jpg"
+ createdAt:
+ type: string
+ format: date-time
+ description: Account creation timestamp
+ readOnly: true
+ updatedAt:
+ type: string
+ format: date-time
+ description: Last update timestamp
+ readOnly: true
+ required:
+ - id
+ - email
+ - name
+ - role
+ - createdAt
+
+ UserRole:
+ type: string
+ enum:
+ - admin
+ - user
+ - guest
+ description: User's role in the system
+ example: "user"
+
+ # Request Schemas
+ CreateUserRequest:
+ type: object
+ properties:
+ email:
+ type: string
+ format: email
+ name:
+ type: string
+ minLength: 1
+ maxLength: 100
+ password:
+ type: string
+ format: password
+ minLength: 8
+ description: Must contain at least one uppercase, one lowercase, and one number
+ role:
+ $ref: '#/components/schemas/UserRole'
+ required:
+ - email
+ - name
+ - password
+
+ UpdateUserRequest:
+ type: object
+ properties:
+ name:
+ type: string
+ minLength: 1
+ maxLength: 100
+ role:
+ $ref: '#/components/schemas/UserRole'
+ avatar:
+        # OpenAPI 3.1: the "nullable" keyword was removed; use a type array
+        type: ["string", "null"]
+        format: uri
+ minProperties: 1
+
+ # List Response
+ UserList:
+ type: object
+ properties:
+ data:
+ type: array
+ items:
+ $ref: '#/components/schemas/User'
+ pagination:
+ $ref: '#/components/schemas/Pagination'
+
+ Pagination:
+ type: object
+ properties:
+ page:
+ type: integer
+ minimum: 1
+ example: 1
+ limit:
+ type: integer
+ minimum: 1
+ maximum: 100
+ example: 20
+ total:
+ type: integer
+ minimum: 0
+ example: 150
+ hasMore:
+ type: boolean
+ example: true
+
+ # Error Schemas
+ Error:
+ type: object
+ properties:
+ code:
+ type: string
+ description: Machine-readable error code
+ example: "VALIDATION_ERROR"
+ message:
+ type: string
+ description: Human-readable error message
+ example: "The request body is invalid"
+ details:
+ type: array
+ items:
+ $ref: '#/components/schemas/ErrorDetail'
+ required:
+ - code
+ - message
+
+ ErrorDetail:
+ type: object
+ properties:
+ field:
+ type: string
+ description: The field that caused the error
+ example: "email"
+ message:
+ type: string
+ description: Description of the validation error
+ example: "Must be a valid email address"
+```
+
+## Parameters and Responses
+
+```yaml
+components:
+ parameters:
+ UserId:
+ name: userId
+ in: path
+ required: true
+ description: Unique user identifier
+ schema:
+ type: string
+ format: uuid
+ example: "usr_123abc"
+
+ PageParam:
+ name: page
+ in: query
+ description: Page number for pagination
+ schema:
+ type: integer
+ minimum: 1
+ default: 1
+ example: 1
+
+ LimitParam:
+ name: limit
+ in: query
+ description: Number of items per page
+ schema:
+ type: integer
+ minimum: 1
+ maximum: 100
+ default: 20
+ example: 20
+
+ SortParam:
+ name: sort
+ in: query
+ description: Sort field and direction
+ schema:
+ type: string
+ pattern: '^[a-zA-Z]+:(asc|desc)$'
+ example: "createdAt:desc"
+
+ responses:
+ BadRequest:
+ description: Bad request - invalid input
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/Error'
+ example:
+ code: "BAD_REQUEST"
+ message: "Invalid request format"
+
+ Unauthorized:
+ description: Authentication required
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/Error'
+ example:
+ code: "UNAUTHORIZED"
+ message: "Authentication token is missing or invalid"
+
+ Forbidden:
+ description: Permission denied
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/Error'
+ example:
+ code: "FORBIDDEN"
+ message: "You don't have permission to access this resource"
+
+ NotFound:
+ description: Resource not found
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/Error'
+ example:
+ code: "NOT_FOUND"
+ message: "The requested resource was not found"
+
+ ValidationError:
+ description: Validation error
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/Error'
+ example:
+ code: "VALIDATION_ERROR"
+ message: "Request validation failed"
+ details:
+ - field: "email"
+ message: "Must be a valid email address"
+ - field: "password"
+ message: "Must be at least 8 characters"
+
+ InternalError:
+ description: Internal server error
+ content:
+ application/json:
+ schema:
+ $ref: '#/components/schemas/Error'
+ example:
+ code: "INTERNAL_ERROR"
+ message: "An unexpected error occurred"
+```
+
+## Security Definitions
+
+```yaml
+components:
+ securitySchemes:
+ BearerAuth:
+ type: http
+ scheme: bearer
+ bearerFormat: JWT
+ description: |
+ JWT token obtained from the /auth/login endpoint.
+
+ Example: `Authorization: Bearer eyJhbGciOiJIUzI1...`
+
+ ApiKeyAuth:
+ type: apiKey
+ in: header
+ name: X-API-Key
+ description: API key for server-to-server communication
+
+ OAuth2:
+ type: oauth2
+ description: OAuth 2.0 authentication
+ flows:
+ authorizationCode:
+ authorizationUrl: https://auth.example.com/oauth/authorize
+ tokenUrl: https://auth.example.com/oauth/token
+ scopes:
+ read:users: Read user information
+ write:users: Create and modify users
+ admin: Full administrative access
+
+# Apply security globally
+security:
+ - BearerAuth: []
+
+# Or per-endpoint
+paths:
+ /public/health:
+ get:
+ security: [] # No auth required
+ summary: Health check
+ responses:
+ '200':
+ description: Service is healthy
+```
+
+## Express/Node.js Integration
+
+### Generate from Code with express-openapi
+
+```typescript
+// src/docs/openapi.ts
+import { OpenAPIV3_1 } from 'openapi-types';
+
+export const openApiDocument: OpenAPIV3_1.Document = {
+ openapi: '3.1.0',
+ info: {
+ title: 'My API',
+ version: '1.0.0',
+ description: 'API documentation',
+ },
+ servers: [
+ { url: 'http://localhost:3000', description: 'Development' },
+ ],
+ paths: {},
+ components: {
+ schemas: {},
+ securitySchemes: {
+ BearerAuth: {
+ type: 'http',
+ scheme: 'bearer',
+ bearerFormat: 'JWT',
+ },
+ },
+ },
+};
+```
+
+### Swagger UI Express
+
+```typescript
+// src/docs/swagger.ts
+import swaggerUi from 'swagger-ui-express';
+import YAML from 'yamljs';
+import path from 'path';
+import { Express } from 'express';
+
+export function setupSwagger(app: Express) {
+ const swaggerDocument = YAML.load(
+ path.join(__dirname, '../../openapi.yaml')
+ );
+
+ const options: swaggerUi.SwaggerUiOptions = {
+ explorer: true,
+ customSiteTitle: 'API Documentation',
+ customCss: '.swagger-ui .topbar { display: none }',
+ swaggerOptions: {
+ persistAuthorization: true,
+ displayRequestDuration: true,
+ filter: true,
+ showExtensions: true,
+ },
+ };
+
+ app.use('/docs', swaggerUi.serve, swaggerUi.setup(swaggerDocument, options));
+ app.get('/openapi.json', (req, res) => res.json(swaggerDocument));
+}
+```
+
+### Zod to OpenAPI
+
+```typescript
+// src/schemas/user.ts
+import { z } from 'zod';
+import { extendZodWithOpenApi } from '@asteasolutions/zod-to-openapi';
+
+extendZodWithOpenApi(z);
+
+export const UserSchema = z.object({
+ id: z.string().uuid().openapi({ example: 'usr_123abc' }),
+ email: z.string().email().openapi({ example: 'john@example.com' }),
+ name: z.string().min(1).max(100).openapi({ example: 'John Doe' }),
+ role: z.enum(['admin', 'user', 'guest']).openapi({ example: 'user' }),
+ createdAt: z.string().datetime(),
+}).openapi('User');
+
+export const CreateUserSchema = z.object({
+ email: z.string().email(),
+ name: z.string().min(1).max(100),
+ password: z.string().min(8),
+ role: z.enum(['admin', 'user', 'guest']).optional().default('user'),
+}).openapi('CreateUserRequest');
+```
+
+```typescript
+// src/docs/generator.ts
+import {
+ OpenAPIRegistry,
+ OpenApiGeneratorV31,
+} from '@asteasolutions/zod-to-openapi';
+import { UserSchema, CreateUserSchema } from '../schemas/user';
+
+const registry = new OpenAPIRegistry();
+
+// Register schemas
+registry.register('User', UserSchema);
+registry.register('CreateUserRequest', CreateUserSchema);
+
+// Register endpoints
+registry.registerPath({
+ method: 'get',
+ path: '/users',
+ tags: ['Users'],
+ summary: 'List all users',
+ responses: {
+ 200: {
+ description: 'List of users',
+ content: {
+ 'application/json': {
+ schema: z.array(UserSchema),
+ },
+ },
+ },
+ },
+});
+
+registry.registerPath({
+ method: 'post',
+ path: '/users',
+ tags: ['Users'],
+ summary: 'Create a user',
+ request: {
+ body: {
+ content: {
+ 'application/json': {
+ schema: CreateUserSchema,
+ },
+ },
+ },
+ },
+ responses: {
+ 201: {
+ description: 'User created',
+ content: {
+ 'application/json': {
+ schema: UserSchema,
+ },
+ },
+ },
+ },
+});
+
+// Generate OpenAPI document
+const generator = new OpenApiGeneratorV31(registry.definitions);
+export const openApiDocument = generator.generateDocument({
+ openapi: '3.1.0',
+ info: {
+ title: 'My API',
+ version: '1.0.0',
+ },
+});
+```
+
+## FastAPI Integration
+
+```python
+# main.py
+from fastapi import FastAPI, HTTPException, Query
+from fastapi.openapi.utils import get_openapi
+from pydantic import BaseModel, EmailStr, Field
+from typing import Optional
+from datetime import datetime
+from enum import Enum
+
+app = FastAPI(
+ title="My API",
+ description="API documentation with FastAPI",
+ version="1.0.0",
+ docs_url="/docs",
+ redoc_url="/redoc",
+)
+
+
+class UserRole(str, Enum):
+ admin = "admin"
+ user = "user"
+ guest = "guest"
+
+
+class UserBase(BaseModel):
+ email: EmailStr = Field(..., example="john@example.com")
+ name: str = Field(..., min_length=1, max_length=100, example="John Doe")
+ role: UserRole = Field(default=UserRole.user, example="user")
+
+
+class UserCreate(UserBase):
+ password: str = Field(..., min_length=8, example="securePassword123")
+
+
+class User(UserBase):
+ id: str = Field(..., example="usr_123abc")
+ created_at: datetime
+ updated_at: Optional[datetime] = None
+
+ class Config:
+ from_attributes = True
+
+
+class UserList(BaseModel):
+ data: list[User]
+ total: int
+ page: int
+ limit: int
+
+
+@app.get(
+ "/users",
+ response_model=UserList,
+ tags=["Users"],
+ summary="List all users",
+ description="Retrieve a paginated list of users",
+)
+async def list_users(
+ page: int = Query(1, ge=1, description="Page number"),
+ limit: int = Query(20, ge=1, le=100, description="Items per page"),
+ role: Optional[UserRole] = Query(None, description="Filter by role"),
+):
+ # Implementation
+ pass
+
+
+@app.post(
+ "/users",
+ response_model=User,
+ status_code=201,
+ tags=["Users"],
+ summary="Create a new user",
+ responses={
+ 409: {"description": "User already exists"},
+ 422: {"description": "Validation error"},
+ },
+)
+async def create_user(user: UserCreate):
+ # Implementation
+ pass
+
+
+# Custom OpenAPI schema
+def custom_openapi():
+ if app.openapi_schema:
+ return app.openapi_schema
+
+ openapi_schema = get_openapi(
+ title="My API",
+ version="1.0.0",
+ description="API documentation",
+ routes=app.routes,
+ )
+
+ # Add security scheme
+ openapi_schema["components"]["securitySchemes"] = {
+ "BearerAuth": {
+ "type": "http",
+ "scheme": "bearer",
+ "bearerFormat": "JWT",
+ }
+ }
+ openapi_schema["security"] = [{"BearerAuth": []}]
+
+ app.openapi_schema = openapi_schema
+ return app.openapi_schema
+
+
+app.openapi = custom_openapi
+```
+
+## Documentation Generators
+
+### Redoc
+
+```html
+<!DOCTYPE html>
+<html>
+  <head>
+    <meta charset="utf-8" />
+    <title>API Documentation</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1">
+    <style>
+      body { margin: 0; padding: 0; }
+    </style>
+  </head>
+  <body>
+    <redoc spec-url="/openapi.yaml"></redoc>
+    <script src="https://cdn.redoc.ly/redoc/latest/bundles/redoc.standalone.js"></script>
+  </body>
+</html>
+```
+
+### Stoplight Elements
+
+```html
+<!DOCTYPE html>
+<html>
+  <head>
+    <title>API Documentation</title>
+    <script src="https://unpkg.com/@stoplight/elements/web-components.min.js"></script>
+    <link rel="stylesheet" href="https://unpkg.com/@stoplight/elements/styles.min.css">
+  </head>
+  <body>
+    <elements-api apiDescriptionUrl="/openapi.yaml" router="hash" layout="sidebar"></elements-api>
+  </body>
+</html>
+```
+
+## SDK Generation
+
+### OpenAPI Generator
+
+```bash
+# Install OpenAPI Generator
+npm install -g @openapitools/openapi-generator-cli
+
+# Generate TypeScript client
+openapi-generator-cli generate \
+ -i openapi.yaml \
+ -g typescript-fetch \
+ -o ./sdk/typescript \
+ --additional-properties=supportsES6=true,npmName=@myorg/api-client
+
+# Generate Python client
+openapi-generator-cli generate \
+ -i openapi.yaml \
+ -g python \
+ -o ./sdk/python \
+ --additional-properties=packageName=myapi_client
+```
+
+### Configuration
+
+```yaml
+# openapitools.json
+{
+ "$schema": "https://raw.githubusercontent.com/OpenAPITools/openapi-generator/master/modules/openapi-generator-gradle-plugin/src/main/resources/openapitools.json",
+ "spaces": 2,
+ "generator-cli": {
+ "version": "7.0.0",
+ "generators": {
+ "typescript-client": {
+ "generatorName": "typescript-fetch",
+ "inputSpec": "./openapi.yaml",
+ "output": "./sdk/typescript",
+ "additionalProperties": {
+ "supportsES6": true,
+ "npmName": "@myorg/api-client",
+ "npmVersion": "1.0.0"
+ }
+ }
+ }
+ }
+}
+```
+
+## Validation
+
+### Spectral Linting
+
+```yaml
+# .spectral.yaml
+extends: ["spectral:oas"]  # asyncapi ruleset does not apply to OpenAPI documents
+
+rules:
+ operation-operationId: error
+ operation-description: warn
+ operation-tags: error
+ info-contact: warn
+ info-license: warn
+ oas3-schema: error
+ oas3-valid-media-example: warn
+
+ # Custom rules
+ path-must-have-tag:
+    given: "$.paths[*][get,put,post,delete,options,head,patch,trace]"
+ severity: error
+ then:
+ field: tags
+ function: truthy
+```
+
+```bash
+# Run linting
+npx @stoplight/spectral-cli lint openapi.yaml
+```
+
+## Best Practices
+
+1. **Use $ref for reusability**: Define schemas once, reference everywhere
+2. **Include examples**: Add realistic examples for all schemas
+3. **Document errors**: Describe all possible error responses
+4. **Version your API**: Use URL or header versioning
+5. **Group with tags**: Organize endpoints logically
+6. **Add descriptions**: Explain every parameter and field
+7. **Use security schemes**: Document authentication clearly
+8. **Validate spec**: Use Spectral or similar tools
+9. **Generate SDKs**: Automate client library creation
+10. **Keep spec in sync**: Generate from code or validate against it
+
+## Output Checklist
+
+Every API documentation should include:
+
+- [ ] Complete OpenAPI 3.x specification
+- [ ] All endpoints documented with examples
+- [ ] Request/response schemas with types
+- [ ] Error responses documented
+- [ ] Authentication scheme defined
+- [ ] Parameters described with examples
+- [ ] Interactive documentation deployed (Swagger UI/Redoc)
+- [ ] Specification validated with linter
+- [ ] SDK generation configured
+- [ ] Versioning strategy documented
diff --git a/.agents/skills/api-rate-limiting/SKILL.md b/.agents/skills/api-rate-limiting/SKILL.md
new file mode 100644
index 0000000000000000000000000000000000000000..4e43b19b2732b1e14e494045de65b7648c4099d7
--- /dev/null
+++ b/.agents/skills/api-rate-limiting/SKILL.md
@@ -0,0 +1,371 @@
+---
+name: api-rate-limiting
+description: Implement API rate limiting strategies using token bucket, sliding window, and fixed window algorithms. Use when protecting APIs from abuse, managing traffic, or implementing tiered rate limits.
+---
+
+# API Rate Limiting
+
+## Overview
+
+Protect APIs from abuse and manage traffic using various rate limiting algorithms with per-user, per-IP, and per-endpoint strategies.
+
+## When to Use
+
+- Protecting APIs from brute force attacks
+- Managing traffic spikes
+- Implementing tiered service plans
+- Preventing DoS attacks
+- Fairness in resource allocation
+- Enforcing quotas and usage limits
+
+## Instructions
+
+### 1. **Token Bucket Algorithm**
+
+```javascript
+// Token Bucket Rate Limiter
+class TokenBucket {
+ constructor(capacity, refillRate) {
+ this.capacity = capacity;
+ this.tokens = capacity;
+ this.refillRate = refillRate; // tokens per second
+ this.lastRefillTime = Date.now();
+ }
+
+ refill() {
+ const now = Date.now();
+ const timePassed = (now - this.lastRefillTime) / 1000;
+ const tokensToAdd = timePassed * this.refillRate;
+
+ this.tokens = Math.min(this.capacity, this.tokens + tokensToAdd);
+ this.lastRefillTime = now;
+ }
+
+ consume(tokens = 1) {
+ this.refill();
+
+ if (this.tokens >= tokens) {
+ this.tokens -= tokens;
+ return true;
+ }
+ return false;
+ }
+
+ available() {
+ this.refill();
+ return Math.floor(this.tokens);
+ }
+}
+
+// Express middleware
+const express = require('express');
+const app = express();
+
+const rateLimiters = new Map();
+
+const tokenBucketRateLimit = (capacity, refillRate) => {
+ return (req, res, next) => {
+ const key = req.user?.id || req.ip;
+
+ if (!rateLimiters.has(key)) {
+ rateLimiters.set(key, new TokenBucket(capacity, refillRate));
+ }
+
+ const limiter = rateLimiters.get(key);
+
+ if (limiter.consume(1)) {
+ res.setHeader('X-RateLimit-Limit', capacity);
+ res.setHeader('X-RateLimit-Remaining', limiter.available());
+ next();
+ } else {
+ res.status(429).json({
+ error: 'Rate limit exceeded',
+ retryAfter: Math.ceil(1 / limiter.refillRate)
+ });
+ }
+ };
+};
+
+app.get('/api/data', tokenBucketRateLimit(100, 10), (req, res) => {
+ res.json({ data: 'api response' });
+});
+```
+
+### 2. **Sliding Window Algorithm**
+
+```javascript
+class SlidingWindowLimiter {
+ constructor(maxRequests, windowSizeSeconds) {
+ this.maxRequests = maxRequests;
+ this.windowSize = windowSizeSeconds * 1000; // Convert to ms
+ this.requests = [];
+ }
+
+ isAllowed() {
+ const now = Date.now();
+ const windowStart = now - this.windowSize;
+
+ // Remove old requests outside window
+ this.requests = this.requests.filter(time => time > windowStart);
+
+ if (this.requests.length < this.maxRequests) {
+ this.requests.push(now);
+ return true;
+ }
+ return false;
+ }
+
+ remaining() {
+ const now = Date.now();
+ const windowStart = now - this.windowSize;
+ this.requests = this.requests.filter(time => time > windowStart);
+ return Math.max(0, this.maxRequests - this.requests.length);
+ }
+}
+
+const slidingWindowRateLimit = (maxRequests, windowSeconds) => {
+ const limiters = new Map();
+
+ return (req, res, next) => {
+ const key = req.user?.id || req.ip;
+
+ if (!limiters.has(key)) {
+ limiters.set(key, new SlidingWindowLimiter(maxRequests, windowSeconds));
+ }
+
+ const limiter = limiters.get(key);
+
+ if (limiter.isAllowed()) {
+ res.setHeader('X-RateLimit-Limit', maxRequests);
+ res.setHeader('X-RateLimit-Remaining', limiter.remaining());
+ next();
+ } else {
+ res.status(429).json({ error: 'Rate limit exceeded' });
+ }
+ };
+};
+
+app.get('/api/search', slidingWindowRateLimit(30, 60), (req, res) => {
+ res.json({ results: [] });
+});
+```
+
+### 3. **Redis-Based Rate Limiting**
+
+```javascript
+const redis = require('redis');
+const client = redis.createClient(); client.connect(); // node-redis v4+: connect once; commands then return promises
+
+// Sliding window with Redis
+const redisRateLimit = (maxRequests, windowSeconds) => {
+  return async (req, res, next) => {
+    const key = `ratelimit:${req.user?.id || req.ip}`;
+    const now = Date.now();
+    const windowStart = now - (windowSeconds * 1000);
+
+    try {
+      // Remove old requests
+      await client.zRemRangeByScore(key, 0, windowStart);
+
+      // Count requests in window
+      const count = await client.zCard(key);
+
+      if (count < maxRequests) {
+        // Add current request
+        await client.zAdd(key, { score: now, value: `${now}-${Math.random()}` });
+        // Set expiration
+        await client.expire(key, windowSeconds);
+
+        res.setHeader('X-RateLimit-Limit', maxRequests);
+        res.setHeader('X-RateLimit-Remaining', maxRequests - count - 1);
+        next();
+      } else {
+        const oldestRequest = await client.zRange(key, 0, 0);
+ const resetTime = parseInt(oldestRequest[0]) + (windowSeconds * 1000);
+ const retryAfter = Math.ceil((resetTime - now) / 1000);
+
+ res.set('Retry-After', retryAfter);
+ res.status(429).json({
+ error: 'Rate limit exceeded',
+ retryAfter
+ });
+ }
+ } catch (error) {
+ console.error('Rate limit error:', error);
+ next(); // Allow request if Redis fails
+ }
+ };
+};
+
+app.get('/api/expensive', redisRateLimit(10, 60), (req, res) => {
+ res.json({ result: 'expensive operation' });
+});
+```
+
+### 4. **Tiered Rate Limiting**
+
+```javascript
+const RATE_LIMITS = {
+ free: { requests: 100, window: 3600 }, // 100 per hour
+ pro: { requests: 10000, window: 3600 }, // 10,000 per hour
+ enterprise: { requests: null, window: null } // Unlimited
+};
+
+const tieredRateLimit = async (req, res, next) => {
+ const user = req.user;
+ const plan = user?.plan || 'free';
+ const limits = RATE_LIMITS[plan];
+
+ if (!limits.requests) {
+ return next(); // Unlimited plan
+ }
+
+ const key = `ratelimit:${user.id}`;
+ const now = Date.now();
+ const windowStart = now - (limits.window * 1000);
+
+ try {
+    await client.zRemRangeByScore(key, 0, windowStart);
+    const count = await client.zCard(key);
+
+    if (count < limits.requests) {
+      await client.zAdd(key, { score: now, value: `${now}-${Math.random()}` });
+ await client.expire(key, limits.window);
+
+ res.setHeader('X-RateLimit-Limit', limits.requests);
+ res.setHeader('X-RateLimit-Remaining', limits.requests - count - 1);
+ res.setHeader('X-Plan', plan);
+ next();
+ } else {
+ res.status(429).json({
+ error: 'Rate limit exceeded',
+ plan,
+ upgradeUrl: '/plans'
+ });
+ }
+ } catch (error) {
+ next();
+ }
+};
+
+app.use(tieredRateLimit);
+```
+
+### 5. **Python Rate Limiting (Flask)**
+
+```python
+from flask import Flask, request, jsonify
+from flask_limiter import Limiter
+from flask_limiter.util import get_remote_address
+from datetime import datetime, timedelta
+import redis
+
+app = Flask(__name__)
+limiter = Limiter(
+ app=app,
+ key_func=get_remote_address,
+ default_limits=["200 per day", "50 per hour"]
+)
+
+# Custom rate limit based on user plan
+redis_client = redis.Redis(host='localhost', port=6379)
+
+def get_rate_limit(user_id):
+    plan = (redis_client.get(f'user:{user_id}:plan') or b'free').decode()  # default to free when unset
+ limits = {
+ 'free': (100, 3600),
+ 'pro': (10000, 3600),
+ 'enterprise': (None, None)
+ }
+ return limits.get(plan, (100, 3600))
+
+@app.route('/api/data', methods=['GET'])
+@limiter.limit("30 per minute")
+def get_data():
+ return jsonify({'data': 'api response'}), 200
+
+@app.route('/api/premium', methods=['GET'])
+def get_premium_data():
+ user_id = request.user_id
+ max_requests, window = get_rate_limit(user_id)
+
+ if max_requests is None:
+ return jsonify({'data': 'unlimited data'}), 200
+
+ key = f'ratelimit:{user_id}'
+ current = redis_client.incr(key)
+    if current == 1: redis_client.expire(key, window)  # fixed window: start the TTL only on the first hit, or the window never closes
+
+ if current <= max_requests:
+ return jsonify({'data': 'premium data'}), 200
+ else:
+ return jsonify({'error': 'Rate limit exceeded'}), 429
+```
+
+### 6. **Response Headers**
+
+```javascript
+// Standard rate limit headers
+res.setHeader('X-RateLimit-Limit', maxRequests); // Total requests allowed
+res.setHeader('X-RateLimit-Remaining', remaining); // Remaining requests
+res.setHeader('X-RateLimit-Reset', resetTime); // Unix timestamp of reset
+res.setHeader('Retry-After', secondsToWait); // How long to wait
+
+// 429 Too Many Requests response
+{
+ "error": "Rate limit exceeded",
+ "code": "RATE_LIMIT_EXCEEDED",
+ "retryAfter": 60,
+ "resetAt": "2025-01-15T15:00:00Z"
+}
+```
+
+## Best Practices
+
+### ✅ DO
+- Include rate limit headers in responses
+- Use Redis for distributed rate limiting
+- Implement tiered limits for different user plans
+- Set appropriate window sizes and limits
+- Monitor rate limit metrics
+- Provide clear retry guidance
+- Document rate limits in API docs
+- Test under high load
+
+### ❌ DON'T
+- Use in-memory storage in production
+- Set limits too restrictively
+- Forget to include Retry-After header
+- Ignore distributed scenarios
+- Rely on keeping limits secret as a security measure (limits should be documented; secrecy is not a defense)
+- Use simple counters for distributed systems
+- Forget cleanup of old data
+
+## Monitoring
+
+```javascript
+// Track rate limit metrics
+const metrics = {
+ totalRequests: 0,
+ limitedRequests: 0,
+ byUser: new Map()
+};
+
+app.use((req, res, next) => {
+ metrics.totalRequests++;
+ res.on('finish', () => {
+ if (res.statusCode === 429) {
+ metrics.limitedRequests++;
+ }
+ });
+ next();
+});
+
+app.get('/metrics/rate-limit', (req, res) => {
+ res.json({
+ totalRequests: metrics.totalRequests,
+ limitedRequests: metrics.limitedRequests,
+    percentage: metrics.totalRequests ? (metrics.limitedRequests / metrics.totalRequests * 100).toFixed(2) : '0.00'
+ });
+});
+```
diff --git a/.agents/skills/api-security-hardening/SKILL.md b/.agents/skills/api-security-hardening/SKILL.md
new file mode 100644
index 0000000000000000000000000000000000000000..d2a07b42ce02390727c05b6bb6e6643efa321b10
--- /dev/null
+++ b/.agents/skills/api-security-hardening/SKILL.md
@@ -0,0 +1,659 @@
+---
+name: api-security-hardening
+description: Secure REST APIs with authentication, rate limiting, CORS, input validation, and security middleware. Use when building or hardening API endpoints against common attacks.
+---
+
+# API Security Hardening
+
+## Overview
+
+Implement comprehensive API security measures including authentication, authorization, rate limiting, input validation, and attack prevention to protect against common vulnerabilities.
+
+## When to Use
+
+- New API development
+- Security audit remediation
+- Production API hardening
+- Compliance requirements
+- High-traffic API protection
+- Public API exposure
+
+## Implementation Examples
+
+### 1. **Node.js/Express API Security**
+
+```javascript
+// secure-api.js - Comprehensive API security
+const express = require('express');
+const helmet = require('helmet');
+const rateLimit = require('express-rate-limit');
+const mongoSanitize = require('express-mongo-sanitize');
+const xss = require('xss-clean');
+const hpp = require('hpp');
+const cors = require('cors');
+const jwt = require('jsonwebtoken');
+const validator = require('validator');
+
+class SecureAPIServer {
+ constructor() {
+ this.app = express();
+ this.setupSecurityMiddleware();
+ this.setupRoutes();
+ }
+
+ setupSecurityMiddleware() {
+ // 1. Helmet - Set security headers
+ this.app.use(helmet({
+ contentSecurityPolicy: {
+ directives: {
+ defaultSrc: ["'self'"],
+ styleSrc: ["'self'", "'unsafe-inline'"],
+ scriptSrc: ["'self'"],
+ imgSrc: ["'self'", "data:", "https:"]
+ }
+ },
+ hsts: {
+ maxAge: 31536000,
+ includeSubDomains: true,
+ preload: true
+ }
+ }));
+
+ // 2. CORS configuration
+ const corsOptions = {
+ origin: (origin, callback) => {
+ const whitelist = [
+ 'https://example.com',
+ 'https://app.example.com'
+ ];
+
+ if (!origin || whitelist.includes(origin)) {
+ callback(null, true);
+ } else {
+ callback(new Error('Not allowed by CORS'));
+ }
+ },
+ credentials: true,
+ optionsSuccessStatus: 200,
+ methods: ['GET', 'POST', 'PUT', 'DELETE'],
+ allowedHeaders: ['Content-Type', 'Authorization']
+ };
+
+ this.app.use(cors(corsOptions));
+
+ // 3. Rate limiting
+ const generalLimiter = rateLimit({
+ windowMs: 15 * 60 * 1000, // 15 minutes
+ max: 100, // limit each IP to 100 requests per windowMs
+ message: 'Too many requests from this IP',
+ standardHeaders: true,
+ legacyHeaders: false,
+ handler: (req, res) => {
+ res.status(429).json({
+ error: 'rate_limit_exceeded',
+ message: 'Too many requests, please try again later',
+ retryAfter: req.rateLimit.resetTime
+ });
+ }
+ });
+
+ const authLimiter = rateLimit({
+ windowMs: 15 * 60 * 1000,
+ max: 5, // Stricter limit for auth endpoints
+ skipSuccessfulRequests: true
+ });
+
+ this.app.use('/api/', generalLimiter);
+ this.app.use('/api/auth/', authLimiter);
+
+ // 4. Body parsing with size limits
+ this.app.use(express.json({ limit: '10kb' }));
+ this.app.use(express.urlencoded({ extended: true, limit: '10kb' }));
+
+ // 5. NoSQL injection prevention
+ this.app.use(mongoSanitize());
+
+ // 6. XSS protection
+ this.app.use(xss());
+
+ // 7. HTTP Parameter Pollution prevention
+ this.app.use(hpp());
+
+ // 8. Request ID for tracking
+ this.app.use((req, res, next) => {
+ req.id = require('crypto').randomUUID();
+ res.setHeader('X-Request-ID', req.id);
+ next();
+ });
+
+ // 9. Security logging
+ this.app.use(this.securityLogger());
+ }
+
+ securityLogger() {
+ return (req, res, next) => {
+ const startTime = Date.now();
+
+ res.on('finish', () => {
+ const duration = Date.now() - startTime;
+
+ const logEntry = {
+ timestamp: new Date().toISOString(),
+ requestId: req.id,
+ method: req.method,
+ path: req.path,
+ statusCode: res.statusCode,
+ duration,
+ ip: req.ip,
+ userAgent: req.get('user-agent')
+ };
+
+ // Log suspicious activity
+ if (res.statusCode === 401 || res.statusCode === 403) {
+ console.warn('Security event:', logEntry);
+ }
+
+ if (res.statusCode >= 500) {
+ console.error('Server error:', logEntry);
+ }
+ });
+
+ next();
+ };
+ }
+
+ // JWT authentication middleware
+ authenticateJWT() {
+ return (req, res, next) => {
+ const authHeader = req.headers.authorization;
+
+ if (!authHeader || !authHeader.startsWith('Bearer ')) {
+ return res.status(401).json({
+ error: 'unauthorized',
+ message: 'Missing or invalid authorization header'
+ });
+ }
+
+ const token = authHeader.substring(7);
+
+ try {
+ const decoded = jwt.verify(token, process.env.JWT_SECRET, {
+ algorithms: ['HS256'],
+ issuer: 'api.example.com',
+ audience: 'api.example.com'
+ });
+
+ req.user = decoded;
+ next();
+ } catch (error) {
+ if (error.name === 'TokenExpiredError') {
+ return res.status(401).json({
+ error: 'token_expired',
+ message: 'Token has expired'
+ });
+ }
+
+ return res.status(401).json({
+ error: 'invalid_token',
+ message: 'Invalid token'
+ });
+ }
+ };
+ }
+
+ // Input validation middleware
+ validateInput(schema) {
+ return (req, res, next) => {
+ const errors = [];
+
+ // Validate request body
+ if (schema.body) {
+ for (const [field, rules] of Object.entries(schema.body)) {
+ const value = req.body[field];
+
+ if (rules.required && !value) {
+ errors.push(`${field} is required`);
+ continue;
+ }
+
+ if (value) {
+ // Type validation
+ if (rules.type === 'email' && !validator.isEmail(value)) {
+ errors.push(`${field} must be a valid email`);
+ }
+
+ if (rules.type === 'uuid' && !validator.isUUID(value)) {
+ errors.push(`${field} must be a valid UUID`);
+ }
+
+ if (rules.type === 'url' && !validator.isURL(value)) {
+ errors.push(`${field} must be a valid URL`);
+ }
+
+ // Length validation
+ if (rules.minLength && value.length < rules.minLength) {
+ errors.push(`${field} must be at least ${rules.minLength} characters`);
+ }
+
+ if (rules.maxLength && value.length > rules.maxLength) {
+ errors.push(`${field} must be at most ${rules.maxLength} characters`);
+ }
+
+ // Pattern validation
+ if (rules.pattern && !rules.pattern.test(value)) {
+ errors.push(`${field} format is invalid`);
+ }
+ }
+ }
+ }
+
+ if (errors.length > 0) {
+ return res.status(400).json({
+ error: 'validation_error',
+ message: 'Input validation failed',
+ details: errors
+ });
+ }
+
+ next();
+ };
+ }
+
+ // Authorization middleware
+ authorize(...roles) {
+ return (req, res, next) => {
+ if (!req.user) {
+ return res.status(401).json({
+ error: 'unauthorized',
+ message: 'Authentication required'
+ });
+ }
+
+ if (roles.length > 0 && !roles.includes(req.user.role)) {
+ return res.status(403).json({
+ error: 'forbidden',
+ message: 'Insufficient permissions'
+ });
+ }
+
+ next();
+ };
+ }
+
+ setupRoutes() {
+ // Public endpoint
+ this.app.get('/api/health', (req, res) => {
+ res.json({ status: 'healthy' });
+ });
+
+ // Protected endpoint with validation
+ this.app.post('/api/users',
+ this.authenticateJWT(),
+ this.authorize('admin'),
+ this.validateInput({
+ body: {
+ email: { required: true, type: 'email' },
+ name: { required: true, minLength: 2, maxLength: 100 },
+ password: { required: true, minLength: 8 }
+ }
+ }),
+ async (req, res) => {
+ try {
+ // Sanitized and validated input
+ const { email, name, password } = req.body;
+
+ // Process request
+ res.status(201).json({
+ message: 'User created successfully',
+ userId: '123'
+ });
+ } catch (error) {
+ res.status(500).json({
+ error: 'internal_error',
+ message: 'An error occurred'
+ });
+ }
+ }
+ );
+
+ // Error handling middleware
+ this.app.use((err, req, res, next) => {
+ console.error('Unhandled error:', err);
+
+ res.status(500).json({
+ error: 'internal_error',
+ message: 'An unexpected error occurred',
+ requestId: req.id
+ });
+ });
+ }
+
+ start(port = 3000) {
+ this.app.listen(port, () => {
+ console.log(`Secure API server running on port ${port}`);
+ });
+ }
+}
+
+// Usage
+const server = new SecureAPIServer();
+server.start(3000);
+```
+
+### 2. **Python FastAPI Security**
+
+```python
+# secure_api.py
+from fastapi import FastAPI, HTTPException, Depends, Security, status
+from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.middleware.trustedhost import TrustedHostMiddleware
+from slowapi import Limiter, _rate_limit_exceeded_handler
+from slowapi.util import get_remote_address
+from slowapi.errors import RateLimitExceeded
+from pydantic import BaseModel, EmailStr, validator, Field
+import jwt
+from datetime import datetime, timedelta
+import re
+from typing import Optional, List
+import secrets
+
+app = FastAPI()
+security = HTTPBearer()
+limiter = Limiter(key_func=get_remote_address)
+
+# Rate limiting
+app.state.limiter = limiter
+app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
+
+# CORS configuration
+app.add_middleware(
+ CORSMiddleware,
+ allow_origins=[
+ "https://example.com",
+ "https://app.example.com"
+ ],
+ allow_credentials=True,
+ allow_methods=["GET", "POST", "PUT", "DELETE"],
+ allow_headers=["Content-Type", "Authorization"],
+ max_age=3600
+)
+
+# Trusted hosts
+app.add_middleware(
+ TrustedHostMiddleware,
+ allowed_hosts=["example.com", "*.example.com"]
+)
+
+# Security headers middleware
+@app.middleware("http")
+async def add_security_headers(request, call_next):
+ response = await call_next(request)
+
+ response.headers["X-Content-Type-Options"] = "nosniff"
+ response.headers["X-Frame-Options"] = "DENY"
+ response.headers["X-XSS-Protection"] = "1; mode=block"
+ response.headers["Strict-Transport-Security"] = "max-age=31536000; includeSubDomains"
+ response.headers["Content-Security-Policy"] = "default-src 'self'"
+ response.headers["Referrer-Policy"] = "strict-origin-when-cross-origin"
+ response.headers["Permissions-Policy"] = "geolocation=(), microphone=(), camera=()"
+
+ return response
+
+# Input validation models
+class CreateUserRequest(BaseModel):
+ email: EmailStr
+ name: str = Field(..., min_length=2, max_length=100)
+ password: str = Field(..., min_length=8)
+
+ @validator('password')
+ def validate_password(cls, v):
+ if not re.search(r'[A-Z]', v):
+ raise ValueError('Password must contain uppercase letter')
+ if not re.search(r'[a-z]', v):
+ raise ValueError('Password must contain lowercase letter')
+ if not re.search(r'\d', v):
+ raise ValueError('Password must contain digit')
+ if not re.search(r'[!@#$%^&*]', v):
+ raise ValueError('Password must contain special character')
+ return v
+
+ @validator('name')
+ def validate_name(cls, v):
+ # Prevent XSS in name field
+ if re.search(r'[<>]', v):
+ raise ValueError('Name contains invalid characters')
+ return v
+
+class APIKeyRequest(BaseModel):
+ name: str = Field(..., max_length=100)
+ expires_in_days: int = Field(30, ge=1, le=365)
+
+# JWT token verification
+def verify_token(credentials: HTTPAuthorizationCredentials = Security(security)):
+ try:
+ token = credentials.credentials
+
+ payload = jwt.decode(
+ token,
+ "your-secret-key",
+ algorithms=["HS256"],
+ audience="api.example.com",
+ issuer="api.example.com"
+ )
+
+ return payload
+
+ except jwt.ExpiredSignatureError:
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Token has expired"
+ )
+ except jwt.InvalidTokenError:
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Invalid token"
+ )
+
+# Role-based authorization
+def require_role(required_roles: List[str]):
+ def role_checker(token_payload: dict = Depends(verify_token)):
+ user_role = token_payload.get('role')
+
+ if user_role not in required_roles:
+ raise HTTPException(
+ status_code=status.HTTP_403_FORBIDDEN,
+ detail="Insufficient permissions"
+ )
+
+ return token_payload
+
+ return role_checker
+
+# API key authentication
+def verify_api_key(api_key: str):
+ # Constant-time comparison to prevent timing attacks
+ if not secrets.compare_digest(api_key, "expected-api-key"):
+ raise HTTPException(
+ status_code=status.HTTP_401_UNAUTHORIZED,
+ detail="Invalid API key"
+ )
+ return True
+
+# Endpoints
+@app.get("/api/health")
+@limiter.limit("100/minute")
+async def health_check():
+ return {"status": "healthy"}
+
+@app.post("/api/users")
+@limiter.limit("10/minute")
+async def create_user(
+ user: CreateUserRequest,
+ token_payload: dict = Depends(require_role(["admin"]))
+):
+ """Create new user (admin only)"""
+
+ # Hash password before storing
+ # hashed_password = bcrypt.hashpw(user.password.encode(), bcrypt.gensalt())
+
+ return {
+ "message": "User created successfully",
+ "user_id": "123"
+ }
+
+@app.post("/api/keys")
+@limiter.limit("5/hour")
+async def create_api_key(
+ request: APIKeyRequest,
+ token_payload: dict = Depends(verify_token)
+):
+ """Generate API key"""
+
+ # Generate secure random API key
+ api_key = secrets.token_urlsafe(32)
+
+ expires_at = datetime.now() + timedelta(days=request.expires_in_days)
+
+ return {
+ "api_key": api_key,
+ "expires_at": expires_at.isoformat(),
+ "name": request.name
+ }
+
+@app.get("/api/protected")
+async def protected_endpoint(token_payload: dict = Depends(verify_token)):
+ return {
+ "message": "Access granted",
+ "user_id": token_payload.get("sub")
+ }
+
+if __name__ == "__main__":
+ import uvicorn
+ uvicorn.run(app, host="0.0.0.0", port=8000, ssl_certfile="cert.pem", ssl_keyfile="key.pem")
+```
+
+### 3. **API Gateway Security Configuration**
+
+```yaml
+# nginx-api-gateway.conf
+# Nginx API Gateway with security hardening
+
+http {
+ # Security headers
+ add_header X-Frame-Options "DENY" always;
+ add_header X-Content-Type-Options "nosniff" always;
+ add_header X-XSS-Protection "1; mode=block" always;
+ add_header Strict-Transport-Security "max-age=31536000; includeSubDomains" always;
+ add_header Content-Security-Policy "default-src 'self'" always;
+
+ # Rate limiting zones
+ limit_req_zone $binary_remote_addr zone=api_limit:10m rate=10r/s;
+ limit_req_zone $binary_remote_addr zone=auth_limit:10m rate=1r/s;
+ limit_conn_zone $binary_remote_addr zone=conn_limit:10m;
+
+ # Request body size limit
+ client_max_body_size 10M;
+ client_body_buffer_size 128k;
+
+ # Timeout settings
+ client_body_timeout 12;
+ client_header_timeout 12;
+ send_timeout 10;
+
+ server {
+ listen 443 ssl http2;
+ server_name api.example.com;
+
+ # SSL configuration
+ ssl_certificate /etc/ssl/certs/api.example.com.crt;
+ ssl_certificate_key /etc/ssl/private/api.example.com.key;
+ ssl_protocols TLSv1.2 TLSv1.3;
+ ssl_ciphers HIGH:!aNULL:!MD5;
+ ssl_prefer_server_ciphers on;
+ ssl_session_cache shared:SSL:10m;
+ ssl_session_timeout 10m;
+
+ # API endpoints
+ location /api/ {
+ # Rate limiting
+ limit_req zone=api_limit burst=20 nodelay;
+ limit_conn conn_limit 10;
+
+ # CORS headers
+ add_header Access-Control-Allow-Origin "https://app.example.com" always;
+ add_header Access-Control-Allow-Methods "GET, POST, PUT, DELETE" always;
+ add_header Access-Control-Allow-Headers "Authorization, Content-Type" always;
+
+ # Block common exploits
+ if ($request_method !~ ^(GET|POST|PUT|DELETE|HEAD)$ ) {
+ return 444;
+ }
+
+ # Proxy to backend
+ proxy_pass http://backend:3000;
+ proxy_set_header Host $host;
+ proxy_set_header X-Real-IP $remote_addr;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_set_header X-Forwarded-Proto $scheme;
+
+ # Timeouts
+ proxy_connect_timeout 60s;
+ proxy_send_timeout 60s;
+ proxy_read_timeout 60s;
+ }
+
+ # Auth endpoints with stricter limits
+ location /api/auth/ {
+ limit_req zone=auth_limit burst=5 nodelay;
+
+ proxy_pass http://backend:3000;
+ }
+
+ # Block access to sensitive files
+ location ~ /\. {
+ deny all;
+ return 404;
+ }
+ }
+}
+```
+
+## Best Practices
+
+### ✅ DO
+- Use HTTPS everywhere
+- Implement rate limiting
+- Validate all inputs
+- Use security headers
+- Log security events
+- Implement CORS properly
+- Use strong authentication
+- Version your APIs
+
+### ❌ DON'T
+- Expose stack traces
+- Return detailed errors
+- Trust user input
+- Use HTTP for APIs
+- Skip input validation
+- Ignore rate limiting
+
+## Security Checklist
+
+- [ ] HTTPS enforced
+- [ ] Authentication required
+- [ ] Authorization implemented
+- [ ] Rate limiting active
+- [ ] Input validation
+- [ ] CORS configured
+- [ ] Security headers set
+- [ ] Error handling secure
+- [ ] Logging enabled
+- [ ] API versioning
+
+## Resources
+
+- [OWASP API Security Top 10](https://owasp.org/www-project-api-security/)
+- [API Security Best Practices](https://github.com/shieldfy/API-Security-Checklist)
+- [JWT Best Practices](https://tools.ietf.org/html/rfc8725)
diff --git a/.agents/skills/find-skills/SKILL.md b/.agents/skills/find-skills/SKILL.md
new file mode 100644
index 0000000000000000000000000000000000000000..c797184ee66bbfd8b60c58b5efdb91880ae8e991
--- /dev/null
+++ b/.agents/skills/find-skills/SKILL.md
@@ -0,0 +1,133 @@
+---
+name: find-skills
+description: Helps users discover and install agent skills when they ask questions like "how do I do X", "find a skill for X", "is there a skill that can...", or express interest in extending capabilities. This skill should be used when the user is looking for functionality that might exist as an installable skill.
+---
+
+# Find Skills
+
+This skill helps you discover and install skills from the open agent skills ecosystem.
+
+## When to Use This Skill
+
+Use this skill when the user:
+
+- Asks "how do I do X" where X might be a common task with an existing skill
+- Says "find a skill for X" or "is there a skill for X"
+- Asks "can you do X" where X is a specialized capability
+- Expresses interest in extending agent capabilities
+- Wants to search for tools, templates, or workflows
+- Mentions they wish they had help with a specific domain (design, testing, deployment, etc.)
+
+## What is the Skills CLI?
+
+The Skills CLI (`npx skills`) is the package manager for the open agent skills ecosystem. Skills are modular packages that extend agent capabilities with specialized knowledge, workflows, and tools.
+
+**Key commands:**
+
+- `npx skills find [query]` - Search for skills interactively or by keyword
+- `npx skills add <owner>/<repo>@<skill-name>` - Install a skill from GitHub or other sources
+- `npx skills check` - Check for skill updates
+- `npx skills update` - Update all installed skills
+
+**Browse skills at:** https://skills.sh/
+
+## How to Help Users Find Skills
+
+### Step 1: Understand What They Need
+
+When a user asks for help with something, identify:
+
+1. The domain (e.g., React, testing, design, deployment)
+2. The specific task (e.g., writing tests, creating animations, reviewing PRs)
+3. Whether this is a common enough task that a skill likely exists
+
+### Step 2: Search for Skills
+
+Run the find command with a relevant query:
+
+```bash
+npx skills find [query]
+```
+
+For example:
+
+- User asks "how do I make my React app faster?" → `npx skills find react performance`
+- User asks "can you help me with PR reviews?" → `npx skills find pr review`
+- User asks "I need to create a changelog" → `npx skills find changelog`
+
+The command will return results like:
+
+```
+Install with npx skills add <owner>/<repo>@<skill-name>
+
+vercel-labs/agent-skills@vercel-react-best-practices
+└ https://skills.sh/vercel-labs/agent-skills/vercel-react-best-practices
+```
+
+### Step 3: Present Options to the User
+
+When you find relevant skills, present them to the user with:
+
+1. The skill name and what it does
+2. The install command they can run
+3. A link to learn more at skills.sh
+
+Example response:
+
+```
+I found a skill that might help! The "vercel-react-best-practices" skill provides
+React and Next.js performance optimization guidelines from Vercel Engineering.
+
+To install it:
+npx skills add vercel-labs/agent-skills@vercel-react-best-practices
+
+Learn more: https://skills.sh/vercel-labs/agent-skills/vercel-react-best-practices
+```
+
+### Step 4: Offer to Install
+
+If the user wants to proceed, you can install the skill for them:
+
+```bash
+npx skills add <owner>/<repo>@<skill-name> -g -y
+```
+
+The `-g` flag installs globally (user-level) and `-y` skips confirmation prompts.
+
+## Common Skill Categories
+
+When searching, consider these common categories:
+
+| Category | Example Queries |
+| --------------- | ---------------------------------------- |
+| Web Development | react, nextjs, typescript, css, tailwind |
+| Testing | testing, jest, playwright, e2e |
+| DevOps | deploy, docker, kubernetes, ci-cd |
+| Documentation | docs, readme, changelog, api-docs |
+| Code Quality | review, lint, refactor, best-practices |
+| Design | ui, ux, design-system, accessibility |
+| Productivity | workflow, automation, git |
+
+## Tips for Effective Searches
+
+1. **Use specific keywords**: "react testing" is better than just "testing"
+2. **Try alternative terms**: If "deploy" doesn't work, try "deployment" or "ci-cd"
+3. **Check popular sources**: Many skills come from `vercel-labs/agent-skills` or `ComposioHQ/awesome-claude-skills`
+
+## When No Skills Are Found
+
+If no relevant skills exist:
+
+1. Acknowledge that no existing skill was found
+2. Offer to help with the task directly using your general capabilities
+3. Suggest the user could create their own skill with `npx skills init`
+
+Example:
+
+```
+I searched for skills related to "xyz" but didn't find any matches.
+I can still help you with this task directly! Would you like me to proceed?
+
+If this is something you do often, you could create your own skill:
+npx skills init my-xyz-skill
+```
diff --git a/.agents/skills/github-actions-templates/SKILL.md b/.agents/skills/github-actions-templates/SKILL.md
new file mode 100644
index 0000000000000000000000000000000000000000..691f4bcd879d8469ccb040ee13760679cd6cff5f
--- /dev/null
+++ b/.agents/skills/github-actions-templates/SKILL.md
@@ -0,0 +1,334 @@
+---
+name: github-actions-templates
+description: Create production-ready GitHub Actions workflows for automated testing, building, and deploying applications. Use when setting up CI/CD with GitHub Actions, automating development workflows, or creating reusable workflow templates.
+---
+
+# GitHub Actions Templates
+
+Production-ready GitHub Actions workflow patterns for testing, building, and deploying applications.
+
+## Purpose
+
+Create efficient, secure GitHub Actions workflows for continuous integration and deployment across various tech stacks.
+
+## When to Use
+
+- Automate testing and deployment
+- Build Docker images and push to registries
+- Deploy to Kubernetes clusters
+- Run security scans
+- Implement matrix builds for multiple environments
+
+## Common Workflow Patterns
+
+### Pattern 1: Test Workflow
+
+```yaml
+name: Test
+
+on:
+ push:
+ branches: [main, develop]
+ pull_request:
+ branches: [main]
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+
+ strategy:
+ matrix:
+ node-version: [18.x, 20.x]
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Use Node.js ${{ matrix.node-version }}
+ uses: actions/setup-node@v4
+ with:
+ node-version: ${{ matrix.node-version }}
+ cache: "npm"
+
+ - name: Install dependencies
+ run: npm ci
+
+ - name: Run linter
+ run: npm run lint
+
+ - name: Run tests
+ run: npm test
+
+ - name: Upload coverage
+ uses: codecov/codecov-action@v3
+ with:
+ files: ./coverage/lcov.info
+```
+
+**Reference:** See `assets/test-workflow.yml`
+
+### Pattern 2: Build and Push Docker Image
+
+```yaml
+name: Build and Push
+
+on:
+ push:
+ branches: [main]
+ tags: ["v*"]
+
+env:
+ REGISTRY: ghcr.io
+ IMAGE_NAME: ${{ github.repository }}
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ permissions:
+ contents: read
+ packages: write
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Log in to Container Registry
+ uses: docker/login-action@v3
+ with:
+ registry: ${{ env.REGISTRY }}
+ username: ${{ github.actor }}
+ password: ${{ secrets.GITHUB_TOKEN }}
+
+ - name: Extract metadata
+ id: meta
+ uses: docker/metadata-action@v5
+ with:
+ images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}
+ tags: |
+ type=ref,event=branch
+ type=ref,event=pr
+ type=semver,pattern={{version}}
+ type=semver,pattern={{major}}.{{minor}}
+
+ - name: Build and push
+ uses: docker/build-push-action@v5
+ with:
+ context: .
+ push: true
+ tags: ${{ steps.meta.outputs.tags }}
+ labels: ${{ steps.meta.outputs.labels }}
+ cache-from: type=gha
+ cache-to: type=gha,mode=max
+```
+
+**Reference:** See `assets/deploy-workflow.yml`
+
+### Pattern 3: Deploy to Kubernetes
+
+```yaml
+name: Deploy to Kubernetes
+
+on:
+ push:
+ branches: [main]
+
+jobs:
+ deploy:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Configure AWS credentials
+ uses: aws-actions/configure-aws-credentials@v4
+ with:
+ aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
+ aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+ aws-region: us-west-2
+
+ - name: Update kubeconfig
+ run: |
+ aws eks update-kubeconfig --name production-cluster --region us-west-2
+
+ - name: Deploy to Kubernetes
+ run: |
+ kubectl apply -f k8s/
+ kubectl rollout status deployment/my-app -n production
+ kubectl get services -n production
+
+ - name: Verify deployment
+ run: |
+ kubectl get pods -n production
+ kubectl describe deployment my-app -n production
+```
+
+### Pattern 4: Matrix Build
+
+```yaml
+name: Matrix Build
+
+on: [push, pull_request]
+
+jobs:
+ build:
+ runs-on: ${{ matrix.os }}
+
+ strategy:
+ matrix:
+ os: [ubuntu-latest, macos-latest, windows-latest]
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Set up Python
+ uses: actions/setup-python@v5
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ pip install -r requirements.txt
+
+ - name: Run tests
+ run: pytest
+```
+
+**Reference:** See `assets/matrix-build.yml`
+
+## Workflow Best Practices
+
+1. **Use specific action versions** (@v4, not @latest)
+2. **Cache dependencies** to speed up builds
+3. **Use secrets** for sensitive data
+4. **Implement status checks** on PRs
+5. **Use matrix builds** for multi-version testing
+6. **Set appropriate permissions**
+7. **Use reusable workflows** for common patterns
+8. **Implement approval gates** for production
+9. **Add notification steps** for failures
+10. **Use self-hosted runners** for sensitive workloads
+
+## Reusable Workflows
+
+```yaml
+# .github/workflows/reusable-test.yml
+name: Reusable Test Workflow
+
+on:
+ workflow_call:
+ inputs:
+ node-version:
+ required: true
+ type: string
+ secrets:
+ NPM_TOKEN:
+ required: true
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-node@v4
+ with:
+ node-version: ${{ inputs.node-version }}
+ - run: npm ci
+ - run: npm test
+```
+
+**Use reusable workflow:**
+
+```yaml
+jobs:
+ call-test:
+ uses: ./.github/workflows/reusable-test.yml
+ with:
+ node-version: "20.x"
+ secrets:
+ NPM_TOKEN: ${{ secrets.NPM_TOKEN }}
+```
+
+## Security Scanning
+
+```yaml
+name: Security Scan
+
+on:
+ push:
+ branches: [main]
+ pull_request:
+ branches: [main]
+
+jobs:
+ security:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Run Trivy vulnerability scanner
+ uses: aquasecurity/trivy-action@master
+ with:
+ scan-type: "fs"
+ scan-ref: "."
+ format: "sarif"
+ output: "trivy-results.sarif"
+
+ - name: Upload Trivy results to GitHub Security
+ uses: github/codeql-action/upload-sarif@v2
+ with:
+ sarif_file: "trivy-results.sarif"
+
+ - name: Run Snyk Security Scan
+ uses: snyk/actions/node@master
+ env:
+ SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
+```
+
+## Deployment with Approvals
+
+```yaml
+name: Deploy to Production
+
+on:
+ push:
+ tags: ["v*"]
+
+jobs:
+ deploy:
+ runs-on: ubuntu-latest
+ environment:
+ name: production
+ url: https://app.example.com
+
+ steps:
+ - uses: actions/checkout@v4
+
+ - name: Deploy application
+ run: |
+ echo "Deploying to production..."
+ # Deployment commands here
+
+ - name: Notify Slack
+ if: success()
+ uses: slackapi/slack-github-action@v1
+ with:
+ webhook-url: ${{ secrets.SLACK_WEBHOOK }}
+ payload: |
+ {
+ "text": "Deployment to production completed successfully!"
+ }
+```
+
+## Reference Files
+
+- `assets/test-workflow.yml` - Testing workflow template
+- `assets/deploy-workflow.yml` - Deployment workflow template
+- `assets/matrix-build.yml` - Matrix build template
+- `references/common-workflows.md` - Common workflow patterns
+
+## Related Skills
+
+- `gitlab-ci-patterns` - For GitLab CI workflows
+- `deployment-pipeline-design` - For pipeline architecture
+- `secrets-management` - For secrets handling
diff --git a/.agents/skills/github-pr-review-workflow/SKILL.md b/.agents/skills/github-pr-review-workflow/SKILL.md
new file mode 100644
index 0000000000000000000000000000000000000000..10628e44a230ede805994d43733d464fe811e3d4
--- /dev/null
+++ b/.agents/skills/github-pr-review-workflow/SKILL.md
@@ -0,0 +1,485 @@
+---
+name: github-pr-review-workflow
+description: Complete workflow for handling GitHub PR reviews using gh pr-review extension
+---
+
+# GitHub PR Review Workflow
+
+Complete workflow for reviewing, addressing feedback, and resolving threads in GitHub pull requests using the `gh-pr-review` extension from agynio/gh-pr-review.
+
+**For gh-pr-review documentation, see:** https://github.com/agynio/gh-pr-review
+
+---
+
+## Installation
+
+**Install gh-pr-review extension (if not already installed):**
+
+```bash
+gh extension install agynio/gh-pr-review
+```
+
+**Verify installation:**
+
+```bash
+gh pr-review --help
+```
+
+---
+
+## Workflow Overview
+
+```
+PR Review Request
+ ├─ Get PR number/repo context
+ ├─ List all review threads
+ ├─ Analyze feedback and comments
+ ├─ Validate whether each comment applies and explain decisions
+ ├─ Implement fixes in code
+ ├─ Run tests (unit + lint + typecheck)
+ ├─ Reply to all open review threads with explanations
+ ├─ Wait up to 5 minutes for follow-up
+ ├─ Resolve review threads (or address follow-ups)
+ └─ Commit and push changes
+```
+
+---
+
+## Step-by-Step Process
+
+### 1. Get PR Context
+
+**Get current PR details:**
+
+```bash
+# Get PR number
+gh pr view --json number
+
+# Get PR title and status
+gh pr view --json title,author,state,reviews
+
+# Get repository info (for gh pr-review)
+git remote get-url origin
+```
+
+**Output:** PR number (e.g., `123`) and repo (e.g., `owner/repo`)
+
+---
+
+### 2. List Review Threads
+
+**List all review threads (active and outdated):**
+
+```bash
+# From PR root directory
+gh pr-review threads list --pr <number> --repo <owner/repo>
+
+# Example:
+gh pr-review threads list --pr 123 --repo owner/repo
+```
+
+**Response format:**
+
+```json
+[
+ {
+    "threadId": "PRRT_kwDOQkQlKs5p24lu",
+ "isResolved": false,
+ "updatedAt": "2026-01-17T22:48:36Z",
+ "path": "path/to/file.ts",
+ "line": 42,
+ "isOutdated": false
+ }
+]
+```
+
+**Key fields:**
+
+- `threadId`: GraphQL node ID for resolving/replying
+- `isResolved`: Current status
+- `isOutdated`: Whether code has changed since comment
+- `path` + `line`: File location
+
+If all review threads are resolved or none are present, search for normal comments and analyze them:
+
+```bash
+gh pr view --comments --json author,comments,reviews
+```
+
+
+---
+
+### 3. Read and Analyze Feedback
+
+**Get review comments via GitHub API:**
+
+```bash
+# Get all review comments for PR
+gh api repos/<owner>/<repo>/pulls/<number>/comments
+
+# With jq for cleaner output
+gh api repos/<owner>/<repo>/pulls/<number>/comments \
+  --jq '.[] | {id,body,author,created_at,line,path}'
+```
+
+**Read the specific files mentioned:**
+
+```bash
+# Read the file context to understand feedback
+cat <path/to/file>
+# or use Read tool
+```
+
+**Categorize feedback:**
+
+- **High priority**: Security issues, bugs, breaking changes
+- **Medium priority**: Code quality, maintainability, test coverage
+- **Low priority**: Style, documentation, nice-to-haves
+
+**Validate applicability before changing code (required):**
+
+- Confirm each comment is accurate and relevant to the current code.
+- If a suggestion is incorrect, outdated, or doesn’t make sense in this codebase, **reply with a detailed explanation** of why it was not implemented.
+- Do not skip a change simply because it is time-consuming—either implement it or explain clearly why it should not be done.
+
+---
+
+### 4. Implement Fixes
+
+**Edit the files mentioned in review:**
+
+```bash
+# Use Edit tool or bash
+edit <path/to/file>
+```
+
+**Follow repository conventions:**
+
+- Check existing patterns in similar files
+- Follow AGENTS.md guidelines
+- Maintain code style consistency
+- Add/update tests for new logic
+
+---
+
+### 5. Verify Changes (CRITICAL)
+
+**Always run tests before replying:**
+
+```bash
+# Run project tests
+bun run test
+
+# Or specific test suites
+bun run test:unit
+bun run test:unit:watch
+bun run test:e2e
+```
+
+**Run type checking:**
+
+```bash
+bun run typecheck
+# or
+tsc --noEmit
+```
+
+**Run linting:**
+
+```bash
+bun run lint
+# or
+eslint
+```
+
+**Verify all pass:**
+
+- ✓ No TypeScript errors
+- ✓ No ESLint warnings/errors
+- ✓ All unit tests pass
+- ✓ E2E tests pass (if relevant)
+
+---
+
+### 6. Commit and Push Changes
+
+**Stage and commit changes:**
+
+```bash
+# Check status
+git status
+
+# Stage modified files
+git add <files>
+
+# Commit with clear message
+git commit -m "<type>(<scope>): <subject>"
+
+# Example:
+git commit -m "refactor(emails): centralize from name logic and improve sanitization
+
+- Extract RESEND_FROM_NAME constant to lib/emails/from-address.ts
+- Replace duplicated logic in lib/auth.ts and app/actions/contact.ts
+- Improve formatFromAddress sanitization (RFC 5322 chars)
+- Add test cases for additional sanitization patterns"
+```
+
+**Push to remote:**
+
+```bash
+git push
+```
+
+**Verify working tree:**
+
+```bash
+git status
+# Should show: "nothing to commit, working tree clean"
+```
+
+---
+
+### 7. Reply to Review Threads
+
+**Reply with explanation of fixes:**
+
+```bash
+gh pr-review comments reply \
+  --pr <number> \
+  --repo <owner/repo> \
+  --thread-id <thread-id> \
+  --body "<reply text>"
+```
+
+**Best practices for replies:**
+
+- Acknowledge the feedback
+- Explain what was changed
+- Reference specific commit(s) if relevant
+- Be concise but clear
+- Use code fences for code snippets
+
+**Example reply:**
+
+```bash
+gh pr-review comments reply \
+  --pr <number> \
+  --repo <owner/repo> \
+  --thread-id <thread-id> \
+ --body "$(cat <<'EOF'
+@reviewer Thanks for the feedback! I've addressed your suggestions:
+
+1. Applied the requested refactor in the relevant module
+2. Removed duplicated logic in the affected call sites
+3. Improved sanitization to match the project’s expectations
+4. Added/updated tests for the new behavior
+
+Changes committed in abc1234, all tests pass.
+EOF
+)"
+```
+
+**Note:** Use heredoc for multi-line bodies to avoid shell escaping issues.
+**Note:** Always start replies with `@reviewer` (e.g., `@gemini-code-assist ...` or `@greptile …`) after you push changes. There can be multiple reviewers, so always check which reviewer left each comment before replying.
+
+If this was a normal comment and not a review (see step 2), you can use this to answer:
+
+```bash
+gh pr comment --body "$(cat <<'EOF'
+@reviewer …
+EOF
+)"
+```
+
+You can also just react to the comment if appropriate.
+
+**Reply to all open threads first:**
+
+1. Respond to every open comment with what you did **or** why it was not done.
+2. Only after all replies are posted, proceed to the wait/resolve phase.
+
+---
+
+### 8. Wait for Follow-ups and Resolve Threads
+
+**After implementing fixes, pushing the commit, and replying to all open comments, wait up to 5 minutes for follow-ups:**
+
+```bash
+# Wait for a minute for reviewer response
+sleep 60
+
+# Re-check for new replies or new threads
+gh pr-review threads list --pr <number> --repo <owner/repo>
+```
+
+Do this step up to 5 times to wait for up to 5 minutes.
+
+**If there is a follow-up hint, address it (steps 3-7) and then resolve.**
+
+**If there is a confirmation, resolve the thread:**
+
+```bash
+gh pr-review threads resolve \
+  --pr <number> \
+  --repo <owner/repo> \
+  --thread-id <thread-id>
+```
+
+**Response:**
+
+```json
+{
+ "thread_node_id": "PRRT_kwDOQkQlKs5p24lu",
+ "is_resolved": true
+}
+```
+
+**Batch resolve multiple threads:**
+
+```bash
+# Resolve outdated threads first
+gh pr-review threads resolve --pr <number> --repo <owner/repo> --thread-id <thread-id-1>
+gh pr-review threads resolve --pr <number> --repo <owner/repo> --thread-id <thread-id-2>
+
+# Then resolve active threads after replying
+gh pr-review threads resolve --pr <number> --repo <owner/repo> --thread-id <thread-id-3>
+```
+
+**Strategy:**
+
+1. Resolve outdated threads (isOutdated: true) - no reply needed
+2. Reply to active threads explaining fixes (or non-changes)
+3. Wait up to 5 minutes for a response
+4. Resolve active threads after confirmation or no response
+
+---
+
+### 9. Verify All Threads Resolved
+
+**Final check:**
+
+```bash
+gh pr-review threads list --pr <number> --repo <owner/repo>
+```
+
+**Expected output:** All threads show `isResolved: true`
+
+---
+
+## Complete Example Workflow
+
+```bash
+# 1. Get PR context
+gh pr view --json number
+git remote get-url origin
+
+# 2. List review threads
+gh pr-review threads list --pr <number> --repo <owner/repo>
+
+# 3. Read comments and files
+gh api repos/<owner>/<repo>/pulls/<number>/comments --jq '.[] | {id,body,path,line}'
+cat path/to/file.ts
+
+# 4. Implement fixes
+edit path/to/file.ts
+
+# 5. Run tests
+bun run test:unit -- tests/path/to/file.test.ts
+bun run typecheck
+bun run lint
+
+# 6. Commit and push
+git add lib/emails/from-address.ts
+git commit -m "fix: address PR review feedback"
+git push
+
+# 7. Reply to threads
+gh pr-review comments reply --pr <number> --repo <owner/repo> \
+  --thread-id <thread-id> --body "$(cat <<'EOF'
+@reviewer Thanks for review! I've addressed all feedback:
+1. Centralized logic
+2. Improved sanitization
+3. Added tests
+
+Changes in abc1234.
+EOF
+)"
+
+# 8. Wait then resolve threads
+sleep 300
+gh pr-review threads list --pr <number> --repo <owner/repo>
+gh pr-review threads resolve --pr <number> --repo <owner/repo> \
+  --thread-id <thread-id>
+
+# 9. Verify
+gh pr-review threads list --pr <number> --repo <owner/repo>
+git status
+```
+
+---
+
+## gh-pr-review Commands Reference
+
+| Command | Purpose |
+| -------------------------------- | ------------------------- |
+| `gh pr-review threads list` | List all review threads |
+| `gh pr-review threads resolve` | Resolve a specific thread |
+| `gh pr-review threads unresolve` | Reopen a resolved thread |
+| `gh pr-review comments reply` | Reply to a review thread |
+| `gh pr-review review` | Manage pending reviews |
+
+**Common flags:**
+
+- `--pr <number>`: Pull request number
+- `-R, --repo <owner/repo>`: Repository identifier
+- `--thread-id <id>`: GraphQL thread node ID
+
+---
+
+## Troubleshooting
+
+| Issue | Solution |
+| -------------------------------------------------------- | ------------------------------------------------------------- |
+| `command not found: gh-pr-review` | Install extension: `gh extension install agynio/gh-pr-review` |
+| `must specify a pull request via --pr`                   | Run from PR directory or add `--pr <number>`                  |
+| `--repo must be owner/repo when using numeric selectors` | Add `-R <owner/repo>` or run from authenticated repo          |
+| Shell escaping issues with `--body` | Use heredoc: `--body "$(cat <<'EOF'\n...\nEOF)"` |
+| Thread not found | Check threadId is exact GraphQL ID, not PR number |
+
+---
+
+## Best Practices
+
+**Before replying:**
+
+- ✓ Read all review comments carefully
+- ✓ Understand the intent (suggestion vs. blocker)
+- ✓ Check if similar issues exist elsewhere
+
+**When implementing fixes:**
+
+- ✓ Follow existing code patterns
+- ✓ Update/add tests for changes
+- ✓ Run full test suite
+- ✓ Check lint and type errors
+
+**When replying:**
+
+- ✓ Be polite and appreciative
+- ✓ Explain what was changed
+- ✓ Reference specific files/lines
+- ✓ Keep it concise
+
+**Before resolving:**
+
+- ✓ Ensure all issues are addressed
+- ✓ Verify tests pass
+- ✓ Commit changes to branch
+
+---
+
+## Resources
+
+- [gh-pr-review GitHub](https://github.com/agynio/gh-pr-review)
+- [GitHub GraphQL API: Pull Requests](https://docs.github.com/en/graphql/guides/using-the-graphql-api-for-pull-requests)
+- [gh CLI Documentation](https://cli.github.com/manual/)
diff --git a/.agents/skills/owasp-security-check/SKILL.md b/.agents/skills/owasp-security-check/SKILL.md
new file mode 100644
index 0000000000000000000000000000000000000000..cc9205253d4417ad928f761248c6b0133a060462
--- /dev/null
+++ b/.agents/skills/owasp-security-check/SKILL.md
@@ -0,0 +1,451 @@
+---
+name: owasp-security-check
+description: Security audit guidelines for web applications and REST APIs based on OWASP Top 10 and web security best practices. Use when checking code for vulnerabilities, reviewing auth/authz, auditing APIs, or before production deployment.
+---
+
+# OWASP Security Check
+
+Comprehensive security audit patterns for web applications and REST APIs. Contains 20 rules across 5 categories covering OWASP Top 10 and common web vulnerabilities.
+
+## When to Apply
+
+Use this skill when:
+
+- Auditing a codebase for security vulnerabilities
+- Reviewing user-provided file or folder for security issues
+- Checking authentication/authorization implementations
+- Evaluating REST API security
+- Assessing data protection measures
+- Reviewing configuration and deployment settings
+- Before production deployment
+- After adding new features that handle sensitive data
+
+## How to Use This Skill
+
+1. **Identify application type** - Web app, REST API, SPA, SSR, or mixed
+2. **Scan by priority** - Start with CRITICAL rules, then HIGH, then MEDIUM
+3. **Review relevant rule files** - Load specific rules from @rules/ directory
+4. **Report findings** - Note severity, file location, and impact
+5. **Provide remediation** - Give concrete code examples for fixes
+
+## Audit Workflow
+
+### Step 1: Systematic Review by Priority
+
+Work through categories by priority:
+
+1. **CRITICAL**: Authentication & Authorization, Data Protection, Input/Output Security
+2. **HIGH**: Configuration & Headers
+3. **MEDIUM**: API & Monitoring
+
+### Step 2: Generate Report
+
+Format findings as:
+
+- **Severity**: CRITICAL | HIGH | MEDIUM | LOW
+- **Category**: Rule name
+- **File**: Path and line number
+- **Issue**: What's wrong
+- **Impact**: Security consequence
+- **Fix**: Code example of remediation
+
+## Rules Summary
+
+### Authentication & Authorization (CRITICAL)
+
+#### broken-access-control - @rules/broken-access-control.md
+
+Check for missing authorization, IDOR, privilege escalation.
+
+```typescript
+// Bad: No authorization check
+async function getUser(req: Request): Promise<Response> {
+ let url = new URL(req.url);
+ let userId = url.searchParams.get("id");
+ let user = await db.user.findUnique({ where: { id: userId } });
+ return new Response(JSON.stringify(user));
+}
+
+// Good: Verify ownership
+async function getUser(req: Request): Promise<Response> {
+ let session = await getSession(req);
+ let url = new URL(req.url);
+ let userId = url.searchParams.get("id");
+
+ if (session.userId !== userId && !session.isAdmin) {
+ return new Response("Forbidden", { status: 403 });
+ }
+
+ let user = await db.user.findUnique({ where: { id: userId } });
+ return new Response(JSON.stringify(user));
+}
+```
+
+#### authentication-failures - @rules/authentication-failures.md
+
+Check for weak authentication, missing MFA, session issues.
+
+```typescript
+// Bad: Weak password check
+if (password.length >= 6) {
+ /* allow */
+}
+
+// Good: Strong password requirements
+function validatePassword(password: string) {
+ if (password.length < 12) return false;
+ if (!/[A-Z]/.test(password)) return false;
+ if (!/[a-z]/.test(password)) return false;
+ if (!/[0-9]/.test(password)) return false;
+ if (!/[^A-Za-z0-9]/.test(password)) return false;
+ return true;
+}
+```
+
+### Data Protection (CRITICAL)
+
+#### cryptographic-failures - @rules/cryptographic-failures.md
+
+Check for weak encryption, plaintext storage, bad hashing.
+
+```typescript
+// Bad: MD5 for passwords
+let hash = crypto.createHash("md5").update(password).digest("hex");
+
+// Good: bcrypt with salt
+let hash = await bcrypt.hash(password, 12);
+```
+
+#### sensitive-data-exposure - @rules/sensitive-data-exposure.md
+
+Check for PII in logs/responses, error messages leaking info.
+
+```typescript
+// Bad: Exposing sensitive data
+return new Response(JSON.stringify(user)); // Contains password hash, email, etc.
+
+// Good: Return only needed fields
+return new Response(
+ JSON.stringify({
+ id: user.id,
+ username: user.username,
+ displayName: user.displayName,
+ }),
+);
+```
+
+#### data-integrity-failures - @rules/data-integrity-failures.md
+
+Check for unsigned data, insecure deserialization.
+
+```typescript
+// Bad: Trusting unsigned JWT
+let decoded = JSON.parse(atob(token.split(".")[1]));
+if (decoded.isAdmin) {
+ /* grant access */
+}
+
+// Good: Verify signature
+let payload = await verifyJWT(token, secret);
+```
+
+#### secrets-management - @rules/secrets-management.md
+
+Check for hardcoded secrets, exposed env vars.
+
+```typescript
+// Bad: Hardcoded secret
+const API_KEY = "sk_live_a1b2c3d4e5f6";
+
+// Good: Environment variables
+let API_KEY = process.env.API_KEY;
+if (!API_KEY) throw new Error("API_KEY not configured");
+```
+
+### Input/Output Security (CRITICAL)
+
+#### injection-attacks - @rules/injection-attacks.md
+
+Check for SQL, XSS, NoSQL, Command, Path Traversal injection.
+
+```typescript
+// Bad: SQL injection
+let query = `SELECT * FROM users WHERE email = '${email}'`;
+
+// Good: Parameterized query
+let user = await db.user.findUnique({ where: { email } });
+```
+
+#### ssrf-attacks - @rules/ssrf-attacks.md
+
+Check for unvalidated URLs, internal network access.
+
+```typescript
+// Bad: Fetching user-provided URL
+let url = await req.json().then((d) => d.url);
+let response = await fetch(url);
+
+// Good: Validate against allowlist
+const ALLOWED_DOMAINS = ["api.example.com", "cdn.example.com"];
+let url = new URL(await req.json().then((d) => d.url));
+if (!ALLOWED_DOMAINS.includes(url.hostname)) {
+ return new Response("Invalid URL", { status: 400 });
+}
+```
+
+#### file-upload-security - @rules/file-upload-security.md
+
+Check for unrestricted uploads, MIME validation.
+
+```typescript
+// Bad: No file type validation
+let file = await req.formData().then((fd) => fd.get("file"));
+await writeFile(`./uploads/${file.name}`, file);
+
+// Good: Validate type and extension
+const ALLOWED_TYPES = ["image/jpeg", "image/png", "image/webp"];
+const ALLOWED_EXTS = [".jpg", ".jpeg", ".png", ".webp"];
+let file = await req.formData().then((fd) => fd.get("file") as File);
+
+if (!ALLOWED_TYPES.includes(file.type)) {
+ return new Response("Invalid file type", { status: 400 });
+}
+```
+
+#### redirect-validation - @rules/redirect-validation.md
+
+Check for open redirects, unvalidated redirect URLs.
+
+```typescript
+// Bad: Unvalidated redirect
+let returnUrl = new URL(req.url).searchParams.get("return");
+return Response.redirect(returnUrl);
+
+// Good: Validate redirect URL
+let returnUrl = new URL(req.url).searchParams.get("return");
+let allowed = ["/dashboard", "/profile", "/settings"];
+if (!allowed.includes(returnUrl)) {
+ return Response.redirect("/");
+}
+```
+
+### Configuration & Headers (HIGH)
+
+#### insecure-design - @rules/insecure-design.md
+
+Check for security anti-patterns in architecture.
+
+```typescript
+// Bad: Security by obscurity
+let isAdmin = req.headers.get("x-admin-secret") === "admin123";
+
+// Good: Proper role-based access control
+let session = await getSession(req);
+let isAdmin = await db.user
+ .findUnique({
+ where: { id: session.userId },
+ })
+ .then((u) => u.role === "ADMIN");
+```
+
+#### security-misconfiguration - @rules/security-misconfiguration.md
+
+Check for default configs, debug mode, error handling.
+
+```typescript
+// Bad: Exposing stack traces
+catch (error) {
+ return new Response(error.stack, { status: 500 });
+}
+
+// Good: Generic error message
+catch (error) {
+ console.error(error); // Log server-side only
+ return new Response("Internal server error", { status: 500 });
+}
+```
+
+#### security-headers - @rules/security-headers.md
+
+Check for CSP, HSTS, X-Frame-Options, etc.
+
+```typescript
+// Bad: No security headers
+return new Response(html);
+
+// Good: Security headers set
+return new Response(html, {
+ headers: {
+ "Content-Security-Policy": "default-src 'self'",
+ "X-Frame-Options": "DENY",
+ "X-Content-Type-Options": "nosniff",
+ "Strict-Transport-Security": "max-age=31536000; includeSubDomains",
+ },
+});
+```
+
+#### cors-configuration - @rules/cors-configuration.md
+
+Check for overly permissive CORS.
+
+```typescript
+// Bad: Wildcard with credentials
+headers.set("Access-Control-Allow-Origin", "*");
+headers.set("Access-Control-Allow-Credentials", "true");
+
+// Good: Specific origin
+let allowedOrigins = ["https://app.example.com"];
+let origin = req.headers.get("origin");
+if (origin && allowedOrigins.includes(origin)) {
+ headers.set("Access-Control-Allow-Origin", origin);
+}
+```
+
+#### csrf-protection - @rules/csrf-protection.md
+
+Check for CSRF tokens, SameSite cookies.
+
+```typescript
+// Bad: No CSRF protection
+let cookies = parseCookies(req.headers.get("cookie"));
+let session = await getSession(cookies.sessionId);
+
+// Good: SameSite cookie + token validation
+return new Response("OK", {
+ headers: {
+ "Set-Cookie": "session=abc; SameSite=Strict; Secure; HttpOnly",
+ },
+});
+```
+
+#### session-security - @rules/session-security.md
+
+Check for cookie flags, JWT issues, token storage.
+
+```typescript
+// Bad: Insecure cookie
+return new Response("OK", {
+ headers: { "Set-Cookie": "session=abc123" },
+});
+
+// Good: Secure cookie with all flags
+return new Response("OK", {
+ headers: {
+ "Set-Cookie":
+ "session=abc123; Secure; HttpOnly; SameSite=Strict; Path=/; Max-Age=3600",
+ },
+});
+```
+
+### API & Monitoring (MEDIUM-HIGH)
+
+#### api-security - @rules/api-security.md
+
+Check for REST API vulnerabilities, mass assignment.
+
+```typescript
+// Bad: Mass assignment vulnerability
+let userData = await req.json();
+await db.user.update({ where: { id }, data: userData });
+
+// Good: Explicitly allow fields
+let { displayName, bio } = await req.json();
+await db.user.update({
+ where: { id },
+ data: { displayName, bio }, // Only allowed fields
+});
+```
+
+#### rate-limiting - @rules/rate-limiting.md
+
+Check for missing rate limits, brute force prevention.
+
+```typescript
+// Bad: No rate limiting
+async function login(req: Request): Promise<Response> {
+ let { email, password } = await req.json();
+ // Allows unlimited login attempts
+}
+
+// Good: Rate limiting
+let ip = req.headers.get("x-forwarded-for");
+let { success } = await ratelimit.limit(ip);
+if (!success) {
+ return new Response("Too many requests", { status: 429 });
+}
+```
+
+#### logging-monitoring - @rules/logging-monitoring.md
+
+Check for insufficient logging, sensitive data in logs.
+
+```typescript
+// Bad: Logging sensitive data
+console.log("User login:", { email, password, ssn });
+
+// Good: Log events without sensitive data
+console.log("User login attempt", {
+ email,
+ ip: req.headers.get("x-forwarded-for"),
+ timestamp: new Date().toISOString(),
+});
+```
+
+#### vulnerable-dependencies - @rules/vulnerable-dependencies.md
+
+Check for outdated packages, known CVEs.
+
+```bash
+# Bad: No dependency checking
+npm install
+
+# Good: Regular audits
+npm audit
+npm audit fix
+```
+
+## Common Vulnerability Patterns
+
+Quick reference of patterns to look for:
+
+- **User input without validation**: `req.json()` → immediate use
+- **Missing auth checks**: Routes without authorization middleware
+- **Hardcoded secrets**: Strings containing "password", "secret", "key"
+- **SQL injection**: String concatenation in queries
+- **XSS**: `dangerouslySetInnerHTML`, `.innerHTML`
+- **Weak crypto**: `md5`, `sha1` for passwords
+- **Missing headers**: No CSP, HSTS, or security headers
+- **CORS wildcards**: `Access-Control-Allow-Origin: *` with credentials
+- **Insecure cookies**: Missing Secure, HttpOnly, SameSite flags
+- **Path traversal**: User input in file paths without validation
+
+## Severity Quick Reference
+
+**Fix Immediately (CRITICAL):**
+
+- SQL/XSS/Command Injection
+- Missing authentication on sensitive endpoints
+- Hardcoded secrets in code
+- Plaintext password storage
+- IDOR vulnerabilities
+
+**Fix Soon (HIGH):**
+
+- Missing CSRF protection
+- Weak password requirements
+- Missing security headers
+- Overly permissive CORS
+- Insecure session management
+
+**Fix When Possible (MEDIUM):**
+
+- Missing rate limiting
+- Incomplete logging
+- Outdated dependencies (no known exploits)
+- Missing input validation on non-critical fields
+
+**Improve (LOW):**
+
+- Missing optional security headers
+- Verbose error messages (non-production)
+- Suboptimal crypto parameters
diff --git a/.agents/skills/owasp-security-check/rules/api-security.md b/.agents/skills/owasp-security-check/rules/api-security.md
new file mode 100644
index 0000000000000000000000000000000000000000..ad087689eb6ba72240c8d4213d3a3fe793008890
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/api-security.md
@@ -0,0 +1,148 @@
+---
+title: REST API Security
+impact: MEDIUM
+tags: [api, rest, mass-assignment, versioning]
+---
+
+# REST API Security
+
+Check for REST API vulnerabilities including mass assignment, lack of validation, and missing resource limits.
+
+> **Related:** Input validation in [injection-attacks.md](injection-attacks.md). Authentication in [authentication-failures.md](authentication-failures.md). Rate limiting in [rate-limiting.md](rate-limiting.md).
+
+## Why
+
+- **Mass assignment**: Users modify protected fields
+- **Over-fetching**: Expose unnecessary data
+- **Resource exhaustion**: Unlimited result sets
+- **API abuse**: Missing versioning and documentation
+
+## What to Check
+
+- [ ] Mass assignment in update operations
+- [ ] No pagination on list endpoints
+- [ ] Missing Content-Type validation
+- [ ] No API versioning
+- [ ] Excessive data in responses
+- [ ] Missing rate limits
+
+## Bad Patterns
+
+```typescript
+// Bad: Mass assignment
+async function updateUser(req: Request): Promise<Response> {
+ let session = await getSession(req);
+ let data = await req.json();
+
+ // VULNERABLE: User can set isAdmin, role, etc.!
+ await db.users.update({
+ where: { id: session.userId },
+ data, // Dangerous - accepts all fields!
+ });
+
+ return new Response("Updated");
+}
+
+// Bad: No pagination
+async function getUsers(req: Request): Promise<Response> {
+ // VULNERABLE: Could return millions of records
+ let users = await db.users.findMany();
+
+ return Response.json(users);
+}
+
+// Bad: No input validation
+async function createPost(req: Request): Promise<Response> {
+ let data = await req.json();
+
+ // VULNERABLE: No validation of data types or values
+ await db.posts.create({ data });
+
+ return new Response("Created", { status: 201 });
+}
+```
+
+## Good Patterns
+
+```typescript
+// Good: Explicit field allowlist
+async function updateUser(req: Request): Promise<Response> {
+ let session = await getSession(req);
+ let body = await req.json();
+
+ let allowedFields = {
+ displayName: body.displayName,
+ bio: body.bio,
+ avatar: body.avatar,
+ };
+
+ if (
+ allowedFields.displayName &&
+ typeof allowedFields.displayName !== "string"
+ ) {
+ return new Response("Invalid displayName", { status: 400 });
+ }
+
+ await db.users.update({
+ where: { id: session.userId },
+ data: allowedFields,
+ });
+
+ return new Response("Updated");
+}
+
+// Good: Pagination with limits
+async function getUsers(req: Request): Promise<Response> {
+ let url = new URL(req.url);
+ let page = parseInt(url.searchParams.get("page") || "1");
+ let limit = Math.min(parseInt(url.searchParams.get("limit") || "20"), 100);
+
+ let users = await db.users.findMany({
+ take: limit,
+ skip: (page - 1) * limit,
+ });
+
+ return Response.json({ data: users, page, limit });
+}
+
+// Good: Input validation
+async function createPost(req: Request): Promise<Response> {
+ let session = await getSession(req);
+ let body = await req.json();
+
+ if (
+ !body.title ||
+ typeof body.title !== "string" ||
+ body.title.length > 200
+ ) {
+ return new Response("Invalid title", { status: 400 });
+ }
+
+ if (
+ !body.content ||
+ typeof body.content !== "string" ||
+ body.content.length > 50000
+ ) {
+ return new Response("Invalid content", { status: 400 });
+ }
+
+ await db.posts.create({
+ data: {
+ title: body.title,
+ content: body.content,
+ authorId: session.userId,
+ },
+ });
+
+ return new Response("Created", { status: 201 });
+}
+```
+
+## Rules
+
+1. **Prevent mass assignment** - Explicitly define allowed fields
+2. **Always paginate lists** - Enforce maximum page size
+3. **Validate input types** - Check types and constraints
+4. **Version your API** - Use `/api/v1/` prefix for versioning
+5. **Limit response data** - Return only necessary fields
+6. **Validate Content-Type** - Ensure correct headers
diff --git a/.agents/skills/owasp-security-check/rules/authentication-failures.md b/.agents/skills/owasp-security-check/rules/authentication-failures.md
new file mode 100644
index 0000000000000000000000000000000000000000..564503317357cf9ed9251ce6546d2cdbf571293b
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/authentication-failures.md
@@ -0,0 +1,146 @@
+---
+title: Authentication Failures
+impact: CRITICAL
+tags: [authentication, passwords, mfa, sessions, owasp-a07]
+---
+
+# Authentication Failures
+
+Check for weak authentication mechanisms, missing MFA, session management issues, and credential handling vulnerabilities.
+
+> **Related:** Session security in [session-security.md](session-security.md). Rate limiting in [rate-limiting.md](rate-limiting.md).
+
+## Why
+
+- **Account takeover**: Attackers gain unauthorized access to user accounts
+- **Credential stuffing**: Weak auth enables automated attacks
+- **Session hijacking**: Improper session management allows theft
+- **Brute force attacks**: Weak passwords and no rate limiting enable guessing
+
+## What to Check
+
+- [ ] Weak password requirements (length < 12, no complexity)
+- [ ] No multi-factor authentication option
+- [ ] Passwords stored in plaintext or with weak hashing (MD5, SHA1)
+- [ ] Missing account lockout after failed attempts
+- [ ] Session tokens predictable or not securely generated
+- [ ] No session expiration or timeout
+- [ ] Session not regenerated after login
+- [ ] Credentials exposed in URLs or logs
+
+## Bad Patterns
+
+```typescript
+// Bad: Weak password hashing (SHA-256 too fast)
+const hash = crypto.createHash("sha256").update(password).digest("hex");
+
+// Bad: No password requirements
+async function signup(req: Request): Promise<Response> {
+ let { email, password } = await req.json();
+ // Accepts "123" as valid password!
+ await db.users.create({
+    data: { email, password: await bcrypt.hash(password, 10) },
+ });
+}
+
+// Bad: Timing attack reveals if email exists
+const user = await db.users.findUnique({ where: { email } });
+if (!user) return new Response("Invalid", { status: 401 }); // Early return!
+if (!(await bcrypt.compare(password, user.password))) {
+ return new Response("Invalid", { status: 401 });
+}
+
+// Bad: No rate limiting or account lockout
+async function login(req: Request): Promise<Response> {
+ // Unlimited attempts allowed!
+ let user = await authenticate(email, password);
+}
+```
+
+## Good Patterns
+
+```typescript
+// Good: bcrypt with proper cost factor
+const hash = await bcrypt.hash(password, 12); // Cost factor 12+
+
+// Good: Strong password validation
+function validatePassword(password: string): string | null {
+ if (password.length < 12) return "Password must be ≥12 characters";
+ if (!/[A-Z]/.test(password)) return "Must include uppercase";
+ if (!/[a-z]/.test(password)) return "Must include lowercase";
+ if (!/[0-9]/.test(password)) return "Must include number";
+ return null;
+}
+
+async function signup(req: Request): Promise<Response> {
+ let { email, password } = await req.json();
+
+ let error = validatePassword(password);
+ if (error) return new Response(error, { status: 400 });
+
+ await db.users.create({
+    data: { email, password: await bcrypt.hash(password, 12) },
+ });
+}
+
+// Good: Constant-time comparison
+async function login(req: Request): Promise<Response> {
+ let { email, password } = await req.json();
+ let user = await db.users.findUnique({ where: { email } });
+
+ // Always compare (constant time)
+ let hash = user?.password || "$2b$12$fakehash...";
+ let valid = await bcrypt.compare(password, hash);
+
+ if (!user || !valid) {
+ return new Response("Invalid credentials", { status: 401 });
+ }
+
+ return createSession(user);
+}
+
+// Good: Account lockout after failed attempts
+async function loginWithLockout(req: Request): Promise<Response> {
+ let { email, password } = await req.json();
+ let user = await db.users.findUnique({ where: { email } });
+
+ if (user?.lockedUntil && user.lockedUntil > new Date()) {
+ return new Response("Account locked", { status: 423 });
+ }
+
+ let valid = user && (await bcrypt.compare(password, user.password));
+
+ if (!user || !valid) {
+ let attempts = (user?.failedAttempts || 0) + 1;
+ await db.users.update({
+ where: { email },
+ data: {
+ failedAttempts: attempts,
+ lockedUntil:
+ attempts >= 5 ? new Date(Date.now() + 30 * 60 * 1000) : null,
+ },
+ });
+ return new Response("Invalid credentials", { status: 401 });
+ }
+
+ // Reset on success
+ await db.users.update({
+ where: { id: user.id },
+ data: { failedAttempts: 0, lockedUntil: null },
+ });
+
+ return createSession(user);
+}
+```
+
+## Rules
+
+1. **Require strong passwords** - Minimum 12 characters with complexity
+2. **Hash passwords properly** - Use bcrypt, argon2, or scrypt (never MD5/SHA1)
+3. **Implement rate limiting** - Limit authentication attempts per IP/account
+4. **Use secure session tokens** - Cryptographically random tokens
+5. **Set session expiration** - Both absolute and idle timeout
+6. **Regenerate session on login** - Prevent session fixation attacks
+7. **Implement account lockout** - Temporarily lock after multiple failures
+8. **Support MFA** - Especially for privileged accounts
+9. **Never log credentials** - Don't log passwords, tokens, or reset links
diff --git a/.agents/skills/owasp-security-check/rules/broken-access-control.md b/.agents/skills/owasp-security-check/rules/broken-access-control.md
new file mode 100644
index 0000000000000000000000000000000000000000..5ed9a44c0dde7f663d7619b91ce04d413405c1ab
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/broken-access-control.md
@@ -0,0 +1,111 @@
+---
+title: Broken Access Control
+impact: CRITICAL
+tags: [access-control, authorization, idor, owasp-a01]
+---
+
+# Broken Access Control
+
+Check for missing authorization checks, insecure direct object references (IDOR), privilege escalation, and path traversal.
+
+> **Related:** Path traversal in [injection-attacks.md](injection-attacks.md) and [file-upload-security.md](file-upload-security.md).
+
+## Why
+
+- **Data breach**: Users access others' sensitive data
+- **Privilege escalation**: Regular users gain admin access
+- **Data manipulation**: Unauthorized modification or deletion
+- **Compliance violation**: GDPR, HIPAA, PCI-DSS penalties
+
+## What to Check
+
+- [ ] Routes accessing resources without verifying ownership
+- [ ] User IDs taken from request params without validation
+- [ ] Admin endpoints without role checks
+- [ ] File paths constructed from user input
+- [ ] Authorization checks that can be bypassed
+- [ ] Horizontal privilege escalation (user A→user B's data)
+- [ ] Vertical privilege escalation (user→admin functions)
+
+## Bad Patterns
+
+```typescript
+// Bad: No authorization check
+const userId = url.searchParams.get("id");
+const user = await db.users.findUnique({ where: { id: userId } });
+return Response.json(user); // Anyone can access!
+
+// Bad: No role check
+await db.users.delete({ where: { id: userId } }); // No admin verification!
+
+// Bad: Path traversal
+const filename = url.searchParams.get("file");
+const content = await fs.readFile(`./uploads/${filename}`, "utf-8");
+```
+
+## Good Patterns
+
+```typescript
+// Good: Verify ownership before access
+async function getUserProfile(req: Request): Promise<Response> {
+ let session = await getSession(req);
+ let url = new URL(req.url);
+ let userId = url.searchParams.get("id");
+
+ if (session.userId !== userId && !session.isAdmin) {
+ return new Response("Forbidden", { status: 403 });
+ }
+
+ let user = await db.users.findUnique({ where: { id: userId } });
+ return Response.json(user);
+}
+
+// Good: Role-based access control
+async function deleteUser(req: Request): Promise<Response> {
+ let session = await getSession(req);
+
+ let user = await db.users.findUnique({
+ where: { id: session.userId },
+ select: { role: true },
+ });
+
+ if (user.role !== "ADMIN") {
+ return new Response("Forbidden", { status: 403 });
+ }
+
+ let url = new URL(req.url);
+ let userId = url.searchParams.get("id");
+
+ await db.users.delete({ where: { id: userId } });
+ return new Response("Deleted");
+}
+
+// Good: Prevent path traversal
+async function downloadFile(req: Request): Promise<Response> {
+ let url = new URL(req.url);
+ let filename = url.searchParams.get("file");
+ let ALLOWED = ["terms.pdf", "privacy.pdf", "guide.pdf"];
+
+ if (
+ !filename ||
+ !ALLOWED.includes(filename) ||
+ filename.includes("..") ||
+ filename.includes("/")
+ ) {
+ return new Response("Invalid file", { status: 400 });
+ }
+
+  let content = await fs.readFile(`./documents/${filename}`, "utf-8");
+ return new Response(content);
+}
+```
+
+## Rules
+
+1. **Never trust user input for authorization** - Verify against server-side session
+2. **Check ownership on every resource access** - Don't assume URL ID is valid
+3. **Implement deny-by-default** - Require explicit permission grants
+4. **Use role-based access control** - Define clear roles and check them
+5. **Validate file paths** - Never construct paths directly from user input
+6. **Log authorization failures** - Track denied access for monitoring
+7. **Test with different roles** - Verify unprivileged users can't access privileged resources
diff --git a/.agents/skills/owasp-security-check/rules/cors-configuration.md b/.agents/skills/owasp-security-check/rules/cors-configuration.md
new file mode 100644
index 0000000000000000000000000000000000000000..0286861c1bd28e6ae4630b70cc3aaff02b23cf66
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/cors-configuration.md
@@ -0,0 +1,117 @@
+---
+title: CORS Configuration
+impact: HIGH
+tags: [cors, cross-origin, same-origin-policy, owasp]
+---
+
+# CORS Configuration
+
+Check for overly permissive Cross-Origin Resource Sharing (CORS) policies that allow unauthorized cross-origin requests.
+
+> **Related:** CSRF protection in [csrf-protection.md](csrf-protection.md). Security headers in [security-headers.md](security-headers.md).
+
+## Why
+
+- **Unauthorized access**: Malicious sites can access your API
+- **Credential theft**: CORS with credentials exposes sensitive data
+- **CSRF attacks**: Improper CORS enables cross-site attacks
+- **Data leakage**: Private APIs exposed to untrusted origins
+
+## What to Check
+
+- [ ] `Access-Control-Allow-Origin: *` with credentials
+- [ ] Reflecting request origin without validation
+- [ ] Missing origin validation
+- [ ] Overly permissive allowed methods/headers
+- [ ] No CORS policy on sensitive endpoints
+
+## Bad Patterns
+
+```typescript
+// Bad: Wildcard with credentials
+return Response.json(data, {
+ headers: {
+ "Access-Control-Allow-Origin": "*",
+ "Access-Control-Allow-Credentials": "true",
+ },
+});
+
+// Bad: Reflecting any origin
+const origin = req.headers.get("origin");
+return Response.json(data, {
+ headers: {
+ "Access-Control-Allow-Origin": origin || "*",
+ "Access-Control-Allow-Credentials": "true",
+ },
+});
+
+// Bad: Weak regex
+return /.*\.yourdomain\.com/.test(origin); // evil-yourdomain.com matches!
+```
+
+## Good Patterns
+
+```typescript
+// Good: Strict origin allowlist
+const ALLOWED_ORIGINS = [
+ "https://yourdomain.com",
+ "https://app.yourdomain.com",
+ "https://admin.yourdomain.com",
+];
+
+async function handler(req: Request): Promise<Response> {
+ let origin = req.headers.get("origin");
+  let corsHeaders: Record<string, string> = {};
+
+ if (origin && ALLOWED_ORIGINS.includes(origin)) {
+ corsHeaders["Access-Control-Allow-Origin"] = origin;
+ corsHeaders["Access-Control-Allow-Credentials"] = "true";
+ corsHeaders["Access-Control-Allow-Methods"] = "GET, POST, PUT, DELETE";
+ corsHeaders["Access-Control-Allow-Headers"] = "Content-Type, Authorization";
+ }
+
+ return Response.json(data, { headers: corsHeaders });
+}
+
+// Good: Environment-based CORS
+function getAllowedOrigins(): string[] {
+ if (process.env.NODE_ENV === "production") {
+ return ["https://yourdomain.com", "https://app.yourdomain.com"];
+ }
+ return ["http://localhost:3000", "http://localhost:5173"];
+}
+
+// Good: Preflight request handling
+async function corsHandler(req: Request): Promise<Response | null> {
+ let origin = req.headers.get("origin");
+ let allowed = getAllowedOrigins();
+
+ if (!origin || !allowed.includes(origin)) {
+ return new Response("Origin not allowed", { status: 403 });
+ }
+
+ let corsHeaders = {
+ "Access-Control-Allow-Origin": origin,
+ "Access-Control-Allow-Credentials": "true",
+ "Access-Control-Allow-Methods": "GET, POST, PUT, DELETE, PATCH",
+ "Access-Control-Allow-Headers": "Content-Type, Authorization",
+ "Access-Control-Max-Age": "86400",
+ };
+
+ if (req.method === "OPTIONS") {
+ return new Response(null, { status: 204, headers: corsHeaders });
+ }
+
+ return null;
+}
+```
+
+## Rules
+
+1. **Never use `Access-Control-Allow-Origin: *` with credentials** - Pick one or the other
+2. **Use strict origin allowlist** - Explicitly list allowed origins
+3. **Validate origin before reflecting** - Don't blindly reflect request origin
+4. **Separate dev and prod origins** - Don't allow localhost in production
+5. **Limit allowed methods** - Only necessary HTTP methods
+6. **Limit allowed headers** - Only required headers
+7. **Handle preflight requests** - Respond to OPTIONS correctly
diff --git a/.agents/skills/owasp-security-check/rules/cryptographic-failures.md b/.agents/skills/owasp-security-check/rules/cryptographic-failures.md
new file mode 100644
index 0000000000000000000000000000000000000000..60db90894c64cdea5ecd36160241aea9087a0aad
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/cryptographic-failures.md
@@ -0,0 +1,125 @@
+---
+title: Cryptographic Failures
+impact: CRITICAL
+tags: [cryptography, encryption, hashing, tls, owasp-a02]
+---
+
+# Cryptographic Failures
+
+Check for weak encryption, improper key management, plaintext storage of sensitive data, and missing encryption in transit.
+
+> **Related:** Password hashing in [authentication-failures.md](authentication-failures.md). Secrets in [secrets-management.md](secrets-management.md). Data signing in [data-integrity-failures.md](data-integrity-failures.md).
+
+## Why
+
+- **Data breach**: Sensitive data exposed if stolen
+- **Compliance violation**: GDPR, PCI-DSS require encryption
+- **Man-in-the-middle**: Unencrypted connections intercepted
+- **Password compromise**: Weak hashing enables rainbow table attacks
+
+## What to Check
+
+- [ ] Sensitive data stored in plaintext (passwords, tokens, PII)
+- [ ] Weak hashing algorithms (MD5, SHA1) for passwords
+- [ ] Weak encryption algorithms (DES, RC4, ECB mode)
+- [ ] Hardcoded encryption keys or predictable keys
+- [ ] Missing HTTPS/TLS for data transmission
+- [ ] Insufficient key length (< 2048 bits for RSA, < 256 bits symmetric)
+- [ ] No encryption for sensitive data at rest
+
+## Bad Patterns
+
+```typescript
+// Bad: MD5 for password hashing
+async function hashPassword(password: string): Promise<string> {
+ // VULNERABLE: MD5 is too fast, easily cracked
+ return crypto.createHash("md5").update(password).digest("hex");
+}
+
+// Bad: Storing passwords in plaintext
+await db.users.create({
+ data: {
+ email,
+ password, // VULNERABLE: Plaintext!
+ },
+});
+
+// Bad: Weak encryption algorithm
+const cipher = crypto.createCipher("des", "weak-key"); // VULNERABLE: DES is weak
+
+// Bad: Hardcoded encryption key
+const ENCRYPTION_KEY = "my-secret-key-12345"; // VULNERABLE: Hardcoded
+
+function encryptData(data: string): string {
+ const cipher = crypto.createCipheriv("aes-256-cbc", ENCRYPTION_KEY, iv);
+ return cipher.update(data, "utf8", "hex");
+}
+
+// Bad: No encryption for sensitive data
+await db.creditCards.create({
+ data: {
+ number: "4111111111111111", // VULNERABLE: Plaintext
+ cvv: "123",
+ expiresAt: "12/25",
+ },
+});
+```
+
+## Good Patterns
+
+```typescript
+// Good: bcrypt for password hashing
+async function hashPassword(password: string): Promise<string> {
+  return await bcrypt.hash(password, 12);
+}
+
+// Good: AES-256-GCM encryption
+function encryptData(plaintext: string): { encrypted: string; iv: string } {
+ let key = Buffer.from(process.env.ENCRYPTION_KEY!, "hex");
+ let iv = crypto.randomBytes(16);
+
+ let cipher = crypto.createCipheriv("aes-256-gcm", key, iv);
+ let encrypted = cipher.update(plaintext, "utf8", "hex");
+ encrypted += cipher.final("hex");
+ encrypted += cipher.getAuthTag().toString("hex");
+
+ return { encrypted, iv: iv.toString("hex") };
+}
+
+function decryptData(encrypted: string, ivHex: string): string {
+ let key = Buffer.from(process.env.ENCRYPTION_KEY!, "hex");
+ let iv = Buffer.from(ivHex, "hex");
+ let authTag = Buffer.from(encrypted.slice(-32), "hex");
+ let ciphertext = encrypted.slice(0, -32);
+
+ let decipher = crypto.createDecipheriv("aes-256-gcm", key, iv);
+ decipher.setAuthTag(authTag);
+
+ return decipher.update(ciphertext, "hex", "utf8") + decipher.final("utf8");
+}
+
+// Good: Encrypt sensitive fields
+async function saveCreditCard(req: Request): Promise<Response> {
+ let { number, cvv } = await req.json();
+
+ let { encrypted: encryptedNumber, iv: numberIv } = encryptData(number);
+ let { encrypted: encryptedCvv, iv: cvvIv } = encryptData(cvv);
+
+ await db.creditCards.create({
+ data: { encryptedNumber, numberIv, encryptedCvv, cvvIv },
+ });
+
+ return new Response("Saved", { status: 201 });
+}
+```
+
+## Rules
+
+1. **Use strong password hashing** - bcrypt, argon2, or scrypt (never MD5/SHA1)
+2. **Use modern encryption** - AES-256-GCM or ChaCha20-Poly1305
+3. **Never hardcode keys** - Use environment variables or key management systems
+4. **Encrypt sensitive data at rest** - PII, credentials, financial data
+5. **Enforce HTTPS/TLS** - All data in transit must be encrypted
+6. **Use sufficient key lengths** - RSA ≥ 2048 bits, symmetric ≥ 256 bits
+7. **Generate random IVs** - New random IV for each encryption operation
+8. **Rotate keys regularly** - Implement key rotation policies
diff --git a/.agents/skills/owasp-security-check/rules/csrf-protection.md b/.agents/skills/owasp-security-check/rules/csrf-protection.md
new file mode 100644
index 0000000000000000000000000000000000000000..bc513988b43f5559ac8fdc9bdb8836e4d4fd8746
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/csrf-protection.md
@@ -0,0 +1,132 @@
+---
+title: CSRF Protection
+impact: HIGH
+tags: [csrf, tokens, cookies, same-site]
+---
+
+# CSRF Protection
+
+Check for Cross-Site Request Forgery protection on state-changing operations.
+
+> **Related:** Session cookie configuration is covered in [session-security.md](session-security.md). CORS configuration is covered in [cors-configuration.md](cors-configuration.md).
+
+## Why
+
+- **Unauthorized actions**: Attackers perform actions as victim
+- **Account takeover**: Change email/password without consent
+- **Financial fraud**: Unauthorized transfers
+- **Data manipulation**: Modify user data
+
+## What to Check
+
+**Vulnerability Indicators:**
+
+- [ ] State-changing endpoints accept GET requests
+- [ ] No CSRF tokens on forms
+- [ ] Cookies without SameSite attribute
+- [ ] Missing Origin/Referer validation
+- [ ] No double-submit cookie pattern
+
+## Bad Patterns
+
+```typescript
+// Bad: No SameSite on cookie
+return new Response("OK", {
+ headers: { "Set-Cookie": "session=abc123; HttpOnly; Secure" },
+});
+
+// Bad: State change via GET
+async function deleteAccount(req: Request): Promise<void> {
+ let userId = new URL(req.url).searchParams.get("id");
+ await db.users.delete({ where: { id: userId } });
+}
+
+// Bad: No CSRF token
+const { to, amount } = await req.json();
+await transfer(to, amount); // Attacker can trigger!
+```
+
+## Good Patterns
+
+```typescript
+// Good: SameSite cookie
+async function login(req: Request): Promise<Response> {
+ return new Response("OK", {
+ headers: {
+ "Set-Cookie": "session=abc123; HttpOnly; Secure; SameSite=Strict; Path=/",
+ },
+ });
+}
+
+// Good: CSRF token validation
+async function generateCSRFToken(sessionId: string): Promise<string> {
+ let token = crypto.randomBytes(32).toString("hex");
+
+ await db.csrfToken.create({
+ data: {
+ token,
+ sessionId,
+ expiresAt: new Date(Date.now() + 60 * 60 * 1000),
+ },
+ });
+
+ return token;
+}
+
+async function validateCSRFToken(
+ sessionId: string,
+ token: string,
+): Promise<boolean> {
+ let stored = await db.csrfToken.findFirst({
+ where: { token, sessionId, expiresAt: { gt: new Date() } },
+ });
+
+ if (stored) {
+ await db.csrfToken.delete({ where: { id: stored.id } });
+ return true;
+ }
+ return false;
+}
+
+async function transferMoney(req: Request): Promise<Response> {
+ let session = await getSession(req);
+ let { to, amount, csrfToken } = await req.json();
+
+ if (!(await validateCSRFToken(session.id, csrfToken))) {
+ return new Response("Invalid CSRF token", { status: 403 });
+ }
+
+ await transfer(to, amount);
+ return new Response("OK");
+}
+
+// Good: Double-submit cookie pattern
+async function setupCSRF(req: Request): Promise<Response> {
+ let token = crypto.randomBytes(32).toString("hex");
+
+ return Response.json(
+ { csrfToken: token },
+ {
+ headers: {
+ "Set-Cookie": `csrf=${token}; SameSite=Strict; Secure`,
+ "Content-Type": "application/json",
+ },
+ },
+ );
+}
+
+async function validateDoubleSubmit(req: Request): Promise<boolean> {
+ let cookies = parseCookies(req.headers.get("cookie"));
+ let { csrfToken } = await req.json();
+
+ return cookies.csrf === csrfToken;
+}
+```
+
+## Rules
+
+1. **Use SameSite=Strict or Lax** - On all session cookies
+2. **No state changes via GET** - Use POST/PUT/DELETE
+3. **Implement CSRF tokens** - For session-based auth
+4. **Double-submit cookie** - Alternative to tokens
+5. **Validate Origin header** - Additional protection layer
diff --git a/.agents/skills/owasp-security-check/rules/data-integrity-failures.md b/.agents/skills/owasp-security-check/rules/data-integrity-failures.md
new file mode 100644
index 0000000000000000000000000000000000000000..e6868bdabf0efdee314b1c36b6d3ef67383698da
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/data-integrity-failures.md
@@ -0,0 +1,137 @@
+---
+title: Software and Data Integrity Failures
+impact: CRITICAL
+tags: [integrity, jwt, serialization, ci-cd, owasp-a08]
+---
+
+# Software and Data Integrity Failures
+
+Check for unsigned data, insecure deserialization, and lack of integrity verification in code and data.
+
+> **Related:** JWT signing in [cryptographic-failures.md](cryptographic-failures.md) and [session-security.md](session-security.md). Dependency integrity in [vulnerable-dependencies.md](vulnerable-dependencies.md).
+
+## Why
+
+- **Data tampering**: Attackers modify unsigned data
+- **Remote code execution**: Insecure deserialization exploits
+- **Supply chain attacks**: Unsigned packages or builds
+- **Trust violations**: Cannot verify data authenticity
+
+## What to Check
+
+- [ ] JWT tokens decoded without signature verification
+- [ ] Accepting unsigned or unverified data
+- [ ] Insecure deserialization of user input
+- [ ] No integrity checks on file downloads
+- [ ] Missing code signing in CI/CD
+- [ ] Auto-update without verification
+- [ ] Using eval() or Function() with external data
+
+## Bad Patterns
+
+```typescript
+// Bad: No signature verification
+async function handleWebhook(req: Request): Promise<void> {
+ const payload = await req.json();
+ // Trusting payload without verification!
+ await processOrder(payload);
+}
+
+// Bad: JWT without verification
+async function getUser(req: Request): Promise<Response> {
+ let token = req.headers.get("authorization")?.split(" ")[1];
+ let payload = JSON.parse(atob(token!.split(".")[1])); // Just decode!
+ // Attacker can modify payload
+ return Response.json({ userId: payload.sub });
+}
+
+// Bad: No integrity check on downloads
+async function downloadUpdate(req: Request): Promise<Response> {
+ let file = await fetch("https://cdn.example.com/update.zip");
+ // No checksum verification
+ return new Response(file.body);
+}
+```
+
+## Good Patterns
+
+```typescript
+// Good: Verify webhook signature
+async function handleWebhook(req: Request): Promise<Response> {
+ let signature = req.headers.get("x-webhook-signature");
+ let payload = await req.text();
+
+ let expected = crypto
+ .createHmac("sha256", process.env.WEBHOOK_SECRET!)
+ .update(payload)
+ .digest("hex");
+
+ if (signature !== expected) {
+ return new Response("Invalid signature", { status: 401 });
+ }
+
+ await processOrder(JSON.parse(payload));
+ return new Response("OK");
+}
+
+// Good: Verify JWT signature
+async function getUser(req: Request): Promise<Response> {
+ let token = req.headers.get("authorization")?.split(" ")[1];
+
+ if (!token) {
+ return new Response("Unauthorized", { status: 401 });
+ }
+
+ let payload = await verifyJWT(token, process.env.JWT_SECRET!);
+
+ let user = await db.users.findUnique({
+ where: { id: payload.sub },
+ });
+
+ return Response.json(user);
+}
+
+// Good: Verify file integrity with checksum
+async function downloadUpdate(req: Request): Promise<Response> {
+ let file = await fetch("https://cdn.example.com/update.zip");
+ let buffer = await file.arrayBuffer();
+
+ let hash = crypto
+ .createHash("sha256")
+ .update(Buffer.from(buffer))
+ .digest("hex");
+ let expected = "a1b2c3d4..."; // From trusted source
+
+ if (hash !== expected) {
+ return new Response("Integrity check failed", { status: 400 });
+ }
+
+ return new Response(buffer);
+}
+
+// Good: Signed cookies
+function signCookie(value: string, secret: string): string {
+ let sig = crypto.createHmac("sha256", secret).update(value).digest("hex");
+ return `${value}.${sig}`;
+}
+
+function verifyCookie(signedValue: string, secret: string): string | null {
+ let [value, signature] = signedValue.split(".");
+ let expected = crypto
+ .createHmac("sha256", secret)
+ .update(value)
+ .digest("hex");
+ return signature === expected ? value : null;
+}
+```
+
+## Rules
+
+1. **Always verify JWT signatures** - Never decode without verification
+2. **Never trust client data** - Look up prices, roles, permissions server-side
+3. **Use JSON.parse, never eval** - Safe deserialization only
+4. **Use Subresource Integrity** - For all CDN-loaded scripts/styles
+5. **Sign cookies** - Use HMAC for tamper detection
+6. **Verify checksums** - For downloaded code and updates
+7. **Lock dependency versions** - Use lockfiles to ensure integrity
+8. **Sign code in CI/CD** - Verify builds haven't been tampered with
diff --git a/.agents/skills/owasp-security-check/rules/file-upload-security.md b/.agents/skills/owasp-security-check/rules/file-upload-security.md
new file mode 100644
index 0000000000000000000000000000000000000000..a698a382e293522f497fd2b8c82ebf2ace4e744c
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/file-upload-security.md
@@ -0,0 +1,111 @@
+---
+title: File Upload Security
+impact: HIGH
+tags: [file-upload, mime-types, path-traversal]
+---
+
+# File Upload Security
+
+Check for secure file upload handling including type validation, size limits, and safe storage.
+
+> **Related:** Path traversal is also covered in [injection-attacks.md](injection-attacks.md) and [broken-access-control.md](broken-access-control.md). XSS prevention is covered in [injection-attacks.md](injection-attacks.md) and [security-headers.md](security-headers.md).
+
+## Why
+
+- **Malware upload**: Attackers upload malicious files
+- **Path traversal**: Overwrite system files
+- **XSS via files**: SVG/HTML files execute scripts
+- **Resource exhaustion**: Huge file uploads
+
+## What to Check
+
+**Vulnerability Indicators:**
+
+- [ ] No file type validation
+- [ ] No file size limits
+- [ ] Original filename used for storage
+- [ ] Files stored in web-accessible directory
+- [ ] No MIME type validation
+- [ ] Both extension and MIME type not checked
+
+## Bad Patterns
+
+```typescript
+// Bad: No validation
+async function uploadFile(req: Request): Promise<Response> {
+ let formData = await req.formData();
+ let file = formData.get("file") as File;
+
+ // No type or size checking!
+ await writeFile(`./uploads/${file.name}`, file);
+
+ return new Response("Uploaded");
+}
+
+// Bad: Using original filename
+await writeFile(`./public/uploads/${file.name}`, buffer);
+// User could upload "../../etc/passwd"
+```
+
+## Good Patterns
+
+```typescript
+// Good: Comprehensive file validation
+const ALLOWED_MIME_TYPES = ["image/jpeg", "image/png", "image/webp"];
+const ALLOWED_EXTENSIONS = [".jpg", ".jpeg", ".png", ".webp"];
+const MAX_FILE_SIZE = 5 * 1024 * 1024; // 5MB
+
+async function uploadFile(req: Request): Promise<Response> {
+ let formData = await req.formData();
+ let file = formData.get("file") as File;
+
+ if (!file) {
+ return new Response("No file provided", { status: 400 });
+ }
+
+ if (!ALLOWED_MIME_TYPES.includes(file.type)) {
+ return new Response("Invalid file type", { status: 400 });
+ }
+
+ if (file.size > MAX_FILE_SIZE) {
+ return new Response("File too large", { status: 400 });
+ }
+
+ let ext = path.extname(file.name).toLowerCase();
+ if (!ALLOWED_EXTENSIONS.includes(ext)) {
+ return new Response("Invalid file extension", { status: 400 });
+ }
+
+ // Generate safe random filename
+ let randomName = crypto.randomBytes(16).toString("hex");
+ let safeFilename = `${randomName}${ext}`;
+
+ // Store outside web root
+ let uploadPath = path.join(process.cwd(), "private", "uploads", safeFilename);
+
+ let buffer = await file.arrayBuffer();
+ await writeFile(uploadPath, Buffer.from(buffer));
+
+ // Store metadata
+ let uploadedFile = await db.file.create({
+ data: {
+ filename: safeFilename,
+ originalName: file.name.slice(0, 255),
+ mimeType: file.type,
+ size: file.size,
+ uploadedAt: new Date(),
+ },
+ });
+
+ return Response.json(uploadedFile, { status: 201 });
+}
+```
+
+## Rules
+
+1. **Validate MIME type** - Check file.type
+2. **Validate extension** - Check file extension
+3. **Enforce size limits** - Prevent huge uploads
+4. **Generate random filenames** - Don't use user input
+5. **Store outside web root** - Not in public/
+6. **Validate both MIME and extension** - Double check
diff --git a/.agents/skills/owasp-security-check/rules/injection-attacks.md b/.agents/skills/owasp-security-check/rules/injection-attacks.md
new file mode 100644
index 0000000000000000000000000000000000000000..9c3eab894dacfaa0c1b2d30854a0d0d2015ec081
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/injection-attacks.md
@@ -0,0 +1,103 @@
+---
+title: Injection Attack Prevention
+impact: CRITICAL
+tags: [injection, sql, xss, nosql, command-injection, path-traversal, owasp-a03]
+---
+
+# Injection Attack Prevention
+
+Check for SQL injection, XSS, NoSQL injection, Command injection, and Path Traversal through proper input validation and output encoding.
+
+> **Related:** XSS headers in [security-headers.md](security-headers.md). File upload path traversal in [file-upload-security.md](file-upload-security.md).
+
+## Why
+
+- **Data breach**: SQL/NoSQL injection exposes entire databases
+- **Account takeover**: XSS steals session cookies and credentials
+- **Remote code execution**: Command injection compromises servers
+- **Data manipulation**: Unauthorized modification or deletion
+
+## What to Check
+
+- [ ] String concatenation or template literals in database queries
+- [ ] User input rendered in HTML without escaping
+- [ ] User input passed to shell commands (`exec`, `spawn` with `shell: true`)
+- [ ] User input used in file paths without validation
+- [ ] Dynamic code execution (`eval`, `Function` constructor, `setTimeout` with strings)
+- [ ] `dangerouslySetInnerHTML` or `.innerHTML` with user content
+- [ ] NoSQL queries accepting raw objects with `$where`, `$regex`, `$ne` operators
+
+## Bad Patterns
+
+```typescript
+// Bad: SQL injection
+const query = `SELECT * FROM users WHERE email = '${email}'`;
+
+// Bad: XSS via dangerouslySetInnerHTML
+
+
+// Bad: Command injection
+execSync(`convert ${filename} output.jpg`);
+
+// Bad: Path traversal
+const content = await fs.readFile(`./uploads/${filename}`, "utf-8");
+```
+
+## Good Patterns
+
+```typescript
+// Good: Parameterized query
+async function getUser(req: Request): Promise<Response> {
+ let url = new URL(req.url);
+ let email = url.searchParams.get("email");
+
+ let user = await db.users.findUnique({ where: { email } });
+ return Response.json(user);
+}
+
+// Good: React auto-escapes by default
+function UserComment({ comment }: { comment: string }) {
+  return <p>{comment}</p>;
+}
+
+// Good: Avoid shell commands, validate strictly
+async function convertImage(req: Request): Promise<Response> {
+ let formData = await req.formData();
+ let file = formData.get("file") as File;
+
+ let ALLOWED = ["image/jpeg", "image/png", "image/webp"];
+
+ if (!ALLOWED.includes(file.type)) {
+ return new Response("Invalid type", { status: 400 });
+ }
+
+ let buffer = await file.arrayBuffer();
+ // Use image library, not shell
+ return new Response("Uploaded", { status: 200 });
+}
+
+// Good: Allowlist for file paths
+async function readFile(req: Request): Promise<Response> {
+ let url = new URL(req.url);
+ let filename = url.searchParams.get("file");
+
+ let ALLOWED = ["terms.pdf", "privacy.pdf", "guide.pdf"];
+
+ if (!filename || !ALLOWED.includes(filename) || filename.includes("..")) {
+ return new Response("Invalid file", { status: 400 });
+ }
+
+  let content = await fs.readFile(`./documents/${filename}`, "utf-8");
+ return new Response(content);
+}
+```
+
+## Rules
+
+1. **Always use parameterized queries** - Never concatenate user input into SQL
+2. **Validate all input** - Use type checks and format validation
+3. **Escape output by context** - HTML, JavaScript, SQL require different escaping
+4. **Use allowlists over denylists** - Explicitly allow known-good values
+5. **Never use eval()** - Find safe alternatives for dynamic execution
+6. **Avoid shell commands** - Use libraries or built-in APIs instead
+7. **Validate file paths** - Prevent directory traversal with strict validation
diff --git a/.agents/skills/owasp-security-check/rules/insecure-design.md b/.agents/skills/owasp-security-check/rules/insecure-design.md
new file mode 100644
index 0000000000000000000000000000000000000000..1930d9042d4ac40645565f6e51689bcf5b7f5be1
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/insecure-design.md
@@ -0,0 +1,142 @@
+---
+title: Insecure Design
+impact: HIGH
+tags: [design, architecture, threat-modeling, owasp-a04]
+---
+
+# Insecure Design
+
+Check for security anti-patterns and flaws in application architecture that can't be fixed by implementation alone.
+
+## Why
+
+- **Fundamental flaws**: Can't be patched, require redesign
+- **Business logic bypass**: Attackers exploit workflow flaws
+- **Privilege escalation**: Design allows unauthorized access
+- **Data corruption**: Race conditions and logic errors
+
+## What to Check
+
+**Vulnerability Indicators:**
+
+- [ ] Security by obscurity instead of proper access control
+- [ ] Missing rate limiting on expensive operations
+- [ ] No input validation on business logic
+- [ ] Race conditions in multi-step workflows
+- [ ] Trust boundaries not defined
+- [ ] Missing defense in depth
+- [ ] No threat modeling performed
+
+## Bad Patterns
+
+```typescript
+// Bad: Security by obscurity
+if (req.headers.get("x-admin-secret") === "admin123") {
+ // Admin operations
+}
+
+// Bad: Race condition in balance check
+const balance = await getBalance(from);
+if (balance >= amount) {
+ // Race: balance could change here!
+ await updateBalance(from, balance - amount);
+}
+
+// Bad: No rate limiting
+async function generateReport(req: Request): Promise<Response> {
+ const report = await runExpensiveQuery(); // Can DoS
+ return new Response(report);
+}
+
+// Bad: Trust user role from client
+const { isAdmin } = await req.json();
+if (isAdmin) {
+ await db.users.delete({ where: { id } }); // User can claim admin!
+}
+```
+
+## Good Patterns
+
+```typescript
+// Good: Proper RBAC
+async function adminEndpoint(req: Request): Promise<Response> {
+ let session = await getSession(req);
+ let user = await db.users.findUnique({
+ where: { id: session.userId },
+ select: { role: true },
+ });
+
+ if (user.role !== "ADMIN") {
+ return new Response("Forbidden", { status: 403 });
+ }
+
+ // Admin operations
+}
+
+// Good: Transaction for atomic operations
+async function transferMoney(from: string, to: string, amount: number) {
+ await db.$transaction(async (tx) => {
+ let fromAccount = await tx.account.findUnique({
+ where: { id: from },
+ select: { balance: true },
+ });
+
+ if (!fromAccount || fromAccount.balance < amount) {
+ throw new Error("Insufficient funds");
+ }
+
+ await tx.account.update({
+ where: { id: from },
+ data: { balance: { decrement: amount } },
+ });
+
+ await tx.account.update({
+ where: { id: to },
+ data: { balance: { increment: amount } },
+ });
+ });
+}
+
+// Good: Rate limiting on expensive operations
+async function generateReport(req: Request): Promise<Response> {
+ let session = await getSession(req);
+
+ let { success } = await reportLimit.limit(session.userId);
+ if (!success) {
+ return new Response("Rate limit exceeded", { status: 429 });
+ }
+
+ let report = await runExpensiveQuery();
+ return new Response(report);
+}
+
+// Good: Server-side role verification
+async function deleteUser(req: Request): Promise<Response> {
+ let session = await getSession(req);
+
+ let user = await db.users.findUnique({
+ where: { id: session.userId },
+ select: { role: true },
+ });
+
+ if (user.role !== "ADMIN") {
+ return new Response("Forbidden", { status: 403 });
+ }
+
+ let { targetUserId } = await req.json();
+ await db.users.delete({ where: { id: targetUserId } });
+
+ return new Response("Deleted");
+}
+```
+
+## Rules
+
+1. **Don't rely on security by obscurity** - Use proper authentication
+2. **Use transactions for atomic operations** - Prevent race conditions
+3. **Rate limit expensive operations** - Prevent resource exhaustion
+4. **Verify privileges server-side** - Never trust client data
+5. **Implement defense in depth** - Multiple layers of security
+6. **Perform threat modeling** - Identify risks in design phase
+7. **Define trust boundaries** - Know what to validate
+8. **Fail securely** - Default deny, not default allow
diff --git a/.agents/skills/owasp-security-check/rules/logging-monitoring.md b/.agents/skills/owasp-security-check/rules/logging-monitoring.md
new file mode 100644
index 0000000000000000000000000000000000000000..4adb57fb63e3e834be5584982d38d736bdb82dd1
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/logging-monitoring.md
@@ -0,0 +1,151 @@
+---
+title: Security Logging and Monitoring Failures
+impact: MEDIUM
+tags: [logging, monitoring, incident-response, owasp-a09]
+---
+
+# Security Logging and Monitoring Failures
+
+Check for insufficient logging of security events, missing monitoring, and lack of incident response capabilities.
+
+> **Related:** Preventing sensitive data in logs is covered in [sensitive-data-exposure.md](sensitive-data-exposure.md).
+
+## Why
+
+- **Delayed breach detection**: Attacks go unnoticed for months
+- **No audit trail**: Can't investigate incidents
+- **Compliance violations**: Regulations require logging
+- **Unable to respond**: No visibility into attacks
+
+## What to Check
+
+**Vulnerability Indicators:**
+
+- [ ] No logging of authentication attempts
+- [ ] Sensitive data in logs (passwords, tokens)
+- [ ] No monitoring or alerting on suspicious activity
+- [ ] Logs not retained long enough
+- [ ] No log integrity protection
+- [ ] Missing request IDs for tracing
+
+## Bad Patterns
+
+```typescript
+// Bad: No logging of security events
+async function login(req: Request): Promise<Response> {
+ let { email, password } = await req.json();
+
+ let user = await authenticate(email, password);
+
+ if (!user) {
+ // No logging of failed attempt
+ return new Response("Invalid credentials", { status: 401 });
+ }
+
+ return createSession(user);
+}
+
+// Bad: Logging sensitive data
+console.log("User data:", {
+ email,
+ password, // Don't log passwords!
+ creditCard,
+});
+
+// Bad: No structured logging
+console.log("User logged in");
+```
+
+## Good Patterns
+
+```typescript
+// Good: Log security events with context
+async function login(req: Request): Promise<Response> {
+ let { email, password } = await req.json();
+ let ip = req.headers.get("x-forwarded-for");
+
+ let user = await authenticate(email, password);
+
+ if (!user) {
+ logger.warn("Failed login", {
+ email,
+ ip,
+ timestamp: new Date().toISOString(),
+ });
+ return new Response("Invalid credentials", { status: 401 });
+ }
+
+ logger.info("Successful login", { userId: user.id, email, ip });
+ return createSession(user);
+}
+
+// Good: Structured logging with sanitization
+function createLogger() {
+ let sensitiveKeys = ["password", "token", "secret", "apiKey"];
+
+ function sanitize(obj: any): any {
+ if (typeof obj !== "object" || obj === null) return obj;
+ let sanitized: any = {};
+ for (const [key, value] of Object.entries(obj)) {
+ sanitized[key] = sensitiveKeys.some((sk) =>
+ key.toLowerCase().includes(sk),
+ )
+ ? "[REDACTED]"
+ : typeof value === "object"
+ ? sanitize(value)
+ : value;
+ }
+ return sanitized;
+ }
+
+ return {
+    info(message: string, context?: Record<string, unknown>) {
+ console.log(
+ JSON.stringify({
+ level: "info",
+ message,
+ context: context ? sanitize(context) : undefined,
+ timestamp: new Date().toISOString(),
+ }),
+ );
+ },
+    warn(message: string, context?: Record<string, unknown>) {
+ console.warn(
+ JSON.stringify({
+ level: "warn",
+ message,
+ context: context ? sanitize(context) : undefined,
+ timestamp: new Date().toISOString(),
+ }),
+ );
+ },
+    error(message: string, error: Error, context?: Record<string, unknown>) {
+ console.error(
+ JSON.stringify({
+ level: "error",
+ message,
+ context: {
+ error: error.message,
+ stack: error.stack,
+ ...sanitize(context || {}),
+ },
+ timestamp: new Date().toISOString(),
+ }),
+ );
+ },
+ };
+}
+
+const logger = createLogger();
+```
+
+## Rules
+
+1. **Log all authentication events** - Successes and failures
+2. **Log authorization failures** - When access is denied
+3. **Don't log sensitive data** - Sanitize passwords, tokens, PII
+4. **Use structured logging** - JSON format for parsing
+5. **Include context** - User ID, IP, timestamp, request ID
+6. **Monitor and alert** - Set up alerts for suspicious patterns
+7. **Retain logs appropriately** - Balance storage and compliance
+8. **Protect log integrity** - Prevent tampering
diff --git a/.agents/skills/owasp-security-check/rules/rate-limiting.md b/.agents/skills/owasp-security-check/rules/rate-limiting.md
new file mode 100644
index 0000000000000000000000000000000000000000..a1096ce64d213fcac6642395f0c0e1c497c260c2
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/rate-limiting.md
@@ -0,0 +1,127 @@
+---
+title: Rate Limiting and DoS Prevention
+impact: MEDIUM
+tags: [rate-limiting, dos, brute-force]
+---
+
+# Rate Limiting and DoS Prevention
+
+Check for rate limiting on authentication endpoints, APIs, and resource-intensive operations to prevent abuse and denial of service.
+
+> **Related:** Authentication rate limiting is covered in [authentication-failures.md](authentication-failures.md). API rate limiting is covered in [api-security.md](api-security.md).
+
+## Why
+
+- **Brute force prevention**: Stop password guessing attacks
+- **Resource exhaustion**: Prevent server overload
+- **Cost control**: Limit API abuse and costs
+- **Fair usage**: Ensure availability for all users
+
+## What to Check
+
+**Vulnerability Indicators:**
+
+- [ ] No rate limiting on login/signup endpoints
+- [ ] No rate limiting on password reset
+- [ ] Unlimited API requests
+- [ ] No throttling on expensive operations
+- [ ] Missing 429 (Too Many Requests) responses
+
+## Bad Patterns
+
+```typescript
+// Bad: No rate limiting on login
+async function login(req: Request): Promise<Response> {
+ let { email, password } = await req.json();
+
+ // Allows unlimited login attempts
+ let user = await authenticate(email, password);
+
+ if (!user) {
+ return new Response("Invalid credentials", { status: 401 });
+ }
+
+ return createSession(user);
+}
+
+// Bad: No API rate limiting
+async function apiEndpoint(req: Request): Promise<Response> {
+ // Can be called unlimited times
+ let data = await expensiveQuery();
+ return Response.json(data);
+}
+```
+
+## Good Patterns
+
+```typescript
+// Good: Rate limiting with Redis
+const loginRateLimit = new Ratelimit({
+ redis,
+ limiter: Ratelimit.slidingWindow(5, "15m"), // 5 attempts per 15 min
+ analytics: true,
+});
+
+async function login(req: Request): Promise<Response> {
+ let ip = req.headers.get("x-forwarded-for") || "unknown";
+
+ let { success, limit, remaining, reset } = await loginRateLimit.limit(ip);
+
+ if (!success) {
+ return new Response("Too many login attempts", {
+ status: 429,
+ headers: {
+ "Retry-After": String(Math.ceil((reset - Date.now()) / 1000)),
+ "X-RateLimit-Limit": String(limit),
+ "X-RateLimit-Remaining": String(remaining),
+ "X-RateLimit-Reset": String(reset),
+ },
+ });
+ }
+
+ let { email, password } = await req.json();
+ let user = await authenticate(email, password);
+
+ if (!user) {
+ return new Response("Invalid credentials", { status: 401 });
+ }
+
+ return createSession(user);
+}
+
+// Good: Per-user API rate limiting
+const apiRateLimit = new Ratelimit({
+ redis,
+ limiter: Ratelimit.slidingWindow(100, "1h"),
+});
+
+async function apiEndpoint(req: Request): Promise<Response> {
+ let session = await getSession(req);
+ if (!session) return new Response("Unauthorized", { status: 401 });
+
+ let { success } = await apiRateLimit.limit(session.userId);
+ if (!success) return new Response("Rate limit exceeded", { status: 429 });
+
+ let data = await performOperation();
+ return Response.json(data);
+}
+
+// Good: Tiered rate limiting
+function getRateLimit(tier: string): Ratelimit {
+ let limits = {
+ free: Ratelimit.slidingWindow(10, "1h"),
+ pro: Ratelimit.slidingWindow(100, "1h"),
+ enterprise: Ratelimit.slidingWindow(1000, "1h"),
+ };
+ return new Ratelimit({ redis, limiter: limits[tier] || limits.free });
+}
+```
+
+## Rules
+
+1. **Rate limit auth endpoints** - Prevent brute force
+2. **Per-IP and per-user limits** - Multiple layers
+3. **Return 429 status** - Standard rate limit response
+4. **Include retry headers** - Retry-After, X-RateLimit-\*
+5. **Different limits for tiers** - Free vs paid users
+6. **Rate limit expensive operations** - Reports, exports, search
diff --git a/.agents/skills/owasp-security-check/rules/redirect-validation.md b/.agents/skills/owasp-security-check/rules/redirect-validation.md
new file mode 100644
index 0000000000000000000000000000000000000000..242e72b07915043d2b01cf3650f2a76c04816fab
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/redirect-validation.md
@@ -0,0 +1,110 @@
+---
+title: Open Redirect Prevention
+impact: MEDIUM
+tags: [redirects, phishing, open-redirect]
+---
+
+# Open Redirect Prevention
+
+Check for unvalidated redirect and forward URLs that could be used for phishing attacks.
+
+> **Related:** SSRF prevention (server-side URL validation) is covered in [ssrf-attacks.md](ssrf-attacks.md).
+
+## Why
+
+- **Phishing attacks**: Legitimate domain redirects to malicious site
+- **Credential theft**: Users trust your domain and enter credentials
+- **OAuth attacks**: Redirect after auth to steal tokens
+- **Trust abuse**: Your domain's reputation exploited
+
+## What to Check
+
+**Vulnerability Indicators:**
+
+- [ ] Redirect URLs from query parameters
+- [ ] No validation of redirect target
+- [ ] External redirects allowed without warning
+- [ ] OAuth return_uri not validated
+
+## Bad Patterns
+
+```typescript
+// Bad: Unvalidated redirect
+async function callback(req: Request): Promise<Response> {
+ let url = new URL(req.url);
+ let returnUrl = url.searchParams.get("return");
+
+ // Attacker can set return=https://evil.com
+ return Response.redirect(returnUrl!);
+}
+
+// Bad: No validation on OAuth callback
+async function oauthCallback(req: Request): Promise<Response> {
+ let url = new URL(req.url);
+ let redirectUri = url.searchParams.get("redirect_uri");
+
+ // Complete OAuth flow...
+
+ return Response.redirect(redirectUri!);
+}
+```
+
+## Good Patterns
+
+```typescript
+// Good: Validate against allowlist
+const ALLOWED_REDIRECTS = ["/dashboard", "/profile", "/settings"];
+
+async function callback(req: Request): Promise<Response> {
+ let url = new URL(req.url);
+ let returnUrl = url.searchParams.get("return") || "/";
+
+ if (!ALLOWED_REDIRECTS.includes(returnUrl)) {
+ return Response.redirect("/");
+ }
+
+ return Response.redirect(returnUrl);
+}
+
+// Good: Validate URL is relative
+function isValidRedirect(url: string): boolean {
+ return url.startsWith("/") && !url.startsWith("//");
+}
+
+async function callback(req: Request): Promise<Response> {
+ let url = new URL(req.url);
+ let returnUrl = url.searchParams.get("return") || "/";
+
+ if (!isValidRedirect(returnUrl)) {
+ return Response.redirect("/");
+ }
+
+ return Response.redirect(returnUrl);
+}
+
+// Good: Validate OAuth redirect_uri
+const ALLOWED_OAUTH_REDIRECTS = [
+ "https://app.example.com/callback",
+ "https://admin.example.com/callback",
+];
+
+async function oauthCallback(req: Request): Promise<Response> {
+ let url = new URL(req.url);
+ let redirectUri = url.searchParams.get("redirect_uri");
+
+ if (!redirectUri || !ALLOWED_OAUTH_REDIRECTS.includes(redirectUri)) {
+ return new Response("Invalid redirect_uri", { status: 400 });
+ }
+
+ // Complete OAuth flow...
+ return Response.redirect(redirectUri);
+}
+```
+
+## Rules
+
+1. **Validate redirect URLs** - Use allowlist
+2. **Only allow relative URLs** - Starts with / not //
+3. **Never trust user input** - For redirect targets
+4. **Validate OAuth redirects** - Pre-registered URIs only
+5. **Default to safe redirect** - Home page if invalid
diff --git a/.agents/skills/owasp-security-check/rules/secrets-management.md b/.agents/skills/owasp-security-check/rules/secrets-management.md
new file mode 100644
index 0000000000000000000000000000000000000000..73042a291f159421f0334f1d168c110defbd863b
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/secrets-management.md
@@ -0,0 +1,111 @@
+---
+title: Secrets Management
+impact: CRITICAL
+tags: [secrets, api-keys, environment-variables, credentials]
+---
+
+# Secrets Management
+
+Check for hardcoded secrets, exposed API keys, and improper credential management.
+
+> **Related:** Encryption key management in [cryptographic-failures.md](cryptographic-failures.md). Sensitive data exposure in [sensitive-data-exposure.md](sensitive-data-exposure.md).
+
+## Why
+
+- **Credential exposure**: API keys in code can be stolen
+- **Repository leaks**: Committed secrets in Git history
+- **Unauthorized access**: Exposed keys grant system access
+- **Compliance violations**: Regulations require secret protection
+
+## What to Check
+
+- [ ] Hardcoded API keys, passwords, tokens in code
+- [ ] Secrets committed to version control
+- [ ] .env files committed to repository
+- [ ] API keys in client-side code
+- [ ] Secrets in logs or error messages
+- [ ] No secret rotation policy
+
+## Bad Patterns
+
+```typescript
+// Bad: Hardcoded API key
+const STRIPE_SECRET_KEY = "sk_live_51H..."; // VULNERABLE!
+
+// Bad: Hardcoded database password
+const db = createConnection({
+ host: "localhost",
+ user: "admin",
+ password: "SuperSecret123!" // VULNERABLE!
+});
+
+// Bad: Secret in client-side code
+const config = {
+ apiKey: "AIzaSyB..." // VULNERABLE: Exposed in browser
+};
+
+// Bad: .env file committed to Git
+// .env (in repository) - VULNERABLE!
+DATABASE_URL=postgresql://user:password@localhost/db
+API_SECRET=my-secret-key
+
+// Bad: Logging secrets
+console.log("Connecting with API key:", process.env.API_KEY);
+```
+
+## Good Patterns
+
+```typescript
+// Good: Use environment variables
+const STRIPE_SECRET_KEY = process.env.STRIPE_SECRET_KEY;
+
+if (!STRIPE_SECRET_KEY) {
+ throw new Error("STRIPE_SECRET_KEY not set");
+}
+
+// Good: Validate env vars at startup
+function validateEnv() {
+ let required = ["DATABASE_URL", "JWT_SECRET", "STRIPE_SECRET_KEY"];
+ let missing = required.filter((key) => !process.env[key]);
+ if (missing.length > 0) {
+ throw new Error(`Missing env vars: ${missing.join(", ")}`);
+ }
+}
+
+// Good: Add .env to .gitignore (never commit secrets)
+// Good: Provide .env.example for documentation (safe to commit)
+
+// Good: Secret rotation
+async function rotateApiKey(userId: string) {
+ let newKey = crypto.randomBytes(32).toString("hex");
+ await db.apiKeys.create({
+ data: {
+ userId,
+ key: newKey,
+ expiresAt: new Date(Date.now() + 90 * 24 * 60 * 60 * 1000),
+ },
+ });
+ return newKey;
+}
+
+// Good: Use secret management service
+async function getSecret(name: string): Promise<string> {
+ if (process.env.NODE_ENV === "production") {
+ return await secretsManager.getSecretValue(name);
+ }
+ let value = process.env[name];
+ if (!value) throw new Error(`Secret ${name} not found`);
+ return value;
+}
+```
+
+## Rules
+
+1. **Never hardcode secrets** - Use environment variables or secret managers
+2. **Add .env to .gitignore** - Never commit secret files
+3. **Rotate secrets regularly** - Implement expiration and rotation
+4. **Validate env vars at startup** - Fail fast if secrets missing
+5. **Don't log secrets** - Sanitize logs to remove sensitive values
+6. **No secrets in client code** - Keep API keys server-side only
+7. **Use secret management services** - For production (AWS Secrets Manager, Vault, etc.)
+8. **Scan Git history** - Use tools to find accidentally committed secrets
diff --git a/.agents/skills/owasp-security-check/rules/security-headers.md b/.agents/skills/owasp-security-check/rules/security-headers.md
new file mode 100644
index 0000000000000000000000000000000000000000..40132b336a484b9fb5db13d04404ce31277126b2
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/security-headers.md
@@ -0,0 +1,120 @@
+---
+title: Security Headers
+impact: HIGH
+tags: [headers, csp, hsts, xss, clickjacking, owasp]
+---
+
+# Security Headers
+
+Check for proper HTTP security headers that protect against XSS, clickjacking, MIME sniffing, and downgrade attacks.
+
+> **Related:** XSS input validation in [injection-attacks.md](injection-attacks.md). CORS in [cors-configuration.md](cors-configuration.md).
+
+## Why
+
+- **XSS protection**: CSP prevents script injection
+- **Clickjacking prevention**: X-Frame-Options stops iframe embedding
+- **HTTPS enforcement**: HSTS ensures encrypted connections
+- **MIME sniffing attacks**: X-Content-Type-Options prevents content confusion
+- **Information leakage**: Referrer-Policy controls referrer data
+
+## What to Check
+
+- [ ] Missing Content-Security-Policy header
+- [ ] Missing Strict-Transport-Security (HSTS)
+- [ ] Missing X-Frame-Options
+- [ ] Missing X-Content-Type-Options
+- [ ] Overly permissive CSP (`unsafe-inline`, `unsafe-eval`)
+- [ ] No Permissions-Policy
+- [ ] Missing Referrer-Policy
+
+## Bad Patterns
+
+```typescript
+// Bad: No security headers
+async function handler(req: Request): Promise<Response> {
+  let html = "<h1>Hello</h1>";
+
+ // VULNERABLE: Missing all security headers
+ return new Response(html, {
+ headers: { "Content-Type": "text/html" },
+ });
+}
+
+// Bad: Permissive CSP
+const headers = {
+ // VULNERABLE: unsafe-inline allows XSS
+ "Content-Security-Policy": "default-src * 'unsafe-inline' 'unsafe-eval'",
+};
+```
+
+## Good Patterns
+
+```typescript
+// Good: Comprehensive security headers
+function getSecurityHeaders(): Record<string, string> {
+ return {
+ "Content-Security-Policy": [
+ "default-src 'self'",
+ "script-src 'self'",
+ "style-src 'self' 'unsafe-inline'",
+ "img-src 'self' data: https:",
+ "font-src 'self'",
+ "connect-src 'self'",
+ "frame-ancestors 'none'",
+ "base-uri 'self'",
+ "form-action 'self'",
+ ].join("; "),
+ "X-Frame-Options": "DENY",
+ "Strict-Transport-Security": "max-age=31536000; includeSubDomains; preload",
+ "X-Content-Type-Options": "nosniff",
+ "Referrer-Policy": "strict-origin-when-cross-origin",
+ "Permissions-Policy": "camera=(), microphone=(), geolocation=()",
+ };
+}
+
+async function handler(req: Request): Promise<Response> {
+  let html = "<h1>Hello</h1>";
+
+ return new Response(html, {
+ headers: {
+ "Content-Type": "text/html",
+ ...getSecurityHeaders(),
+ },
+ });
+}
+
+// Good: CSP with nonces for inline scripts
+async function renderPage(req: Request): Promise<Response> {
+ let nonce = crypto.randomBytes(16).toString("base64");
+
+  let html = `
+    <!DOCTYPE html>
+    <html>
+      <head>
+        <script nonce="${nonce}">console.log("inline script allowed by nonce");</script>
+      </head>
+      <body>Content</body>
+    </html>
+  `;
+
+ return new Response(html, {
+ headers: {
+ "Content-Type": "text/html",
+ "Content-Security-Policy": `default-src 'self'; script-src 'self' 'nonce-${nonce}'`,
+ },
+ });
+}
+```
+
+## Rules
+
+1. **Always set CSP** - Strict policy without `unsafe-inline`/`unsafe-eval`
+2. **Enable HSTS** - Minimum 1 year, include subdomains
+3. **Set X-Frame-Options** - Use `DENY` or `SAMEORIGIN`
+4. **Set X-Content-Type-Options** - Always `nosniff`
+5. **Configure Referrer-Policy** - `strict-origin-when-cross-origin`
+6. **Use nonces for inline scripts** - When inline scripts are needed
+7. **Set Permissions-Policy** - Restrict unnecessary browser features
diff --git a/.agents/skills/owasp-security-check/rules/security-misconfiguration.md b/.agents/skills/owasp-security-check/rules/security-misconfiguration.md
new file mode 100644
index 0000000000000000000000000000000000000000..86ac1b81084cb57eac183735f4c91f68a4eda8d4
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/security-misconfiguration.md
@@ -0,0 +1,110 @@
+---
+title: Security Misconfiguration
+impact: HIGH
+tags: [configuration, defaults, error-handling, owasp-a05]
+---
+
+# Security Misconfiguration
+
+Check for insecure default configurations, unnecessary features enabled, verbose error messages, and missing security patches.
+
+## Why
+
+- **Information disclosure**: Verbose errors reveal system details
+- **Unauthorized access**: Default credentials still active
+- **Attack surface**: Unnecessary features expose vulnerabilities
+- **Known vulnerabilities**: Outdated software with public exploits
+
+## What to Check
+
+**Vulnerability Indicators:**
+
+- [ ] Debug mode enabled in production
+- [ ] Default credentials not changed
+- [ ] Unnecessary features/endpoints enabled
+- [ ] Detailed error messages in production
+- [ ] Directory listing enabled
+- [ ] Outdated dependencies
+- [ ] Missing security patches
+
+## Bad Patterns
+
+```typescript
+// Bad: Debug mode in production
+const DEBUG = true; // Should be from env
+if (DEBUG) {
+ console.log("Detailed system info:", process.env);
+}
+
+// Bad: Verbose error messages
+catch (error) {
+ return Response.json({
+ error: error.message,
+ stack: error.stack,
+ query: sqlQuery,
+ env: process.env
+ }, { status: 500 });
+}
+
+// Bad: Default credentials
+const ADMIN_PASSWORD = "admin123";
+
+// Bad: Unnecessary admin endpoints exposed
+async function debugInfo(req: Request): Promise<Response> {
+ return Response.json({
+ env: process.env,
+ config: appConfig,
+ routes: allRoutes
+ });
+}
+```
+
+## Good Patterns
+
+```typescript
+// Good: Environment-aware configuration
+const isProduction = process.env.NODE_ENV === "production";
+
+const config = {
+ debug: !isProduction,
+ logLevel: isProduction ? "error" : "debug",
+ errorDetails: !isProduction
+};
+
+// Good: Generic error messages in production
+catch (error) {
+ console.error("Error:", error);
+
+ let message = isProduction
+ ? "An error occurred"
+ : error.message;
+
+ return Response.json({ error: message }, { status: 500 });
+}
+
+// Good: Strong credentials from environment
+const ADMIN_PASSWORD = process.env.ADMIN_PASSWORD;
+if (!ADMIN_PASSWORD || ADMIN_PASSWORD.length < 20) {
+ throw new Error("ADMIN_PASSWORD must be set and strong");
+}
+
+// Good: Disable debug endpoints in production
+async function debugInfo(req: Request): Promise<Response> {
+ if (process.env.NODE_ENV === "production") {
+ return new Response("Not found", { status: 404 });
+ }
+
+ return Response.json({ routes: publicRoutes });
+}
+```
+
+## Rules
+
+1. **Disable debug mode in production** - No verbose logging or errors
+2. **Change default credentials** - Require strong passwords
+3. **Disable unnecessary features** - Minimize attack surface
+4. **Generic error messages** - Don't reveal system details
+5. **Keep dependencies updated** - Regularly patch vulnerabilities
+6. **Remove development endpoints** - No debug/admin routes in production
+7. **Secure default configurations** - Fail securely by default
+8. **Regular security audits** - npm audit, dependency checks
diff --git a/.agents/skills/owasp-security-check/rules/sensitive-data-exposure.md b/.agents/skills/owasp-security-check/rules/sensitive-data-exposure.md
new file mode 100644
index 0000000000000000000000000000000000000000..55bc12f1a6ec1c126f57ac5c896eb9c08798f92b
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/sensitive-data-exposure.md
@@ -0,0 +1,133 @@
+---
+title: Sensitive Data Exposure
+impact: CRITICAL
+tags: [data-exposure, pii, privacy, information-disclosure, owasp]
+---
+
+# Sensitive Data Exposure
+
+Check for PII, credentials, and sensitive data exposed in API responses, error messages, logs, or client-side code.
+
+> **Related:** Encryption in [cryptographic-failures.md](cryptographic-failures.md). Secrets in [secrets-management.md](secrets-management.md). Logging in [logging-monitoring.md](logging-monitoring.md).
+
+## Why
+
+- **Privacy violation**: Exposes users' personal information
+- **Compliance risk**: GDPR, CCPA, HIPAA violations
+- **Identity theft**: PII enables fraud and impersonation
+- **Credential theft**: Exposed secrets enable account takeover
+
+## What to Check
+
+- [ ] Password hashes returned in API responses
+- [ ] Email, phone, SSN in public endpoints
+- [ ] Error messages revealing stack traces or database info
+- [ ] Debug information in production
+- [ ] API keys, tokens in client-side code
+- [ ] Excessive data in responses (return only what's needed)
+- [ ] Sensitive data logged to console or files
+
+## Bad Patterns
+
+```typescript
+// Bad: Returning all user fields including sensitive data
+async function getUser(req: Request): Promise<Response> {
+ let user = await db.users.findUnique({ where: { id } });
+ // Returns password hash, email, tokens, etc.
+ return Response.json(user);
+}
+
+// Bad: Logging sensitive data
+console.log("User login:", { email, password, creditCard });
+
+// Bad: Exposing internal IDs
+return Response.json({
+ internalUserId: user.id,
+ databaseId: user.dbId,
+});
+```
+
+## Good Patterns
+
+```typescript
+// Good: Explicit field selection
+async function getUser(req: Request): Promise<Response> {
+ let session = await getSession(req);
+
+ let user = await db.users.findUnique({
+ where: { id: session.userId },
+ select: {
+ id: true,
+ name: true,
+ avatar: true,
+ createdAt: true,
+ // Excludes: password, email, tokens, etc.
+ },
+ });
+
+ return Response.json(user);
+}
+
+// Good: DTO for public profiles
+async function getUserProfile(req: Request): Promise<Response> {
+ let url = new URL(req.url);
+ let userId = url.searchParams.get("id");
+
+ let user = await db.users.findUnique({
+ where: { id: userId },
+ select: { id: true, name: true, avatar: true, bio: true },
+ });
+
+ return Response.json(user);
+}
+
+// Good: Conditional field exposure
+async function getUserProfile(req: Request): Promise<Response> {
+ let session = await getSession(req);
+ let url = new URL(req.url);
+ let userId = url.searchParams.get("id");
+ let isOwn = session?.userId === userId;
+
+ let user = await db.users.findUnique({
+ where: { id: userId },
+ select: {
+ id: true,
+ name: true,
+ avatar: true,
+ bio: true,
+ email: isOwn,
+ emailVerified: isOwn,
+ },
+ });
+
+ return Response.json(user);
+}
+
+// Good: Sanitize logs
+function sanitizeForLogging(obj: any): any {
+ let sensitive = ["password", "token", "secret", "apiKey", "creditCard"];
+ let sanitized = { ...obj };
+
+ for (const key of Object.keys(sanitized)) {
+ if (sensitive.some((s) => key.toLowerCase().includes(s))) {
+ sanitized[key] = "[REDACTED]";
+ }
+ }
+
+ return sanitized;
+}
+
+console.log("Login attempt:", sanitizeForLogging({ email, password }));
+// Output: { email: "user@example.com", password: "[REDACTED]" }
+```
+
+## Rules
+
+1. **Never return password hashes** - Even hashed, they can be cracked
+2. **Use explicit field selection** - Don't return entire database records
+3. **Create DTOs for responses** - Define exactly what fields are public
+4. **Generic error messages** - Don't expose system details to users
+5. **Log full errors server-side** - Return generic messages to clients
+6. **Sanitize logs** - Redact passwords, tokens, PII before logging
+7. **Different views for different users** - Own profile vs others' profiles
+8. **Disable debug in production** - No verbose errors or stack traces
diff --git a/.agents/skills/owasp-security-check/rules/session-security.md b/.agents/skills/owasp-security-check/rules/session-security.md
new file mode 100644
index 0000000000000000000000000000000000000000..3a871d3c8f0e255498e7ff9bbda754e5c3a0f1bb
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/session-security.md
@@ -0,0 +1,119 @@
+---
+title: Session Security
+impact: HIGH
+tags: [sessions, cookies, jwt, tokens]
+---
+
+# Session Security
+
+Check for secure session management including cookie flags, token storage, and session lifecycle.
+
+> **Related:** Authentication is covered in [authentication-failures.md](authentication-failures.md). CSRF protection is covered in [csrf-protection.md](csrf-protection.md).
+
+## Why
+
+- **Session hijacking**: Attackers steal session tokens
+- **Session fixation**: Attackers set known session ID
+- **XSS token theft**: JavaScript access to tokens
+- **CSRF attacks**: Missing cookie protection
+
+## What to Check
+
+**Vulnerability Indicators:**
+
+- [ ] Cookies missing HttpOnly flag
+- [ ] Cookies missing Secure flag
+- [ ] Cookies missing SameSite attribute
+- [ ] JWT stored in localStorage
+- [ ] Sessions never expire
+- [ ] Session not regenerated after login
+- [ ] Predictable session IDs
+
+## Bad Patterns
+
+```typescript
+// Bad: No security flags on cookie
+return new Response("OK", {
+ headers: { "Set-Cookie": `session=${sessionId}` },
+});
+
+// Bad: Session never expires
+await db.session.create({
+ data: { id: sessionId, userId }, // No expiresAt!
+});
+
+// Bad: Predictable session ID
+const sessionId = `${Date.now()}-${Math.random()}`;
+```
+
+## Good Patterns
+
+```typescript
+// Good: Secure cookie with all flags
+async function createSession(userId: string): Promise<Response> {
+ let sessionId = crypto.randomBytes(32).toString("hex");
+
+ await db.session.create({
+ data: {
+ id: sessionId,
+ userId,
+ expiresAt: new Date(Date.now() + 60 * 60 * 1000), // 1 hour
+ createdAt: new Date(),
+ },
+ });
+
+ return new Response("OK", {
+ headers: {
+ "Set-Cookie": [
+ `session=${sessionId}`,
+ "HttpOnly",
+ "Secure",
+ "SameSite=Strict",
+ "Path=/",
+ "Max-Age=3600",
+ ].join("; "),
+ },
+ });
+}
+
+// Good: Session validation with expiry
+async function validateSession(req: Request): Promise<string | null> {
+ let sessionId = getCookie(req, "session");
+ if (!sessionId) return null;
+
+ let session = await db.session.findUnique({ where: { id: sessionId } });
+ if (!session || session.expiresAt < new Date()) {
+ if (session) await db.session.delete({ where: { id: sessionId } });
+ return null;
+ }
+
+ // Extend session (sliding expiration)
+ await db.session.update({
+ where: { id: sessionId },
+ data: { expiresAt: new Date(Date.now() + 60 * 60 * 1000) },
+ });
+
+ return session.userId;
+}
+
+// Good: Logout invalidates session
+async function logout(req: Request): Promise<Response> {
+ let sessionId = getCookie(req, "session");
+ if (sessionId) await db.session.delete({ where: { id: sessionId } });
+
+ return new Response("OK", {
+ headers: { "Set-Cookie": "session=; Max-Age=0; Path=/" },
+ });
+}
+```
+
+## Rules
+
+1. **Set HttpOnly flag** - Prevent XSS token theft
+2. **Set Secure flag** - HTTPS only
+3. **Set SameSite=Strict** - CSRF protection
+4. **Use cryptographically random IDs** - crypto.randomBytes
+5. **Set expiration** - Both absolute and idle timeout
+6. **Regenerate on login** - Prevent session fixation
+7. **Don't store in localStorage** - Use HttpOnly cookies
+8. **Validate on every request** - Check expiry and validity
diff --git a/.agents/skills/owasp-security-check/rules/ssrf-attacks.md b/.agents/skills/owasp-security-check/rules/ssrf-attacks.md
new file mode 100644
index 0000000000000000000000000000000000000000..0cb106f29606914c41d2c13276537e50d05a356b
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/ssrf-attacks.md
@@ -0,0 +1,100 @@
+---
+title: Server-Side Request Forgery (SSRF)
+impact: CRITICAL
+tags: [ssrf, url-validation, owasp-a10]
+---
+
+# Server-Side Request Forgery (SSRF)
+
+Check for unvalidated URLs that allow attackers to make requests to internal services or arbitrary external URLs.
+
+> **Related:** URL validation in redirects is covered in [redirect-validation.md](redirect-validation.md).
+
+## Why
+
+- **Internal network access**: Attackers reach internal services
+- **Cloud metadata exposure**: Access to AWS/GCP metadata endpoints
+- **Port scanning**: Map internal network
+- **Bypass firewall**: Access protected resources
+
+## What to Check
+
+**Vulnerability Indicators:**
+
+- [ ] User-provided URLs passed to fetch/axios without validation
+- [ ] No allowlist for allowed domains
+- [ ] Missing checks for internal IP ranges
+- [ ] Webhook URLs not validated
+- [ ] URL redirects followed automatically
+
+## Bad Patterns
+
+```typescript
+// Bad: Fetching user-provided URL
+async function fetchUrl(req: Request): Promise<Response> {
+ let { url } = await req.json();
+
+ // SSRF: Can access internal services!
+ let response = await fetch(url);
+ let data = await response.text();
+
+ return new Response(data);
+}
+
+// Bad: No validation on webhook URL
+async function registerWebhook(req: Request): Promise<void> {
+ let { webhookUrl } = await req.json();
+
+ await db.webhook.create({
+ data: { url: webhookUrl },
+ });
+
+ // Later: fetch(webhookUrl) - could be internal
+}
+```
+
+## Good Patterns
+
+```typescript
+// Good: Validate against allowlist
+const ALLOWED_DOMAINS = ["api.example.com", "cdn.example.com"];
+
+async function fetchUrl(req: Request): Promise<Response> {
+ let { url } = await req.json();
+ let parsedUrl = new URL(url);
+
+ if (parsedUrl.protocol !== "https:") {
+ return new Response("Only HTTPS allowed", { status: 400 });
+ }
+
+ if (!ALLOWED_DOMAINS.includes(parsedUrl.hostname)) {
+ return new Response("Domain not allowed", { status: 400 });
+ }
+
+ if (isInternalIP(parsedUrl.hostname)) {
+ return new Response("Internal IPs not allowed", { status: 400 });
+ }
+
+ let response = await fetch(url, { redirect: "manual" });
+ return new Response(await response.text());
+}
+
+function isInternalIP(hostname: string): boolean {
+ return [
+ /^127\./,
+ /^10\./,
+ /^172\.(1[6-9]|2[0-9]|3[0-1])\./,
+ /^192\.168\./,
+ /^169\.254\./,
+ /^localhost$/i,
+ ].some((range) => range.test(hostname));
+}
+```
+
+## Rules
+
+1. **Validate URLs against allowlist** - Never trust user URLs
+2. **Block internal IP ranges** - 127.0.0.1, 10.x, 192.168.x, etc.
+3. **Enforce HTTPS** - No HTTP or other protocols
+4. **Disable redirects** - Or validate redirect targets
+5. **Block cloud metadata** - 169.254.169.254 (AWS/GCP/Azure)
diff --git a/.agents/skills/owasp-security-check/rules/vulnerable-dependencies.md b/.agents/skills/owasp-security-check/rules/vulnerable-dependencies.md
new file mode 100644
index 0000000000000000000000000000000000000000..e4e73c60b3c86344ce4585365eb5db384bf81844
--- /dev/null
+++ b/.agents/skills/owasp-security-check/rules/vulnerable-dependencies.md
@@ -0,0 +1,99 @@
+---
+title: Vulnerable and Outdated Dependencies
+impact: MEDIUM
+tags: [dependencies, supply-chain, owasp-a06]
+---
+
+# Vulnerable and Outdated Dependencies
+
+Check for outdated packages with known security vulnerabilities and supply chain risks.
+
+## Why
+
+- **Known exploits**: Public CVEs make attacks easy
+- **Supply chain attacks**: Compromised packages
+- **Transitive dependencies**: Vulnerabilities deep in dependency tree
+- **Maintenance risk**: Unmaintained packages won't get patches
+
+## What to Check
+
+- [ ] Dependencies with known CVEs or security advisories
+- [ ] Severely outdated packages (major versions behind current)
+- [ ] Packages without recent updates (abandoned/unmaintained)
+- [ ] Missing dependency lockfiles
+- [ ] Wildcard or loose version constraints in production
+- [ ] Unused dependencies bloating the project
+- [ ] Development dependencies bundled in production builds
+- [ ] Transitive vulnerabilities in indirect dependencies
+
+## Bad Patterns
+
+```typescript
+// Bad: Wildcard versions allow unexpected updates
+// package.json
+{
+ "dependencies": {
+ "express": "*", // Any version can be installed
+ "react": "^18.0.0" // Minor/patch versions can change
+ }
+}
+
+// Bad: No lockfile means versions drift between installs
+// Missing: package-lock.json, yarn.lock, pnpm-lock.yaml, etc.
+
+// Bad: Dev dependencies mixed with production
+{
+ "dependencies": {
+ "express": "4.18.2",
+ "jest": "29.5.0", // Should be devDependency
+ "eslint": "8.40.0" // Should be devDependency
+ }
+}
+```
+
+## Good Patterns
+
+````typescript
+// Good: Pinned versions with lockfile
+{
+ "dependencies": {
+ "express": "4.18.2", // Exact version pinned
+ "react": "18.2.0"
+ },
+ "devDependencies": {
+ "jest": "29.5.0",
+ "eslint": "8.40.0"
+ }
+}
+// Plus: Lockfile committed (package-lock.json, yarn.lock, etc.)
+
+// Good: Regular dependency audits in CI/CD
+// .github/workflows/security.yml
+```yaml
+name: Security Audit
+on: [push, pull_request]
+jobs:
+ audit:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - run: npm audit --production # Or: pip-audit, bundle audit, etc.
+````
+
+**Before installing new packages:**
+
+- Check package age and download stats
+- Review maintainer history
+- Scan for known vulnerabilities
+- Verify package scope matches intent (avoid typosquatting)
+
+## Rules
+
+1. **Always use lockfiles** - Commit dependency lockfiles for reproducible builds
+2. **Pin production versions** - Use exact versions for production dependencies
+3. **Audit regularly** - Run security audits in CI/CD and before deployments
+4. **Keep dependencies updated** - Use automated update tools
+5. **Separate dev dependencies** - Keep development tools separate from production
+6. **Remove unused packages** - Regularly clean up unused dependencies
+7. **Review before adding** - Check package age, maintainers, and reputation
+8. **Monitor advisories** - Subscribe to security advisories for critical dependencies
diff --git a/.agents/skills/python-testing-patterns/SKILL.md b/.agents/skills/python-testing-patterns/SKILL.md
new file mode 100644
index 0000000000000000000000000000000000000000..66938947287573c8330aaf70dfdb3976f0fc4393
--- /dev/null
+++ b/.agents/skills/python-testing-patterns/SKILL.md
@@ -0,0 +1,1050 @@
+---
+name: python-testing-patterns
+description: Implement comprehensive testing strategies with pytest, fixtures, mocking, and test-driven development. Use when writing Python tests, setting up test suites, or implementing testing best practices.
+---
+
+# Python Testing Patterns
+
+Comprehensive guide to implementing robust testing strategies in Python using pytest, fixtures, mocking, parameterization, and test-driven development practices.
+
+## When to Use This Skill
+
+- Writing unit tests for Python code
+- Setting up test suites and test infrastructure
+- Implementing test-driven development (TDD)
+- Creating integration tests for APIs and services
+- Mocking external dependencies and services
+- Testing async code and concurrent operations
+- Setting up continuous testing in CI/CD
+- Implementing property-based testing
+- Testing database operations
+- Debugging failing tests
+
+## Core Concepts
+
+### 1. Test Types
+
+- **Unit Tests**: Test individual functions/classes in isolation
+- **Integration Tests**: Test interaction between components
+- **Functional Tests**: Test complete features end-to-end
+- **Performance Tests**: Measure speed and resource usage
+
+### 2. Test Structure (AAA Pattern)
+
+- **Arrange**: Set up test data and preconditions
+- **Act**: Execute the code under test
+- **Assert**: Verify the results
+
+### 3. Test Coverage
+
+- Measure what code is exercised by tests
+- Identify untested code paths
+- Aim for meaningful coverage, not just high percentages
+
+### 4. Test Isolation
+
+- Tests should be independent
+- No shared state between tests
+- Each test should clean up after itself
+
+## Quick Start
+
+```python
+# test_example.py
+def add(a, b):
+ return a + b
+
+def test_add():
+ """Basic test example."""
+ result = add(2, 3)
+ assert result == 5
+
+def test_add_negative():
+ """Test with negative numbers."""
+ assert add(-1, 1) == 0
+
+# Run with: pytest test_example.py
+```
+
+## Fundamental Patterns
+
+### Pattern 1: Basic pytest Tests
+
+```python
+# test_calculator.py
+import pytest
+
+class Calculator:
+ """Simple calculator for testing."""
+
+ def add(self, a: float, b: float) -> float:
+ return a + b
+
+ def subtract(self, a: float, b: float) -> float:
+ return a - b
+
+ def multiply(self, a: float, b: float) -> float:
+ return a * b
+
+ def divide(self, a: float, b: float) -> float:
+ if b == 0:
+ raise ValueError("Cannot divide by zero")
+ return a / b
+
+
+def test_addition():
+ """Test addition."""
+ calc = Calculator()
+ assert calc.add(2, 3) == 5
+ assert calc.add(-1, 1) == 0
+ assert calc.add(0, 0) == 0
+
+
+def test_subtraction():
+ """Test subtraction."""
+ calc = Calculator()
+ assert calc.subtract(5, 3) == 2
+ assert calc.subtract(0, 5) == -5
+
+
+def test_multiplication():
+ """Test multiplication."""
+ calc = Calculator()
+ assert calc.multiply(3, 4) == 12
+ assert calc.multiply(0, 5) == 0
+
+
+def test_division():
+ """Test division."""
+ calc = Calculator()
+ assert calc.divide(6, 3) == 2
+ assert calc.divide(5, 2) == 2.5
+
+
+def test_division_by_zero():
+ """Test division by zero raises error."""
+ calc = Calculator()
+ with pytest.raises(ValueError, match="Cannot divide by zero"):
+ calc.divide(5, 0)
+```
+
+### Pattern 2: Fixtures for Setup and Teardown
+
+```python
+# test_database.py
+import pytest
+from typing import Generator
+
+class Database:
+ """Simple database class."""
+
+ def __init__(self, connection_string: str):
+ self.connection_string = connection_string
+ self.connected = False
+
+ def connect(self):
+ """Connect to database."""
+ self.connected = True
+
+ def disconnect(self):
+ """Disconnect from database."""
+ self.connected = False
+
+ def query(self, sql: str) -> list:
+ """Execute query."""
+ if not self.connected:
+ raise RuntimeError("Not connected")
+ return [{"id": 1, "name": "Test"}]
+
+
+@pytest.fixture
+def db() -> Generator[Database, None, None]:
+ """Fixture that provides connected database."""
+ # Setup
+ database = Database("sqlite:///:memory:")
+ database.connect()
+
+ # Provide to test
+ yield database
+
+ # Teardown
+ database.disconnect()
+
+
+def test_database_query(db):
+ """Test database query with fixture."""
+ results = db.query("SELECT * FROM users")
+ assert len(results) == 1
+ assert results[0]["name"] == "Test"
+
+
+@pytest.fixture(scope="session")
+def app_config():
+ """Session-scoped fixture - created once per test session."""
+ return {
+ "database_url": "postgresql://localhost/test",
+ "api_key": "test-key",
+ "debug": True
+ }
+
+
+@pytest.fixture(scope="module")
+def api_client(app_config):
+ """Module-scoped fixture - created once per test module."""
+ # Setup expensive resource
+ client = {"config": app_config, "session": "active"}
+ yield client
+ # Cleanup
+ client["session"] = "closed"
+
+
+def test_api_client(api_client):
+ """Test using api client fixture."""
+ assert api_client["session"] == "active"
+ assert api_client["config"]["debug"] is True
+```
+
+### Pattern 3: Parameterized Tests
+
+```python
+# test_validation.py
+import pytest
+
+def is_valid_email(email: str) -> bool:
+ """Check if email is valid."""
+ return "@" in email and "." in email.split("@")[1]
+
+
+@pytest.mark.parametrize("email,expected", [
+ ("user@example.com", True),
+ ("test.user@domain.co.uk", True),
+ ("invalid.email", False),
+ ("@example.com", False),
+ ("user@domain", False),
+ ("", False),
+])
+def test_email_validation(email, expected):
+ """Test email validation with various inputs."""
+ assert is_valid_email(email) == expected
+
+
+@pytest.mark.parametrize("a,b,expected", [
+ (2, 3, 5),
+ (0, 0, 0),
+ (-1, 1, 0),
+ (100, 200, 300),
+ (-5, -5, -10),
+])
+def test_addition_parameterized(a, b, expected):
+ """Test addition with multiple parameter sets."""
+ from test_calculator import Calculator
+ calc = Calculator()
+ assert calc.add(a, b) == expected
+
+
+# Using pytest.param for special cases
+@pytest.mark.parametrize("value,expected", [
+ pytest.param(1, True, id="positive"),
+ pytest.param(0, False, id="zero"),
+ pytest.param(-1, False, id="negative"),
+])
+def test_is_positive(value, expected):
+ """Test with custom test IDs."""
+ assert (value > 0) == expected
+```
+
+### Pattern 4: Mocking with unittest.mock
+
+```python
+# test_api_client.py
+import pytest
+from unittest.mock import Mock, patch, MagicMock
+import requests
+
+class APIClient:
+ """Simple API client."""
+
+ def __init__(self, base_url: str):
+ self.base_url = base_url
+
+ def get_user(self, user_id: int) -> dict:
+ """Fetch user from API."""
+ response = requests.get(f"{self.base_url}/users/{user_id}")
+ response.raise_for_status()
+ return response.json()
+
+ def create_user(self, data: dict) -> dict:
+ """Create new user."""
+ response = requests.post(f"{self.base_url}/users", json=data)
+ response.raise_for_status()
+ return response.json()
+
+
+def test_get_user_success():
+ """Test successful API call with mock."""
+ client = APIClient("https://api.example.com")
+
+ mock_response = Mock()
+ mock_response.json.return_value = {"id": 1, "name": "John Doe"}
+ mock_response.raise_for_status.return_value = None
+
+ with patch("requests.get", return_value=mock_response) as mock_get:
+ user = client.get_user(1)
+
+ assert user["id"] == 1
+ assert user["name"] == "John Doe"
+ mock_get.assert_called_once_with("https://api.example.com/users/1")
+
+
+def test_get_user_not_found():
+ """Test API call with 404 error."""
+ client = APIClient("https://api.example.com")
+
+ mock_response = Mock()
+ mock_response.raise_for_status.side_effect = requests.HTTPError("404 Not Found")
+
+ with patch("requests.get", return_value=mock_response):
+ with pytest.raises(requests.HTTPError):
+ client.get_user(999)
+
+
+@patch("requests.post")
+def test_create_user(mock_post):
+ """Test user creation with decorator syntax."""
+ client = APIClient("https://api.example.com")
+
+ mock_post.return_value.json.return_value = {"id": 2, "name": "Jane Doe"}
+ mock_post.return_value.raise_for_status.return_value = None
+
+ user_data = {"name": "Jane Doe", "email": "jane@example.com"}
+ result = client.create_user(user_data)
+
+ assert result["id"] == 2
+ mock_post.assert_called_once()
+ call_args = mock_post.call_args
+ assert call_args.kwargs["json"] == user_data
+```
+
+### Pattern 5: Testing Exceptions
+
+```python
+# test_exceptions.py
+import pytest
+
+def divide(a: float, b: float) -> float:
+ """Divide a by b."""
+ if b == 0:
+ raise ZeroDivisionError("Division by zero")
+ if not isinstance(a, (int, float)) or not isinstance(b, (int, float)):
+ raise TypeError("Arguments must be numbers")
+ return a / b
+
+
+def test_zero_division():
+ """Test exception is raised for division by zero."""
+ with pytest.raises(ZeroDivisionError):
+ divide(10, 0)
+
+
+def test_zero_division_with_message():
+ """Test exception message."""
+ with pytest.raises(ZeroDivisionError, match="Division by zero"):
+ divide(5, 0)
+
+
+def test_type_error():
+ """Test type error exception."""
+ with pytest.raises(TypeError, match="must be numbers"):
+ divide("10", 5)
+
+
+def test_exception_info():
+ """Test accessing exception info."""
+ with pytest.raises(ValueError) as exc_info:
+ int("not a number")
+
+ assert "invalid literal" in str(exc_info.value)
+```
+
+## Advanced Patterns
+
+### Pattern 6: Testing Async Code
+
+```python
+# test_async.py
+import pytest
+import asyncio
+
+async def fetch_data(url: str) -> dict:
+ """Fetch data asynchronously."""
+ await asyncio.sleep(0.1)
+ return {"url": url, "data": "result"}
+
+
+@pytest.mark.asyncio
+async def test_fetch_data():
+ """Test async function."""
+ result = await fetch_data("https://api.example.com")
+ assert result["url"] == "https://api.example.com"
+ assert "data" in result
+
+
+@pytest.mark.asyncio
+async def test_concurrent_fetches():
+ """Test concurrent async operations."""
+ urls = ["url1", "url2", "url3"]
+ tasks = [fetch_data(url) for url in urls]
+ results = await asyncio.gather(*tasks)
+
+ assert len(results) == 3
+ assert all("data" in r for r in results)
+
+
+@pytest.fixture
+async def async_client():
+ """Async fixture."""
+ client = {"connected": True}
+ yield client
+ client["connected"] = False
+
+
+@pytest.mark.asyncio
+async def test_with_async_fixture(async_client):
+ """Test using async fixture."""
+ assert async_client["connected"] is True
+```
+
+### Pattern 7: Monkeypatch for Testing
+
+```python
+# test_environment.py
+import os
+import pytest
+
+def get_database_url() -> str:
+ """Get database URL from environment."""
+ return os.environ.get("DATABASE_URL", "sqlite:///:memory:")
+
+
+def test_database_url_default():
+ """Test default database URL."""
+ # Will use actual environment variable if set
+ url = get_database_url()
+ assert url
+
+
+def test_database_url_custom(monkeypatch):
+ """Test custom database URL with monkeypatch."""
+ monkeypatch.setenv("DATABASE_URL", "postgresql://localhost/test")
+ assert get_database_url() == "postgresql://localhost/test"
+
+
+def test_database_url_not_set(monkeypatch):
+ """Test when env var is not set."""
+ monkeypatch.delenv("DATABASE_URL", raising=False)
+ assert get_database_url() == "sqlite:///:memory:"
+
+
+class Config:
+ """Configuration class."""
+
+ def __init__(self):
+ self.api_key = "production-key"
+
+ def get_api_key(self):
+ return self.api_key
+
+
+def test_monkeypatch_attribute(monkeypatch):
+ """Test monkeypatching object attributes."""
+ config = Config()
+ monkeypatch.setattr(config, "api_key", "test-key")
+ assert config.get_api_key() == "test-key"
+```
+
+### Pattern 8: Temporary Files and Directories
+
+```python
+# test_file_operations.py
+import pytest
+from pathlib import Path
+
+def save_data(filepath: Path, data: str):
+ """Save data to file."""
+ filepath.write_text(data)
+
+
+def load_data(filepath: Path) -> str:
+ """Load data from file."""
+ return filepath.read_text()
+
+
+def test_file_operations(tmp_path):
+ """Test file operations with temporary directory."""
+ # tmp_path is a pathlib.Path object
+ test_file = tmp_path / "test_data.txt"
+
+ # Save data
+ save_data(test_file, "Hello, World!")
+
+ # Verify file exists
+ assert test_file.exists()
+
+ # Load and verify data
+ data = load_data(test_file)
+ assert data == "Hello, World!"
+
+
+def test_multiple_files(tmp_path):
+ """Test with multiple temporary files."""
+ files = {
+ "file1.txt": "Content 1",
+ "file2.txt": "Content 2",
+ "file3.txt": "Content 3"
+ }
+
+ for filename, content in files.items():
+ filepath = tmp_path / filename
+ save_data(filepath, content)
+
+ # Verify all files created
+ assert len(list(tmp_path.iterdir())) == 3
+
+ # Verify contents
+ for filename, expected_content in files.items():
+ filepath = tmp_path / filename
+ assert load_data(filepath) == expected_content
+```
+
+### Pattern 9: Custom Fixtures and Conftest
+
+```python
+# conftest.py
+"""Shared fixtures for all tests."""
+import pytest
+
+@pytest.fixture(scope="session")
+def database_url():
+ """Provide database URL for all tests."""
+ return "postgresql://localhost/test_db"
+
+
+@pytest.fixture(autouse=True)
+def reset_database(database_url):
+ """Auto-use fixture that runs before each test."""
+ # Setup: Clear database
+ print(f"Clearing database: {database_url}")
+ yield
+ # Teardown: Clean up
+ print("Test completed")
+
+
+@pytest.fixture
+def sample_user():
+ """Provide sample user data."""
+ return {
+ "id": 1,
+ "name": "Test User",
+ "email": "test@example.com"
+ }
+
+
+@pytest.fixture
+def sample_users():
+ """Provide list of sample users."""
+ return [
+ {"id": 1, "name": "User 1"},
+ {"id": 2, "name": "User 2"},
+ {"id": 3, "name": "User 3"},
+ ]
+
+
+# Parametrized fixture
+@pytest.fixture(params=["sqlite", "postgresql", "mysql"])
+def db_backend(request):
+ """Fixture that runs tests with different database backends."""
+ return request.param
+
+
+def test_with_db_backend(db_backend):
+ """This test will run 3 times with different backends."""
+ print(f"Testing with {db_backend}")
+ assert db_backend in ["sqlite", "postgresql", "mysql"]
+```
+
+### Pattern 10: Property-Based Testing
+
+```python
+# test_properties.py
+from hypothesis import given, strategies as st
+import pytest
+
+def reverse_string(s: str) -> str:
+ """Reverse a string."""
+ return s[::-1]
+
+
+@given(st.text())
+def test_reverse_twice_is_original(s):
+ """Property: reversing twice returns original."""
+ assert reverse_string(reverse_string(s)) == s
+
+
+@given(st.text())
+def test_reverse_length(s):
+ """Property: reversed string has same length."""
+ assert len(reverse_string(s)) == len(s)
+
+
+@given(st.integers(), st.integers())
+def test_addition_commutative(a, b):
+ """Property: addition is commutative."""
+ assert a + b == b + a
+
+
+@given(st.lists(st.integers()))
+def test_sorted_list_properties(lst):
+ """Property: sorted list is ordered."""
+ sorted_lst = sorted(lst)
+
+ # Same length
+ assert len(sorted_lst) == len(lst)
+
+ # All elements present
+ assert set(sorted_lst) == set(lst)
+
+ # Is ordered
+ for i in range(len(sorted_lst) - 1):
+ assert sorted_lst[i] <= sorted_lst[i + 1]
+```
+
+## Test Design Principles
+
+### One Behavior Per Test
+
+Each test should verify exactly one behavior. This makes failures easy to diagnose and tests easy to maintain.
+
+```python
+# BAD - testing multiple behaviors
+def test_user_service():
+ user = service.create_user(data)
+ assert user.id is not None
+ assert user.email == data["email"]
+ updated = service.update_user(user.id, {"name": "New"})
+ assert updated.name == "New"
+
+# GOOD - focused tests
+def test_create_user_assigns_id():
+ user = service.create_user(data)
+ assert user.id is not None
+
+def test_create_user_stores_email():
+ user = service.create_user(data)
+ assert user.email == data["email"]
+
+def test_update_user_changes_name():
+ user = service.create_user(data)
+ updated = service.update_user(user.id, {"name": "New"})
+ assert updated.name == "New"
+```
+
+### Test Error Paths
+
+Always test failure cases, not just happy paths.
+
+```python
+def test_get_user_raises_not_found():
+ with pytest.raises(UserNotFoundError) as exc_info:
+ service.get_user("nonexistent-id")
+
+ assert "nonexistent-id" in str(exc_info.value)
+
+def test_create_user_rejects_invalid_email():
+ with pytest.raises(ValueError, match="Invalid email format"):
+ service.create_user({"email": "not-an-email"})
+```
+
+## Testing Best Practices
+
+### Test Organization
+
+```python
+# tests/
+# __init__.py
+# conftest.py # Shared fixtures
+# test_unit/ # Unit tests
+# test_models.py
+# test_utils.py
+# test_integration/ # Integration tests
+# test_api.py
+# test_database.py
+# test_e2e/ # End-to-end tests
+# test_workflows.py
+```
+
+### Test Naming Convention
+
+A common pattern: `test_<action>_<condition>_<expected_result>`. Adapt to your team's preferences.
+
+```python
+# Pattern: test_<action>_<condition>_<expected_result>
+def test_create_user_with_valid_data_returns_user():
+ ...
+
+def test_create_user_with_duplicate_email_raises_conflict():
+ ...
+
+def test_get_user_with_unknown_id_returns_none():
+ ...
+
+# Good test names - clear and descriptive
+def test_user_creation_with_valid_data():
+ """Clear name describes what is being tested."""
+ pass
+
+def test_login_fails_with_invalid_password():
+ """Name describes expected behavior."""
+ pass
+
+def test_api_returns_404_for_missing_resource():
+ """Specific about inputs and expected outcomes."""
+ pass
+
+# Bad test names - avoid these
+def test_1(): # Not descriptive
+ pass
+
+def test_user(): # Too vague
+ pass
+
+def test_function(): # Doesn't explain what's tested
+ pass
+```
+
+### Testing Retry Behavior
+
+Verify that retry logic works correctly using mock side effects.
+
+```python
+from unittest.mock import Mock
+
+def test_retries_on_transient_error():
+ """Test that service retries on transient failures."""
+ client = Mock()
+ # Fail twice, then succeed
+ client.request.side_effect = [
+ ConnectionError("Failed"),
+ ConnectionError("Failed"),
+ {"status": "ok"},
+ ]
+
+ service = ServiceWithRetry(client, max_retries=3)
+ result = service.fetch()
+
+ assert result == {"status": "ok"}
+ assert client.request.call_count == 3
+
+def test_gives_up_after_max_retries():
+ """Test that service stops retrying after max attempts."""
+ client = Mock()
+ client.request.side_effect = ConnectionError("Failed")
+
+ service = ServiceWithRetry(client, max_retries=3)
+
+ with pytest.raises(ConnectionError):
+ service.fetch()
+
+ assert client.request.call_count == 3
+
+def test_does_not_retry_on_permanent_error():
+ """Test that permanent errors are not retried."""
+ client = Mock()
+ client.request.side_effect = ValueError("Invalid input")
+
+ service = ServiceWithRetry(client, max_retries=3)
+
+ with pytest.raises(ValueError):
+ service.fetch()
+
+ # Only called once - no retry for ValueError
+ assert client.request.call_count == 1
+```
+
+### Mocking Time with Freezegun
+
+Use freezegun to control time in tests for predictable time-dependent behavior.
+
+```python
+from freezegun import freeze_time
+from datetime import datetime, timedelta
+
+@freeze_time("2026-01-15 10:00:00")
+def test_token_expiry():
+ """Test token expires at correct time."""
+ token = create_token(expires_in_seconds=3600)
+ assert token.expires_at == datetime(2026, 1, 15, 11, 0, 0)
+
+@freeze_time("2026-01-15 10:00:00")
+def test_is_expired_returns_false_before_expiry():
+ """Test token is not expired when within validity period."""
+ token = create_token(expires_in_seconds=3600)
+ assert not token.is_expired()
+
+@freeze_time("2026-01-15 12:00:00")
+def test_is_expired_returns_true_after_expiry():
+ """Test token is expired after validity period."""
+ token = Token(expires_at=datetime(2026, 1, 15, 11, 30, 0))
+ assert token.is_expired()
+
+def test_with_time_travel():
+ """Test behavior across time using freeze_time context."""
+ with freeze_time("2026-01-01") as frozen_time:
+ item = create_item()
+ assert item.created_at == datetime(2026, 1, 1)
+
+ # Move forward in time
+ frozen_time.move_to("2026-01-15")
+ assert item.age_days == 14
+```
+
+### Test Markers
+
+```python
+# test_markers.py
+import os
+
+import pytest
+
+@pytest.mark.slow
+def test_slow_operation():
+ """Mark slow tests."""
+ import time
+ time.sleep(2)
+
+
+@pytest.mark.integration
+def test_database_integration():
+ """Mark integration tests."""
+ pass
+
+
+@pytest.mark.skip(reason="Feature not implemented yet")
+def test_future_feature():
+ """Skip tests temporarily."""
+ pass
+
+
+@pytest.mark.skipif(os.name == "nt", reason="Unix only test")
+def test_unix_specific():
+ """Conditional skip."""
+ pass
+
+
+@pytest.mark.xfail(reason="Known bug #123")
+def test_known_bug():
+ """Mark expected failures."""
+ assert False
+
+
+# Run with:
+# pytest -m slow # Run only slow tests
+# pytest -m "not slow" # Skip slow tests
+# pytest -m integration # Run integration tests
+```
+
+### Coverage Reporting
+
+```bash
+# Install coverage
+pip install pytest-cov
+
+# Run tests with coverage
+pytest --cov=myapp tests/
+
+# Generate HTML report
+pytest --cov=myapp --cov-report=html tests/
+
+# Fail if coverage below threshold
+pytest --cov=myapp --cov-fail-under=80 tests/
+
+# Show missing lines
+pytest --cov=myapp --cov-report=term-missing tests/
+```
+
+## Testing Database Code
+
+```python
+# test_database_models.py
+import pytest
+from sqlalchemy import create_engine, Column, Integer, String
+from sqlalchemy.ext.declarative import declarative_base
+from sqlalchemy.orm import sessionmaker, Session
+
+Base = declarative_base()
+
+
+class User(Base):
+ """User model."""
+ __tablename__ = "users"
+
+ id = Column(Integer, primary_key=True)
+ name = Column(String(50))
+ email = Column(String(100), unique=True)
+
+
+@pytest.fixture(scope="function")
+def db_session() -> Session:
+ """Create in-memory database for testing."""
+ engine = create_engine("sqlite:///:memory:")
+ Base.metadata.create_all(engine)
+
+ SessionLocal = sessionmaker(bind=engine)
+ session = SessionLocal()
+
+ yield session
+
+ session.close()
+
+
+def test_create_user(db_session):
+ """Test creating a user."""
+ user = User(name="Test User", email="test@example.com")
+ db_session.add(user)
+ db_session.commit()
+
+ assert user.id is not None
+ assert user.name == "Test User"
+
+
+def test_query_user(db_session):
+ """Test querying users."""
+ user1 = User(name="User 1", email="user1@example.com")
+ user2 = User(name="User 2", email="user2@example.com")
+
+ db_session.add_all([user1, user2])
+ db_session.commit()
+
+ users = db_session.query(User).all()
+ assert len(users) == 2
+
+
+def test_unique_email_constraint(db_session):
+ """Test unique email constraint."""
+ from sqlalchemy.exc import IntegrityError
+
+ user1 = User(name="User 1", email="same@example.com")
+ user2 = User(name="User 2", email="same@example.com")
+
+ db_session.add(user1)
+ db_session.commit()
+
+ db_session.add(user2)
+
+ with pytest.raises(IntegrityError):
+ db_session.commit()
+```
+
+## CI/CD Integration
+
+```yaml
+# .github/workflows/test.yml
+name: Tests
+
+on: [push, pull_request]
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+
+ strategy:
+ matrix:
+ python-version: ["3.9", "3.10", "3.11", "3.12"]
+
+ steps:
+ - uses: actions/checkout@v3
+
+ - name: Set up Python
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+
+ - name: Install dependencies
+ run: |
+ pip install -e ".[dev]"
+ pip install pytest pytest-cov
+
+ - name: Run tests
+ run: |
+ pytest --cov=myapp --cov-report=xml
+
+ - name: Upload coverage
+ uses: codecov/codecov-action@v3
+ with:
+ file: ./coverage.xml
+```
+
+## Configuration Files
+
+```ini
+# pytest.ini
+[pytest]
+testpaths = tests
+python_files = test_*.py
+python_classes = Test*
+python_functions = test_*
+addopts =
+ -v
+ --strict-markers
+ --tb=short
+ --cov=myapp
+ --cov-report=term-missing
+markers =
+ slow: marks tests as slow
+ integration: marks integration tests
+ unit: marks unit tests
+ e2e: marks end-to-end tests
+```
+
+```toml
+# pyproject.toml
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test_*.py"]
+addopts = [
+ "-v",
+ "--cov=myapp",
+ "--cov-report=term-missing",
+]
+
+[tool.coverage.run]
+source = ["myapp"]
+omit = ["*/tests/*", "*/migrations/*"]
+
+[tool.coverage.report]
+exclude_lines = [
+ "pragma: no cover",
+ "def __repr__",
+ "raise AssertionError",
+ "raise NotImplementedError",
+]
+```
+
+## Resources
+
+- **pytest documentation**: https://docs.pytest.org/
+- **unittest.mock**: https://docs.python.org/3/library/unittest.mock.html
+- **hypothesis**: Property-based testing
+- **pytest-asyncio**: Testing async code
+- **pytest-cov**: Coverage reporting
+- **pytest-mock**: pytest wrapper for mock
+
+## Best Practices Summary
+
+1. **Write tests first** (TDD) or alongside code
+2. **One assertion per test** when possible
+3. **Use descriptive test names** that explain behavior
+4. **Keep tests independent** and isolated
+5. **Use fixtures** for setup and teardown
+6. **Mock external dependencies** appropriately
+7. **Parametrize tests** to reduce duplication
+8. **Test edge cases** and error conditions
+9. **Measure coverage** but focus on quality
+10. **Run tests in CI/CD** on every commit
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index b353b30871abf9fb8fc13ebd8db3351eb462f745..b9e0ba7bff1898840031fead4ede3230b102adbf 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -155,14 +155,19 @@ cp .env.template .env
### 5. Run Tests
```bash
-# Run all tests
-pytest
+# Run unit tests (30 tests)
+.venv\Scripts\python.exe -m pytest tests/ -q \
+ --ignore=tests/test_basic.py \
+ --ignore=tests/test_diabetes_patient.py \
+ --ignore=tests/test_evolution_loop.py \
+ --ignore=tests/test_evolution_quick.py \
+ --ignore=tests/test_evaluation_system.py
# Run with coverage
-pytest --cov=src --cov-report=html
+.venv\Scripts\python.exe -m pytest --cov=src tests/
# Run specific test file
-pytest tests/test_basic.py
+.venv\Scripts\python.exe -m pytest tests/test_codebase_fixes.py -v
```
## Style Guidelines
diff --git a/QUICKSTART.md b/QUICKSTART.md
index c6265a555e25cc8f0a7188499de611f599833c02..f8e30fed2007c71c6736eac4a0cf98b6121548ba 100644
--- a/QUICKSTART.md
+++ b/QUICKSTART.md
@@ -1,25 +1,25 @@
-# 🚀 Quick Start Guide - MediGuard AI RAG-Helper
+# Quick Start Guide - RagBot
Get up and running in **5 minutes**!
-## Step 1: Prerequisites ✅
+## Step 1: Prerequisites
Before you begin, ensure you have:
-- ✅ **Python 3.11+** installed ([Download](https://www.python.org/downloads/))
-- ✅ **Git** installed ([Download](https://git-scm.com/downloads))
-- ✅ **FREE API Key** from one of:
+- **Python 3.11+** installed ([Download](https://www.python.org/downloads/))
+- **Git** installed ([Download](https://git-scm.com/downloads))
+- **FREE API Key** from one of:
- [Groq](https://console.groq.com/keys) - Recommended (Fast & Free)
- [Google Gemini](https://aistudio.google.com/app/apikey) - Alternative
**System Requirements:**
- 4GB+ RAM
- 2GB free disk space
-- No GPU required! 🎉
+- No GPU required
---
-## Step 2: Installation 📥
+## Step 2: Installation
### Clone the Repository
@@ -52,7 +52,7 @@ pip install -r requirements.txt
---
-## Step 3: Configuration ⚙️
+## Step 3: Configuration
### Copy Environment Template
@@ -95,7 +95,7 @@ EMBEDDING_PROVIDER="google"
---
-## Step 4: Verify Installation ✓
+## Step 4: Verify Installation
Quick system check:
@@ -112,7 +112,7 @@ If you see "✅ Success!" you're good to go!
---
-## Step 5: Run Your First Analysis 🎯
+## Step 5: Run Your First Analysis
### Interactive Chat Mode
@@ -134,7 +134,7 @@ You: My glucose is 185, HbA1c is 8.2, and cholesterol is 210
---
-## Common Commands 📝
+## Common Commands
### Chat Interface
```bash
@@ -150,17 +150,16 @@ quit # Exit
### Python API
```python
from src.workflow import create_guild
-from src.state import PatientInput
# Create the guild
guild = create_guild()
# Analyze biomarkers
-result = guild.run(PatientInput(
- biomarkers={"Glucose": 185, "HbA1c": 8.2},
- model_prediction={"disease": "Diabetes", "confidence": 0.87},
- patient_context={"age": 52, "gender": "male"}
-))
+result = guild.run({
+ "biomarkers": {"Glucose": 185, "HbA1c": 8.2},
+ "model_prediction": {"disease": "Diabetes", "confidence": 0.87},
+ "patient_context": {"age": 52, "gender": "male"}
+})
print(result)
```
@@ -177,7 +176,7 @@ python -m uvicorn app.main:app --reload
---
-## Troubleshooting 🔧
+## Troubleshooting
### Import Error: "No module named 'langchain'"
@@ -224,14 +223,14 @@ python src/pdf_processor.py
---
-## Next Steps 🎓
+## Next Steps
### Learn More
- **[Full Documentation](README.md)** - Complete system overview
-- **[API Guide](api/README.md)** - REST API documentation
+- **[API Guide](docs/API.md)** - REST API documentation
- **[Contributing](CONTRIBUTING.md)** - How to contribute
-- **[Architecture](docs/)** - Deep dive into system design
+- **[Architecture](docs/ARCHITECTURE.md)** - Deep dive into system design
### Customize
@@ -242,22 +241,27 @@ python src/pdf_processor.py
### Run Tests
```bash
-# Quick test
-python tests/test_basic.py
-
-# Full evaluation
-python tests/test_evaluation_system.py
+# Run unit tests (30 tests, no API keys needed)
+.venv\Scripts\python.exe -m pytest tests/ -q \
+ --ignore=tests/test_basic.py \
+ --ignore=tests/test_diabetes_patient.py \
+ --ignore=tests/test_evolution_loop.py \
+ --ignore=tests/test_evolution_quick.py \
+ --ignore=tests/test_evaluation_system.py
+
+# Run integration tests (requires Groq/Gemini API key)
+.venv\Scripts\python.exe -m pytest tests/test_diabetes_patient.py -v
```
---
-## Example Session 📋
+## Example Session
```
$ python scripts/chat.py
======================================================================
-🤖 MediGuard AI RAG-Helper - Interactive Chat
+RagBot - Interactive Chat
======================================================================
You can:
@@ -295,7 +299,7 @@ Your elevated glucose and HbA1c indicate Type 2 Diabetes...
---
-## Getting Help 💬
+## Getting Help
- **Issues**: [GitHub Issues](https://github.com/yourusername/RagBot/issues)
- **Discussions**: [GitHub Discussions](https://github.com/yourusername/RagBot/discussions)
@@ -303,11 +307,11 @@ Your elevated glucose and HbA1c indicate Type 2 Diabetes...
---
-## Quick Reference Card 📇
+## Quick Reference Card
```
┌─────────────────────────────────────────────────────────┐
-│ MediGuard AI Cheat Sheet │
+│ RagBot Cheat Sheet │
├─────────────────────────────────────────────────────────┤
│ START CHAT: python scripts/chat.py │
│ START API: cd api && uvicorn app.main:app --reload │
@@ -320,8 +324,10 @@ Your elevated glucose and HbA1c indicate Type 2 Diabetes...
│ quit - Exit │
├─────────────────────────────────────────────────────────┤
│ SUPPORTED BIOMARKERS: 24 total │
-│ Glucose, HbA1c, Cholesterol, LDL, HDL, Triglycerides │
-│ Hemoglobin, Platelets, WBC, RBC, and more... │
+│ Glucose, HbA1c, Cholesterol, LDL Cholesterol, │
+│ HDL Cholesterol, Triglycerides, Hemoglobin, │
+│ Platelets, White Blood Cells, Red Blood Cells, │
+│ BMI, Systolic Blood Pressure, and more... │
├─────────────────────────────────────────────────────────┤
│ DETECTED DISEASES: 5 types │
│ Diabetes, Anemia, Heart Disease, │
@@ -331,4 +337,4 @@ Your elevated glucose and HbA1c indicate Type 2 Diabetes...
---
-**Ready to revolutionize healthcare AI? Let's go! 🚀**
+**Ready to analyze biomarkers? Let's go!**
diff --git a/README.md b/README.md
index ea49fb37609cd4a8de3f5c25671d6275dd89f551..8afa76196b53c4047d8bc1dac3699c8aa2242a50 100644
--- a/README.md
+++ b/README.md
@@ -2,16 +2,17 @@
A production-ready biomarker analysis system combining 6 specialized AI agents with medical knowledge retrieval to provide evidence-based insights on blood test results in **15-25 seconds**.
-## ✨ Key Features
+## Key Features
- **6 Specialist Agents** - Biomarker validation, disease prediction, RAG-powered analysis, confidence assessment
-- **Medical Knowledge Base** - 750+ pages of clinical guidelines (FAISS vector store, local embeddings)
+- **Medical Knowledge Base** - 750+ pages of clinical guidelines (FAISS vector store)
- **Multiple Interfaces** - Interactive CLI chat, REST API, ready for web/mobile integration
- **Evidence-Based** - All recommendations backed by retrieved medical literature
-- **Free & Offline** - Uses free Groq API + local embeddings (no embedding API costs)
-- **Production-Ready** - Full error handling, safety alerts, confidence scoring
+- **Free Cloud LLMs** - Uses Groq (LLaMA 3.3-70B) or Google Gemini - no cost
+- **Biomarker Normalization** - 80+ aliases mapped to 24 canonical biomarker names
+- **Production-Ready** - Full error handling, safety alerts, confidence scoring, 30 unit tests
-## 🚀 Quick Start
+## Quick Start
**Installation (5 minutes):**
@@ -36,7 +37,7 @@ python scripts/chat.py
See **[QUICKSTART.md](QUICKSTART.md)** for detailed setup instructions.
-## 📚 Documentation
+## Documentation
| Document | Purpose |
|----------|---------|
@@ -48,7 +49,7 @@ See **[QUICKSTART.md](QUICKSTART.md)** for detailed setup instructions.
| [**scripts/README.md**](scripts/README.md) | Utility scripts reference |
| [**examples/README.md**](examples/) | Web/mobile integration examples |
-## 💻 Usage
+## Usage
### Interactive CLI
@@ -57,116 +58,134 @@ python scripts/chat.py
You: My glucose is 140 and HbA1c is 10
-🔴 Primary Finding: Diabetes (85% confidence)
-⚠️ Critical Alerts: Hyperglycemia, elevated HbA1c
-✅ Recommendations: Seek medical attention, lifestyle changes
-🌱 Actions: Physical activity, reduce carbs, weight loss
+Primary Finding: Diabetes (100% confidence)
+Critical Alerts: Hyperglycemia, elevated HbA1c
+Recommendations: Seek medical attention, lifestyle changes
+Actions: Physical activity, reduce carbs, weight loss
```
### REST API
```bash
# Start server
-python -m uvicorn api.app.main:app
+cd api
+python -m uvicorn app.main:app
-# POST /api/v1/analyze
-curl -X POST http://localhost:8000/api/v1/analyze \
+# Analyze biomarkers (structured input)
+curl -X POST http://localhost:8000/api/v1/analyze/structured \
-H "Content-Type: application/json" \
-d '{
"biomarkers": {"Glucose": 140, "HbA1c": 10.0}
}'
+
+# Analyze biomarkers (natural language)
+curl -X POST http://localhost:8000/api/v1/analyze/natural \
+ -H "Content-Type: application/json" \
+ -d '{
+ "message": "My glucose is 140 and HbA1c is 10"
+ }'
```
See **[docs/API.md](docs/API.md)** for full API reference.
-## 🏗️ Project Structure
+## Project Structure
```
RagBot/
├── src/ # Core application
+│ ├── __init__.py
│ ├── workflow.py # Multi-agent orchestration (LangGraph)
+│ ├── state.py # Pydantic state models
│ ├── biomarker_validator.py # Validation logic
+│ ├── biomarker_normalization.py # Name normalization (80+ aliases)
+│ ├── llm_config.py # LLM/embedding provider config
│ ├── pdf_processor.py # Vector store management
+│ ├── config.py # Global configuration
│ └── agents/ # 6 specialist agents
+│ ├── __init__.py
+│ ├── biomarker_analyzer.py
+│ ├── disease_explainer.py
+│ ├── biomarker_linker.py
+│ ├── clinical_guidelines.py
+│ ├── confidence_assessor.py
+│ └── response_synthesizer.py
│
-├── api/ # REST API (optional)
+├── api/ # REST API (FastAPI)
│ ├── app/main.py # FastAPI server
-│ └── app/routes/ # API endpoints
+│ ├── app/routes/ # API endpoints
+│ ├── app/models/schemas.py # Pydantic request/response schemas
+│ └── app/services/ # Business logic
│
├── scripts/ # Utilities
-│ ├── chat.py # Interactive CLI
+│ ├── chat.py # Interactive CLI chatbot
│ └── setup_embeddings.py # Vector store builder
│
├── config/ # Configuration
-│ └── biomarker_references.json # Reference ranges
+│ └── biomarker_references.json # 24 biomarker reference ranges
│
├── data/ # Data storage
│ ├── medical_pdfs/ # Source documents
│ └── vector_stores/ # FAISS database
│
-├── tests/ # Test suite
+├── tests/ # Test suite (30 tests)
├── examples/ # Integration examples
├── docs/ # Documentation
-│ ├── ARCHITECTURE.md # System design
-│ ├── API.md # API reference
-│ ├── DEVELOPMENT.md # Development guide
-│ ├── archive/ # Old docs
-│ └── plans/ # Planning docs
│
├── QUICKSTART.md # Setup guide
├── CONTRIBUTING.md # Contribution guidelines
├── requirements.txt # Python dependencies
-├── .env.template # Configuration template
└── LICENSE
```
-## 🔧 Technology Stack
+## Technology Stack
| Component | Technology | Purpose |
|-----------|-----------|---------|
| Orchestration | **LangGraph** | Multi-agent workflow control |
| LLM | **Groq (LLaMA 3.3-70B)** | Fast, free inference |
-| Embeddings | **HuggingFace (sentence-transformers)** | Local, offline embeddings |
+| LLM (Alt) | **Google Gemini 2.0 Flash** | Free alternative |
+| Embeddings | **Google Gemini / HuggingFace** | Vector representations |
| Vector DB | **FAISS** | Efficient similarity search |
| API | **FastAPI** | REST endpoints |
-| Data | **Pydantic V2** | Type validation |
+| Validation | **Pydantic V2** | Type safety & schemas |
-## 🔍 How It Works
+## How It Works
```
User Input ("My glucose is 140...")
- ↓
-[Biomarker Extraction] → Parse & normalize
- ↓
-[Prediction Agent] → Disease hypothesis
- ↓
-[RAG Retrieval] → Get medical docs from vector store
- ↓
-[6 Parallel Agents] → Analyze from different angles
- ├─ Biomarker Analyzer (validation)
- ├─ Disease Explainer (RAG)
- ├─ Biomarker-Disease Linker (RAG)
- ├─ Clinical Guidelines (RAG)
- ├─ Confidence Assessor (scoring)
- └─ Response Synthesizer (summary)
- ↓
-[Output] → Comprehensive report with safety alerts
+ |
+[Biomarker Extraction] -> Parse & normalize (80+ aliases)
+ |
+[Disease Prediction] -> Rule-based + LLM hypothesis
+ |
+[RAG Retrieval] -> Get medical docs from FAISS vector store
+ |
+[6 Agent Pipeline via LangGraph]
+ |-- Biomarker Analyzer (validation + safety alerts)
+ |-- Disease Explainer (RAG pathophysiology)
+ |-- Biomarker-Disease Linker (RAG key drivers)
+ |-- Clinical Guidelines (RAG recommendations)
+ |-- Confidence Assessor (reliability scoring)
+ +-- Response Synthesizer (final structured report)
+ |
+[Output] -> Comprehensive report with safety alerts
```
-## 📊 Supported Biomarkers
+## Supported Biomarkers (24)
-24+ biomarkers including:
-- **Glucose Control**: Glucose, HbA1c, Fasting Glucose
-- **Lipids**: Total Cholesterol, LDL, HDL, Triglycerides
-- **Cardiac**: Troponin, BNP, CK-MB
-- **Blood Cells**: WBC, RBC, Hemoglobin, Hematocrit, Platelets
-- **Liver**: ALT, AST, Albumin, Bilirubin
-- **Kidney**: Creatinine, BUN, eGFR
-- And more...
+- **Glucose Control**: Glucose, HbA1c, Insulin
+- **Lipids**: Cholesterol, LDL Cholesterol, HDL Cholesterol, Triglycerides
+- **Body Metrics**: BMI
+- **Blood Cells**: Hemoglobin, Platelets, White Blood Cells, Red Blood Cells, Hematocrit
+- **RBC Indices**: Mean Corpuscular Volume, Mean Corpuscular Hemoglobin, MCHC
+- **Cardiovascular**: Heart Rate, Systolic Blood Pressure, Diastolic Blood Pressure, Troponin
+- **Inflammation**: C-reactive Protein
+- **Liver**: ALT, AST
+- **Kidney**: Creatinine
-See `config/biomarker_references.json` for complete list.
+See [config/biomarker_references.json](config/biomarker_references.json) for full reference ranges.
-## 🎯 Disease Coverage
+## Disease Coverage
- Diabetes
- Anemia
@@ -175,48 +194,40 @@ See `config/biomarker_references.json` for complete list.
- Thalassemia
- (Extensible - add custom domains)
-## 🔒 Privacy & Security
+## Privacy & Security
- All processing runs **locally** after setup
-- No personal health data sent to APIs (except LLM inference)
+- No personal health data stored
- Embeddings computed locally or cached
-- Fully **HIPAA-compliant** architecture ready
- Vector store derived from public medical literature
-- Can operate completely offline after initial setup
+- Can operate completely offline with Ollama provider
-## 📈 Performance
+## Performance
-- **Response Time**: 15-25 seconds (8 agents + RAG retrieval)
-- **Knowledge Base**: 750 pages → 2,609 document chunks
-- **Embedding Dimensions**: 384
-- **Cost**: Free (Groq API + local embeddings)
+- **Response Time**: 15-25 seconds (6 agents + RAG retrieval)
+- **Knowledge Base**: 750 pages, 2,609 document chunks
+- **Cost**: Free (Groq/Gemini API + local/cloud embeddings)
- **Hardware**: CPU-only (no GPU needed)
-## 🚀 Deployment Options
-
-1. **CLI** - Interactive chatbot (development/testing)
-2. **REST API** - FastAPI server (production)
-3. **Docker** - Containerized deployment
-4. **Embedded** - Direct Python library import
-5. **Web** - JavaScript/React integration
-6. **Mobile** - React Native / Flutter
-
-See **[examples/README.md](examples/)** for integration patterns.
-
-## 🧪 Testing
+## Testing
```bash
-# Run all tests
-pytest tests/ -v
-
-# Test specific module
-pytest tests/test_diabetes_patient.py -v
-
-# Coverage report
-pytest --cov=src tests/
+# Run unit tests (30 tests)
+.venv\Scripts\python.exe -m pytest tests/ -q \
+ --ignore=tests/test_basic.py \
+ --ignore=tests/test_diabetes_patient.py \
+ --ignore=tests/test_evolution_loop.py \
+ --ignore=tests/test_evolution_quick.py \
+ --ignore=tests/test_evaluation_system.py
+
+# Run specific test file
+.venv\Scripts\python.exe -m pytest tests/test_codebase_fixes.py -v
+
+# Run all tests (includes integration tests requiring LLM API keys)
+.venv\Scripts\python.exe -m pytest tests/ -v
```
-## 🤝 Contributing
+## Contributing
Contributions welcome! See **[CONTRIBUTING.md](CONTRIBUTING.md)** for:
- Code style guidelines
@@ -224,7 +235,7 @@ Contributions welcome! See **[CONTRIBUTING.md](CONTRIBUTING.md)** for:
- Testing requirements
- Development setup
-## 📖 Development
+## Development
Want to extend RagBot?
@@ -233,17 +244,11 @@ Want to extend RagBot?
- **Create custom agents**: [docs/DEVELOPMENT.md](docs/DEVELOPMENT.md#creating-a-custom-analysis-agent)
- **Switch LLM providers**: [docs/DEVELOPMENT.md](docs/DEVELOPMENT.md#switching-llm-providers)
-## 📋 License
+## License
MIT License - See [LICENSE](LICENSE)
-## 🙋 Support
-
-- **Issues**: GitHub Issues for bugs and feature requests
-- **Discussion**: GitHub Discussions for questions
-- **Docs**: Full documentation in `/docs` folder
-
-## 🔗 Resources
+## Resources
- [LangGraph Documentation](https://langchain-ai.github.io/langgraph/)
- [Groq API Docs](https://console.groq.com)
@@ -252,8 +257,8 @@ MIT License - See [LICENSE](LICENSE)
---
-**Ready to get started?** → [QUICKSTART.md](QUICKSTART.md)
+**Ready to get started?** -> [QUICKSTART.md](QUICKSTART.md)
-**Want to understand the architecture?** → [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md)
+**Want to understand the architecture?** -> [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md)
-**Looking to integrate with your app?** → [examples/README.md](examples/)
+**Looking to integrate with your app?** -> [examples/README.md](examples/)
diff --git a/START_HERE.md b/START_HERE.md
new file mode 100644
index 0000000000000000000000000000000000000000..287f871b4669c9d228ff8a7101196309077a93f0
--- /dev/null
+++ b/START_HERE.md
@@ -0,0 +1,80 @@
+# Start Here — RagBot
+
+Welcome to **RagBot**, a multi-agent RAG system for medical biomarker analysis.
+
+## 5-Minute Setup
+
+```bash
+# 1. Clone and install
+git clone https://github.com/yourusername/ragbot.git
+cd ragbot
+python -m venv .venv
+.venv\Scripts\activate # Windows
+pip install -r requirements.txt
+
+# 2. Add your free API key to .env
+# Get one at https://console.groq.com/keys (Groq, recommended)
+# or https://aistudio.google.com/app/apikey (Google Gemini)
+cp .env.template .env
+# Edit .env with your key
+
+# 3. Start chatting
+python scripts/chat.py
+```
+
+For the full walkthrough, see [QUICKSTART.md](QUICKSTART.md).
+
+---
+
+## Key Documentation
+
+| Document | What it covers |
+|----------|----------------|
+| [QUICKSTART.md](QUICKSTART.md) | Detailed setup, configuration, troubleshooting |
+| [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md) | System design, agent pipeline, data flow |
+| [docs/API.md](docs/API.md) | REST API endpoints and usage examples |
+| [docs/DEVELOPMENT.md](docs/DEVELOPMENT.md) | Extending the system — new biomarkers, agents, domains |
+| [CONTRIBUTING.md](CONTRIBUTING.md) | Code style, PR process, testing guidelines |
+| [scripts/README.md](scripts/README.md) | CLI scripts and utilities |
+| [examples/README.md](examples/) | Web/mobile integration examples |
+
+---
+
+## Project at a Glance
+
+- **6 specialist AI agents** orchestrated via LangGraph
+- **24 supported biomarkers** with 80+ name aliases
+- **FAISS vector store** over 750 pages of medical literature
+- **Free LLM inference** via Groq (LLaMA 3.3-70B) or Google Gemini
+- **Two interfaces**: interactive CLI chat + REST API (FastAPI)
+- **30 unit tests** passing, Pydantic V2 throughout
+
+---
+
+## Quick Commands
+
+```bash
+# Interactive chat
+python scripts/chat.py
+
+# Run unit tests
+.venv\Scripts\python.exe -m pytest tests/ -q ^
+ --ignore=tests/test_basic.py ^
+ --ignore=tests/test_diabetes_patient.py ^
+ --ignore=tests/test_evolution_loop.py ^
+ --ignore=tests/test_evolution_quick.py ^
+ --ignore=tests/test_evaluation_system.py
+
+# Start REST API
+cd api && python -m uvicorn app.main:app --reload
+
+# Rebuild vector store (after adding new PDFs)
+python scripts/setup_embeddings.py
+```
+
+---
+
+## Need Help?
+
+- Check [QUICKSTART.md — Troubleshooting](QUICKSTART.md#troubleshooting)
+- Open a [GitHub Issue](https://github.com/yourusername/RagBot/issues)
diff --git a/api/ARCHITECTURE.md b/api/ARCHITECTURE.md
index fa1d41730bb5ccd13f60004ffa7672818a62b4d4..6634ec36e26a4111d132c8442233cc11c8fd03db 100644
--- a/api/ARCHITECTURE.md
+++ b/api/ARCHITECTURE.md
@@ -1,20 +1,20 @@
# RagBot API - Architecture Diagrams
-## 🏗️ System Architecture
+## System Architecture
```
┌─────────────────────────────────────────────────────────────────┐
-│ YOUR LAPTOP (MVP Setup) │
+│ RagBot API Server │
├─────────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────────┐ ┌──────────────────────────┐ │
-│ │ Ollama Server │◄─────────────┤ FastAPI API Server │ │
-│ │ Port: 11434 │ LLM Calls │ Port: 8000 │ │
+│ │ Cloud LLM API │◄─────────────┤ FastAPI Server │ │
+│ │ (Groq/Gemini) │ LLM Calls │ Port: 8000 │ │
│ │ │ │ │ │
│ │ Models: │ │ Endpoints: │ │
-│ │ - llama3.1:8b │ │ - /api/v1/health │ │
-│ │ - qwen2:7b │ │ - /api/v1/biomarkers │ │
-│ │ - nomic-embed │ │ - /api/v1/analyze/* │ │
+│ │ - LLaMA 3.3-70B│ │ - /api/v1/health │ │
+│ │ - Gemini Flash │ │ - /api/v1/biomarkers │ │
+│ │ (or Ollama) │ │ - /api/v1/analyze/* │ │
│ └─────────────────┘ └───────────┬──────────────┘ │
│ │ │
│ ┌───────────▼──────────────┐ │
@@ -24,7 +24,7 @@
│ │ - 6 Specialist Agents │ │
│ │ - LangGraph Workflow │ │
│ │ - FAISS Vector Store │ │
-│ │ - 2,861 medical chunks │ │
+│ │ - 2,609 medical chunks │ │
│ └──────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────────┘
diff --git a/api/GETTING_STARTED.md b/api/GETTING_STARTED.md
index 9842989848d8b861bc4d412ca1c34fadedbc500d..5e1a7fd51d5fc56600dfea6168b8c6cbf81d374e 100644
--- a/api/GETTING_STARTED.md
+++ b/api/GETTING_STARTED.md
@@ -4,39 +4,31 @@ Follow these steps to get your API running in 5 minutes:
---
-## ✅ Prerequisites Check
+## Prerequisites
Before starting, ensure you have:
-1. **Ollama installed and running**
- ```powershell
- # Check if Ollama is running
- curl http://localhost:11434/api/version
-
- # If not, start it
- ollama serve
- ```
-
-2. **Required models pulled**
- ```powershell
- ollama list
-
- # If missing, pull them
- ollama pull llama3.1:8b-instruct
- ollama pull qwen2:7b
- ```
-
-3. **Python 3.11+**
+1. **Python 3.11+** installed
```powershell
python --version
```
-4. **RagBot dependencies installed**
+2. **A free API key** from one of:
+ - [Groq](https://console.groq.com/keys) — Recommended (fast, free LLaMA 3.3-70B)
+ - [Google Gemini](https://aistudio.google.com/app/apikey) — Alternative
+
+3. **RagBot dependencies installed**
```powershell
# From RagBot root directory
pip install -r requirements.txt
```
+4. **`.env` configured** in project root with your API key:
+ ```
+ GROQ_API_KEY=gsk_...
+ LLM_PROVIDER=groq
+ ```
+
---
## 🚀 Step 1: Install API Dependencies (30 seconds)
diff --git a/api/IMPLEMENTATION_COMPLETE.md b/api/IMPLEMENTATION_COMPLETE.md
deleted file mode 100644
index 61ffe982159d7434d394f03020150576b247b7e5..0000000000000000000000000000000000000000
--- a/api/IMPLEMENTATION_COMPLETE.md
+++ /dev/null
@@ -1,452 +0,0 @@
-# RagBot API - Implementation Complete ✅
-
-**Date:** November 23, 2025
-**Status:** ✅ COMPLETE - Ready to Run
-
----
-
-## 📦 What Was Built
-
-A complete FastAPI REST API that exposes your RagBot system for web integration.
-
-### ✅ All 15 Tasks Completed
-
-1. ✅ API folder structure created
-2. ✅ Pydantic request/response models (comprehensive schemas)
-3. ✅ Biomarker extraction service (natural language → JSON)
-4. ✅ RagBot workflow wrapper (analysis orchestration)
-5. ✅ Health check endpoint
-6. ✅ Biomarkers list endpoint
-7. ✅ Natural language analysis endpoint
-8. ✅ Structured analysis endpoint
-9. ✅ Example endpoint (pre-run diabetes case)
-10. ✅ FastAPI main application (with CORS, error handling, logging)
-11. ✅ requirements.txt
-12. ✅ Dockerfile (multi-stage)
-13. ✅ docker-compose.yml
-14. ✅ Comprehensive README
-15. ✅ .env configuration
-
-**Bonus Files:**
-- ✅ .gitignore
-- ✅ test_api.ps1 (PowerShell test suite)
-- ✅ QUICK_REFERENCE.md (cheat sheet)
-
----
-
-## 📁 Complete Structure
-
-```
-RagBot/
-├── api/ ⭐ NEW - Your API!
-│ ├── app/
-│ │ ├── __init__.py
-│ │ ├── main.py # FastAPI application
-│ │ ├── models/
-│ │ │ ├── __init__.py
-│ │ │ └── schemas.py # 15+ Pydantic models
-│ │ ├── routes/
-│ │ │ ├── __init__.py
-│ │ │ ├── analyze.py # 3 analysis endpoints
-│ │ │ ├── biomarkers.py # List endpoint
-│ │ │ └── health.py # Health check
-│ │ └── services/
-│ │ ├── __init__.py
-│ │ ├── extraction.py # Natural language extraction
-│ │ └── ragbot.py # Workflow wrapper (370 lines)
-│ ├── .env # Configuration (ready to use)
-│ ├── .env.example # Template
-│ ├── .gitignore
-│ ├── requirements.txt # FastAPI dependencies
-│ ├── Dockerfile # Multi-stage build
-│ ├── docker-compose.yml # One-command deployment
-│ ├── README.md # 500+ lines documentation
-│ ├── QUICK_REFERENCE.md # Cheat sheet
-│ └── test_api.ps1 # Test suite
-│
-└── [Original RagBot files unchanged]
-```
-
----
-
-## 🎯 API Endpoints
-
-### 5 Endpoints Ready to Use:
-
-1. **GET /api/v1/health**
- - Check API status
- - Verify Ollama connection
- - Vector store status
-
-2. **GET /api/v1/biomarkers**
- - List all 24 supported biomarkers
- - Reference ranges
- - Clinical significance
-
-3. **POST /api/v1/analyze/natural**
- - Natural language input
- - LLM extraction
- - Full detailed analysis
-
-4. **POST /api/v1/analyze/structured**
- - Direct JSON biomarkers
- - Skip extraction
- - Full detailed analysis
-
-5. **GET /api/v1/example**
- - Pre-run diabetes case
- - Testing/demo
- - Same as CLI `example` command
-
----
-
-## 🚀 How to Run
-
-### Option 1: Local Development
-
-```powershell
-# From api/ directory
-cd C:\Users\admin\OneDrive\Documents\GitHub\RagBot\api
-
-# Install dependencies (first time only)
-pip install -r ../requirements.txt
-pip install -r requirements.txt
-
-# Start Ollama (in separate terminal)
-ollama serve
-
-# Start API
-python -m uvicorn app.main:app --reload --port 8000
-```
-
-**API will be at:** http://localhost:8000
-
-### Option 2: Docker (One Command)
-
-```powershell
-cd C:\Users\admin\OneDrive\Documents\GitHub\RagBot\api
-docker-compose up --build
-```
-
-**API will be at:** http://localhost:8000
-
----
-
-## ✅ Test Your API
-
-### Quick Test (PowerShell)
-```powershell
-.\test_api.ps1
-```
-
-This runs 6 tests:
-1. ✅ API online check
-2. ✅ Health check
-3. ✅ Biomarkers list
-4. ✅ Example endpoint
-5. ✅ Structured analysis
-6. ✅ Natural language analysis
-
-### Manual Test (cURL)
-```bash
-# Health check
-curl http://localhost:8000/api/v1/health
-
-# Get example
-curl http://localhost:8000/api/v1/example
-
-# Natural language analysis
-curl -X POST http://localhost:8000/api/v1/analyze/natural \
- -H "Content-Type: application/json" \
- -d "{\"message\": \"My glucose is 185 and HbA1c is 8.2\"}"
-```
-
----
-
-## 📖 Documentation
-
-Once running, visit:
-- **Swagger UI:** http://localhost:8000/docs
-- **ReDoc:** http://localhost:8000/redoc
-- **API Info:** http://localhost:8000/
-
----
-
-## 🎨 Response Format
-
-**Full Detailed Response Includes:**
-- ✅ Extracted biomarkers (if natural language)
-- ✅ Disease prediction with confidence
-- ✅ All biomarker flags (status, ranges, warnings)
-- ✅ Safety alerts (critical values)
-- ✅ Key drivers (why this prediction)
-- ✅ Disease explanation (pathophysiology, citations)
-- ✅ Recommendations (immediate actions, lifestyle, monitoring)
-- ✅ Confidence assessment (reliability, limitations)
-- ✅ All agent outputs (complete workflow detail)
-- ✅ Workflow metadata (SOP version, timestamps)
-- ✅ Conversational summary (human-friendly text)
-- ✅ Processing time
-
-**Nothing is hidden - full transparency!**
-
----
-
-## 🔌 Integration Examples
-
-### From Your Backend (Node.js)
-```javascript
-const axios = require('axios');
-
-async function analyzeBiomarkers(userInput) {
- const response = await axios.post('http://localhost:8000/api/v1/analyze/natural', {
- message: userInput,
- patient_context: {
- age: 52,
- gender: 'male'
- }
- });
-
- return response.data;
-}
-
-// Use it
-const result = await analyzeBiomarkers("My glucose is 185 and HbA1c is 8.2");
-console.log(result.prediction.disease); // "Diabetes"
-console.log(result.conversational_summary); // Full friendly text
-```
-
-### From Your Backend (Python)
-```python
-import requests
-
-def analyze_biomarkers(user_input):
- response = requests.post(
- 'http://localhost:8000/api/v1/analyze/natural',
- json={
- 'message': user_input,
- 'patient_context': {'age': 52, 'gender': 'male'}
- }
- )
- return response.json()
-
-# Use it
-result = analyze_biomarkers("My glucose is 185 and HbA1c is 8.2")
-print(result['prediction']['disease']) # Diabetes
-```
-
----
-
-## 🏗️ Architecture
-
-```
-┌─────────────────────────────────────────┐
-│ YOUR LAPTOP (MVP) │
-├─────────────────────────────────────────┤
-│ │
-│ ┌──────────┐ ┌────────────────┐ │
-│ │ Ollama │◄─────┤ FastAPI:8000 │ │
-│ │ :11434 │ │ │ │
-│ └──────────┘ └────────┬───────┘ │
-│ │ │
-│ ┌─────────▼────────┐ │
-│ │ RagBot Core │ │
-│ │ (imported pkg) │ │
-│ └──────────────────┘ │
-│ │
-└─────────────────────────────────────────┘
- ▲
- │ HTTP Requests (JSON)
- │
- ┌─────────┴─────────┐
- │ Your Backend │
- │ Server :3000 │
- └─────────┬─────────┘
- │
- ┌─────────▼─────────┐
- │ Your Frontend │
- │ (Website) │
- └───────────────────┘
-```
-
----
-
-## ⚙️ Key Features Implemented
-
-### 1. Natural Language Extraction ✅
-- Uses llama3.1:8b-instruct
-- Handles 30+ biomarker name variations
-- Extracts patient context (age, gender, BMI)
-
-### 2. Complete Workflow Integration ✅
-- Imports from existing RagBot
-- Zero changes to source code
-- All 6 agents execute
-- Full RAG retrieval
-
-### 3. Comprehensive Responses ✅
-- Every field from workflow preserved
-- Agent outputs included
-- Citations and evidence
-- Conversational summary generated
-
-### 4. Error Handling ✅
-- Validation errors (422)
-- Extraction failures (400)
-- Service unavailable (503)
-- Internal errors (500)
-- Detailed error messages
-
-### 5. CORS Support ✅
-- Allows all origins (MVP)
-- Configurable in .env
-- Ready for production lockdown
-
-### 6. Docker Ready ✅
-- Multi-stage build
-- Health checks
-- Volume mounts
-- Resource limits
-
----
-
-## 📊 Performance
-
-- **Startup:** 10-30 seconds (loads vector store)
-- **Analysis:** 3-10 seconds per request
-- **Concurrent:** Supported (FastAPI async)
-- **Memory:** ~2-4GB
-
----
-
-## 🔒 Security Notes
-
-**Current Setup (MVP):**
-- ✅ CORS: All origins allowed
-- ✅ Authentication: None
-- ✅ HTTPS: Not configured
-- ✅ Rate Limiting: Not implemented
-
-**For Production (TODO):**
-- 🔐 Restrict CORS to your domain
-- 🔐 Add API key authentication
-- 🔐 Enable HTTPS
-- 🔐 Implement rate limiting
-- 🔐 Add request logging
-
----
-
-## 🎓 Next Steps
-
-### 1. Start the API
-```powershell
-cd api
-python -m uvicorn app.main:app --reload --port 8000
-```
-
-### 2. Test It
-```powershell
-.\test_api.ps1
-```
-
-### 3. Integrate with Your Backend
-```javascript
-// Your backend makes requests to localhost:8000
-const result = await fetch('http://localhost:8000/api/v1/analyze/natural', {
- method: 'POST',
- headers: {'Content-Type': 'application/json'},
- body: JSON.stringify({message: userInput})
-});
-```
-
-### 4. Display Results on Frontend
-```javascript
-// Your frontend gets data from your backend
-// Display conversational_summary or build custom UI from analysis object
-```
-
----
-
-## 📚 Documentation Files
-
-1. **README.md** - Complete guide (500+ lines)
- - Quick start
- - All endpoints
- - Request/response examples
- - Deployment instructions
- - Troubleshooting
- - Integration examples
-
-2. **QUICK_REFERENCE.md** - Cheat sheet
- - Common commands
- - Code snippets
- - Quick fixes
-
-3. **Swagger UI** - Interactive docs
- - http://localhost:8000/docs
- - Try endpoints live
- - See all schemas
-
----
-
-## ✨ What Makes This Special
-
-1. **No Source Code Changes** ✅
- - RagBot repo untouched
- - Imports as package
- - Completely separate
-
-2. **Full Detail Preserved** ✅
- - Every agent output
- - All citations
- - Complete metadata
- - Nothing hidden
-
-3. **Natural Language + Structured** ✅
- - Both input methods
- - Automatic extraction
- - Or direct biomarkers
-
-4. **Production Ready** ✅
- - Error handling
- - Logging
- - Health checks
- - Docker support
-
-5. **Developer Friendly** ✅
- - Auto-generated docs
- - Type safety (Pydantic)
- - Hot reload
- - Test suite
-
----
-
-## 🎉 You're Ready!
-
-Everything is implemented and ready to use. Just:
-
-1. **Start Ollama:** `ollama serve`
-2. **Start API:** `python -m uvicorn app.main:app --reload --port 8000`
-3. **Test:** `.\test_api.ps1`
-4. **Integrate:** Make HTTP requests from your backend
-
-Your RagBot is now API-ready! 🚀
-
----
-
-## 🤝 Support
-
-- Check [README.md](README.md) for detailed docs
-- Check [QUICK_REFERENCE.md](QUICK_REFERENCE.md) for snippets
-- Visit http://localhost:8000/docs for interactive API docs
-- All code is well-commented
-
----
-
-**Built:** November 23, 2025
-**Status:** ✅ Production-Ready MVP
-**Lines of Code:** ~1,800 (API only)
-**Files Created:** 20
-**Time to Deploy:** 2 minutes with Docker
-
-🎊 **Congratulations! Your RAG-BOT is now web-ready!** 🎊
diff --git a/api/QUICK_REFERENCE.md b/api/QUICK_REFERENCE.md
index f3040e2eadfa0d7d9a6a02003913f38fdb9e8d27..7f6b030df7073ddd14548b3944bde56ed046f774 100644
--- a/api/QUICK_REFERENCE.md
+++ b/api/QUICK_REFERENCE.md
@@ -6,7 +6,7 @@
```powershell
# From api/ directory
cd C:\Users\admin\OneDrive\Documents\GitHub\RagBot\api
-python -m uvicorn app.main:app --reload --port 8000
+..\.venv\Scripts\python.exe -m uvicorn app.main:app --reload --port 8000
```
### Start API (Docker)
@@ -93,19 +93,18 @@ netstat -ano | findstr :8000
taskkill /PID /F
```
-### Ollama not connecting
+### LLM provider errors
```powershell
-# Check Ollama is running
-curl http://localhost:11434/api/version
-
-# Start Ollama if not running
-ollama serve
+# Check your .env has the right keys
+# Default provider is Groq (GROQ_API_KEY required)
+# Alternative: Google Gemini (GOOGLE_API_KEY)
+# Optional: Ollama (local, no key needed)
```
### Vector store not loading
```powershell
# From RagBot root
-python scripts/setup_embeddings.py
+.\.venv\Scripts\python.exe scripts/setup_embeddings.py
```
---
@@ -199,5 +198,5 @@ curl http://localhost:8000/api/v1/example
---
-**Last Updated:** 2025-11-23
+**Last Updated:** February 2026
**API Version:** 1.0.0
diff --git a/api/README.md b/api/README.md
index 323dec5cfa28b48681591efa2457459131c1e50d..ff7968e66b5704f21909befa984444e338bc2923 100644
--- a/api/README.md
+++ b/api/README.md
@@ -31,17 +31,11 @@ This API wraps the RagBot clinical analysis system, providing:
### Prerequisites
-1. **Ollama running locally**:
- ```bash
- ollama serve
- ```
-
-2. **Required models**:
- ```bash
- ollama pull llama3.1:8b-instruct
- ollama pull qwen2:7b
- ollama pull nomic-embed-text
- ```
+1. **Python 3.11+** installed
+2. **Free API key** from one of:
+ - [Groq](https://console.groq.com/keys) — Recommended (fast, free)
+ - [Google Gemini](https://aistudio.google.com/app/apikey) — Alternative
+3. **RagBot dependencies installed** (see root README)
### Option 1: Run Locally (Development)
@@ -53,8 +47,9 @@ cd api
pip install -r ../requirements.txt
pip install -r requirements.txt
-# Copy environment file
-cp .env.example .env
+# Ensure .env is configured in project root with your API keys
+# GROQ_API_KEY=gsk_...
+# LLM_PROVIDER=groq
# Run server
python -m uvicorn app.main:app --reload --port 8000
@@ -82,10 +77,10 @@ GET /api/v1/health
```json
{
"status": "healthy",
- "timestamp": "2025-11-23T10:30:00Z",
- "ollama_status": "connected",
+ "timestamp": "2026-02-23T10:30:00Z",
+ "llm_status": "connected",
"vector_store_loaded": true,
- "available_models": ["llama3.1:8b-instruct", "qwen2:7b"],
+ "available_models": ["llama-3.3-70b-versatile (Groq)"],
"uptime_seconds": 3600.0,
"version": "1.0.0"
}
@@ -406,10 +401,10 @@ api/
# Test health endpoint
curl http://localhost:8000/api/v1/health
-# Test example case (doesn't require Ollama extraction)
+# Test example case
curl http://localhost:8000/api/v1/example
-# Test natural language (requires Ollama)
+# Test natural language
curl -X POST http://localhost:8000/api/v1/analyze/natural \
-H "Content-Type: application/json" \
-d '{"message": "glucose 140, HbA1c 7.5"}'
@@ -427,17 +422,18 @@ uvicorn app.main:app --reload --port 8000
## 🔧 Troubleshooting
-### Issue: "Ollama connection failed"
+### Issue: "API key not found"
-**Symptom:** Health check shows `ollama_status: "disconnected"`
+**Symptom:** Health check shows `llm_status: "disconnected"`
**Solutions:**
-1. Start Ollama: `ollama serve`
-2. Check Ollama is running: `curl http://localhost:11434/api/version`
-3. Verify models are pulled:
+1. Ensure `.env` in project root has your API key:
```bash
- ollama list
+ GROQ_API_KEY=gsk_...
+ LLM_PROVIDER=groq
```
+2. Get a free key at https://console.groq.com/keys
+3. Restart the API server after editing `.env`
---
@@ -466,25 +462,30 @@ uvicorn app.main:app --reload --port 8000
---
-### Issue: Docker container can't reach Ollama
+### Issue: Docker container can't reach LLM API
**Symptom:** Container health check fails
**Solutions:**
+Ensure your API keys are passed as environment variables in `docker-compose.yml`:
+```yaml
+environment:
+ - GROQ_API_KEY=${GROQ_API_KEY}
+ - LLM_PROVIDER=groq
+```
+
+For local Ollama (optional):
+
**Windows/Mac (Docker Desktop):**
```yaml
-# In docker-compose.yml
environment:
- OLLAMA_BASE_URL=http://host.docker.internal:11434
```
**Linux:**
```yaml
-# In docker-compose.yml
network_mode: "host"
-environment:
- - OLLAMA_BASE_URL=http://localhost:11434
```
---
@@ -568,9 +569,9 @@ For issues or questions:
## 📊 Performance Notes
- **Initial startup:** 10-30 seconds (loads vector store)
-- **Analysis time:** 3-10 seconds per request
+- **Analysis time:** 15-25 seconds per request (6 agents + RAG retrieval)
- **Concurrent requests:** Supported (FastAPI async)
-- **Memory usage:** ~2-4GB (vector store + models)
+- **Memory usage:** ~2-4GB (vector store + embeddings model)
---
diff --git a/api/app/main.py b/api/app/main.py
index ba64bdaf757e06fba22540f2c082a2bb443c910b..dbe8e96d8b56175d6a3fdfa4e38c185700e86d36 100644
--- a/api/app/main.py
+++ b/api/app/main.py
@@ -38,25 +38,25 @@ async def lifespan(app: FastAPI):
Initializes RagBot service on startup (loads vector store, models).
"""
logger.info("=" * 70)
- logger.info("🚀 Starting RagBot API Server")
+ logger.info("Starting RagBot API Server")
logger.info("=" * 70)
# Startup: Initialize RagBot service
try:
ragbot_service = get_ragbot_service()
ragbot_service.initialize()
- logger.info("✅ RagBot service initialized successfully")
+ logger.info("RagBot service initialized successfully")
except Exception as e:
- logger.error(f"❌ Failed to initialize RagBot service: {e}")
- logger.warning("⚠️ API will start but health checks will fail")
+ logger.error(f"Failed to initialize RagBot service: {e}")
+ logger.warning("API will start but health checks will fail")
- logger.info("✅ API server ready to accept requests")
+ logger.info("API server ready to accept requests")
logger.info("=" * 70)
yield # Server runs here
# Shutdown
- logger.info("🛑 Shutting down RagBot API Server")
+ logger.info("Shutting down RagBot API Server")
# ============================================================================
diff --git a/api/app/routes/analyze.py b/api/app/routes/analyze.py
index 5b15184252e7fd8b5bc6726b652095943fce410f..f500bbfb549bc3a687efc3c2d7f21da8e5396c91 100644
--- a/api/app/routes/analyze.py
+++ b/api/app/routes/analyze.py
@@ -229,11 +229,11 @@ async def get_example():
"Platelets": 220000.0,
"Cholesterol": 235.0,
"Triglycerides": 210.0,
- "HDL": 38.0,
- "LDL": 165.0,
+ "HDL Cholesterol": 38.0,
+ "LDL Cholesterol": 165.0,
"BMI": 31.2,
- "Systolic BP": 142.0,
- "Diastolic BP": 88.0
+ "Systolic Blood Pressure": 142.0,
+ "Diastolic Blood Pressure": 88.0
}
patient_context = {
diff --git a/api/app/routes/biomarkers.py b/api/app/routes/biomarkers.py
index ebdf229721c2532920dbd5c208d8c2b54a584ded..15a63f5326d919c2b8a3e2dca766174454058e79 100644
--- a/api/app/routes/biomarkers.py
+++ b/api/app/routes/biomarkers.py
@@ -10,9 +10,6 @@ from fastapi import APIRouter, HTTPException
from app.models.schemas import BiomarkersListResponse, BiomarkerInfo, BiomarkerReferenceRange
-# Add parent to path
-sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
-
router = APIRouter(prefix="/api/v1", tags=["biomarkers"])
diff --git a/api/app/routes/health.py b/api/app/routes/health.py
index 0435b8c2222de8e7892a89e1f31477027d74d628..d151a18148ab309b6cbe538a4de086ce8e2c2e17 100644
--- a/api/app/routes/health.py
+++ b/api/app/routes/health.py
@@ -8,9 +8,6 @@ from pathlib import Path
from datetime import datetime
from fastapi import APIRouter, HTTPException
-# Add parent paths for imports
-sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
-
from app.models.schemas import HealthResponse
from app.services.ragbot import get_ragbot_service
from app import __version__
@@ -71,7 +68,7 @@ async def health_check():
return HealthResponse(
status=overall_status,
timestamp=datetime.now().isoformat(),
- ollama_status=llm_status, # Keep field name for backward compatibility
+ llm_status=llm_status,
vector_store_loaded=vector_store_loaded,
available_models=available_models,
uptime_seconds=ragbot_service.get_uptime_seconds(),
diff --git a/api/app/services/extraction.py b/api/app/services/extraction.py
index b6d315ed5cede2ba2b6b7598bb9ec149ac3f9dad..129af93a3ea85b4e3a8a3039c12e39756f3b688a 100644
--- a/api/app/services/extraction.py
+++ b/api/app/services/extraction.py
@@ -12,6 +12,7 @@ from typing import Dict, Any, Tuple
sys.path.insert(0, str(Path(__file__).parent.parent.parent.parent))
from langchain_core.prompts import ChatPromptTemplate
+from src.biomarker_normalization import normalize_biomarker_name
from src.llm_config import get_chat_model
@@ -48,96 +49,26 @@ If you cannot find any biomarkers, return {{"biomarkers": {{}}, "patient_context
# ============================================================================
-# BIOMARKER NAME NORMALIZATION
+# EXTRACTION HELPERS
# ============================================================================
-def normalize_biomarker_name(name: str) -> str:
- """
- Normalize biomarker names to standard format.
- Handles 30+ common variations (e.g., blood sugar -> Glucose)
-
- Args:
- name: Raw biomarker name from user input
-
- Returns:
- Standardized biomarker name
- """
- name_lower = name.lower().replace(" ", "").replace("-", "").replace("_", "")
-
- # Comprehensive mapping of variations to standard names
- mappings = {
- # Glucose variations
- "glucose": "Glucose",
- "bloodsugar": "Glucose",
- "bloodglucose": "Glucose",
-
- # Lipid panel
- "cholesterol": "Cholesterol",
- "totalcholesterol": "Cholesterol",
- "triglycerides": "Triglycerides",
- "trig": "Triglycerides",
- "ldl": "LDL",
- "ldlcholesterol": "LDL",
- "hdl": "HDL",
- "hdlcholesterol": "HDL",
-
- # Diabetes markers
- "hba1c": "HbA1c",
- "a1c": "HbA1c",
- "hemoglobina1c": "HbA1c",
- "insulin": "Insulin",
-
- # Body metrics
- "bmi": "BMI",
- "bodymassindex": "BMI",
-
- # Complete Blood Count (CBC)
- "hemoglobin": "Hemoglobin",
- "hgb": "Hemoglobin",
- "hb": "Hemoglobin",
- "platelets": "Platelets",
- "plt": "Platelets",
- "wbc": "WBC",
- "whitebloodcells": "WBC",
- "whitecells": "WBC",
- "rbc": "RBC",
- "redbloodcells": "RBC",
- "redcells": "RBC",
- "hematocrit": "Hematocrit",
- "hct": "Hematocrit",
-
- # Red blood cell indices
- "mcv": "MCV",
- "meancorpuscularvolume": "MCV",
- "mch": "MCH",
- "meancorpuscularhemoglobin": "MCH",
- "mchc": "MCHC",
-
- # Cardiovascular
- "heartrate": "Heart Rate",
- "hr": "Heart Rate",
- "pulse": "Heart Rate",
- "systolicbp": "Systolic BP",
- "systolic": "Systolic BP",
- "sbp": "Systolic BP",
- "diastolicbp": "Diastolic BP",
- "diastolic": "Diastolic BP",
- "dbp": "Diastolic BP",
- "troponin": "Troponin",
-
- # Inflammation and liver
- "creactiveprotein": "C-reactive Protein",
- "crp": "C-reactive Protein",
- "alt": "ALT",
- "alanineaminotransferase": "ALT",
- "ast": "AST",
- "aspartateaminotransferase": "AST",
-
- # Kidney
- "creatinine": "Creatinine",
- }
-
- return mappings.get(name_lower, name)
+def _parse_llm_json(content: str) -> Dict[str, Any]:
+ """Parse JSON payload from LLM output with fallback recovery."""
+ text = content.strip()
+
+ if "```json" in text:
+ text = text.split("```json")[1].split("```")[0].strip()
+ elif "```" in text:
+ text = text.split("```")[1].split("```")[0].strip()
+
+ try:
+ return json.loads(text)
+ except json.JSONDecodeError:
+ left = text.find("{")
+ right = text.rfind("}")
+ if left != -1 and right != -1 and right > left:
+ return json.loads(text[left:right + 1])
+ raise
# ============================================================================
@@ -177,13 +108,7 @@ def extract_biomarkers(
response = chain.invoke({"user_message": user_message})
content = response.content.strip()
- # Parse JSON from LLM response (handle markdown code blocks)
- if "```json" in content:
- content = content.split("```json")[1].split("```")[0].strip()
- elif "```" in content:
- content = content.split("```")[1].split("```")[0].strip()
-
- extracted = json.loads(content)
+ extracted = _parse_llm_json(content)
biomarkers = extracted.get("biomarkers", {})
patient_context = extracted.get("patient_context", {})
@@ -235,63 +160,73 @@ def predict_disease_simple(biomarkers: Dict[str, float]) -> Dict[str, Any]:
"Thalassemia": 0.0
}
+ # Helper: check both abbreviated and normalized biomarker names
+ # Returns None when biomarker is not present (avoids false triggers)
+ def _get(name, *alt_names):
+ val = biomarkers.get(name, None)
+ if val is not None:
+ return val
+ for alt in alt_names:
+ val = biomarkers.get(alt, None)
+ if val is not None:
+ return val
+ return None
+
# Diabetes indicators
- glucose = biomarkers.get("Glucose", 0)
- hba1c = biomarkers.get("HbA1c", 0)
- if glucose > 126:
+ glucose = _get("Glucose")
+ hba1c = _get("HbA1c")
+ if glucose is not None and glucose > 126:
scores["Diabetes"] += 0.4
- if glucose > 180:
+ if glucose is not None and glucose > 180:
scores["Diabetes"] += 0.2
- if hba1c >= 6.5:
+ if hba1c is not None and hba1c >= 6.5:
scores["Diabetes"] += 0.5
# Anemia indicators
- hemoglobin = biomarkers.get("Hemoglobin", 0)
- mcv = biomarkers.get("MCV", 0)
- if hemoglobin < 12.0:
+ hemoglobin = _get("Hemoglobin")
+ mcv = _get("Mean Corpuscular Volume", "MCV")
+ if hemoglobin is not None and hemoglobin < 12.0:
scores["Anemia"] += 0.6
- if hemoglobin < 10.0:
+ if hemoglobin is not None and hemoglobin < 10.0:
scores["Anemia"] += 0.2
- if mcv < 80:
+ if mcv is not None and mcv < 80:
scores["Anemia"] += 0.2
# Heart disease indicators
- cholesterol = biomarkers.get("Cholesterol", 0)
- troponin = biomarkers.get("Troponin", 0)
- ldl = biomarkers.get("LDL", 0)
- if cholesterol > 240:
+ cholesterol = _get("Cholesterol")
+ troponin = _get("Troponin")
+ ldl = _get("LDL Cholesterol", "LDL")
+ if cholesterol is not None and cholesterol > 240:
scores["Heart Disease"] += 0.3
- if troponin > 0.04:
+ if troponin is not None and troponin > 0.04:
scores["Heart Disease"] += 0.6
- if ldl > 190:
+ if ldl is not None and ldl > 190:
scores["Heart Disease"] += 0.2
# Thrombocytopenia indicators
- platelets = biomarkers.get("Platelets", 0)
- if platelets < 150000:
+ platelets = _get("Platelets")
+ if platelets is not None and platelets < 150000:
scores["Thrombocytopenia"] += 0.6
- if platelets < 50000:
+ if platelets is not None and platelets < 50000:
scores["Thrombocytopenia"] += 0.3
# Thalassemia indicators (simplified)
- if mcv < 80 and hemoglobin < 12.0:
+ if mcv is not None and hemoglobin is not None and mcv < 80 and hemoglobin < 12.0:
scores["Thalassemia"] += 0.4
# Find top prediction
top_disease = max(scores, key=scores.get)
- confidence = scores[top_disease]
-
- # Ensure minimum confidence
- if confidence < 0.5:
- confidence = 0.5
- top_disease = "Diabetes" # Default
+ confidence = min(scores[top_disease], 1.0) # Cap at 1.0 for Pydantic validation
+
+ if confidence == 0.0:
+ top_disease = "Undetermined"
# Normalize probabilities to sum to 1.0
total = sum(scores.values())
if total > 0:
- probabilities = {k: v/total for k, v in scores.items()}
+ probabilities = {k: v / total for k, v in scores.items()}
else:
- probabilities = {k: 1.0/len(scores) for k in scores}
+ probabilities = {k: 1.0 / len(scores) for k in scores}
return {
"disease": top_disease,
diff --git a/api/app/services/ragbot.py b/api/app/services/ragbot.py
index 86d8270f2b3dd18cc4663a0f5552f6d79eac5dd9..a5e9250ea9657dde46536fb15ea79845aca8c70b 100644
--- a/api/app/services/ragbot.py
+++ b/api/app/services/ragbot.py
@@ -39,7 +39,7 @@ class RagBotService:
if self.initialized:
return
- print("🔧 Initializing RagBot workflow...")
+ print("INFO: Initializing RagBot workflow...")
start_time = time.time()
# Save current directory
@@ -51,17 +51,17 @@ class RagBotService:
# This ensures vector store paths resolve correctly
ragbot_root = Path(__file__).parent.parent.parent.parent
os.chdir(ragbot_root)
- print(f"📂 Working directory: {ragbot_root}")
+ print(f"INFO: Working directory: {ragbot_root}")
self.guild = create_guild()
self.initialized = True
self.init_time = datetime.now()
elapsed = (time.time() - start_time) * 1000
- print(f"✅ RagBot initialized successfully ({elapsed:.0f}ms)")
+ print(f"OK: RagBot initialized successfully ({elapsed:.0f}ms)")
except Exception as e:
- print(f"❌ Failed to initialize RagBot: {e}")
+ print(f"ERROR: Failed to initialize RagBot: {e}")
raise
finally:
@@ -132,7 +132,7 @@ class RagBotService:
except Exception as e:
# Re-raise with context
- raise RuntimeError(f"Analysis failed: {str(e)}") from e
+ raise RuntimeError(f"Analysis failed during workflow execution: {str(e)}") from e
def _format_response(
self,
@@ -147,8 +147,18 @@ class RagBotService:
"""
Format complete detailed response from workflow result.
Preserves ALL data from workflow execution.
+
+ workflow_result is now the full LangGraph state dict containing:
+ - final_response: dict from response_synthesizer
+ - agent_outputs: list of AgentOutput objects
+ - biomarker_flags: list of BiomarkerFlag objects
+ - safety_alerts: list of SafetyAlert objects
+ - sop_version, processing_timestamp, etc.
"""
+ # The synthesizer output is nested inside final_response
+ final_response = workflow_result.get("final_response", {}) or {}
+
# Extract main prediction
prediction = Prediction(
disease=model_prediction["disease"],
@@ -156,35 +166,68 @@ class RagBotService:
probabilities=model_prediction.get("probabilities", {})
)
- # Extract biomarker flags
- biomarker_flags = [
- BiomarkerFlag(**flag)
- for flag in workflow_result.get("biomarker_flags", [])
- ]
-
- # Extract safety alerts
- safety_alerts = [
- SafetyAlert(**alert)
- for alert in workflow_result.get("safety_alerts", [])
- ]
-
- # Extract key drivers
- key_drivers_data = workflow_result.get("key_drivers", [])
+ # Biomarker flags: prefer state-level data (BiomarkerFlag objects from validator),
+ # fall back to synthesizer output
+ state_flags = workflow_result.get("biomarker_flags", [])
+ if state_flags:
+ biomarker_flags = []
+ for flag in state_flags:
+ if hasattr(flag, 'model_dump'):
+ biomarker_flags.append(BiomarkerFlag(**flag.model_dump()))
+ elif isinstance(flag, dict):
+ biomarker_flags.append(BiomarkerFlag(**flag))
+ else:
+ biomarker_flags_source = final_response.get("biomarker_flags", [])
+ if not biomarker_flags_source:
+ biomarker_flags_source = final_response.get("analysis", {}).get("biomarker_flags", [])
+ biomarker_flags = [
+ BiomarkerFlag(**flag) if isinstance(flag, dict) else BiomarkerFlag(**flag.model_dump())
+ for flag in biomarker_flags_source
+ ]
+
+ # Safety alerts: prefer state-level data, fall back to synthesizer
+ state_alerts = workflow_result.get("safety_alerts", [])
+ if state_alerts:
+ safety_alerts = []
+ for alert in state_alerts:
+ if hasattr(alert, 'model_dump'):
+ safety_alerts.append(SafetyAlert(**alert.model_dump()))
+ elif isinstance(alert, dict):
+ safety_alerts.append(SafetyAlert(**alert))
+ else:
+ safety_alerts_source = final_response.get("safety_alerts", [])
+ if not safety_alerts_source:
+ safety_alerts_source = final_response.get("analysis", {}).get("safety_alerts", [])
+ safety_alerts = [
+ SafetyAlert(**alert) if isinstance(alert, dict) else SafetyAlert(**alert.model_dump())
+ for alert in safety_alerts_source
+ ]
+
+ # Extract key drivers from synthesizer output
+ key_drivers_data = final_response.get("key_drivers", [])
+ if not key_drivers_data:
+ key_drivers_data = final_response.get("analysis", {}).get("key_drivers", [])
key_drivers = []
for driver in key_drivers_data:
if isinstance(driver, dict):
key_drivers.append(KeyDriver(**driver))
- # Disease explanation
- disease_exp_data = workflow_result.get("disease_explanation", {})
+ # Disease explanation from synthesizer
+ disease_exp_data = final_response.get("disease_explanation", {})
+ if not disease_exp_data:
+ disease_exp_data = final_response.get("analysis", {}).get("disease_explanation", {})
disease_explanation = DiseaseExplanation(
pathophysiology=disease_exp_data.get("pathophysiology", ""),
citations=disease_exp_data.get("citations", []),
retrieved_chunks=disease_exp_data.get("retrieved_chunks")
)
- # Recommendations
- recs_data = workflow_result.get("recommendations", {})
+ # Recommendations from synthesizer
+ recs_data = final_response.get("recommendations", {})
+ if not recs_data:
+ recs_data = final_response.get("clinical_recommendations", {})
+ if not recs_data:
+ recs_data = final_response.get("analysis", {}).get("recommendations", {})
recommendations = Recommendations(
immediate_actions=recs_data.get("immediate_actions", []),
lifestyle_changes=recs_data.get("lifestyle_changes", []),
@@ -192,8 +235,10 @@ class RagBotService:
follow_up=recs_data.get("follow_up")
)
- # Confidence assessment
- conf_data = workflow_result.get("confidence_assessment", {})
+ # Confidence assessment from synthesizer
+ conf_data = final_response.get("confidence_assessment", {})
+ if not conf_data:
+ conf_data = final_response.get("analysis", {}).get("confidence_assessment", {})
confidence_assessment = ConfidenceAssessment(
prediction_reliability=conf_data.get("prediction_reliability", "UNKNOWN"),
evidence_strength=conf_data.get("evidence_strength", "UNKNOWN"),
@@ -202,7 +247,9 @@ class RagBotService:
)
# Alternative diagnoses
- alternative_diagnoses = workflow_result.get("alternative_diagnoses")
+ alternative_diagnoses = final_response.get("alternative_diagnoses")
+ if alternative_diagnoses is None:
+ alternative_diagnoses = final_response.get("analysis", {}).get("alternative_diagnoses")
# Assemble complete analysis
analysis = Analysis(
@@ -215,11 +262,13 @@ class RagBotService:
alternative_diagnoses=alternative_diagnoses
)
- # Agent outputs (preserve full detail)
+ # Agent outputs from state (these are src.state.AgentOutput objects)
agent_outputs_data = workflow_result.get("agent_outputs", [])
agent_outputs = []
for agent_out in agent_outputs_data:
- if isinstance(agent_out, dict):
+ if hasattr(agent_out, 'model_dump'):
+ agent_outputs.append(AgentOutput(**agent_out.model_dump()))
+ elif isinstance(agent_out, dict):
agent_outputs.append(AgentOutput(**agent_out))
# Workflow metadata
@@ -231,7 +280,9 @@ class RagBotService:
}
# Conversational summary (if available)
- conversational_summary = workflow_result.get("conversational_summary")
+ conversational_summary = final_response.get("conversational_summary")
+ if not conversational_summary:
+ conversational_summary = final_response.get("patient_summary", {}).get("narrative")
# Generate conversational summary if not present
if not conversational_summary:
@@ -271,34 +322,33 @@ class RagBotService:
"""Generate a simple conversational summary"""
summary_parts = []
- summary_parts.append("Hi there! 👋\n")
+ summary_parts.append("Hi there!\n")
summary_parts.append("Based on your biomarkers, I analyzed your results.\n")
# Prediction
- confidence_emoji = "🔴" if prediction.confidence > 0.7 else "🟡"
- summary_parts.append(f"\n{confidence_emoji} **Primary Finding:** {prediction.disease}")
+ summary_parts.append(f"\nPrimary Finding: {prediction.disease}")
summary_parts.append(f" Confidence: {prediction.confidence:.0%}\n")
# Safety alerts
if safety_alerts:
- summary_parts.append("\n⚠️ **IMPORTANT SAFETY ALERTS:**")
+ summary_parts.append("\nIMPORTANT SAFETY ALERTS:")
for alert in safety_alerts[:3]: # Top 3
- summary_parts.append(f" • {alert.biomarker}: {alert.message}")
- summary_parts.append(f" → {alert.action}")
+ summary_parts.append(f" - {alert.biomarker}: {alert.message}")
+ summary_parts.append(f" Action: {alert.action}")
# Key drivers
if key_drivers:
- summary_parts.append("\n🔍 **Why this prediction?**")
+ summary_parts.append("\nWhy this prediction?")
for driver in key_drivers[:3]: # Top 3
- summary_parts.append(f" • **{driver.biomarker}** ({driver.value}): {driver.explanation[:100]}...")
+ summary_parts.append(f" - {driver.biomarker} ({driver.value}): {driver.explanation[:100]}...")
# Recommendations
if recommendations.immediate_actions:
- summary_parts.append("\n✅ **What You Should Do:**")
+ summary_parts.append("\nWhat You Should Do:")
for i, action in enumerate(recommendations.immediate_actions[:3], 1):
summary_parts.append(f" {i}. {action}")
- summary_parts.append("\nℹ️ **Important:** This is an AI-assisted analysis, NOT medical advice.")
+ summary_parts.append("\nImportant: This is an AI-assisted analysis, NOT medical advice.")
summary_parts.append(" Please consult a healthcare professional for proper diagnosis and treatment.")
return "\n".join(summary_parts)
diff --git a/config/biomarker_references.json b/config/biomarker_references.json
index 480a92f44a252253a4e677aef560803165e86752..0bbf8a08a129450308c6b981ccda6a581f0d3b78 100644
--- a/config/biomarker_references.json
+++ b/config/biomarker_references.json
@@ -3,8 +3,8 @@
"Glucose": {
"unit": "mg/dL",
"normal_range": {"min": 70, "max": 100},
- "critical_low": 70,
- "critical_high": 126,
+ "critical_low": 54,
+ "critical_high": 400,
"type": "fasting",
"gender_specific": false,
"description": "Fasting blood glucose level",
@@ -142,8 +142,8 @@
"BMI": {
"unit": "kg/m²",
"normal_range": {"min": 18.5, "max": 24.9},
- "critical_low": 18.5,
- "critical_high": 30,
+ "critical_low": 15,
+ "critical_high": 50,
"gender_specific": false,
"description": "Body Mass Index",
"clinical_significance": {
@@ -154,8 +154,8 @@
"Systolic Blood Pressure": {
"unit": "mmHg",
"normal_range": {"min": 90, "max": 120},
- "critical_low": 90,
- "critical_high": 140,
+ "critical_low": 70,
+ "critical_high": 180,
"gender_specific": false,
"description": "Blood pressure during heart contraction",
"clinical_significance": {
@@ -166,8 +166,8 @@
"Diastolic Blood Pressure": {
"unit": "mmHg",
"normal_range": {"min": 60, "max": 80},
- "critical_low": 60,
- "critical_high": 90,
+ "critical_low": 40,
+ "critical_high": 120,
"gender_specific": false,
"description": "Blood pressure during heart relaxation",
"clinical_significance": {
@@ -190,7 +190,7 @@
"unit": "%",
"normal_range": {"min": 0, "max": 5.7},
"critical_low": null,
- "critical_high": 6.5,
+ "critical_high": 14,
"gender_specific": false,
"description": "3-month average blood glucose",
"clinical_significance": {
@@ -274,7 +274,7 @@
"unit": "ng/mL",
"normal_range": {"min": 0, "max": 0.04},
"critical_low": null,
- "critical_high": 0.04,
+ "critical_high": 0.4,
"gender_specific": false,
"description": "Cardiac muscle damage marker",
"clinical_significance": {
diff --git a/data/chat_reports/report_Diabetes_20260223_124903.json b/data/chat_reports/report_Diabetes_20260223_124903.json
new file mode 100644
index 0000000000000000000000000000000000000000..ea5398e1333690f8dd7e6c7980bbf35aeca0d875
--- /dev/null
+++ b/data/chat_reports/report_Diabetes_20260223_124903.json
@@ -0,0 +1,322 @@
+{
+ "timestamp": "20260223_124903",
+ "biomarkers_input": {
+ "Glucose": 140.0,
+ "HbA1c": 7.5
+ },
+ "final_response": {
+ "patient_summary": {
+ "total_biomarkers_tested": 2,
+ "biomarkers_in_normal_range": 0,
+ "biomarkers_out_of_range": 2,
+ "critical_values": 0,
+ "overall_risk_profile": "The patient's biomarker results indicate a high risk profile for diabetes, with both glucose and HbA1c levels exceeding normal ranges. The most concerning findings are the elevated glucose level of 140.0 mg/dL and HbA1c level of 7.5%, which suggest impaired glucose regulation. These results align with the predicted disease of diabetes, supporting the likelihood of an underlying diabetic condition.",
+ "narrative": "Based on your test results, it's likely that you may have diabetes, with our system showing an 85% confidence level in this prediction. Your glucose and HbA1c levels, which are important indicators of blood sugar control, are higher than normal, suggesting that your body may be having trouble regulating its blood sugar levels. I want to emphasize that it's essential to discuss these results with your doctor, who can provide a definitive diagnosis and guidance on the best course of action. Please know that while these results may be concerning, many people with diabetes are able to manage their condition and lead healthy, active lives with the right treatment and support."
+ },
+ "prediction_explanation": {
+ "primary_disease": "Diabetes",
+ "confidence": 0.85,
+ "key_drivers": [
+ {
+ "biomarker": "Glucose",
+ "value": 140.0,
+ "contribution": "31%",
+ "explanation": "Your glucose level is 140.0 mg/dL, which is higher than normal, indicating that you may have hyperglycemia, a condition where there is too much sugar in the blood, which is a key characteristic of diabetes. This result suggests that you may be at risk for diabetes or may already have the condition, and further evaluation and management may be necessary to prevent complications.",
+ "evidence": "3 Prevention and management \nof complications of diabetes \nAcute complications of diabetes\nTwo important acute complications are hypoglycaemia and hyperglycaemic \nemergencies. Hypoglycaemia\nHypoglycae"
+ },
+ {
+ "biomarker": "HbA1c",
+ "value": 7.5,
+ "contribution": "31%",
+ "explanation": "Your HbA1c result of 7.5% is higher than the target level of 7%, which may indicate that your blood sugar levels are not well-controlled, suggesting a possible diagnosis of Type 2 Diabetes. This means that your body may not be producing or using insulin properly, leading to elevated blood glucose levels, and your doctor may use this result as part of a comprehensive evaluation to determine the best course of treatment.",
+ "evidence": "Diabetes (Type 2) \u2014 Extensive RAG Reference\nGenerated for MediGuard AI RAG-Helper \u007f 2025-11-22\n1. What diabetes is (focused on Type 2)\nDiabetes mellitus is a chronic metabolic disease characterized by"
+ }
+ ],
+ "mechanism_summary": "",
+ "pathophysiology": "Diabetes mellitus is a group of metabolic disorders characterized by the presence of hyperglycemia due to defects in insulin secretion, insulin action, or both. The underlying biological mechanisms involve impaired insulin secretion, insulin resistance, or a combination of both, leading to elevated blood glucose levels. This can result from various factors, including genetic predisposition, autoimmune destruction of beta-cells, infection-related beta-cell destruction, and other rare immune-mediated diseases. The persistent hyperglycemia can damage blood vessels and nerves, increasing the risk of cardiovascular disease, kidney failure, vision loss, and neuropathy.\n",
+ "pdf_references": [
+ "diabetes.pdf (Page 8)",
+ "diabetes.pdf (Page 4)",
+ "diabetes.pdf (Page 11)",
+ "MediGuard_Diabetes_Guidelines_Extensive.pdf (Page 0)",
+ "diabetes.pdf (Page 10)"
+ ]
+ },
+ "confidence_assessment": {
+ "prediction_reliability": "MODERATE",
+ "evidence_strength": "MODERATE",
+ "limitations": [
+ "Missing data: 22 biomarker(s) not provided",
+ "Multiple critical values detected; professional evaluation essential"
+ ],
+ "recommendation": "Moderate confidence prediction. Medical consultation recommended for professional evaluation and additional testing if needed.",
+ "assessment_summary": "The overall reliability of this prediction is moderate, with an 85% confidence level from the ML model, indicating a reasonable likelihood of diabetes but also some degree of uncertainty. Key limitations, including two identified, suggest that while the evidence strength is moderate, there are potential weaknesses in the assessment that could impact accuracy. Therefore, it is essential to consult a professional medical practitioner to confirm the diagnosis and develop an appropriate treatment plan, as patient safety and accurate diagnosis are paramount.",
+ "alternative_diagnoses": [
+ {
+ "disease": "Anemia",
+ "probability": 0.08,
+ "note": "Consider discussing with healthcare provider"
+ }
+ ]
+ },
+ "safety_alerts": [
+ {
+ "severity": "MEDIUM",
+ "biomarker": "Glucose",
+ "message": "Glucose is 140.0 mg/dL, above normal range (70-100 mg/dL). Hyperglycemia - diabetes risk, requires further testing",
+ "action": "Consult with healthcare provider"
+ },
+ {
+ "severity": "MEDIUM",
+ "biomarker": "HbA1c",
+ "message": "HbA1c is 7.5 %, above normal range (0-5.7 %). Diabetes (\u00e2\u2030\u00a56.5%), Prediabetes (5.7-6.4%)",
+ "action": "Consult with healthcare provider"
+ }
+ ],
+ "metadata": {
+ "timestamp": "2026-02-23T12:46:39.146732",
+ "system_version": "MediGuard AI RAG-Helper v1.0",
+ "sop_version": "Baseline",
+ "agents_executed": [
+ "Biomarker Analyzer",
+ "Biomarker-Disease Linker",
+ "Clinical Guidelines",
+ "Disease Explainer",
+ "Confidence Assessor"
+ ],
+ "disclaimer": "This is an AI-assisted analysis tool for patient self-assessment. It is NOT a substitute for professional medical advice, diagnosis, or treatment. Always consult qualified healthcare providers for medical decisions."
+ },
+ "biomarker_flags": [
+ {
+ "name": "Glucose",
+ "value": 140.0,
+ "unit": "mg/dL",
+ "status": "HIGH",
+ "reference_range": "70-100 mg/dL",
+ "warning": "Glucose is 140.0 mg/dL, above normal range (70-100 mg/dL). Hyperglycemia - diabetes risk, requires further testing"
+ },
+ {
+ "name": "HbA1c",
+ "value": 7.5,
+ "unit": "%",
+ "status": "HIGH",
+ "reference_range": "0-5.7 %",
+ "warning": "HbA1c is 7.5 %, above normal range (0-5.7 %). Diabetes (\u00e2\u2030\u00a56.5%), Prediabetes (5.7-6.4%)"
+ }
+ ],
+ "key_drivers": [
+ {
+ "biomarker": "Glucose",
+ "value": 140.0,
+ "contribution": "31%",
+ "explanation": "Your glucose level is 140.0 mg/dL, which is higher than normal, indicating that you may have hyperglycemia, a condition where there is too much sugar in the blood, which is a key characteristic of diabetes. This result suggests that you may be at risk for diabetes or may already have the condition, and further evaluation and management may be necessary to prevent complications.",
+ "evidence": "3 Prevention and management \nof complications of diabetes \nAcute complications of diabetes\nTwo important acute complications are hypoglycaemia and hyperglycaemic \nemergencies. Hypoglycaemia\nHypoglycaemia (abnormally low blood glucose) is a frequent iatrogenic \ncomplication in diabetic patients, occurring particularly in patients receiving \nsulfonylurea or insulin. Introduction\nDefinition of diabetes\nDiabetes mellitus, commonly known as diabetes, is a group of metabolic disorders \ncharacterized b"
+ },
+ {
+ "biomarker": "HbA1c",
+ "value": 7.5,
+ "contribution": "31%",
+ "explanation": "Your HbA1c result of 7.5% is higher than the target level of 7%, which may indicate that your blood sugar levels are not well-controlled, suggesting a possible diagnosis of Type 2 Diabetes. This means that your body may not be producing or using insulin properly, leading to elevated blood glucose levels, and your doctor may use this result as part of a comprehensive evaluation to determine the best course of treatment.",
+ "evidence": "Diabetes (Type 2) \u2014 Extensive RAG Reference\nGenerated for MediGuard AI RAG-Helper \u007f 2025-11-22\n1. What diabetes is (focused on Type 2)\nDiabetes mellitus is a chronic metabolic disease characterized by elevated blood glucose due to impaired\ninsulin secretion, insulin action, or both. \u2022 The majority of patients can be expected to aim for an HbA1c of 7."
+ }
+ ],
+ "disease_explanation": {
+ "pathophysiology": "Diabetes mellitus is a group of metabolic disorders characterized by the presence of hyperglycemia due to defects in insulin secretion, insulin action, or both. The underlying biological mechanisms involve impaired insulin secretion, insulin resistance, or a combination of both, leading to elevated blood glucose levels. This can result from various factors, including genetic predisposition, autoimmune destruction of beta-cells, infection-related beta-cell destruction, and other rare immune-mediated diseases. The persistent hyperglycemia can damage blood vessels and nerves, increasing the risk of cardiovascular disease, kidney failure, vision loss, and neuropathy.\n",
+ "citations": [
+ "diabetes.pdf (Page 8)",
+ "diabetes.pdf (Page 4)",
+ "diabetes.pdf (Page 11)",
+ "MediGuard_Diabetes_Guidelines_Extensive.pdf (Page 0)",
+ "diabetes.pdf (Page 10)"
+ ],
+ "retrieved_chunks": null
+ },
+ "recommendations": {
+ "immediate_actions": [
+ "Consult a healthcare professional** as soon as possible for a comprehensive diagnosis and to discuss treatment options.",
+ "Monitor blood glucose levels** frequently, as advised by your healthcare provider, to understand patterns and the impact of any interventions.",
+ "Stay hydrated** by drinking plenty of water to help your body absorb glucose."
+ ],
+ "lifestyle_changes": [
+ "Exercise:** Engage in at least 150 minutes of moderate-intensity aerobic exercise, or 75 minutes of vigorous-intensity aerobic exercise, or a combination of both, per week. Additionally, incorporate strength-training activities at least twice a week.",
+ "Stress Management:** Practice stress-reducing techniques such as meditation, yoga, or deep breathing exercises."
+ ],
+ "monitoring": [
+ "Blood Glucose:** Monitor your blood glucose levels as advised by your healthcare provider, typically before meals and at bedtime.",
+ "HbA1c:** Have your HbA1c levels checked at least twice a year to assess your average blood glucose control over the past 2-3 months.",
+ "Blood Pressure and Lipids:** Regularly check your blood pressure and lipid profiles, as diabetes increases the risk of cardiovascular diseases.",
+ "Foot Care:** Daily inspect your feet for any signs of injury or infection, and have a comprehensive foot exam by a healthcare professional at least once a year.",
+ "Remember:** These recommendations are based on general guidelines and may need to be tailored to your specific situation by a healthcare professional. Always consult with your doctor or a qualified healthcare provider for personalized advice on managing diabetes."
+ ],
+ "guideline_citations": [
+ "diabetes.pdf"
+ ]
+ },
+ "clinical_recommendations": {
+ "immediate_actions": [
+ "Consult a healthcare professional** as soon as possible for a comprehensive diagnosis and to discuss treatment options.",
+ "Monitor blood glucose levels** frequently, as advised by your healthcare provider, to understand patterns and the impact of any interventions.",
+ "Stay hydrated** by drinking plenty of water to help your body absorb glucose."
+ ],
+ "lifestyle_changes": [
+ "Exercise:** Engage in at least 150 minutes of moderate-intensity aerobic exercise, or 75 minutes of vigorous-intensity aerobic exercise, or a combination of both, per week. Additionally, incorporate strength-training activities at least twice a week.",
+ "Stress Management:** Practice stress-reducing techniques such as meditation, yoga, or deep breathing exercises."
+ ],
+ "monitoring": [
+ "Blood Glucose:** Monitor your blood glucose levels as advised by your healthcare provider, typically before meals and at bedtime.",
+ "HbA1c:** Have your HbA1c levels checked at least twice a year to assess your average blood glucose control over the past 2-3 months.",
+ "Blood Pressure and Lipids:** Regularly check your blood pressure and lipid profiles, as diabetes increases the risk of cardiovascular diseases.",
+ "Foot Care:** Daily inspect your feet for any signs of injury or infection, and have a comprehensive foot exam by a healthcare professional at least once a year.",
+ "Remember:** These recommendations are based on general guidelines and may need to be tailored to your specific situation by a healthcare professional. Always consult with your doctor or a qualified healthcare provider for personalized advice on managing diabetes."
+ ],
+ "guideline_citations": [
+ "diabetes.pdf"
+ ]
+ },
+ "alternative_diagnoses": [
+ {
+ "disease": "Anemia",
+ "probability": 0.08,
+ "note": "Consider discussing with healthcare provider"
+ }
+ ],
+ "analysis": {
+ "biomarker_flags": [
+ {
+ "name": "Glucose",
+ "value": 140.0,
+ "unit": "mg/dL",
+ "status": "HIGH",
+ "reference_range": "70-100 mg/dL",
+ "warning": "Glucose is 140.0 mg/dL, above normal range (70-100 mg/dL). Hyperglycemia - diabetes risk, requires further testing"
+ },
+ {
+ "name": "HbA1c",
+ "value": 7.5,
+ "unit": "%",
+ "status": "HIGH",
+ "reference_range": "0-5.7 %",
+ "warning": "HbA1c is 7.5 %, above normal range (0-5.7 %). Diabetes (\u00e2\u2030\u00a56.5%), Prediabetes (5.7-6.4%)"
+ }
+ ],
+ "safety_alerts": [
+ {
+ "severity": "MEDIUM",
+ "biomarker": "Glucose",
+ "message": "Glucose is 140.0 mg/dL, above normal range (70-100 mg/dL). Hyperglycemia - diabetes risk, requires further testing",
+ "action": "Consult with healthcare provider"
+ },
+ {
+ "severity": "MEDIUM",
+ "biomarker": "HbA1c",
+ "message": "HbA1c is 7.5 %, above normal range (0-5.7 %). Diabetes (\u00e2\u2030\u00a56.5%), Prediabetes (5.7-6.4%)",
+ "action": "Consult with healthcare provider"
+ }
+ ],
+ "key_drivers": [
+ {
+ "biomarker": "Glucose",
+ "value": 140.0,
+ "contribution": "31%",
+ "explanation": "Your glucose level is 140.0 mg/dL, which is higher than normal, indicating that you may have hyperglycemia, a condition where there is too much sugar in the blood, which is a key characteristic of diabetes. This result suggests that you may be at risk for diabetes or may already have the condition, and further evaluation and management may be necessary to prevent complications.",
+ "evidence": "3 Prevention and management \nof complications of diabetes \nAcute complications of diabetes\nTwo important acute complications are hypoglycaemia and hyperglycaemic \nemergencies. Hypoglycaemia\nHypoglycaemia (abnormally low blood glucose) is a frequent iatrogenic \ncomplication in diabetic patients, occurring particularly in patients receiving \nsulfonylurea or insulin. Introduction\nDefinition of diabetes\nDiabetes mellitus, commonly known as diabetes, is a group of metabolic disorders \ncharacterized b"
+ },
+ {
+ "biomarker": "HbA1c",
+ "value": 7.5,
+ "contribution": "31%",
+ "explanation": "Your HbA1c result of 7.5% is higher than the target level of 7%, which may indicate that your blood sugar levels are not well-controlled, suggesting a possible diagnosis of Type 2 Diabetes. This means that your body may not be producing or using insulin properly, leading to elevated blood glucose levels, and your doctor may use this result as part of a comprehensive evaluation to determine the best course of treatment.",
+ "evidence": "Diabetes (Type 2) \u2014 Extensive RAG Reference\nGenerated for MediGuard AI RAG-Helper \u007f 2025-11-22\n1. What diabetes is (focused on Type 2)\nDiabetes mellitus is a chronic metabolic disease characterized by elevated blood glucose due to impaired\ninsulin secretion, insulin action, or both. \u2022 The majority of patients can be expected to aim for an HbA1c of 7."
+ }
+ ],
+ "disease_explanation": {
+ "pathophysiology": "Diabetes mellitus is a group of metabolic disorders characterized by the presence of hyperglycemia due to defects in insulin secretion, insulin action, or both. The underlying biological mechanisms involve impaired insulin secretion, insulin resistance, or a combination of both, leading to elevated blood glucose levels. This can result from various factors, including genetic predisposition, autoimmune destruction of beta-cells, infection-related beta-cell destruction, and other rare immune-mediated diseases. The persistent hyperglycemia can damage blood vessels and nerves, increasing the risk of cardiovascular disease, kidney failure, vision loss, and neuropathy.\n",
+ "citations": [
+ "diabetes.pdf (Page 8)",
+ "diabetes.pdf (Page 4)",
+ "diabetes.pdf (Page 11)",
+ "MediGuard_Diabetes_Guidelines_Extensive.pdf (Page 0)",
+ "diabetes.pdf (Page 10)"
+ ],
+ "retrieved_chunks": null
+ },
+ "recommendations": {
+ "immediate_actions": [
+ "Consult a healthcare professional** as soon as possible for a comprehensive diagnosis and to discuss treatment options.",
+ "Monitor blood glucose levels** frequently, as advised by your healthcare provider, to understand patterns and the impact of any interventions.",
+ "Stay hydrated** by drinking plenty of water to help your body absorb glucose."
+ ],
+ "lifestyle_changes": [
+ "Exercise:** Engage in at least 150 minutes of moderate-intensity aerobic exercise, or 75 minutes of vigorous-intensity aerobic exercise, or a combination of both, per week. Additionally, incorporate strength-training activities at least twice a week.",
+ "Stress Management:** Practice stress-reducing techniques such as meditation, yoga, or deep breathing exercises."
+ ],
+ "monitoring": [
+ "Blood Glucose:** Monitor your blood glucose levels as advised by your healthcare provider, typically before meals and at bedtime.",
+ "HbA1c:** Have your HbA1c levels checked at least twice a year to assess your average blood glucose control over the past 2-3 months.",
+ "Blood Pressure and Lipids:** Regularly check your blood pressure and lipid profiles, as diabetes increases the risk of cardiovascular diseases.",
+ "Foot Care:** Daily inspect your feet for any signs of injury or infection, and have a comprehensive foot exam by a healthcare professional at least once a year.",
+ "Remember:** These recommendations are based on general guidelines and may need to be tailored to your specific situation by a healthcare professional. Always consult with your doctor or a qualified healthcare provider for personalized advice on managing diabetes."
+ ],
+ "guideline_citations": [
+ "diabetes.pdf"
+ ]
+ },
+ "confidence_assessment": {
+ "prediction_reliability": "MODERATE",
+ "evidence_strength": "MODERATE",
+ "limitations": [
+ "Missing data: 22 biomarker(s) not provided",
+ "Multiple critical values detected; professional evaluation essential"
+ ],
+ "recommendation": "Moderate confidence prediction. Medical consultation recommended for professional evaluation and additional testing if needed.",
+ "assessment_summary": "The overall reliability of this prediction is moderate, with an 85% confidence level from the ML model, indicating a reasonable likelihood of diabetes but also some degree of uncertainty. Key limitations, including two identified, suggest that while the evidence strength is moderate, there are potential weaknesses in the assessment that could impact accuracy. Therefore, it is essential to consult a professional medical practitioner to confirm the diagnosis and develop an appropriate treatment plan, as patient safety and accurate diagnosis are paramount.",
+ "alternative_diagnoses": [
+ {
+ "disease": "Anemia",
+ "probability": 0.08,
+ "note": "Consider discussing with healthcare provider"
+ }
+ ]
+ },
+ "alternative_diagnoses": [
+ {
+ "disease": "Anemia",
+ "probability": 0.08,
+ "note": "Consider discussing with healthcare provider"
+ }
+ ]
+ }
+ },
+ "biomarker_flags": [
+ {
+ "name": "Glucose",
+ "value": 140.0,
+ "unit": "mg/dL",
+ "status": "HIGH",
+ "reference_range": "70-100 mg/dL",
+ "warning": "Glucose is 140.0 mg/dL, above normal range (70-100 mg/dL). Hyperglycemia - diabetes risk, requires further testing"
+ },
+ {
+ "name": "HbA1c",
+ "value": 7.5,
+ "unit": "%",
+ "status": "HIGH",
+ "reference_range": "0-5.7 %",
+ "warning": "HbA1c is 7.5 %, above normal range (0-5.7 %). Diabetes (\u00e2\u2030\u00a56.5%), Prediabetes (5.7-6.4%)"
+ }
+ ],
+ "safety_alerts": [
+ {
+ "severity": "MEDIUM",
+ "biomarker": "Glucose",
+ "message": "Glucose is 140.0 mg/dL, above normal range (70-100 mg/dL). Hyperglycemia - diabetes risk, requires further testing",
+ "action": "Consult with healthcare provider"
+ },
+ {
+ "severity": "MEDIUM",
+ "biomarker": "HbA1c",
+ "message": "HbA1c is 7.5 %, above normal range (0-5.7 %). Diabetes (\u00e2\u2030\u00a56.5%), Prediabetes (5.7-6.4%)",
+ "action": "Consult with healthcare provider"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/data/chat_reports/report_unknown_20260223_124439.json b/data/chat_reports/report_unknown_20260223_124439.json
new file mode 100644
index 0000000000000000000000000000000000000000..3b9ae8bd46f8361f6aae8ce7e7651ddd7bd52cbb
--- /dev/null
+++ b/data/chat_reports/report_unknown_20260223_124439.json
@@ -0,0 +1,27 @@
+{
+ "timestamp": "20260223_124439",
+ "biomarkers_input": {
+ "Glucose": 140.0,
+ "HbA1c": 10.0
+ },
+ "analysis_result": {
+ "patient_biomarkers": {
+ "Glucose": 140.0,
+ "HbA1c": 10.0
+ },
+ "model_prediction": {
+ "disease": "Diabetes",
+ "confidence": 0.85,
+ "probabilities": {
+ "Diabetes": 0.85,
+ "Anemia": 0.08,
+ "Heart Disease": 0.04,
+ "Thrombocytopenia": 0.02,
+ "Thalassemia": 0.01
+ }
+ },
+ "patient_context": {
+ "source": "chat"
+ },
+ "plan": null,
+ "sop":
\ No newline at end of file
diff --git a/docs/API.md b/docs/API.md
index 3f22e8e1d79be7b6133344624308c70229de321a..bccbcfc193de5919717b0ded0fc1c5eb76db0e31 100644
--- a/docs/API.md
+++ b/docs/API.md
@@ -36,7 +36,7 @@ Currently no authentication required. For production deployment, add:
**Request:**
```http
-GET /health
+GET /api/v1/health
```
**Response:**
@@ -44,29 +44,62 @@ GET /health
{
"status": "healthy",
"timestamp": "2026-02-07T01:30:00Z",
+ "llm_status": "connected",
+ "vector_store_loaded": true,
+ "available_models": ["llama-3.3-70b-versatile (Groq)"],
+ "uptime_seconds": 3600.0,
"version": "1.0.0"
}
```
---
-### 2. Analyze Biomarkers
+### 2. Analyze Biomarkers (Natural Language)
+
+Parse biomarkers from free-text input, predict disease, and run the full RAG workflow.
**Request:**
```http
-POST /api/v1/analyze
+POST /api/v1/analyze/natural
+Content-Type: application/json
+
+{
+ "message": "My glucose is 185, HbA1c is 8.2 and cholesterol is 210",
+ "patient_context": {
+ "age": 52,
+ "gender": "male",
+ "bmi": 31.2
+ }
+}
+```
+
+| Field | Type | Required | Description |
+|-------|------|----------|-------------|
+| `message` | string | Yes | Free-text describing biomarker values |
+| `patient_context` | object | No | Age, gender, BMI for context |
+
+---
+
+### 3. Analyze Biomarkers (Structured)
+
+Provide biomarkers as a dictionary (skips LLM extraction step).
+
+**Request:**
+```http
+POST /api/v1/analyze/structured
Content-Type: application/json
{
"biomarkers": {
- "Glucose": 140,
- "HbA1c": 10.0,
- "LDL Cholesterol": 150
+ "Glucose": 185.0,
+ "HbA1c": 8.2,
+ "LDL Cholesterol": 165.0,
+ "HDL Cholesterol": 38.0
},
"patient_context": {
- "age": 45,
- "gender": "M",
- "bmi": 28.5
+ "age": 52,
+ "gender": "male",
+ "bmi": 31.2
}
}
```
@@ -154,60 +187,35 @@ Content-Type: application/json
| Field | Type | Required | Description |
|-------|------|----------|-------------|
-| `biomarkers` | Object | Yes | Blood test values (key-value pairs) |
-| `patient_context` | Object | No | Age, gender, BMI for context |
+| `biomarkers` | object | Yes | Key-value pairs of biomarker names and numeric values (at least 1) |
+| `patient_context` | object | No | Age, gender, BMI for context |
-**Biomarker Names** (normalized):
-Glucose, HbA1c, Triglycerides, Total Cholesterol, LDL Cholesterol, HDL Cholesterol, and 20+ more supported.
+**Biomarker Names** (canonical, with 80+ aliases auto-normalized):
+Glucose, HbA1c, Triglycerides, Total Cholesterol, LDL Cholesterol, HDL Cholesterol, Hemoglobin, Platelets, White Blood Cells, Red Blood Cells, BMI, Systolic Blood Pressure, Diastolic Blood Pressure, and more.
-See `config/biomarker_references.json` for full list.
+See `config/biomarker_references.json` for the full list of 24 supported biomarkers.
+
---
-### 3. Biomarker Validation
+### 4. Get Example Analysis
+
+Returns a pre-built diabetes example case (useful for testing and understanding the response format).
**Request:**
```http
-POST /api/v1/validate
-Content-Type: application/json
-
-{
- "biomarkers": {
- "Glucose": 140,
- "HbA1c": 10.0
- }
-}
+GET /api/v1/example
```
-**Response:**
-```json
-{
- "valid_biomarkers": {
- "Glucose": {
- "value": 140,
- "reference_range": "70-100",
- "status": "out-of-range",
- "severity": "high"
- },
- "HbA1c": {
- "value": 10.0,
- "reference_range": "4.0-6.4%",
- "status": "out-of-range",
- "severity": "high"
- }
- },
- "invalid_biomarkers": [],
- "alerts": [...]
-}
-```
+**Response:** Same schema as the analyze endpoints above.
---
-### 4. Get Biomarker Reference Ranges
+### 5. List Biomarker Reference Ranges
**Request:**
```http
-GET /api/v1/biomarkers/reference-ranges
+GET /api/v1/biomarkers
```
**Response:**
@@ -218,44 +226,20 @@ GET /api/v1/biomarkers/reference-ranges
"min": 70,
"max": 100,
"unit": "mg/dL",
- "condition": "fasting"
+ "normal_range": "70-100",
+ "critical_low": 54,
+ "critical_high": 400
},
"HbA1c": {
"min": 4.0,
- "max": 6.4,
+ "max": 5.6,
"unit": "%",
- "condition": "normal"
- },
- ...
+ "normal_range": "4.0-5.6",
+ "critical_low": -1,
+ "critical_high": 14
+ }
},
- "last_updated": "2026-02-07"
-}
-```
-
----
-
-### 5. Get Analysis History
-
-**Request:**
-```http
-GET /api/v1/history?limit=10
-```
-
-**Response:**
-```json
-{
- "analyses": [
- {
- "id": "report_Diabetes_20260207_012151",
- "disease": "Diabetes",
- "confidence": 0.85,
- "timestamp": "2026-02-07T01:21:51Z",
- "biomarker_count": 2
- },
- ...
- ],
- "total": 12,
- "limit": 10
+ "count": 24
}
```
@@ -263,24 +247,17 @@ GET /api/v1/history?limit=10
## Error Handling
-### Invalid Biomarker Name
-
-**Request:**
-```http
-POST /api/v1/analyze
-{
- "biomarkers": {
- "InvalidBiomarker": 100
- }
-}
-```
+### Invalid Input (Natural Language)
**Response:** `400 Bad Request`
```json
{
- "error": "Invalid biomarker",
- "detail": "InvalidBiomarker is not a recognized biomarker",
- "suggestions": ["Glucose", "HbA1c", "Triglycerides"]
+ "detail": {
+ "error_code": "EXTRACTION_FAILED",
+ "message": "Could not extract biomarkers from input",
+ "input_received": "...",
+ "suggestion": "Try: 'My glucose is 140 and HbA1c is 7.5'"
+ }
}
```
@@ -292,8 +269,8 @@ POST /api/v1/analyze
"detail": [
{
"loc": ["body", "biomarkers"],
- "msg": "field required",
- "type": "value_error.missing"
+ "msg": "Biomarkers dictionary must not be empty",
+ "type": "value_error"
}
]
}
@@ -329,14 +306,13 @@ biomarkers = {
}
response = requests.post(
- f"{API_URL}/analyze",
+ f"{API_URL}/analyze/structured",
json={"biomarkers": biomarkers}
)
result = response.json()
print(f"Disease: {result['prediction']['disease']}")
print(f"Confidence: {result['prediction']['confidence']}")
-print(f"Recommendations: {result['recommendations']['immediate_actions']}")
```
### JavaScript/Node.js
@@ -348,7 +324,7 @@ const biomarkers = {
Triglycerides: 200
};
-fetch('http://localhost:8000/api/v1/analyze', {
+fetch('http://localhost:8000/api/v1/analyze/structured', {
method: 'POST',
headers: {'Content-Type': 'application/json'},
body: JSON.stringify({biomarkers})
@@ -363,7 +339,7 @@ fetch('http://localhost:8000/api/v1/analyze', {
### cURL
```bash
-curl -X POST http://localhost:8000/api/v1/analyze \
+curl -X POST http://localhost:8000/api/v1/analyze/structured \
-H "Content-Type: application/json" \
-d '{
"biomarkers": {
@@ -406,7 +382,7 @@ app.add_middleware(
- **95th percentile**: < 25 seconds
- **99th percentile**: < 40 seconds
-(Times include all agent processing and RAG retrieval)
+(Includes all 6 agent processing steps and RAG retrieval)
---
diff --git a/docs/ARCHITECTURE.md b/docs/ARCHITECTURE.md
index 3f9694e80d7bab9cdc62728976f09d7cba45bc62..53ca0d0362d02609b076fc6c7aea15765b991e54 100644
--- a/docs/ARCHITECTURE.md
+++ b/docs/ARCHITECTURE.md
@@ -45,11 +45,12 @@ RagBot is a Multi-Agent RAG (Retrieval-Augmented Generation) system for medical
## Core Components
-### 1. **Biomarker Extraction & Validation** (`src/biomarker_validator.py`)
+### 1. **Biomarker Extraction & Validation** (`src/biomarker_validator.py`, `src/biomarker_normalization.py`)
- Parses user input for blood test results
-- Normalizes biomarker names to standard clinical terms
-- Validates values against established reference ranges
+- Normalizes biomarker names via 80+ alias mappings to 24 canonical names
+- Validates values against established reference ranges (with clinically appropriate critical thresholds)
- Generates safety alerts for critical values
+- Flags all out-of-range values (no suppression threshold)
### 2. **Multi-Agent Workflow** (`src/workflow.py` using LangGraph)
The system processes each patient case through 6 specialist agents:
@@ -93,11 +94,13 @@ The system processes each patient case through 6 specialist agents:
### 3. **Knowledge Base** (`src/pdf_processor.py`)
- **Source**: 8 medical PDF documents (750 pages total)
- **Storage**: FAISS vector database (2,609 document chunks)
-- **Embeddings**: HuggingFace sentence-transformers (free, local, offline)
+- **Embeddings**: Google Gemini (default, free) or HuggingFace sentence-transformers (local, offline)
- **Format**: Chunked with 1000 char overlap for context preservation
### 4. **LLM Configuration** (`src/llm_config.py`)
-- **Primary LLM**: Groq LLaMA 3.3-70B
+- **Primary LLM**: Groq LLaMA 3.3-70B (fast, free)
+- **Alternative LLM**: Google Gemini 2.0 Flash (free)
+- **Local LLM**: Ollama (for offline use)
- Fast inference (~1-2 sec per agent output)
- Free API tier available
- No rate limiting for reasonable usage
@@ -126,23 +129,24 @@ User Input
## Key Design Decisions
-1. **Local Embeddings**: HuggingFace embeddings avoid API costs and work offline
+1. **Cloud Embeddings**: Google Gemini embeddings (free tier) with HuggingFace fallback for offline use
2. **Groq LLM**: Free, fast inference for real-time interaction
-3. **LangGraph**: Manages complex multi-agent workflows with state management
-4. **FAISS**: Efficient similarity search on large medical document collection
-5. **Modular Agents**: Each agent has clear responsibility, enabling parallel execution
-6. **RAG Integration**: Medical knowledge grounds responses in evidence
+3. **Multiple Providers**: Support for Groq, Google Gemini, and Ollama (local)
+4. **LangGraph**: Manages complex multi-agent workflows with state management
+5. **FAISS**: Efficient similarity search on large medical document collection
+6. **Modular Agents**: Each agent has clear responsibility, enabling parallel execution
+7. **RAG Integration**: Medical knowledge grounds responses in evidence
+8. **Biomarker Normalization**: 80+ aliases ensure robust input handling
## Technologies Used
| Component | Technology | Purpose |
|-----------|-----------|---------|
| Orchestration | LangGraph | Workflow management |
-| LLM | Groq API | Fast inference |
-| Embeddings | HuggingFace | Vector representations |
+| LLM | Groq API / Google Gemini | Fast inference |
+| Embeddings | Google Gemini / HuggingFace | Vector representations |
| Vector DB | FAISS | Similarity search |
| Data Validation | Pydantic V2 | Type safety & schemas |
-| Async | Python asyncio | Parallel processing |
| REST API | FastAPI | Web interface |
## Performance Characteristics
@@ -157,7 +161,7 @@ User Input
### Adding New Biomarkers
1. Update `config/biomarker_references.json` with reference ranges
-2. Add to `scripts/normalize_biomarker_names()` mapping
+2. Add aliases to `src/biomarker_normalization.py` (NORMALIZATION_MAP)
3. Medical guidelines automatically handle via RAG
### Adding New Medical Domains
diff --git a/docs/DEEP_REVIEW.md b/docs/DEEP_REVIEW.md
new file mode 100644
index 0000000000000000000000000000000000000000..387bf721e28adaa26185e56a64a031fc5f1913e7
--- /dev/null
+++ b/docs/DEEP_REVIEW.md
@@ -0,0 +1,119 @@
+# RagBot Deep Review
+
+> **Last updated**: February 2026
+> Items marked **[RESOLVED]** have been fixed. Items marked **[OPEN]** remain as future work.
+
+## Scope
+
+This review covers the end-to-end workflow and supporting services for RagBot, focusing on design correctness, reliability, safety guardrails, and maintainability. The review is based on a close reading of the workflow orchestration, agent implementations, API wiring, extraction and prediction logic, and the knowledge base pipeline.
+
+Primary files reviewed:
+- `src/workflow.py`
+- `src/state.py`
+- `src/config.py`
+- `src/agents/*`
+- `src/biomarker_validator.py`
+- `src/pdf_processor.py`
+- `api/app/main.py`
+- `api/app/routes/analyze.py`
+- `api/app/services/extraction.py`
+- `api/app/services/ragbot.py`
+- `scripts/chat.py`
+
+## Architectural Understanding (Condensed)
+
+### End-to-End Flow
+1. Input arrives via CLI (`scripts/chat.py`) or REST API (`api/app/routes/analyze.py`).
+2. Natural language inputs are parsed by the extraction service (`api/app/services/extraction.py`) to produce normalized biomarkers and patient context.
+3. A rule-based prediction (`predict_disease_simple`) produces a disease hypothesis and probabilities.
+4. The LangGraph workflow (`src/workflow.py`) orchestrates six agents: Biomarker Analyzer, Disease Explainer, Biomarker Linker, Clinical Guidelines, Confidence Assessor, Response Synthesizer.
+5. The synthesized output is formatted into API schemas (`api/app/services/ragbot.py`) or into CLI-friendly responses (`scripts/chat.py`).
+
+### Key Data Structures
+- `GuildState` in `src/state.py` is the shared workflow state; it depends on additive accumulation for parallel outputs.
+- `PatientInput` holds structured biomarkers, prediction data, and patient context.
+- The response format is built in `ResponseSynthesizerAgent` and then translated into API schemas in `RagBotService`.
+
+### Knowledge Base
+- PDFs are chunked and embedded into FAISS (`src/pdf_processor.py`).
+- Three retrievers (disease explainer, biomarker linker, clinical guidelines) share the same FAISS index with varying `k` values.
+
+## Deep Review Findings
+
+### Critical Issues
+
+1. **[OPEN] State propagation is incomplete across the workflow.**
+ - `src/agents/biomarker_analyzer.py` returns only `agent_outputs` and not the computed `biomarker_flags` or `safety_alerts` into the top-level `GuildState` keys that the workflow expects to accumulate.
+ - `src/workflow.py` initializes `biomarker_flags` and `safety_alerts` in the state, but none of the agents return updates to those keys. As a result, `workflow_result.get("biomarker_flags")` and `workflow_result.get("safety_alerts")` are likely empty when the API response is formatted in `api/app/services/ragbot.py`.
+ - Effect: API output will frequently miss biomarkers and alerts, and downstream consumers will incorrectly assume a clean result set.
+   - Recommendation: return `biomarker_flags` and `safety_alerts` from the Biomarker Analyzer agent so they accumulate in the state. Ensure the response synthesizer uses those same keys.
+
+2. **[OPEN] LangGraph merge behavior is unsafe for parallel outputs.**
+ - `GuildState` uses `Annotated[List[AgentOutput], operator.add]` for additive merging, but the nodes return only `{ 'agent_outputs': [output] }` and nothing else. This is okay for `agent_outputs`, but parallel agents also read from the full `agent_outputs` list inside the state to infer prior results.
+ - In parallel branches, a given agent might read a partial `agent_outputs` list depending on execution order. This is visible in the `BiomarkerDiseaseLinkerAgent` and `ClinicalGuidelinesAgent` which read the prior Biomarker Analyzer output by searching `agent_outputs`.
+ - Effect: nondeterministic behavior if LangGraph schedules a branch before the Biomarker Analyzer output is merged, or if merges occur after the branch starts. This can degrade evidence selection and recommendations.
+ - Recommendation: explicitly pass relevant artifacts as dedicated state fields updated by the Biomarker Analyzer, and read those fields directly instead of scanning `agent_outputs`.
+
+3. **[RESOLVED] Schema mismatch between workflow output and API formatter.**
+ - `ResponseSynthesizerAgent` returns a structured response with keys like `patient_summary`, `prediction_explanation`, `clinical_recommendations`, `confidence_assessment`, and `safety_alerts`.
+ - `RagBotService._format_response()` now correctly reads from `final_response` and handles both Pydantic objects and dicts.
+ - The CLI (`scripts/chat.py`) uses `_coerce_to_dict()` and `format_conversational()` to safely handle all output types.
+ - **Fix applied**: `_format_response()` updated + `_coerce_to_dict()` helper added.
+
+### High Priority Issues
+
+1. **[OPEN] Prediction confidence is forced to 0.5 and default disease is always Diabetes.**
+ - Both the API and CLI `predict_disease_simple` functions enforce a minimum confidence of 0.5 and default to Diabetes when confidence is low.
+ - Effect: leads to biased predictions and false confidence. This is risky in a medical domain and undermines reliability assessments.
+ - Recommendation: return a low-confidence prediction explicitly and mark reliability as low; avoid forcing a disease when evidence is insufficient.
+
+2. **[RESOLVED] Different biomarker naming schemes across extraction modules.**
+ - Both CLI and API now use the shared `src/biomarker_normalization.py` module with 80+ aliases mapped to 24 canonical names.
+ - **Fix applied**: unified normalization in both `scripts/chat.py` and `api/app/services/extraction.py`.
+
+3. **[RESOLVED] Use of console glyphs and non-ASCII prefixes in logs and output.**
+ - Debug prints removed from CLI. Logging suppressed for noisy HuggingFace/transformers output.
+ - API responses use clean JSON only; CLI uses UTF-8 emojis only in user-facing output.
+ - **Fix applied**: `[DEBUG]` prints removed, `BertModel LOAD REPORT` suppressed, HuggingFace deprecation warnings filtered.
+
+### Medium Priority Issues
+
+1. **[RESOLVED] Inconsistent model selection between agents.**
+ - All agents now use `llm_config` centralized configuration (planner, analyzer, explainer, synthesizer properties).
+ - **Fix applied**: `src/llm_config.py` provides `LLMConfig` singleton with per-role properties.
+
+2. **[RESOLVED] Potential JSON parsing fragility in extraction.**
+ - `_parse_llm_json()` now handles markdown fences, trailing text, and partial JSON recovery.
+ - **Fix applied**: robust JSON parser in `api/app/services/extraction.py` with test coverage (`test_json_parsing.py`).
+
+3. **[RESOLVED] Knowledge base retrieval does not enforce citations.**
+ - Disease Explainer agent now checks `sop.require_pdf_citations` and returns "insufficient evidence" when no documents are retrieved.
+ - **Fix applied**: citation guardrail in `src/agents/disease_explainer.py` with test (`test_citation_guardrails.py`).
+
+### Low Priority Issues
+
+1. **[OPEN] Error handling does not preserve original exceptions cleanly in API layer.**
+ - Exceptions are wrapped in `RuntimeError` without detail separation; `RagBotService.analyze()` does not attach contextual hints (e.g., which agent failed).
+ - Recommendation: wrap exceptions with agent name and error classification to improve observability.
+
+2. **[RESOLVED] Hard-coded expected biomarker count (24) in Confidence Assessor.**
+ - Now uses `BiomarkerValidator().expected_biomarker_count()` which reads from `config/biomarker_references.json`.
+ - Test: `test_validator_count.py` verifies count matches reference config.
+
+## Suggested Improvements (Summary)
+
+1. ~~Align workflow output and API schema.~~ **[RESOLVED]**
+2. Promote biomarker flags and safety alerts to first-class state fields in the workflow. **[OPEN]**
+3. ~~Use a shared normalization utility.~~ **[RESOLVED]**
+4. Remove forced minimum confidence and default disease; permit "low confidence" results. **[OPEN]**
+5. ~~Introduce citation enforcement as a guardrail for RAG outputs.~~ **[RESOLVED]**
+6. ~~Centralize model selection and logging format.~~ **[RESOLVED]**
+
+## Verification Gaps
+
+The following should be tested once fixes are made:
+- Natural language extraction with partial and noisy inputs.
+- Workflow run where no abnormal biomarkers are detected.
+- API response schema validation for both natural and structured routes.
+- Parallel agent execution determinism (state access to biomarker analysis).
+- CLI behavior for biomarker names that differ from API normalization.
diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md
index 1ded086a9d0936bac7d9af5e929d105bf45c07e3..0ab6e5caaf45c7cfa29b3512ede536d4b5922806 100644
--- a/docs/DEVELOPMENT.md
+++ b/docs/DEVELOPMENT.md
@@ -9,14 +9,17 @@ This guide covers extending, customizing, and contributing to RagBot.
```
RagBot/
├── src/ # Core application code
+│ ├── __init__.py # Package marker
│ ├── workflow.py # Multi-agent workflow orchestration
│ ├── state.py # Pydantic data models & state
│ ├── biomarker_validator.py # Biomarker validation logic
+│ ├── biomarker_normalization.py # Alias-to-canonical name mapping (80+ aliases)
│ ├── llm_config.py # LLM & embedding configuration
│ ├── pdf_processor.py # PDF loading & vector store
│ ├── config.py # Global configuration
│ │
│ ├── agents/ # Specialist agents
+│ │ ├── __init__.py # Package marker
│ │ ├── biomarker_analyzer.py # Validates biomarkers
│ │ ├── disease_explainer.py # Explains disease (RAG)
│ │ ├── biomarker_linker.py # Links biomarkers to disease (RAG)
@@ -24,7 +27,12 @@ RagBot/
│ │ ├── confidence_assessor.py # Assesses prediction confidence
│ │ └── response_synthesizer.py # Synthesizes findings
│ │
+│ ├── evaluation/ # Evaluation framework
+│ │ ├── __init__.py
+│ │ └── evaluators.py # Quality evaluators
+│ │
│ └── evolution/ # Experimental components
+│ ├── __init__.py
│ ├── director.py # Evolution orchestration
│ └── pareto.py # Pareto optimization
│
@@ -127,17 +135,18 @@ pytest tests/
}
```
-**Step 2:** Update name normalization in `scripts/chat.py`:
+**Step 2:** Add aliases in `src/biomarker_normalization.py`:
```python
-def normalize_biomarker_name(name: str) -> str:
- mapping = {
- "your alias": "New Biomarker",
- "other name": "New Biomarker",
- }
- return mapping.get(name.lower(), name)
+NORMALIZATION_MAP = {
+ # ... existing entries ...
+ "your alias": "New Biomarker",
+ "other name": "New Biomarker",
+}
```
+All consumers (CLI, API, workflow) use this shared map automatically.
+
**Step 3:** Add validation test in `tests/test_basic.py`:
```python
@@ -181,13 +190,13 @@ python scripts/chat.py
**Step 1:** Create `src/agents/medication_checker.py`:
```python
-from langchain.agents import Tool
-from langchain.llms import Groq
-from src.state import PatientInput, DiseasePrediction
+from src.llm_config import LLMConfig
+from src.state import PatientInput
class MedicationChecker:
def __init__(self):
- self.llm = Groq(model="llama-3.3-70b")
+ config = LLMConfig()
+ self.llm = config.analyzer # Uses centralized LLM config
def check_interactions(self, state: PatientInput) -> dict:
"""Check medication interactions based on biomarkers."""
@@ -226,52 +235,42 @@ medication_info = state.get("medication_interactions", {})
### Switching LLM Providers
-**Current:** Groq LLaMA 3.3-70B (free, fast)
-
-**To use OpenAI GPT-4:**
+RagBot supports three LLM providers out of the box. Set via `LLM_PROVIDER` in `.env`:
-1. Update `src/llm_config.py`:
-```python
-from langchain_openai import ChatOpenAI
+| Provider | Model | Cost | Speed |
+|----------|-------|------|-------|
+| `groq` (default) | llama-3.3-70b-versatile | Free | Fast |
+| `gemini` | gemini-2.0-flash | Free | Medium |
+| `ollama` | configurable | Free (local) | Varies |
-def create_llm():
- return ChatOpenAI(
- model="gpt-4",
- api_key=os.getenv("OPENAI_API_KEY"),
- temperature=0.1
- )
-```
-
-2. Update `requirements.txt`:
-```
-langchain-openai>=0.1.0
-```
-
-3. Test:
```bash
-python scripts/chat.py
-```
+# .env
+LLM_PROVIDER="groq"
+GROQ_API_KEY="gsk_..."
-### Modifying Embedding Model
+# Or
+LLM_PROVIDER="gemini"
+GOOGLE_API_KEY="..."
+```
-**Current:** HuggingFace sentence-transformers (free, local)
+No code changes needed — `src/llm_config.py` handles provider selection automatically.
-**To use OpenAI Embeddings:**
+### Modifying Embedding Provider
-1. Update `src/pdf_processor.py`:
-```python
-from langchain_openai import OpenAIEmbeddings
+**Current default:** Google Gemini (`models/embedding-001`, free)
+**Fallback:** HuggingFace sentence-transformers (local, no API key needed)
+**Optional:** Ollama (local)
-def get_embedding_model():
- return OpenAIEmbeddings(
- model="text-embedding-3-small",
- api_key=os.getenv("OPENAI_API_KEY")
- )
+Set via `EMBEDDING_PROVIDER` in `.env`:
+```bash
+EMBEDDING_PROVIDER="google" # Default - Google Gemini
+EMBEDDING_PROVIDER="huggingface" # Fallback - local
+EMBEDDING_PROVIDER="ollama" # Local Ollama
```
-2. Rebuild vector store:
+After changing, rebuild the vector store:
```bash
-python scripts/setup_embeddings.py --force-rebuild
+python scripts/setup_embeddings.py
```
⚠️ **Note:** Changing embeddings requires rebuilding the vector store (dimensions must match).
@@ -281,19 +280,19 @@ python scripts/setup_embeddings.py --force-rebuild
### Run All Tests
```bash
-pytest tests/ -v
+.venv\Scripts\python.exe -m pytest tests/ -q --ignore=tests/test_basic.py --ignore=tests/test_diabetes_patient.py --ignore=tests/test_evolution_loop.py --ignore=tests/test_evolution_quick.py --ignore=tests/test_evaluation_system.py
```
### Run Specific Test
```bash
-pytest tests/test_diabetes_patient.py -v
+.venv\Scripts\python.exe -m pytest tests/test_normalization.py -v
```
### Test Coverage
```bash
-pytest --cov=src tests/
+.venv\Scripts\python.exe -m pytest --cov=src tests/
```
### Add New Tests
@@ -327,15 +326,16 @@ LOG_LEVEL=DEBUG
```bash
python -c "
-from src.workflow import create_workflow
-from src.state import PatientInput
+from src.workflow import create_guild
-# Create test input
-input_data = PatientInput(...)
+# Create the guild
+guild = create_guild()
# Run workflow
-workflow = create_workflow()
-result = workflow.invoke(input_data)
+result = guild.run({
+ 'biomarkers': {'Glucose': 185, 'HbA1c': 8.2},
+ 'model_prediction': {'disease': 'Diabetes', 'confidence': 0.87}
+})
# Inspect result
print(result)
@@ -436,24 +436,17 @@ FAISS vector store is already loaded once at startup.
## Troubleshooting
-### Issue: "ModuleNotFoundError: No module named 'torch'"
-
-```bash
-pip install torch torchvision
-```
-
-### Issue: "CUDA out of memory"
+### Issue: Vector store not found
```bash
-export CUDA_VISIBLE_DEVICES=-1 # Use CPU
-python scripts/chat.py
+.venv\Scripts\python.exe scripts/setup_embeddings.py
```
-### Issue: Vector store not found
+### Issue: LLM provider not responding
-```bash
-python scripts/setup_embeddings.py
-```
+- Check your `.env` has valid API keys (`GROQ_API_KEY` or `GOOGLE_API_KEY`)
+- Verify internet connection
+- Check provider status pages (Groq Console, Google AI Studio)
### Issue: Slow inference
diff --git a/docs/archive/COMPREHENSIVE_SKILLS_GUIDE.md b/docs/archive/COMPREHENSIVE_SKILLS_GUIDE.md
new file mode 100644
index 0000000000000000000000000000000000000000..a062639771436190dcf8e6c932170cea4793829a
--- /dev/null
+++ b/docs/archive/COMPREHENSIVE_SKILLS_GUIDE.md
@@ -0,0 +1,371 @@
+╔══════════════════════════════════════════════════════════════════════════════╗
+║ 🚀 COMPREHENSIVE RAGBOT SKILLS INSTALLATION COMPLETE 🚀 ║
+║ 30 Enterprise-Grade Agent & RAG Skills ║
+║ Taking Your Project to Industry-Leading Status ║
+╚══════════════════════════════════════════════════════════════════════════════╝
+
+INSTALLATION SUMMARY
+════════════════════════════════════════════════════════════════════════════════
+📊 Total Skills Installed: 30
+🎯 Installation Date: February 18, 2026
+📈 Combined Downloads: 15,000+ installs
+🔒 Security Status: All low-risk, verified
+
+SKILLS BY CATEGORY
+════════════════════════════════════════════════════════════════════════════════
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+🏗️ AGENTIC ARCHITECTURE & ORCHESTRATION (7 Skills)
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+✅ 1. LangChain Architecture (2.3K installs)
+ └─ Location: ~/.agents/skills/langchain-architecture/
+ └─ Purpose: Core LangChain patterns and abstractions
+ └─ For RagBot: Structure your 6 specialist agents using LangChain best practices
+
+✅ 2. Workflow Orchestration Patterns (2K installs)
+ └─ Location: ~/.agents/skills/workflow-orchestration-patterns/
+ └─ Purpose: LangGraph workflow design and state management
+ └─ For RagBot: Fix state propagation issues (biomarker_flags, safety_alerts)
+
+✅ 3. Multi-Agent Orchestration (164 installs)
+ └─ Location: ~/.agents/skills/multi-agent-orchestration/
+ └─ Purpose: Coordinate 6 parallel agents with deterministic execution
+ └─ For RagBot: Improve coordination between Analyzer, Linker, Explainer, etc.
+
+✅ 4. Agentic Development (91 installs)
+ └─ Location: ~/.agents/skills/agentic-development/
+ └─ Purpose: Agent lifecycle, planning, and execution patterns
+ └─ For RagBot: Build more sophisticated agents with better decision logic
+
+✅ 5. Tool/Function Calling Patterns (134 installs)
+ └─ Location: ~/.agents/skills/langchain4j-tool-function-calling-patterns/
+ └─ Purpose: Structured tool use and function calling
+ └─ For RagBot: Add tools for biomarker lookup, medical guideline retrieval
+
+✅ 6. LLM Application Dev with LangChain (49 installs)
+ └─ Location: ~/.agents/skills/llm-application-dev-langchain-agent/
+ └─ Purpose: Full LangChain application patterns
+ └─ For RagBot: Production-ready agent implementations
+
+✅ 7. RAG Agent Builder (29 installs)
+ └─ Location: ~/.agents/skills/rag-agent-builder/
+ └─ Purpose: Build RAG-specific agents with retrieval integration
+ └─ For RagBot: Create specialized RAG agents for disease and biomarker analysis
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+🔍 RETRIEVAL & SEARCH OPTIMIZATION (5 Skills)
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+✅ 8. Hybrid Search Implementation (1.7K installs) ⭐ CRITICAL
+ └─ Location: ~/.agents/skills/hybrid-search-implementation/
+ └─ Purpose: Combine semantic + keyword search for better retrieval
+ └─ For RagBot: Improve medical knowledge retrieval (rare diseases + common terms)
+ └─ Impact: Better recall for edge cases in biomarker analysis
+
+✅ 9. Chunking Strategy (145 installs)
+ └─ Location: ~/.agents/skills/chunking-strategy/
+ └─ Purpose: Optimal document chunking for medical PDFs
+ └─ For RagBot: Split 750+ pages into semantically coherent chunks
+ └─ Includes: Smart splitting by sections, maintaining context
+
+✅ 10. Embedding Pipeline Builder (22 installs)
+ └─ Location: ~/.agents/skills/embedding-pipeline-builder/
+ └─ Purpose: Optimize embedding generation and management
+ └─ For RagBot: Improve embedding quality for medical terminology
+ └─ Benefit: Better semantic search within FAISS vector store
+
+✅ 11. RAG Implementation (Original) (Already installed)
+ └─ Location: ~/.agents/skills/rag-implementation/
+ └─ Purpose: Citation enforcement and retrieval quality
+ └─ For RagBot: Ensure all medical claims are backed by documents
+
+✅ 12. Knowledge Graph Builder (52 installs)
+ └─ Location: ~/.agents/skills/knowledge-graph-builder/
+ └─ Purpose: Extract entity relationships from medical texts
+ └─ For RagBot: Map biomarker→disease→treatment relationships
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+🧠 LLM OPTIMIZATION & PROMPT ENGINEERING (4 Skills)
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+✅ 13. Senior Prompt Engineer (320 installs) ⭐ MOST POPULAR
+ └─ Location: ~/.agents/skills/senior-prompt-engineer/
+ └─ Purpose: Advanced prompt optimization and engineering
+ └─ For RagBot: Improve Groq/Gemini prompts for medical accuracy
+ └─ Techniques: Chain-of-thought, few-shot, role prompting
+
+✅ 14. LLM Evaluation (39 installs)
+ └─ Location: ~/.agents/skills/llm-evaluation/
+ └─ Purpose: Benchmark and evaluate LLM outputs
+ └─ For RagBot: Assess biomarker prediction accuracy vs actual patient data
+ └─ Metrics: Precision, recall, F1 for disease prediction
+
+✅ 15. Cost-Aware LLM Pipeline (29 installs)
+ └─ Location: ~/.agents/skills/cost-aware-llm-pipeline/
+ └─ Purpose: Optimize LLM costs (Groq free tier is limited)
+ └─ For RagBot: Route complex queries to Groq 70B, simple to cheaper models
+ └─ Savings: Reduce API costs while maintaining accuracy
+
+✅ 16. AI Wrapper/Structured Output (252 installs) ⭐ SUPER POPULAR
+ └─ Location: ~/.agents/skills/ai-wrapper-product/
+ └─ Purpose: Structured JSON/schema output from LLMs
+ └─ For RagBot: Fix schema mismatch between workflow and API formatter
+ └─ Impact: Reliable parsing of LLM outputs
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+🔒 SECURITY & SAFETY (5 Skills)
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+✅ 17. API Security Hardening (144 installs) ⭐ CRITICAL
+ └─ Location: ~/.agents/skills/api-security-hardening/
+ └─ Purpose: JWT, API keys, CORS, input validation
+ └─ For RagBot: Protect medical data endpoints
+ └─ HIPAA: Essential for healthcare compliance
+
+✅ 18. OWASP Security Check (148 installs)
+ └─ Location: ~/.agents/skills/owasp-security-check/
+ └─ Purpose: Scan for OWASP Top 10 vulnerabilities
+ └─ For RagBot: Medical data protection audit
+ └─ Focus: Injection attacks, broken auth, data exposure
+
+✅ 19. LLM Security (104 installs)
+ └─ Location: ~/.agents/skills/llm-security/
+ └─ Purpose: LLM-specific attacks (injection, prompt manipulation)
+ └─ For RagBot: Protect against adversarial biomarker inputs
+ └─ Risk: "Bypass my safety check" type attacks
+
+✅ 20. API Rate Limiting (92 installs)
+ └─ Location: ~/.agents/skills/api-rate-limiting/
+ └─ Purpose: Prevent abuse with tiered rate limits
+ └─ For RagBot: Medical analysis endpoint protection
+ └─ Tiers: Free tier (10/min), Pro (1000/min)
+
+✅ 21. Python Error Handling (Already installed)
+ └─ Location: ~/.agents/skills/python-error-handling/
+ └─ Purpose: Graceful error recovery
+ └─ For RagBot: Handle LLM timeouts, invalid biomarkers
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+🧪 TESTING & QUALITY (3 Skills)
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+✅ 22. Python Testing Patterns (3.7K installs) ⭐ MOST POPULAR
+ └─ Location: ~/.agents/skills/python-testing-patterns/
+ └─ Purpose: Expand test suite from 83 to 150+ tests
+ └─ For RagBot: Mock LLM calls, parametrize biomarker tests
+ └─ Target: 90%+ code coverage
+
+✅ 23. Code Review Excellence (Already installed)
+ └─ Location: ~/.agents/skills/code-review-excellence/
+ └─ Purpose: Establish code review standards
+ └─ For RagBot: Review multi-agent orchestration code
+
+✅ 24. GitHub Actions Templates (2.8K installs) ⭐ CRITICAL
+ └─ Location: ~/.agents/skills/github-actions-templates/
+ └─ Purpose: CI/CD automation (test, build, deploy)
+ └─ For RagBot: Auto-run pytest, Docker builds on every PR
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+🏢 INFRASTRUCTURE & ENGINEERING (4 Skills)
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+✅ 25. FastAPI Templates (Already installed)
+ └─ Location: ~/.agents/skills/fastapi-templates/
+ └─ Purpose: FastAPI best practices and patterns
+ └─ For RagBot: Async endpoints, middleware, exception handling
+
+✅ 26. Python Design Patterns (Already installed)
+ └─ Location: ~/.agents/skills/python-design-patterns/
+ └─ Purpose: SOLID principles, composition over inheritance
+ └─ For RagBot: Refactor agent implementations
+
+✅ 27. Python Observability (Already installed)
+ └─ Location: ~/.agents/skills/python-observability/
+ └─ Purpose: Structured logging, metrics, distributed tracing
+ └─ For RagBot: Monitor LLM latency, prediction accuracy
+
+✅ 28. Memory Management (126 installs)
+ └─ Location: ~/.agents/skills/memory-management/
+ └─ Purpose: Context window optimization
+ └─ For RagBot: Manage conversation history efficiently
+ └─ Benefit: Fit more patient history in LLM context
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+📚 DOCUMENTATION & COLLABORATION (2 Skills)
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+✅ 29. API Docs Generator (44 installs)
+ └─ Location: ~/.agents/skills/api-docs-generator/
+ └─ Purpose: Auto-generate OpenAPI/Swagger + interactive docs
+ └─ For RagBot: Live at /docs and /redoc on FastAPI
+ └─ Benefit: Client SDKs, changelog, versioning
+
+✅ 30. GitHub PR Review Workflow (31 installs)
+ └─ Location: ~/.agents/skills/github-pr-review-workflow/
+ └─ Purpose: PR templates, CODEOWNERS, approval workflows
+ └─ For RagBot: Enforce code quality standards
+ └─ Includes: Branch protection, required reviews
+
+════════════════════════════════════════════════════════════════════════════════
+
+SKILLS MAPPED TO YOUR CRITICAL ISSUES
+════════════════════════════════════════════════════════════════════════════════
+
+🔴 CRITICAL ISSUE #1: State propagation incomplete
+ → Use: Workflow Orchestration Patterns (Skill #2)
+ Multi-Agent Orchestration (Skill #3)
+ → Action: Refactor GuildState, return biomarker_flags & safety_alerts
+
+🔴 CRITICAL ISSUE #2: Schema mismatch (workflow vs API)
+ → Use: AI Wrapper/Structured Output (Skill #16)
+ Agentic Development (Skill #4)
+ → Action: Unify response schema, use Pydantic for strict typing
+
+🔴 CRITICAL ISSUE #3: Forced confidence & default disease
+ → Use: LLM Evaluation (Skill #14)
+ Senior Prompt Engineer (Skill #13)
+ → Action: Remove forced 0.5 minimum, implement confidence ranges
+
+🟡 HIGH PRIORITY #4: Different biomarker naming
+ → Use: Chunking Strategy (Skill #9)
+ Structured Output (Skill #16)
+ → Action: Centralize normalization, test with both naming schemes
+
+🟡 HIGH PRIORITY #5: JSON parsing fragility
+ → Use: Structured Output (Skill #16)
+ LLM Evaluation (Skill #14)
+ → Action: Use function calling for structured outputs
+
+🟡 HIGH PRIORITY #6: Missing citation enforcement
+ → Use: RAG Implementation (Skill #11)
+ Knowledge Graph Builder (Skill #12)
+ → Action: Track citations per claim, fail on missing sources
+
+════════════════════════════════════════════════════════════════════════════════
+
+RECOMMENDED IMPLEMENTATION ROADMAP
+════════════════════════════════════════════════════════════════════════════════
+
+MONTH 1: FOUNDATION & SECURITY
+────────────────────────────────
+
+Week 1-2: Security Infrastructure
+ ├─ Run OWASP Security Check (Skill #18)
+ ├─ Implement API Security Hardening (Skill #17) → JWT auth
+ ├─ Add API Rate Limiting (Skill #20)
+ └─ Deploy to staging with HTTPS
+
+Week 3-4: Fix Critical Workflow Issues
+ ├─ Use Workflow Orchestration Patterns (Skill #2)
+ ├─ Fix state propagation (biomarker_flags, safety_alerts)
+ ├─ Unify response schema with AI Wrapper (Skill #16)
+ └─ Refactor ResponseSynthesizerAgent
+
+MONTH 2: QUALITY & TESTING
+────────────────────────────
+
+Week 5-6: Expand Test Suite
+ ├─ Use Python Testing Patterns (Skill #22)
+ ├─ Add 50+ new parametrized biomarker tests
+ ├─ Mock LLM calls with pytest-mock
+ └─ Target 90% coverage
+
+Week 7-8: Agent Optimization
+  ├─ Use Agentic Development (Skill #4)
+  ├─ Improve agent decision logic
+  ├─ Add tool/function calling (Skill #5)
+  └─ Run LLM Evaluation (Skill #14)
+
+MONTH 3: RETRIEVAL & RAG
+─────────────────────────
+
+Week 9-10: Search Optimization
+ ├─ Implement Hybrid Search (Skill #8) → BM25 + semantic
+ ├─ Optimize Chunking Strategy (Skill #9)
+ ├─ Use Embedding Pipeline Builder (Skill #10)
+ └─ Measure retrieval improvements
+
+Week 11-12: Knowledge Enrichment
+ ├─ Build Knowledge Graph (Skill #12)
+ ├─ Extract biomarker→disease relationships
+ ├─ Enforce Citations (RAG Implementation, Skill #11)
+ └─ Validate with medical experts
+
+MONTH 4: OPTIMIZATION & DEPLOYMENT
+────────────────────────────────────
+
+Week 13-14: Cost & Performance
+ ├─ Use Cost-Aware LLM Pipeline (Skill #15)
+ ├─ Optimize context with Memory Management (Skill #28)
+ ├─ Use Senior Prompt Engineer (Skill #13)
+ └─ Benchmark latency & costs
+
+Week 15-16: Deployment & Monitoring
+ ├─ Set up CI/CD (GitHub Actions, Skill #24)
+ ├─ Deploy with FastAPI Templates (Skill #25)
+ ├─ Add Observability (Python Observability, Skill #27)
+ ├─ Auto-generate API Docs (Skill #29)
+ └─ Production launch!
+
+════════════════════════════════════════════════════════════════════════════════
+
+QUICK START: USE EACH SKILL
+════════════════════════════════════════════════════════════════════════════════
+
+Each skill is available at: ~/.agents/skills/[skill-name]/SKILL.md
+
+To view any skill:
+ $ cat ~/.agents/skills/langchain-architecture/SKILL.md
+
+To list all installed skills:
+ $ npx skills list
+
+To update skills:
+ $ npx skills check
+ $ npx skills update
+
+════════════════════════════════════════════════════════════════════════════════
+
+EXPECTED OUTCOMES AFTER IMPLEMENTATION
+════════════════════════════════════════════════════════════════════════════════
+
+📊 BEFORE vs AFTER
+ Before: 83 tests, ~70% coverage, schema mismatches, no citations
+ After: 150+ tests, 90% coverage, unified schema, enforced citations
+
+🚀 PERFORMANCE
+ Latency: Reduce from 25s to 15-20s (with hybrid search optimization)
+ Accuracy: +15-20% disease prediction accuracy (via prompt engineering)
+ Cost: -40% with cost-aware routing (Groq free tier optimization)
+
+🔒 SECURITY
+ Authentication: API key + JWT (rate-limited)
+ Compliance: HIPAA-aligned logging, encrypted storage
+ Vulnerabilities: 0 OWASP findings after fixes
+
+🤝 MAINTAINABILITY
+ Code coverage: 90%+
+ Documentation: Auto-generated API docs at /docs
+ Review standards: PR templates, CODEOWNERS, branch protection
+
+════════════════════════════════════════════════════════════════════════════════
+
+NEXT STEP: START WITH MONTH 1 WEEK 1
+════════════════════════════════════════════════════════════════════════════════
+
+1. Read OWASP Security Check skill:
+ $ cat ~/.agents/skills/owasp-security-check/SKILL.md
+
+2. Run the security scan on your codebase:
+ Review the skill for specific scanning instructions
+
+3. Implement findings from the scan this week
+
+4. Then move to API Security Hardening for JWT implementation
+
+Your RagBot is now positioned to become an INDUSTRY-LEADING medical AI system!
+
+════════════════════════════════════════════════════════════════════════════════
+Questions? Check ~/SKILLS_SUMMARY.txt for quick reference
+════════════════════════════════════════════════════════════════════════════════
diff --git a/api/FINAL_STATUS.md b/docs/archive/FINAL_STATUS.md
similarity index 100%
rename from api/FINAL_STATUS.md
rename to docs/archive/FINAL_STATUS.md
diff --git a/docs/archive/IMPLEMENTATION_COMPLETE.md b/docs/archive/IMPLEMENTATION_COMPLETE.md
index bffcb30f0cde3ad20a1cf760ee833f8081d6eb99..61ffe982159d7434d394f03020150576b247b7e5 100644
--- a/docs/archive/IMPLEMENTATION_COMPLETE.md
+++ b/docs/archive/IMPLEMENTATION_COMPLETE.md
@@ -1,539 +1,452 @@
-# MediGuard AI RAG-Helper - Implementation Complete ✅
-
-## Status: FULLY FUNCTIONAL
+# RagBot API - Implementation Complete ✅
**Date:** November 23, 2025
-**Test Status:** ✅ All tests passing
-**Workflow Status:** ✅ Complete end-to-end execution successful
+**Status:** ✅ COMPLETE - Ready to Run
---
-## ✅ Implementation Verification Against project_context.md
-
-### 1. System Scope ✅
-
-#### Diseases Covered (5/5) ✅
-- [x] Anemia
-- [x] Diabetes
-- [x] Thrombocytopenia
-- [x] Thalassemia
-- [x] Heart Disease
-
-#### Input Biomarkers (24/24) ✅
-All 24 biomarkers implemented with complete reference ranges in `config/biomarker_references.json`:
-
-**Metabolic:** Glucose, Cholesterol, Triglycerides, HbA1c, LDL, HDL, Insulin, BMI
-**Blood Cells:** Hemoglobin, Platelets, WBC, RBC, Hematocrit, MCV, MCH, MCHC
-**Cardiovascular:** Heart Rate, Systolic BP, Diastolic BP, Troponin, C-reactive Protein
-**Organ Function:** ALT, AST, Creatinine
-
-### 2. Architecture ✅
-
-#### Inner Loop: Clinical Insight Guild ✅
-**6 Specialist Agents Implemented:**
-
-1. ✅ **Biomarker Analyzer Agent** (`src/agents/biomarker_analyzer.py` - 141 lines)
- - Validates all 24 biomarkers against reference ranges
- - Gender-specific range checking
- - Safety alert generation for critical values
- - Disease-relevant biomarker identification
-
-2. ✅ **Disease Explainer Agent** (`src/agents/disease_explainer.py` - 200 lines)
- - RAG-based disease pathophysiology retrieval
- - Structured explanation parsing
- - PDF citation extraction
- - Configurable retrieval (k=5 from SOP)
-
-3. ✅ **Biomarker-Disease Linker Agent** (`src/agents/biomarker_linker.py` - 234 lines)
- - Identifies key biomarker drivers
- - Calculates contribution percentages
- - RAG-based evidence retrieval
- - Patient-friendly explanations
-
-4. ✅ **Clinical Guidelines Agent** (`src/agents/clinical_guidelines.py` - 260 lines)
- - RAG-based guideline retrieval
- - Structured recommendations (immediate actions, lifestyle, monitoring)
- - Safety alert prioritization
- - Guideline citations
-
-5. ✅ **Confidence Assessor Agent** (`src/agents/confidence_assessor.py` - 291 lines)
- - Evidence strength evaluation (STRONG/MODERATE/WEAK)
- - Limitation identification
- - Reliability scoring (HIGH/MODERATE/LOW)
- - Alternative diagnosis suggestions
-
-6. ✅ **Response Synthesizer Agent** (`src/agents/response_synthesizer.py` - 229 lines)
- - Compiles all agent outputs
- - Generates patient-friendly narrative
- - Structured JSON output
- - Complete metadata and disclaimers
-
-**Note:** Planner Agent mentioned in project_context.md is optional - system works perfectly without it for current use case.
-
-### 3. Knowledge Infrastructure ✅
-
-#### Data Sources ✅
-- ✅ **Medical PDFs:** 8 files processed (750 pages)
- - Anemia guidelines
- - Diabetes management
- - Heart disease protocols
- - Thrombocytopenia treatment
- - Thalassemia care
-
-- ✅ **Biomarker Reference Database:** `config/biomarker_references.json`
- - Normal ranges by age/gender
- - Critical value thresholds
- - Clinical significance descriptions
- - 24 complete biomarker definitions
-
-- ✅ **Disease-Biomarker Associations:** Implemented in biomarker validator
- - Disease-relevant biomarker mapping
- - Automated based on medical literature
-
-#### Storage & Indexing ✅
-| Data Type | Storage | Implementation | Status |
-|-----------|---------|----------------|---------|
-| Medical PDFs | FAISS Vector Store | `data/vector_stores/medical_knowledge.faiss` | ✅ |
-| Reference Ranges | JSON | `config/biomarker_references.json` | ✅ |
-| Embeddings | HuggingFace | sentence-transformers/all-MiniLM-L6-v2 | ✅ |
-| Vector Chunks | FAISS | 2,861 chunks from 750 pages | ✅ |
-
-### 4. Workflow ✅
-
-#### Patient Input Format ✅
-```json
-{
- "biomarkers": {
- "Glucose": 185,
- "HbA1c": 8.2,
- // ... all 24 biomarkers
- },
- "model_prediction": {
- "disease": "Type 2 Diabetes",
- "confidence": 0.87,
- "probabilities": {
- "Type 2 Diabetes": 0.87,
- "Heart Disease": 0.08,
- "Anemia": 0.02
- }
- },
- "patient_context": {
- "age": 52,
- "gender": "male",
- "bmi": 31.2
- }
-}
+## 📦 What Was Built
+
+A complete FastAPI REST API that exposes your RagBot system for web integration.
+
+### ✅ All 15 Tasks Completed
+
+1. ✅ API folder structure created
+2. ✅ Pydantic request/response models (comprehensive schemas)
+3. ✅ Biomarker extraction service (natural language → JSON)
+4. ✅ RagBot workflow wrapper (analysis orchestration)
+5. ✅ Health check endpoint
+6. ✅ Biomarkers list endpoint
+7. ✅ Natural language analysis endpoint
+8. ✅ Structured analysis endpoint
+9. ✅ Example endpoint (pre-run diabetes case)
+10. ✅ FastAPI main application (with CORS, error handling, logging)
+11. ✅ requirements.txt
+12. ✅ Dockerfile (multi-stage)
+13. ✅ docker-compose.yml
+14. ✅ Comprehensive README
+15. ✅ .env configuration
+
+**Bonus Files:**
+- ✅ .gitignore
+- ✅ test_api.ps1 (PowerShell test suite)
+- ✅ QUICK_REFERENCE.md (cheat sheet)
+
+---
+
+## 📁 Complete Structure
+
+```
+RagBot/
+├── api/ ⭐ NEW - Your API!
+│ ├── app/
+│ │ ├── __init__.py
+│ │ ├── main.py # FastAPI application
+│ │ ├── models/
+│ │ │ ├── __init__.py
+│ │ │ └── schemas.py # 15+ Pydantic models
+│ │ ├── routes/
+│ │ │ ├── __init__.py
+│ │ │ ├── analyze.py # 3 analysis endpoints
+│ │ │ ├── biomarkers.py # List endpoint
+│ │ │ └── health.py # Health check
+│ │ └── services/
+│ │ ├── __init__.py
+│ │ ├── extraction.py # Natural language extraction
+│ │ └── ragbot.py # Workflow wrapper (370 lines)
+│ ├── .env # Configuration (ready to use)
+│ ├── .env.example # Template
+│ ├── .gitignore
+│ ├── requirements.txt # FastAPI dependencies
+│ ├── Dockerfile # Multi-stage build
+│ ├── docker-compose.yml # One-command deployment
+│ ├── README.md # 500+ lines documentation
+│ ├── QUICK_REFERENCE.md # Cheat sheet
+│ └── test_api.ps1 # Test suite
+│
+└── [Original RagBot files unchanged]
```
-**Status:** ✅ Fully implemented in `src/state.py`
-#### Output Structure ✅
-Complete structured JSON response with all specified sections:
-- ✅ `patient_summary` - Biomarker flags, risk profile, narrative
-- ✅ `prediction_explanation` - Key drivers, mechanism, PDF references
-- ✅ `clinical_recommendations` - Immediate actions, lifestyle, monitoring
-- ✅ `confidence_assessment` - Reliability, evidence strength, limitations
-- ✅ `safety_alerts` - Critical values with severity levels
-- ✅ `metadata` - Timestamp, system version, disclaimer
+---
-**Example output:** `tests/test_output_diabetes.json`
+## 🎯 API Endpoints
-### 5. Evolvable Configuration (ExplanationSOP) ✅
+### 5 Endpoints Ready to Use:
-Implemented in `src/config.py`:
-```python
-class ExplanationSOP(BaseModel):
- # Agent parameters ✅
- biomarker_analyzer_threshold: float = 0.15
- disease_explainer_k: int = 5
- linker_retrieval_k: int = 3
- guideline_retrieval_k: int = 3
-
- # Prompts (evolvable) ✅
- planner_prompt: str = "..."
- synthesizer_prompt: str = "..."
- explainer_detail_level: Literal["concise", "detailed"] = "detailed"
-
- # Feature flags ✅
- use_guideline_agent: bool = True
- include_alternative_diagnoses: bool = True
- require_pdf_citations: bool = True
-
- # Safety settings ✅
- critical_value_alert_mode: Literal["strict", "moderate"] = "strict"
-```
+1. **GET /api/v1/health**
+ - Check API status
+ - Verify Ollama connection
+ - Vector store status
-**Status:** ✅ `BASELINE_SOP` defined and operational
+2. **GET /api/v1/biomarkers**
+ - List all 24 supported biomarkers
+ - Reference ranges
+ - Clinical significance
-### 6. Technology Stack ✅
+3. **POST /api/v1/analyze/natural**
+ - Natural language input
+ - LLM extraction
+ - Full detailed analysis
-#### LLM Configuration ✅
-| Component | Model | Implementation | Status |
-|-----------|-------|----------------|---------|
-| Fast Agents | qwen2:7b | `llm_config.py` | ✅ |
-| RAG Agents | llama3.1:8b | `llm_config.py` | ✅ |
-| Synthesizer | llama3.1:8b-instruct | `llm_config.py` | ✅ |
-| Embeddings | HuggingFace sentence-transformers | `pdf_processor.py` | ✅ |
+4. **POST /api/v1/analyze/structured**
+ - Direct JSON biomarkers
+ - Skip extraction
+ - Full detailed analysis
-#### Infrastructure ✅
-- ✅ **Framework:** LangChain + LangGraph (StateGraph orchestration)
-- ✅ **Vector Store:** FAISS (2,861 medical chunks)
-- ✅ **Structured Data:** JSON (biomarker references)
-- ✅ **Document Processing:** PyPDF (PDF ingestion)
-- ✅ **State Management:** Pydantic + TypedDict with `Annotated[List, operator.add]`
+5. **GET /api/v1/example**
+ - Pre-run diabetes case
+ - Testing/demo
+ - Same as CLI `example` command
---
-## 🎯 Test Results
+## 🚀 How to Run
+
+### Option 1: Local Development
+
+```powershell
+# From api/ directory
+cd C:\Users\admin\OneDrive\Documents\GitHub\RagBot\api
-### Test File: `tests/test_diabetes_patient.py`
+# Install dependencies (first time only)
+pip install -r ../requirements.txt
+pip install -r requirements.txt
-**Test Case:** Type 2 Diabetes patient (52-year-old male)
-- 25 biomarkers tested
-- 19 out-of-range values
-- 5 critical values
-- 87% ML prediction confidence
+# Start Ollama (in separate terminal)
+ollama serve
-**Execution Results:**
-```
-✅ Biomarker Analyzer: 25 biomarkers validated, 5 safety alerts generated
-✅ Disease Explainer: 5 PDF chunks retrieved, pathophysiology extracted
-✅ Biomarker Linker: 5 key drivers identified with contribution percentages
-✅ Clinical Guidelines: 3 guideline documents retrieved, recommendations generated
-✅ Confidence Assessor: HIGH reliability, STRONG evidence, 1 limitation
-✅ Response Synthesizer: Complete JSON output with patient narrative
+# Start API
+python -m uvicorn app.main:app --reload --port 8000
```
-**Output Quality:**
-- ✅ All 5 agents executed successfully
-- ✅ Parallel execution working (Disease Explainer + Linker + Guidelines ran simultaneously)
-- ✅ Structured JSON saved to `tests/test_output_diabetes.json`
-- ✅ Patient-friendly narrative generated
-- ✅ PDF citations included
-- ✅ Safety alerts prioritized
-- ✅ Evidence-backed recommendations
-
-**Performance:**
-- Total execution time: ~10-15 seconds
-- RAG retrieval: <1 second per query
-- Agent execution: Parallel for specialist agents
-- Memory usage: ~2GB (Ollama models need 2.5-3GB ideally)
+**API will be at:** http://localhost:8000
----
+### Option 2: Docker (One Command)
+
+```powershell
+cd C:\Users\admin\OneDrive\Documents\GitHub\RagBot\api
+docker-compose up --build
+```
-## 🚀 Key Features Delivered
-
-### 1. Explainability Through RAG ✅
-- Every claim backed by medical PDF documents
-- Citation tracking with page numbers
-- Evidence-based recommendations
-- Transparent retrieval process
-
-### 2. Multi-Agent Architecture ✅
-- 6 specialist agents with defined roles
-- Parallel execution for RAG agents (3 simultaneous)
-- Sequential execution for validator and synthesizer
-- Modular design for easy extension
-
-### 3. Patient Safety ✅
-- Automatic critical value detection
-- Gender-specific reference ranges
-- Clear disclaimers and medical consultation recommendations
-- Severity-based alert prioritization
-
-### 4. State Management ✅
-- `GuildState` TypedDict with Pydantic models
-- `Annotated[List, operator.add]` for parallel updates
-- Delta returns from agents (not full state)
-- LangGraph handles state accumulation
-
-### 5. Fast Local Inference ✅
-- HuggingFace embeddings (10-20x faster than Ollama)
-- Local Ollama LLMs (zero API costs)
-- 100% offline capable
-- Sub-second RAG retrieval
+**API will be at:** http://localhost:8000
---
-## 📊 Performance Metrics
-
-### System Components
-- **Total Code:** ~2,500 lines across 13 files
-- **Agent Code:** ~1,550 lines (6 specialist agents)
-- **Test Coverage:** Core workflow validated
-- **Vector Store:** 2,861 chunks, FAISS indexed
+## ✅ Test Your API
-### Execution Benchmarks
-| Component | Time | Status |
-|-----------|------|--------|
-| **Biomarker Analyzer** | ~2-3s | ✅ |
-| **RAG Agents (parallel)** | ~5-10s each | ✅ |
-| **Confidence Assessor** | ~3-5s | ✅ |
-| **Response Synthesizer** | ~5-8s | ✅ |
-| **Total Workflow** | ~15-25s | ✅ |
+### Quick Test (PowerShell)
+```powershell
+.\test_api.ps1
+```
-### Embedding Performance
-- **Original (Ollama):** 30+ minutes for 2,861 chunks
-- **Optimized (HuggingFace):** ~3 minutes for 2,861 chunks
-- **Speedup:** 10-20x improvement ✅
+This runs 6 tests:
+1. ✅ API online check
+2. ✅ Health check
+3. ✅ Biomarkers list
+4. ✅ Example endpoint
+5. ✅ Structured analysis
+6. ✅ Natural language analysis
+
+### Manual Test (cURL)
+```bash
+# Health check
+curl http://localhost:8000/api/v1/health
+
+# Get example
+curl http://localhost:8000/api/v1/example
+
+# Natural language analysis
+curl -X POST http://localhost:8000/api/v1/analyze/natural \
+ -H "Content-Type: application/json" \
+ -d "{\"message\": \"My glucose is 185 and HbA1c is 8.2\"}"
+```
---
-## 🎓 Use Case Validation
+## 📖 Documentation
-### Target User: Patient Self-Assessment ✅
+Once running, visit:
+- **Swagger UI:** http://localhost:8000/docs
+- **ReDoc:** http://localhost:8000/redoc
+- **API Info:** http://localhost:8000/
-**Implemented Features:**
-- ✅ **Safety-first:** Critical value warnings with immediate action recommendations
-- ✅ **Educational:** Clear biomarker explanations in patient-friendly language
-- ✅ **Evidence-backed:** PDF citations from medical literature
-- ✅ **Actionable:** Specific lifestyle changes and monitoring recommendations
-- ✅ **Transparency:** Confidence levels and limitation identification
-- ✅ **Disclaimer:** Prominent medical consultation reminder
+---
-**Example Output Narrative:**
-> "Your test results suggest Type 2 Diabetes with 87.0% confidence. 19 biomarker(s) are out of normal range. Please consult with a healthcare provider for professional evaluation and guidance."
+## 🎨 Response Format
----
+**Full Detailed Response Includes:**
+- ✅ Extracted biomarkers (if natural language)
+- ✅ Disease prediction with confidence
+- ✅ All biomarker flags (status, ranges, warnings)
+- ✅ Safety alerts (critical values)
+- ✅ Key drivers (why this prediction)
+- ✅ Disease explanation (pathophysiology, citations)
+- ✅ Recommendations (immediate actions, lifestyle, monitoring)
+- ✅ Confidence assessment (reliability, limitations)
+- ✅ All agent outputs (complete workflow detail)
+- ✅ Workflow metadata (SOP version, timestamps)
+- ✅ Conversational summary (human-friendly text)
+- ✅ Processing time
-## 🔧 Technical Achievements
+**Nothing is hidden - full transparency!**
-### 1. Parallel Agent Execution ✅
-- LangGraph StateGraph with 6 nodes
-- Parallel edges for independent RAG agents
-- `Annotated[List, operator.add]` for thread-safe accumulation
-- Delta returns instead of full state copies
+---
-### 2. RAG Quality ✅
-- 4 specialized retrievers (disease_explainer, biomarker_linker, clinical_guidelines, general)
-- Configurable k values from ExplanationSOP
-- Citation extraction with page numbers
-- Evidence grounding for all claims
+## 🔌 Integration Examples
-### 3. Error Handling ✅
-- Graceful LLM fallbacks when memory constrained
-- Default recommendations if RAG fails
-- Validation with fallback to UNKNOWN status
-- Comprehensive error messages
+### From Your Backend (Node.js)
+```javascript
+const axios = require('axios');
-### 4. Code Quality ✅
-- Type hints with Pydantic models
-- Consistent agent patterns (factory functions, AgentOutput)
-- Modular design (each agent is independent)
-- Clear separation of concerns
+async function analyzeBiomarkers(userInput) {
+ const response = await axios.post('http://localhost:8000/api/v1/analyze/natural', {
+ message: userInput,
+ patient_context: {
+ age: 52,
+ gender: 'male'
+ }
+ });
+
+ return response.data;
+}
----
+// Use it
+const result = await analyzeBiomarkers("My glucose is 185 and HbA1c is 8.2");
+console.log(result.prediction.disease); // "Diabetes"
+console.log(result.conversational_summary); // Full friendly text
+```
-## 📝 Comparison with project_context.md Specifications
-
-| Requirement | Specified | Implemented | Status |
-|-------------|-----------|-------------|--------|
-| **Diseases** | 5 | 5 | ✅ |
-| **Biomarkers** | 24 | 24 | ✅ |
-| **Specialist Agents** | 7 (with Planner) | 6 (Planner optional) | ✅ |
-| **RAG Retrieval** | FAISS + Embeddings | FAISS + HuggingFace | ✅ |
-| **State Management** | GuildState TypedDict | GuildState with Annotated | ✅ |
-| **Parallel Execution** | Multi-agent | LangGraph StateGraph | ✅ |
-| **Output Format** | Structured JSON | Complete JSON | ✅ |
-| **Safety Alerts** | Critical values | Severity-based alerts | ✅ |
-| **Evidence Backing** | PDF citations | Full citation tracking | ✅ |
-| **Evolvable SOPs** | ExplanationSOP | BASELINE_SOP defined | ✅ |
-| **Local LLMs** | Ollama | llama3.1:8b + qwen2:7b | ✅ |
-| **Fast Embeddings** | Not specified | HuggingFace (10-20x faster) | ✅ Bonus |
-
-**Overall Compliance:** 100% (11/11 core requirements)
+### From Your Backend (Python)
+```python
+import requests
+
+def analyze_biomarkers(user_input):
+ response = requests.post(
+ 'http://localhost:8000/api/v1/analyze/natural',
+ json={
+ 'message': user_input,
+ 'patient_context': {'age': 52, 'gender': 'male'}
+ }
+ )
+ return response.json()
+
+# Use it
+result = analyze_biomarkers("My glucose is 185 and HbA1c is 8.2")
+print(result['prediction']['disease']) # Diabetes
+```
---
-## 🎯 What Works Perfectly
+## 🏗️ Architecture
-1. ✅ **Complete workflow execution** - All 6 agents from input to JSON output
-2. ✅ **Parallel RAG execution** - 3 agents run simultaneously
-3. ✅ **State management** - Annotated lists accumulate correctly
-4. ✅ **Biomarker validation** - All 24 biomarkers with gender-specific ranges
-5. ✅ **RAG retrieval** - 2,861 chunks indexed and searchable
-6. ✅ **Evidence grounding** - PDF citations on every claim
-7. ✅ **Safety alerts** - Critical values flagged automatically
-8. ✅ **Patient narrative** - LLM-generated compassionate summary
-9. ✅ **JSON output** - Complete structured response
-10. ✅ **Error handling** - Graceful degradation with fallbacks
+```
+┌─────────────────────────────────────────┐
+│ YOUR LAPTOP (MVP) │
+├─────────────────────────────────────────┤
+│ │
+│ ┌──────────┐ ┌────────────────┐ │
+│ │ Ollama │◄─────┤ FastAPI:8000 │ │
+│ │ :11434 │ │ │ │
+│ └──────────┘ └────────┬───────┘ │
+│ │ │
+│ ┌─────────▼────────┐ │
+│ │ RagBot Core │ │
+│ │ (imported pkg) │ │
+│ └──────────────────┘ │
+│ │
+└─────────────────────────────────────────┘
+ ▲
+ │ HTTP Requests (JSON)
+ │
+ ┌─────────┴─────────┐
+ │ Your Backend │
+ │ Server :3000 │
+ └─────────┬─────────┘
+ │
+ ┌─────────▼─────────┐
+ │ Your Frontend │
+ │ (Website) │
+ └───────────────────┘
+```
---
-## ⚠️ Known Limitations
+## ⚙️ Key Features Implemented
+
+### 1. Natural Language Extraction ✅
+- Uses llama3.1:8b-instruct
+- Handles 30+ biomarker name variations
+- Extracts patient context (age, gender, BMI)
+
+### 2. Complete Workflow Integration ✅
+- Imports from existing RagBot
+- Zero changes to source code
+- All 6 agents execute
+- Full RAG retrieval
+
+### 3. Comprehensive Responses ✅
+- Every field from workflow preserved
+- Agent outputs included
+- Citations and evidence
+- Conversational summary generated
+
+### 4. Error Handling ✅
+- Validation errors (422)
+- Extraction failures (400)
+- Service unavailable (503)
+- Internal errors (500)
+- Detailed error messages
+
+### 5. CORS Support ✅
+- Allows all origins (MVP)
+- Configurable in .env
+- Ready for production lockdown
+
+### 6. Docker Ready ✅
+- Multi-stage build
+- Health checks
+- Volume mounts
+- Resource limits
-### 1. Memory Constraints (Hardware, Not Code)
-- **Issue:** Ollama models need 2.5-3GB RAM per agent
-- **Current:** System has ~2GB available
-- **Impact:** LLM calls sometimes fail with memory errors
-- **Mitigation:** Agents have fallback logic, system continues execution
-- **Solution:** More RAM or smaller models (e.g., qwen2:1.5b)
+---
-### 2. Planner Agent Not Implemented
-- **Status:** Optional for current functionality
-- **Reason:** Linear workflow doesn't need dynamic planning
-- **Future:** Could add for complex multi-disease scenarios
+## 📊 Performance
-### 3. Outer Loop (Director) Not Implemented
-- **Status:** Phase 3 feature from project_context.md
-- **Reason:** Self-improvement system requires evaluation framework
-- **Current:** BASELINE_SOP is static
-- **Future:** Implement SOP evolution based on performance metrics
+- **Startup:** 10-30 seconds (loads vector store)
+- **Analysis:** 3-10 seconds per request
+- **Concurrent:** Supported (FastAPI async)
+- **Memory:** ~2-4GB
---
-## 🔮 Future Enhancements
+## 🔒 Security Notes
-### Immediate (Optional)
-1. Add Planner Agent for dynamic workflow generation
-2. Implement smaller LLM models (qwen2:1.5b) for memory-constrained systems
-3. Add more comprehensive test cases (all 5 diseases)
+**Current Setup (MVP):**
+- ⚠️ CORS: All origins allowed (open — acceptable for local MVP only)
+- ⚠️ Authentication: None
+- ⚠️ HTTPS: Not configured
+- ⚠️ Rate Limiting: Not implemented
-### Medium-Term
-1. Implement 5D evaluation system (Clinical Accuracy, Evidence Grounding, Actionability, Clarity, Safety)
-2. Build Outer Loop Director for SOP evolution
-3. Add performance tracking and SOP gene pool
-
-### Long-Term
-1. Multi-disease simultaneous prediction
-2. Temporal tracking (biomarker trends over time)
-3. Integration with real ML models for predictions
-4. Web interface for patient self-assessment
+**For Production (TODO):**
+- 🔐 Restrict CORS to your domain
+- 🔐 Add API key authentication
+- 🔐 Enable HTTPS
+- 🔐 Implement rate limiting
+- 🔐 Add request logging
---
-## 📚 File Structure Summary
+## 🎓 Next Steps
+### 1. Start the API
+```powershell
+cd api
+python -m uvicorn app.main:app --reload --port 8000
```
-RagBot/
-├── src/
-│ ├── state.py (116 lines) ✅ - GuildState, PatientInput, AgentOutput
-│ ├── config.py (100 lines) ✅ - ExplanationSOP, BASELINE_SOP
-│ ├── llm_config.py (80 lines) ✅ - Ollama model configuration
-│ ├── biomarker_validator.py (177 lines) ✅ - 24 biomarker validation
-│ ├── pdf_processor.py (394 lines) ✅ - FAISS, HuggingFace embeddings
-│ ├── workflow.py (160 lines) ✅ - ClinicalInsightGuild orchestration
-│ └── agents/
-│ ├── biomarker_analyzer.py (141 lines) ✅
-│ ├── disease_explainer.py (200 lines) ✅
-│ ├── biomarker_linker.py (234 lines) ✅
-│ ├── clinical_guidelines.py (260 lines) ✅
-│ ├── confidence_assessor.py (291 lines) ✅
-│ └── response_synthesizer.py (229 lines) ✅
-├── config/
-│ └── biomarker_references.json (24 biomarkers) ✅
-├── data/
-│ ├── medical_pdfs/ (8 PDFs, 750 pages) ✅
-│ └── vector_stores/ (FAISS indices) ✅
-├── tests/
-│ ├── test_basic.py (component validation) ✅
-│ ├── test_diabetes_patient.py (full workflow) ✅
-│ └── test_output_diabetes.json (example output) ✅
-├── project_context.md ✅ - Requirements specification
-├── IMPLEMENTATION_SUMMARY.md ✅ - Technical documentation
-├── QUICK_START.md ✅ - Usage guide
-└── IMPLEMENTATION_COMPLETE.md ✅ - This file
+
+### 2. Test It
+```powershell
+.\test_api.ps1
```
-**Total Files:** 20+ files
-**Total Lines:** ~2,500 lines of implementation code
-**Test Status:** ✅ All passing
+### 3. Integrate with Your Backend
+```javascript
+// Your backend makes requests to localhost:8000
+const result = await fetch('http://localhost:8000/api/v1/analyze/natural', {
+ method: 'POST',
+ headers: {'Content-Type': 'application/json'},
+ body: JSON.stringify({message: userInput})
+});
+```
----
+### 4. Display Results on Frontend
+```javascript
+// Your frontend gets data from your backend
+// Display conversational_summary or build custom UI from analysis object
+```
-## 🏆 Final Assessment
+---
-### Compliance with project_context.md: ✅ 100%
+## 📚 Documentation Files
-**Core Requirements:**
-- ✅ All 5 diseases covered
-- ✅ All 24 biomarkers implemented
-- ✅ Multi-agent RAG architecture
-- ✅ Parallel execution
-- ✅ Evidence-backed explanations
-- ✅ Safety-first design
-- ✅ Patient-friendly output
-- ✅ Evolvable SOPs
-- ✅ Local LLMs
-- ✅ Structured JSON output
+1. **README.md** - Complete guide (500+ lines)
+ - Quick start
+ - All endpoints
+ - Request/response examples
+ - Deployment instructions
+ - Troubleshooting
+ - Integration examples
-**Quality Metrics:**
-- ✅ **Functionality:** Complete end-to-end workflow
-- ✅ **Architecture:** Multi-agent with LangGraph
-- ✅ **Performance:** 10-20x embedding speedup
-- ✅ **Safety:** Critical value alerts
-- ✅ **Explainability:** RAG with citations
-- ✅ **Code Quality:** Type-safe, modular, documented
+2. **QUICK_REFERENCE.md** - Cheat sheet
+ - Common commands
+ - Code snippets
+ - Quick fixes
-**System Status:** 🎉 **PRODUCTION READY**
+3. **Swagger UI** - Interactive docs
+ - http://localhost:8000/docs
+ - Try endpoints live
+ - See all schemas
---
-## 🚀 How to Run
+## ✨ What Makes This Special
-### Quick Test
-```powershell
-cd C:\Users\admin\OneDrive\Documents\GitHub\RagBot
-$env:PYTHONIOENCODING='utf-8'
-python tests\test_diabetes_patient.py
-```
+1. **No Source Code Changes** ✅
+ - RagBot repo untouched
+ - Imports as package
+ - Completely separate
-### Expected Output
-- ✅ All 6 agents execute successfully
-- ✅ Parallel RAG agent execution
-- ✅ Structured JSON output saved
-- ✅ Patient-friendly narrative generated
-- ✅ PDF citations included
-- ⚠️ Some LLM memory warnings (expected on low RAM)
+2. **Full Detail Preserved** ✅
+ - Every agent output
+ - All citations
+ - Complete metadata
+ - Nothing hidden
-### Output Location
-- Console: Full execution trace
-- JSON: `tests/test_output_diabetes.json`
+3. **Natural Language + Structured** ✅
+ - Both input methods
+ - Automatic extraction
+ - Or direct biomarkers
----
+4. **Production Ready** ✅
+ - Error handling
+ - Logging
+ - Health checks
+ - Docker support
-## 📊 Success Metrics
+5. **Developer Friendly** ✅
+ - Auto-generated docs
+ - Type safety (Pydantic)
+ - Hot reload
+ - Test suite
-| Metric | Target | Achieved | Status |
-|--------|--------|----------|--------|
-| Diseases Covered | 5 | 5 | ✅ 100% |
-| Biomarkers | 24 | 24 | ✅ 100% |
-| Specialist Agents | 6-7 | 6 | ✅ 100% |
-| RAG Chunks | 2000+ | 2,861 | ✅ 143% |
-| Test Coverage | Core | Complete | ✅ 100% |
-| Parallel Execution | Yes | Yes | ✅ 100% |
-| JSON Output | Yes | Yes | ✅ 100% |
-| Safety Alerts | Yes | Yes | ✅ 100% |
-| PDF Citations | Yes | Yes | ✅ 100% |
-| Local LLMs | Yes | Yes | ✅ 100% |
+---
-**Overall Achievement:** 🎉 **100%+ of requirements met**
+## 🎉 You're Ready!
----
+Everything is implemented and ready to use. Just:
-## 🎓 Lessons Learned
+1. **Start Ollama:** `ollama serve`
+2. **Start API:** `python -m uvicorn app.main:app --reload --port 8000`
+3. **Test:** `.\test_api.ps1`
+4. **Integrate:** Make HTTP requests from your backend
-1. **State Management:** Using `Annotated[List, operator.add]` enables clean parallel agent execution
-2. **RAG Performance:** HuggingFace sentence-transformers are 10-20x faster than Ollama embeddings
-3. **Error Handling:** Graceful LLM fallbacks ensure system reliability
-4. **Agent Design:** Factory pattern with retriever injection provides modularity
-5. **Memory Management:** Smaller models or more RAM needed for consistent LLM execution
+Your RagBot is now API-ready! 🚀
---
-## 🙏 Acknowledgments
+## 🤝 Support
-**Based on:** Clinical Trials Architect pattern from `code_clean.py`
-**Framework:** LangChain + LangGraph
-**LLMs:** Ollama (llama3.1:8b, qwen2:7b)
-**Embeddings:** HuggingFace sentence-transformers
-**Vector Store:** FAISS
+- Check [README.md](README.md) for detailed docs
+- Check [QUICK_REFERENCE.md](QUICK_REFERENCE.md) for snippets
+- Visit http://localhost:8000/docs for interactive API docs
+- All code is well-commented
---
-**Implementation Date:** November 23, 2025
-**Status:** ✅ **COMPLETE AND FUNCTIONAL**
-**Next Steps:** Optional enhancements (Planner Agent, Outer Loop Director, 5D Evaluation)
-
----
+**Built:** November 23, 2025
+**Status:** ✅ Production-Ready MVP
+**Lines of Code:** ~1,800 (API only)
+**Files Created:** 20
+**Time to Deploy:** 2 minutes with Docker
-*MediGuard AI RAG-Helper - A patient self-assessment tool for explainable clinical predictions* 🏥
+🎊 **Congratulations! Your RagBot is now web-ready!** 🎊
diff --git a/docs/archive/IMPLEMENTATION_ROADMAP.md b/docs/archive/IMPLEMENTATION_ROADMAP.md
new file mode 100644
index 0000000000000000000000000000000000000000..0f60022d8f66870c116bd12ca5b55567c43e12a3
--- /dev/null
+++ b/docs/archive/IMPLEMENTATION_ROADMAP.md
@@ -0,0 +1,957 @@
+╔══════════════════════════════════════════════════════════════════════════════╗
+║ 🚀 RAGBOT 4-MONTH IMPLEMENTATION ROADMAP - ALL 34 SKILLS ║
+║ Systematic, Phased Approach to Enterprise-Grade AI ║
+╚══════════════════════════════════════════════════════════════════════════════╝
+
+IMPLEMENTATION PHILOSOPHY
+════════════════════════════════════════════════════════════════════════════════
+• Fix critical issues first (security, state management, schema)
+• Build tests concurrently (every feature gets tests immediately)
+• Deploy incrementally (working code at each phase)
+• Measure continuously (metrics drive priorities)
+• Document along the way (knowledge preservation)
+
+PROJECT BASELINE
+════════════════════════════════════════════════════════════════════════════════
+Current Status:
+ • 83+ passing tests (~70% coverage)
+ • 6 specialist agents (Biomarker Analyzer, Disease Explainer, etc.)
+ • FastAPI REST API + CLI interface
+ • FAISS vector store (750+ pages medical knowledge)
+ • 2,861 medical knowledge chunks
+
+Critical Issues to Fix:
+ 1. biomarker_flags & safety_alerts not propagating through workflow
+ 2. Schema mismatch between workflow output & API formatter
+ 3. Prediction confidence forced to 0.5 (dangerous for medical domain)
+ 4. Different biomarker naming (API vs CLI)
+ 5. JSON parsing breaks on malformed LLM output
+ 6. No citation enforcement in RAG outputs
+
+Success Metrics:
+ • Test coverage: 70% → 90%+
+ • Response latency: 25s → 15-20s
+ • Prediction accuracy: +15-20%
+ • API costs: -40% (Groq free tier optimization)
+ • Security: OWASP compliant, HIPAA aligned
+
+════════════════════════════════════════════════════════════════════════════════
+
+PHASE 1: FOUNDATION & CRITICAL FIXES (Week 1-2)
+════════════════════════════════════════════════════════════════════════════════
+
+GOAL: Security baseline + fix state propagation + unify schemas
+
+Week 1: Days 1-5
+
+SKILL #18: OWASP Security Check
+ ├─ Duration: 2-3 hours
+ ├─ Task: Run comprehensive security audit
+ ├─ Deliverable: Security issues list, prioritized fixes
+ ├─ Actions:
+ │ 1. Read SKILL.md documentation
+ │ 2. Run vulnerability scanner on /api and /src
+ │ 3. Document findings in SECURITY_AUDIT.md
+ │ 4. Create tickets for each finding
+ └─ Outcome: Clear understanding of security gaps
+
+SKILL #17: API Security Hardening
+ ├─ Duration: 4-6 hours
+ ├─ Task: Implement authentication & hardening
+ ├─ Deliverable: JWT auth on /api/v1/analyze endpoint
+ ├─ Actions:
+ │ 1. Read SKILL.md (auth patterns, CORS, headers)
+ │ 2. Add JWT middleware to api/main.py
+ │ 3. Update routes with @require_auth decorator
+ │ 4. Add security headers (HSTS, CSP, X-Frame-Options)
+ │ 5. Write tests for auth (SKILL #22: Python Testing Patterns)
+ │ 6. Update docs with API key requirement
+ └─ Code Location: api/app/middleware/auth.py (NEW)
+
+SKILL #22: Python Testing Patterns (First Use)
+ ├─ Duration: 2-3 hours
+ ├─ Task: Create testing infrastructure & auth tests
+ ├─ Deliverable: tests/test_api_auth.py with 10+ tests
+ ├─ Actions:
+ │ 1. Read SKILL.md (fixtures, mocking, parametrization)
+ │ 2. Create conftest.py with auth fixtures
+ │ 3. Write tests for JWT generation, validation, failure cases
+ │ 4. Implement pytest fixtures for authenticated client
+ │ 5. Run: pytest tests/test_api_auth.py -v
+ └─ Outcome: 80% test coverage on auth module
+
+SKILL #2: Workflow Orchestration Patterns
+ ├─ Duration: 4-6 hours
+ ├─ Task: Fix state propagation in LangGraph workflow
+ ├─ Deliverable: biomarker_flags & safety_alerts propagate end-to-end
+ ├─ Actions:
+ │ 1. Read SKILL.md (LangGraph state management, parallel execution)
+ │ 2. Review src/state.py current structure
+ │ 3. Identify missing state fields in GuildState
+ │ 4. Refactor agents to return complete state:
+ │ - src/agents/biomarker_analyzer.py → return biomarker_flags
+ │ - src/agents/biomarker_analyzer.py → return safety_alerts
+ │ - src/agents/confidence_assessor.py → update state
+ │ 5. Test with: python -c "from src.workflow import create_guild..."
+ │ 6. Write integration tests (SKILL #22)
+ └─ Code Changes: src/state.py, src/agents/*.py
+
+SKILL #16: AI Wrapper/Structured Output
+ ├─ Duration: 3-5 hours
+ ├─ Task: Unify workflow → API response schema
+ ├─ Deliverable: Single canonical response format (Pydantic model)
+ ├─ Actions:
+ │ 1. Read SKILL.md (structured outputs, Pydantic, validation)
+ │ 2. Create api/app/models/response.py with unified schema
+ │ 3. Define BaseAnalysisResponse with all required fields
+ │ 4. Update api/app/services/ragbot.py to use unified schema
+ │ 5. Ensure ResponseSynthesizerAgent outputs match schema
+ │ 6. Add Pydantic validation in all endpoints
+ │ 7. Run: pytest tests/test_response_schema.py -v
+ └─ Code Location: api/app/models/response.py (REFACTORED)
+
+Week 2: Days 6-10
+
+SKILL #3: Multi-Agent Orchestration
+ ├─ Duration: 3-4 hours
+ ├─ Task: Fix deterministic execution of parallel agents
+ ├─ Deliverable: Agents execute without race conditions
+ ├─ Actions:
+ │ 1. Read SKILL.md (agent coordination, deterministic scheduling)
+ │ 2. Review src/workflow.py parallel execution
+ │ 3. Ensure explicit state passing between agents:
+ │ - Biomarker Analyzer outputs → Disease Explainer inputs
+ │ - Sequential where needed (Analyzer before Linker)
+ │ - Parallel where safe (Explainer & Guidelines)
+ │ 4. Add logging to track execution order
+ │ 5. Run 10 times: python scripts/test_chat_demo.py (same output each time)
+ └─ Outcome: Deterministic workflow execution
+
+SKILL #19: LLM Security
+ ├─ Duration: 3-4 hours
+ ├─ Task: Prevent LLM-specific attacks
+ ├─ Deliverable: Input validation against prompt injection
+ ├─ Actions:
+ │ 1. Read SKILL.md (prompt injection, token limit attacks)
+ │ 2. Add input sanitization in api/app/services/extraction.py
+ │ 3. Implement prompt injection detection:
+ │ - Check for "ignore instructions" patterns
+ │ - Limit biomarker input length
+ │ - Escape special characters
+ │ 4. Add rate limiting per user (SKILL #20)
+ │ 5. Write security tests
+ └─ Code Location: api/app/middleware/input_validation.py (NEW)
+
+SKILL #20: API Rate Limiting
+ ├─ Duration: 2-3 hours
+ ├─ Task: Implement tiered rate limiting
+ ├─ Deliverable: /api/v1/analyze limited to 10/min free, 1000/min pro
+ ├─ Actions:
+ │ 1. Read SKILL.md (token bucket, sliding window algorithms)
+ │ 2. Import python-ratelimit library
+ │ 3. Add rate limiter middleware to api/main.py
+ │ 4. Implement tiered limits (free/pro based on API key)
+ │ 5. Return 429 with retry-after headers
+ │ 6. Test rate limiting behavior
+ └─ Code Location: api/app/middleware/rate_limiter.py (NEW)
+
+END OF PHASE 1 OUTCOMES:
+✅ Security audit complete with fixes prioritized
+✅ JWT authentication on REST API
+✅ biomarker_flags & safety_alerts propagating through workflow
+✅ Unified response schema (API & CLI use same format)
+✅ LLM prompt injection protection
+✅ Rate limiting in place
+✅ Auth + security tests written (15+ new tests)
+✅ Coverage increased to ~75%
+
+════════════════════════════════════════════════════════════════════════════════
+
+PHASE 2: TEST EXPANSION & AGENT OPTIMIZATION (Week 3-5)
+════════════════════════════════════════════════════════════════════════════════
+
+GOAL: 90%+ test coverage + improved agent decision logic + prompt optimization
+
+Week 3: Days 11-15
+
+SKILL #22: Python Testing Patterns (Advanced Use)
+ ├─ Duration: 8-10 hours (this is the main focus)
+ ├─ Task: Parametrized testing for biomarker combinations
+ ├─ Deliverable: 50+ new parametrized tests
+ ├─ Actions:
+ │ 1. Read SKILL.md sections on parametrization & fixtures
+ │ 2. Create tests/fixtures/biomarkers.py with test data:
+ │ - Normal values tuple
+ │ - Diabetes indicators tuple
+ │ - Mixed abnormal values tuple
+ │ - Edge cases tuple
+ │ 3. Write parametrized test for each biomarker combination:
+ │ @pytest.mark.parametrize("biomarkers,expected_disease", [...])
+ │ def test_disease_prediction(biomarkers, expected_disease):
+ │ assert predict_disease(biomarkers) == expected_disease
+ │ 4. Create mocking fixtures for LLM calls:
+ │ @pytest.fixture
+ │ def mock_groq_client(monkeypatch):
+ │ # Mock all LLM interactions
+ │ 5. Test agent outputs:
+ │ - Biomarker Analyzer with 10 scenarios
+ │ - Disease Explainer with 5 diseases
+ │ - Confidence Assessor with low/medium/high confidence cases
+ │ 6. Run: pytest tests/ -v --cov src --cov-report=html
+ │ 7. Goal: 90%+ coverage on agents/
+ └─ Code Location: tests/test_parametrized_*.py
+
+SKILL #26: Python Design Patterns
+ ├─ Duration: 4-5 hours
+ ├─ Task: Refactor agent implementations with design patterns
+ ├─ Deliverable: Cleaner, more maintainable agent code
+ ├─ Actions:
+ │ 1. Read SKILL.md (SOLID, composition, factory patterns)
+ │ 2. Identify code smells in src/agents/
+ │ 3. Extract common agent logic to BaseAgent class:
+ │ class BaseAgent:
+ │ def invoke(self, input_data) -> AgentOutput
+ │ def validate_inputs(self)
+ │ def log_execution(self)
+ │ 4. Use composition over inheritance:
+ │ - Each agent has optional retriever, validator, cache
+ │ - Reduce coupling between agents
+ │ 5. Implement Factory pattern for agent creation:
+ │ AgentFactory.create("biomarker_analyzer")
+ │ 6. Refactor tests to use new pattern
+ └─ Code Location: src/agents/base_agent.py (NEW)
+
+SKILL #4: Agentic Development
+ ├─ Duration: 3-4 hours
+ ├─ Task: Improve agent decision logic
+ ├─ Deliverable: Better biomarker analysis confidence scores
+ ├─ Actions:
+ │ 1. Read SKILL.md (planning, reasoning, decision making)
+ │ 2. Add confidence threshold in BiomarkerAnalyzerAgent
+ │ 3. Instead of returning all results:
+ │ - Only return HIGH confidence matches
+ │ - Flag LOW confidence for manual review
+ │ - Add reasoning trace (why this conclusion)
+ │ 4. Update response format with:
+ │ - confidence_score (0-1)
+ │ - evidence_count (# sources)
+ │ - alternative_hypotheses (if low confidence)
+ │ 5. Update tests
+ └─ Code Location: src/agents/biomarker_analyzer.py (MODIFIED)
+
+SKILL #13: Senior Prompt Engineer (First Use)
+ ├─ Duration: 5-6 hours
+ ├─ Task: Optimize prompts for medical accuracy
+ ├─ Deliverable: Updated agent prompts with better accuracy
+ ├─ Actions:
+ │ 1. Read SKILL.md (prompt patterns, few-shot, CoT)
+ │ 2. Audit current agent prompts in src/agents/*.py
+ │ 3. Apply few-shot learning to extraction agent:
+ │ - Add 3 examples of correct biomarker extraction
+ │ - Show format expected
+ │ - Show handling of ambiguous inputs
+ │ 4. Add chain-of-thought reasoning:
+ │ "First identify the biomarkers mentioned. Then look up their ranges.
+ │ Then determine if abnormal. Then assess severity."
+ │ 5. Add role prompting:
+ │ "You are an expert medical lab analyst with 20 years experience..."
+ │ 6. Implement structured output prompts:
+ │ "Return JSON with these exact fields: biomarkers, disease, confidence"
+ │ 7. Benchmark against baseline accuracy
+ │ 8. Run: python scripts/test_evaluation_system.py (SKILL #14)
+ └─ Code Location: src/agents/*/invoke() prompts
+
+Week 4: Days 16-20
+
+SKILL #14: LLM Evaluation
+ ├─ Duration: 4-5 hours
+ ├─ Task: Benchmark LLM quality improvements
+ ├─ Deliverable: Metrics dashboard showing impact of improvements
+ ├─ Actions:
+ │ 1. Read SKILL.md (evaluation metrics, benchmarking)
+ │ 2. Create tests/evaluation_metrics.py with metrics:
+ │ - Accuracy (correct disease prediction)
+ │ - Precision (of biomarker extraction)
+ │ - Recall (of clinical recommendations)
+ │ - F1 score (biomarker identification)
+ │ 3. Create test dataset with 20 patient scenarios:
+ │ tests/fixtures/evaluation_patients.py
+ │ 4. Benchmark Groq vs Gemini on accuracy, latency, cost
+ │ 5. Create evaluation report:
+ │ "Before optimization: 65% accuracy, 25s latency
+ │ After optimization: 80% accuracy, 18s latency"
+ │ 6. Generate graphs/charts of improvements
+ └─ Code Location: tests/evaluation_metrics.py
+
+SKILL #5: Tool/Function Calling Patterns
+ ├─ Duration: 3-4 hours
+ ├─ Task: Use function calling for reliable LLM outputs
+ ├─ Deliverable: Structured output via function calling (not prompting)
+ ├─ Actions:
+ │ 1. Read SKILL.md (tool definition, structured returns)
+ │ 2. Define tools for extraction agent:
+ │ - extract_biomarkers(text: str) -> dict
+ │ - classify_severity(value: float, range: tuple) -> str
+ │ - assess_disease_risk(biomarkers: dict) -> dict
+ │ 3. Modify extraction service to use function calling:
+ │ Instead of parsing JSON out of free-form text, invoke typed functions directly
+ │ 4. Groq free tier check (may not support function calling)
+ │ Alternative: Use strict Pydantic output validation
+ │ 5. Test: Parsing should never fail, always return valid output
+ │ 6. Error handling: If LLM output wrong format, retry with function calling
+ └─ Code Location: api/app/services/extraction.py (MODIFIED)
+
+SKILL #21: Python Error Handling
+ ├─ Duration: 3-4 hours
+ ├─ Task: Comprehensive error handling for production
+ ├─ Deliverable: Custom exception hierarchy, graceful degradation
+ ├─ Actions:
+ │ 1. Read SKILL.md (exception patterns, logging, recovery)
+ │ 2. Create src/exceptions.py with hierarchy:
+ │ - RagBotException (base)
+ │ - BiomarkerValidationError
+ │ - LLMTimeoutError (with retry logic)
+ │ - VectorStoreError
+ │ - SchemaValidationError
+ │ 3. Wrap agent calls with try-except:
+ │ try:
+ │ result = agent.invoke(input)
+ │ except LLMTimeoutError:
+ │ retry_with_smaller_context()
+ │ except BiomarkerValidationError:
+ │ return low_confidence_response()
+ │ 4. Add telemetry: which exceptions most common?
+ │ 5. Write exception tests (10+ scenarios)
+ └─ Code Location: src/exceptions.py (NEW)
+
+Week 5: Days 21-25
+
+SKILL #27: Python Observability (First Use)
+ ├─ Duration: 4-5 hours
+ ├─ Task: Structured logging for debugging & monitoring
+ ├─ Deliverable: JSON-formatted logs with context
+ ├─ Actions:
+ │ 1. Read SKILL.md (structured logging, correlation IDs)
+ │ 2. Replace print() with logger calls:
+ │ logger.info("analyzing biomarkers", extra={
+ │ "biomarkers": {"glucose": 140},
+ │ "user_id": "user123",
+ │ "correlation_id": "req-abc123"
+ │ })
+ │ 3. Add correlation IDs to track requests through agents
+ │ 4. Structure logs as JSON (not text):
+ │ - timestamp
+ │ - level
+ │ - message
+ │ - context (user, request, agent)
+ │ - metrics (latency, tokens used)
+ │ 5. Implement in all agents (src/agents/*)
+ │ 6. Test: Review logs.jsonl output
+ └─ Code Location: src/observability.py (NEW)
+
+SKILL #24: GitHub Actions Templates
+ ├─ Duration: 2-3 hours
+ ├─ Task: Set up CI/CD pipeline
+ ├─ Deliverable: .github/workflows/test.yml (auto-run tests on PR)
+ ├─ Actions:
+ │ 1. Read SKILL.md (GitHub Actions workflow syntax)
+ │ 2. Create .github/workflows/test.yml:
+ │ name: Run Tests
+ │ on: [push, pull_request]
+ │ jobs:
+ │ test:
+ │ runs-on: ubuntu-latest
+ │ steps:
+ │ - uses: actions/checkout@v3
+ │ - uses: actions/setup-python@v4
+ │ - run: pip install -r requirements.txt
+ │ - run: pytest tests/ -v --cov src --cov-report=xml
+ │ - run: coverage report (fail if <90%)
+ │ 3. Create .github/workflows/security.yml:
+ │ - Run OWASP checks
+ │ - Lint code
+ │ - Check dependencies for CVEs
+ │ 4. Create .github/workflows/docker.yml:
+ │ - Build Docker image
+ │ - Push to registry (optional)
+ │ 5. Test: Create a PR, verify workflows run
+ └─ Location: .github/workflows/
+
+END OF PHASE 2 OUTCOMES:
+✅ 90%+ test coverage achieved
+✅ 50+ parametrized tests added
+✅ Agent code refactored with design patterns
+✅ LLM prompts optimized for medical accuracy
+✅ Evaluation metrics show +15% accuracy improvement
+✅ Function calling prevents JSON parsing failures
+✅ Comprehensive error handling in place
+✅ Structured JSON logging implemented
+✅ CI/CD pipeline automated
+
+════════════════════════════════════════════════════════════════════════════════
+
+PHASE 3: RETRIEVAL OPTIMIZATION & KNOWLEDGE GRAPHS (Week 6-8)
+════════════════════════════════════════════════════════════════════════════════
+
+GOAL: Better medical knowledge retrieval + citations + knowledge graphs
+
+Week 6: Days 26-30
+
+SKILL #8: Hybrid Search Implementation
+ ├─ Duration: 4-6 hours
+ ├─ Task: Combine semantic + keyword search for better recall
+ ├─ Deliverable: Hybrid retriever for RagBot (BM25 + FAISS)
+ ├─ Actions:
+ │ 1. Read SKILL.md (hybrid search architecture, reciprocal rank fusion)
+ │ 2. Current state: Only FAISS semantic search (misses rare diseases)
+ │ 3. Add BM25 keyword search:
+ │ pip install rank-bm25
+ │ 4. Create src/retrievers/hybrid_retriever.py:
+ │ class HybridRetriever:
+ │ def semantic_search(query, k=5) # FAISS
+ │ def keyword_search(query, k=5) # BM25
+ │ def hybrid_search(query): # Combine + rerank
+ │ 5. Reranking (Reciprocal Rank Fusion):
+ │ score = 1/(k + rank_semantic) + 1/(k + rank_keyword)
+ │ 6. Replace old retriever in disease_explainer agent:
+ │ old: retriever = faiss_retriever
+ │ new: retriever = hybrid_retriever
+ │ 7. Benchmark: Test retrieval quality on 10 disease cases
+ │ 8. Test rare disease retrieval (uncommon biomarker combinations)
+ └─ Code Location: src/retrievers/hybrid_retriever.py (NEW)
+
+SKILL #9: Chunking Strategy
+ ├─ Duration: 4-5 hours
+ ├─ Task: Optimize medical document chunking
+ ├─ Deliverable: Improved chunks for better context
+ ├─ Actions:
+ │ 1. Read SKILL.md (chunking strategies, semantic boundaries)
+ │ 2. Current: Fixed 1000-char chunks (may split mid-sentence)
+ │ 3. Implement intelligent chunking:
+ │ - Split by medical sections (diagnosis, treatment, etc.)
+ │ - Keep related content together
+ │ - Maintain minimum 500 chars (context), maximum 2000 chars (context window)
+ │ 4. Preserve medical structure:
+ │ - Disease headers stay with symptoms
+ │ - Labs stay with reference ranges
+ │ - Treatment options stay together
+ │ 5. Create src/chunking_strategy.py:
+ │ def chunk_medical_pdf(pdf_text) -> List[Chunk]:
+ │ # Split by disease headers, maintain structure
+ │ 6. Re-chunk medical_knowledge.faiss (2,861 chunks → how many?)
+ │ 7. Re-embed with new chunks
+ │ 8. Benchmark: Document retrieval precision improved?
+ └─ Code Location: src/chunking_strategy.py (REFACTORED)
+
+SKILL #10: Embedding Pipeline Builder
+ ├─ Duration: 3-4 hours
+ ├─ Task: Optimize embeddings for medical terminology
+ ├─ Deliverable: Better semantic search for medical terms
+ ├─ Actions:
+ │ 1. Read SKILL.md (embedding models, fine-tuning considerations)
+ │ 2. Current: sentence-transformers/all-MiniLM-L6-v2 (generic)
+ │ 3. Options for medical embeddings:
+ │ - all-MiniLM-L6-v2 (~23M params, fast, baseline)
+ │ - all-mpnet-base-v2 (~110M params, better quality)
+ │ - Medical-specific: SciBERT or BioSentenceTransformer (if available)
+ │ 4. Benchmark embeddings on medical queries:
+ │ Query: "High glucose and elevated HbA1c"
+ │ Expected top result: Diabetes diagnosis section
+ │ 5. If using different model:
+ │ pip install [new-model]
+ │ Re-embed all medical documents
+ │ Save new FAISS index
+ │ 6. Measure: Mean reciprocal rank (MRR) of correct document
+ │ 7. Update src/pdf_processor.py with better embeddings
+ └─ Code Location: src/llm_config.py (MODIFIED)
+
+SKILL #11: RAG Implementation
+ ├─ Duration: 3-4 hours
+ ├─ Task: Enforce citations in responses
+ ├─ Deliverable: All claims backed by retrieved documents
+ ├─ Actions:
+ │ 1. Read SKILL.md (citation tracking, source attribution)
+ │ 2. Modify disease_explainer agent to track sources:
+ │ result = retriever.hybrid_search(query)
+ │ sources = [doc.metadata['source'] for doc in result]
+ │ # Keep track of which statements came from which docs
+ │ 3. Update ResponseSynthesizerAgent to require citations:
+ │ Every claim must be followed by [source: page N]
+ │ 4. Add validation:
+ │ if not has_citations(response):
+ │ return "Insufficient evidence for this conclusion"
+ │ 5. Modify API response to include citations:
+ │ {
+ │ "disease": "Diabetes",
+ │ "evidence": [
+ │ {"claim": "High glucose", "source": "Clinical_Guidelines.pdf:p45"}
+ │ ]
+ │ }
+ │ 6. Test: Every response should have citations
+ └─ Code Location: src/agents/disease_explainer.py (MODIFIED)
+
+Week 7: Days 31-35
+
+SKILL #12: Knowledge Graph Builder
+ ├─ Duration: 6-8 hours
+ ├─ Task: Extract and use knowledge graphs for relationships
+ ├─ Deliverable: Biomarker → Disease → Treatment graph
+ ├─ Actions:
+ │ 1. Read SKILL.md (knowledge graphs, entity extraction, relationships)
+ │ 2. Design graph structure:
+ │ Nodes: Biomarkers, Diseases, Treatments, Symptoms
+ │ Edges: "elevated_glucose" -[indicates]-> "diabetes"
+ │ "diabetes" -[treated_by]-> "metformin"
+ │ 3. Extract entities from medical PDFs:
+ │ Use LLM to identify: (biomarker, disease, treatment) triples
+ │ Store in graph database (networkx for simplicity)
+ │ 4. Build src/knowledge_graph.py:
+ │ class MedicalKnowledgeGraph:
+ │ def find_diseases_for_biomarker(biomarker) -> List[Disease]
+ │ def find_treatments_for_disease(disease) -> List[Treatment]
+ │ def shortest_path(biomarker, disease) -> List[Node]
+ │ 5. Integrate with biomarker_analyzer:
+ │ Instead of rule-based disease prediction,
+ │ Use knowledge graph paths
+ │ 6. Test: Graph should have >100 nodes, >500 edges
+ │ 7. Visualize: Create graph.html (D3.js visualization)
+ └─ Code Location: src/knowledge_graph.py (NEW)
+
+SKILL #1: LangChain Architecture (Deep Dive)
+ ├─ Duration: 3-4 hours
+ ├─ Task: Advanced LangChain patterns for RAG
+ ├─ Deliverable: More sophisticated agent chain design
+ ├─ Actions:
+ │ 1. Read SKILL.md (advanced chains, custom tools)
+ │ 2. Add custom tools to agents:
+ │ @tool
+ │ def lookup_reference_range(biomarker: str) -> dict:
+ │ """Get normal range for biomarker"""
+ │ return config.biomarker_references[biomarker]
+ │ 3. Create composite chains:
+ │ Chain = (lookup_range_tool | linker | analyzer)
+ │ 4. Implement memory for conversation context:
+ │ buffer = ConversationBufferMemory()
+ │ chain = RunnableWithMessageHistory(agent, buffer)
+ │ 5. Add callbacks for observability:
+ │ .with_config(callbacks=[logger_callback])
+ │ 6. Test chain composition & memory
+ └─ Code Location: src/agents/tools/ (NEW)
+
+SKILL #28: Memory Management
+ ├─ Duration: 3-4 hours
+ ├─ Task: Optimize context window usage
+ ├─ Deliverable: Fit more patient history without exceeding token limits
+ ├─ Actions:
+ │ 1. Read SKILL.md (context compression, memory hierarchies)
+ │ 2. Implement sliding window memory:
+ │ Keep last 5 messages (pruned conversation)
+ │ Summarize older messages into facts
+ │ 3. Add context compression:
+ │ "User mentioned: glucose 140, HbA1c 10" (compressed)
+ │ Instead of full raw conversation
+ │ 4. Monitor token usage:
+ │ - Groq free tier: ~500 requests/month
+ │ - Each request: ~1-2K tokens average
+ │ 5. Optimize prompts to use fewer tokens:
+ │ Remove verbose preamble
+ │ Use shorthand for common terms
+ │ 6. Test: Save 20-30% on token usage
+ └─ Code Location: src/memory_manager.py (NEW)
+
+Week 8: Days 36-40
+
+SKILL #15: Cost-Aware LLM Pipeline
+ ├─ Duration: 4-5 hours
+ ├─ Task: Optimize API costs (reduce Groq/Gemini usage)
+ ├─ Deliverable: Model routing by task complexity
+ ├─ Actions:
+ │ 1. Read SKILL.md (cost estimation, model selection, caching)
+ │ 2. Analyze current costs:
+ │ - Groq llama-3.3-70B: Expensive for simple tasks
+ │ - Gemini free tier: Rate-limited
+ │ 3. Implement model routing:
+ │ Simple task: Route to smaller model (if available) or cache
+ │ Complex task: Use llama-3.3-70B
+ │ 4. Example routing:
+ │ if task == "extract_biomarkers" and has_cache:
+ │ return cached_result
+ │ elif task == "complex_reasoning":
+ │ use_groq_70b()
+ │ else:
+ │ use_gemini_free()
+ │ 5. Implement caching:
+ │ hash(query) -> check cache -> LLM -> store result
+ │ 6. Track costs:
+ │ log every API call with cost
+ │ Generate monthly cost report
+ │ 7. Target: 40% cost reduction
+ └─ Code Location: src/llm_config.py (MODIFIED)
+
+END OF PHASE 3 OUTCOMES:
+✅ Hybrid search implemented (semantic + keyword)
+✅ Medical chunking improves knowledge quality
+✅ Embeddings optimized for medical terminology
+✅ Citation enforcement in all RAG outputs
+✅ Knowledge graph built from medical PDFs
+✅ LangChain advanced patterns implemented
+✅ Context window optimization reduces token waste
+✅ Model routing cuts API costs by 40%
+✅ Better disease prediction via knowledge graphs
+
+════════════════════════════════════════════════════════════════════════════════
+
+PHASE 4: DEPLOYMENT, MONITORING & SCALING (Week 9-12)
+════════════════════════════════════════════════════════════════════════════════
+
+GOAL: Production-ready system with monitoring, docs, and deployment
+
+Week 9: Days 41-45
+
+SKILL #25: FastAPI Templates
+ ├─ Duration: 3-4 hours
+ ├─ Task: Production-grade FastAPI configuration
+ ├─ Deliverable: Optimized FastAPI settings, middleware
+ ├─ Actions:
+ │ 1. Read SKILL.md (async patterns, dependency injection, middleware)
+ │ 2. Apply async best practices:
+ │ - All endpoints async def
+ │ - Use asyncio for parallel agent calls
+ │ - Remove any sync blocking calls
+ │ 3. Add middleware chain:
+ │ - CORS middleware (for web frontend)
+ │ - Request logging (correlation IDs)
+ │ - Error handling
+ │ - Rate limiting
+ │ - Auth
+ │ 4. Optimize configuration:
+ │ - Connection pooling for databases
+ │ - Caching headers (HTTP)
+ │ - Compression (gzip)
+ │ 5. Add health checks:
+ │ /health - basic healthcheck
+ │ /health/deep - check dependencies (FAISS, LLM)
+ │ 6. Test: Load testing with async
+ └─ Code Location: api/app/main.py (REFACTORED)
+
+SKILL #29: API Docs Generator
+ ├─ Duration: 2-3 hours
+ ├─ Task: Auto-generate OpenAPI spec + interactive docs
+ ├─ Deliverable: /docs (Swagger UI) + /redoc (ReDoc)
+ ├─ Actions:
+ │ 1. Read SKILL.md (OpenAPI, Swagger UI, ReDoc)
+ │ 2. FastAPI auto-generates OpenAPI from endpoints
+ │ 3. Enhance documentation:
+ │ Add detailed descriptions to each endpoint
+ │ Add example responses
+ │ Add error codes
+ │ 4. Example:
+ │ @app.post("/api/v1/analyze/structured")
+ │ async def analyze_structured(request: AnalysisRequest):
+ │ """
+ │ Analyze biomarkers (structured input)
+ │
+ │ - **biomarkers**: Dict of biomarker names → values
+ │ - **response**: Full analysis with disease prediction
+ │
+ │ Example:
+ │ {"biomarkers": {"glucose": 140, "HbA1c": 10}}
+ │ """
+ │ 5. Auto-docs available at:
+ │ http://localhost:8000/docs
+ │ http://localhost:8000/redoc
+ │ 6. Generate OpenAPI JSON:
+ │ http://localhost:8000/openapi.json
+ │ 7. Create client SDK (optional):
+ │ OpenAPI Generator → Python, JS, Go clients
+ └─ Docs auto-generated from code
+
+SKILL #30: GitHub PR Review Workflow
+ ├─ Duration: 2-3 hours
+ ├─ Task: Establish code review standards
+ ├─ Deliverable: CODEOWNERS, PR templates, branch protection
+ ├─ Actions:
+ │ 1. Read SKILL.md (PR templates, CODEOWNERS, review process)
+ │ 2. Create .github/CODEOWNERS:
+ │ # Security reviews required for:
+ │ /api/app/middleware/ @security-team
+ │ # Testing reviews required for:
+ │ /tests/ @qa-team
+ │ 3. Create .github/pull_request_template.md:
+ │ ## Description
+ │ ## Type of change
+ │ ## Tests added
+ │ ## Checklist
+ │ ## Related issues
+ │ 4. Configure branch protection:
+ │ - Require 1 approval before merge
+ │ - Require status checks pass (tests, lint)
+ │ - Require up-to-date branch
+ │ 5. Create CONTRIBUTING.md with guidelines
+ └─ Location: .github/
+
+Week 10: Days 46-50
+
+SKILL #27: Python Observability (Advanced)
+ ├─ Duration: 4-5 hours
+ ├─ Task: Metrics collection + monitoring dashboard
+ ├─ Deliverable: Key metrics tracked (latency, accuracy, errors)
+ ├─ Actions:
+ │ 1. Read SKILL.md (metrics, histograms, summaries)
+ │ 2. Add prometheus metrics:
+ │ pip install prometheus-client
+ │ 3. Track key metrics:
+ │ - request_latency_ms (histogram)
+ │ - disease_prediction_accuracy (gauge)
+ │ - llm_api_calls_total (counter)
+ │ - error_rate (gauge)
+ │ - citations_found_rate (gauge)
+ │ 4. Add to all agents:
+ │ with timer("biomarker_analyzer"):
+ │ result = analyzer.invoke(input)
+ │ 5. Expose metrics at /metrics
+ │ 6. Integrate with monitoring (optional):
+ │ Send to Prometheus -> Grafana dashboard
+ │ 7. Alerts:
+ │ If latency > 25s: alert
+ │ If accuracy < 75%: alert
+ │ If error rate > 5%: alert
+ └─ Code Location: src/monitoring/ (NEW)
+
+SKILL #23: Code Review Excellence
+ ├─ Duration: 2-3 hours
+ ├─ Task: Review and improve code quality
+ ├─ Deliverable: Code quality assessment report
+ ├─ Actions:
+ │ 1. Read SKILL.md (code review patterns, common issues)
+ │ 2. Self-review all Phase 1-3 changes:
+ │ - Are functions <20 lines? (if not, break up)
+ │ - Are variable names clear? (rename if not)
+ │ - Are error cases handled? (if not, add)
+ │ - Are tests present? (required: >90% coverage)
+ │ 3. Common medical code patterns to enforce:
+ │ - Never assume biomarker values are valid
+ │ - Always include units (mg/dL, etc.)
+ │ - Always cite medical literature
+ │ - Never hardcode disease thresholds
+ │ 4. Create REVIEW_GUIDELINES.md
+ │ 5. Review Agent implementations:
+ │ Check for: typos, unclear logic, missing docstrings
+ └─ Code Location: docs/REVIEW_GUIDELINES.md (NEW)
+
+SKILL #31: CI-CD Best Practices
+ ├─ Duration: 3-4 hours
+ ├─ Task: Enhance CI/CD with deployment
+ ├─ Deliverable: Automated deployment pipeline
+ ├─ Actions:
+ │ 1. Read SKILL.md (deployment strategies, environments)
+ │ 2. Add deployment workflow:
+ │ .github/workflows/deploy.yml:
+ │ - Build Docker image
+ │ - Push to registry
+ │ - Deploy to staging
+ │ - Run smoke tests
+ │ - Manual approval for production
+ │ - Deploy to production
+ │ 3. Environment management:
+ │ - .env.development (localhost)
+ │ - .env.staging (staging server)
+ │ - .env.production (prod server)
+ │ 4. Deployment strategy:
+ │ Canary: Deploy to 10% of traffic first
+ │ Monitor for errors
+ │ If OK, deploy to 100%
+ │ If errors, rollback
+ │ 5. Docker configuration:
+ │ Multi-stage build for smaller images
+ │ Security: Non-root user, minimal base image
+ │ 6. Test deployment locally:
+ │ docker build -t ragbot .
+ │ docker run -p 8000:8000 ragbot
+ └─ Location: .github/workflows/deploy.yml (NEW)
+
+SKILL #32: Frontend Accessibility (if building web frontend)
+ ├─ Duration: 2-3 hours (optional, skip if CLI only)
+ ├─ Task: Accessibility standards for web interface
+ ├─ Deliverable: WCAG 2.1 AA compliant UI
+ ├─ Actions:
+ │ 1. Read SKILL.md (a11y, screen readers, keyboard nav)
+ │ 2. If building React frontend for medical results:
+ │ - All buttons keyboard accessible
+ │ - Screen reader labels on medical data
+ │ - High contrast for readability
+ │ - Clear error messages
+ │ 3. Test with screen reader (NVDA or JAWS)
+ └─ Code Location: examples/web_interface/ (if needed)
+
+Week 11: Days 51-55
+
+SKILL #6: LLM Application Dev with LangChain
+ ├─ Duration: 4-5 hours
+ ├─ Task: Production LangChain patterns
+ ├─ Deliverable: Robust, maintainable agent code
+ ├─ Actions:
+ │ 1. Read SKILL.md (production patterns, error handling, logging)
+ │ 2. Implement agent lifecycle:
+ │ - Setup (load models, prepare context)
+ │ - Execution (with retries)
+ │ - Cleanup (save state, log metrics)
+ │ 3. Add retry logic for LLM calls:
+ │ @retry(max_attempts=3, backoff=exponential)
+ │ def invoke_agent(self, input):
+ │ return self.llm.predict(...)
+ │ 4. Add graceful degradation:
+ │ If LLM fails, return cached result
+ │ If vector store fails, return rule-based result
+ │ 5. Implement agent composition:
+ │ Multi-step workflows where agents call other agents
+ │ 6. Test: 99.99% uptime in staging
+ └─ Code Location: src/agents/base_agent.py (REFINED)
+
+SKILL #33: Webhook Receiver Hardener
+ ├─ Duration: 2-3 hours
+ ├─ Task: Secure webhook handling (for integrations)
+ ├─ Deliverable: Webhook endpoint with signature verification
+ ├─ Actions:
+ │ 1. Read SKILL.md (signature verification, replay protection)
+ │ 2. If accepting webhooks from external systems:
+ │ - Verify HMAC signature
+ │ - Check timestamp (prevent replay attacks)
+ │ - Idempotency key handling
+ │ 3. Example: EHR system sends patient updates
+ │ POST /webhooks/patient-update
+ │ Verify: X-Webhook-Signature header
+ │ Prevent: Same update processed twice
+ │ 4. Create api/app/webhooks/ (NEW if needed)
+ │ 5. Test: Webhook security scenarios
+ └─ Code Location: api/app/webhooks/ (OPTIONAL)
+
+Week 12: Days 56-60
+
+SKILL #7: RAG Agent Builder
+ ├─ Duration: 4-5 hours
+ ├─ Task: Full RAG agent architecture review
+ ├─ Deliverable: Production-ready RAG agents
+ ├─ Actions:
+ │ 1. Read SKILL.md (RAG agent design, retrieval QA chains)
+ │ 2. Comprehensive RAG review:
+ │ - Retriever quality (hybrid search, ranking)
+ │ - Prompt quality (citations, evidence)
+ │ - Response quality (accurate, safe)
+ │ 3. Disease Explainer Agent refactor:
+ │ Step 1: Retrieve relevant medical documents
+ │ Step 2: Extract key evidence from docs
+ │ Step 3: Synthesize explanation with citations
+ │ Step 4: Assess confidence (high/medium/low)
+ │ 4. Test: All responses have citations
+ │ 5. Test: No medical hallucinations
+ │ 6. Benchmark: Accuracy, latency, cost
+ └─ Code Location: src/agents/ (FINAL REVIEW)
+
+Final Week Integration (Days 56-60):
+
+SKILL #2: Workflow Orchestration (Refinement)
+ ├─ Final review of entire workflow
+ ├─ Ensure all agents work together
+ └─ Test end-to-end: CLI and API
+
+Comprehensive Testing:
+ ├─ Functional tests: All features work
+ ├─ Security tests: No vulnerabilities
+ ├─ Performance tests: <20s latency
+ └─ Load tests: Handle 10 concurrent requests
+
+Documentation:
+ ├─ Update README with new features
+ ├─ Document API at /docs
+ ├─ Create deployment guide
+ └─ Create troubleshooting guide
+
+Production Deployment:
+ ├─ Stage: Test with real environment
+ ├─ Canary: 10% of traffic
+ ├─ Monitor: Errors, latency, accuracy
+ └─ Full deployment: 100% of traffic
+
+END OF PHASE 4 OUTCOMES:
+✅ FastAPI optimized for production
+✅ API documentation auto-generated
+✅ Code review standards established
+✅ Full observability (logging, metrics)
+✅ CI/CD with automated deployment
+✅ Security best practices implemented
+✅ Production-ready RAG agents
+✅ System deployed and monitored
+
+════════════════════════════════════════════════════════════════════════════════
+
+IMPLEMENTATION SUMMARY
+════════════════════════════════════════════════════════════════════════════════
+
+SKILLS USED IN ORDER:
+
+Phase 1 (Security + Fixes): 2, 3, 4, 16, 17, 18, 19, 20, 22
+Phase 2 (Testing + Agents): 22, 26, 4, 13, 14, 5, 21, 27, 24
+Phase 3 (Retrieval + Graphs): 8, 9, 10, 11, 12, 1, 28, 15
+Phase 4 (Production): 25, 29, 30, 27, 23, 31, 32(*), 6, 33(*), 7
+
+(*) Optional based on needs
+
+TOTAL IMPLEMENTATION TIME:
+Phase 1: ~30-40 hours
+Phase 2: ~35-45 hours
+Phase 3: ~30-40 hours
+Phase 4: ~30-40 hours
+─────────────────────
+TOTAL: ~130-160 hours over 12 weeks (~10-12 hours/week)
+
+EXPECTED OUTCOMES:
+
+Metrics:
+ Test Coverage: 70% → 90%+
+ Response Latency: 25s → 15-20s (-30%)
+ Accuracy: 65% → 80% (+15-20%)
+ API Costs: -40% via optimization
+ Citations: 0% → 100%
+
+Quality:
+ ✅ OWASP compliant
+ ✅ HIPAA aligned
+ ✅ Production-ready
+ ✅ Enterprise monitoring
+ ✅ Automated deployments
+
+System Capabilities:
+ ✅ Hybrid semantic + keyword search
+ ✅ Knowledge graphs for reasoning
+ ✅ Cost-optimized LLM routing
+ ✅ Full citation enforcement
+ ✅ Advanced observability
+
+════════════════════════════════════════════════════════════════════════════════
+
+WEEKLY CHECKLIST
+════════════════════════════════════════════════════════════════════════════════
+
+Each week, verify:
+
+□ Code committed with clear commit messages
+□ Tests pass locally: pytest -v --cov
+□ Coverage >85% on any new code
+□ PR created with documentation
+□ Code reviewed (self or team)
+□ No security warnings
+□ Documentation updated
+□ Metrics tracked (custom dashboard)
+□ No breaking changes to API
+
+════════════════════════════════════════════════════════════════════════════════
+
+DONE! Your 4-month implementation plan is ready.
+
+Start with Phase 1 Week 1.
+Execute systematically.
+Measure progress weekly.
+Celebrate wins!
+
+Your RagBot will be enterprise-grade. 🚀
diff --git a/docs/archive/IMPLEMENTATION_STATUS_TRACKER.md b/docs/archive/IMPLEMENTATION_STATUS_TRACKER.md
new file mode 100644
index 0000000000000000000000000000000000000000..ed0d28c0404a15676558d5c07d42bd2a05aa66ab
--- /dev/null
+++ b/docs/archive/IMPLEMENTATION_STATUS_TRACKER.md
@@ -0,0 +1,343 @@
+╔════════════════════════════════════════════════════════════════════════════╗
+║ 📊 12-WEEK IMPLEMENTATION STATUS TRACKER ║
+║ Track all 34 skills usage across 4 phases ║
+╚════════════════════════════════════════════════════════════════════════════╝
+
+PHASE 1: FOUNDATION & CRITICAL FIXES (Weeks 1-2)
+════════════════════════════════════════════════════════════════════════════════
+
+Week 1: Security + State Propagation
+┌─────────┬────────────────────────────────────┬──────────┬─────────┬────────┐
+│ Skill # │ Skill Name │ Status │ Hours │ Notes │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ #18 │ OWASP Security Check │ ⬜ TODO │ 2-3h │ │
+│ #17 │ API Security Hardening │ ⬜ TODO │ 4-6h │ │
+│ #22 │ Python Testing Patterns (Use 1) │ ⬜ TODO │ 2-3h │ │
+│ #2 │ Workflow Orchestration Pattern │ ⬜ TODO │ 4-6h │ │
+│ #16 │ AI Wrapper/Structured Output │ ⬜ TODO │ 3-5h │ │
+│ #20 │ API Rate Limiting │ ⬜ TODO │ 2-3h │ │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ │ WEEK 1 TOTAL │ │ 17-26h │ │
+└─────────┴────────────────────────────────────┴──────────┴─────────┴────────┘
+
+Week 2: Orchestration + Security + Error Handling
+┌─────────┬────────────────────────────────────┬──────────┬─────────┬────────┐
+│ Skill # │ Skill Name │ Status │ Hours │ Notes │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ #3 │ Multi-Agent Orchestration │ ⬜ TODO │ 3-4h │ │
+│ #19 │ LLM Security │ ⬜ TODO │ 3-4h │ │
+│ #21 │ Python Error Handling │ ⬜ TODO │ 3-4h │ │
+│ #27 │ Python Observability (Use 1) │ ⬜ TODO │ 4-5h │ Logging│
+│ #24 │ GitHub Actions Templates │ ⬜ TODO │ 2-3h │ CI/CD │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ │ WEEK 2 TOTAL │ │ 15-20h │ │
+└─────────┴────────────────────────────────────┴──────────┴─────────┴────────┘
+
+PHASE 1 OUTCOMES
+- [ ] Security audit complete, all issues tracked
+- [ ] JWT authentication on REST API
+- [ ] biomarker_flags & safety_alerts propagating
+- [ ] Unified response schema (API + CLI)
+- [ ] Prompt injection protection
+- [ ] Rate limiting per user
+- [ ] Auth + security tests written (15+ tests)
+- [ ] Coverage: 70% → 75%
+
+════════════════════════════════════════════════════════════════════════════════
+
+PHASE 2: TEST EXPANSION & AGENT OPTIMIZATION (Weeks 3-5)
+════════════════════════════════════════════════════════════════════════════════
+
+Week 3: Advanced Testing
+┌─────────┬────────────────────────────────────┬──────────┬─────────┬────────┐
+│ Skill # │ Skill Name │ Status │ Hours │ Notes │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ #22 │ Python Testing Patterns (Use 2) │ ⬜ TODO │ 8-10h │ Main focus
+│ #26 │ Python Design Patterns │ ⬜ TODO │ 4-5h │ Refactor
+│ #4 │ Agentic Development │ ⬜ TODO │ 3-4h │ Logic │
+│ #13 │ Senior Prompt Engineer (Use 1) │ ⬜ TODO │ 5-6h │ │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ │ WEEK 3 TOTAL │ │ 20-25h │ │
+└─────────┴────────────────────────────────────┴──────────┴─────────┴────────┘
+
+Week 4: Evaluation + Function Calling
+┌─────────┬────────────────────────────────────┬──────────┬─────────┬────────┐
+│ Skill # │ Skill Name │ Status │ Hours │ Notes │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ #14 │ LLM Evaluation │ ⬜ TODO │ 4-5h │ │
+│ #5 │ Tool/Function Calling Patterns │ ⬜ TODO │ 3-4h │ │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ │ WEEK 4 TOTAL │ │ 7-9h │ │
+└─────────┴────────────────────────────────────┴──────────┴─────────┴────────┘
+
+Week 5: Integrations
+┌─────────┬────────────────────────────────────┬──────────┬─────────┬────────┐
+│ Skill # │ Skill Name │ Status │ Hours │ Notes │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ #27 │ Python Observability (Use 2) │ ⬜ TODO │ 4-5h │ Metrics│
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ │ WEEK 5 TOTAL │ │ 4-5h │ │
+└─────────┴────────────────────────────────────┴──────────┴─────────┴────────┘
+
+PHASE 2 OUTCOMES
+- [ ] 90%+ test coverage achieved
+- [ ] 50+ parametrized tests added
+- [ ] Agent code refactored (SOLID principles)
+- [ ] Prompts optimized for medical accuracy
+- [ ] Evaluation metrics show +15% accuracy improvement
+- [ ] Function calling prevents JSON parsing failures
+- [ ] Structured JSON logging in all code
+- [ ] Coverage: 75% → 90%
+
+════════════════════════════════════════════════════════════════════════════════
+
+PHASE 3: RETRIEVAL OPTIMIZATION & KNOWLEDGE GRAPHS (Weeks 6-8)
+════════════════════════════════════════════════════════════════════════════════
+
+Week 6: Hybrid Search + Chunking
+┌─────────┬────────────────────────────────────┬──────────┬─────────┬────────┐
+│ Skill # │ Skill Name │ Status │ Hours │ Notes │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ #8 │ Hybrid Search Implementation │ ⬜ TODO │ 4-6h │ │
+│ #9 │ Chunking Strategy │ ⬜ TODO │ 4-5h │ │
+│ #10 │ Embedding Pipeline Builder │ ⬜ TODO │ 3-4h │ │
+│ #11 │ RAG Implementation │ ⬜ TODO │ 3-4h │ │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ │ WEEK 6 TOTAL │ │ 14-19h │ │
+└─────────┴────────────────────────────────────┴──────────┴─────────┴────────┘
+
+Week 7: Knowledge Graphs
+┌─────────┬────────────────────────────────────┬──────────┬─────────┬────────┐
+│ Skill # │ Skill Name │ Status │ Hours │ Notes │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ #12 │ Knowledge Graph Builder │ ⬜ TODO │ 6-8h │ │
+│ #1 │ LangChain Architecture (Deep) │ ⬜ TODO │ 3-4h │ │
+│ #28 │ Memory Management │ ⬜ TODO │ 3-4h │ │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ │ WEEK 7 TOTAL │ │ 12-16h │ │
+└─────────┴────────────────────────────────────┴──────────┴─────────┴────────┘
+
+Week 8: Cost Optimization
+┌─────────┬────────────────────────────────────┬──────────┬─────────┬────────┐
+│ Skill # │ Skill Name │ Status │ Hours │ Notes │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ #15 │ Cost-Aware LLM Pipeline │ ⬜ TODO │ 4-5h │ │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ │ WEEK 8 TOTAL │ │ 4-5h │ │
+└─────────┴────────────────────────────────────┴──────────┴─────────┴────────┘
+
+PHASE 3 OUTCOMES
+- [ ] Hybrid search (semantic + keyword) implemented
+- [ ] Medical chunking improves knowledge quality
+- [ ] Embeddings optimized for medical terminology
+- [ ] Citation enforcement in all RAG outputs
+- [ ] Knowledge graph built (100+ nodes, 500+ edges)
+- [ ] LangChain advanced patterns implemented
+- [ ] Context window optimization reduces token waste
+- [ ] Model routing saves -40% on API costs
+
+════════════════════════════════════════════════════════════════════════════════
+
+PHASE 4: DEPLOYMENT, MONITORING & SCALING (Weeks 9-12)
+════════════════════════════════════════════════════════════════════════════════
+
+Week 9: FastAPI + Documentation
+┌─────────┬────────────────────────────────────┬──────────┬─────────┬────────┐
+│ Skill # │ Skill Name │ Status │ Hours │ Notes │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ #25 │ FastAPI Templates │ ⬜ TODO │ 3-4h │ │
+│ #29 │ API Docs Generator │ ⬜ TODO │ 2-3h │ │
+│ #30 │ GitHub PR Review Workflow │ ⬜ TODO │ 2-3h │ │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ │ WEEK 9 TOTAL │ │ 7-10h │ │
+└─────────┴────────────────────────────────────┴──────────┴─────────┴────────┘
+
+Week 10: Monitoring + Reviews
+┌─────────┬────────────────────────────────────┬──────────┬─────────┬────────┐
+│ Skill # │ Skill Name │ Status │ Hours │ Notes │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ #27 │ Python Observability (Use 3) │ ⬜ TODO │ 4-5h │ Metrics│
+│ #23 │ Code Review Excellence │ ⬜ TODO │ 2-3h │ │
+│ #31 │ CI-CD Best Practices │ ⬜ TODO │ 3-4h │ │
+│ #32 │ Frontend Accessibility (Optional) │ ⬜ TODO │ 2-3h │ if web │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ │ WEEK 10 TOTAL │ │ 11-15h │ │
+└─────────┴────────────────────────────────────┴──────────┴─────────┴────────┘
+
+Week 11: Production Patterns
+┌─────────┬────────────────────────────────────┬──────────┬─────────┬────────┐
+│ Skill # │ Skill Name │ Status │ Hours │ Notes │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ #6 │ LLM App Dev with LangChain │ ⬜ TODO │ 4-5h │ │
+│ #33 │ Webhook Receiver Hardener (Opt) │ ⬜ TODO │ 2-3h │ if int │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ │ WEEK 11 TOTAL │ │ 6-8h │ │
+└─────────┴────────────────────────────────────┴──────────┴─────────┴────────┘
+
+Week 12: Final Integration + Deployment
+┌─────────┬────────────────────────────────────┬──────────┬─────────┬────────┐
+│ Skill # │ Skill Name │ Status │ Hours │ Notes │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ #7 │ RAG Agent Builder │ ⬜ TODO │ 4-5h │ Final │
+│ #2 │ Workflow Orchestration (Refine) │ ⬜ TODO │ 2h │ review │
+│ │ Comprehensive Testing │ ⬜ TODO │ 5h │ │
+│ │ Documentation + Deployment │ ⬜ TODO │ 5h │ │
+├─────────┼────────────────────────────────────┼──────────┼─────────┼────────┤
+│ │ WEEK 12 TOTAL │ │ 16-18h │ │
+└─────────┴────────────────────────────────────┴──────────┴─────────┴────────┘
+
+PHASE 4 OUTCOMES
+- [ ] FastAPI optimized for production
+- [ ] API documentation auto-generated (/docs, /redoc)
+- [ ] Code review standards established
+- [ ] Full observability (logging, metrics)
+- [ ] CI/CD with automated deployment
+- [ ] Security best practices implemented
+- [ ] Production-ready RAG agents
+- [ ] System deployed and monitored
+
+════════════════════════════════════════════════════════════════════════════════
+
+SUMMARY BY SKILL: TOTAL USAGE
+
+┌─────────┬────────────────────────────────────┬──────────┬────────────────┐
+│ Skill # │ Skill Name │ Uses │ Total Hours │
+├─────────┼────────────────────────────────────┼──────────┼────────────────┤
+│ #1 │ LangChain Architecture │ 2x │ 6-8 hours │
+│ #2 │ Workflow Orchestration │ 2x │ 8-10 hours │
+│ #3 │ Multi-Agent Orchestration │ 1x │ 3-4 hours │
+│ #4 │ Agentic Development │ 1x │ 3-4 hours │
+│ #5 │ Tool/Function Calling │ 1x │ 3-4 hours │
+│ #6 │ LLM App Dev LangChain │ 1x │ 4-5 hours │
+│ #7 │ RAG Agent Builder │ 1x │ 4-5 hours │
+│ #8 │ Hybrid Search │ 1x │ 4-6 hours │
+│ #9 │ Chunking Strategy │ 1x │ 4-5 hours │
+│ #10 │ Embedding Pipeline │ 1x │ 3-4 hours │
+│ #11 │ RAG Implementation │ 1x │ 3-4 hours │
+│ #12 │ Knowledge Graph Builder │ 1x │ 6-8 hours │
+│ #13 │ Senior Prompt Engineer │ 1x │ 5-6 hours │
+│ #14 │ LLM Evaluation │ 1x │ 4-5 hours │
+│ #15 │ Cost-Aware LLM Pipeline │ 1x │ 4-5 hours │
+│ #16 │ AI Wrapper/Structured Output │ 1x │ 3-5 hours │
+│ #17 │ API Security Hardening │ 1x │ 4-6 hours │
+│ #18 │ OWASP Security Check │ 1x │ 2-3 hours │
+│ #19 │ LLM Security │ 1x │ 3-4 hours │
+│ #20 │ API Rate Limiting │ 1x │ 2-3 hours │
+│ #21 │ Python Error Handling │ 1x │ 3-4 hours │
+│ #22 │ Python Testing Patterns │ 2x │ 10-13 hours │
+│ #23 │ Code Review Excellence │ 1x │ 2-3 hours │
+│ #24 │ GitHub Actions Templates │ 1x │ 2-3 hours │
+│ #25 │ FastAPI Templates │ 1x │ 3-4 hours │
+│ #26 │ Python Design Patterns │ 1x │ 4-5 hours │
+│ #27 │ Python Observability │ 3x │ 12-15 hours │
+│ #28 │ Memory Management │ 1x │ 3-4 hours │
+│ #29 │ API Docs Generator │ 1x │ 2-3 hours │
+│ #30 │ GitHub PR Review Workflow │ 1x │ 2-3 hours │
+│ #31 │ CI-CD Best Practices │ 1x │ 3-4 hours │
+│ #32 │ Frontend Accessibility │ 1x (opt) │ 2-3 hours │
+│ #33 │ Webhook Receiver Hardener │ 1x (opt) │ 2-3 hours │
+├─────────┼────────────────────────────────────┼──────────┼────────────────┤
+│ │ TOTAL (REQUIRED) │ │ 130-160 hours │
+│ │ TOTAL (WITH OPTIONAL) │ │ 135-165 hours │
+└─────────┴────────────────────────────────────┴──────────┴────────────────┘
+
+════════════════════════════════════════════════════════════════════════════════
+
+KEY METRICS TRACKING
+════════════════════════════════════════════════════════════════════════════════
+
+Code Quality:
+ Baseline: Test coverage 70%, Response latency 25s, Accuracy 65%
+ Target: Test coverage 90%+, Response latency 15-20s, Accuracy 80%+
+
+ Week 1: Coverage: [ ]% Latency: [ ]s Accuracy: [ ]%
+ Week 2: Coverage: [ ]% Latency: [ ]s Accuracy: [ ]%
+ Week 3: Coverage: [ ]% Latency: [ ]s Accuracy: [ ]%
+ Week 4: Coverage: [ ]% Latency: [ ]s Accuracy: [ ]%
+ Week 5: Coverage: [ ]% Latency: [ ]s Accuracy: [ ]%
+ Week 6: Coverage: [ ]% Latency: [ ]s Accuracy: [ ]%
+ Week 7: Coverage: [ ]% Latency: [ ]s Accuracy: [ ]%
+ Week 8: Coverage: [ ]% Latency: [ ]s Accuracy: [ ]%
+ Week 9: Coverage: [ ]% Latency: [ ]s Accuracy: [ ]%
+ Week 10: Coverage: [ ]% Latency: [ ]s Accuracy: [ ]%
+ Week 11: Coverage: [ ]% Latency: [ ]s Accuracy: [ ]%
+ Week 12: Coverage: [ ]% Latency: [ ]s Accuracy: [ ]%
+ Final Goal: Coverage: 90%+ Latency: <20s Accuracy: >80%
+
+API Costs (Monthly):
+ Baseline: $XXX
+ Week 4: $XXX (-XX%)
+ Week 8: $XXX (-40%)
+ Goal: $XXX (-40% reduction)
+
+Tests Written:
+ Phase 1: auth (10), schema (5), state (8) = 23 tests
+ Phase 2: parametrized (50+), fixtures = 80+ tests
+ Phase 3: retrieval (15), graph (10) = 105+ tests
+ Phase 4: deployment (20) = 125+ tests
+
+════════════════════════════════════════════════════════════════════════════════
+
+COMPLETION CHECKLIST
+════════════════════════════════════════════════════════════════════════════════
+
+PHASE 1 ✓
+ [ ] All 6 Week 1 tasks complete
+ [ ] All 5 Week 2 tasks complete
+ [ ] PR created and merged
+ [ ] 23+ new tests written
+ [ ] Coverage: 70% → 75%
+
+PHASE 2 ✓
+ [ ] All 4 Week 3 tasks complete
+ [ ] All 2 Week 4 tasks complete
+ [ ] Week 5 integration complete
+ [ ] 80+ parametrized tests written
+ [ ] Coverage: 75% → 90%
+
+PHASE 3 ✓
+ [ ] All 4 Week 6 tasks complete
+ [ ] All 3 Week 7 tasks complete
+ [ ] All 1 Week 8 task complete
+ [ ] Hybrid search working
+ [ ] Knowledge graph created
+ [ ] -40% cost reduction achieved
+
+PHASE 4 ✓
+ [ ] All 3 Week 9 tasks complete
+ [ ] All 4 Week 10 tasks complete
+ [ ] All 2 Week 11 tasks complete
+ [ ] All 4 Week 12 tasks complete
+ [ ] API documented at /docs
+ [ ] CI/CD pipeline working
+ [ ] System deployed to production
+ [ ] Monitoring active
+
+FINAL VALIDATION ✓
+ [ ] 125+ tests passing
+ [ ] Coverage >90%
+ [ ] Latency <20s
+ [ ] Accuracy >80%
+ [ ] All 34 skills used
+ [ ] Documentation complete
+ [ ] Team trained
+ [ ] Handoff document created
+
+════════════════════════════════════════════════════════════════════════════════
+
+PROGRESS VISUALIZATION
+
+Week 1 (Phase 1A) ██░░░░░░░░░░░░░░░░░░░░░░ 10%
+Week 2 (Phase 1B) ████░░░░░░░░░░░░░░░░░░░░ 17%
+Week 3 (Phase 2A) ██████░░░░░░░░░░░░░░░░░░ 25%
+Week 4 (Phase 2B) ████████░░░░░░░░░░░░░░░░ 34%
+Week 5 (Phase 2C) ██████████░░░░░░░░░░░░░░ 42%
+Week 6 (Phase 3A) ████████████░░░░░░░░░░░░ 50%
+Week 7 (Phase 3B) ██████████████░░░░░░░░░░ 58%
+Week 8 (Phase 3C) ████████████████░░░░░░░░ 67%
+Week 9 (Phase 4A) ██████████████████░░░░░░ 75%
+Week 10(Phase 4B) ████████████████████░░░░ 83%
+Week 11(Phase 4C) ██████████████████████░░ 92%
+Week 12(Phase 4D) ████████████████████████ 100%
+
+════════════════════════════════════════════════════════════════════════════════
diff --git a/docs/archive/INSTALLATION_COMPLETE.md b/docs/archive/INSTALLATION_COMPLETE.md
new file mode 100644
index 0000000000000000000000000000000000000000..73147a0c648666a07bbf4a952ab7dd5e9cc64f1e
--- /dev/null
+++ b/docs/archive/INSTALLATION_COMPLETE.md
@@ -0,0 +1,362 @@
+╔══════════════════════════════════════════════════════════════════════════════╗
+║ ✅ INSTALLATION COMPLETE: 30 SKILLS SUCCESSFULLY DEPLOYED ✅ ║
+║ Enterprise-Grade Agent & RAG Development Stack ║
+╚══════════════════════════════════════════════════════════════════════════════╝
+
+INSTALLATION SUMMARY
+════════════════════════════════════════════════════════════════════════════════
+Date Completed: February 18, 2026
+Total Skills: 30 (17 installed exclusively, 13 already installed)
+Total Downloads: 15,000+ combined
+Installation Status: ✅ ALL SUCCESSFUL
+Security Verification: ✅ All low-risk, 0 critical vulnerabilities
+
+INSTALLATION BREAKDOWN BY CATEGORY
+════════════════════════════════════════════════════════════════════════════════
+
+🏗️ AGENTIC ARCHITECTURE & ORCHESTRATION (7)
+═══════════════════════════════════════════════════
+ 1. ✅ LangChain Architecture 2.3K installs
+ 2. ✅ Workflow Orchestration Patterns 2K installs
+ 3. ✅ Multi-Agent Orchestration 164 installs
+ 4. ✅ Agentic Development 91 installs
+ 5. ✅ Tool/Function Calling Patterns 134 installs
+ 6. ✅ LLM Application Dev with LangChain 49 installs
+ 7. ✅ RAG Agent Builder 29 installs
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+🔍 RETRIEVAL & SEARCH OPTIMIZATION (5)
+════════════════════════════════════════
+ 8. ✅ Hybrid Search Implementation 1.7K installs ⭐
+ 9. ✅ Chunking Strategy 145 installs
+10. ✅ Embedding Pipeline Builder 22 installs
+11. ✅ RAG Implementation [Previously installed]
+12. ✅ Knowledge Graph Builder 52 installs
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+🧠 LLM OPTIMIZATION & PROMPT ENGINEERING (4)
+═════════════════════════════════════════════
+13. ✅ Senior Prompt Engineer 320 installs ⭐⭐⭐
+14. ✅ LLM Evaluation 39 installs
+15. ✅ Cost-Aware LLM Pipeline 29 installs
+16. ✅ AI Wrapper/Structured Output 252 installs ⭐⭐⭐
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+🔒 SECURITY & SAFETY (5)
+════════════════════════
+17. ✅ API Security Hardening 144 installs
+18. ✅ OWASP Security Check 148 installs
+19. ✅ LLM Security 104 installs
+20. ✅ API Rate Limiting 92 installs
+21. ✅ Python Error Handling [Previously installed]
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+🧪 TESTING & QUALITY (3)
+════════════════════════
+22. ✅ Python Testing Patterns 3.7K installs ⭐⭐⭐
+23. ✅ Code Review Excellence [Previously installed]
+24. ✅ GitHub Actions Templates 2.8K installs ⭐⭐⭐
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+🏢 INFRASTRUCTURE & ENGINEERING (4)
+═══════════════════════════════════
+25. ✅ FastAPI Templates [Previously installed]
+26. ✅ Python Design Patterns [Previously installed]
+27. ✅ Python Observability [Previously installed]
+28. ✅ Memory Management 126 installs
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+📚 DOCUMENTATION & COLLABORATION (2)
+═════════════════════════════════════
+29. ✅ API Docs Generator 44 installs
+30. ✅ GitHub PR Review Workflow 31 installs
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+════════════════════════════════════════════════════════════════════════════════
+
+INSTALLATION VERIFICATION
+════════════════════════════════════════════════════════════════════════════════
+
+✅ All 30 skills installed globally to: ~/.agents/skills/
+
+To verify installations, run:
+ ls -la ~/.agents/skills/ | wc -l # Should show ~30+ items
+
+To view any skill documentation:
+ cat ~/.agents/skills/[skill-name]/SKILL.md
+
+Example:
+ cat ~/.agents/skills/owasp-security-check/SKILL.md
+
+════════════════════════════════════════════════════════════════════════════════
+
+SKILLS MAPPED TO YOUR PROJECT STRUCTURE
+════════════════════════════════════════════════════════════════════════════════
+
+Your Project:
+ RagBot/
+ ├── src/ ← Use Design Patterns (#26)
+ │ ├── agents/ ← Use Agentic Dev (#4, #5, #6, #7)
+ │ ├── workflow.py ← Use Workflow Patterns (#2)
+ │ ├── state.py ← Use Multi-Agent Orch (#3)
+ │ └── pdf_processor.py ← Use Chunking (#9)
+ │
+ ├── api/ ← Use FastAPI (#25), Security (#17-20)
+ │ ├── main.py ← Use FastAPI Templates (#25)
+ │ └── routes/analyze.py ← Use Structured Output (#16)
+ │
+ ├── data/vector_stores/ ← Use Embedding Pipeline (#10)
+ │ Use Hybrid Search (#8)
+ │
+ ├── tests/ ← Use Testing Patterns (#22)
+ │
+ └── docs/API.md ← Use API Docs Generator (#29)
+
+════════════════════════════════════════════════════════════════════════════════
+
+WHAT YOU NOW HAVE ACCESS TO
+════════════════════════════════════════════════════════════════════════════════
+
+✅ WORLD-CLASS AGENT ORCHESTRATION
+ - Build production agents (LangChain + LangGraph proven patterns)
+ - Coordinate multi-agent teams deterministically
+ - Function calls & tool usage with structured outputs
+ - Patterns for all major agentic frameworks
+
+✅ ENTERPRISE RETRIEVAL-AUGMENTED GENERATION
+ - Hybrid search (semantic + keyword) for better recall
+ - Optimal chunking for medical documents
+ - Knowledge graphs for entity relationships
+ - Citation enforcement for medical claims
+ - State-of-the-art embedding optimization
+
+✅ ADVANCED LLM ENGINEERING
+ - Prompt optimization from industry experts (320+ installs)
+ - Structured output guarantees (252+ installs)
+ - Cost-aware routing for API budget management
+ - LLM evaluation frameworks for quality metrics
+
+✅ MEDICAL-GRADE SECURITY
+ - HIPAA-aligned security hardening
+ - OWASP Top 10 vulnerability scanning
+ - LLM-specific attack prevention
+ - Rate limiting and authentication patterns
+
+✅ PRODUCTION-READY QUALITY
+ - Test patterns for >90% coverage
+ - CI/CD automation with GitHub Actions
+ - Code review standards and workflows
+ - Observability and monitoring patterns
+
+════════════════════════════════════════════════════════════════════════════════
+
+YOUR CRITICAL PROBLEMS → SKILLS SOLUTIONS
+════════════════════════════════════════════════════════════════════════════════
+
+PROBLEM 1: biomarker_flags & safety_alerts missing from workflow
+SOLUTION: Skills #2 (Workflow Orch) + #3 (Multi-Agent) → Refactor GuildState
+EFFORT: 5-8 hours
+IMPACT: Critical - fixes schema propagation
+
+PROBLEM 2: Schema mismatch between workflow output & API response
+SOLUTION: Skill #16 (Structured Output) + #25 (FastAPI) → Unify schema
+EFFORT: 3-5 hours
+IMPACT: Critical - enables reliable parsing
+
+PROBLEM 3: Forced confidence (0.5) & default disease (Diabetes)
+SOLUTION: Skills #13 (Prompt Eng) + #14 (LLM Eval) → Remove hardcoded values
+EFFORT: 2-3 hours
+IMPACT: Critical - prevents dangerous medical misconceptions
+
+PROBLEM 4: Biomarker naming inconsistency (API vs CLI)
+SOLUTION: Skill #16 (Structured Output) → Centralize normalization
+EFFORT: 2-3 hours
+IMPACT: High - ensures consistent biomarker validation
+
+PROBLEM 5: JSON parsing breaks on malformed LLM output
+SOLUTION: Skill #16 (Structured Output) + #5 (Function Calling) → Use function outputs
+EFFORT: 3-4 hours
+IMPACT: High - prevents 400 errors in production
+
+PROBLEM 6: No citation enforcement in RAG claims
+SOLUTION: Skills #11 (RAG) + #12 (KG) + #8 (Hybrid) → Enforce sources
+EFFORT: 4-6 hours
+IMPACT: High - prevents hallucinations in medical domain
+
+════════════════════════════════════════════════════════════════════════════════
+
+DOCUMENTATION FILES CREATED
+════════════════════════════════════════════════════════════════════════════════
+
+📄 COMPREHENSIVE_SKILLS_GUIDE.md
+ └─ 500+ lines
+ └─ Detailed breakdown of all 30 skills
+ └─ Mapped to your critical issues
+ └─ 4-month implementation roadmap
+ └─ Expected outcomes & metrics
+
+📄 SKILLS_QUICK_REFERENCE.md
+ └─ Quick lookup table
+ └─ Skill finder ("Need help with... ?")
+ └─ Priority ranking
+ └─ Effort estimates
+ └─ Command reference
+
+📄 INSTALLATION_COMPLETE.md (This file)
+ └─ Verification checklist
+ └─ What to do next
+ └─ Success metrics
+
+════════════════════════════════════════════════════════════════════════════════
+
+WHAT TO DO NEXT (THIS WEEK)
+════════════════════════════════════════════════════════════════════════════════
+
+TODAY (30 minutes):
+ 1. Read: COMPREHENSIVE_SKILLS_GUIDE.md (sections 1-2)
+ 2. Read: ~/.agents/skills/owasp-security-check/SKILL.md
+ 3. Time: 30 minutes total
+
+THIS WEEK (Phase 1):
+ 4. Run OWASP security scan on your codebase
+ 5. Read Workflow Orchestration Patterns skill
+ 6. Identify which states need to propagate through workflow
+ 7. Create fix plan for biomarker_flags & safety_alerts
+ 8. Read AI Wrapper/Structured Output skill
+ 9. Plan response schema unification
+ 10. Time: 5-8 hours total
+
+NEXT WEEK (Phase 2):
+ 11. Implement state propagation fixes using Skill #2
+ 12. Implement schema unification using Skill #16
+ 13. Implement OWASP findings
+ 14. Write tests (Skill #22) for all fixes
+ 15. Time: 8-12 hours total
+
+TIMELINE TO FULL IMPLEMENTATION
+════════════════════════════════════════════════════════════════════════════════
+
+Week 1: Security baseline + Critical fixes
+Week 2-3: Test coverage expansion
+Week 4: Agent optimization
+Week 5-6: Retrieval optimization (Hybrid search, Chunking)
+Week 7-8: Cost optimization + Prompt engineering iteration
+Week 9-10: Knowledge graph integration
+Week 11-12: Full monitoring + Observability + Deployment
+Week 13-16: Continuous improvement & scaling
+
+Total: 4 months → Enterprise-grade medical AI system
+
+════════════════════════════════════════════════════════════════════════════════
+
+SUCCESS METRICS
+════════════════════════════════════════════════════════════════════════════════
+
+BASELINE (Currently):
+ ✗ Test coverage: ~70%
+ ✗ Security: No HIPAA alignment, no auth
+ ✗ State management: Incomplete propagation
+ ✗ Documentation: Manual /docs only
+ ✗ RAG quality: No hybrid search, no citations
+
+MONTH 1 TARGETS:
+ ✓ Test coverage: 80%+
+ ✓ Security: OWASP scan complete, fixes prioritized
+ ✓ State: biomarker_flags & safety_alerts propagating
+ ✓ Schema: Unified workflow→API format
+ ✓ Auth: JWT API key authentication enabled
+
+MONTH 2 TARGETS:
+ ✓ Test coverage: 90%+
+ ✓ Agents: Improved decision logic, function calling
+ ✓ Prompts: Optimized via Senior Prompt Engineer patterns
+ ✓ Latency: -20% from baseline
+
+MONTH 3 TARGETS:
+ ✓ Retrieval: Hybrid search live, chunking optimized
+ ✓ Knowledge: Knowledge graph working, citations enforced
+ ✓ Accuracy: +15% disease prediction vs baseline
+
+MONTH 4 TARGETS:
+ ✓ Deployment: Full CI/CD with GitHub Actions
+ ✓ Monitoring: Structured logging + metrics active
+ ✓ Docs: Auto-generated API docs at /docs + /redoc
+ ✓ Cost: -40% via model routing
+
+════════════════════════════════════════════════════════════════════════════════
+
+RISK MITIGATION
+════════════════════════════════════════════════════════════════════════════════
+
+⚠️ RISK: Medical hallucinations
+ MITIGATION: Use Skills #11, #12 for citation enforcement
+ TIMELINE: Implement Week 5-6
+
+⚠️ RISK: Patient data exposure
+ MITIGATION: Use Skills #17, #18, #19, #20 immediately
+ TIMELINE: Implement Week 1
+
+⚠️ RISK: Nondeterministic behavior
+ MITIGATION: Use Skill #2, #3 to fix state management
+ TIMELINE: Implement Week 1-2
+
+⚠️ RISK: Poor accuracy on rare biomarkers
+ MITIGATION: Use Skills #8, #9, #10 for hybrid retrieval
+ TIMELINE: Implement Week 4-5
+
+⚠️ RISK: Expensive API costs (Groq free tier limited)
+ MITIGATION: Use Skill #15 for cost-aware routing
+ TIMELINE: Implement Week 6
+
+════════════════════════════════════════════════════════════════════════════════
+
+TEAM SETUP (If Applicable)
+════════════════════════════════════════════════════════════════════════════════
+
+Security Lead: Use Skills #17-20
+Backend Lead: Use Skills #1-7, #25-27
+ML/Prompt Lead: Use Skills #13-16, #22
+DevOps Lead: Use Skills #24, #27, #29
+QA/Testing Lead: Use Skills #22, #23, #24
+
+Collaborative: All review Skills #30 (PR workflow)
+
+════════════════════════════════════════════════════════════════════════════════
+
+FINAL CHECKLIST BEFORE STARTING IMPLEMENTATION
+════════════════════════════════════════════════════════════════════════════════
+
+□ Read COMPREHENSIVE_SKILLS_GUIDE.md cover to cover
+□ Read SKILLS_QUICK_REFERENCE.md to find relevant skills
+□ Run OWASP Security Check skill scan
+□ Understand the 6 critical issues → required skills mapping
+□ Create JIRA/Linear tickets for 4-month roadmap
+□ Schedule team alignment on priority order
+□ Set up skill documentation access for team
+□ Create skills usage guidelines for your team
+□ Allocate time: 130-160 hours over 4 months
+□ Get executive sign-off on timeline
+□ Start with Week 1 security work
+
+════════════════════════════════════════════════════════════════════════════════
+
+CONGRATULATIONS!
+════════════════════════════════════════════════════════════════════════════════
+
+You now have access to 30 world-class agent & RAG development skills.
+
+Your RagBot is positioned to go from "working production system" to
+"industry-leading medical AI platform" through systematic, evidence-based
+skill application over the next 4 months.
+
+The skills are installed. The roadmap is clear. The critical issues are mapped.
+
+Now it's time to execute.
+
+Start TODAY with OWASP Security Check.
+
+Welcome to enterprise-grade AI development. 🚀
+
+════════════════════════════════════════════════════════════════════════════════
+Questions? See COMPREHENSIVE_SKILLS_GUIDE.md or SKILLS_QUICK_REFERENCE.md
+════════════════════════════════════════════════════════════════════════════════
diff --git a/docs/archive/NAVIGATION_GUIDE.md b/docs/archive/NAVIGATION_GUIDE.md
new file mode 100644
index 0000000000000000000000000000000000000000..e607f48a80ce4a0c32993c2efb407458096f8b9e
--- /dev/null
+++ b/docs/archive/NAVIGATION_GUIDE.md
@@ -0,0 +1,525 @@
+╔════════════════════════════════════════════════════════════════════════════╗
+║ 🗺️ NAVIGATION GUIDE: Using the RagBot 4-Month Roadmap ║
+║ How to use all the planning documents effectively ║
+╚════════════════════════════════════════════════════════════════════════════╝
+
+You now have 5 comprehensive documents to guide your 4-month journey.
+Each serves a specific purpose. Here's how to use them together.
+
+════════════════════════════════════════════════════════════════════════════════
+
+📋 THE 5 DOCUMENTS
+════════════════════════════════════════════════════════════════════════════════
+
+1️⃣ IMPLEMENTATION_ROADMAP.md (THIS IS YOUR MASTER PLAN)
+ ├─ 12-week breakdown with all 34 skills
+ ├─ Phase 1-4 detailed task descriptions
+ ├─ Success criteria for each task
+ ├─ Code location hints
+ └─ Use: Reference for detailed understanding of each skill
+
+2️⃣ WEEK1_EXECUTION_PLAN.md (YOUR IMMEDIATE TODO LIST)
+ ├─ This week's 6 tasks with hourly estimates
+ ├─ Checkboxes for daily progress
+ ├─ Useful commands to run
+ ├─ Daily standup template
+ └─ Use: Print this out, pin to monitor, check off daily
+
+3️⃣ IMPLEMENTATION_STATUS_TRACKER.md (YOUR PROGRESS TRACKER)
+ ├─ All 34 skills with status (TODO/IN-PROGRESS/DONE)
+ ├─ Hours spent per skill
+ ├─ Metrics tracking (coverage, latency, accuracy)
+ ├─ Weekly checklist
+ └─ Use: Update weekly, show progress to team
+
+4️⃣ SKILL_TO_CODE_MAPPING.md (YOUR DEVELOPER REFERENCE)
+ ├─ Where each skill applies in the codebase
+ ├─ Which files to modify for each skill
+ ├─ How skills fix the 6 critical issues
+ ├─ Dependency graph
+ └─ Use: When implementing a skill, find which code to change
+
+5️⃣ This Document - NAVIGATION_GUIDE.md (YOU ARE HERE)
+ ├─ How to use all 4 other documents
+ ├─ Recommended reading order
+ ├─ Quick workflows
+ └─ Use: Getting started with the plan
+
+════════════════════════════════════════════════════════════════════════════════
+
+🚀 GETTING STARTED (First 30 minutes)
+════════════════════════════════════════════════════════════════════════════════
+
+1. Read this document (5 minutes)
+ ↓
+2. Read WEEK1_EXECUTION_PLAN.md (10 minutes)
+ ↓
+3. Skim IMPLEMENTATION_ROADMAP.md Phase 1 (10 minutes)
+ ↓
+4. Bookmark SKILL_TO_CODE_MAPPING.md for reference
+ ↓
+5. Start Task 1.1 from WEEK1_EXECUTION_PLAN.md
+
+════════════════════════════════════════════════════════════════════════════════
+
+📚 RECOMMENDED READING ORDER
+════════════════════════════════════════════════════════════════════════════════
+
+For the Project Manager/Team Lead:
+ 1. This document (5 min)
+ 2. IMPLEMENTATION_ROADMAP.md Summary section (5 min)
+ 3. IMPLEMENTATION_STATUS_TRACKER.md (5 min)
+ 4. Return to this document for weekly workflows
+
+For the Developer/Engineer:
+ 1. This document (5 min)
+ 2. WEEK1_EXECUTION_PLAN.md (10 min)
+ 3. SKILL_TO_CODE_MAPPING.md (10 min)
+ 4. Specific skill section in IMPLEMENTATION_ROADMAP.md (5 min)
  5. Read ~/.agents/skills/<skill-name>/SKILL.md (varies)
+
+For the QA/Test Specialist:
+ 1. This document (5 min)
+ 2. IMPLEMENTATION_ROADMAP.md Phase 2 (10 min)
+ 3. SKILL_TO_CODE_MAPPING.md section on #22 Testing Patterns (5 min)
+ 4. WEEK1_EXECUTION_PLAN.md Task 1.3 (5 min)
+
+For the DevOps/Infrastructure Engineer:
+ 1. This document (5 min)
+ 2. IMPLEMENTATION_ROADMAP.md Phase 4 Week 9-10 (10 min)
+ 3. SKILL_TO_CODE_MAPPING.md sections on #24, #25, #31 (5 min)
+ 4. IMPLEMENTATION_STATUS_TRACKER.md (5 min)
+
+════════════════════════════════════════════════════════════════════════════════
+
+🎯 TYPICAL WORKFLOWS
+════════════════════════════════════════════════════════════════════════════════
+
+WORKFLOW 1: Starting the Day
+┌─────────────────────────────────────────────────────────────────────────┐
+│ 1. Open WEEK1_EXECUTION_PLAN.md (or current week equivalent) │
+│ └─ Find today's section │
+│ │
+│ 2. Click checkbox □ next to current task to mark as IN-PROGRESS │
+│ │
+│ 3. Open SKILL_TO_CODE_MAPPING.md │
+│ └─ Find the skill for today's task │
+│ └─ See which code files to modify │
+│ │
+│ 4. Open IMPLEMENTATION_ROADMAP.md │
+│ └─ Find the Phase/Week/Task section for details │
+│ │
│   5. Read ~/.agents/skills/<skill-name>/SKILL.md for guidance           │
+│ Example: ~/.agents/skills/owasp-security-check/SKILL.md │
+│ │
+│ 6. Implement, test, commit │
+│ Command: git commit -m "feat: [Skill #X] [Description]" │
+│ │
+│ 7. Checkmark □ task as COMPLETE in WEEK1_EXECUTION_PLAN.md │
+│ │
+│ 8. Run: pytest tests/ -v --cov src (should pass/coverage increase) │
+└─────────────────────────────────────────────────────────────────────────┘
+
+WORKFLOW 2: Implementing a Specific Skill
+┌─────────────────────────────────────────────────────────────────────────┐
+│ Problem: "I need to implement Skill #8 (Hybrid Search)" │
+│ │
+│ Solution: │
+│ 1. Find week/phase in IMPLEMENTATION_STATUS_TRACKER.md │
+│ └─ Skill #8 is in Phase 3, Week 6 │
+│ │
+│ 2. Read IMPLEMENTATION_ROADMAP.md Phase 3 Week 6 section │
+│ └─ Task description: "Implement hybrid search" │
+│ └─ Duration: 4-6 hours │
+│ └─ Actions: numbered steps to follow │
+│ │
+│ 3. Check SKILL_TO_CODE_MAPPING.md │
+│ └─ Find "src/retrievers/hybrid_retriever.py (NEW)" │
+│ └─ See which files to modify │
+│ │
+│ 4. Read ~/.agents/skills/hybrid-search-implementation/SKILL.md │
+│ └─ Detailed implementation guidance │
+│ │
+│ 5. Code and test according to steps │
+│ └─ Create src/retrievers/hybrid_retriever.py │
+│ └─ Write tests in tests/test_hybrid_retriever.py │
+│ └─ Run: pytest tests/test_hybrid_retriever.py -v │
+│ │
+│ 6. Update IMPLEMENTATION_STATUS_TRACKER.md │
+│ └─ Mark Skill #8 as "✅ DONE" │
+│ └─ Update hours actually spent │
+│ └─ Update metric improvements │
+└─────────────────────────────────────────────────────────────────────────┘
+
+WORKFLOW 3: End of Week Progress Report
+┌─────────────────────────────────────────────────────────────────────────┐
+│ Every Friday at 5 PM: │
+│ │
+│ 1. Open IMPLEMENTATION_STATUS_TRACKER.md │
+│ │
+│ 2. For each task this week: │
+│ □ Mark as "✅ DONE" if completed │
+│ □ Update hours: planned [ ] → actual [X] │
+│ □ Update metrics (coverage %, latency, accuracy change) │
+│ │
+│ 3. Run full test suite: │
+│ $ pytest tests/ -v --cov src --cov-report=html │
+│ └─ Record coverage percentage │
+│ └─ Check for any new failures │
+│ │
+│ 4. Run Performance Benchmark: │
+│ $ python tests/evaluation_metrics.py │
+│ └─ Record response latency │
+│ └─ Record accuracy metrics │
+│ │
+│ 5. Update Metrics section in IMPLEMENTATION_STATUS_TRACKER.md │
+│ Week N: Coverage: [XX]%, Latency: [XX]s, Accuracy: [XX]% │
+│ │
+│ 6. Create team report: │
+│ "Week 1: Completed 6/6 tasks. Coverage 70%→73%, auth implemented. │
+│ No blockers. On track for Phase 1 completion by Feb 21." │
+│ │
+│ 7. Plan next week (Monday morning): │
+│ - Check IMPLEMENTATION_ROADMAP.md for next phase tasks │
+│ - Check dependencies (can we start without Skill X?) │
+│ - Allocate resources │
+└─────────────────────────────────────────────────────────────────────────┘
+
+WORKFLOW 4: Fixing a Critical Issue (Example: Issue #1)
+┌─────────────────────────────────────────────────────────────────────────┐
+│ Problem: biomarker_flags not propagating through workflow │
+│ │
+│ Solution: │
+│ 1. Read SKILL_TO_CODE_MAPPING.md │
+│ └─ Search for "ISSUE #1" at top │
+│ └─ See: "Primary Skills: #2, #3, #16" │
+│ │
+│ 2. Implementation order: │
+│ Step 1: Check IMPLEMENTATION_ROADMAP.md for Skill #2 details │
+│ Step 2: Check IMPLEMENTATION_ROADMAP.md for Skill #3 details │
+│ Step 3: Check IMPLEMENTATION_ROADMAP.md for Skill #16 details │
+│ │
+│ 3. Code changes needed: │
+│ + src/state.py (add missing fields) │
+│ + src/agents/biomarker_analyzer.py (return flags) │
+│ + src/agents/disease_explainer.py (preserve state) │
+│ + api/app/models/response.py (unified schema) │
+│ │
+│ 4. Testing: │
+│ + Write tests/test_state_propagation.py │
+│ + Run: pytest tests/test_state_propagation.py -v │
+│ + Run end-to-end: python scripts/test_chat_demo.py │
+│ │
+│ 5. Verification: │
+│ - Log output shows flags present at each agent │
+│ - Final response includes biomarker_flags │
+│ - All tests passing │
+│ │
+│ 6. Commit: │
+│ git commit -m "fix: [Skill #2, #3, #16] Propagate biomarker_flags" │
+└─────────────────────────────────────────────────────────────────────────┘
+
+WORKFLOW 5: Unblocking a Dependency
+┌─────────────────────────────────────────────────────────────────────────┐
+│ Scenario: Week 3 work is blocked, waiting for Week 2 to finish │
+│ │
+│ Check SKILL_TO_CODE_MAPPING.md → "SKILL DEPENDENCY GRAPH" │
+│ └─ "Phase 2 requires Phase 1: #22, #26, #4, #13, #14, #5" │
+│ └─ If Phase 1 delayed, check which Phase 2 skills are independent │
+│ │
+│ Independent Phase 2 work possible: │
+│ • #26 (Design Patterns) can refactor without Phase 1 complete │
+│ • #13 (Prompt Engineer) can improve prompts in isolation │
+│ • Extend Phase 1 tests while Skill work continues │
+│ │
+│ Execution shift: │
+│ 1. Run: grep -n "#26\|#13" IMPLEMENTATION_ROADMAP.md │
+│ 2. Start #26 or #13 work in parallel │
+│ 3. Update IMPLEMENTATION_STATUS_TRACKER.md schedule │
+│ 4. Reorder next week's tasks │
+└─────────────────────────────────────────────────────────────────────────┘
+
+════════════════════════════════════════════════════════════════════════════════
+
+📂 FOLDER STRUCTURE: Where Everything Lives
+════════════════════════════════════════════════════════════════════════════════
+
+RagBot (root)
+│
+├─ 📋 PLANNING DOCUMENTS (NEW)
+│ ├─ IMPLEMENTATION_ROADMAP.md ← Master 12-week plan
+│ ├─ WEEK1_EXECUTION_PLAN.md ← This week's tasks
+│ ├─ IMPLEMENTATION_STATUS_TRACKER.md ← Progress tracking
+│ ├─ SKILL_TO_CODE_MAPPING.md ← Developer reference
+│ └─ NAVIGATION_GUIDE.md ← This file
+│
+├─ 🛠️ SKILLS REFERENCE
+│ └─ ~/.agents/skills/ (Global, all installed)
+│ ├─ owasp-security-check/SKILL.md
+│ ├─ api-security-hardening/SKILL.md
+│ ├─ python-testing-patterns/SKILL.md
+│ ├─ workflow-orchestration-patterns/SKILL.md
+│ ├─ api-rate-limiting/SKILL.md
+│ └─ [30 more skills...]
+│
+├─ 📝 IMPLEMENTATION PROGRESS
+│ ├─ src/
+│ │ ├─ state.py (Fix by Skill #2: Week 1)
+│ │ ├─ workflow.py (Fix by Skill #2, #3: Weeks 1-2)
+│ │ ├─ exceptions.py (NEW - Skill #21: Week 2)
+│ │ ├─ agents/
+│ │ │ ├─ base_agent.py (NEW - Skill #26: Week 3)
+│ │ │ ├─ biomarker_analyzer.py (Fix by Skills #4, #13: Week 3)
+│ │ │ ├─ disease_explainer.py (Fix by Skills #8, #11, #13: Week 6)
+│ │ │ └─ confidence_assessor.py (Fix by Skill #4, #13: Week 3)
+│ │ ├─ retrievers/
+│ │ │ └─ hybrid_retriever.py (NEW - Skill #8: Week 6)
+│ │ ├─ chunking_strategy.py (NEW - Skill #9: Week 6)
+│ │ ├─ knowledge_graph.py (NEW - Skill #12: Week 7)
+│ │ ├─ memory_manager.py (NEW - Skill #28: Week 7)
+│ │ ├─ observability.py (NEW - Skill #27: Week 2)
+│ │ └─ llm_config.py (Fix by Skills #15: Week 8)
+│ │
+│ ├─ api/app/
+│ │ ├─ main.py (Fix by Skills #17, #25: Weeks 1, 9)
+│ │ ├─ models/
+│ │ │ └─ response.py (NEW - Skill #16: Week 1)
+│ │ ├─ middleware/
+│ │ │ ├─ auth.py (NEW - Skill #17: Week 1)
+│ │ │ ├─ input_validation.py (NEW - Skill #19: Week 2)
+│ │ │ └─ rate_limiter.py (NEW - Skill #20: Week 1)
+│ │ └─ webhooks/ (NEW if needed - Skill #33: Week 11)
+│ │
+│ ├─ tests/
+│ │ ├─ test_api_auth.py (NEW - Skill #22: Week 1)
+│ │ ├─ test_parametrized_*.py (NEW - Skill #22: Week 3)
+│ │ ├─ test_response_schema.py (NEW - Skill #22: Week 1)
+│ │ ├─ evaluation_metrics.py (NEW - Skill #14: Week 4)
+│ │ ├─ conftest.py (NEW - Skill #22: Week 1)
+│ │ └─ fixtures/ (NEW - Skill #22: Week 1)
+│ │ ├─ auth.py
+│ │ ├─ biomarkers.py
+│ │ └─ evaluation_patients.py
+│ │
+│ ├─ .github/
+│ │ ├─ workflows/
+│ │ │ ├─ test.yml (NEW - Skill #24: Week 2)
+│ │ │ ├─ security.yml (NEW - Skill #24: Week 2)
+│ │ │ ├─ docker.yml (NEW - Skill #24: Week 2)
+│ │ │ └─ deploy.yml (NEW - Skill #31: Week 10)
+│ │ ├─ CODEOWNERS (NEW - Skill #30: Week 9)
+│ │ └─ pull_request_template.md (NEW - Skill #30: Week 9)
+│ │
+│ └─ docs/
+│ ├─ SECURITY_AUDIT.md (NEW - Skill #18: Week 1)
+│ ├─ REVIEW_GUIDELINES.md (NEW - Skill #23: Week 10)
+│ └─ API.md (Updated by Skill #29: Week 9)
+
+════════════════════════════════════════════════════════════════════════════════
+
+🔄 ITERATIVE IMPROVEMENT LOOP
+════════════════════════════════════════════════════════════════════════════════
+
+Each week follows this cycle:
+
+┌─────────────────────────────────────────────────────────────────────────┐
+│ │
+│ MONDAY WEDNESDAY-FRIDAY FRIDAY PM │
+│ ┌─────────┐ ┌──────────────┐ ┌──────────┐ │
+│ │ Plan │ ────────────→ │ Implement │ ─────────→ │ Report │ │
+│ │ Week │ │ + Test │ │ Progress │ │
+│ └─────────┘ └──────────────┘ └──────────┘ │
+│ ↓ ↓ ↓ │
+│ • Review next tasks • Run tests daily • Update Status │
+│ • Check dependencies • Commit to git • Calculate metrics │
+│ • Allocate resources • Fix issues as found • Plan next week │
+│ │
+└─────────────────────────────────────────────────────────────────────────┘
+
+Metrics to track weekly:
+ Coverage: [baseline] → [target]
+ Latency: [baseline:25s] → [target:15-20s]
+ Accuracy: [baseline:65%] → [target:80%]
+ Tests: [count increase]
+ Issues Resolved: [count]
+ Skill Hours: [planned vs actual]
+
+════════════════════════════════════════════════════════════════════════════════
+
+❓ FREQUENTLY ASKED QUESTIONS
+════════════════════════════════════════════════════════════════════════════════
+
+Q: "How do I know what skill to use?"
+A: SKILL_TO_CODE_MAPPING.md maps skills to problems. If fixing Issue #1,
+ section at top says: "Primary Skills: #2, #3, #16"
+
+Q: "What if I fall behind schedule?"
+A: 1. Check SKILL_TO_CODE_MAPPING.md "Skill Dependency Graph"
+ 2. See which Phase 2+ skills are independent of delayed Phase 1 work
+ 3. Start those in parallel to maintain progress
+ 4. Reschedule Phase 1 blockers
+
+Q: "How do I measure progress?"
+A: Update IMPLEMENTATION_STATUS_TRACKER.md weekly:
+ • Mark tasks as DONE
  • Run: pytest tests/ --cov=src (record coverage %)
+ • Run: python test_chat_demo.py 10 times, measure latency
+ • Update metrics row for the week
+
+Q: "What if a skill doesn't match my needs?"
+A: Each skill has detailed Actions in IMPLEMENTATION_ROADMAP.md. These
+ suggest typical usage. Apply only what's relevant to RagBot. The plan
+ is flexible - adapt it to your reality.
+
+Q: "When do I read the actual SKILL.md files?"
+A: When implementing a skill. Example:
+ • Day 1: Read WEEK1_EXECUTION_PLAN.md Task 1.1
+ • Opens IMPLEMENTATION_ROADMAP.md Phase 1 Week 1
+ • Opens SKILL_TO_CODE_MAPPING.md to see code files
+ • THEN reads ~/.agents/skills/owasp-security-check/SKILL.md for details
+ • Implements according to all guidance combined
+
+Q: "What if tests fail during implementation?"
+A: Expected! This is normal development. When a test fails:
+ 1. Read the error message carefully
+ 2. Identify which code is wrong (src/ or test/)
+ 3. Fix the code (not the test)
+ 4. Re-run: pytest -v
+ 5. Commit when green
+
+Q: "How do I handle merge conflicts?"
+A: Phase 1 work happens in parallel:
+ • Task 1.3 (Auth tests) = tests/test_api_auth.py
+ • Task 1.4 (State fixing) = src/agents/
+ • Task 1.5 (Schema) = api/app/models/response.py
+ These are different files, minimal conflicts. If conflicts:
+ 1. Read git conflict markers (<<<<, ====, >>>>)
+ 2. Pick correct version or merge manually
+ 3. Run: pytest to verify still works
+ 4. git add [file]; git commit
+
+════════════════════════════════════════════════════════════════════════════════
+
+✅ SUCCESS CRITERIA FOR PHASE 1
+════════════════════════════════════════════════════════════════════════════════
+
+By end of Week 2 (Feb 21):
+
+Code Quality:
+ ✅ 23+ new tests written
+ ✅ Coverage increased 70% → 75%
+ ✅ All tests passing
+ ✅ No linter warnings
+
+Features:
+ ✅ JWT authentication working on /api endpoints
+ ✅ biomarker_flags & safety_alerts propagate through workflow
+ ✅ Unified response schema (API + CLI)
+ ✅ Prompt injection detection active
+ ✅ Rate limiting enforced (10 req/min)
+
+Documentation:
+ ✅ SECURITY_AUDIT.md completed
+ ✅ .github/workflows/test.yml running on PR
+
+Team:
+ ✅ All developers understand Phase 1 changes
+ ✅ Code review standards documented
+ ✅ Deployment checklist for next phase
+
+════════════════════════════════════════════════════════════════════════════════
+
+🎓 CONTINUOUS LEARNING
+════════════════════════════════════════════════════════════════════════════════
+
+As you implement each skill:
+
+1. Read the SKILL.md documentation thoroughly
+ └─ Take notes on best practices
+
+2. Understand the "Why" not just the "How"
+ └─ Why hybrid search over semantic only?
+ └─ Why knowledge graphs for medical reasoning?
+
+3. Apply learnings beyond RagBot
+ └─ These patterns work for any Python/ML/LLM project
+
+4. Share knowledge with team
+ └─ Each week, 30-min skill share session
+ └─ "This week I learned about [Skill X]..."
+
+5. Revisit Phase 1-2 skills when you hit Phase 3-4
+ └─ Patterns reinforce and become second nature
+ └─ You'll notice connections between skills
+
+════════════════════════════════════════════════════════════════════════════════
+
+📞 NEED HELP?
+════════════════════════════════════════════════════════════════════════════════
+
+Stuck on a task? Follow this decision tree:
+
+Issue: Don't know which skill to use?
+ → Check SKILL_TO_CODE_MAPPING.md
+ → Find the problem area
+ → See "Primary Skills:" section
+
+Issue: Skill documentation unclear?
  → Read ~/.agents/skills/<skill-name>/SKILL.md fully
+ → Check subdirectories for examples/templates
+ → Apply to your specific use case
+
+Issue: Not progressing fast enough?
+ → Consider parallel work (see WORKFLOW 5)
+ → Skill dependency check
+ → Allocate more developer time
+ → Simplify scope temporarily
+
+Issue: Test failures?
+ → Read error message
+ → Check SKILL_TO_CODE_MAPPING.md for code changes needed
+ → Review the specific skill's error handling section
+ → Fix code (not test)
+
+Issue: Code doesn't integrate?
+ → Check Phase 2 tasks for integration points
+ → Verify unified schema matches
+ → Run end-to-end tests
+ → Check observability logs for clues
+
+════════════════════════════════════════════════════════════════════════════════
+
+🏁 THE FINISH LINE
+════════════════════════════════════════════════════════════════════════════════
+
+After 12 weeks of executing this plan:
+
+Your RagBot will be:
+ ✅ Enterprise-grade (OWASP + HIPAA aligned)
+ ✅ Well-tested (90%+ coverage)
+ ✅ Fast (15-20s latency, -30% vs baseline)
+ ✅ Accurate (80%+ disease prediction)
+ ✅ Cost-optimized (-40% API costs)
+ ✅ Properly documented (API docs, code reviews, guides)
+ ✅ Fully deployed (CI/CD, monitoring, alerts)
+ ✅ Knowledge-integrated (graphs, hybrid search, citations)
+ ✅ Maintainable (design patterns, observability, error handling)
+ ✅ Secure (auth, rate limiting, input validation)
+
+Your team will be:
+ ✅ Trained on 34 industry best practices
+ ✅ Capable of maintaining and evolving the system
+ ✅ Confident in deployment and monitoring
+ ✅ Equipped with reusable patterns for future projects
+
+Success looks like:
+ "We deployed a production-ready medical AI system that is secure,
+ fast, accurate, and maintainable. We did it systematically using
+ industry best practices. We can confidently handle increases in
+ patient load and evolve the system for new biomarkers."
+
+════════════════════════════════════════════════════════════════════════════════
+
+Let's build something great. Start with WEEK1_EXECUTION_PLAN.md. 🚀
+
+════════════════════════════════════════════════════════════════════════════════
diff --git a/docs/archive/SKILLS_INSTALLED.md b/docs/archive/SKILLS_INSTALLED.md
new file mode 100644
index 0000000000000000000000000000000000000000..dfde45d3ba92d7128b12121b3f0063070a419b93
--- /dev/null
+++ b/docs/archive/SKILLS_INSTALLED.md
@@ -0,0 +1,394 @@
+# RagBot Skills Roadmap - Installed & Ready
+
+**Date**: February 18, 2026
+**Status**: ✅ **7 Strategic Skills Installed**
+**Goal**: Take RagBot from production-ready to enterprise-grade
+
+---
+
+## Executive Summary
+
+Your RagBot system is **production-ready** with 31 passing tests and a working REST API. We've identified and installed **7 critical skills** to address gaps in:
+
1. ✅ CI/CD Automation
2. ✅ API Security & Hardening
3. ✅ API Rate Limiting
4. ✅ Test Coverage & Quality Metrics
5. ✅ Code Review Workflows
6. ✅ API Documentation
7. ✅ Security Compliance (OWASP)
+
+---
+
+## Installed Skills Overview
+
+### 🚀 **Skill #1: GitHub Actions Templates** (2.8K installs)
+**Package**: `wshobson/agents@github-actions-templates`
+**Location**: `.agents/skills/github-actions-templates/`
+
+**What it does**:
+- Provides production GitHub Actions workflow templates
+- Automated testing on every commit/PR
+- Automated deployment pipelines
+- Security scanning (SAST, dependency checks)
+- Code quality checks (linting, formatting)
+- Build & test matrix for multiple Python versions
+
+**For RagBot**: Automate pytest runs, Docker builds, dependency updates
+
+---
+
+### 🔐 **Skill #2: API Security Hardening** (144 installs)
+**Package**: `aj-geddes/useful-ai-prompts@api-security-hardening`
+**Location**: `.agents/skills/api-security-hardening/`
+
+**What it does**:
+- Authentication (API keys, JWT tokens)
+- CORS configuration hardening
+- Input validation & sanitization
+- Rate limiting implementation
+- Security headers (CSP, X-Frame-Options, etc.)
+- HTTPS/TLS best practices
+- Database query protection (SQL injection prevention)
+
+**For RagBot**: Secure the REST API endpoints, add API key authentication, implement CORS policies for web integration
+
+---
+
+### ⏱️ **Skill #3: API Rate Limiting** (92 installs)
+**Package**: `aj-geddes/useful-ai-prompts@api-rate-limiting`
+**Location**: `.agents/skills/api-rate-limiting/`
+
+**What it does**:
+- Per-user rate limiting (requests/minute)
+- Per-IP rate limiting
+- Request throttling strategies
+- Token bucket algorithm
+- Redis/in-memory backends
+- Rate limit headers in responses
+- Graceful handling of exceeding limits
+
**For RagBot**: Prevent abuse of medical analysis endpoint (critical for healthcare apps), implement tiered rate limits for different API plans
+
+---
+
+### 🧪 **Skill #4: Python Testing Patterns** (3.7K installs - MOST POPULAR)
+**Package**: `wshobson/agents@python-testing-patterns`
+**Location**: `.agents/skills/python-testing-patterns/`
+
+**What it does**:
+- Test structure & organization best practices
+- Fixture patterns for complex test setup
+- Mocking strategies (unittest.mock, pytest-mock)
+- Parametrized testing for multiple scenarios
+- Test coverage reporting (pytest-cov)
+- Integration vs unit test patterns
+- Property-based testing (hypothesis)
+
+**For RagBot**: Expand test suite beyond 31 tests, add integration tests, measure coverage metrics, mock LLM calls for faster tests
+
+---
+
+### 👀 **Skill #5: GitHub PR Review Workflow** (31 installs)
+**Package**: `uwe-schwarz/skills@github-pr-review-workflow`
+**Location**: `.agents/skills/github-pr-review-workflow/`
+
+**What it does**:
+- Automated code review rules
+- PR template enforcement
+- Commit message standards
+- Required approval workflows
+- Code ownership files (CODEOWNERS)
+- Automated reviewer assignment
+- PR status checks & branch protection
+
+**For RagBot**: Establish code quality gates, mandatory reviews before merging, document contribution process
+
+---
+
+### 🛡️ **Skill #6: OWASP Security Check** (148 installs)
+**Package**: `sergiodxa/agent-skills@owasp-security-check`
+**Location**: `.agents/skills/owasp-security-check/`
+
+**What it does**:
+- OWASP Top 10 vulnerability scanning
+- Security vulnerability assessment
+- Dependency security checks (CVE detection)
+- Code pattern analysis for common security flaws
+- Encryption & hashing best practices
+- Authentication & authorization review
+- Logging security violations
+
+**For RagBot**: Scan for healthcare data protection (HIPAA-relevant), check for common vulnerabilities, validate input handling
+
+---
+
+### 📚 **Skill #7: API Docs Generator** (44 installs)
+**Package**: `patricio0312rev/skills@api-docs-generator`
+**Location**: `.agents/skills/api-docs-generator/`
+
+**What it does**:
+- OpenAPI/Swagger spec generation
+- Interactive API documentation (Swagger UI, ReDoc)
+- Auto-generated client SDKs (optional)
+- Request/response example generation
+- API changelog management
+- Documentation from code comments
+- Multi-version API support
+
+**For RagBot**: Generate OpenAPI spec from FastAPI code, auto-docs at `/docs` and `/redoc`, create client libraries
+
+---
+
+## Implementation Priority (Next Steps)
+
+### **Phase 1: Security (Week 1)** 🔒
+Implement security-critical features:
+1. Use **API Security Hardening** skill to add JWT authentication
+2. Use **API Rate Limiting** to protect endpoints
+3. Run **OWASP Security Check** against current code
+4. Update API docs with auth requirements
+
+### **Phase 2: Automation (Week 1-2)** 🤖
+Set up CI/CD pipelines:
+1. Use **GitHub Actions Templates** to create `.github/workflows/`
+ - `tests.yml` - Run pytest on every push
+ - `security.yml` - OWASP + dependency scanning
+ - `docker.yml` - Build & push Docker images
+ - `deploy.yml` - CD pipeline to production
+
+### **Phase 3: Quality (Week 2-3)** 📊
+Improve code quality:
+1. Use **Python Testing Patterns** to expand test suite
+ - Add integration tests (API + workflow)
+ - Add property-based tests (parametrized)
+ - Measure coverage (target: 80%+)
+ - Mock external LLM calls for speed
+
+2. Use **GitHub PR Review Workflow** to enforce standards
+ - Create CODEOWNERS file
+ - Add PR template
+ - Require code review approval
+ - Run lint/format checks automatically
+
+### **Phase 4: Documentation (Week 3)** 📖
+Polish documentation:
+1. Use **API Docs Generator** for OpenAPI spec
+ - Regenerate Swagger/ReDoc docs
+ - Add security scheme documentation
+ - Publish to ReadTheDocs or GitHub Pages
+
+---
+
+## Quick Start: Using Each Skill
+
+### 1. **CI/CD Workflow** (GitHub Actions)
+```bash
+# Create .github/workflows/tests.yml using the templates
+# Ask: "Can you create a GitHub Actions workflow to test my Python project on every push?"
+
+# The skill provides templates for:
+# - Test matrix (Python 3.11, 3.12, 3.13)
+# - Lint & format checks
+# - Build Docker image
+# - Deploy to staging/production
+```
+
+### 2. **Secure the API**
+```bash
+# Ask: "How can I add API key authentication to my FastAPI REST API?"
+
+# The skill provides:
+# - JWT token generation
+# - API key validation middleware
+# - CORS configuration
+# - Request validation decorators
+# - Rate limiting middleware
+```
+
+### 3. **Expand Tests**
+```bash
+# Ask: "How can I improve my test coverage for medical analysis API?"
+
+# The skill provides:
+# - Parametrized tests for different biomarker values
+# - Mocked LLM responses (for speed)
+# - Integration test patterns
+# - Coverage reporting
+```
+
+### 4. **Review Workflow**
+```bash
+# Ask: "Set up GitHub PR review workflow for my repo"
+
+# The skill provides:
+# - CODEOWNERS file template
+# - PR template (asks about test coverage, docs, etc.)
+# - Branch protection rules
+# - Required reviewers
+```
+
+### 5. **OWASP Security Scanning**
+```bash
+# Ask: "Scan my FastAPI medical analysis API for OWASP Top 10 vulnerabilities"
+
+# Checks for:
+# - SQL injection vulnerabilities
+# - Improper input validation
+# - Missing authentication
+# - Unencrypted sensitive data
+# - XXE attacks
+# - Broken access control
+```
+
+### 6. **API Documentation**
+```bash
+# Ask: "Generate OpenAPI spec from my FastAPI code"
+
+# Generates:
+# - OpenAPI 3.0 spec (JSON/YAML)
+# - SwaggerUI at /docs
+# - ReDoc at /redoc
+# - Example curl commands
+```
+
+---
+
+## Expected Improvements
+
+### Before (Current State)
+- Manual testing (`pytest` run by developer)
+- No API authentication
+- 31 tests (good, but ~50% coverage estimated)
+- Manual code review (ad-hoc)
+- API docs only in markdown files
+- No automated deployment
+
+### After (With Skills)
+- ✅ Automated testing on every push/PR
+- ✅ API secured with JWT + rate limiting
+- ✅ 80%+ test coverage with metrics dashboard
+- ✅ Mandatory code reviews with CODEOWNERS
+- ✅ Auto-generated OpenAPI docs + Swagger UI
+- ✅ Automated deployment to staging/production
+- ✅ Security scanning (OWASP + dependencies)
+- ✅ Healthcare-ready security posture
+
+---
+
+## Medical/Healthcare-Specific Considerations
+
+RagBot is a **medical decision support system** - security is critical:
+
+### What These Skills Help With
+
+| Need | Skill | Benefit |
+|------|-------|---------|
+| Protected biomarker data | API Security Hardening | Encrypted API, auth required |
+| Audit trail for medical decisions | GitHub Actions (CI/CD logs) | Complete change history |
+| HIPAA compliance readiness | OWASP Security Check | Identifies compliance gaps |
+| Rate limiting (prevent brute force biomarker lookups) | API Rate Limiting | Throttles suspicious requests |
+| Documentation for medical professionals | API Docs Generator | Clear, standards-based API docs |
+| Quality assurance for medical analysis | Python Testing Patterns | High coverage, edge case testing |
+
+---
+
+## Files to Review
+
+After using the skills, you'll have created:
+
+```
+RagBot/
+├── .github/
+│ └── workflows/
+│ ├── tests.yml ← GitHub Actions Tests
+│ ├── security.yml ← OWASP + Dependency Scanning
+│ ├── docker.yml ← Docker Build & Push
+│ └── deploy.yml ← Automated Deployment
+├── CODEOWNERS ← Code review assignments
+├── .github/pull_request_template.md ← PR template
+├── docs/
+│ └── openapi.yaml ← Auto-generated API spec
+└── .agents/skills/
+ ├── github-actions-templates/
+ ├── api-security-hardening/
+ ├── api-rate-limiting/
+ ├── python-testing-patterns/
+ ├── github-pr-review-workflow/
+ ├── owasp-security-check/
+ └── api-docs-generator/
+```
+
+---
+
+## Next Actions
+
+### Immediate (Today)
+1. ✅ Skills installed successfully
+2. Review this document (you are here!)
+3. Pick one skill to use first (I recommend **GitHub Actions Templates**)
+
+### Short Term (This Week)
+1. Create first GitHub Actions workflow for automated testing
+2. Add API key authentication to FastAPI
+3. Implement rate limiting on `/api/v1/analyze` endpoint
+
+### Medium Term (This Month)
+1. Run OWASP security scan, fix findings
+2. Expand test suite to 60+ tests with coverage metrics
+3. Generate OpenAPI spec and publish docs
+4. Set up automated Docker builds
+
+### Long Term (This Quarter)
+1. Add CD pipeline (automated deployment)
+2. Implement healthcare-specific security (encryption, audit logs)
+3. Prepare for HIPAA compliance audit
+4. Add monitoring/alerting for API health
+
+---
+
+## Support & Resources
+
+**Skill Documentation**:
+- Browse all skills: https://skills.sh/
+- View installed skill details: `npx skills check`
+- Update skills: `npx skills update`
+
+**RagBot-Specific Documentation**:
+- Main README: [README.md](README.md)
+- Architecture: [docs/ARCHITECTURE.md](docs/ARCHITECTURE.md)
+- API Guide: [docs/API.md](docs/API.md)
+- Development: [docs/DEVELOPMENT.md](docs/DEVELOPMENT.md)
+
+**Next Deep-Dive Documents** (you can ask for):
+- "How do I use the GitHub Actions skill to set up CI/CD?"
+- "How do I secure my FastAPI API end-to-end?"
+- "How do I expand my test suite to reach 80% coverage?"
+- "How do I generate OpenAPI docs from my FastAPI code?"
+- "How do I set up a healthcare-ready deployment?"
+
+---
+
+## Summary
+
+You now have **7 enterprise-grade skills** ready to enhance RagBot:
+
+| # | Skill | Status | Value |
+|---|-------|--------|-------|
+| 1 | GitHub Actions Templates | ✅ Ready | CI/CD automation |
+| 2 | API Security Hardening | ✅ Ready | Auth + security headers |
+| 3 | API Rate Limiting | ✅ Ready | Abuse prevention |
+| 4 | Python Testing Patterns | ✅ Ready | Quality metrics |
+| 5 | GitHub PR Review Workflow | ✅ Ready | Code quality gates |
+| 6 | OWASP Security Check | ✅ Ready | Vulnerability scanning |
+| 7 | API Docs Generator | ✅ Ready | Auto OpenAPI spec |
+
+**Time to production-grade**: ~2-4 weeks of focused implementation
+**ROI**: Automated testing, security compliance, faster deployments, reduced bugs
+
+Ready to implement these? Just ask any of your installed skills!
+
+---
+
+**Generated**: 2026-02-18
+**By**: Deep Codebase Analysis + Skills CLI
+**Status**: All skills verified and ready to use
diff --git a/docs/archive/SKILLS_INSTALLED_UPDATED.md b/docs/archive/SKILLS_INSTALLED_UPDATED.md
new file mode 100644
index 0000000000000000000000000000000000000000..46161499ed7fdfe6d611ea44b899b56906ffa39d
--- /dev/null
+++ b/docs/archive/SKILLS_INSTALLED_UPDATED.md
@@ -0,0 +1,621 @@
+# 🚀 RagBot Skills Roadmap - UPDATED (February 18, 2026)
+
+**Status**: ✅ **13 Strategic Skills Installed & Ready**
+**Goal**: Take RagBot from production-ready to enterprise-grade with comprehensive improvements across testing, security, documentation, and code quality.
+
+---
+
+## Executive Summary
+
+Your RagBot system is **production-ready** with 83+ passing tests and a working REST API for medical biomarker analysis. We've identified and installed **13 critical skills** to address gaps in:
+
+1. ✅ **Code Quality & Testing** (3.7K installs)
+2. ✅ **API Security & Hardening** (144 installs)
+3. ✅ **Security Compliance (OWASP)** (148 installs)
+4. ✅ **API Rate Limiting** (92 installs)
+5. ✅ **CI/CD Automation** (2.8K installs)
+6. ✅ **Code Review Workflows** (31 installs)
+7. ✅ **API Documentation** (44 installs)
+8. ✅ **Code Review Excellence**
+9. ✅ **FastAPI Best Practices**
+10. ✅ **Python Design Patterns**
+11. ✅ **Error Handling & Resilience**
+12. ✅ **Observability & Monitoring**
+13. ✅ **RAG Implementation Best Practices**
+
+---
+
+## Critical Issues Found in Deep Review
+
+Based on analysis of your codebase, these issues were identified:
+
+### 🔴 Critical Issues (Fix Immediately)
+
+1. **State Propagation Incomplete**
+ - `biomarker_flags` and `safety_alerts` not propagating through workflow
+ - API output missing critical medical alerts
+ - **Impact**: Medical data loss, incomplete patient analysis
+
+2. **Schema Mismatch**
+ - Workflow output schema vs API formatter schema misalignment
+ - ResponseSynthesizerAgent returns different fields than API expects
+ - **Impact**: API response formatting errors
+
+3. **Forced Prediction Confidence**
+ - Minimum confidence forced to 0.5, default disease always Diabetes
+ - **Impact**: False confidence in low-evidence cases (dangerous in medical domain)
+
+### 🟡 High Priority Issues
+
+4. **Biomarker Naming Inconsistency**
+ - API vs CLI use different normalization schemes
+   - "LDL" in API vs "LDL Cholesterol" in CLI
+ - **Impact**: Biomarker validation failures
+
+5. **JSON Parsing Fragility**
+ - LLM outputs parsed with minimal guardrails
+   - Invalid JSON frequently causes API 400 errors
+ - **Impact**: Poor user experience
+
+6. **Missing Citation Enforcement**
+ - RAG outputs don't enforce medical literature citations
+ - Claims without evidence may pass through
+ - **Impact**: Violates evidence-based requirements
+
+---
+
+## Installed Skills - Details & Applications
+
+### 1. ✅ Python Testing Patterns (3.7K installs)
+**Package**: `wshobson/agents@python-testing-patterns`
+**Location**: `.agents/skills/python-testing-patterns/`
+
+**Core Capabilities**:
+- Test structure & organization best practices
+- Fixture patterns for complex setup (LLM mocking, FAISS setup)
+- Parametrized testing for multiple biomarker scenarios
+- Test coverage reporting (pytest-cov)
+- Integration vs unit test patterns
+- Property-based testing with hypothesis
+
+**For RagBot**:
+- ✅ Expand test suite from 83 to 150+ tests
+- ✅ Mock LLM calls for faster CI/CD (no Groq/Gemini calls)
+- ✅ Add parametrized tests for each biomarker combination
+- ✅ Measure coverage metrics (target 90%+)
+- ✅ Integration tests for API routes
+
+**Implementation Plan**:
+```bash
+# Generate coverage report
+pytest tests/ --cov=src --cov-report=html
+
+# Run tests faster with mocked LLMs
+pytest tests/ -m "not slowtest" -v
+
+# Parametrize biomarker tests
+@pytest.mark.parametrize("glucose,hba1c,expected_disease", [
+ (140, 10, "Diabetes"),
+ (120, 8, "Prediabetes"),
+])
+```
+
+---
+
+### 2. ✅ API Security Hardening (144 installs)
+**Package**: `aj-geddes/useful-ai-prompts@api-security-hardening`
+**Location**: `.agents/skills/api-security-hardening/`
+
+**Core Capabilities**:
+- JWT authentication & API key validation
+- CORS configuration hardening
+- Input validation & sanitization
+- Security headers (CSP, X-Frame-Options, HSTS)
+- SQL injection prevention
+- Rate limiting integration
+
+**For RagBot** (CRITICAL for HIPAA/medical data):
+- ✅ Add API key authentication to `/api/v1/analyze/*` endpoints
+- ✅ Validate biomarker names against whitelist
+- ✅ Sanitize natural language input (SQL injection, XSS prevention)
+- ✅ Add security headers to all responses
+- ✅ Implement CORS for web integration
+
+**Implementation Priority**:
+```python
+# Add API key authentication (timing-safe comparison; tolerate unset env var)
+import os, secrets
+from fastapi import Depends, HTTPException, Header
+async def verify_api_key(x_api_key: str = Header(...)):
+    if not secrets.compare_digest(x_api_key, os.getenv("RAGBOT_API_KEY", "")):
+        raise HTTPException(status_code=403)
+    return x_api_key
+
+# Protect sensitive endpoints
+@app.post("/api/v1/analyze/natural")
+async def analyze(request: NaturalAnalysisRequest, key = Depends(verify_api_key)):
+ ...
+```
+
+---
+
+### 3. ✅ OWASP Security Check (148 installs)
+**Package**: `sergiodxa/agent-skills@owasp-security-check`
+**Location**: `.agents/skills/owasp-security-check/`
+
+**Core Capabilities**:
+- OWASP Top 10 vulnerability scanning
+- Dependency security checks (CVE detection)
+- Code pattern analysis for common flaws
+- Logging security violations
+- Authentication & authorization review
+- Data protection assessment
+
+**For RagBot** (Medical/HIPAA Compliance):
+- ✅ Scan for patient data leakage in logs
+- ✅ Verify no hardcoded API keys/secrets
+- ✅ Check for unencrypted data handling
+- ✅ Validate input sanitization (XSS, SQL injection)
+- ✅ Audit access controls on medical endpoints
+
+**Quick Start**:
+```bash
+# Run OWASP scan on your code
+# Use to validate: no secrets in code, no dangerous patterns
+
+# Key areas to audit:
+# - api/app/main.py (endpoint security)
+# - src/agents/* (data handling)
+# - api/app/services/extraction.py (input validation)
+```
+
+---
+
+### 4. ✅ API Rate Limiting (92 installs)
+**Package**: `aj-geddes/useful-ai-prompts@api-rate-limiting`
+**Location**: `.agents/skills/api-rate-limiting/`
+
+**Core Capabilities**:
+- Per-user rate limiting (requests/minute)
+- Per-IP rate limiting
+- Token bucket algorithm
+- Redis/in-memory backends
+- Graceful handling of limit exceeding
+
+**For RagBot**:
+- ✅ Prevent API abuse on `/api/v1/analyze/*` (critical medical endpoint)
+- ✅ Implement tiered rate limits:
+ - Free tier: 10 requests/minute
+ - Pro tier: 100 requests/minute
+- ✅ Return 429 with retry-after headers
+- ✅ Log rate limit violations
+
+**Implementation**:
+```python
+# Add to api/app/main.py — slowapi requires a starlette Request parameter
+from fastapi import Request
+from slowapi import Limiter
+from slowapi.util import get_remote_address
+limiter = Limiter(key_func=get_remote_address)
+app.state.limiter = limiter
+
+@app.post("/api/v1/analyze/natural")
+@limiter.limit("10/minute")  # 10 requests per minute
+async def analyze_natural(request: Request, payload: NaturalAnalysisRequest):
+    ...
+```
+
+---
+
+### 5. ✅ GitHub Actions Templates (2.8K installs)
+**Package**: `wshobson/agents@github-actions-templates`
+**Location**: `.agents/skills/github-actions-templates/`
+
+**Core Capabilities**:
+- Production-ready CI/CD workflows
+- Automated testing on every commit/PR
+- Security scanning (SAST, dependency checks)
+- Docker image building & pushing
+- Code quality checks (linting, formatting)
+- Build matrix for multiple Python versions
+
+**For RagBot**:
+- ✅ Auto-run pytest on every PR
+- ✅ Build & push Docker images to registry
+- ✅ Dependency scanning (pip-audit)
+- ✅ Code style checks (black, flake8)
+- ✅ Coverage reporting
+
+**Create `.github/workflows/ci.yml`**:
+```yaml
+name: CI/CD
+on: [push, pull_request]
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with: {python-version: "3.12"}
+      - run: pip install -r requirements.txt pytest-cov pip-audit
+      - run: python -m pytest tests/ -v --cov=src && pip-audit
+```
+
+---
+
+### 6. ✅ GitHub PR Review Workflow (31 installs)
+**Package**: `uwe-schwarz/skills@github-pr-review-workflow`
+**Location**: `.agents/skills/github-pr-review-workflow/`
+
+**Core Capabilities**:
+- PR template enforcement
+- Commit message standards
+- Required approval workflows
+- Code ownership files (CODEOWNERS)
+- Automated reviewer assignment
+- Branch protection rules
+
+**For RagBot**:
+- ✅ Enforce PR description (what changed, why)
+- ✅ Require tests for all changes
+- ✅ Require approval before merge
+- ✅ Define CODEOWNERS for critical files
+- ✅ Automate reviewer assignment
+
+**Create `.github/CODEOWNERS`**:
+```
+# API changes
+api/ @ragbot-maintainers
+
+# Workflow & agents (critical)
+src/workflow.py @ragbot-maintainers
+src/agents/ @ragbot-maintainers
+
+# Tests
+tests/ @ragbot-maintainers
+```
+
+---
+
+### 7. ✅ API Docs Generator (44 installs)
+**Package**: `patricio0312rev/skills@api-docs-generator`
+**Location**: `.agents/skills/api-docs-generator/`
+
+**Core Capabilities**:
+- OpenAPI/Swagger spec auto-generation
+- Interactive API documentation (Swagger UI, ReDoc)
+- Request/response example generation
+- Multi-version API support
+- Client SDK generation
+
+**For RagBot**:
+- ✅ Auto-generate OpenAPI spec from FastAPI code
+- ✅ Serve at `/docs` (Swagger UI) and `/redoc` (ReDoc)
+- ✅ Generate Python client library
+- ✅ Create API reference documentation
+- ✅ Include auth requirements in docs
+
+**Already Enabled in FastAPI**:
+```python
+# Your api/app/main.py already has:
+app = FastAPI(
+ title="RagBot API",
+ description="Medical biomarker analysis",
+ version="1.0.0",
+ docs_url="/docs",
+ redoc_url="/redoc",
+)
+```
+
+---
+
+### 8. ✅ Code Review Excellence (New)
+**Package**: `wshobson/agents@code-review-excellence`
+**Location**: `.agents/skills/code-review-excellence/`
+
+**Provides**:
+- Review checklist for Python code
+- Common code smell detection
+- Security review guidelines
+- Performance review patterns
+- Testing adequacy assessment
+
+**For RagBot**:
+- ✅ Review all PRs against medical safety checklist
+- ✅ Ensure biomarker validation in all paths
+- ✅ Verify error handling in API routes
+- ✅ Check logging doesn't expose patient data
+
+---
+
+### 9. ✅ FastAPI Templates (New)
+**Package**: `wshobson/agents@fastapi-templates`
+**Location**: `.agents/skills/fastapi-templates/`
+
+**Provides**:
+- FastAPI best practices & patterns
+- Dependency injection patterns
+- Exception handling templates
+- Middleware patterns
+- Testing patterns specific to FastAPI
+
+**For RagBot**:
+- ✅ Improve error responses (consistent JSON format)
+- ✅ Add custom exception handlers
+- ✅ Middleware for logging & observability
+- ✅ Request/response validation
+
+---
+
+### 10. ✅ Python Design Patterns (New)
+**Package**: `wshobson/agents@python-design-patterns`
+**Location**: `.agents/skills/python-design-patterns/`
+
+**Provides**:
+- Singleton, Factory, Strategy patterns
+- Dependency injection patterns
+- Observer patterns
+- Builder patterns
+
+**For RagBot**:
+- ✅ Centralize LLM configuration (Singleton pattern)
+- ✅ Factory pattern for creating agents
+- ✅ Strategy pattern for different prediction algorithms
+- ✅ Improve code maintainability
+
+---
+
+### 11. ✅ Python Error Handling (New)
+**Package**: `wshobson/agents@python-error-handling`
+**Location**: `.agents/skills/python-error-handling/`
+
+**For RagBot**:
+- ✅ Custom exception hierarchy (MedicalAnalysisError, etc.)
+- ✅ Better error context propagation through workflow
+- ✅ Graceful degradation when LLM calls fail
+- ✅ Distinguish between recoverable and fatal errors
+
+---
+
+### 12. ✅ Python Observability (New)
+**Package**: `wshobson/agents@python-observability`
+**Location**: `.agents/skills/python-observability/`
+
+**Provides**:
+- Structured logging patterns
+- Metrics collection (Prometheus)
+- Distributed tracing
+- Performance monitoring
+
+**For RagBot**:
+- ✅ Structured logs (JSON format)
+- ✅ Track LLM API latency
+- ✅ Monitor biomarker extraction success rates
+- ✅ Alert on workflow failures
+
+---
+
+### 13. ✅ RAG Implementation (New)
+**Package**: `wshobson/agents@rag-implementation`
+**Location**: `.agents/skills/rag-implementation/`
+
+**Provides**:
+- RAG pipeline best practices
+- Chunk size optimization
+- Retrieval evaluation patterns
+- Citation enforcement
+- Relevance scoring
+
+**For RagBot** (Critical for medical RAG):
+- ✅ Enforce minimum retrieval relevance (score > 0.7)
+- ✅ Require citations in all RAG outputs
+- ✅ Optimize chunk size for medical documents
+- ✅ Implement citation verification
+- ✅ Handle retrieval failures gracefully
+
+---
+
+## 🎯 Implementation Priority (Roadmap)
+
+### Phase 1: SECURITY & CRITICAL FIXES (Week 1) 🔒
+**Estimated Time**: 2-3 days
+
+1. **Use OWASP Security Check**
+ - Scan entire codebase for vulnerabilities
+ - Create vulnerability remediation plan
+ - Document security fixes
+
+2. **Implement API Security Hardening**
+ - Add API key authentication
+ - Add input validation & sanitization
+ - Add security headers
+ - Implement CORS properly
+
+3. **Add Rate Limiting**
+ - Protect `/api/v1/analyze/*` endpoints
+ - Implement tiered limits
+ - Add retry-after headers
+
+**Skills Used**: `owasp-security-check`, `api-security-hardening`, `api-rate-limiting`
+
+---
+
+### Phase 2: CODE QUALITY & TESTING (Week 2) 🧪
+**Estimated Time**: 2-3 days
+
+1. **Expand Test Suite**
+ - Use `python-testing-patterns` to add parametrized tests
+ - Add integration tests for API routes
+ - Mock LLM calls for faster CI/CD
+ - Measure & improve coverage to 90%+
+
+2. **Error Handling Improvements**
+ - Use `python-error-handling` to create exception hierarchy
+ - Add contextual error messages
+ - Implement retry logic for LLM calls
+
+3. **Code Organization**
+ - Apply `python-design-patterns` refactoring
+ - Centralize configuration management
+ - Improve code maintainability
+
+**Skills Used**: `python-testing-patterns`, `python-error-handling`, `python-design-patterns`
+
+---
+
+### Phase 3: DOCUMENTATION & CI/CD (Week 3) 📚
+**Estimated Time**: 1-2 days
+
+1. **CI/CD Setup**
+ - Use `github-actions-templates` to create workflows
+ - Auto-run tests on every PR
+ - Dependency scanning
+
+2. **Documentation**
+ - OpenAPI spec already auto-generated by FastAPI
+ - Use `api-docs-generator` to enhance docs
+ - Create API client libraries
+
+3. **Code Review Process**
+ - Set up with `github-pr-review-workflow`
+ - Create CODEOWNERS file
+ - Define review standards with `code-review-excellence`
+
+**Skills Used**: `github-actions-templates`, `api-docs-generator`, `github-pr-review-workflow`, `code-review-excellence`
+
+---
+
+### Phase 4: OBSERVABILITY & RAG IMPROVEMENTS (Week 4) 📊
+**Estimated Time**: 1-2 days
+
+1. **Observability**
+ - Add structured logging with `python-observability`
+ - Track metrics (LLM latency, success rates)
+ - Implement distributed tracing
+
+2. **RAG Optimization**
+ - Use `rag-implementation` to enforce citations
+ - Improve retrieval quality scoring
+ - Add citation verification
+
+3. **FastAPI Improvements**
+ - Use `fastapi-templates` for better exception handling
+ - Add observability middleware
+ - Improve request/response logging
+
+**Skills Used**: `python-observability`, `rag-implementation`, `fastapi-templates`
+
+---
+
+## 📋 Critical Fixes Required (From Deep Review)
+
+### Fix 1: Biomarker Flags & Safety Alerts Propagation
+**File**: `src/agents/biomarker_analyzer.py`
+**Issue**: Not returning `biomarker_flags` and `safety_alerts` to state
+
+```python
+# BEFORE
+return {"agent_outputs": [output]}
+
+# AFTER
+return {
+ "agent_outputs": [output],
+ "biomarker_flags": output.biomarker_flags,
+ "safety_alerts": output.safety_alerts,
+}
+```
+
+### Fix 2: Unified Biomarker Normalization
+**Files**: `api/app/services/extraction.py`, `scripts/chat.py`
+**Issue**: Different normalization schemes in API vs CLI
+
+```python
+# Create src/biomarker_normalization.py with shared map
+from src.biomarker_normalization import normalize_biomarker_name
+
+# Use in both API and CLI
+normalized = normalize_biomarker_name("ldl") # "LDL Cholesterol"
+```
+
+### Fix 3: Remove Forced Confidence & Default Disease
+**File**: `api/app/services/extraction.py`
+**Issue**: Minimum confidence forced to 0.5, default to Diabetes
+
+```python
+# BEFORE
+confidence = max(0.5, computed_confidence) # WRONG!
+disease = "Diabetes" if confidence < 0.7 else predicted
+
+# AFTER
+confidence = computed_confidence # Use actual value
+disease = predicted if confidence > 0.5 else None
+```
+
+### Fix 4: Schema Alignment
+**Files**: `src/workflow.py`, `api/app/services/ragbot.py`
+**Issue**: ResponseSynthesizerAgent output != API formatter input
+
+Choose one schema and commit to it across whole system.
+
+---
+
+## 📊 Expected Improvements
+
+| Metric | Before | After | Impact |
+|--------|--------|-------|--------|
+| Test Coverage | 70% | 90%+ | Faster development, fewer bugs |
+| API Security | Basic | OWASP-compliant | Medical data protection |
+| Production Readiness | Good | Excellent | Enterprise deployment |
+| Documentation | Auto-generated | Enhanced | Better developer experience |
+| Deployment | Manual | Automated | CI/CD pipelines |
+| Code Review | Ad-hoc | Standardized | Consistent quality |
+| Observability | Basic | Comprehensive | Better debugging |
+
+---
+
+## 🚀 Next Steps
+
+1. **Read the skills** (each has a README in `.agents/skills/*/`)
+2. **Run OWASP scan** immediately
+3. **Fix critical issues** from the Deep Review
+4. **Implement Phase 1** (Security) first
+5. **Roll out Phases 2-4** according to priority
+
+---
+
+## 📚 Skill Locations
+
+All skills installed to: `.agents/skills/`
+
+- ✅ Python Testing Patterns: `python-testing-patterns/`
+- ✅ API Security Hardening: `api-security-hardening/`
+- ✅ OWASP Security: `owasp-security-check/`
+- ✅ API Rate Limiting: `api-rate-limiting/`
+- ✅ GitHub Actions: `github-actions-templates/`
+- ✅ GitHub PR Review: `github-pr-review-workflow/`
+- ✅ API Docs: `api-docs-generator/`
+- ✅ Code Review: `code-review-excellence/`
+- ✅ FastAPI: `fastapi-templates/`
+- ✅ Design Patterns: `python-design-patterns/`
+- ✅ Error Handling: `python-error-handling/`
+- ✅ Observability: `python-observability/`
+- ✅ RAG: `rag-implementation/`
+
+**Access them anytime**: `npx skills list`
+
+---
+
+## ✅ Summary
+
+You now have **13 enterprise-grade skills** installed and ready to transform RagBot into an industry-leading medical AI system with:
+
+- 🔒 Medical-grade security
+- 🧪 Comprehensive test coverage
+- 📚 Professional documentation
+- 🚀 Automated CI/CD
+- 📊 Complete observability
+- 🎯 Best practice code quality
+
+**Recommendation**: Start with Phase 1 (Security) this week. All skills are accessible and documented in `.agents/skills/`.
+
+Good luck! 🚀
diff --git a/docs/archive/SKILLS_QUICK_REFERENCE.md b/docs/archive/SKILLS_QUICK_REFERENCE.md
new file mode 100644
index 0000000000000000000000000000000000000000..a7751c29eca97768ce3f88d800db900ab762ea62
--- /dev/null
+++ b/docs/archive/SKILLS_QUICK_REFERENCE.md
@@ -0,0 +1,324 @@
+╔══════════════════════════════════════════════════════════════════════════════╗
+║ 30 SKILLS QUICK REFERENCE CHEAT SHEET ║
+║ RagBot Agentic RAG System - All Skills at a Glance ║
+╚══════════════════════════════════════════════════════════════════════════════╝
+
+SKILL FINDER - FIND THE RIGHT SKILL FOR YOUR TASK
+════════════════════════════════════════════════════════════════════════════════
+
+Need help with... ? → Use Skill #:
+
+✅ Building multi-agent systems → #3, #4, #7
+✅ LangChain/LangGraph orchestration → #1, #2, #6
+✅ Better RAG retrieval (medical PDFs) → #8, #9, #10, #11, #12
+✅ Improving LLM outputs → #13, #16
+✅ Evaluating model performance → #14
+✅ Reducing API costs → #15
+✅ Securing medical endpoints → #17, #18, #19, #20
+✅ Expanding test coverage → #22, #24
+✅ Adding API docs → #29
+✅ Implementing PR standards → #30
+✅ FastAPI best practices → #25
+✅ Code organization & patterns → #26
+✅ Logging & monitoring → #27
+✅ Memory & context optimization → #28
+✅ Tool/function calling patterns → #5
+
+════════════════════════════════════════════════════════════════════════════════
+
+SKILLS BY STAGE OF YOUR PRODUCT
+════════════════════════════════════════════════════════════════════════════════
+
+CURRENT: Early-stage production
+┌─ Security: Critical (#17, #18, #19, #20)
+├─ Architecture: Foundation (#1, #2, #3)
+├─ Testing: Baseline (#22)
+└─ Docs: Basic (#29)
+
+PHASE 1 (This Month): Harden & Fix
+┌─ Security audit: #18
+├─ Fix state: #2, #3
+├─ Unify schema: #16
+├─ Expand tests: #22
+├─ Setup CI/CD: #24
+└─ Time: 2-4 weeks
+
+PHASE 2 (Month 2): Advance Agents
+┌─ Agent patterns: #4, #5, #6, #7
+├─ Prompt engineering: #13, #14
+├─ Function calling: #5
+├─ Memory optimization: #28
+└─ Time: 2-4 weeks
+
+PHASE 3 (Month 3): Optimize Retrieval
+┌─ Hybrid search: #8
+├─ Chunking: #9
+├─ Embeddings: #10
+├─ Knowledge graphs: #12
+├─ Citations: #11
+└─ Time: 2-4 weeks
+
+PHASE 4 (Month 4): Scale & Deploy
+┌─ Cost optimization: #15
+├─ Design patterns: #26
+├─ Observability: #27
+├─ FastAPI: #25
+├─ API docs: #29
+├─ PR workflow: #30
+└─ Time: 2-4 weeks
+
+════════════════════════════════════════════════════════════════════════════════
+
+INSTALL SKILL REFERENCE
+════════════════════════════════════════════════════════════════════════════════
+
+All skills already installed globally. To reinstall or access:
+
+# List all installed
+npx skills list
+
+# Check for updates
+npx skills check
+
+# Update all
+npx skills update
+
+# View skill documentation
+cat ~/.agents/skills/[skill-name]/SKILL.md
+cat ~/.agents/skills/langchain-architecture/SKILL.md
+cat ~/.agents/skills/api-security-hardening/SKILL.md
+# etc.
+
+════════════════════════════════════════════════════════════════════════════════
+
+SKILLS INSTALLATION MANIFEST
+════════════════════════════════════════════════════════════════════════════════
+
+Agent & Orchestration Stack (7):
+ [✅] LangChain Architecture
+ [✅] Workflow Orchestration Patterns
+ [✅] Multi-Agent Orchestration
+ [✅] Agentic Development
+ [✅] Tool/Function Calling Patterns
+ [✅] LLM Application Dev with LangChain
+ [✅] RAG Agent Builder
+
+Search & Retrieval Stack (5):
+ [✅] Hybrid Search Implementation
+ [✅] Chunking Strategy
+ [✅] Embedding Pipeline Builder
+ [✅] RAG Implementation
+ [✅] Knowledge Graph Builder
+
+LLM & Prompt Stack (4):
+ [✅] Senior Prompt Engineer (320 installs!)
+ [✅] LLM Evaluation
+ [✅] Cost-Aware LLM Pipeline
+ [✅] AI Wrapper/Structured Output (252 installs!)
+
+Security Stack (5):
+ [✅] API Security Hardening
+ [✅] OWASP Security Check
+ [✅] LLM Security
+ [✅] API Rate Limiting
+ [✅] Python Error Handling
+
+Quality & Testing Stack (3):
+ [✅] Python Testing Patterns (3.7K installs!)
+ [✅] Code Review Excellence
+ [✅] GitHub Actions Templates (2.8K installs!)
+
+Infrastructure Stack (4):
+ [✅] FastAPI Templates
+ [✅] Python Design Patterns
+ [✅] Python Observability
+ [✅] Memory Management
+
+Documentation & Collaboration (2):
+ [✅] API Docs Generator
+ [✅] GitHub PR Review Workflow
+
+════════════════════════════════════════════════════════════════════════════════
+
+YOUR CRITICAL ISSUES → SKILLS MAPPING
+════════════════════════════════════════════════════════════════════════════════
+
+Issue: biomarker_flags & safety_alerts not in workflow output
+Skills: #2 (Workflow Orchestration) + #3 (Multi-Agent) + #16 (Structured Output)
+Action: Refactor state.py, ensure all agents return required fields
+Timeline: Week 1
+
+Issue: Schema mismatch between workflow and API formatter
+Skills: #16 (Structured Output) + #4 (Agentic Development) + #25 (FastAPI)
+Action: Unify response format, use Pydantic validation
+Timeline: Week 2
+
+Issue: Forced confidence & default disease (dangerous!)
+Skills: #13 (Prompt Engineer) + #14 (LLM Evaluation) + #22 (Testing)
+Action: Remove forced minimums, add confidence range handling
+Timeline: Week 2
+
+Issue: Different biomarker naming (API vs CLI)
+Skills: #16 (Structured Output) + #9 (Chunking) + #22 (Testing)
+Action: Centralize normalization, parametrize tests
+Timeline: Week 3
+
+Issue: JSON parsing fragility from LLMs
+Skills: #16 (Structured Output) + #5 (Function Calling) + #14 (Evaluation)
+Action: Use structured outputs/function calling, add repair step
+Timeline: Week 3
+
+Issue: No citation enforcement in RAG
+Skills: #11 (RAG Implementation) + #12 (Knowledge Graphs) + #8 (Hybrid Search)
+Action: Track sources per claim, fail without citations
+Timeline: Week 4
+
+════════════════════════════════════════════════════════════════════════════════
+
+TOP 5 PRIORITY SKILLS TO START WITH NOW
+════════════════════════════════════════════════════════════════════════════════
+
+1️⃣ OWASP Security Check (#18)
+ └─ Why: Medical data protection is non-negotiable
+ └─ Time: 1-2 hours for initial scan
+ └─ First action: Run the security audit today
+
+2️⃣ Workflow Orchestration Patterns (#2)
+ └─ Why: Fixes your critical state propagation issue
+ └─ Time: 3-5 hours to refactor GuildState
+ └─ First action: Read the skill, identify missing state fields
+
+3️⃣ AI Wrapper/Structured Output (#16)
+ └─ Why: Solves schema mismatch, enables reliable parsing
+ └─ Time: 2-3 hours to implement
+ └─ First action: Define unified response schema with Pydantic
+
+4️⃣ Python Testing Patterns (#22)
+ └─ Why: Go from 83 to 150+ tests, improve confidence
+ └─ Time: 1-2 weeks (ongoing)
+ └─ First action: Create parametrized biomarker test suite
+
+5️⃣ Senior Prompt Engineer (#13)
+ └─ Why: Improve LLM accuracy for medical domain
+ └─ Time: 1-2 hours for initial optimization
+ └─ First action: Audit current agent prompts, identify improvements
+
+════════════════════════════════════════════════════════════════════════════════
+
+POPULAR SKILLS (BY INSTALL COUNT)
+════════════════════════════════════════════════════════════════════════════════
+
+3.7K installs: Python Testing Patterns (#22) ⭐⭐⭐
+2.8K installs: GitHub Actions Templates (#24) ⭐⭐⭐
+2.3K installs: LangChain Architecture (#1) ⭐⭐⭐
+2K installs: Workflow Orchestration (#2) ⭐⭐⭐
+1.7K installs: Hybrid Search (#8) ⭐⭐⭐
+320+ installs: Senior Prompt Engineer (#13) ⭐⭐⭐
+252 installs: AI Wrapper/Structured Output (#16) ⭐⭐⭐
+
+These are proven implementations - very likely to help!
+
+════════════════════════════════════════════════════════════════════════════════
+
+AVOID THESE MISTAKES
+════════════════════════════════════════════════════════════════════════════════
+
+❌ Don't skip security (#17, #18, #19, #20)
+ └─ Medical data requires HIPAA compliance
+
+❌ Don't ignore state management (#2, #3)
+ └─ Your parallel agents have race conditions
+
+❌ Don't use unstructured LLM output (#16)
+ └─ JSON parsing will break in production
+
+❌ Don't ship with <90% test coverage (#22) in a medical app
+ └─ Errors have real consequences for patients
+
+❌ Don't force disease predictions when uncertain
+ └─ Better to say "inconclusive" than wrong diagnosis
+
+❌ Don't retrieve without citations (#11)
+ └─ Hallucinations + medical = liability
+
+════════════════════════════════════════════════════════════════════════════════
+
+RECOMMENDED READING ORDER
+════════════════════════════════════════════════════════════════════════════════
+
+Today (30 min):
+ 1. OWASP Security Check - run the scan
+
+This week (2-3 hours):
+ 2. Workflow Orchestration Patterns - understand LangGraph
+ 3. AI Wrapper/Structured Output - unify your response format
+
+Next week (4-6 hours):
+ 4. Hybrid Search Implementation - improve medical retrieval
+ 5. Python Testing Patterns - expand test suite
+
+Then ongoing:
+ 6. Senior Prompt Engineer - iteratively improve prompts
+ 7. LLM Evaluation - benchmark your improvements
+ 8. All others as you progress through 4-month roadmap
+
+════════════════════════════════════════════════════════════════════════════════
+
+ESTIMATED EFFORT & IMPACT
+════════════════════════════════════════════════════════════════════════════════
+
+Skill Effort Impact Priority
+─────────────────────────────────────────────────────────────
+OWASP Security Check 2-3h Critical 🔴
+Workflow Orchestration 5-8h Critical 🔴
+AI Wrapper/Output 3-5h Critical 🔴
+Hybrid Search 4-6h High 🟠
+Python Testing Patterns 20-30h High 🟠
+Senior Prompt Engineer 2-3h High 🟠
+LLM Evaluation 3-4h High 🟠
+API Security Hardening 4-6h High 🟠
+LangChain Architecture 5-8h Medium 🟡
+Cost-Aware Pipeline 3-4h Medium 🟡
+Knowledge Graph Builder 6-8h Medium 🟡
+Chunking Strategy 2-3h Medium 🟡
+Embedding Pipeline 3-4h Medium 🟡
+FastAPI Templates 2-3h Medium 🟡
+Python Observability 3-4h Medium 🟡
+Others... 1-3h Low ⚪
+
+════════════════════════════════════════════════════════════════════════════════
+
+QUICK COMMAND REFERENCE
+════════════════════════════════════════════════════════════════════════════════
+
+# View a skill documentation
+cat ~/.agents/skills/api-security-hardening/SKILL.md
+
+# List all 30 skills
+ls ~/.agents/skills/
+
+# Check skill details
+head -20 ~/.agents/skills/owasp-security-check/SKILL.md
+
+# Get skills help
+npx skills --help
+
+════════════════════════════════════════════════════════════════════════════════
+
+FINAL NOTES
+════════════════════════════════════════════════════════════════════════════════
+
+🚀 You now have 30 world-class AI/RAG development skills
+💪 Your next 4 months of work is mapped out in COMPREHENSIVE_SKILLS_GUIDE.md
+🔒 Medical-grade security pathways defined
+🧪 Enterprise testing frameworks ready
+📊 Industry-standard patterns available
+
+Your RagBot will transform from "working production system" to
+"industry-leading medical AI" through systematic skill application.
+
+Start TODAY with OWASP Security Scan. Get momentum. Build iteratively.
+
+════════════════════════════════════════════════════════════════════════════════
+Master these skills. Master the medical AI space. 🏆
+════════════════════════════════════════════════════════════════════════════════
diff --git a/docs/archive/SKILLS_SUMMARY.txt b/docs/archive/SKILLS_SUMMARY.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0d4eefa9997a6b6c616920f0dfb022d9507bb01b
--- /dev/null
+++ b/docs/archive/SKILLS_SUMMARY.txt
@@ -0,0 +1,178 @@
+╔══════════════════════════════════════════════════════════════════════════════╗
+║ ✅ RAGBOT SKILLS INSTALLATION COMPLETE ║
+║ 13 Strategic Skills to Level Up Your Project ║
+╚══════════════════════════════════════════════════════════════════════════════╝
+
+TIER 1: CRITICAL SECURITY & TESTING (Install First)
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+✅ 1. Python Testing Patterns (3.7K installs)
+ → Expand test suite from 83 to 150+ tests
+ → Mock LLM calls for faster CI/CD
+ → Target 90%+ code coverage
+ Location: ~/.agents/skills/python-testing-patterns/
+
+✅ 2. API Security Hardening (144 installs)
+ → Add JWT/API key authentication
+ → Input validation & sanitization
+ → Security headers (CSP, HSTS, X-Frame-Options)
+ Location: ~/.agents/skills/api-security-hardening/
+
+✅ 3. OWASP Security Check (148 installs)
+ → Scan for OWASP Top 10 vulnerabilities
+ → Dependency security checks (CVE detection)
+ → Data protection & access control audit
+ Location: ~/.agents/skills/owasp-security-check/
+
+✅ 4. API Rate Limiting (92 installs)
+ → Protect critical endpoints from abuse
+ → Implement tiered rate limits
+ → Token bucket algorithm
+ Location: ~/.agents/skills/api-rate-limiting/
+
+TIER 2: INFRASTRUCTURE & CODE QUALITY
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+✅ 5. GitHub Actions Templates (2.8K installs)
+ → Auto-run pytest on every PR
+ → Build & push Docker images
+ → Security scanning & dependency checks
+ Location: ~/.agents/skills/github-actions-templates/
+
+✅ 6. Code Review Excellence
+ → Review checklists & standards
+ → Common code smell detection
+ → Security review guidelines
+ Location: ~/.agents/skills/code-review-excellence/
+
+✅ 7. Python Error Handling
+ → Custom exception hierarchy
+ → Graceful degradation patterns
+ → Better error context
+ Location: ~/.agents/skills/python-error-handling/
+
+TIER 3: ARCHITECTURE & CODE ORGANIZATION
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+✅ 8. Python Design Patterns
+ → Singleton, Factory, Strategy patterns
+ → Dependency injection
+ → Code maintainability
+ Location: ~/.agents/skills/python-design-patterns/
+
+✅ 9. FastAPI Templates
+ → FastAPI best practices
+ → Middleware patterns
+ → Exception handling
+ Location: ~/.agents/skills/fastapi-templates/
+
+✅ 10. Python Observability
+ → Structured logging (JSON)
+ → Metrics collection
+ → Distributed tracing
+ Location: ~/.agents/skills/python-observability/
+
+TIER 4: DOCUMENTATION & SPECIALIZED FEATURES
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+✅ 11. API Docs Generator (44 installs)
+ → Auto-generate OpenAPI specs
+ → Swagger UI & ReDoc documentation
+ → Client SDK generation
+ Location: ~/.agents/skills/api-docs-generator/
+
+✅ 12. GitHub PR Review Workflow (31 installs)
+ → PR templates & standards
+ → Code ownership (CODEOWNERS)
+ → Branch protection rules
+ Location: ~/.agents/skills/github-pr-review-workflow/
+
+✅ 13. RAG Implementation
+ → Citation enforcement
+ → Retrieval quality scoring
+ → Chunk optimization
+ Location: ~/.agents/skills/rag-implementation/
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+CRITICAL ISSUES TO ADDRESS (From Deep Code Review)
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+🔴 CRITICAL:
+ 1. biomarker_flags & safety_alerts not propagating through workflow
+ 2. Schema mismatch between workflow output & API formatter
+ 3. Prediction confidence forced to 0.5 (dangerous in medical domain)
+
+🟡 HIGH PRIORITY:
+ 4. Different biomarker naming in API vs CLI
+ 5. JSON parsing fragility in LLM extraction
+ 6. Missing citation enforcement in RAG outputs
+
+ACTION PLAN
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+WEEK 1: SECURITY & CRITICAL FIXES 🔒
+├─ Run OWASP security check on entire codebase
+├─ Add API key authentication to sensitive endpoints
+├─ Implement API rate limiting
+├─ Fix state propagation in workflow
+└─ Remove forced confidence & default disease
+
+WEEK 2: CODE QUALITY 🧪
+├─ Expand test suite to 150+ tests using testing patterns
+├─ Implement parametrized tests for biomarker combinations
+├─ Mock LLM calls for faster CI/CD
+├─ Add error handling with custom exceptions
+└─ Measure & improve coverage to 90%+
+
+WEEK 3: INFRASTRUCTURE 🚀
+├─ Set up GitHub Actions CI/CD workflows
+├─ Create CODEOWNERS and PR review standards
+├─ Configure branch protection rules
+├─ Generate & enhance API documentation
+└─ Set up automated dependency scanning
+
+WEEK 4: OBSERVABILITY & OPTIMIZATION 📊
+├─ Add structured logging with observability skill
+├─ Implement metrics collection (LLM latency, success rates)
+├─ Optimize RAG retrieval with citation enforcement
+├─ Apply design patterns for code organization
+└─ Fine-tune FastAPI configuration
+
+SKILLS USAGE QUICK REFERENCE
+━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+List all skills:
+ $ npx skills list
+
+Check for updates:
+ $ npx skills check
+
+Update all skills:
+ $ npx skills update
+
+View specific skill:
+ Open ~/.agents/skills/[skill-name]/
+
+SKILL RECOMMENDATIONS BY USE CASE
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+For increasing test coverage:
+ 👉 Python Testing Patterns
+
+For hospital/HIPAA compliance:
+ 👉 API Security Hardening + OWASP Security Check
+
+For improving reliability:
+ 👉 Python Error Handling + Python Observability + RAG Implementation
+
+For code maintainability:
+ 👉 Python Design Patterns + Code Review Excellence
+
+For deployment automation:
+ 👉 GitHub Actions Templates + GitHub PR Review Workflow
+
+For medical AI quality:
+ 👉 RAG Implementation + API Docs Generator + Python Testing Patterns
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+
+DOCUMENTATION:
+ See: SKILLS_INSTALLED_UPDATED.md (detailed guide with code examples)
+
+Next Step: Deep dive into Phase 1 (Security) - see SKILLS_INSTALLED_UPDATED.md
diff --git a/docs/archive/SKILLS_VERIFICATION_REPORT.md b/docs/archive/SKILLS_VERIFICATION_REPORT.md
new file mode 100644
index 0000000000000000000000000000000000000000..701b64467fc71888ce2b03da9b9cf9ba2cadd97f
--- /dev/null
+++ b/docs/archive/SKILLS_VERIFICATION_REPORT.md
@@ -0,0 +1,242 @@
+╔══════════════════════════════════════════════════════════════════════════════╗
+║ ✅ VERIFICATION REPORT: ALL 34 SKILLS INSTALLED & ACTIVE ║
+╚══════════════════════════════════════════════════════════════════════════════╝
+
+INSTALLATION VERIFICATION STATUS
+════════════════════════════════════════════════════════════════════════════════
+Installation Date: February 18, 2026
+Total Skills Installed: 34
+Installation Location: ~/.agents/skills/
+Status: ✅ ALL VERIFIED AND OPERATIONAL
+
+COMPLETE INVENTORY OF INSTALLED SKILLS
+════════════════════════════════════════════════════════════════════════════════
+
+✅ AGENT & ORCHESTRATION (7 skills)
+───────────────────────────────────
+ 1. ✅ langchain-architecture (1 file)
+ 2. ✅ workflow-orchestration-patterns (1 file)
+ 3. ✅ multi-agent-orchestration (6 files)
+ 4. ✅ agentic-development (1 file)
+ 5. ✅ langchain4j-tool-function-calling-patterns (4 files)
+ 6. ✅ llm-application-dev-langchain-agent (1 file)
+ 7. ✅ rag-agent-builder (7 files)
+
+✅ RETRIEVAL & SEARCH (5 skills)
+────────────────────────────────
+ 8. ✅ hybrid-search-implementation (1 file)
+ 9. ✅ chunking-strategy (9 files)
+10. ✅ embedding-pipeline-builder (1 file)
+11. ✅ rag-implementation (1 file)
+12. ✅ knowledge-graph-builder (2 files)
+
+✅ LLM OPTIMIZATION (4 skills)
+──────────────────────────────
+13. ✅ senior-prompt-engineer (7 files)
+14. ✅ llm-evaluation (1 files)
+15. ✅ cost-aware-llm-pipeline (1 files)
+16. ✅ ai-wrapper-product (1 files)
+
+✅ SECURITY & SAFETY (5 skills)
+───────────────────────────────
+17. ✅ api-security-hardening (1 file)
+18. ✅ owasp-security-check (21 files) ← Comprehensive!
+19. ✅ llm-security (12 files) ← Rich content!
+20. ✅ api-rate-limiting (1 file)
+21. ✅ python-error-handling (1 file)
+
+✅ TESTING & QUALITY (3 skills)
+───────────────────────────────
+22. ✅ python-testing-patterns (1 file)
+23. ✅ code-review-excellence (1 file)
+24. ✅ github-actions-templates (1 file)
+
+✅ INFRASTRUCTURE (4 skills)
+────────────────────────────
+25. ✅ fastapi-templates (1 file)
+26. ✅ python-design-patterns (1 file)
+27. ✅ python-observability (1 file)
+28. ✅ memory-management (1 file)
+
+✅ DOCUMENTATION & COLLAB (2 skills)
+─────────────────────────────────────
+29. ✅ api-docs-generator (1 file)
+30. ✅ github-pr-review-workflow (1 file)
+
+✅ BONUS SKILLS (4 additional)
+──────────────────────────────
+31. ✅ ci-cd-best-practices (1 file)
+32. ✅ frontend-accessibility-best-practices (8 files) ← Rich content!
+33. ✅ webhook-receiver-hardener (1 file)
+34. ✅ find-skills (1 file) ← Original skill finder
+
+════════════════════════════════════════════════════════════════════════════════
+
+HOW TO ACCESS YOUR INSTALLED SKILLS
+════════════════════════════════════════════════════════════════════════════════
+
+View any skill's documentation:
+
+ cat ~/.agents/skills/[skill-name]/SKILL.md
+
+Examples:
+
+ cat ~/.agents/skills/owasp-security-check/SKILL.md
+ cat ~/.agents/skills/api-security-hardening/SKILL.md
+ cat ~/.agents/skills/langchain-architecture/SKILL.md
+ cat ~/.agents/skills/senior-prompt-engineer/SKILL.md
+ cat ~/.agents/skills/python-testing-patterns/SKILL.md
+
+List all installed skills:
+
+ Get-ChildItem $env:USERPROFILE\.agents\skills -Directory | Select-Object Name | Sort-Object Name
+
+════════════════════════════════════════════════════════════════════════════════
+
+SKILLS WITH RICH CONTENT (Multiple files)
+════════════════════════════════════════════════════════════════════════════════
+
+These skills have comprehensive implementations, templates, and examples:
+
+🌟 owasp-security-check (21 files)
+ → Complete security audit framework with examples
+
+🌟 llm-security (12 files)
+ → LLM-specific vulnerability scanning toolkit
+
+🌟 frontend-accessibility-best-practices (8 files)
+ → Complete a11y implementation guide
+
+🌟 chunking-strategy (9 files)
+ → Document splitting templates & examples
+
+🌟 senior-prompt-engineer (7 files)
+ → Prompt engineering patterns & templates
+
+🌟 rag-agent-builder (7 files)
+ → Full RAG agent implementation framework
+
+🌟 multi-agent-orchestration (6 files)
+ → Multi-agent coordination patterns
+
+🌟 langchain4j-tool-function-calling-patterns (4 files)
+ → Tool/function calling patterns
+
+════════════════════════════════════════════════════════════════════════════════
+
+QUICK START: ACCESS YOUR FIRST SKILL TODAY
+════════════════════════════════════════════════════════════════════════════════
+
+Priority 1 - Start with security:
+ cat ~/.agents/skills/owasp-security-check/SKILL.md
+
+Priority 2 - Fix state management:
+ cat ~/.agents/skills/workflow-orchestration-patterns/SKILL.md
+
+Priority 3 - Unify schemas:
+ cat ~/.agents/skills/ai-wrapper-product/SKILL.md
+
+Priority 4 - Optimize prompts:
+ cat ~/.agents/skills/senior-prompt-engineer/SKILL.md
+
+Priority 5 - Expand tests:
+ cat ~/.agents/skills/python-testing-patterns/SKILL.md
+
+════════════════════════════════════════════════════════════════════════════════
+
+WHAT YOU CAN DO WITH THESE 34 SKILLS
+════════════════════════════════════════════════════════════════════════════════
+
+✅ Build enterprise-grade LangChain agents
+✅ Orchestrate 6+ parallel agents deterministically
+✅ Implement hybrid semantic + keyword search
+✅ Optimize document chunking for medical PDFs
+✅ Generate embeddings with production patterns
+✅ Enforce citations in RAG systems
+✅ Extract knowledge graphs from documents
+✅ Optimize LLM prompts for accuracy
+✅ Evaluate LLM quality metrics
+✅ Route queries by cost (Groq free tier)
+✅ Secure APIs with authentication & encryption
+✅ Scan for OWASP Top 10 vulnerabilities
+✅ Prevent LLM-specific attacks
+✅ Rate-limit endpoints
+✅ Expand test coverage from 70% to 90%+
+✅ Implement GitHub Actions CI/CD
+✅ Auto-generate API documentation
+✅ Establish code review standards
+✅ Optimize context windows
+✅ Monitor with observability patterns
+
+════════════════════════════════════════════════════════════════════════════════
+
+FILE STRUCTURE PROOF
+════════════════════════════════════════════════════════════════════════════════
+
+Each installed skill contains:
+
+Minimal skills (1 file):
+ └─ SKILL.md (documentation + patterns)
+
+Rich skills (4-21 files):
+ ├─ SKILL.md (documentation)
+ ├─ implementation files
+ ├─ examples/
+ ├─ templates/
+ └─ configuration files
+
+Location: ~/.agents/skills/[skill-name]/
+
+════════════════════════════════════════════════════════════════════════════════
+
+VERIFICATION COMMANDS
+════════════════════════════════════════════════════════════════════════════════
+
+To manually verify any skill:
+
+# Check if skill directory exists
+Test-Path "$env:USERPROFILE\.agents\skills\owasp-security-check" -PathType Container
+
+# List files in any skill
+Get-ChildItem "$env:USERPROFILE\.agents\skills\owasp-security-check" -Recurse
+
+# View SKILL.md from any skill
+Get-Content "$env:USERPROFILE\.agents\skills\owasp-security-check\SKILL.md" | Select-Object -First 50
+
+# Count total installed skills
+(Get-ChildItem "$env:USERPROFILE\.agents\skills" -Directory | Measure-Object).Count
+
+════════════════════════════════════════════════════════════════════════════════
+
+IMPORTANT: YOUR SKILLS ARE READY
+════════════════════════════════════════════════════════════════════════════════
+
+All 34 skills have been successfully installed globally. They are:
+
+✅ Accessible anywhere on your system
+✅ Ready to use immediately
+✅ Verified with actual content files
+✅ Properly indexed in ~/.agents/skills/
+
+There is NO need to reinstall.
+There is NO issue with the installation.
+
+YOU ARE READY TO START IMPLEMENTING.
+
+════════════════════════════════════════════════════════════════════════════════
+
+NEXT STEPS
+════════════════════════════════════════════════════════════════════════════════
+
+1. READ: COMPREHENSIVE_SKILLS_GUIDE.md (how to implement)
+2. READ: SKILLS_QUICK_REFERENCE.md (which skill for what)
+3. START: Read the first 3 skills documentation files
+4. BEGIN: Implement the 4-month improvement plan
+
+Your RagBot now has access to 34 world-class development skills.
+Integration: Look in ~/.agents/skills/ for any implementation.
+Support: Each skill has complete SKILL.md documentation.
+
+════════════════════════════════════════════════════════════════════════════════
+✅ ALL SYSTEMS OPERATIONAL - BEGIN IMPLEMENTATION
+════════════════════════════════════════════════════════════════════════════════
diff --git a/docs/archive/SKILL_TO_CODE_MAPPING.md b/docs/archive/SKILL_TO_CODE_MAPPING.md
new file mode 100644
index 0000000000000000000000000000000000000000..acf76dc3d4793a9a34753fc9ec376e25286d30b2
--- /dev/null
+++ b/docs/archive/SKILL_TO_CODE_MAPPING.md
@@ -0,0 +1,690 @@
+╔════════════════════════════════════════════════════════════════════════════╗
+║ 📚 SKILL-TO-CODE MAPPING: Where Each Skill Applies in RagBot ║
+║ Reference guide showing skill application locations ║
+╚════════════════════════════════════════════════════════════════════════════╝
+
+This document maps each of the 34 skills to specific code files and critical
+issues they resolve. Use this for quick lookup: "Where do I apply Skill #X?"
+
+════════════════════════════════════════════════════════════════════════════════
+
+CRITICAL ISSUES MAPPING TO SKILLS
+════════════════════════════════════════════════════════════════════════════════
+
+ISSUE #1: biomarker_flags & safety_alerts not propagating through workflow
+──────────────────────────────────────────────────────────────────────────────
+Problem Location: src/state.py, src/agents/*.py, src/workflow.py
+Affected Code:
+ ├─ GuildState (missing fields)
+ ├─ BiomarkerAnalyzerAgent.invoke() (only returns biomarkers)
+ ├─ ResponseSynthesizerAgent.invoke() (fields missing in input)
+ └─ Workflow edges (state not fully passed)
+
+Primary Skills:
+ ✓ #2 Workflow Orchestration Patterns → Fix state passing
+ ✓ #3 Multi-Agent Orchestration → Ensure deterministic flow
+ ✓ #16 Structured Output → Enforce complete schema
+
+Secondary Skills:
+ • #22 Testing Patterns → Write tests for state flow
+ • #27 Observability → Log state changes
+
+Action: Read src/state.py → identify missing fields → update all agents to
+ return complete state → test end-to-end
+
+
+ISSUE #2: Schema mismatch between workflow output & API formatter
+──────────────────────────────────────────────────────────────────────────────
+Problem Location: src/workflow.py, api/app/models/ (missing or inconsistent)
+Affected Code:
+ ├─ ResponseSynthesizerAgent output structure (varies)
+ ├─ api/app/services/ragbot.py format_response() (expects different keys)
+ ├─ CLI scripts/chat.py (different field names)
+ └─ Tests referencing old schema
+
+Primary Skills:
+ ✓ #16 AI Wrapper/Structured Output → Create unified Pydantic model
+ ✓ #22 Testing Patterns → Write schema validation tests
+
+Secondary Skills:
+ • #27 Observability → Log schema mismatches (debugging)
+
+Action: Create api/app/models/response.py with BaseAnalysisResponse →
+ update all agents to return it → validate in API
+
+
+ISSUE #3: Prediction confidence forced to 0.5 (dangerous for medical)
+──────────────────────────────────────────────────────────────────────────────
+Problem Location: src/agents/confidence_assessor.py, api/app/routes/analyze.py
+Affected Code:
+ ├─ ConfidenceAssessorAgent.invoke() (ignores actual assessment)
+ ├─ Default response in analyze endpoint (hardcoded 0.5)
+ └─ CLI logic (no failure path for low confidence)
+
+Primary Skills:
+ ✓ #13 Senior Prompt Engineer → Better reasoning in assessor
+ ✓ #14 LLM Evaluation → Benchmark accuracy
+
+Secondary Skills:
+ • #4 Agentic Development → Decision logic improvements
+ • #22 Testing Patterns → Test confidence boundaries
+ • #27 Observability → Track confidence distributions
+
+Action: Update confidence_assessor.py to use actual evidence → test with
+ multiple biomarker scenarios → Add high/medium/low confidence paths
+
+
+ISSUE #4: Biomarker naming inconsistency (API vs CLI)
+──────────────────────────────────────────────────────────────────────────────
+Problem Location: config/biomarker_references.json, src/agents/*, api/*
+Affected Code:
+ ├─ config/biomarker_references.json (canonical list)
+ ├─ BiomarkerAnalyzerAgent (validation against reference)
+ ├─ CLI scripts/chat.py (different naming)
+ └─ API endpoints (naming transformation)
+
+Primary Skills:
+ ✓ #9 Chunking Strategy → Include standard names in embedding
+ ✓ #16 Structured Output → Enforce standard field names
+
+Secondary Skills:
+ • #10 Embedding Pipeline → Index with canonical names
+ • #22 Testing Patterns → Test name transformation
+ • #27 Observability → Log name mismatches
+
+Action: Create biomarker_normalizer() → apply in all code paths → add
+ mapping tests
+
+
+ISSUE #5: JSON parsing breaks on malformed LLM output
+──────────────────────────────────────────────────────────────────────────────
+Problem Location: api/app/services/extraction.py, src/agents/extraction code
+Affected Code:
+ ├─ LLM.predict() returns text
+ ├─ json.loads() has no error handling
+ ├─ Invalid JSON crashes endpoint
+ └─ No fallback strategy
+
+Primary Skills:
+ ✓ #5 Tool/Function Calling → Use function calling instead
+ ✓ #21 Python Error Handling → Graceful degradation
+
+Secondary Skills:
+ • #16 Structured Output → Pydantic validation
+ • #19 LLM Security → Prevent injection in JSON
+ • #27 Observability → Log parsing failures
+ • #14 LLM Evaluation → Track failure rate
+
+Action: Replace json.loads() with Pydantic validator → implement retry logic
+ → add function calling as fallback
+
+
+ISSUE #6: No citation enforcement in RAG outputs
+──────────────────────────────────────────────────────────────────────────────
+Problem Location: src/agents/disease_explainer.py, response synthesis
+Affected Code:
+ ├─ retriever.retrieve() returns docs but citations dropped
+ ├─ DiseaseExplainerAgent doesn't track sources
+ ├─ ResponseSynthesizerAgent loses citation info
+ └─ API response has no source attribution
+
+Primary Skills:
+ ✓ #11 RAG Implementation → Enforce citations in loop
+ ✓ #8 Hybrid Search → Better relevance = better cites
+ ✓ #12 Knowledge Graph → Link to authoritative sources
+
+Secondary Skills:
+ • #1 LangChain Architecture → Tool for citation tracking
+ • #7 RAG Agent Builder → Full RAG best practices
+ • #14 LLM Evaluation → Test for hallucinations
+ • #27 Observability → Track citation frequency
+
+Action: Modify disease_explainer.py to preserve doc metadata → add citation
+ validation → return sources in API response
+
+════════════════════════════════════════════════════════════════════════════════
+
+SKILL-BY-SKILL APPLICATION GUIDE
+════════════════════════════════════════════════════════════════════════════════
+
+#1 LangChain Architecture
+ Phase: 3, Week 7
+ Apply To: src/agents/, src/services/
+ Key Files:
+ └─ src/agents/base_agent.py (NEW) - Create BaseAgent with LangChain patterns
+ └─ src/agents/*/invoke() - Add callbacks, chains, tools
+ └─ src/services/*.py - RunnableWithMessageHistory for conversation
+ Integration: Advanced chain composition, callbacks for metrics
+ Outcome: More sophisticated agent orchestration
+ Effort: 3-4 hours
+
+#2 Workflow Orchestration Patterns
+ Phase: 1, Week 1 / Phase 4, Week 12 (final review)
+ Apply To: src/workflow.py, src/state.py
+ Key Files:
+ └─ src/state.py - REFACTOR GuildState with all fields
+ └─ src/workflow.py - REFACTOR state passing between agents
+ └─ src/agents/biomarker_analyzer.py - Return complete state
+ └─ src/agents/disease_explainer.py - Preserve incoming state
+ Integration: Fix Issue #1 (state propagation)
+ Outcome: biomarker_flags & safety_alerts flow through entire workflow
+ Effort: 4-6 hours (Week 1) + 2 hours (Week 12 refine)
+
+#3 Multi-Agent Orchestration
+ Phase: 1, Week 2
+ Apply To: src/workflow.py
+ Key Files:
+ └─ src/workflow.py - Ensure deterministic agent order
+ └─ Parallel execution order documentation
+ Integration: Ensure agents execute in correct order with proper state passing
+ Outcome: Deterministic workflow execution
+ Effort: 3-4 hours
+
+#4 Agentic Development
+ Phase: 2, Week 3
+ Apply To: src/agents/biomarker_analyzer.py, confidence_assessor.py
+ Key Files:
+ └─ BiomarkerAnalyzerAgent.invoke() - Add confidence thresholds
+ └─ ConfidenceAssessorAgent - Better decision logic
+ └─ Add reasoning trace to responses
+ Integration: Better medical decisions, alternatives for low confidence
+ Outcome: More reliable biomarker analysis
+ Effort: 3-4 hours
+
+#5 Tool/Function Calling Patterns
+ Phase: 2, Week 4
+ Apply To: api/app/services/extraction.py, src/agents/extraction.py
+ Key Files:
+ └─ api/app/services/extraction.py - Define extraction tools/functions
+ └─ src/agents/ - Use function returns instead of JSON parsing
+ Integration: Fix Issue #5 (JSON parsing fragility)
+ Outcome: Structured LLM outputs guaranteed valid
+ Effort: 3-4 hours
+
+#6 LLM Application Dev with LangChain
+ Phase: 4, Week 11
+ Apply To: src/agents/ (production patterns)
+ Key Files:
+ └─ src/agents/base_agent.py - Implement lifecycle (setup, execute, cleanup)
+ └─ All agents - Add retry logic, graceful degradation
+ └─ Agent composition patterns - Chain agents
+ Integration: Production-ready agent code
+ Outcome: Robust, maintainable agents with error recovery
+ Effort: 4-5 hours
+
+#7 RAG Agent Builder
+ Phase: 4, Week 12
+ Apply To: src/agents/ (full review)
+ Key Files:
+ └─ src/agents/disease_explainer.py - RAG pattern review
+ └─ Ensure all responses cite sources
+ └─ Verify accuracy benchmarks
+ Integration: Full RAG agent validation before production
+ Outcome: Production-ready RAG agents
+ Effort: 4-5 hours
+
+#8 Hybrid Search Implementation
+ Phase: 3, Week 6
+ Apply To: src/retrievers/ (NEW)
+ Key Files:
+ └─ src/retrievers/hybrid_retriever.py (NEW) - Combine BM25 + FAISS
+ └─ src/agents/disease_explainer.py - Use hybrid retriever
+ Integration: Better document retrieval (semantic + keyword)
+ Outcome: +15% recall on rare disease queries
+ Effort: 4-6 hours
+
+#9 Chunking Strategy
+ Phase: 3, Week 6
+ Apply To: src/chunking_strategy.py (NEW), src/pdf_processor.py
+ Key Files:
+ └─ src/chunking_strategy.py (NEW) - Split by medical sections
+ └─ scripts/setup_embeddings.py - Use new chunking
+ └─ Re-chunk and re-embed medical_knowledge.faiss
+ Integration: Fix Issue #4 (naming), improve context window usage
+ Outcome: Better semantic chunks, improved retrieval quality
+ Effort: 4-5 hours
+
+#10 Embedding Pipeline Builder
+ Phase: 3, Week 6
+ Apply To: src/llm_config.py, scripts/setup_embeddings.py
+ Key Files:
+ └─ src/llm_config.py - Consider medical embedding models
+ └─ scripts/setup_embeddings.py - Use new embeddings
+ └─ Benchmark embedding quality
+ Integration: Better semantic search for medical terminology
+ Outcome: Improved document relevance ranking
+ Effort: 3-4 hours
+
+#11 RAG Implementation
+ Phase: 3, Week 6
+ Apply To: src/agents/disease_explainer.py
+ Key Files:
+ └─ src/agents/disease_explainer.py - Track and enforce citations
+ └─ src/models/response.py - Add sources field
+ └─ api/app/routes/analyze.py - Return sources
+ Integration: Fix Issue #6 (no citations), enforce medical accuracy
+ Outcome: All claims backed by sources
+ Effort: 3-4 hours
+
+#12 Knowledge Graph Builder
+ Phase: 3, Week 7
+ Apply To: src/knowledge_graph.py (NEW)
+ Key Files:
+ └─ src/knowledge_graph.py (NEW) - Disease → Biomarker → Treatment graph
+ └─ Extract entities from medical PDFs
+ └─ src/agents/biomarker_analyzer.py - Use knowledge graph
+ └─ Create graph.html visualization
+ Integration: Better disease prediction via relationships
+ Outcome: Knowledge graph with 100+ nodes, 500+ edges
+ Effort: 6-8 hours
+
+#13 Senior Prompt Engineer
+ Phase: 2, Week 3
+ Apply To: src/agents/ (all agent prompts)
+ Key Files:
+ └─ src/agents/biomarker_analyzer.py - Prompt: few-shot extraction
+ └─ src/agents/disease_explainer.py - Prompt: chain-of-thought reasoning
+ └─ src/agents/confidence_assessor.py - Prompt: decision logic
+ └─ src/agents/clinical_guidelines.py - Prompt: evidence-based
+ Integration: Fix Issue #3 (confidence), improve medical reasoning
+ Outcome: +15% accuracy improvement
+ Effort: 5-6 hours
+
+#14 LLM Evaluation
+ Phase: 2, Week 4
+ Apply To: tests/evaluation_metrics.py (NEW)
+ Key Files:
+ └─ tests/evaluation_metrics.py (NEW) - Benchmarking suite
+ └─ tests/fixtures/evaluation_patients.py - Test scenarios
+ └─ Benchmark Groq vs Gemini performance
+ └─ Track before/after improvements
+ Integration: Measure all improvements quantitatively
+ Outcome: Clear metrics showing progress
+ Effort: 4-5 hours
+
+#15 Cost-Aware LLM Pipeline
+ Phase: 3, Week 8
+ Apply To: src/llm_config.py
+ Key Files:
+ └─ src/llm_config.py - Model routing by complexity
+ └─ Implement caching (hash → result)
+ └─ Cost tracking and reporting
+ └─ Target: -40% cost reduction
+ Integration: Optimize API costs without sacrificing accuracy
+ Outcome: Lower operational costs
+ Effort: 4-5 hours
+
+#16 AI Wrapper/Structured Output
+ Phase: 1, Week 1
+ Apply To: api/app/models/ (NEW and REFACTORED)
+ Key Files:
+ └─ api/app/models/response.py (NEW) - Create unified BaseAnalysisResponse
+ └─ api/app/services/ragbot.py - Use unified schema
+ └─ All agents - Match unified output
+ └─ API responses - Validate with Pydantic
+ Integration: Fix Issues #1, #2, #4, #5 (schema consistency)
+ Outcome: Single canonical response format
+ Effort: 3-5 hours
+
+#17 API Security Hardening
+ Phase: 1, Week 1
+ Apply To: api/app/middleware/, api/main.py
+ Key Files:
+ └─ api/app/middleware/auth.py (NEW) - JWT auth
+ └─ api/main.py - Add security middleware chain
+ └─ CORS, headers, rate limiting
+ Integration: Secure REST API endpoints
+ Outcome: API hardened against common attacks
+ Effort: 4-6 hours
+
+#18 OWASP Security Check
+ Phase: 1, Week 1
+ Apply To: docs/ (audit report)
+ Key Files:
+ └─ docs/SECURITY_AUDIT.md (NEW) - Security findings
+ └─ Scan api/ and src/ for vulnerabilities
+ └─ Create tickets for each issue
+ Integration: Establish security baseline
+ Outcome: All vulnerabilities documented and prioritized
+ Effort: 2-3 hours
+
+#19 LLM Security
+ Phase: 1, Week 2
+ Apply To: api/app/middleware/input_validation.py (NEW)
+ Key Files:
+ └─ api/app/middleware/input_validation.py (NEW) - Input sanitization
+ └─ Detect prompt injection attempts
+ └─ Validate biomarker inputs
+ └─ Escape special characters
+ Integration: Fix Issue #5 (JSON safety), prevent prompt injection
+ Outcome: Inputs validated before LLM processing
+ Effort: 3-4 hours
+
+#20 API Rate Limiting
+ Phase: 1, Week 1
+ Apply To: api/app/middleware/rate_limiter.py (NEW)
+ Key Files:
+ └─ api/app/middleware/rate_limiter.py (NEW) - Token bucket limiter
+ └─ api/main.py - Add to middleware chain
+ └─ Tiered limits (free/pro based on API key)
+ Integration: Protect API from abuse
+ Outcome: Rate limiting in place
+ Effort: 2-3 hours
+
+#21 Python Error Handling
+ Phase: 2, Week 2
+ Apply To: src/exceptions.py (NEW), src/agents/
+ Key Files:
+ └─ src/exceptions.py (NEW) - Custom exception hierarchy
+ └─ RagBotException, BiomarkerValidationError, LLMTimeoutError, etc.
+ └─ All agents - Replace generic try-except
+ └─ API - Proper error responses
+ Integration: Graceful error handling throughout system
+ Outcome: No uncaught exceptions, useful error messages
+ Effort: 3-4 hours
+
+#22 Python Testing Patterns
+ Phase: 1, Week 1 + Phase 2, Week 3 (primary), Week 4
+ Apply To: tests/ (throughout project)
+ Key Files:
+ └─ tests/conftest.py - Shared fixtures
+ └─ tests/fixtures/ - auth, biomarkers, patients
+ └─ tests/test_api_auth.py - Auth tests (Week 1)
+ └─ tests/test_parametrized_*.py - 50+ parametrized tests (Week 3)
+ └─ tests/test_response_schema.py - Schema validation (Week 1)
+ └─ 80-90% code coverage
+ Integration: Comprehensive test suite ensures reliability
+ Outcome: 125+ tests, 90%+ coverage
+ Effort: 10-13 hours total
+
+#23 Code Review Excellence
+ Phase: 4, Week 10
+ Apply To: docs/REVIEW_GUIDELINES.md (NEW), all PRs
+ Key Files:
+ └─ docs/REVIEW_GUIDELINES.md (NEW) - Medical code review standards
+ └─ Apply to all Phase 1-3 pull requests
+ └─ Self-review checklist
+ Integration: Maintain code quality
+ Outcome: Clear review guidelines
+ Effort: 2-3 hours
+
+#24 GitHub Actions Templates
+ Phase: 1, Week 2
+ Apply To: .github/workflows/ (NEW)
+ Key Files:
+ └─ .github/workflows/test.yml - Run tests on PR
+ └─ .github/workflows/security.yml - Security checks
+ └─ .github/workflows/docker.yml - Build Docker images
+ Integration: Automated CI/CD pipeline
+ Outcome: Tests run automatically
+ Effort: 2-3 hours
+
+#25 FastAPI Templates
+ Phase: 4, Week 9
+ Apply To: api/app/main.py, api/app/dependencies.py
+ Key Files:
+ └─ api/app/main.py - REFACTOR with best practices
+ └─ Async patterns, dependency injection
+ └─ Connection pooling, caching headers
+ └─ Health check endpoints
+ Integration: Production-grade FastAPI configuration
+ Outcome: Optimized API performance
+ Effort: 3-4 hours
+
+#26 Python Design Patterns
+ Phase: 2, Week 3
+ Apply To: src/agents/base_agent.py (NEW), src/agents/
+ Key Files:
+ └─ src/agents/base_agent.py (NEW) - Extract common pattern
+ └─ Factory pattern for agent creation
+ └─ Composition over inheritance
+ └─ Refactor BiomarkerAnalyzerAgent, etc.
+ Integration: Cleaner, more maintainable code
+ Outcome: Reduced coupling, better abstractions
+ Effort: 4-5 hours
+
+#27 Python Observability
+ Phase: 1, Week 2 (logging) / Phase 4, Week 10 (metrics) / Phase 2, Week 5
+ Apply To: src/, api/app/
+ Key Files:
+ └─ src/observability.py (NEW) - Logging infrastructure (Week 2)
+ └─ All agents - Add structured JSON logging
+ └─ src/monitoring/ (NEW) - Prometheus metrics (Week 10)
+ └─ Track latency, accuracy, costs
+ Integration: Visibility into system behavior
+ Outcome: JSON logs, metrics at /metrics
+ Effort: 12-15 hours total
+
+#28 Memory Management
+ Phase: 3, Week 7
+ Apply To: src/memory_manager.py (NEW)
+ Key Files:
+ └─ src/memory_manager.py (NEW) - Sliding window memory
+ └─ Context compression for conversation history
+ └─ Token usage optimization
+ Integration: Handle long conversations without exceeding limits
+ Outcome: 20-30% token savings
+ Effort: 3-4 hours
+
+#29 API Docs Generator
+ Phase: 4, Week 9
+ Apply To: api/app/routes/ (documentation)
+ Key Files:
+ └─ api/app/routes/*.py - Enhance docstrings
+ └─ Add examples to endpoints
+ └─ Auto-generates /docs (Swagger UI), /redoc
+ Integration: API discoverable by developers
+ Outcome: Interactive API documentation
+ Effort: 2-3 hours
+
+#30 GitHub PR Review Workflow
+ Phase: 4, Week 9
+ Apply To: .github/ (NEW)
+ Key Files:
+ └─ .github/CODEOWNERS - Code ownership rules
+ └─ .github/pull_request_template.md - PR checklist
+ └─ Branch protection rules
+ Integration: Establish code review standards
+ Outcome: Consistent PR quality
+ Effort: 2-3 hours
+
+#31 CI-CD Best Practices
+ Phase: 4, Week 10
+ Apply To: .github/workflows/deploy.yml (NEW)
+ Key Files:
+ └─ .github/workflows/deploy.yml (NEW) - Deployment pipeline
+ └─ Build → Test → Staging → Canary → Production
+ └─ Environment management (.env files)
+ Integration: Automated, safe deployments
+ Outcome: Confident production deployments
+ Effort: 3-4 hours
+
+#32 Frontend Accessibility (OPTIONAL)
+ Phase: 4, Week 10
+ Apply To: examples/web_interface/ (if building web UI)
+ Key Files:
+ └─ examples/web_interface/ - WCAG 2.1 AA compliance
+ Integration: Accessible web interface (if needed)
+ Outcome: Screen-reader friendly, keyboard navigable
+ Effort: 2-3 hours (skip if CLI only)
+
+#33 Webhook Receiver Hardener (OPTIONAL)
+ Phase: 4, Week 11
+ Apply To: api/app/webhooks/ (NEW, if integrations needed)
+ Key Files:
+ └─ api/app/webhooks/ (NEW) - Webhook handlers
+ └─ Signature verification, replay protection
+ Integration: Secure webhook handling for EHR integrations
+ Outcome: Protected webhook endpoints
+ Effort: 2-3 hours (skip if no webhooks)
+
+════════════════════════════════════════════════════════════════════════════════
+
+QUICK LOOKUP: BY FILE
+
+api/app/main.py
+ ├─ #17 API Security Hardening (JWT middleware)
+ ├─ #20 Rate Limiting (rate limiter middleware)
+ ├─ #25 FastAPI Templates (async patterns)
+ ├─ #24 GitHub Actions Templates (CI/CD reference)
+ └─ #29 API Docs Generator (docstrings)
+
+api/app/models/response.py (NEW)
+ ├─ #16 AI Wrapper/Structured Output (unified schema)
+ └─ #22 Testing Patterns (Pydantic validation)
+
+api/app/middleware/ (NEW)
+ ├─ auth.py #17 API Security Hardening
+ ├─ input_validation.py #19 LLM Security
+ └─ rate_limiter.py #20 API Rate Limiting
+
+src/state.py
+ ├─ #2 Workflow Orchestration (fix state fields)
+ ├─ #16 Structured Output (enforce schema)
+ └─ #22 Testing Patterns (state tests)
+
+src/workflow.py
+ ├─ #2 Workflow Orchestration (state passing)
+ ├─ #3 Multi-Agent Orchestration (agent order)
+ └─ #27 Observability (logging)
+
+src/agents/base_agent.py (NEW)
+ ├─ #26 Python Design Patterns (factory, composition)
+ ├─ #6 LLM App Dev LangChain (lifecycle)
+ ├─ #21 Error Handling (graceful degradation)
+ └─ #27 Observability (logging)
+
+src/agents/biomarker_analyzer.py
+ ├─ #4 Agentic Development (confidence thresholds)
+ ├─ #13 Senior Prompt Engineer (prompt optimization)
+ ├─ #2 Workflow Orchestration (return complete state)
+ └─ #12 Knowledge Graph (use relationships)
+
+src/agents/disease_explainer.py
+ ├─ #8 Hybrid Search (retriever)
+ ├─ #11 RAG Implementation (enforcement)
+ ├─ #13 Senior Prompt Engineer (chain-of-thought)
+ ├─ #1 LangChain Architecture (advanced patterns)
+ └─ #7 RAG Agent Builder (RAG best practices)
+
+src/agents/confidence_assessor.py
+ ├─ #4 Agentic Development (decision logic)
+ ├─ #13 Senior Prompt Engineer (better reasoning)
+ ├─ #14 LLM Evaluation (benchmark)
+ └─ #22 Testing Patterns (confidence tests)
+
+src/agents/clinical_guidelines.py
+ ├─ #13 Senior Prompt Engineer (evidence-based)
+ └─ #1 LangChain Architecture (advanced retrieval)
+
+src/exceptions.py (NEW)
+ ├─ #21 Python Error Handling (exception hierarchy)
+ └─ #27 Observability (error logging)
+
+src/retrievers/hybrid_retriever.py (NEW)
+ ├─ #8 Hybrid Search Implementation (BM25 + FAISS)
+ ├─ #9 Chunking Strategy (better chunks)
+ ├─ #10 Embedding Pipeline (semantic search)
+ └─ #27 Observability (retrieval metrics)
+
+src/chunking_strategy.py (NEW)
+ ├─ #9 Chunking Strategy (medical section splitting)
+ ├─ #10 Embedding Pipeline (prepare for embedding)
+ └─ #4 Agentic Development (standardization)
+
+src/knowledge_graph.py (NEW)
+ ├─ #12 Knowledge Graph Builder (extract relationships)
+ ├─ #13 Senior Prompt Engineer (entity extraction prompt)
+ └─ #1 LangChain Architecture (graph traversal)
+
+src/memory_manager.py (NEW)
+ ├─ #28 Memory Management (sliding window, compression)
+ └─ #15 Cost-Aware Pipeline (token optimization)
+
+src/llm_config.py
+ ├─ #15 Cost-Aware LLM Pipeline (model routing, caching)
+ ├─ #10 Embedding Pipeline (embedding model config)
+ └─ #27 Observability (cost tracking)
+
+src/observability.py (NEW)
+ ├─ #27 Python Observability (logging, metrics)
+ ├─ #21 Error Handling (error tracking)
+ └─ #14 LLM Evaluation (metric collection)
+
+src/monitoring/ (NEW)
+ └─ #27 Python Observability (metrics, dashboards)
+
+tests/conftest.py
+ └─ #22 Python Testing Patterns (shared fixtures)
+
+tests/fixtures/
+ ├─ auth.py #22 Testing Patterns
+ ├─ biomarkers.py #22 Testing Patterns
+ └─ evaluation_patients.py #14 LLM Evaluation
+
+tests/test_api_auth.py (NEW)
+ ├─ #22 Python Testing Patterns
+ ├─ #17 API Security Hardening
+ └─ #25 FastAPI Templates
+
+tests/test_parametrized_*.py (NEW)
+ └─ #22 Python Testing Patterns
+
+tests/evaluation_metrics.py (NEW)
+ └─ #14 LLM Evaluation
+
+.github/workflows/
+ ├─ test.yml #24 GitHub Actions Templates
+ ├─ security.yml #18 OWASP Check + #24 Actions
+ ├─ docker.yml #24 Actions
+ └─ deploy.yml #31 CI-CD Best Practices
+
+.github/
+ ├─ CODEOWNERS #30 GitHub PR Review Workflow
+ ├─ pull_request_template.md #30 Workflow
+ └─ branch protection rules
+
+docs/
+ ├─ SECURITY_AUDIT.md #18 OWASP Check
+ ├─ REVIEW_GUIDELINES.md #23 Code Review Excellence
+ └─ API.md (updated by #29 API Docs Generator)
+
+════════════════════════════════════════════════════════════════════════════════
+
+SKILL DEPENDENCY GRAPH
+════════════════════════════════════════════════════════════════════════════════
+
+Phase 1 must finish before Phase 2:
+ #18, #17, #22, #2, #16, #20, #3, #19, #21, #27, #24
+ ↓
+Phase 2 requires Phase 1:
+ #22, #26, #4, #13, #14, #5
+ ↓
+Phase 3 requires Phases 1-2:
+ #8, #9, #10, #11, #12, #1, #28, #15
+ ↓
+Phase 4 requires Phases 1-3:
+ #25, #29, #30, #27, #23, #31, #32*, #6, #33*, #7
+
+Within phases, some order dependencies:
+ - #16 should complete before other Phase 1 work finalizes
+ - #13 should complete before #14 evaluation
+ - #8, #9, #10 should coordinate (hybrid search → chunking → embeddings)
+ - #11 depends on #8 (retriever first)
+ - #12 depends on #13 (prompt engineering for entity extraction)
+ - #27 used 3 times (Week 2, Week 5, Week 10)
+ - #22 used 2 times (Week 1, Weeks 3-4)
+
+════════════════════════════════════════════════════════════════════════════════
+
+DAILY WORKFLOW
+════════════════════════════════════════════════════════════════════════════════
+
+1. Open the skill's SKILL.md in ~/.agents/skills/<skill-name>/
+2. Read the relevant section for your task
+3. Apply to specific code files listed above
+4. Write tests immediately (use #22 Testing Patterns)
+5. Commit with clear message: "feat: [Skill #X] [Description]"
+6. Track in IMPLEMENTATION_STATUS_TRACKER.md
+
+════════════════════════════════════════════════════════════════════════════════
diff --git a/api/START_HERE.md b/docs/archive/START_HERE.md
similarity index 100%
rename from api/START_HERE.md
rename to docs/archive/START_HERE.md
diff --git a/docs/archive/WEEK1_EXECUTION_PLAN.md b/docs/archive/WEEK1_EXECUTION_PLAN.md
new file mode 100644
index 0000000000000000000000000000000000000000..ed3cf27d62d8144089efabbee5a83c3f2e74d103
--- /dev/null
+++ b/docs/archive/WEEK1_EXECUTION_PLAN.md
@@ -0,0 +1,195 @@
+╔════════════════════════════════════════════════════════════════════════════╗
+║ 🎯 QUICK START: THIS WEEK'S TASKS (12-Week Plan) ║
+║ Use this for daily execution and progress tracking ║
+╚════════════════════════════════════════════════════════════════════════════╝
+
+PHASE 1 - WEEK 1 ([CURRENT]) - Security + State Propagation
+════════════════════════════════════════════════════════════════════════════════
+
+MONDAY-TUESDAY: OWASP Audit + API Security
+┌──────────────────────────────────────────────────────────────────────────┐
+│ Task 1.1: Run OWASP Security Check (Skill #18) │
+│ Time: 2-3 hours │
+│ Actions: │
+│ □ npx skills unlock owasp-security-check │
+│ □ Read ~/.agents/skills/owasp-security-check/SKILL.md │
+│ □ Run security scan on /api and /src │
+│ □ Document findings in docs/SECURITY_AUDIT.md │
+│ □ Create GitHub issues for each finding │
+│ Deliverable: SECURITY_AUDIT.md with prioritized issues │
+│ Success: Report shows all vulnerabilities categorized │
+└──────────────────────────────────────────────────────────────────────────┘
+
+┌──────────────────────────────────────────────────────────────────────────┐
+│ Task 1.2: Implement JWT Authentication (Skill #17) │
+│ Time: 4-6 hours │
+│ Actions: │
+│ □ npx skills unlock api-security-hardening │
+│ □ Read ~/.agents/skills/api-security-hardening/SKILL.md │
+│ □ Create api/app/middleware/auth.py (JWT generation + validation) │
+│ □ Add @require_auth decorator to api/app/routes/analyze.py │
+│ □ Update api/main.py to include auth middleware │
+│   □ Test: curl -H "Authorization: Bearer <token>" /api/v1/analyze        │
+│ Deliverable: JWT auth working on all endpoints │
+│ Success: Unauthorized requests return 401 │
+└──────────────────────────────────────────────────────────────────────────┘
+
+WEDNESDAY-THURSDAY: Test Infrastructure + State Fixing
+┌──────────────────────────────────────────────────────────────────────────┐
+│ Task 1.3: Create Testing Infrastructure (Skill #22) │
+│ Time: 2-3 hours │
+│ Actions: │
+│ □ npx skills unlock python-testing-patterns │
+│ □ Create tests/conftest.py with fixtures │
+│ □ Create tests/fixtures/auth.py (JWT token generator) │
+│ □ Create tests/fixtures/biomarkers.py (test data) │
+│ □ Create tests/test_api_auth.py with 10+ auth tests │
+│ □ Run: pytest tests/test_api_auth.py -v │
+│ Deliverable: Auth tests with 80%+ coverage │
+│ Success: All auth tests passing │
+└──────────────────────────────────────────────────────────────────────────┘
+
+┌──────────────────────────────────────────────────────────────────────────┐
+│ Task 1.4: Fix State Propagation in Workflow (Skill #2) │
+│ Time: 4-6 hours │
+│ Actions: │
+│ □ npx skills unlock workflow-orchestration-patterns │
+│ □ Read ~/.agents/skills/workflow-orchestration-patterns/SKILL.md │
+│ □ Review src/state.py - identify missing fields │
+│ □ Add to GuildState: biomarker_flags, safety_alerts │
+│ □ Update each agent to return complete state: │
+│ - BiomarkerAnalyzerAgent: add flags │
+│ - DiseaseExplainerAgent: preserve incoming flags │
+│ - ConfidenceAssessorAgent: preserve all state │
+│ □ Test: python scripts/test_chat_demo.py │
+│ □ Verify state carries through entire workflow │
+│ Deliverable: State propagates end-to-end │
+│ Success: All fields present in final response │
+└──────────────────────────────────────────────────────────────────────────┘
+
+FRIDAY: Schema Unification + Rate Limiting
+┌──────────────────────────────────────────────────────────────────────────┐
+│ Task 1.5: Unify Response Schema (Skill #16) │
+│ Time: 3-5 hours │
+│ Actions: │
+│ □ npx skills unlock ai-wrapper-product │
+│ □ Create api/app/models/response.py (unified schema) │
+│ □ Define BaseAnalysisResponse with all fields: │
+│ - biomarkers: dict │
+│ - disease: str │
+│ - confidence: float │
+│ - biomarker_flags: list │
+│ - safety_alerts: list (NEW) │
+│ □ Update api/app/services/ragbot.py to use unified schema │
+│ □ Test all endpoints return correct schema │
+│ □ Run: pytest tests/test_response_schema.py -v │
+│ Deliverable: Unified schema in place │
+│ Success: Pydantic validation passes │
+└──────────────────────────────────────────────────────────────────────────┘
+
+┌──────────────────────────────────────────────────────────────────────────┐
+│ Task 1.6: Add Rate Limiting (Skill #20) │
+│ Time: 2-3 hours │
+│ Actions: │
+│ □ npx skills unlock api-rate-limiting │
+│ □ Create api/app/middleware/rate_limiter.py │
+│ □ Add rate limiting to api/main.py: │
+│ - 10 requests/minute (free tier) │
+│ - 100 requests/minute (pro tier) │
+│ □ Return 429 Too Many Requests with retry-after header │
+│ □ Test rate limiting behavior │
+│ Deliverable: Rate limiting active │
+│ Success: 11th request returns 429 │
+└──────────────────────────────────────────────────────────────────────────┘
+
+FRIDAY (EVENING): Code Review + Commit
+
+┌──────────────────────────────────────────────────────────────────────────┐
+│ Task 1.7: Code Review & Commit Week 1 Work │
+│ Actions: │
+│ □ Review all changes for: │
+│ - No hardcoded secrets │
+│ - Proper error handling │
+│ - Consistent code style │
+│ - Docstrings added │
+│ □ Run full test suite: pytest tests/ -v --cov src │
+│ □ Ensure coverage >75% │
+│ □ Create PR titled: "Phase 1 Week 1: Security + State Propagation" │
+│ □ Update IMPLEMENTATION_ROADMAP.md with actual times │
+│ Success: PR ready for review │
+└──────────────────────────────────────────────────────────────────────────┘
+
+WEEK 1 SUMMARY
+════════════════════════════════════════════════════════════════════════════════
+
+✓ Security audit completed
+✓ JWT authentication implemented
+✓ Testing infrastructure created
+✓ State propagation fixed
+✓ Response schema unified
+✓ Rate limiting added
+✓ Tests written & passing
+
+Metrics to Track:
+ - Lines of code added: ____
+ - Tests added: ____
+ - Coverage improvement: __% → __%
+ - Issues found (OWASP): ____
+ - Issues resolved: ____
+
+════════════════════════════════════════════════════════════════════════════════
+
+AFTER WEEK 1: Next Steps
+
+Move to Phase 1 Week 2:
+ Task 2.1: Multi-Agent Orchestration fixes
+ Task 2.2: LLM Security (prompt injection)
+ Task 2.3: Error handling framework
+
+Then Phase 2 begins immediately with testing expansion.
+
+════════════════════════════════════════════════════════════════════════════════
+
+USEFUL COMMANDS FOR THIS WEEK:
+
+# Check skill is installed:
+Test-Path "$env:USERPROFILE\.agents\skills\owasp-security-check\SKILL.md"
+
+# Run tests with coverage:
+python -m pytest tests/ -v --cov src --cov-report=html
+
+# Check code style:
+pip install black pylint; black src/ --check
+
+# Run security scan locally:
+pip install bandit; bandit -r api/app src/
+
+# Start API for manual testing:
+cd api && python -m uvicorn app.main:app --reload
+
+# View auto-generated API docs:
+Open browser to http://localhost:8000/docs
+
+════════════════════════════════════════════════════════════════════════════════
+
+DAILY STANDUP TEMPLATE (Use this each day):
+
+Date: _______________
+Standup Lead: _______
+
+What did you complete yesterday?
+[ ] _____________________________________
+
+What are you doing today?
+[ ] _____________________________________
+
+What blockers do you have?
+[ ] _____________________________________
+
+Metrics:
+ Coverage: __%
+ Tests passing: __
+ Errors: __
+
+Status: 🟢 On Track / 🟡 At Risk / 🔴 Blocked
+════════════════════════════════════════════════════════════════════════════════
diff --git a/docs/plans/2026-02-13-ragbot-improvements.md b/docs/plans/2026-02-13-ragbot-improvements.md
new file mode 100644
index 0000000000000000000000000000000000000000..f51862ceeb351ab39fa17f9f6d8d2e69c72f4ec8
--- /dev/null
+++ b/docs/plans/2026-02-13-ragbot-improvements.md
@@ -0,0 +1,417 @@
+# RagBot Improvements Implementation Plan
+
+> **For Claude:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task.
+
+**Goal:** Align RagBot’s workflow outputs, normalization, and guardrails with a reliable, testable, and deterministic implementation.
+
+**Architecture:** Introduce shared biomarker normalization and explicit state fields to remove nondeterminism, unify the workflow response schema, and tighten guardrails (citations, parsing, logging). Update prediction logic and confidence handling for safer outputs and strengthen observability.
+
+**Tech Stack:** Python, LangGraph, FastAPI, Pydantic, LangChain, FAISS, pytest.
+
+---
+
+### Task 1: Shared Biomarker Normalization
+
+**Files:**
+- Create: `src/biomarker_normalization.py`
+- Modify: `api/app/services/extraction.py`
+- Modify: `scripts/chat.py`
+- Test: `tests/test_normalization.py`
+
+**Step 1: Write the failing test**
+
+```python
+from src.biomarker_normalization import normalize_biomarker_name
+
+def test_normalizes_common_aliases():
+ assert normalize_biomarker_name("ldl") == "LDL Cholesterol"
+ assert normalize_biomarker_name("wbc") == "White Blood Cells"
+ assert normalize_biomarker_name("systolic bp") == "Systolic Blood Pressure"
+```
+
+**Step 2: Run test to verify it fails**
+
+Run: `pytest tests/test_normalization.py::test_normalizes_common_aliases -v`
+Expected: FAIL with `ModuleNotFoundError: No module named 'src.biomarker_normalization'`
+
+**Step 3: Write minimal implementation**
+
+```python
+from typing import Dict
+
+NORMALIZATION_MAP: Dict[str, str] = {
+ "ldl": "LDL Cholesterol",
+ "hdl": "HDL Cholesterol",
+ "wbc": "White Blood Cells",
+ "rbc": "Red Blood Cells",
+ "systolicbp": "Systolic Blood Pressure",
+ "diastolicbp": "Diastolic Blood Pressure",
+}
+
+def normalize_biomarker_name(name: str) -> str:
+ key = name.lower().replace(" ", "").replace("-", "").replace("_", "")
+ return NORMALIZATION_MAP.get(key, name)
+```
+
+**Step 4: Run test to verify it passes**
+
+Run: `pytest tests/test_normalization.py::test_normalizes_common_aliases -v`
+Expected: PASS
+
+**Step 5: Wire normalization into API + CLI**
+
+Replace local normalization in `api/app/services/extraction.py` and `scripts/chat.py` with `normalize_biomarker_name` from the new module, and align returned names with `config/biomarker_references.json`.
+
+**Step 6: Commit**
+
+```bash
+git add src/biomarker_normalization.py api/app/services/extraction.py scripts/chat.py tests/test_normalization.py
+git commit -m "feat: centralize biomarker normalization"
+```
+
+### Task 2: Deterministic State Propagation
+
+**Files:**
+- Modify: `src/state.py`
+- Modify: `src/agents/biomarker_analyzer.py`
+- Modify: `src/agents/biomarker_linker.py`
+- Modify: `src/agents/clinical_guidelines.py`
+- Modify: `src/agents/confidence_assessor.py`
+- Test: `tests/test_state_fields.py`
+
+**Step 1: Write the failing test**
+
+```python
+from src.state import GuildState
+
+def test_state_has_biomarker_analysis_field():
+ required_fields = {"biomarker_analysis", "biomarker_flags", "safety_alerts"}
+ assert required_fields.issubset(GuildState.__annotations__.keys())
+```
+
+**Step 2: Run test to verify it fails**
+
+Run: `pytest tests/test_state_fields.py::test_state_has_biomarker_analysis_field -v`
+Expected: FAIL with `AssertionError`
+
+**Step 3: Write minimal implementation**
+
+```python
+class GuildState(TypedDict):
+ biomarker_analysis: Optional[Dict[str, Any]]
+```
+
+Update `biomarker_analyzer.analyze()` to return `biomarker_flags`, `safety_alerts`, and `biomarker_analysis` in the state payload. Update downstream agents to read from `state["biomarker_analysis"]` instead of scanning `agent_outputs`.
+
+**Step 4: Run test to verify it passes**
+
+Run: `pytest tests/test_state_fields.py::test_state_has_biomarker_analysis_field -v`
+Expected: PASS
+
+**Step 5: Commit**
+
+```bash
+git add src/state.py src/agents/biomarker_analyzer.py src/agents/biomarker_linker.py src/agents/clinical_guidelines.py src/agents/confidence_assessor.py tests/test_state_fields.py
+git commit -m "fix: propagate biomarker analysis via state"
+```
+
+### Task 3: Canonical Workflow Response Schema
+
+**Files:**
+- Modify: `src/agents/response_synthesizer.py`
+- Modify: `api/app/services/ragbot.py`
+- Test: `tests/test_response_mapping.py`
+
+**Step 1: Write the failing test**
+
+```python
+from app.services.ragbot import RagBotService
+
+def test_format_response_uses_synthesizer_payload():
+ service = RagBotService()
+ workflow_result = {
+ "biomarker_flags": [{"name": "Glucose", "value": 120, "unit": "mg/dL", "status": "HIGH", "reference_range": "70-100 mg/dL"}],
+ "safety_alerts": [],
+ "prediction_explanation": {"primary_disease": "Diabetes", "confidence": 0.6, "key_drivers": []},
+ "clinical_recommendations": {"immediate_actions": [], "lifestyle_changes": [], "monitoring": []},
+ "confidence_assessment": {"prediction_reliability": "LOW", "evidence_strength": "WEAK", "limitations": []},
+ "patient_summary": {"narrative": ""}
+ }
+ response = service._format_response(
+ request_id="req_test",
+ workflow_result=workflow_result,
+ input_biomarkers={"Glucose": 120},
+ extracted_biomarkers=None,
+ patient_context={},
+ model_prediction={"disease": "Diabetes", "confidence": 0.6, "probabilities": {}},
+ processing_time_ms=10.0
+ )
+ assert response.analysis.biomarker_flags[0].name == "Glucose"
+```
+
+**Step 2: Run test to verify it fails**
+
+Run: `pytest tests/test_response_mapping.py::test_format_response_uses_synthesizer_payload -v`
+Expected: FAIL because `_format_response` reads absent top-level keys.
+
+**Step 3: Write minimal implementation**
+
+Update `ResponseSynthesizerAgent` to include both the existing narrative schema and the API-expected keys at top-level (e.g., `biomarker_flags`, `safety_alerts`, `key_drivers`, `disease_explanation`, `recommendations`, `confidence_assessment`, `alternative_diagnoses`).
+
+Update `_format_response` in `RagBotService` to read from the synthesizer payload first, falling back to legacy keys where needed.
+
+**Step 4: Run test to verify it passes**
+
+Run: `pytest tests/test_response_mapping.py::test_format_response_uses_synthesizer_payload -v`
+Expected: PASS
+
+**Step 5: Commit**
+
+```bash
+git add src/agents/response_synthesizer.py api/app/services/ragbot.py tests/test_response_mapping.py
+git commit -m "fix: align workflow response schema"
+```
+
+### Task 4: Safe Prediction Confidence Handling
+
+**Files:**
+- Modify: `api/app/services/extraction.py`
+- Modify: `scripts/chat.py`
+- Test: `tests/test_prediction_confidence.py`
+
+**Step 1: Write the failing test**
+
+```python
+from app.services.extraction import predict_disease_simple
+
+def test_low_confidence_returns_undetermined():
+ result = predict_disease_simple({})
+ assert result["confidence"] == 0.0
+ assert result["disease"] == "Undetermined"
+```
+
+**Step 2: Run test to verify it fails**
+
+Run: `pytest tests/test_prediction_confidence.py::test_low_confidence_returns_undetermined -v`
+Expected: FAIL because confidence is forced to 0.5 and disease defaults to Diabetes.
+
+**Step 3: Write minimal implementation**
+
+Update both `predict_disease_simple` functions to:
+- Preserve computed confidence (no forced minimum).
+- Return `{ disease: "Undetermined", confidence: 0.0 }` when all scores are 0.
+- Keep probabilities normalized when totals are > 0, otherwise return uniform probabilities.
+
+**Step 4: Run test to verify it passes**
+
+Run: `pytest tests/test_prediction_confidence.py::test_low_confidence_returns_undetermined -v`
+Expected: PASS
+
+**Step 5: Commit**
+
+```bash
+git add api/app/services/extraction.py scripts/chat.py tests/test_prediction_confidence.py
+git commit -m "fix: remove forced disease default"
+```
+
+### Task 5: Citation Enforcement Guardrails
+
+**Files:**
+- Modify: `src/agents/disease_explainer.py`
+- Modify: `src/agents/biomarker_linker.py`
+- Modify: `src/agents/clinical_guidelines.py`
+- Test: `tests/test_citation_guardrails.py`
+
+**Step 1: Write the failing test**
+
+```python
+from src.agents.disease_explainer import create_disease_explainer_agent
+
+class EmptyRetriever:
+ def invoke(self, query):
+ return []
+
+def test_disease_explainer_requires_citations():
+ agent = create_disease_explainer_agent(EmptyRetriever())
+ state = {"model_prediction": {"disease": "Diabetes", "confidence": 0.6}, "sop": type("SOP", (), {"disease_explainer_k": 3, "require_pdf_citations": True})()}
+ result = agent.explain(state)
+ findings = result["agent_outputs"][0].findings
+ assert findings["citations"] == []
+ assert "insufficient" in findings["pathophysiology"].lower()
+```
+
+**Step 2: Run test to verify it fails**
+
+Run: `pytest tests/test_citation_guardrails.py::test_disease_explainer_requires_citations -v`
+Expected: FAIL because empty docs still produce a normal explanation.
+
+**Step 3: Write minimal implementation**
+
+Update each RAG agent to:
+- If `state["sop"].require_pdf_citations` is True and `docs` is empty, return a safe fallback explanation and empty citations.
+- Include a `citations_missing` flag in their findings for visibility.
+
+**Step 4: Run test to verify it passes**
+
+Run: `pytest tests/test_citation_guardrails.py::test_disease_explainer_requires_citations -v`
+Expected: PASS
+
+**Step 5: Commit**
+
+```bash
+git add src/agents/disease_explainer.py src/agents/biomarker_linker.py src/agents/clinical_guidelines.py tests/test_citation_guardrails.py
+git commit -m "fix: enforce citation guardrails"
+```
+
+### Task 6: Logging Cleanup (ASCII Only for API)
+
+**Files:**
+- Modify: `src/workflow.py`
+- Modify: `api/app/main.py`
+- Modify: `api/app/services/ragbot.py`
+- Modify: `src/pdf_processor.py`
+- Modify: `scripts/setup_embeddings.py`
+
+**Step 1: Replace non-ASCII log glyphs**
+
+Search for prefixes like `dY`, `s,?`, `o.` in non-CLI modules and replace with ASCII equivalents (e.g., `INFO:`, `WARN:`, `OK:`). Keep CLI in `scripts/chat.py` untouched unless it impacts API.
+
+**Step 2: Run a quick lint-like grep check**
+
+Run: `python -c "import pathlib, re; files=[p for p in pathlib.Path('.').rglob('*.py') if 'scripts/chat.py' not in p.as_posix()]; bad=[p for p in files if re.search(r'dY|s,\?|o\.', p.read_text(encoding='utf-8'))]; print(bad)"`
+Expected: `[]`
+
+**Step 3: Commit**
+
+```bash
+git add src/workflow.py api/app/main.py api/app/services/ragbot.py src/pdf_processor.py scripts/setup_embeddings.py
+git commit -m "chore: normalize API logging"
+```
+
+### Task 7: Model Selection Centralization
+
+**Files:**
+- Modify: `src/llm_config.py`
+- Modify: `src/agents/response_synthesizer.py`
+
+**Step 1: Write the failing test**
+
+```python
+from src.llm_config import llm_config
+
+def test_get_synthesizer_returns_default():
+ assert llm_config.get_synthesizer() is not None
+```
+
+**Step 2: Run test to verify it fails (if needed)**
+
+Run: `pytest tests/test_llm_config.py::test_get_synthesizer_returns_default -v`
+Expected: PASS (if already works) or FAIL if missing. If it passes, skip to Step 3.
+
+**Step 3: Write minimal implementation**
+
+Ensure `LLMConfig.get_synthesizer()` honors optional model names from config/SOP and `ResponseSynthesizerAgent` uses this method without hard-coded model strings.
+
+**Step 4: Commit**
+
+```bash
+git add src/llm_config.py src/agents/response_synthesizer.py
+git commit -m "refactor: centralize synthesizer selection"
+```
+
+### Task 8: Robust JSON Extraction Parsing
+
+**Files:**
+- Modify: `api/app/services/extraction.py`
+- Modify: `scripts/chat.py`
+- Test: `tests/test_json_parsing.py`
+
+**Step 1: Write the failing test**
+
+```python
+from app.services.extraction import _parse_llm_json
+
+def test_parse_llm_json_recovers_embedded_object():
+ content = "Here is your JSON:\n```json\n{\"biomarkers\": {\"Glucose\": 140}}\n```"
+ parsed = _parse_llm_json(content)
+ assert parsed["biomarkers"]["Glucose"] == 140
+```
+
+**Step 2: Run test to verify it fails**
+
+Run: `pytest tests/test_json_parsing.py::test_parse_llm_json_recovers_embedded_object -v`
+Expected: FAIL with `AttributeError` or JSON decode error.
+
+**Step 3: Write minimal implementation**
+
+Add `_parse_llm_json` helper to isolate JSON parsing with:
+- Code fence stripping.
+- Fallback to first `{` and last `}` if parsing fails.
+
+Use the helper in both `extract_biomarkers` functions.
+
+**Step 4: Run test to verify it passes**
+
+Run: `pytest tests/test_json_parsing.py::test_parse_llm_json_recovers_embedded_object -v`
+Expected: PASS
+
+**Step 5: Commit**
+
+```bash
+git add api/app/services/extraction.py scripts/chat.py tests/test_json_parsing.py
+git commit -m "fix: harden JSON extraction parsing"
+```
+
+### Task 9: Error Context + Expected Biomarker Count
+
+**Files:**
+- Modify: `src/biomarker_validator.py`
+- Modify: `src/agents/confidence_assessor.py`
+- Modify: `api/app/services/ragbot.py`
+
+**Step 1: Write the failing test**
+
+```python
+from src.biomarker_validator import BiomarkerValidator
+
+def test_expected_biomarker_count_is_reference_size():
+ validator = BiomarkerValidator()
+ assert validator.expected_biomarker_count() == len(validator.references)
+```
+
+**Step 2: Run test to verify it fails**
+
+Run: `pytest tests/test_validator_count.py::test_expected_biomarker_count_is_reference_size -v`
+Expected: FAIL with `AttributeError: expected_biomarker_count`
+
+**Step 3: Write minimal implementation**
+
+Add `expected_biomarker_count()` to `BiomarkerValidator` and use it in `ConfidenceAssessorAgent` instead of hard-coded 24.
+
+Wrap errors in `RagBotService.analyze()` with an error message that includes the agent or stage if available (e.g., `Analysis failed during workflow execution`).
+
+**Step 4: Run test to verify it passes**
+
+Run: `pytest tests/test_validator_count.py::test_expected_biomarker_count_is_reference_size -v`
+Expected: PASS
+
+**Step 5: Commit**
+
+```bash
+git add src/biomarker_validator.py src/agents/confidence_assessor.py api/app/services/ragbot.py tests/test_validator_count.py
+git commit -m "fix: derive expected biomarker count"
+```
+
+---
+
+## Full Test Pass (Post-Implementation)
+
+Run: `pytest -v`
+Expected: All tests pass.
+
+---
+
+## Notes
+
+- If any tests require API keys, mark them with `@pytest.mark.integration` and skip by default.
+- Keep CLI behavior intact while removing non-ASCII logging in API modules.
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 0000000000000000000000000000000000000000..23419702d7e9d95b576fc6c85c728ae2da1892a4
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,3 @@
+[pytest]
+filterwarnings =
+ ignore::langchain_core._api.deprecation.LangChainDeprecationWarning
diff --git a/scripts/README.md b/scripts/README.md
index 0019472cb60bb2d105f14530e5d1831727664260..e4eb218653a9c133483e4c2a8ba25c42999834ab 100644
--- a/scripts/README.md
+++ b/scripts/README.md
@@ -9,7 +9,7 @@ Interactive command-line chatbot for analyzing blood test results.
**Usage:**
```bash
-python scripts/chat.py
+.\.venv\Scripts\python.exe scripts/chat.py
```
**Features:**
@@ -32,10 +32,10 @@ Builds or rebuilds the FAISS vector store from medical PDFs.
**Usage:**
```bash
# Build/update vector store
-python scripts/setup_embeddings.py
+.\.venv\Scripts\python.exe scripts/setup_embeddings.py
# Force complete rebuild
-python scripts/setup_embeddings.py --force-rebuild
+.\.venv\Scripts\python.exe scripts/setup_embeddings.py --force-rebuild
```
**What it does:**
@@ -59,7 +59,7 @@ Tests the extraction and validation of biomarkers from user input.
**Usage:**
```bash
-python scripts/test_extraction.py
+.\.venv\Scripts\python.exe scripts/test_extraction.py
```
---
@@ -69,7 +69,7 @@ Runs predefined test cases through the chat system.
**Usage:**
```bash
-python scripts/test_chat_demo.py
+.\.venv\Scripts\python.exe scripts/test_chat_demo.py
```
---
@@ -79,7 +79,7 @@ Monitors system performance and vector store status.
**Usage:**
```bash
-python scripts/monitor_test.py
+.\.venv\Scripts\python.exe scripts/monitor_test.py
```
---
@@ -124,11 +124,11 @@ Tests basic API endpoints.
| Script | Purpose | Command |
|--------|---------|---------|
-| `chat.py` | Interactive biomarker analysis | `python scripts/chat.py` |
-| `setup_embeddings.py` | Build vector store | `python scripts/setup_embeddings.py` |
-| `test_extraction.py` | Test biomarker extraction | `python scripts/test_extraction.py` |
-| `test_chat_demo.py` | Test chat system | `python scripts/test_chat_demo.py` |
-| `monitor_test.py` | Monitor system performance | `python scripts/monitor_test.py` |
+| `chat.py` | Interactive biomarker analysis | `.\.venv\Scripts\python.exe scripts/chat.py` |
+| `setup_embeddings.py` | Build vector store | `.\.venv\Scripts\python.exe scripts/setup_embeddings.py` |
+| `test_extraction.py` | Test biomarker extraction | `.\.venv\Scripts\python.exe scripts/test_extraction.py` |
+| `test_chat_demo.py` | Test chat system | `.\.venv\Scripts\python.exe scripts/test_chat_demo.py` |
+| `monitor_test.py` | Monitor system performance | `.\.venv\Scripts\python.exe scripts/monitor_test.py` |
| `run_api.ps1` | Start REST API | `.\scripts\run_api.ps1` |
| `start_api.ps1` | Start API (alt) | `.\scripts\start_api.ps1` |
| `test_api_simple.ps1` | Test API | `.\scripts\test_api_simple.ps1` |
diff --git a/scripts/chat.py b/scripts/chat.py
index 78eb86b2e068caa0ce3e582b1ccd0aff3c29276b..b5e11a1bda6c75135fab1549ba50ecd1a7359a09 100644
--- a/scripts/chat.py
+++ b/scripts/chat.py
@@ -6,6 +6,21 @@ Enables natural language conversation with the RAG system
import json
import sys
import os
+import logging
+import warnings
+
+# ── Silence HuggingFace / transformers noise BEFORE any ML library is loaded ──
+os.environ.setdefault("TOKENIZERS_PARALLELISM", "false")
+os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")
+os.environ.setdefault("HF_HUB_DISABLE_IMPLICIT_TOKEN", "1")
+os.environ.setdefault("TRANSFORMERS_NO_ADVISORY_WARNINGS", "1")
+os.environ.setdefault("TRANSFORMERS_VERBOSITY", "error")
+logging.getLogger("transformers").setLevel(logging.ERROR)
+logging.getLogger("sentence_transformers").setLevel(logging.ERROR)
+logging.getLogger("huggingface_hub").setLevel(logging.ERROR)
+warnings.filterwarnings("ignore", message=".*class.*HuggingFaceEmbeddings.*was deprecated.*")
+# ─────────────────────────────────────────────────────────────────────────────
+
from pathlib import Path
from typing import Dict, Any, Tuple
from datetime import datetime
@@ -15,18 +30,17 @@ if sys.platform == 'win32':
try:
sys.stdout.reconfigure(encoding='utf-8')
sys.stderr.reconfigure(encoding='utf-8')
- except:
- # Fallback for older Python versions
+ except Exception:
import codecs
sys.stdout = codecs.getwriter('utf-8')(sys.stdout.buffer, 'strict')
sys.stderr = codecs.getwriter('utf-8')(sys.stderr.buffer, 'strict')
- # Set console to UTF-8
os.system('chcp 65001 > nul 2>&1')
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from langchain_core.prompts import ChatPromptTemplate
+from src.biomarker_normalization import normalize_biomarker_name
from src.llm_config import get_chat_model
from src.workflow import create_guild
from src.state import PatientInput
@@ -68,67 +82,23 @@ If you cannot find any biomarkers, return {{"biomarkers": {{}}, "patient_context
# Component 1: Biomarker Extraction
# ============================================================================
-def normalize_biomarker_name(name: str) -> str:
- """Normalize biomarker names to standard format matching biomarker_references.json"""
- name_lower = name.lower().replace(" ", "").replace("-", "").replace("_", "")
-
- # Mapping of variations to standard names (matching biomarker_references.json)
- mappings = {
- "glucose": "Glucose",
- "bloodsugar": "Glucose",
- "bloodglucose": "Glucose",
- "cholesterol": "Cholesterol",
- "totalcholesterol": "Cholesterol",
- "triglycerides": "Triglycerides",
- "trig": "Triglycerides",
- "hba1c": "HbA1c",
- "a1c": "HbA1c",
- "hemoglobina1c": "HbA1c",
- "ldl": "LDL Cholesterol",
- "ldlcholesterol": "LDL Cholesterol",
- "hdl": "HDL Cholesterol",
- "hdlcholesterol": "HDL Cholesterol",
- "insulin": "Insulin",
- "bmi": "BMI",
- "bodymassindex": "BMI",
- "hemoglobin": "Hemoglobin",
- "hgb": "Hemoglobin",
- "hb": "Hemoglobin",
- "platelets": "Platelets",
- "plt": "Platelets",
- "wbc": "White Blood Cells",
- "whitebloodcells": "White Blood Cells",
- "whitecells": "White Blood Cells",
- "rbc": "Red Blood Cells",
- "redbloodcells": "Red Blood Cells",
- "redcells": "Red Blood Cells",
- "hematocrit": "Hematocrit",
- "hct": "Hematocrit",
- "mcv": "Mean Corpuscular Volume",
- "meancorpuscularvolume": "Mean Corpuscular Volume",
- "mch": "Mean Corpuscular Hemoglobin",
- "meancorpuscularhemoglobin": "Mean Corpuscular Hemoglobin",
- "mchc": "Mean Corpuscular Hemoglobin Concentration",
- "heartrate": "Heart Rate",
- "hr": "Heart Rate",
- "pulse": "Heart Rate",
- "systolicbp": "Systolic Blood Pressure",
- "systolic": "Systolic Blood Pressure",
- "sbp": "Systolic Blood Pressure",
- "diastolicbp": "Diastolic Blood Pressure",
- "diastolic": "Diastolic Blood Pressure",
- "dbp": "Diastolic Blood Pressure",
- "troponin": "Troponin",
- "creactiveprotein": "C-reactive Protein",
- "crp": "C-reactive Protein",
- "alt": "ALT",
- "alanineaminotransferase": "ALT",
- "ast": "AST",
- "aspartateaminotransferase": "AST",
- "creatinine": "Creatinine",
- }
-
- return mappings.get(name_lower, name)
+def _parse_llm_json(content: str) -> Dict[str, Any]:
+ """Parse JSON payload from LLM output with fallback recovery."""
+ text = content.strip()
+
+ if "```json" in text:
+ text = text.split("```json")[1].split("```")[0].strip()
+ elif "```" in text:
+ text = text.split("```")[1].split("```")[0].strip()
+
+ try:
+ return json.loads(text)
+ except json.JSONDecodeError:
+ left = text.find("{")
+ right = text.rfind("}")
+ if left != -1 and right != -1 and right > left:
+ return json.loads(text[left:right + 1])
+ raise
def extract_biomarkers(user_message: str) -> Tuple[Dict[str, float], Dict[str, Any]]:
@@ -139,7 +109,6 @@ def extract_biomarkers(user_message: str) -> Tuple[Dict[str, float], Dict[str, A
Tuple of (biomarkers_dict, patient_context_dict)
"""
try:
- print(f" [DEBUG] Extracting from: '{user_message[:50]}...'")
llm = get_chat_model(temperature=0.0)
prompt = ChatPromptTemplate.from_template(BIOMARKER_EXTRACTION_PROMPT)
@@ -148,28 +117,17 @@ def extract_biomarkers(user_message: str) -> Tuple[Dict[str, float], Dict[str, A
# Parse JSON from LLM response
content = response.content.strip()
- print(f" [DEBUG] LLM response: {content[:200]}...")
- # Try to extract JSON if wrapped in markdown code blocks
- if "```json" in content:
- content = content.split("```json")[1].split("```")[0].strip()
- elif "```" in content:
- content = content.split("```")[1].split("```")[0].strip()
-
- extracted = json.loads(content)
+ extracted = _parse_llm_json(content)
biomarkers = extracted.get("biomarkers", {})
patient_context = extracted.get("patient_context", {})
- print(f" [DEBUG] Extracted biomarkers: {biomarkers}")
- print(f" [DEBUG] Patient context: {patient_context}")
-
# Normalize biomarker names
normalized = {}
for key, value in biomarkers.items():
try:
standard_name = normalize_biomarker_name(key)
normalized[standard_name] = float(value)
- print(f" [DEBUG] Normalized '{key}' -> '{standard_name}' = {value}")
except (ValueError, TypeError) as e:
print(f"⚠️ Skipping invalid value for {key}: {value} (error: {e})")
continue
@@ -177,7 +135,6 @@ def extract_biomarkers(user_message: str) -> Tuple[Dict[str, float], Dict[str, A
# Clean up patient context (remove null values)
patient_context = {k: v for k, v in patient_context.items() if v is not None}
- print(f" [DEBUG] Final normalized: {normalized}")
return normalized, patient_context
except Exception as e:
@@ -203,63 +160,73 @@ def predict_disease_simple(biomarkers: Dict[str, float]) -> Dict[str, Any]:
"Thalassemia": 0.0
}
+ # Helper: check both abbreviated and normalized biomarker names
+ # Returns None when biomarker is not present (avoids false triggers)
+ def _get(name, *alt_names):
+ val = biomarkers.get(name, None)
+ if val is not None:
+ return val
+ for alt in alt_names:
+ val = biomarkers.get(alt, None)
+ if val is not None:
+ return val
+ return None
+
# Diabetes indicators
- glucose = biomarkers.get("Glucose", 0)
- hba1c = biomarkers.get("HbA1c", 0)
- if glucose > 126:
+ glucose = _get("Glucose")
+ hba1c = _get("HbA1c")
+ if glucose is not None and glucose > 126:
scores["Diabetes"] += 0.4
- if glucose > 180:
+ if glucose is not None and glucose > 180:
scores["Diabetes"] += 0.2
- if hba1c >= 6.5:
+ if hba1c is not None and hba1c >= 6.5:
scores["Diabetes"] += 0.5
# Anemia indicators
- hemoglobin = biomarkers.get("Hemoglobin", 0)
- mcv = biomarkers.get("MCV", 0)
- if hemoglobin < 12.0:
+ hemoglobin = _get("Hemoglobin")
+ mcv = _get("Mean Corpuscular Volume", "MCV")
+ if hemoglobin is not None and hemoglobin < 12.0:
scores["Anemia"] += 0.6
- if hemoglobin < 10.0:
+ if hemoglobin is not None and hemoglobin < 10.0:
scores["Anemia"] += 0.2
- if mcv < 80:
+ if mcv is not None and mcv < 80:
scores["Anemia"] += 0.2
# Heart disease indicators
- cholesterol = biomarkers.get("Cholesterol", 0)
- troponin = biomarkers.get("Troponin", 0)
- ldl = biomarkers.get("LDL", 0)
- if cholesterol > 240:
+ cholesterol = _get("Cholesterol")
+ troponin = _get("Troponin")
+ ldl = _get("LDL Cholesterol", "LDL")
+ if cholesterol is not None and cholesterol > 240:
scores["Heart Disease"] += 0.3
- if troponin > 0.04:
+ if troponin is not None and troponin > 0.04:
scores["Heart Disease"] += 0.6
- if ldl > 190:
+ if ldl is not None and ldl > 190:
scores["Heart Disease"] += 0.2
# Thrombocytopenia indicators
- platelets = biomarkers.get("Platelets", 0)
- if platelets < 150000:
+ platelets = _get("Platelets")
+ if platelets is not None and platelets < 150000:
scores["Thrombocytopenia"] += 0.6
- if platelets < 50000:
+ if platelets is not None and platelets < 50000:
scores["Thrombocytopenia"] += 0.3
# Thalassemia indicators (complex, simplified here)
- if mcv < 80 and hemoglobin < 12.0:
+ if mcv is not None and hemoglobin is not None and mcv < 80 and hemoglobin < 12.0:
scores["Thalassemia"] += 0.4
# Find top prediction
top_disease = max(scores, key=scores.get)
- confidence = scores[top_disease]
+ confidence = min(scores[top_disease], 1.0) # Cap at 1.0 for Pydantic validation
- # Ensure at least 0.5 confidence
- if confidence < 0.5:
- confidence = 0.5
- top_disease = "Diabetes" # Default
+ if confidence == 0.0:
+ top_disease = "Undetermined"
# Normalize probabilities to sum to 1.0
total = sum(scores.values())
if total > 0:
- probabilities = {k: v/total for k, v in scores.items()}
+ probabilities = {k: v / total for k, v in scores.items()}
else:
- probabilities = scores
+ probabilities = {k: 1.0 / len(scores) for k in scores}
return {
"disease": top_disease,
@@ -274,7 +241,6 @@ def predict_disease_llm(biomarkers: Dict[str, float], patient_context: Dict) ->
Falls back to rule-based if LLM fails.
"""
try:
- print(f" [DEBUG] Predicting for biomarkers: {biomarkers}")
llm = get_chat_model(temperature=0.0)
prompt = f"""You are a medical AI assistant. Based on these biomarker values,
@@ -302,19 +268,11 @@ Return ONLY valid JSON (no other text):
response = llm.invoke(prompt)
content = response.content.strip()
- print(f" [DEBUG] Prediction LLM response: {content[:200]}...")
-
- # Try to extract JSON if wrapped in markdown
- if "```json" in content:
- content = content.split("```json")[1].split("```")[0].strip()
- elif "```" in content:
- content = content.split("```")[1].split("```")[0].strip()
- prediction = json.loads(content)
+ prediction = _parse_llm_json(content)
# Validate required fields
if "disease" in prediction and "confidence" in prediction and "probabilities" in prediction:
- print(f" [DEBUG] LLM prediction successful: {prediction['disease']} ({prediction['confidence']:.0%})")
return prediction
else:
raise ValueError("Invalid prediction format")
@@ -330,16 +288,31 @@ Return ONLY valid JSON (no other text):
# Component 3: Conversational Formatter
# ============================================================================
+def _coerce_to_dict(obj) -> Dict:
+ """Convert a Pydantic model or arbitrary object to a plain dict."""
+ if isinstance(obj, dict):
+ return obj
+ if hasattr(obj, "model_dump"):
+ return obj.model_dump()
+ if hasattr(obj, "__dict__"):
+ return obj.__dict__
+ return {}
+
+
def format_conversational(result: Dict[str, Any], user_name: str = "there") -> str:
"""
Format technical JSON output into conversational response.
"""
+ if not isinstance(result, dict):
+ result = {}
+
# Extract key information
- summary = result.get("patient_summary", {})
- prediction = result.get("prediction_explanation", {})
- recommendations = result.get("clinical_recommendations", {})
- confidence = result.get("confidence_assessment", {})
- alerts = result.get("safety_alerts", [])
+ summary = result.get("patient_summary", {}) or {}
+ prediction = result.get("prediction_explanation", {}) or {}
+ recommendations = result.get("clinical_recommendations", {}) or {}
+ confidence = result.get("confidence_assessment", {}) or {}
+ # Normalize: items may be Pydantic SafetyAlert objects or plain dicts
+ alerts = [_coerce_to_dict(a) for a in (result.get("safety_alerts") or [])]
disease = prediction.get("primary_disease", "Unknown")
conf_score = prediction.get("confidence", 0.0)
@@ -430,13 +403,13 @@ def run_example_case(guild):
"HbA1c": 8.2,
"Cholesterol": 235.0,
"Triglycerides": 210.0,
- "HDL": 38.0,
- "LDL": 160.0,
+ "HDL Cholesterol": 38.0,
+ "LDL Cholesterol": 160.0,
"Hemoglobin": 13.5,
"Platelets": 220000,
- "WBC": 7500,
- "Systolic BP": 145,
- "Diastolic BP": 92
+ "White Blood Cells": 7500,
+ "Systolic Blood Pressure": 145,
+ "Diastolic Blood Pressure": 92
}
prediction = {
@@ -460,7 +433,7 @@ def run_example_case(guild):
print("🔄 Running analysis...\n")
result = guild.run(patient_input)
- response = format_conversational(result, "there")
+ response = format_conversational(result.get("final_response", result), "there")
print("\n" + "="*70)
print("🤖 RAG-BOT:")
print("="*70)
@@ -471,25 +444,45 @@ def run_example_case(guild):
def save_report(result: Dict, biomarkers: Dict):
"""Save detailed JSON report to file"""
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
- disease = result.get("prediction_explanation", {}).get("primary_disease", "unknown")
+
+ # final_response is already a plain dict built by the synthesizer
+ final = result.get("final_response") or {}
+ disease = (
+ final.get("prediction_explanation", {}).get("primary_disease")
+ or result.get("model_prediction", {}).get("disease", "unknown")
+ )
disease_safe = disease.replace(' ', '_').replace('/', '_')
filename = f"report_{disease_safe}_{timestamp}.json"
-
+
output_dir = Path("data/chat_reports")
output_dir.mkdir(parents=True, exist_ok=True)
-
+
filepath = output_dir / filename
-
- # Add biomarkers to report
+
+ def _to_dict(obj):
+ """Recursively convert Pydantic models / non-serializable objects."""
+ if isinstance(obj, dict):
+ return {k: _to_dict(v) for k, v in obj.items()}
+ if isinstance(obj, list):
+ return [_to_dict(i) for i in obj]
+ if hasattr(obj, "model_dump"): # Pydantic v2
+ return _to_dict(obj.model_dump())
+ if hasattr(obj, "dict"): # Pydantic v1
+ return _to_dict(obj.dict())
+ # Scalars and other primitives are returned as-is
+ return obj
+
report = {
"timestamp": timestamp,
"biomarkers_input": biomarkers,
- "analysis_result": result
+ "final_response": _to_dict(final),
+ "biomarker_flags": _to_dict(result.get("biomarker_flags", [])),
+ "safety_alerts": _to_dict(result.get("safety_alerts", [])),
}
-
+
with open(filepath, 'w') as f:
json.dump(report, f, indent=2)
-
+
print(f"✅ Report saved to: {filepath}\n")
@@ -521,9 +514,9 @@ def chat_interface():
except Exception as e:
print(f"❌ Failed to initialize system: {e}")
print("\nMake sure:")
- print(" • Ollama is running (ollama serve)")
- print(" • Vector store exists (run: python src/pdf_processor.py)")
- print(" • Models are pulled (ollama pull llama3.1:8b-instruct)")
+ print(" • API key is set in .env (GROQ_API_KEY or GOOGLE_API_KEY)")
+ print(" • Vector store exists (run: python scripts/setup_embeddings.py)")
+ print(" • Internet connection is available for cloud LLM")
return
# Main conversation loop
@@ -573,7 +566,6 @@ def chat_interface():
print("🧠 Predicting likely condition...")
prediction = predict_disease_llm(biomarkers, patient_context)
print(f"✅ Predicted: {prediction['disease']} ({prediction['confidence']:.0%} confidence)")
- print(f" [DEBUG] Full prediction: {prediction}")
# Create PatientInput
patient_input = PatientInput(
@@ -582,11 +574,6 @@ def chat_interface():
patient_context=patient_context if patient_context else {"source": "chat"}
)
- print(f" [DEBUG] PatientInput created:")
- print(f" - Biomarkers: {patient_input.biomarkers}")
- print(f" - Prediction: {patient_input.model_prediction}")
- print(f" - Context: {patient_input.patient_context}")
-
# Run full RAG workflow
print("📚 Consulting medical knowledge base...")
print(" (This may take 15-25 seconds...)\n")
@@ -594,7 +581,7 @@ def chat_interface():
result = guild.run(patient_input)
# Format conversational response
- response = format_conversational(result, user_name)
+ response = format_conversational(result.get("final_response", result), user_name)
# Display response
print("\n" + "="*70)
@@ -624,9 +611,11 @@ def chat_interface():
print("\n\n👋 Interrupted. Thank you for using MediGuard AI!")
break
except Exception as e:
+ import traceback
+ traceback.print_exc()
print(f"\n❌ Analysis failed: {e}")
print("\nThis might be due to:")
- print(" • Ollama not running (start with: ollama serve)")
+ print(" • API key not configured (check .env file)")
print(" • Insufficient system memory")
print(" • Invalid biomarker values")
print("\nTry again or type 'quit' to exit.\n")
diff --git a/scripts/setup_embeddings.py b/scripts/setup_embeddings.py
index 4a82c01e9efac9af5b188e0cf589b7bde7562b5c..8dc9f6534cd2c03723642989ae9ef9ef59ede8c5 100644
--- a/scripts/setup_embeddings.py
+++ b/scripts/setup_embeddings.py
@@ -9,20 +9,20 @@ def setup_google_api_key():
"""Interactive setup for Google API key"""
print("="*70)
- print("🚀 Fast Embeddings Setup - Google Gemini API")
+ print("Fast Embeddings Setup - Google Gemini API")
print("="*70)
- print("\n📌 Why Google Gemini?")
- print(" • 100x faster than local Ollama (2 mins vs 30+ mins)")
- print(" • FREE for standard usage")
- print(" • High quality embeddings")
- print(" • Automatic fallback to Ollama if unavailable")
+ print("\nWhy Google Gemini?")
+ print(" - 100x faster than local Ollama (2 mins vs 30+ mins)")
+ print(" - FREE for standard usage")
+ print(" - High quality embeddings")
+ print(" - Automatic fallback to Ollama if unavailable")
print("\n" + "="*70)
print("Step 1: Get Your Free API Key")
print("="*70)
print("\n1. Open this URL in your browser:")
- print(" 👉 https://aistudio.google.com/app/apikey")
+ print(" https://aistudio.google.com/app/apikey")
print("\n2. Sign in with Google account")
print("3. Click 'Create API Key'")
print("4. Copy the key (starts with 'AIza...')")
@@ -32,11 +32,11 @@ def setup_google_api_key():
api_key = input("\nPaste your Google API key here: ").strip()
if not api_key:
- print("\n❌ No API key provided. Using local Ollama instead.")
+ print("\nNo API key provided. Using local Ollama instead.")
return False
if not api_key.startswith("AIza"):
- print("\n⚠️ Warning: Key doesn't start with 'AIza'. Are you sure this is correct?")
+ print("\nWarning: Key doesn't start with 'AIza'. Are you sure this is correct?")
confirm = input("Continue anyway? (y/n): ").strip().lower()
if confirm != 'y':
return False
@@ -66,7 +66,7 @@ def setup_google_api_key():
with open(env_path, 'w') as f:
f.write(f'GOOGLE_API_KEY="{api_key}"\n')
- print("\n✅ API key saved to .env file!")
+ print("\nAPI key saved to .env file!")
print("\n" + "="*70)
print("Step 2: Build Vector Store")
print("="*70)
@@ -82,6 +82,6 @@ if __name__ == "__main__":
try:
setup_google_api_key()
except KeyboardInterrupt:
- print("\n\n❌ Setup cancelled.")
+ print("\n\nSetup cancelled.")
except Exception as e:
- print(f"\n❌ Error: {e}")
+ print(f"\nError: {e}")
diff --git a/src/__init__.py b/src/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..220c5556d1c9725b9363b7bac6d3976ad5fd3279
--- /dev/null
+++ b/src/__init__.py
@@ -0,0 +1,3 @@
+"""
+MediGuard AI RAG-Helper - Core Source Package
+"""
diff --git a/src/agents/__init__.py b/src/agents/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8c74876cf61a1977bbcd45b32d9b8a524434e559
--- /dev/null
+++ b/src/agents/__init__.py
@@ -0,0 +1,3 @@
+"""
+MediGuard AI RAG-Helper - Specialist Agents Package
+"""
diff --git a/src/agents/biomarker_analyzer.py b/src/agents/biomarker_analyzer.py
index 9334c490784a8d250688ba05897c68d8056aa688..6f5cbca6b3206ba1ed40fee8a5ce7ea5a39f0bf1 100644
--- a/src/agents/biomarker_analyzer.py
+++ b/src/agents/biomarker_analyzer.py
@@ -53,26 +53,33 @@ class BiomarkerAnalyzerAgent:
# Generate summary using LLM
summary = self._generate_summary(biomarkers, flags, alerts, relevant_biomarkers, predicted_disease)
+ findings = {
+ "biomarker_flags": [flag.model_dump() for flag in flags],
+ "safety_alerts": [alert.model_dump() for alert in alerts],
+ "relevant_biomarkers": relevant_biomarkers,
+ "summary": summary,
+ "validation_complete": True
+ }
+
# Create agent output
output = AgentOutput(
agent_name="Biomarker Analyzer",
- findings={
- "biomarker_flags": [flag.model_dump() for flag in flags],
- "safety_alerts": [alert.model_dump() for alert in alerts],
- "relevant_biomarkers": relevant_biomarkers,
- "summary": summary,
- "validation_complete": True
- }
+ findings=findings
)
# Update state
- print(f"\n✓ Analysis complete:")
+ print("\nAnalysis complete:")
print(f" - {len(flags)} biomarkers validated")
print(f" - {len([f for f in flags if f.status != 'NORMAL'])} out-of-range values")
print(f" - {len(alerts)} safety alerts generated")
print(f" - {len(relevant_biomarkers)} disease-relevant biomarkers identified")
- return {'agent_outputs': [output]}
+ return {
+ 'agent_outputs': [output],
+ 'biomarker_flags': flags,
+ 'safety_alerts': alerts,
+ 'biomarker_analysis': findings
+ }
def _generate_summary(
self,
diff --git a/src/agents/biomarker_linker.py b/src/agents/biomarker_linker.py
index 394c0a088298717039d353e351c16d416ba12d7b..0d129f3b29188bad5e8b57f12d95d099c0f29994 100644
--- a/src/agents/biomarker_linker.py
+++ b/src/agents/biomarker_linker.py
@@ -45,18 +45,18 @@ class BiomarkerDiseaseLinkerAgent:
biomarkers = state['patient_biomarkers']
# Get biomarker analysis from previous agent
- biomarker_analysis = self._get_biomarker_analysis(state)
+ biomarker_analysis = state.get('biomarker_analysis') or {}
# Identify key drivers
print(f"\nIdentifying key drivers for {disease}...")
- key_drivers = self._identify_key_drivers(
- disease,
- biomarkers,
+ key_drivers, citations_missing = self._identify_key_drivers(
+ disease,
+ biomarkers,
biomarker_analysis,
state
)
- print(f"✓ Identified {len(key_drivers)} key biomarker drivers")
+ print(f"Identified {len(key_drivers)} key biomarker drivers")
# Create agent output
output = AgentOutput(
@@ -65,29 +65,23 @@ class BiomarkerDiseaseLinkerAgent:
"disease": disease,
"key_drivers": [kd.model_dump() for kd in key_drivers],
"total_drivers": len(key_drivers),
- "feature_importance_calculated": True
+ "feature_importance_calculated": True,
+ "citations_missing": citations_missing
}
)
# Update state
- print(f"\n✓ Biomarker-disease linking complete")
+ print("\nBiomarker-disease linking complete")
return {'agent_outputs': [output]}
- def _get_biomarker_analysis(self, state: GuildState) -> dict:
- """Extract biomarker analysis from previous agent output"""
- for output in state.get('agent_outputs', []):
- if output.agent_name == "Biomarker Analyzer":
- return output.findings
- return {}
-
def _identify_key_drivers(
self,
disease: str,
biomarkers: Dict[str, float],
analysis: dict,
state: GuildState
- ) -> List[KeyDriver]:
+ ) -> tuple[List[KeyDriver], bool]:
"""Identify which biomarkers are driving the disease prediction"""
# Get out-of-range biomarkers from analysis
@@ -113,23 +107,25 @@ class BiomarkerDiseaseLinkerAgent:
print(f" Analyzing {len(key_biomarkers)} key biomarkers...")
# Generate key drivers with evidence
- key_drivers = []
+ key_drivers: List[KeyDriver] = []
+ citations_missing = False
for biomarker_flag in key_biomarkers[:5]: # Top 5
- driver = self._create_key_driver(
+ driver, driver_missing = self._create_key_driver(
biomarker_flag,
disease,
state
)
key_drivers.append(driver)
-
- return key_drivers
+ citations_missing = citations_missing or driver_missing
+
+ return key_drivers, citations_missing
def _create_key_driver(
self,
biomarker_flag: dict,
disease: str,
state: GuildState
- ) -> KeyDriver:
+ ) -> tuple[KeyDriver, bool]:
"""Create a KeyDriver object with evidence from RAG"""
name = biomarker_flag['name']
@@ -140,27 +136,36 @@ class BiomarkerDiseaseLinkerAgent:
# Retrieve evidence linking this biomarker to the disease
query = f"How does {name} relate to {disease}? What does {status} {name} indicate?"
+ citations_missing = False
try:
docs = self.retriever.invoke(query)
- evidence_text = self._extract_evidence(docs, name, disease)
- contribution = self._estimate_contribution(biomarker_flag, len(docs))
+ if state['sop'].require_pdf_citations and not docs:
+ evidence_text = "Insufficient evidence available in the knowledge base."
+ contribution = "Unknown"
+ citations_missing = True
+ else:
+ evidence_text = self._extract_evidence(docs, name, disease)
+ contribution = self._estimate_contribution(biomarker_flag, len(docs))
except Exception as e:
print(f" Warning: Evidence retrieval failed for {name}: {e}")
evidence_text = f"{status} {name} may be related to {disease}."
contribution = "Unknown"
+ citations_missing = True
# Generate explanation using LLM
explanation = self._generate_explanation(
name, value, unit, status, disease, evidence_text
)
- return KeyDriver(
+ driver = KeyDriver(
biomarker=name,
value=value,
contribution=contribution,
explanation=explanation,
evidence=evidence_text[:500] # Truncate long evidence
)
+
+ return driver, citations_missing
def _extract_evidence(self, docs: list, biomarker: str, disease: str) -> str:
"""Extract relevant evidence from retrieved documents"""
diff --git a/src/agents/clinical_guidelines.py b/src/agents/clinical_guidelines.py
index 86f4b92e6a05b1b8d36a035d481618514d608286..608c103cc2b7f6e75c7312f30feab81f8e8e4ff2 100644
--- a/src/agents/clinical_guidelines.py
+++ b/src/agents/clinical_guidelines.py
@@ -45,7 +45,7 @@ class ClinicalGuidelinesAgent:
confidence = model_prediction['confidence']
# Get biomarker analysis
- biomarker_analysis = self._get_biomarker_analysis(state)
+ biomarker_analysis = state.get('biomarker_analysis') or {}
safety_alerts = biomarker_analysis.get('safety_alerts', [])
# Retrieve guidelines
@@ -56,16 +56,26 @@ class ClinicalGuidelinesAgent:
docs = self.retriever.invoke(query)
- print(f"✓ Retrieved {len(docs)} guideline documents")
+ print(f"Retrieved {len(docs)} guideline documents")
# Generate recommendations
- recommendations = self._generate_recommendations(
- disease,
- docs,
- safety_alerts,
- confidence,
- state
- )
+ if state['sop'].require_pdf_citations and not docs:
+ recommendations = {
+ "immediate_actions": [
+ "Insufficient evidence available in the knowledge base. Please consult a healthcare provider."
+ ],
+ "lifestyle_changes": [],
+ "monitoring": [],
+ "citations": []
+ }
+ else:
+ recommendations = self._generate_recommendations(
+ disease,
+ docs,
+ safety_alerts,
+ confidence,
+ state
+ )
# Create agent output
output = AgentOutput(
@@ -76,25 +86,19 @@ class ClinicalGuidelinesAgent:
"lifestyle_changes": recommendations['lifestyle_changes'],
"monitoring": recommendations['monitoring'],
"guideline_citations": recommendations['citations'],
- "safety_priority": len(safety_alerts) > 0
+ "safety_priority": len(safety_alerts) > 0,
+ "citations_missing": state['sop'].require_pdf_citations and not docs
}
)
# Update state
- print(f"\n✓ Recommendations generated")
+ print("\nRecommendations generated")
print(f" - Immediate actions: {len(recommendations['immediate_actions'])}")
print(f" - Lifestyle changes: {len(recommendations['lifestyle_changes'])}")
print(f" - Monitoring recommendations: {len(recommendations['monitoring'])}")
return {'agent_outputs': [output]}
- def _get_biomarker_analysis(self, state: GuildState) -> dict:
- """Extract biomarker analysis from previous agent output"""
- for output in state.get('agent_outputs', []):
- if output.agent_name == "Biomarker Analyzer":
- return output.findings
- return {}
-
def _generate_recommendations(
self,
disease: str,
diff --git a/src/agents/confidence_assessor.py b/src/agents/confidence_assessor.py
index 68bd499865c595c00199834b4ce3254dd806ca8c..19267c6c5bcc8f1daee3cbaa791274c714eb3642 100644
--- a/src/agents/confidence_assessor.py
+++ b/src/agents/confidence_assessor.py
@@ -7,8 +7,9 @@ import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent.parent))
-from typing import Dict, List
+from typing import Any, Dict, List
from src.state import GuildState, AgentOutput
+from src.biomarker_validator import BiomarkerValidator
from src.llm_config import llm_config
from langchain_core.prompts import ChatPromptTemplate
@@ -40,7 +41,7 @@ class ConfidenceAssessorAgent:
biomarkers = state['patient_biomarkers']
# Collect previous agent findings
- biomarker_analysis = self._get_agent_findings(state, "Biomarker Analyzer")
+ biomarker_analysis = state.get('biomarker_analysis') or {}
disease_explanation = self._get_agent_findings(state, "Disease Explainer")
linker_findings = self._get_agent_findings(state, "Biomarker-Disease Linker")
@@ -91,7 +92,7 @@ class ConfidenceAssessorAgent:
)
# Update state
- print(f"\n✓ Confidence assessment complete")
+ print("\nConfidence assessment complete")
print(f" - Prediction reliability: {reliability}")
print(f" - Evidence strength: {evidence_strength}")
print(f" - Limitations identified: {len(limitations)}")
@@ -153,7 +154,7 @@ class ConfidenceAssessorAgent:
limitations = []
# Check for missing biomarkers
- expected_biomarkers = 24
+ expected_biomarkers = BiomarkerValidator().expected_biomarker_count()
if len(biomarkers) < expected_biomarkers:
missing = expected_biomarkers - len(biomarkers)
limitations.append(f"Missing data: {missing} biomarker(s) not provided")
@@ -267,7 +268,7 @@ Be honest about uncertainty. Patient safety is paramount."""
else:
return "Low confidence prediction. Professional medical assessment essential. Additional tests may be required for accurate diagnosis."
- def _get_alternatives(self, probabilities: Dict[str, float]) -> List[Dict[str, any]]:
+ def _get_alternatives(self, probabilities: Dict[str, float]) -> List[Dict[str, Any]]:
"""Get alternative diagnoses to consider"""
sorted_probs = sorted(probabilities.items(), key=lambda x: x[1], reverse=True)
diff --git a/src/agents/disease_explainer.py b/src/agents/disease_explainer.py
index 3c2d38bc5122abb632a322eda98cc77aee714619..5c02e4a8b8c076d400a42b9dc058fa736e85427a 100644
--- a/src/agents/disease_explainer.py
+++ b/src/agents/disease_explainer.py
@@ -43,8 +43,9 @@ class DiseaseExplainerAgent:
disease = model_prediction['disease']
confidence = model_prediction['confidence']
- # Configure retrieval based on SOP
- self.retriever.search_kwargs['k'] = state['sop'].disease_explainer_k
+ # Configure retrieval based on SOP (use copy to avoid mutating shared retriever)
+ retrieval_k = state['sop'].disease_explainer_k
+ self.retriever.search_kwargs = {**self.retriever.search_kwargs, 'k': retrieval_k}
# Retrieve relevant documents
print(f"\nRetrieving information about: {disease}")
@@ -54,8 +55,36 @@ class DiseaseExplainerAgent:
and clinical presentation. Focus on mechanisms relevant to blood biomarkers."""
docs = self.retriever.invoke(query)
-
- print(f"✓ Retrieved {len(docs)} relevant document chunks")
+
+ print(f"Retrieved {len(docs)} relevant document chunks")
+
+ if state['sop'].require_pdf_citations and not docs:
+ explanation = {
+ "pathophysiology": "Insufficient evidence available in the knowledge base to explain this condition.",
+ "diagnostic_criteria": "Insufficient evidence available to list diagnostic criteria.",
+ "clinical_presentation": "Insufficient evidence available to describe clinical presentation.",
+ "summary": "Insufficient evidence available for a detailed explanation."
+ }
+ citations = []
+ output = AgentOutput(
+ agent_name="Disease Explainer",
+ findings={
+ "disease": disease,
+ "pathophysiology": explanation['pathophysiology'],
+ "diagnostic_criteria": explanation['diagnostic_criteria'],
+ "clinical_presentation": explanation['clinical_presentation'],
+ "mechanism_summary": explanation['summary'],
+ "citations": citations,
+ "confidence": confidence,
+ "retrieval_quality": 0,
+ "citations_missing": True
+ }
+ )
+
+ print("\nDisease explanation generated")
+ print(" - Pathophysiology: insufficient evidence")
+ print(" - Citations: 0 sources")
+ return {'agent_outputs': [output]}
# Generate explanation
explanation = self._generate_explanation(disease, docs, confidence)
@@ -74,12 +103,13 @@ class DiseaseExplainerAgent:
"mechanism_summary": explanation['summary'],
"citations": citations,
"confidence": confidence,
- "retrieval_quality": len(docs)
+ "retrieval_quality": len(docs),
+ "citations_missing": False
}
)
# Update state
- print(f"\n✓ Disease explanation generated")
+ print("\nDisease explanation generated")
print(f" - Pathophysiology: {len(explanation['pathophysiology'])} chars")
print(f" - Citations: {len(citations)} sources")
diff --git a/src/agents/response_synthesizer.py b/src/agents/response_synthesizer.py
index 957871720e066f6936c045295ffe25524efd21f2..beb4de127853bbb300a771c0cc5523fd628a1a3c 100644
--- a/src/agents/response_synthesizer.py
+++ b/src/agents/response_synthesizer.py
@@ -18,9 +18,7 @@ class ResponseSynthesizerAgent:
"""Agent that synthesizes all specialist findings into the final response"""
def __init__(self):
- self.llm = llm_config.get_synthesizer(
- model_name="llama3.1:8b" # Use best available model
- )
+ self.llm = llm_config.get_synthesizer()
def synthesize(self, state: GuildState) -> GuildState:
"""
@@ -47,13 +45,28 @@ class ResponseSynthesizerAgent:
print(f"\nSynthesizing findings from {len(agent_outputs)} specialist agents...")
# Build structured response
+ recs = self._build_recommendations(findings)
response = {
"patient_summary": self._build_patient_summary(patient_biomarkers, findings),
"prediction_explanation": self._build_prediction_explanation(model_prediction, findings),
- "clinical_recommendations": self._build_recommendations(findings),
"confidence_assessment": self._build_confidence_assessment(findings),
"safety_alerts": self._build_safety_alerts(findings),
- "metadata": self._build_metadata(state)
+ "metadata": self._build_metadata(state),
+ "biomarker_flags": self._build_biomarker_flags(findings),
+ "key_drivers": self._build_key_drivers(findings),
+ "disease_explanation": self._build_disease_explanation(findings),
+ "recommendations": recs,
+ "clinical_recommendations": recs, # Alias for backward compatibility
+ "alternative_diagnoses": self._build_alternative_diagnoses(findings),
+ "analysis": {
+ "biomarker_flags": self._build_biomarker_flags(findings),
+ "safety_alerts": self._build_safety_alerts(findings),
+ "key_drivers": self._build_key_drivers(findings),
+ "disease_explanation": self._build_disease_explanation(findings),
+ "recommendations": recs,
+ "confidence_assessment": self._build_confidence_assessment(findings),
+ "alternative_diagnoses": self._build_alternative_diagnoses(findings)
+ }
}
# Generate patient-friendly summary
@@ -63,7 +76,7 @@ class ResponseSynthesizerAgent:
response
)
- print(f"\n✓ Response synthesis complete")
+ print("\nResponse synthesis complete")
print(f" - Patient summary: Generated")
print(f" - Prediction explanation: {len(response['prediction_explanation']['key_drivers'])} key drivers")
print(f" - Recommendations: {len(response['clinical_recommendations']['immediate_actions'])} immediate actions")
@@ -125,6 +138,22 @@ class ResponseSynthesizerAgent:
"pathophysiology": disease_explanation.get('pathophysiology', ''),
"pdf_references": disease_explanation.get('citations', [])
}
+
+ def _build_biomarker_flags(self, findings: Dict) -> List[Dict]:
+ biomarker_analysis = findings.get("Biomarker Analyzer", {})
+ return biomarker_analysis.get('biomarker_flags', [])
+
+ def _build_key_drivers(self, findings: Dict) -> List[Dict]:
+ linker_findings = findings.get("Biomarker-Disease Linker", {})
+ return linker_findings.get('key_drivers', [])
+
+ def _build_disease_explanation(self, findings: Dict) -> Dict:
+ disease_explanation = findings.get("Disease Explainer", {})
+ return {
+ "pathophysiology": disease_explanation.get('pathophysiology', ''),
+ "citations": disease_explanation.get('citations', []),
+ "retrieved_chunks": disease_explanation.get('retrieved_chunks')
+ }
def _build_recommendations(self, findings: Dict) -> Dict:
"""Build clinical recommendations section"""
@@ -149,6 +178,10 @@ class ResponseSynthesizerAgent:
"assessment_summary": assessment.get('assessment_summary', ''),
"alternative_diagnoses": assessment.get('alternative_diagnoses', [])
}
+
+ def _build_alternative_diagnoses(self, findings: Dict) -> List[Dict]:
+ assessment = findings.get("Confidence Assessor", {})
+ return assessment.get('alternative_diagnoses', [])
def _build_safety_alerts(self, findings: Dict) -> List[Dict]:
"""Build safety alerts section"""
diff --git a/src/biomarker_normalization.py b/src/biomarker_normalization.py
new file mode 100644
index 0000000000000000000000000000000000000000..046cc0893645f58781f5500b14e95509e6fa5d0f
--- /dev/null
+++ b/src/biomarker_normalization.py
@@ -0,0 +1,93 @@
+"""
+MediGuard AI RAG-Helper
+Shared biomarker normalization utilities
+"""
+
+from typing import Dict
+
+# Normalization map for biomarker aliases to canonical names.
+NORMALIZATION_MAP: Dict[str, str] = {
+ # Glucose variations
+ "glucose": "Glucose",
+ "bloodsugar": "Glucose",
+ "bloodglucose": "Glucose",
+
+ # Lipid panel
+ "cholesterol": "Cholesterol",
+ "totalcholesterol": "Cholesterol",
+ "triglycerides": "Triglycerides",
+ "trig": "Triglycerides",
+ "ldl": "LDL Cholesterol",
+ "ldlcholesterol": "LDL Cholesterol",
+ "hdl": "HDL Cholesterol",
+ "hdlcholesterol": "HDL Cholesterol",
+
+ # Diabetes markers
+ "hba1c": "HbA1c",
+ "a1c": "HbA1c",
+ "hemoglobina1c": "HbA1c",
+ "insulin": "Insulin",
+
+ # Body metrics
+ "bmi": "BMI",
+ "bodymassindex": "BMI",
+
+ # Complete Blood Count (CBC)
+ "hemoglobin": "Hemoglobin",
+ "hgb": "Hemoglobin",
+ "hb": "Hemoglobin",
+ "platelets": "Platelets",
+ "plt": "Platelets",
+ "wbc": "White Blood Cells",
+ "whitebloodcells": "White Blood Cells",
+ "whitecells": "White Blood Cells",
+ "rbc": "Red Blood Cells",
+ "redbloodcells": "Red Blood Cells",
+ "redcells": "Red Blood Cells",
+ "hematocrit": "Hematocrit",
+ "hct": "Hematocrit",
+
+ # Red blood cell indices
+ "mcv": "Mean Corpuscular Volume",
+ "meancorpuscularvolume": "Mean Corpuscular Volume",
+ "mch": "Mean Corpuscular Hemoglobin",
+ "meancorpuscularhemoglobin": "Mean Corpuscular Hemoglobin",
+ "mchc": "Mean Corpuscular Hemoglobin Concentration",
+
+ # Cardiovascular
+ "heartrate": "Heart Rate",
+ "hr": "Heart Rate",
+ "pulse": "Heart Rate",
+ "systolicbp": "Systolic Blood Pressure",
+ "systolic": "Systolic Blood Pressure",
+ "sbp": "Systolic Blood Pressure",
+ "diastolicbp": "Diastolic Blood Pressure",
+ "diastolic": "Diastolic Blood Pressure",
+ "dbp": "Diastolic Blood Pressure",
+ "troponin": "Troponin",
+
+ # Inflammation and liver
+ "creactiveprotein": "C-reactive Protein",
+ "crp": "C-reactive Protein",
+ "alt": "ALT",
+ "alanineaminotransferase": "ALT",
+ "ast": "AST",
+ "aspartateaminotransferase": "AST",
+
+ # Kidney
+ "creatinine": "Creatinine",
+}
+
+
+def normalize_biomarker_name(name: str) -> str:
+ """
+ Normalize biomarker names to standard format.
+
+ Args:
+ name: Raw biomarker name from user input
+
+ Returns:
+ Standardized biomarker name
+ """
+ key = name.lower().replace(" ", "").replace("-", "").replace("_", "")
+ return NORMALIZATION_MAP.get(key, name)
diff --git a/src/biomarker_validator.py b/src/biomarker_validator.py
index 44381baa888d8c52adf669e44859ba1550eab64d..bc10080704642a2936c27d77358820e9e741da68 100644
--- a/src/biomarker_validator.py
+++ b/src/biomarker_validator.py
@@ -23,7 +23,7 @@ class BiomarkerValidator:
name: str,
value: float,
gender: Optional[str] = None,
- threshold_pct: float = 0.15
+ threshold_pct: float = 0.0
) -> BiomarkerFlag:
"""
Validate a single biomarker value against reference ranges.
@@ -32,7 +32,7 @@ class BiomarkerValidator:
name: Biomarker name
value: Measured value
gender: "male" or "female" (for gender-specific ranges)
- threshold_pct: Percentage deviation to flag as warning (0.15 = 15%)
+ threshold_pct: Only flag LOW/HIGH if deviation from boundary exceeds this fraction (e.g. 0.15 = 15%)
Returns:
BiomarkerFlag object with status and warnings
@@ -70,7 +70,7 @@ class BiomarkerValidator:
status = "NORMAL"
warning = None
- # Check critical values first
+ # Check critical values first (threshold_pct does not suppress critical alerts)
if critical_low and value < critical_low:
status = "CRITICAL_LOW"
warning = f"CRITICAL: {name} is {value} {unit}, below critical threshold of {critical_low} {unit}. {ref['clinical_significance'].get('low', 'Seek immediate medical attention.')}"
@@ -78,13 +78,13 @@ class BiomarkerValidator:
status = "CRITICAL_HIGH"
warning = f"CRITICAL: {name} is {value} {unit}, above critical threshold of {critical_high} {unit}. {ref['clinical_significance'].get('high', 'Seek immediate medical attention.')}"
elif value < min_val:
- # Check if it's within threshold percentage
- deviation = (min_val - value) / min_val if min_val > 0 else 1
+ # Only flag if deviation exceeds threshold_pct fraction of the boundary
+ deviation = (min_val - value) / min_val if min_val != 0 else 1.0
if deviation > threshold_pct:
status = "LOW"
warning = f"{name} is {value} {unit}, below normal range ({min_val}-{max_val} {unit}). {ref['clinical_significance'].get('low', '')}"
elif value > max_val:
- deviation = (value - max_val) / max_val if max_val > 0 else 1
+ deviation = (value - max_val) / max_val if max_val != 0 else 1.0
if deviation > threshold_pct:
status = "HIGH"
warning = f"{name} is {value} {unit}, above normal range ({min_val}-{max_val} {unit}). {ref['clinical_significance'].get('high', '')}"
@@ -104,11 +104,16 @@ class BiomarkerValidator:
self,
biomarkers: Dict[str, float],
gender: Optional[str] = None,
- threshold_pct: float = 0.15
+ threshold_pct: float = 0.0
) -> Tuple[List[BiomarkerFlag], List[SafetyAlert]]:
"""
Validate all biomarker values.
+ Args:
+ biomarkers: Dict of biomarker name -> value
+ gender: "male" or "female" (for gender-specific ranges)
+ threshold_pct: Only flag LOW/HIGH if deviation exceeds this fraction (e.g. 0.15 = 15%)
+
Returns:
Tuple of (biomarker_flags, safety_alerts)
"""
@@ -141,6 +146,10 @@ class BiomarkerValidator:
def get_biomarker_info(self, name: str) -> Optional[Dict]:
"""Get reference information for a biomarker"""
return self.references.get(name)
+
+ def expected_biomarker_count(self) -> int:
+ """Return expected number of biomarkers from reference ranges."""
+ return len(self.references)
def get_disease_relevant_biomarkers(self, disease: str) -> List[str]:
"""
diff --git a/src/evaluation/evaluators.py b/src/evaluation/evaluators.py
index 7b15e0323c49e85e034d7768dc11963c8eff295c..74fbb942eff77875dc1ca9ecececb7cee4e3d5c8 100644
--- a/src/evaluation/evaluators.py
+++ b/src/evaluation/evaluators.py
@@ -35,8 +35,8 @@ class EvaluationResult(BaseModel):
def average_score(self) -> float:
"""Calculate average of all 5 dimensions"""
- import numpy as np
- return float(np.mean(self.to_vector()))
+ scores = self.to_vector()
+ return sum(scores) / len(scores) if scores else 0.0
# Evaluator 1: Clinical Accuracy (LLM-as-Judge)
@@ -98,7 +98,7 @@ Respond ONLY with valid JSON in this format:
content = result.content if isinstance(result.content, str) else str(result.content)
parsed = json.loads(content)
return GradedScore(score=parsed['score'], reasoning=parsed['reasoning'])
- except:
+ except (json.JSONDecodeError, KeyError, TypeError):
# Fallback if JSON parsing fails
return GradedScore(score=0.85, reasoning="Medical interpretations appear accurate and evidence-based.")
@@ -196,7 +196,7 @@ Respond ONLY with valid JSON in this format:
try:
parsed = json.loads(result.content if isinstance(result.content, str) else str(result.content))
return GradedScore(score=parsed['score'], reasoning=parsed['reasoning'])
- except:
+ except (json.JSONDecodeError, KeyError, TypeError):
# Fallback if JSON parsing fails
return GradedScore(score=0.90, reasoning="Recommendations are clear, actionable, and appropriately prioritized.")
@@ -313,16 +313,16 @@ def evaluate_safety_completeness(
# Scoring
alert_score = min(1.0, alert_count / max(1, out_of_range_count))
- critical_score = critical_coverage
+ critical_score = min(1.0, critical_coverage)
disclaimer_score = 1.0 if has_disclaimer else 0.0
uncertainty_score = 1.0 if acknowledges_uncertainty else 0.5
- final_score = (
+ final_score = min(1.0, (
alert_score * 0.4 +
critical_score * 0.3 +
disclaimer_score * 0.2 +
uncertainty_score * 0.1
- )
+ ))
reasoning = f"""
Out-of-range biomarkers: {out_of_range_count}
@@ -354,7 +354,13 @@ def run_full_evaluation(
pubmed_context = ""
for output in agent_outputs:
if output.agent_name == "Disease Explainer":
- pubmed_context = output.findings
+ findings = output.findings
+ if isinstance(findings, dict):
+ pubmed_context = findings.get('mechanism_summary', '') or findings.get('pathophysiology', '')
+ elif isinstance(findings, str):
+ pubmed_context = findings
+ else:
+ pubmed_context = str(findings)
break
# Run all evaluators
diff --git a/src/evolution/director.py b/src/evolution/director.py
index 42ba7ad7738bf25675c0731c031f1aff865b1071..9c5b829853e57730a7e11e283d3080a6a7eefd82 100644
--- a/src/evolution/director.py
+++ b/src/evolution/director.py
@@ -4,7 +4,7 @@ Outer Loop Director for SOP Evolution
"""
import json
-from typing import List, Dict, Any, Optional, Literal, Callable
+from typing import Any, Callable, Dict, List, Literal, Optional
from pydantic import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate
from src.config import ExplanationSOP
diff --git a/src/evolution/pareto.py b/src/evolution/pareto.py
index 6d1e71457a858e386893abed4a7fa8ccd21ec37d..1716ab64a7bb549c239036398fa814d5370bd041 100644
--- a/src/evolution/pareto.py
+++ b/src/evolution/pareto.py
@@ -8,11 +8,6 @@ from typing import List, Dict, Any
import matplotlib
matplotlib.use('Agg') # Use non-interactive backend
import matplotlib.pyplot as plt
-try:
- import pandas as pd
- HAS_PANDAS = True
-except ImportError:
- HAS_PANDAS = False
def identify_pareto_front(gene_pool_entries: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
diff --git a/src/llm_config.py b/src/llm_config.py
index d6fbe721bf0ddb8f0ffaee84034f959273af281f..01ee509736b96bd389ce9112adb3381dd79fa4df 100644
--- a/src/llm_config.py
+++ b/src/llm_config.py
@@ -83,7 +83,10 @@ def get_chat_model(
)
elif provider == "ollama":
- from langchain_community.chat_models import ChatOllama
+ try:
+ from langchain_ollama import ChatOllama
+ except ImportError:
+ from langchain_community.chat_models import ChatOllama
model = model or "llama3.1:8b"
@@ -114,7 +117,7 @@ def get_embedding_model(provider: Literal["google", "huggingface", "ollama"] = N
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
- print("⚠️ GOOGLE_API_KEY not found. Falling back to HuggingFace embeddings.")
+ print("WARN: GOOGLE_API_KEY not found. Falling back to HuggingFace embeddings.")
return get_embedding_model("huggingface")
try:
@@ -123,19 +126,25 @@ def get_embedding_model(provider: Literal["google", "huggingface", "ollama"] = N
google_api_key=api_key
)
except Exception as e:
- print(f"⚠️ Google embeddings failed: {e}")
- print(" Falling back to HuggingFace embeddings...")
+ print(f"WARN: Google embeddings failed: {e}")
+ print("INFO: Falling back to HuggingFace embeddings...")
return get_embedding_model("huggingface")
elif provider == "huggingface":
- from langchain_community.embeddings import HuggingFaceEmbeddings
+ try:
+ from langchain_huggingface import HuggingFaceEmbeddings
+ except ImportError:
+ from langchain_community.embeddings import HuggingFaceEmbeddings
return HuggingFaceEmbeddings(
model_name="sentence-transformers/all-MiniLM-L6-v2"
)
elif provider == "ollama":
- from langchain_community.embeddings import OllamaEmbeddings
+ try:
+ from langchain_ollama import OllamaEmbeddings
+ except ImportError:
+ from langchain_community.embeddings import OllamaEmbeddings
return OllamaEmbeddings(model="nomic-embed-text")
@@ -256,6 +265,8 @@ class LLMConfig:
def get_synthesizer(self, model_name: str = None):
"""Get synthesizer model (for backward compatibility)"""
+ if model_name:
+ return get_chat_model(provider=self.provider, model=model_name, temperature=0.2)
return self.synthesizer_8b
def print_config(self):
@@ -288,7 +299,7 @@ def check_api_connection():
if provider == "groq":
api_key = os.getenv("GROQ_API_KEY")
if not api_key:
- print("✗ GROQ_API_KEY not set")
+ print("WARN: GROQ_API_KEY not set")
print("\n Get your FREE API key at:")
print(" https://console.groq.com/keys")
return False
@@ -296,31 +307,34 @@ def check_api_connection():
# Test connection
test_model = get_chat_model("groq")
response = test_model.invoke("Say 'OK' in one word")
- print("✓ Groq API connection successful")
+ print("OK: Groq API connection successful")
return True
elif provider == "gemini":
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
- print("✗ GOOGLE_API_KEY not set")
+ print("WARN: GOOGLE_API_KEY not set")
print("\n Get your FREE API key at:")
print(" https://aistudio.google.com/app/apikey")
return False
test_model = get_chat_model("gemini")
response = test_model.invoke("Say 'OK' in one word")
- print("✓ Google Gemini API connection successful")
+ print("OK: Google Gemini API connection successful")
return True
else:
- from langchain_community.chat_models import ChatOllama
+ try:
+ from langchain_ollama import ChatOllama
+ except ImportError:
+ from langchain_community.chat_models import ChatOllama
test_model = ChatOllama(model="llama3.1:8b")
response = test_model.invoke("Hello")
- print("✓ Ollama connection successful")
+ print("OK: Ollama connection successful")
return True
except Exception as e:
- print(f"✗ Connection failed: {e}")
+ print(f"ERROR: Connection failed: {e}")
return False
diff --git a/src/pdf_processor.py b/src/pdf_processor.py
index 75a67c07bcf42d5a810569e33f3cb873f2e049a7..76c506bf3c456a2c35bbc19e1c280770e48a2816 100644
--- a/src/pdf_processor.py
+++ b/src/pdf_processor.py
@@ -4,15 +4,20 @@ PDF document processing and vector store creation
"""
import os
+import warnings
from pathlib import Path
from typing import List, Optional, Literal
from langchain_community.document_loaders import PyPDFLoader, DirectoryLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from dotenv import load_dotenv
import time
+# Suppress noisy warnings
+warnings.filterwarnings("ignore", message=".*class.*HuggingFaceEmbeddings.*was deprecated.*")
+os.environ.setdefault("HF_HUB_DISABLE_IMPLICIT_TOKEN", "1")
+
# Load environment variables
load_dotenv()
@@ -34,20 +39,20 @@ def get_embedding_model(provider: Literal["google", "huggingface", "ollama"] = N
api_key = os.getenv("GOOGLE_API_KEY")
if not api_key:
- print("⚠️ GOOGLE_API_KEY not found in .env file")
- print(" Get FREE API key: https://aistudio.google.com/app/apikey")
- print(" Falling back to HuggingFace local embeddings...\n")
+ print("WARN: GOOGLE_API_KEY not found in .env file")
+ print("INFO: Get FREE API key: https://aistudio.google.com/app/apikey")
+ print("INFO: Falling back to HuggingFace local embeddings...\n")
return get_embedding_model("huggingface")
try:
- print("✓ Using Google Gemini embeddings (FREE, fast)")
+ print("INFO: Using Google Gemini embeddings (FREE, fast)")
return GoogleGenerativeAIEmbeddings(
model="models/text-embedding-004",
google_api_key=api_key
)
except Exception as e:
- print(f"⚠️ Google embeddings failed: {e}")
- print(" Falling back to HuggingFace local embeddings...\n")
+ print(f"WARN: Google embeddings failed: {e}")
+ print("INFO: Falling back to HuggingFace local embeddings...\n")
return get_embedding_model("huggingface")
elif provider == "huggingface":
@@ -56,15 +61,18 @@ def get_embedding_model(provider: Literal["google", "huggingface", "ollama"] = N
except ImportError:
from langchain_community.embeddings import HuggingFaceEmbeddings
- print("✓ Using HuggingFace local embeddings (free, offline)")
+ print("INFO: Using HuggingFace local embeddings (free, offline)")
return HuggingFaceEmbeddings(
model_name="sentence-transformers/all-MiniLM-L6-v2"
)
elif provider == "ollama":
- from langchain_community.embeddings import OllamaEmbeddings
+ try:
+ from langchain_ollama import OllamaEmbeddings
+ except ImportError:
+ from langchain_community.embeddings import OllamaEmbeddings
- print("✓ Using local Ollama embeddings (requires Ollama running)")
+ print("INFO: Using local Ollama embeddings (requires Ollama running)")
return OllamaEmbeddings(model="nomic-embed-text")
else:
@@ -119,8 +127,8 @@ class PDFProcessor:
pdf_files = list(self.pdf_directory.glob("*.pdf"))
if not pdf_files:
- print(f"⚠️ No PDF files found in {self.pdf_directory}")
- print(f" Please place medical PDFs in this directory")
+ print(f"WARN: No PDF files found in {self.pdf_directory}")
+ print("INFO: Please place medical PDFs in this directory")
return []
print(f"Found {len(pdf_files)} PDF file(s):")
@@ -140,10 +148,10 @@ class PDFProcessor:
doc.metadata['source_path'] = str(pdf_path)
documents.extend(docs)
- print(f" ✓ Loaded {len(docs)} pages from {pdf_path.name}")
+ print(f" OK: Loaded {len(docs)} pages from {pdf_path.name}")
except Exception as e:
- print(f" ✗ Error loading {pdf_path.name}: {e}")
+ print(f" ERROR: Error loading {pdf_path.name}: {e}")
print(f"\nTotal: {len(documents)} pages loaded from {len(pdf_files)} PDF(s)")
return documents
@@ -166,7 +174,7 @@ class PDFProcessor:
for i, chunk in enumerate(chunks):
chunk.metadata['chunk_id'] = i
- print(f"✓ Created {len(chunks)} chunks from {len(documents)} pages")
+ print(f"OK: Created {len(chunks)} chunks from {len(documents)} pages")
print(f" Average chunk size: {sum(len(c.page_content) for c in chunks) // len(chunks)} characters")
return chunks
@@ -202,7 +210,7 @@ class PDFProcessor:
save_path = self.vector_store_path / f"{store_name}.faiss"
vector_store.save_local(str(self.vector_store_path), index_name=store_name)
- print(f"✓ Vector store created and saved to: {save_path}")
+ print(f"OK: Vector store created and saved to: {save_path}")
return vector_store
@@ -224,7 +232,7 @@ class PDFProcessor:
store_path = self.vector_store_path / f"{store_name}.faiss"
if not store_path.exists():
- print(f"⚠️ Vector store not found: {store_path}")
+ print(f"WARN: Vector store not found: {store_path}")
return None
try:
@@ -234,11 +242,11 @@ class PDFProcessor:
index_name=store_name,
allow_dangerous_deserialization=True
)
- print(f"✓ Loaded vector store from: {store_path}")
+ print(f"OK: Loaded vector store from: {store_path}")
return vector_store
except Exception as e:
- print(f"✗ Error loading vector store: {e}")
+ print(f"ERROR: Error loading vector store: {e}")
return None
def create_retrievers(
@@ -270,7 +278,7 @@ class PDFProcessor:
documents = self.load_pdfs()
if not documents:
- print("⚠️ No documents to process. Please add PDF files.")
+ print("WARN: No documents to process. Please add PDF files.")
return {}
chunks = self.chunk_documents(documents)
@@ -292,7 +300,7 @@ class PDFProcessor:
)
}
- print(f"\n✓ Created {len(retrievers)} specialized retrievers")
+ print(f"\nOK: Created {len(retrievers)} specialized retrievers")
return retrievers
@@ -327,9 +335,9 @@ def setup_knowledge_base(embedding_model=None, force_rebuild: bool = False, use_
)
if retrievers:
- print("\n✓ Knowledge base setup complete!")
+ print("\nOK: Knowledge base setup complete!")
else:
- print("\n⚠️ Knowledge base setup incomplete. Add PDFs and try again.")
+ print("\nWARN: Knowledge base setup incomplete. Add PDFs and try again.")
print("=" * 60)
@@ -376,5 +384,5 @@ if __name__ == "__main__":
)
if retrievers:
- print("\n✓ PDF processing test successful!")
+ print("\nOK: PDF processing test successful!")
print(f"Available retrievers: {list(retrievers.keys())}")
diff --git a/src/state.py b/src/state.py
index cc9fb5c08e63b7201d46c9016e1c11e5d07a0e3a..91dfbfec7e7e4b12cb812f4e4e8d7f104570309b 100644
--- a/src/state.py
+++ b/src/state.py
@@ -63,6 +63,7 @@ class GuildState(TypedDict):
agent_outputs: Annotated[List[AgentOutput], operator.add]
biomarker_flags: Annotated[List[BiomarkerFlag], operator.add]
safety_alerts: Annotated[List[SafetyAlert], operator.add]
+ biomarker_analysis: Optional[Dict[str, Any]]
# === Final Structured Output ===
final_response: Optional[Dict[str, Any]]
@@ -80,11 +81,11 @@ class PatientInput(BaseModel):
model_prediction: Dict[str, Any] # Contains: disease, confidence, probabilities
- patient_context: Optional[Dict[str, Any]] = {
- "age": None,
- "gender": None, # "male" or "female"
- "bmi": None
- }
+ patient_context: Optional[Dict[str, Any]] = None
+
+ def model_post_init(self, __context: Any) -> None:
+ if self.patient_context is None:
+ self.patient_context = {"age": None, "gender": None, "bmi": None}
model_config = ConfigDict(json_schema_extra={
"example": {
diff --git a/src/workflow.py b/src/workflow.py
index b7da3464318bce05f61c048605dc3c1c15671cb8..29f4d859468c239c8fd1b0acefcba19713690cd0 100644
--- a/src/workflow.py
+++ b/src/workflow.py
@@ -43,11 +43,11 @@ class ClinicalInsightGuild:
self.confidence_assessor = confidence_assessor_agent
self.response_synthesizer = response_synthesizer_agent
- print("✓ All agents initialized successfully")
+ print("All agents initialized successfully")
# Build workflow graph
self.workflow = self._build_workflow()
- print("✓ Workflow graph compiled")
+ print("Workflow graph compiled")
print("="*70 + "\n")
def _build_workflow(self):
@@ -130,6 +130,7 @@ class ClinicalInsightGuild:
'biomarker_flags': [],
'safety_alerts': [],
'final_response': None,
+ 'biomarker_analysis': None,
'processing_timestamp': datetime.now().isoformat(),
'sop_version': "Baseline"
}
@@ -141,10 +142,12 @@ class ClinicalInsightGuild:
print("COMPLETED: Clinical Insight Guild Workflow")
print("="*70)
print(f"Total Agents Executed: {len(final_state.get('agent_outputs', []))}")
- print("✓ Workflow execution successful")
+ print("Workflow execution successful")
print("="*70 + "\n")
- return final_state.get('final_response', {})
+ # Return full state so callers can access agent_outputs,
+ # biomarker_flags, safety_alerts, and final_response
+ return dict(final_state)
def create_guild() -> ClinicalInsightGuild:
@@ -156,5 +159,5 @@ if __name__ == "__main__":
# Test workflow initialization
print("Testing Clinical Insight Guild initialization...")
guild = create_guild()
- print("\n✓ Guild initialization successful!")
+ print("\nGuild initialization successful!")
print("Ready to process patient inputs.")
diff --git a/tests/test_basic.py b/tests/test_basic.py
index b46c77dc288694841c2b4deb5d28311dfa5b8a03..b8a3ae2dbc328892f282e3c200a11dd5d3399193 100644
--- a/tests/test_basic.py
+++ b/tests/test_basic.py
@@ -12,19 +12,19 @@ print("Testing imports...")
try:
from src.state import PatientInput
- print("✓ PatientInput imported")
+ print("PatientInput imported")
from src.config import BASELINE_SOP
- print("✓ BASELINE_SOP imported")
+ print("BASELINE_SOP imported")
from src.pdf_processor import get_all_retrievers
- print("✓ get_all_retrievers imported")
+ print("get_all_retrievers imported")
from src.llm_config import llm_config
- print("✓ llm_config imported")
+ print("llm_config imported")
from src.biomarker_validator import BiomarkerValidator
- print("✓ BiomarkerValidator imported")
+ print("BiomarkerValidator imported")
print("\n" + "="*70)
print("ALL IMPORTS SUCCESSFUL")
@@ -33,7 +33,7 @@ try:
# Test retrievers
print("\nTesting retrievers...")
retrievers = get_all_retrievers(force_rebuild=False)
- print(f"✓ Retrieved {len(retrievers)} retrievers")
+ print(f"Retrieved {len(retrievers)} retrievers")
print(f" Available: {list(retrievers.keys())}")
# Test patient input creation
@@ -43,7 +43,7 @@ try:
model_prediction={"disease": "Type 2 Diabetes", "confidence": 0.87, "probabilities": {}},
patient_context={"age": 52, "gender": "male", "bmi": 31.2}
)
- print(f"✓ PatientInput created")
+ print("PatientInput created")
print(f" Disease: {patient.model_prediction['disease']}")
print(f" Confidence: {patient.model_prediction['confidence']:.1%}")
@@ -51,7 +51,7 @@ try:
print("\nTesting BiomarkerValidator...")
validator = BiomarkerValidator()
flags, alerts = validator.validate_all(patient.biomarkers, patient.patient_context.get('gender', 'male'))
- print(f"✓ Validator working")
+ print("Validator working")
print(f" Flags: {len(flags)}")
print(f" Alerts: {len(alerts)}")
@@ -62,7 +62,7 @@ try:
print("All core components are functional and ready.")
except Exception as e:
- print(f"\n✗ ERROR: {e}")
+ print(f"\nERROR: {e}")
import traceback
traceback.print_exc()
diff --git a/tests/test_citation_guardrails.py b/tests/test_citation_guardrails.py
new file mode 100644
index 0000000000000000000000000000000000000000..577bac2cc585412326cd5ff02d36a1367920ffa4
--- /dev/null
+++ b/tests/test_citation_guardrails.py
@@ -0,0 +1,26 @@
+from src.agents.disease_explainer import create_disease_explainer_agent
+
+
+class EmptyRetriever:
+ def __init__(self):
+ self.search_kwargs = {"k": 3}
+
+ def invoke(self, query):
+ return []
+
+
+class StubSOP:
+ disease_explainer_k = 3
+ require_pdf_citations = True
+
+
+def test_disease_explainer_requires_citations():
+ agent = create_disease_explainer_agent(EmptyRetriever())
+ state = {
+ "model_prediction": {"disease": "Diabetes", "confidence": 0.6},
+ "sop": StubSOP()
+ }
+ result = agent.explain(state)
+ findings = result["agent_outputs"][0].findings
+ assert findings["citations"] == []
+ assert "insufficient" in findings["pathophysiology"].lower()
diff --git a/tests/test_codebase_fixes.py b/tests/test_codebase_fixes.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb1b3ce6ad350d7767d385a428a145078efcd342
--- /dev/null
+++ b/tests/test_codebase_fixes.py
@@ -0,0 +1,136 @@
+"""
+Tests for codebase fixes: confidence cap, validator, thresholds, schema validation
+"""
+import sys
+from pathlib import Path
+import json
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from api.app.services.extraction import predict_disease_simple as api_predict
+from scripts.chat import predict_disease_simple as cli_predict
+from src.biomarker_validator import BiomarkerValidator
+from api.app.models.schemas import StructuredAnalysisRequest, HealthResponse
+
+
+# ============================================================================
+# Confidence cap tests
+# ============================================================================
+
+class TestConfidenceCap:
+ """Verify confidence never exceeds 1.0"""
+
+ def test_api_confidence_capped_at_one(self):
+ # Glucose>126 (+0.4), Glucose>180 (+0.2), HbA1c>=6.5 (+0.5) = 1.1 raw
+ result = api_predict({"Glucose": 200, "HbA1c": 7.0})
+ assert result["confidence"] <= 1.0
+
+ def test_cli_confidence_capped_at_one(self):
+ result = cli_predict({"Glucose": 200, "HbA1c": 7.0})
+ assert result["confidence"] <= 1.0
+
+ def test_confidence_is_exactly_one_for_high_diabetes(self):
+ result = api_predict({"Glucose": 200, "HbA1c": 7.0})
+ assert result["confidence"] == 1.0
+
+ def test_confidence_not_capped_when_below_one(self):
+ result = api_predict({"Glucose": 130})
+ assert result["confidence"] == 0.4
+
+
+# ============================================================================
+# Updated critical threshold tests
+# ============================================================================
+
+class TestCriticalThresholds:
+ """Verify biomarker_references.json has clinically appropriate critical thresholds"""
+
+ def setup_method(self):
+ config_path = Path(__file__).parent.parent / "config" / "biomarker_references.json"
+ with open(config_path) as f:
+ self.refs = json.load(f)["biomarkers"]
+
+ def test_glucose_critical_high_is_emergency(self):
+ assert self.refs["Glucose"]["critical_high"] >= 300
+
+ def test_glucose_critical_low_is_emergency(self):
+ assert self.refs["Glucose"]["critical_low"] <= 54
+
+ def test_hba1c_critical_high_is_emergency(self):
+ assert self.refs["HbA1c"]["critical_high"] >= 10
+
+ def test_troponin_critical_high_above_normal(self):
+ normal_max = self.refs["Troponin"]["normal_range"]["max"]
+ assert self.refs["Troponin"]["critical_high"] > normal_max
+
+ def test_bmi_critical_high_is_morbid(self):
+ assert self.refs["BMI"]["critical_high"] >= 40
+
+ def test_systolic_bp_critical_high_is_crisis(self):
+ assert self.refs["Systolic Blood Pressure"]["critical_high"] >= 180
+
+ def test_diastolic_bp_critical_low_is_shock(self):
+ assert self.refs["Diastolic Blood Pressure"]["critical_low"] <= 40
+
+
+# ============================================================================
+# Validator threshold removal tests
+# ============================================================================
+
+class TestValidatorNoThreshold:
+ """Verify validator flags all out-of-range values (no 15% threshold)"""
+
+ def setup_method(self):
+ self.validator = BiomarkerValidator()
+
+ def test_slightly_high_glucose_is_flagged(self):
+ """Glucose=105 is above normal max=100 — should be HIGH, not NORMAL"""
+ flag = self.validator.validate_biomarker("Glucose", 105.0)
+ assert flag.status == "HIGH"
+
+ def test_slightly_low_hemoglobin_is_flagged(self):
+ """Hemoglobin=13.0 for male (min=13.5) should be LOW"""
+ flag = self.validator.validate_biomarker("Hemoglobin", 13.0, gender="male")
+ assert flag.status == "LOW"
+
+ def test_normal_glucose_stays_normal(self):
+ flag = self.validator.validate_biomarker("Glucose", 90.0)
+ assert flag.status == "NORMAL"
+
+ def test_critical_high_glucose_flagged(self):
+ flag = self.validator.validate_biomarker("Glucose", 500.0)
+ assert flag.status == "CRITICAL_HIGH"
+
+ def test_high_glucose_200_not_critical(self):
+ """Glucose=200 is above normal but below critical_high=400"""
+ flag = self.validator.validate_biomarker("Glucose", 200.0)
+ assert flag.status == "HIGH"
+
+
+# ============================================================================
+# Pydantic schema validation tests
+# ============================================================================
+
+class TestSchemaValidation:
+ """Verify Pydantic models enforce constraints correctly"""
+
+ def test_structured_request_rejects_empty_biomarkers(self):
+ import pytest
+ with pytest.raises(Exception):
+ StructuredAnalysisRequest(biomarkers={})
+
+ def test_structured_request_accepts_valid_biomarkers(self):
+ req = StructuredAnalysisRequest(biomarkers={"Glucose": 100.0})
+ assert req.biomarkers == {"Glucose": 100.0}
+
+ def test_health_response_uses_llm_status_field(self):
+ resp = HealthResponse(
+ status="healthy",
+ timestamp="2025-01-01T00:00:00",
+ llm_status="connected",
+ vector_store_loaded=True,
+ available_models=["test"],
+ uptime_seconds=100.0,
+ version="1.0.0"
+ )
+ assert resp.llm_status == "connected"
diff --git a/tests/test_diabetes_patient.py b/tests/test_diabetes_patient.py
index df6bbdaff075f4adc079440c59376aad0c3b8d8a..dc66e5a9d9456eee88ff6843cca47245f7aaa655 100644
--- a/tests/test_diabetes_patient.py
+++ b/tests/test_diabetes_patient.py
@@ -166,9 +166,14 @@ def run_test():
print("-" * 70)
if response['safety_alerts']:
for alert in response['safety_alerts']:
- severity = alert.get('severity', alert.get('priority', 'UNKNOWN'))
- biomarker = alert.get('biomarker', 'General')
- message = alert.get('message', str(alert))
+ if hasattr(alert, 'severity'):
+ severity = alert.severity
+ biomarker = alert.biomarker or 'General'
+ message = alert.message
+ else:
+ severity = alert.get('severity', alert.get('priority', 'UNKNOWN'))
+ biomarker = alert.get('biomarker', 'General')
+ message = alert.get('message', str(alert))
print(f" [{severity}] {biomarker}: {message}")
else:
print(" No safety alerts")
@@ -180,10 +185,20 @@ def run_test():
print(f"System: {response['metadata']['system_version']}")
print(f"Agents: {', '.join(response['metadata']['agents_executed'])}")
- # Save response to file
+ # Save response to file (convert Pydantic objects to dicts for serialization)
+ def _to_serializable(obj):
+ """Recursively convert Pydantic models and non-serializable objects to dicts."""
+ if hasattr(obj, 'model_dump'):
+ return obj.model_dump()
+ elif isinstance(obj, dict):
+ return {k: _to_serializable(v) for k, v in obj.items()}
+ elif isinstance(obj, list):
+ return [_to_serializable(item) for item in obj]
+ return obj
+
output_file = Path(__file__).parent / "test_output_diabetes.json"
with open(output_file, 'w', encoding='utf-8') as f:
- json.dump(response, f, indent=2, ensure_ascii=False)
+ json.dump(_to_serializable(response), f, indent=2, ensure_ascii=False, default=str)
print(f"\n✓ Full response saved to: {output_file}")
print("\n" + "="*70)
diff --git a/tests/test_evaluation_system.py b/tests/test_evaluation_system.py
index 927218b9784003387043b6ede57c7f575f82a3c3..f1422dd31deb64df0e367d03af217ffb4793759a 100644
--- a/tests/test_evaluation_system.py
+++ b/tests/test_evaluation_system.py
@@ -182,18 +182,19 @@ def test_evaluation_system():
if all_valid:
print("\n" + "=" * 80)
- print("🎉 ALL EVALUATORS PASSED VALIDATION")
+ print("All evaluators passed validation")
print("=" * 80)
else:
print("\n" + "=" * 80)
- print("⚠️ SOME EVALUATORS FAILED VALIDATION")
+ print("Some evaluators failed validation")
print("=" * 80)
- return evaluation_result
+ assert all_valid, "Some evaluators had scores out of valid range"
+ assert avg_score > 0.0, "Average evaluation score should be positive"
except Exception as e:
print("\n" + "=" * 80)
- print("❌ EVALUATION FAILED")
+ print("Evaluation failed")
print("=" * 80)
print(f"\nError: {type(e).__name__}: {str(e)}")
import traceback
@@ -202,6 +203,6 @@ def test_evaluation_system():
if __name__ == "__main__":
- print("\n🚀 Starting 5D Evaluation System Test\n")
- result = test_evaluation_system()
- print("\n✅ Test completed successfully!")
+ print("\nStarting 5D Evaluation System Test\n")
+ test_evaluation_system()
+ print("\nTest completed successfully!")
diff --git a/tests/test_json_parsing.py b/tests/test_json_parsing.py
new file mode 100644
index 0000000000000000000000000000000000000000..27c4fe6b8360fd522af3472d1da3f7ab953db1a2
--- /dev/null
+++ b/tests/test_json_parsing.py
@@ -0,0 +1,7 @@
+from api.app.services.extraction import _parse_llm_json
+
+
+def test_parse_llm_json_recovers_embedded_object():
+ content = "Here is your JSON:\n```json\n{\"biomarkers\": {\"Glucose\": 140}}\n```"
+ parsed = _parse_llm_json(content)
+ assert parsed["biomarkers"]["Glucose"] == 140
diff --git a/tests/test_llm_config.py b/tests/test_llm_config.py
new file mode 100644
index 0000000000000000000000000000000000000000..6e0857b9b0209d3d767be795ccd97949079fd7c1
--- /dev/null
+++ b/tests/test_llm_config.py
@@ -0,0 +1,44 @@
+"""
+Tests for Task 7: Model Selection Centralization
+"""
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path(__file__).parent.parent))
+
+from src.llm_config import LLMConfig
+
+
+def test_get_synthesizer_returns_not_none():
+ """get_synthesizer should return a model (may need API key — skip if unavailable)"""
+ config = LLMConfig(lazy=True)
+ try:
+ model = config.get_synthesizer()
+ assert model is not None
+ except (ValueError, ImportError):
+ # API keys may not be configured in CI
+ import pytest
+ pytest.skip("LLM provider not configured, skipping")
+
+
+def test_get_synthesizer_with_model_name():
+ """get_synthesizer with custom model should not raise (validates dispatch)"""
+ config = LLMConfig(lazy=True)
+ try:
+ model = config.get_synthesizer(model_name="llama-3.3-70b-versatile")
+ assert model is not None
+ except (ValueError, ImportError):
+ import pytest
+ pytest.skip("LLM provider not configured, skipping")
+
+
+def test_llm_config_has_synthesizer_property():
+ """LLMConfig should expose synthesizer_8b via property"""
+ assert hasattr(LLMConfig, "synthesizer_8b")
+
+
+def test_llm_config_has_all_properties():
+ """Verify all expected model properties exist"""
+ expected = ["planner", "analyzer", "explainer", "synthesizer_7b", "synthesizer_8b", "director", "embedding_model"]
+ for prop_name in expected:
+ assert hasattr(LLMConfig, prop_name), f"Missing property: {prop_name}"
diff --git a/tests/test_normalization.py b/tests/test_normalization.py
new file mode 100644
index 0000000000000000000000000000000000000000..37e5f70b9089201ed8215acad211ffc9f654696b
--- /dev/null
+++ b/tests/test_normalization.py
@@ -0,0 +1,7 @@
+from src.biomarker_normalization import normalize_biomarker_name
+
+
+def test_normalizes_common_aliases():
+ assert normalize_biomarker_name("ldl") == "LDL Cholesterol"
+ assert normalize_biomarker_name("wbc") == "White Blood Cells"
+ assert normalize_biomarker_name("systolic bp") == "Systolic Blood Pressure"
diff --git a/tests/test_prediction_confidence.py b/tests/test_prediction_confidence.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9e82c105592fffcf1735326aed42b6df6752dc1
--- /dev/null
+++ b/tests/test_prediction_confidence.py
@@ -0,0 +1,11 @@
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).parent.parent / "api"))
+
+from app.services.extraction import predict_disease_simple
+
+
+def test_low_confidence_returns_undetermined():
+ result = predict_disease_simple({})
+ assert result["confidence"] == 0.0
+ assert result["disease"] == "Undetermined"
diff --git a/tests/test_response_mapping.py b/tests/test_response_mapping.py
new file mode 100644
index 0000000000000000000000000000000000000000..361d1299d773336f9c81c5ba00e913e55ec7108c
--- /dev/null
+++ b/tests/test_response_mapping.py
@@ -0,0 +1,55 @@
+import sys
+from pathlib import Path
+sys.path.insert(0, str(Path(__file__).parent.parent / "api"))
+
+from app.services.ragbot import RagBotService
+
+
+def test_format_response_uses_synthesizer_payload():
+ service = RagBotService()
+ workflow_result = {
+ "final_response": {
+ "biomarker_flags": [
+ {
+ "name": "Glucose",
+ "value": 120,
+ "unit": "mg/dL",
+ "status": "HIGH",
+ "reference_range": "70-100 mg/dL",
+ "warning": None
+ }
+ ],
+ "safety_alerts": [],
+ "key_drivers": [],
+ "disease_explanation": {
+ "pathophysiology": "",
+ "citations": [],
+ "retrieved_chunks": None
+ },
+ "recommendations": {
+ "immediate_actions": [],
+ "lifestyle_changes": [],
+ "monitoring": []
+ },
+ "confidence_assessment": {
+ "prediction_reliability": "LOW",
+ "evidence_strength": "WEAK",
+ "limitations": []
+ },
+ "patient_summary": {"narrative": ""}
+ },
+ "biomarker_flags": [],
+ "safety_alerts": []
+ }
+
+ response = service._format_response(
+ request_id="req_test",
+ workflow_result=workflow_result,
+ input_biomarkers={"Glucose": 120},
+ extracted_biomarkers=None,
+ patient_context={},
+ model_prediction={"disease": "Diabetes", "confidence": 0.6, "probabilities": {}},
+ processing_time_ms=10.0
+ )
+
+ assert response.analysis.biomarker_flags[0].name == "Glucose"
diff --git a/tests/test_state_fields.py b/tests/test_state_fields.py
new file mode 100644
index 0000000000000000000000000000000000000000..413d3bd5be8eaa39677a8b615880abbce04447b9
--- /dev/null
+++ b/tests/test_state_fields.py
@@ -0,0 +1,6 @@
+from src.state import GuildState
+
+
+def test_state_has_biomarker_analysis_field():
+ required_fields = {"biomarker_analysis", "biomarker_flags", "safety_alerts"}
+ assert required_fields.issubset(GuildState.__annotations__.keys())
diff --git a/tests/test_validator_count.py b/tests/test_validator_count.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9674649fc07a971223892f8c0bba6cd30bc9f13
--- /dev/null
+++ b/tests/test_validator_count.py
@@ -0,0 +1,6 @@
+from src.biomarker_validator import BiomarkerValidator
+
+
+def test_expected_biomarker_count_is_reference_size():
+ validator = BiomarkerValidator()
+ assert validator.expected_biomarker_count() == len(validator.references)