Spaces:
Sleeping
Sleeping
Commit ·
4b445f6
0
Parent(s):
initial - commit
Browse files. This view is limited to 50 files because it contains too many changes. See raw diff
- .env.example +27 -0
- .github/workflows/ci.yml +31 -0
- .github/workflows/prewarm.yml +14 -0
- .gitignore +67 -0
- PROJECT_PLAN.md +704 -0
- README.md +161 -0
- app/__init__.py +0 -0
- app/agents/__init__.py +0 -0
- app/agents/base_agent.py +295 -0
- app/agents/performance_agent.py +44 -0
- app/agents/security_agent.py +107 -0
- app/agents/style_agent.py +43 -0
- app/agents/synthesizer.py +291 -0
- app/config.py +40 -0
- app/context/__init__.py +0 -0
- app/context/embedder.py +126 -0
- app/context/indexer.py +127 -0
- app/context/retriever.py +116 -0
- app/db/__init__.py +0 -0
- app/db/postgres.py +144 -0
- app/db/redis_cache.py +121 -0
- app/github/__init__.py +0 -0
- app/github/auth.py +135 -0
- app/github/client.py +362 -0
- app/github/comment_formatter.py +215 -0
- app/github/webhook.py +84 -0
- app/main.py +355 -0
- app/models/__init__.py +0 -0
- app/models/findings.py +55 -0
- app/models/webhook_payloads.py +55 -0
- app/services/__init__.py +0 -0
- app/services/health_score.py +85 -0
- app/tools/__init__.py +0 -0
- app/tools/bandit_tool.py +173 -0
- app/tools/detect_secrets_tool.py +118 -0
- app/tools/linter_tool.py +113 -0
- app/tools/radon_tool.py +107 -0
- dashboard/.gitignore +41 -0
- dashboard/AGENTS.md +5 -0
- dashboard/CLAUDE.md +1 -0
- dashboard/README.md +36 -0
- dashboard/app/favicon.ico +0 -0
- dashboard/app/globals.css +152 -0
- dashboard/app/layout.tsx +104 -0
- dashboard/app/page.tsx +291 -0
- dashboard/app/repos/[owner]/[repo]/page.tsx +170 -0
- dashboard/app/repos/[owner]/[repo]/prs/[number]/page.tsx +168 -0
- dashboard/components/AgentBreakdown.tsx +113 -0
- dashboard/components/AnimatedCounter.tsx +44 -0
- dashboard/components/FindingsTable.tsx +185 -0
.env.example
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# === LLM APIs ===
|
| 2 |
+
GROQ_API_KEY=gsk_your_groq_api_key_here
|
| 3 |
+
GEMINI_API_KEY=AIza_your_gemini_api_key_here
|
| 4 |
+
|
| 5 |
+
# === GitHub App ===
|
| 6 |
+
GITHUB_APP_ID=123456
|
| 7 |
+
GITHUB_APP_PRIVATE_KEY_PATH=./keys/app.pem
|
| 8 |
+
GITHUB_WEBHOOK_SECRET=your_webhook_secret_here
|
| 9 |
+
|
| 10 |
+
# === Database ===
|
| 11 |
+
DATABASE_URL=postgresql://user:pass@host.neon.tech/sentinel_ai?sslmode=require
|
| 12 |
+
|
| 13 |
+
# === Redis Cache ===
|
| 14 |
+
UPSTASH_REDIS_URL=rediss://default:your_token@your-endpoint.upstash.io:6379
|
| 15 |
+
|
| 16 |
+
# === Embedding Model ===
|
| 17 |
+
EMBEDDING_MODEL=all-MiniLM-L6-v2
|
| 18 |
+
|
| 19 |
+
# === App Config ===
|
| 20 |
+
ENVIRONMENT=development
|
| 21 |
+
LOG_LEVEL=INFO
|
| 22 |
+
CONFIDENCE_THRESHOLD=0.6
|
| 23 |
+
MAX_REPO_FILES_INDEX=500
|
| 24 |
+
|
| 25 |
+
# === Security ===
|
| 26 |
+
DASHBOARD_API_KEY=generate-a-random-key-here
|
| 27 |
+
CORS_ALLOWED_ORIGINS=http://localhost:3000
|
.github/workflows/ci.yml
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: CI
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches: [main]
|
| 6 |
+
pull_request:
|
| 7 |
+
branches: [main]
|
| 8 |
+
|
| 9 |
+
jobs:
|
| 10 |
+
lint-and-test:
|
| 11 |
+
runs-on: ubuntu-latest
|
| 12 |
+
steps:
|
| 13 |
+
- uses: actions/checkout@v4
|
| 14 |
+
|
| 15 |
+
- name: Set up Python
|
| 16 |
+
uses: actions/setup-python@v5
|
| 17 |
+
with:
|
| 18 |
+
python-version: "3.11"
|
| 19 |
+
|
| 20 |
+
- name: Install dependencies
|
| 21 |
+
run: pip install -r requirements-dev.txt
|
| 22 |
+
|
| 23 |
+
- name: Lint with ruff
|
| 24 |
+
run: ruff check app/ tests/
|
| 25 |
+
|
| 26 |
+
- name: Type check with mypy
|
| 27 |
+
run: mypy app/ --ignore-missing-imports
|
| 28 |
+
continue-on-error: true
|
| 29 |
+
|
| 30 |
+
- name: Run tests
|
| 31 |
+
run: pytest tests/ -v --tb=short
|
.github/workflows/prewarm.yml
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Pre-warm Render
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
schedule:
|
| 5 |
+
# Ping every 10 minutes on weekdays (Mon-Fri) during working hours, 06:00-20:50 UTC
|
| 6 |
+
- cron: "*/10 6-20 * * 1-5"
|
| 7 |
+
|
| 8 |
+
jobs:
|
| 9 |
+
ping:
|
| 10 |
+
runs-on: ubuntu-latest
|
| 11 |
+
steps:
|
| 12 |
+
- name: Ping health endpoint
|
| 13 |
+
run: |
|
| 14 |
+
curl -sf "${{ secrets.RENDER_HEALTH_URL }}/health" || echo "Service cold — will wake on next request"
|
.gitignore
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Project planning docs (confidential)
|
| 2 |
+
*.pdf
|
| 3 |
+
|
| 4 |
+
# Python
|
| 5 |
+
__pycache__/
|
| 6 |
+
*.py[cod]
|
| 7 |
+
*$py.class
|
| 8 |
+
*.so
|
| 9 |
+
*.egg-info/
|
| 10 |
+
dist/
|
| 11 |
+
build/
|
| 12 |
+
.eggs/
|
| 13 |
+
*.egg
|
| 14 |
+
|
| 15 |
+
# Virtual environments
|
| 16 |
+
.venv/
|
| 17 |
+
venv/
|
| 18 |
+
env/
|
| 19 |
+
|
| 20 |
+
# Environment variables
|
| 21 |
+
.env
|
| 22 |
+
.env.local
|
| 23 |
+
.env.production
|
| 24 |
+
|
| 25 |
+
# Keys & secrets
|
| 26 |
+
keys/
|
| 27 |
+
*.pem
|
| 28 |
+
*.key
|
| 29 |
+
|
| 30 |
+
# IDE
|
| 31 |
+
.vscode/
|
| 32 |
+
.idea/
|
| 33 |
+
*.swp
|
| 34 |
+
*.swo
|
| 35 |
+
*~
|
| 36 |
+
|
| 37 |
+
# OS
|
| 38 |
+
.DS_Store
|
| 39 |
+
Thumbs.db
|
| 40 |
+
|
| 41 |
+
# ChromaDB persistence
|
| 42 |
+
chroma_data/
|
| 43 |
+
chromadb/
|
| 44 |
+
|
| 45 |
+
# Test & coverage
|
| 46 |
+
.pytest_cache/
|
| 47 |
+
htmlcov/
|
| 48 |
+
.coverage
|
| 49 |
+
coverage.xml
|
| 50 |
+
|
| 51 |
+
# Node (dashboard)
|
| 52 |
+
dashboard/node_modules/
|
| 53 |
+
dashboard/.next/
|
| 54 |
+
dashboard/out/
|
| 55 |
+
|
| 56 |
+
# Render
|
| 57 |
+
.render/
|
| 58 |
+
|
| 59 |
+
# Claude Code
|
| 60 |
+
.claude/
|
| 61 |
+
|
| 62 |
+
# Screenshots (local only)
|
| 63 |
+
*.png
|
| 64 |
+
|
| 65 |
+
# Misc
|
| 66 |
+
*.log
|
| 67 |
+
*.tmp
|
PROJECT_PLAN.md
ADDED
|
@@ -0,0 +1,704 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CodeProbe — Complete Project Plan & Progress Tracker
|
| 2 |
+
|
| 3 |
+
> **Multi-Agent Code Review System**
|
| 4 |
+
> Author: Ninjacode911 | Started: March 2026 | Target: 10 Weeks
|
| 5 |
+
|
| 6 |
+
---
|
| 7 |
+
|
| 8 |
+
## Table of Contents
|
| 9 |
+
|
| 10 |
+
1. [Project Overview](#1-project-overview)
|
| 11 |
+
2. [Architecture Deep Dive](#2-architecture-deep-dive)
|
| 12 |
+
3. [Complete Tech Stack](#3-complete-tech-stack)
|
| 13 |
+
4. [Directory Structure](#4-directory-structure)
|
| 14 |
+
5. [Week-by-Week Implementation Plan](#5-week-by-week-implementation-plan)
|
| 15 |
+
6. [Non-Coding Tasks](#6-non-coding-tasks)
|
| 16 |
+
7. [GPU / WSL Tasks](#7-gpu--wsl-tasks)
|
| 17 |
+
8. [Data Models & Schemas](#8-data-models--schemas)
|
| 18 |
+
9. [API Endpoints](#9-api-endpoints)
|
| 19 |
+
10. [Agent Prompt Design](#10-agent-prompt-design)
|
| 20 |
+
11. [Evaluation Plan](#11-evaluation-plan)
|
| 21 |
+
12. [Deployment Checklist](#12-deployment-checklist)
|
| 22 |
+
13. [Progress Tracker](#13-progress-tracker)
|
| 23 |
+
|
| 24 |
+
---
|
| 25 |
+
|
| 26 |
+
## 1. Project Overview
|
| 27 |
+
|
| 28 |
+
**What:** A multi-agent PR review system that reviews GitHub pull requests using 4 specialized LangChain agents (Security, Performance, Style, Synthesizer), posts inline GitHub comments, and tracks code health via a Next.js dashboard.
|
| 29 |
+
|
| 30 |
+
**Why:** AI-generated code (41% of GitHub commits) introduces 1.7x more issues. Existing tools use single-pass LLM calls. Sentinel AI uses domain-specialized agents with debate/consensus, RAG context, and static analysis tools.
|
| 31 |
+
|
| 32 |
+
**Core Thesis:** Separate security, performance, and style review into specialized agents — each with distinct prompts, tools, and context — then merge via a Synthesizer into a coherent, ranked, deduplicated review.
|
| 33 |
+
|
| 34 |
+
**Key Differentiators:**
|
| 35 |
+
- Multi-agent specialization (3 domain + 1 synthesizer)
|
| 36 |
+
- Debate & consensus protocol (agents challenge each other before synthesis)
|
| 37 |
+
- Repo-aware RAG context (ChromaDB indexes full repo, not just diff)
|
| 38 |
+
- $0/month architecture (all free tiers)
|
| 39 |
+
- Structured severity scoring (Critical/High/Medium/Low with CWE IDs)
|
| 40 |
+
- Auto-fix suggestions (corrected code snippets inline)
|
| 41 |
+
|
| 42 |
+
---
|
| 43 |
+
|
| 44 |
+
## 2. Architecture Deep Dive
|
| 45 |
+
|
| 46 |
+
### 2.1 Four Layers
|
| 47 |
+
|
| 48 |
+
```
|
| 49 |
+
┌─────────────────────────────────────────────────────┐
|
| 50 |
+
│ GITHUB LAYER │
|
| 51 |
+
│ Webhooks · PR Events · Inline Comments │
|
| 52 |
+
└──────────────────────┬──────────────────────────────┘
|
| 53 |
+
│ pull_request webhook
|
| 54 |
+
┌──────────────────────▼──────────────────────────────┐
|
| 55 |
+
│ ORCHESTRATION LAYER (FastAPI on Render) │
|
| 56 |
+
│ Webhook receiver · HMAC validation · Redis cache │
|
| 57 |
+
│ Agent dispatcher · GitHub API client │
|
| 58 |
+
└──────────────────────┬──────────────────────────────┘
|
| 59 |
+
│ asyncio.gather()
|
| 60 |
+
┌──────────────────────▼──────────────────────────────┐
|
| 61 |
+
│ AGENT LAYER (LangChain ReAct Agents) │
|
| 62 |
+
│ ┌──────────┐ ┌──────────────┐ ┌─────────┐ │
|
| 63 |
+
│ │ Security │ │ Performance │ │ Style │ PARALLEL │
|
| 64 |
+
│ │ Agent │ │ Agent │ │ Agent │ │
|
| 65 |
+
│ └────┬─────┘ └──────┬───────┘ └────┬────┘ │
|
| 66 |
+
│ └──────────────┼───────────────┘ │
|
| 67 |
+
│ ▼ │
|
| 68 |
+
│ ┌──────────────────┐ │
|
| 69 |
+
│ │ Synthesizer │ SEQUENTIAL │
|
| 70 |
+
│ │ Agent │ │
|
| 71 |
+
│ └──────────────────┘ │
|
| 72 |
+
└──────────────────────┬──────────────────────────────┘
|
| 73 |
+
│
|
| 74 |
+
┌──────────────────────▼──────────────────────────────┐
|
| 75 |
+
│ KNOWLEDGE LAYER │
|
| 76 |
+
│ ChromaDB (vector store) · Upstash Redis (cache) │
|
| 77 |
+
│ Neon Postgres (history) · sentence-transformers │
|
| 78 |
+
└─────────────────────────────────────────────────────┘
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
### 2.2 Data Flow (11 Steps)
|
| 82 |
+
|
| 83 |
+
1. GitHub fires `pull_request` webhook → Render FastAPI endpoint
|
| 84 |
+
2. FastAPI validates HMAC-SHA256 signature (GitHub App secret)
|
| 85 |
+
3. Check Upstash Redis: if the commit SHA has already been reviewed → return the cached review
|
| 86 |
+
4. Fetch via GitHub API: PR diff, changed files, full contents, commit history
|
| 87 |
+
5. Build repo context: embed chunks with sentence-transformers → upsert ChromaDB
|
| 88 |
+
6. Dispatch 3 parallel agents: `asyncio.gather(security, performance, style)`
|
| 89 |
+
7. Each agent: system prompt + RAG context → Groq API → static tools → typed findings
|
| 90 |
+
8. Synthesizer: deduplicate + resolve conflicts + Health Score + executive summary
|
| 91 |
+
9. GitHub API: post inline comment per finding + PR summary comment
|
| 92 |
+
10. Write review to Neon Postgres + set Redis cache (TTL: 7 days)
|
| 93 |
+
11. Next.js dashboard fetches from Neon and updates Health Score chart
|
| 94 |
+
|
| 95 |
+
### 2.3 Context Loading (5 Layers per Agent)
|
| 96 |
+
|
| 97 |
+
1. Raw PR diff (changed lines, file paths, additions/deletions)
|
| 98 |
+
2. Relevant file sections from full repo (ChromaDB semantic search on diff)
|
| 99 |
+
3. Recent commit history for changed files (pattern detection)
|
| 100 |
+
4. Repo configuration (language, framework, linter rules, test coverage)
|
| 101 |
+
5. Domain-specific knowledge base (OWASP Top 10, DDIA patterns, style guides)
|
| 102 |
+
|
| 103 |
+
---
|
| 104 |
+
|
| 105 |
+
## 3. Complete Tech Stack
|
| 106 |
+
|
| 107 |
+
### 3.1 LLM & AI
|
| 108 |
+
|
| 109 |
+
| Tool | Free Tier | Purpose |
|
| 110 |
+
|------|-----------|---------|
|
| 111 |
+
| **Groq API** (Llama-3.1-70B) | 14,400 req/day, 500 tok/sec | Primary LLM for all agents |
|
| 112 |
+
| **Gemini 1.5 Flash** | 1M tokens/day | Fallback when Groq exhausted |
|
| 113 |
+
| **LangChain** | OSS | Agent orchestration, LCEL, ReAct framework |
|
| 114 |
+
| **sentence-transformers** | Local (GPU) | Embeddings for ChromaDB — runs on RTX 5070 via WSL |
|
| 115 |
+
|
| 116 |
+
### 3.2 Backend & APIs
|
| 117 |
+
|
| 118 |
+
| Tool | Free Tier | Purpose |
|
| 119 |
+
|------|-----------|---------|
|
| 120 |
+
| **FastAPI** | OSS | Webhook receiver, agent dispatcher, REST API |
|
| 121 |
+
| **Render.com** | Free web service | Hosts backend (30s cold start after 15min idle) |
|
| 122 |
+
| **GitHub Apps API** | Free | Webhooks, PR comments, file fetching |
|
| 123 |
+
| **Upstash Redis** | 10K req/day | Cache PR analysis by commit SHA |
|
| 124 |
+
| **Neon.tech** | Free Postgres 512MB | Review history, Health Score trends |
|
| 125 |
+
|
| 126 |
+
### 3.3 Knowledge & Static Analysis
|
| 127 |
+
|
| 128 |
+
| Tool | Free Tier | Purpose |
|
| 129 |
+
|------|-----------|---------|
|
| 130 |
+
| **ChromaDB** | OSS, in-memory/persisted | Vector store for RAG context retrieval |
|
| 131 |
+
| **Semgrep OSS** | Free, 3K+ rules | SAST rules for Security Agent |
|
| 132 |
+
| **Bandit** | Free | Python AST security analysis |
|
| 133 |
+
| **detect-secrets** | Free | Credential/API key scanning |
|
| 134 |
+
| **radon** | Free | Cyclomatic complexity & maintainability index |
|
| 135 |
+
| **pylint/ESLint/Ruff** | Free | Linting for Style Agent |
|
| 136 |
+
|
| 137 |
+
### 3.4 Frontend & Deployment
|
| 138 |
+
|
| 139 |
+
| Tool | Free Tier | Purpose |
|
| 140 |
+
|------|-----------|---------|
|
| 141 |
+
| **Vercel** | Free hobby tier | Hosts Next.js dashboard |
|
| 142 |
+
| **Next.js** | OSS | Dashboard UI |
|
| 143 |
+
| **Recharts** | OSS | Health Score trend charts, pie charts |
|
| 144 |
+
| **GitHub Actions** | 2K min/month | CI/CD for Sentinel AI itself |
|
| 145 |
+
|
| 146 |
+
---
|
| 147 |
+
|
| 148 |
+
## 4. Directory Structure
|
| 149 |
+
|
| 150 |
+
```
|
| 151 |
+
sentinel-ai/
|
| 152 |
+
├── app/
|
| 153 |
+
│ ├── __init__.py
|
| 154 |
+
│ ├── main.py # FastAPI app, webhook endpoint, lifespan
|
| 155 |
+
│ ├── config.py # Settings via pydantic-settings (env vars)
|
| 156 |
+
│ ├── agents/
|
| 157 |
+
│ │ ├── __init__.py
|
| 158 |
+
│ │ ├── base_agent.py # Shared agent interface / base class
|
| 159 |
+
│ │ ├── security_agent.py # Security ReAct agent
|
| 160 |
+
│ │ ├── performance_agent.py # Performance ReAct agent
|
| 161 |
+
│ │ ├── style_agent.py # Style & Maintainability agent
|
| 162 |
+
│ │ └── synthesizer.py # Synthesizer + Health Score + dedup
|
| 163 |
+
│ ├── tools/
|
| 164 |
+
│ │ ├── __init__.py
|
| 165 |
+
│ │ ├── semgrep_tool.py # LangChain tool wrapper for Semgrep
|
| 166 |
+
│ │ ├── bandit_tool.py # LangChain tool wrapper for Bandit
|
| 167 |
+
│ │ ├── detect_secrets_tool.py # Credential scanner tool
|
| 168 |
+
│ │ ├── radon_tool.py # Complexity metrics tool
|
| 169 |
+
│ │ ├── ast_analyzer.py # Python AST analysis (N+1, patterns)
|
| 170 |
+
│ │ └── linter_tool.py # Ruff/ESLint/pylint subprocess tool
|
| 171 |
+
│ ├── context/
|
| 172 |
+
│ │ ├── __init__.py
|
| 173 |
+
│ │ ├── embedder.py # sentence-transformers embedding pipeline
|
| 174 |
+
│ │ ├── indexer.py # ChromaDB repo indexer (upsert chunks)
|
| 175 |
+
│ │ └── retriever.py # RAG retriever (query ChromaDB for context)
|
| 176 |
+
│ ├── github/
|
| 177 |
+
│ │ ├── __init__.py
|
| 178 |
+
│ │ ├── webhook.py # Webhook validation (HMAC-SHA256)
|
| 179 |
+
│ │ ├── client.py # GitHub API client (fetch diff, post comments)
|
| 180 |
+
│ │ └── comment_formatter.py # Format findings as GitHub Markdown comments
|
| 181 |
+
│ ├── models/
|
| 182 |
+
│ │ ├── __init__.py
|
| 183 |
+
│ │ ├── findings.py # Finding, PRReview Pydantic schemas
|
| 184 |
+
│ │ └── webhook_payloads.py # GitHub webhook event schemas
|
| 185 |
+
│ ├── db/
|
| 186 |
+
│ │ ├── __init__.py
|
| 187 |
+
│ │ ├── postgres.py # Neon Postgres connection + queries
|
| 188 |
+
│ │ └── redis_cache.py # Upstash Redis cache logic
|
| 189 |
+
│ └── services/
|
| 190 |
+
│ ├── __init__.py
|
| 191 |
+
│ ├── orchestrator.py # Main orchestration: dispatch agents, synthesize
|
| 192 |
+
│ └── health_score.py # Health Score calculation formula
|
| 193 |
+
├── dashboard/ # Next.js app (deployed to Vercel)
|
| 194 |
+
│ ├── package.json
|
| 195 |
+
│ ├── next.config.js
|
| 196 |
+
│ ├── tsconfig.json
|
| 197 |
+
│ ├── app/
|
| 198 |
+
│ │ ├── layout.tsx
|
| 199 |
+
│ │ ├── page.tsx # / — Repository Overview
|
| 200 |
+
│ │ ├── repos/
|
| 201 |
+
│ │ │ └── [owner]/
|
| 202 |
+
│ │ │ └── [repo]/
|
| 203 |
+
│ │ │ ├── page.tsx # Repo Detail (trends, charts)
|
| 204 |
+
│ │ │ └── prs/
|
| 205 |
+
│ │ │ └── [number]/
|
| 206 |
+
│ │ │ └── page.tsx # PR Review Detail
|
| 207 |
+
│ │ └── api/
|
| 208 |
+
│ │ ├── repos/
|
| 209 |
+
│ │ │ └── route.ts # API proxy to FastAPI backend
|
| 210 |
+
│ │ └── health/
|
| 211 |
+
│ │ └── route.ts
|
| 212 |
+
│ ├── components/
|
| 213 |
+
│ │ ├── HealthScoreRing.tsx # Circular gauge 0-100
|
| 214 |
+
│ │ ├── FindingsTable.tsx # Sortable, filterable findings
|
| 215 |
+
│ │ ├── TrendChart.tsx # Recharts LineChart
|
| 216 |
+
│ │ ├── AgentBreakdown.tsx # 3-column agent summary cards
|
| 217 |
+
│ │ ├── SeverityBadge.tsx # Color-coded severity pill
|
| 218 |
+
│ │ └── Navbar.tsx
|
| 219 |
+
│ └── lib/
|
| 220 |
+
│ ├── api.ts # Fetch wrapper for backend API
|
| 221 |
+
│ └── types.ts # TypeScript types matching backend schemas
|
| 222 |
+
├── tests/
|
| 223 |
+
│ ├── __init__.py
|
| 224 |
+
│ ├── conftest.py # Shared fixtures
|
| 225 |
+
│ ├── unit/
|
| 226 |
+
│ │ ├── test_findings_schema.py
|
| 227 |
+
│ │ ├── test_synthesizer_dedup.py
|
| 228 |
+
│ │ ├── test_webhook_validation.py
|
| 229 |
+
│ │ ├── test_redis_cache.py
|
| 230 |
+
│ │ └── test_health_score.py
|
| 231 |
+
│ ├── integration/
|
| 232 |
+
│ │ ├── test_full_pipeline.py
|
| 233 |
+
│ │ └── test_github_posting.py
|
| 234 |
+
│ └── eval/
|
| 235 |
+
│ ├── dataset/ # 20-PR benchmark dataset (JSON fixtures)
|
| 236 |
+
│ ├── run_eval.py # Evaluation harness
|
| 237 |
+
│ └── metrics.py # Precision, recall, latency tracking
|
| 238 |
+
├── prompts/
|
| 239 |
+
│ ├── security_system.md # Security Agent system prompt
|
| 240 |
+
│ ├── performance_system.md # Performance Agent system prompt
|
| 241 |
+
│ ├── style_system.md # Style Agent system prompt
|
| 242 |
+
│ └── synthesizer_system.md # Synthesizer system prompt
|
| 243 |
+
├── knowledge/
|
| 244 |
+
│ ├── owasp_top10_2025.md # OWASP cheat sheet for Security RAG
|
| 245 |
+
│ ├── ddia_patterns.md # DDIA patterns for Performance RAG
|
| 246 |
+
│ └── style_guides/ # Language style guides for Style RAG
|
| 247 |
+
├── .env.example # Template for env vars (no secrets)
|
| 248 |
+
├── .gitignore
|
| 249 |
+
├── requirements.txt # Python dependencies
|
| 250 |
+
├── requirements-dev.txt # Dev/test dependencies
|
| 251 |
+
├── render.yaml # Render deployment config
|
| 252 |
+
├── sentinel.yml.example # Per-repo config template
|
| 253 |
+
├── Dockerfile # For Render deployment
|
| 254 |
+
├── pyproject.toml # Project metadata + tool configs
|
| 255 |
+
└── README.md # Installation, usage, architecture docs
|
| 256 |
+
```
|
| 257 |
+
|
| 258 |
+
---
|
| 259 |
+
|
| 260 |
+
## 5. Week-by-Week Implementation Plan
|
| 261 |
+
|
| 262 |
+
### WEEK 1: Foundation & Setup
|
| 263 |
+
**Goal:** Project skeleton running locally, all external services provisioned.
|
| 264 |
+
|
| 265 |
+
| # | Task | Type | Status |
|
| 266 |
+
|---|------|------|--------|
|
| 267 |
+
| 1.1 | Initialize git repo, create directory structure | Code | [ ] |
|
| 268 |
+
| 1.2 | Set up Python virtual environment + requirements.txt | Code | [ ] |
|
| 269 |
+
| 1.3 | Register GitHub App (github.com/settings/apps) | Config | [ ] |
|
| 270 |
+
| 1.4 | Provision Neon.tech Postgres database + create `pr_reviews` table | Config | [ ] |
|
| 271 |
+
| 1.5 | Provision Upstash Redis instance | Config | [ ] |
|
| 272 |
+
| 1.6 | Get Groq API key (console.groq.com) | Config | [ ] |
|
| 273 |
+
| 1.7 | Get Gemini API key (aistudio.google.com) | Config | [ ] |
|
| 274 |
+
| 1.8 | Create FastAPI skeleton (`app/main.py`) with health endpoint | Code | [ ] |
|
| 275 |
+
| 1.9 | Create `app/config.py` with pydantic-settings (all env vars) | Code | [ ] |
|
| 276 |
+
| 1.10 | Create Pydantic models (`Finding`, `PRReview` schemas) | Code | [ ] |
|
| 277 |
+
| 1.11 | Set up .env.example, .gitignore, pyproject.toml | Code | [ ] |
|
| 278 |
+
| 1.12 | Deploy FastAPI skeleton to Render (verify /health works) | Deploy | [ ] |
|
| 279 |
+
| 1.13 | Write unit tests for Finding schema validation | Test | [ ] |
|
| 280 |
+
| 1.14 | Set up GitHub Actions CI (lint + test on push) | CI/CD | [ ] |
|
| 281 |
+
|
| 282 |
+
### WEEK 2: GitHub Integration
|
| 283 |
+
**Goal:** Receive webhooks, validate signatures, fetch PR data, post dummy comment.
|
| 284 |
+
|
| 285 |
+
| # | Task | Type | Status |
|
| 286 |
+
|---|------|------|--------|
|
| 287 |
+
| 2.1 | Implement HMAC-SHA256 webhook validation (`app/github/webhook.py`) | Code | [ ] |
|
| 288 |
+
| 2.2 | Implement GitHub API client — fetch PR diff (`app/github/client.py`) | Code | [ ] |
|
| 289 |
+
| 2.3 | Implement GitHub API client — fetch file contents | Code | [ ] |
|
| 290 |
+
| 2.4 | Implement GitHub API client — fetch commit history | Code | [ ] |
|
| 291 |
+
| 2.5 | Implement GitHub API client — post inline review comments | Code | [ ] |
|
| 292 |
+
| 2.6 | Implement GitHub API client — post PR summary comment | Code | [ ] |
|
| 293 |
+
| 2.7 | Create webhook endpoint (`POST /webhook/github`) in main.py | Code | [ ] |
|
| 294 |
+
| 2.8 | Implement comment formatter (`app/github/comment_formatter.py`) | Code | [ ] |
|
| 295 |
+
| 2.9 | Set up ngrok for local webhook testing | Config | [ ] |
|
| 296 |
+
| 2.10 | End-to-end test: open PR on test repo → dummy comment posted | Test | [ ] |
|
| 297 |
+
| 2.11 | Implement Redis cache check (skip if commit SHA already reviewed) | Code | [ ] |
|
| 298 |
+
| 2.12 | Write unit tests for HMAC validation (valid + invalid signatures) | Test | [ ] |
|
| 299 |
+
| 2.13 | Write unit tests for Redis cache hit/miss logic | Test | [ ] |
|
| 300 |
+
|
| 301 |
+
### WEEK 3: Security Agent v1
|
| 302 |
+
**Goal:** Security Agent analyzes diffs, returns structured findings with CWE IDs.
|
| 303 |
+
|
| 304 |
+
| # | Task | Type | Status |
|
| 305 |
+
|---|------|------|--------|
|
| 306 |
+
| 3.1 | Install & configure Semgrep OSS with security rulesets | Config | [ ] |
|
| 307 |
+
| 3.2 | Create Semgrep LangChain tool (`app/tools/semgrep_tool.py`) | Code | [ ] |
|
| 308 |
+
| 3.3 | Install & configure Bandit for Python AST security analysis | Config | [ ] |
|
| 309 |
+
| 3.4 | Create Bandit LangChain tool (`app/tools/bandit_tool.py`) | Code | [ ] |
|
| 310 |
+
| 3.5 | Install & configure detect-secrets | Config | [ ] |
|
| 311 |
+
| 3.6 | Create detect-secrets LangChain tool (`app/tools/detect_secrets_tool.py`) | Code | [ ] |
|
| 312 |
+
| 3.7 | Write Security Agent system prompt (`prompts/security_system.md`) | Prompt | [ ] |
|
| 313 |
+
| 3.8 | Prepare OWASP Top 10 (2025) knowledge base (`knowledge/owasp_top10_2025.md`) | Data | [ ] |
|
| 314 |
+
| 3.9 | Implement Security Agent ReAct loop (`app/agents/security_agent.py`) | Code | [ ] |
|
| 315 |
+
| 3.10 | Implement base agent interface (`app/agents/base_agent.py`) | Code | [ ] |
|
| 316 |
+
| 3.11 | Set up Groq LLM client via LangChain (`ChatGroq`) | Code | [ ] |
|
| 317 |
+
| 3.12 | Implement structured output parsing (JSON → Finding objects) | Code | [ ] |
|
| 318 |
+
| 3.13 | Create 10 synthetic security-vulnerable PRs for testing | Data | [ ] |
|
| 319 |
+
| 3.14 | Evaluate Security Agent on synthetic dataset — measure precision/recall | Eval | [ ] |
|
| 320 |
+
| 3.15 | Iterate on system prompt based on eval results | Prompt | [ ] |
|
| 321 |
+
|
| 322 |
+
### WEEK 4: Performance Agent v1
|
| 323 |
+
**Goal:** Performance Agent detects N+1 queries, complexity issues, returns findings.
|
| 324 |
+
|
| 325 |
+
| # | Task | Type | Status |
|
| 326 |
+
|---|------|------|--------|
|
| 327 |
+
| 4.1 | Create Python AST analyzer tool (`app/tools/ast_analyzer.py`) | Code | [ ] |
|
| 328 |
+
| 4.2 | Implement N+1 query pattern detector (Django/SQLAlchemy ORM patterns) | Code | [ ] |
|
| 329 |
+
| 4.3 | Create radon complexity tool (`app/tools/radon_tool.py`) | Code | [ ] |
|
| 330 |
+
| 4.4 | Write Performance Agent system prompt (`prompts/performance_system.md`) | Prompt | [ ] |
|
| 331 |
+
| 4.5 | Prepare DDIA patterns knowledge base (`knowledge/ddia_patterns.md`) | Data | [ ] |
|
| 332 |
+
| 4.6 | Implement Performance Agent ReAct loop (`app/agents/performance_agent.py`) | Code | [ ] |
|
| 333 |
+
| 4.7 | Fetch 10 Django PRs with known performance issues for testing | Data | [ ] |
|
| 334 |
+
| 4.8 | Evaluate Performance Agent on Django PR dataset | Eval | [ ] |
|
| 335 |
+
| 4.9 | Iterate on system prompt based on eval results | Prompt | [ ] |
|
| 336 |
+
|
| 337 |
+
### WEEK 5: Style Agent v1
|
| 338 |
+
**Goal:** Style Agent checks naming, complexity, dead code, test coverage gaps.
|
| 339 |
+
|
| 340 |
+
| # | Task | Type | Status |
|
| 341 |
+
|---|------|------|--------|
|
| 342 |
+
| 5.1 | Create linter tool wrapper — Ruff/ESLint/pylint (`app/tools/linter_tool.py`) | Code | [ ] |
|
| 343 |
+
| 5.2 | Implement dead code detector (unused imports, unreachable branches) | Code | [ ] |
|
| 344 |
+
| 5.3 | Write Style Agent system prompt (`prompts/style_system.md`) | Prompt | [ ] |
|
| 345 |
+
| 5.4 | Prepare language style guides knowledge base (`knowledge/style_guides/`) | Data | [ ] |
|
| 346 |
+
| 5.5 | Implement Style Agent ReAct loop (`app/agents/style_agent.py`) | Code | [ ] |
|
| 347 |
+
| 5.6 | Fetch 10 Exercism PRs with style/refactoring issues | Data | [ ] |
|
| 348 |
+
| 5.7 | Evaluate Style Agent on Exercism dataset | Eval | [ ] |
|
| 349 |
+
| 5.8 | Iterate on system prompt based on eval results | Prompt | [ ] |
|
| 350 |
+
|
| 351 |
+
### WEEK 6: ChromaDB + RAG Context
|
| 352 |
+
**Goal:** Full RAG pipeline — embed repo, retrieve context, inject into agents.
|
| 353 |
+
|
| 354 |
+
| # | Task | Type | Status |
|
| 355 |
+
|---|------|------|--------|
|
| 356 |
+
| 6.1 | Set up sentence-transformers embedding pipeline (`app/context/embedder.py`) | Code | [ ] |
|
| 357 |
+
| 6.2 | **Run embedding model on RTX 5070 via WSL** — benchmark speed | GPU | [ ] |
|
| 358 |
+
| 6.3 | Implement ChromaDB repo indexer (`app/context/indexer.py`) — chunk files, upsert | Code | [ ] |
|
| 359 |
+
| 6.4 | Implement RAG retriever (`app/context/retriever.py`) — query by diff content | Code | [ ] |
|
| 360 |
+
| 6.5 | Integrate RAG context into Security Agent | Code | [ ] |
|
| 361 |
+
| 6.6 | Integrate RAG context into Performance Agent | Code | [ ] |
|
| 362 |
+
| 6.7 | Integrate RAG context into Style Agent | Code | [ ] |
|
| 363 |
+
| 6.8 | Evaluate: does cross-file RAG context improve recall vs. diff-only? | Eval | [ ] |
|
| 364 |
+
| 6.9 | Optimize chunk size and retrieval top-k for quality vs. latency | Code | [ ] |
|
| 365 |
+
| 6.10 | Limit repo index to 500 most recently changed files (Render memory constraint) | Code | [ ] |
|
| 366 |
+
|
| 367 |
+
### WEEK 7: Synthesizer Agent
|
| 368 |
+
**Goal:** Deduplication, conflict resolution, Health Score, executive summary, full pipeline.
|
| 369 |
+
|
| 370 |
+
| # | Task | Type | Status |
|
| 371 |
+
|---|------|------|--------|
|
| 372 |
+
| 7.1 | Write Synthesizer system prompt (`prompts/synthesizer_system.md`) | Prompt | [ ] |
|
| 373 |
+
| 7.2 | Implement deduplication logic (cosine similarity on findings via ChromaDB) | Code | [ ] |
|
| 374 |
+
| 7.3 | Implement severity conflict resolution (Security > Performance > Style precedence) | Code | [ ] |
|
| 375 |
+
| 7.4 | Implement composite re-ranking: severity × exploitability × fix_complexity | Code | [ ] |
|
| 376 |
+
| 7.5 | Implement PR Health Score formula (0-100) (`app/services/health_score.py`) | Code | [ ] |
|
| 377 |
+
| 7.6 | Implement executive summary generation (3-5 sentences) | Code | [ ] |
|
| 378 |
+
| 7.7 | Implement auto-block logic (Critical findings → block merge recommendation) | Code | [ ] |
|
| 379 |
+
| 7.8 | Implement Synthesizer Agent (`app/agents/synthesizer.py`) | Code | [ ] |
|
| 380 |
+
| 7.9 | Build main orchestrator (`app/services/orchestrator.py`) — ties everything together | Code | [ ] |
|
| 381 |
+
| 7.10 | Implement Gemini Flash fallback when Groq quota exhausted | Code | [ ] |
|
| 382 |
+
| 7.11 | Full end-to-end pipeline test: PR → agents → synthesizer → GitHub comments | Test | [ ] |
|
| 383 |
+
| 7.12 | Write unit tests for Health Score formula | Test | [ ] |
|
| 384 |
+
| 7.13 | Write unit tests for deduplication with synthetic conflicting findings | Test | [ ] |
|
| 385 |
+
| 7.14 | Implement Neon Postgres write (store review record) | Code | [ ] |
|
| 386 |
+
|
| 387 |
+
### WEEK 8: Next.js Dashboard
|
| 388 |
+
**Goal:** Dashboard on Vercel showing review history, Health Scores, charts.
|
| 389 |
+
|
| 390 |
+
| # | Task | Type | Status |
|
| 391 |
+
|---|------|------|--------|
|
| 392 |
+
| 8.1 | Initialize Next.js app in `dashboard/` with TypeScript | Code | [ ] |
|
| 393 |
+
| 8.2 | Deploy to Vercel (connect GitHub repo) | Deploy | [ ] |
|
| 394 |
+
| 8.3 | Create TypeScript types matching backend schemas (`lib/types.ts`) | Code | [ ] |
|
| 395 |
+
| 8.4 | Create API fetch wrapper (`lib/api.ts`) — calls FastAPI backend | Code | [ ] |
|
| 396 |
+
| 8.5 | Build `HealthScoreRing` component (circular gauge, animated) | Code | [ ] |
|
| 397 |
+
| 8.6 | Build `SeverityBadge` component (color-coded pills) | Code | [ ] |
|
| 398 |
+
| 8.7 | Build `TrendChart` component (Recharts LineChart, 30-day trend) | Code | [ ] |
|
| 399 |
+
| 8.8 | Build `FindingsTable` component (sortable, filterable) | Code | [ ] |
|
| 400 |
+
| 8.9 | Build `AgentBreakdown` component (3-column cards) | Code | [ ] |
|
| 401 |
+
| 8.10 | Build `/` page — Repository Overview (connected repos, avg scores) | Code | [ ] |
|
| 402 |
+
| 8.11 | Build `/repos/[owner]/[repo]` page — Repo Detail (charts, PR list) | Code | [ ] |
|
| 403 |
+
| 8.12 | Build `/repos/[owner]/[repo]/prs/[number]` page — PR Review Detail | Code | [ ] |
|
| 404 |
+
| 8.13 | Add FastAPI CORS middleware for Vercel domain | Code | [ ] |
|
| 405 |
+
| 8.14 | Implement REST API endpoints on FastAPI side for dashboard | Code | [ ] |
|
| 406 |
+
|
| 407 |
+
### WEEK 9: Polish & Evaluation
|
| 408 |
+
**Goal:** Full benchmark, prompt tuning, latency optimization, documentation.
|
| 409 |
+
|
| 410 |
+
| # | Task | Type | Status |
|
| 411 |
+
|---|------|------|--------|
|
| 412 |
+
| 9.1 | Curate full 20-PR benchmark dataset (Django, Next.js, synthetic, Exercism) | Data | [ ] |
|
| 413 |
+
| 9.2 | Build evaluation harness (`tests/eval/run_eval.py`) | Code | [ ] |
|
| 414 |
+
| 9.3 | Run full benchmark — measure precision, recall, latency per agent | Eval | [ ] |
|
| 415 |
+
| 9.4 | Tune agent prompts to reduce false positives (target: <30% FP rate) | Prompt | [ ] |
|
| 416 |
+
| 9.5 | Implement confidence threshold: findings <0.6 shown as 'Suggestions' | Code | [ ] |
|
| 417 |
+
| 9.6 | Latency optimization: measure p50/p95/p99 per PR size bucket | Eval | [ ] |
|
| 418 |
+
| 9.7 | Optimize Groq API calls (reduce token usage, cache prompts) | Code | [ ] |
|
| 419 |
+
| 9.8 | Write comprehensive README.md | Docs | [ ] |
|
| 420 |
+
| 9.9 | Write installation guide in README | Docs | [ ] |
|
| 421 |
+
| 9.10 | Add GitHub Actions pre-warm cron (ping /health every 10min) | CI/CD | [ ] |
|
| 422 |
+
|
| 423 |
+
### WEEK 10: Launch & Promotion
|
| 424 |
+
**Goal:** Live on GitHub Marketplace, installed on public repos, launch posts published.
|
| 425 |
+
|
| 426 |
+
| # | Task | Type | Status |
|
| 427 |
+
|---|------|------|--------|
|
| 428 |
+
| 10.1 | Install Sentinel AI on 3 public open-source repos | Launch | [ ] |
|
| 429 |
+
| 10.2 | Record demo video (screen recording: PR opened → comments posted) | Content | [ ] |
|
| 430 |
+
| 10.3 | Write Dev.to / HackerNews launch post | Content | [ ] |
|
| 431 |
+
| 10.4 | Write LinkedIn demo post | Content | [ ] |
|
| 432 |
+
| 10.5 | Submit to GitHub Marketplace (needs privacy policy, logo, description) | Launch | [ ] |
|
| 433 |
+
| 10.6 | Create sentinel.yml.example per-repo config template | Code | [ ] |
|
| 434 |
+
| 10.7 | Monitor first 48 hours — fix any production bugs | Ops | [ ] |
|
| 435 |
+
|
| 436 |
+
---
|
| 437 |
+
|
| 438 |
+
## 6. Non-Coding Tasks
|
| 439 |
+
|
| 440 |
+
These tasks don't involve writing project code but are essential for the project:
|
| 441 |
+
|
| 442 |
+
### 6.1 External Service Provisioning
|
| 443 |
+
|
| 444 |
+
| Service | Action | URL | Notes |
|
| 445 |
+
|---------|--------|-----|-------|
|
| 446 |
+
| **GitHub App** | Register new app | github.com/settings/apps/new | Need: App ID, Private Key (.pem), Webhook Secret |
|
| 447 |
+
| **Groq** | Get API key | console.groq.com | Free: 14,400 req/day |
|
| 448 |
+
| **Google AI Studio** | Get Gemini key | aistudio.google.com | Free: 1M tokens/day |
|
| 449 |
+
| **Neon.tech** | Create Postgres DB | console.neon.tech | Free: 512MB, create `pr_reviews` table |
|
| 450 |
+
| **Upstash** | Create Redis instance | console.upstash.com | Free: 10K req/day |
|
| 451 |
+
| **Render** | Create web service | dashboard.render.com | Free tier, connect GitHub repo |
|
| 452 |
+
| **Vercel** | Create project | vercel.com/new | Free hobby tier, connect dashboard/ |
|
| 453 |
+
| **ngrok** | Install for local testing | ngrok.com | Free: 1 tunnel |
|
| 454 |
+
|
| 455 |
+
### 6.2 GitHub App Configuration
|
| 456 |
+
|
| 457 |
+
**Permissions required:**
|
| 458 |
+
- Pull requests: Read & Write
|
| 459 |
+
- Contents: Read
|
| 460 |
+
- Metadata: Read
|
| 461 |
+
- Commit statuses: Write (optional)
|
| 462 |
+
|
| 463 |
+
**Webhook events to subscribe:**
|
| 464 |
+
- `pull_request` (opened, synchronize, reopened, ready_for_review)
|
| 465 |
+
- `pull_request_review_comment` (for @sentinel-ai re-review)
|
| 466 |
+
|
| 467 |
+
### 6.3 Data Curation Tasks
|
| 468 |
+
|
| 469 |
+
| Dataset | Source | Count | Purpose |
|
| 470 |
+
|---------|--------|-------|---------|
|
| 471 |
+
| Synthetic security PRs | Hand-crafted | 10 PRs | SQL injection, XSS, IDOR, hardcoded secrets |
|
| 472 |
+
| Django security PRs | github.com/django/django | 5 PRs | Real-world Python security fixes |
|
| 473 |
+
| Next.js performance PRs | github.com/vercel/next.js | 5 PRs | JS/TS performance changes |
|
| 474 |
+
| Exercism style PRs | github.com/exercism | 5 PRs | Naming, complexity, documentation issues |
|
| 475 |
+
| Mixed benchmark set | All above | 20 PRs | Full evaluation benchmark |
|
| 476 |
+
|
| 477 |
+
### 6.4 Knowledge Base Curation
|
| 478 |
+
|
| 479 |
+
| Document | Source | For Agent |
|
| 480 |
+
|----------|--------|-----------|
|
| 481 |
+
| OWASP Top 10 (2025) | owasp.org | Security Agent RAG |
|
| 482 |
+
| DDIA performance patterns | "Designing Data-Intensive Applications" | Performance Agent RAG |
|
| 483 |
+
| Python style guide (PEP 8) | python.org | Style Agent RAG |
|
| 484 |
+
| JavaScript style guide | Various (Airbnb, Google) | Style Agent RAG |
|
| 485 |
+
| TypeScript best practices | typescript-eslint.io | Style Agent RAG |
|
| 486 |
+
|
| 487 |
+
---
|
| 488 |
+
|
| 489 |
+
## 7. GPU / WSL Tasks
|
| 490 |
+
|
| 491 |
+
Your **RTX 5070** with WSL will be used for:
|
| 492 |
+
|
| 493 |
+
### 7.1 sentence-transformers Embedding (Required)
|
| 494 |
+
|
| 495 |
+
**No training needed** — these are pre-trained models used for embedding generation.
|
| 496 |
+
|
| 497 |
+
```
|
| 498 |
+
Model: all-MiniLM-L6-v2 (or all-mpnet-base-v2 for higher quality)
|
| 499 |
+
Task: Embed code chunks for ChromaDB indexing
|
| 500 |
+
Where: Runs locally during repo indexing (can also run on Render CPU, slower)
|
| 501 |
+
GPU benefit: ~10-50x faster embedding generation vs CPU
|
| 502 |
+
```
|
| 503 |
+
|
| 504 |
+
**Setup steps:**
|
| 505 |
+
1. Ensure the CUDA toolkit is installed in WSL (`nvidia-smi` should show the RTX 5070)
|
| 506 |
+
2. `pip install sentence-transformers torch` (with CUDA support)
|
| 507 |
+
3. Benchmark: embed 1000 code chunks, measure time GPU vs CPU
|
| 508 |
+
4. Decision: if embedding is fast enough on CPU, skip GPU for deployment simplicity
|
| 509 |
+
|
| 510 |
+
### 7.2 Local LLM Testing (Optional, Recommended)
|
| 511 |
+
|
| 512 |
+
Running a local LLM for testing avoids burning Groq API quota during development:
|
| 513 |
+
|
| 514 |
+
```
|
| 515 |
+
Model: Llama-3.1-8B-Instruct (via Ollama or vLLM)
|
| 516 |
+
Task: Test agent prompts locally before hitting Groq
|
| 517 |
+
GPU benefit: Full inference locally, no API calls, no quota burn
|
| 518 |
+
```
|
| 519 |
+
|
| 520 |
+
**Setup steps:**
|
| 521 |
+
1. Install Ollama in WSL: `curl -fsSL https://ollama.com/install.sh | sh`
|
| 522 |
+
2. Pull model: `ollama pull llama3.1:8b`
|
| 523 |
+
3. Use for prompt iteration — switch to Groq (70B) for production quality
|
| 524 |
+
|
| 525 |
+
### 7.3 What You Do NOT Need to Train
|
| 526 |
+
|
| 527 |
+
| Item | Reason |
|
| 528 |
+
|------|--------|
|
| 529 |
+
| LLM (Llama-3.3-70B) | Used via Groq API — inference only, no fine-tuning |
|
| 530 |
+
| sentence-transformers | Pre-trained model, no fine-tuning needed for code embeddings |
|
| 531 |
+
| Semgrep/Bandit/radon | Rule-based tools, no ML training |
|
| 532 |
+
| Agent prompts | Iterative prompt engineering, not model training |
|
| 533 |
+
|
| 534 |
+
**Bottom line:** This project is an **inference and orchestration** project, not a training project. Your GPU is used for fast local embeddings and optional local LLM testing — no model training required.
|
| 535 |
+
|
| 536 |
+
---
|
| 537 |
+
|
| 538 |
+
## 8. Data Models & Schemas
|
| 539 |
+
|
| 540 |
+
### 8.1 Finding (per agent output)
|
| 541 |
+
|
| 542 |
+
```python
|
| 543 |
+
class Finding(BaseModel):
|
| 544 |
+
agent: Literal['security', 'performance', 'style']
|
| 545 |
+
file_path: str # e.g. 'src/auth/login.py'
|
| 546 |
+
line_start: int
|
| 547 |
+
line_end: int
|
| 548 |
+
severity: Literal['critical', 'high', 'medium', 'low']
|
| 549 |
+
category: str # e.g. 'sql_injection', 'n+1_query', 'naming'
|
| 550 |
+
title: str # Short one-liner
|
| 551 |
+
description: str # Full explanation
|
| 552 |
+
suggested_fix: str # Corrected code snippet
|
| 553 |
+
cwe_id: Optional[str] # For security findings (e.g. 'CWE-89')
|
| 554 |
+
confidence: float # 0.0 – 1.0
|
| 555 |
+
```
|
| 556 |
+
|
| 557 |
+
### 8.2 SynthesizedReview (Synthesizer output)
|
| 558 |
+
|
| 559 |
+
```python
|
| 560 |
+
class SynthesizedReview(BaseModel):
|
| 561 |
+
health_score: int # 0-100
|
| 562 |
+
executive_summary: str # 3-5 sentences
|
| 563 |
+
recommendation: Literal['approve', 'request_changes', 'block']
|
| 564 |
+
findings: List[Finding] # Deduplicated, re-ranked
|
| 565 |
+
critical_count: int
|
| 566 |
+
high_count: int
|
| 567 |
+
medium_count: int
|
| 568 |
+
low_count: int
|
| 569 |
+
duration_ms: int
|
| 570 |
+
```
|
| 571 |
+
|
| 572 |
+
### 8.3 PR Review Record (Neon Postgres)
|
| 573 |
+
|
| 574 |
+
```sql
|
| 575 |
+
CREATE TABLE pr_reviews (
|
| 576 |
+
id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
|
| 577 |
+
repo_full_name TEXT NOT NULL,
|
| 578 |
+
pr_number INT NOT NULL,
|
| 579 |
+
commit_sha TEXT NOT NULL,
|
| 580 |
+
health_score INT NOT NULL,
|
| 581 |
+
critical_count INT DEFAULT 0,
|
| 582 |
+
high_count INT DEFAULT 0,
|
| 583 |
+
medium_count INT DEFAULT 0,
|
| 584 |
+
low_count INT DEFAULT 0,
|
| 585 |
+
summary TEXT,
|
| 586 |
+
findings JSONB NOT NULL,
|
| 587 |
+
duration_ms INT,
|
| 588 |
+
created_at TIMESTAMPTZ DEFAULT NOW()
|
| 589 |
+
);
|
| 590 |
+
|
| 591 |
+
CREATE INDEX idx_pr_reviews_repo ON pr_reviews(repo_full_name);
|
| 592 |
+
CREATE INDEX idx_pr_reviews_sha ON pr_reviews(commit_sha);
|
| 593 |
+
```
|
| 594 |
+
|
| 595 |
+
---
|
| 596 |
+
|
| 597 |
+
## 9. API Endpoints
|
| 598 |
+
|
| 599 |
+
| Endpoint | Method | Description |
|
| 600 |
+
|----------|--------|-------------|
|
| 601 |
+
| `POST /webhook/github` | POST | Receive GitHub webhook, validate HMAC, enqueue analysis |
|
| 602 |
+
| `GET /api/repos/{owner}/{repo}/reviews` | GET | Paginated PR review list + Health Score trend |
|
| 603 |
+
| `GET /api/repos/{owner}/{repo}/reviews/{pr_number}` | GET | Full findings for specific PR |
|
| 604 |
+
| `GET /api/repos/{owner}/{repo}/stats` | GET | Aggregate stats: avg score, top categories, 30-day trend |
|
| 605 |
+
| `POST /api/repos/{owner}/{repo}/reanalyze/{pr_number}` | POST | Re-trigger analysis (bypass cache) |
|
| 606 |
+
| `GET /health` | GET | Health check: agent status, Groq quota remaining |
|
| 607 |
+
|
| 608 |
+
---
|
| 609 |
+
|
| 610 |
+
## 10. Agent Prompt Design
|
| 611 |
+
|
| 612 |
+
Each agent prompt must include:
|
| 613 |
+
|
| 614 |
+
1. **Role definition** — who the agent is (e.g., "senior AppSec engineer")
|
| 615 |
+
2. **Scope boundaries** — what to look for and what to ignore
|
| 616 |
+
3. **Output schema** — exact JSON structure expected
|
| 617 |
+
4. **Severity guidelines** — when to use Critical vs. High vs. Medium vs. Low
|
| 618 |
+
5. **Confidence scoring** — how to self-assess confidence (0.0-1.0)
|
| 619 |
+
6. **Examples** — 2-3 few-shot examples of good findings
|
| 620 |
+
7. **Anti-patterns** — common false positives to avoid
|
| 621 |
+
|
| 622 |
+
Prompts are stored in `prompts/` as Markdown files and loaded at agent initialization.
|
| 623 |
+
|
| 624 |
+
---
|
| 625 |
+
|
| 626 |
+
## 11. Evaluation Plan
|
| 627 |
+
|
| 628 |
+
### 11.1 Metrics
|
| 629 |
+
|
| 630 |
+
| Metric | Target | Formula |
|
| 631 |
+
|--------|--------|---------|
|
| 632 |
+
| Security precision | >70% | true_positives / (true_positives + false_positives) |
|
| 633 |
+
| Performance recall | >60% | true_positives / (true_positives + false_negatives) |
|
| 634 |
+
| Deduplication rate | >15% | duplicates_removed / total_findings |
|
| 635 |
+
| e2e latency (p95) | <20s | Time from webhook to first comment posted |
|
| 636 |
+
| Groq quota usage | <10K/day | Total API calls per day |
|
| 637 |
+
| System uptime | >95% | (total_time - downtime) / total_time |
|
| 638 |
+
|
| 639 |
+
### 11.2 Evaluation Harness
|
| 640 |
+
|
| 641 |
+
Located in `tests/eval/`:
|
| 642 |
+
- `dataset/` — 20 PRs as JSON fixtures (diff, expected findings, ground truth labels)
|
| 643 |
+
- `run_eval.py` — Runs each PR through full pipeline, compares output vs ground truth
|
| 644 |
+
- `metrics.py` — Computes precision, recall, F1, latency percentiles
|
| 645 |
+
- Results are logged to the console and, optionally, to LangSmith (hosted free tier)
|
| 646 |
+
|
| 647 |
+
---
|
| 648 |
+
|
| 649 |
+
## 12. Deployment Checklist
|
| 650 |
+
|
| 651 |
+
### Render (FastAPI Backend)
|
| 652 |
+
- [ ] `render.yaml` configured with build + start commands
|
| 653 |
+
- [ ] Environment variables set in Render dashboard
|
| 654 |
+
- [ ] Health check endpoint (`/health`) configured
|
| 655 |
+
- [ ] Auto-deploy from `main` branch enabled
|
| 656 |
+
|
| 657 |
+
### Vercel (Next.js Dashboard)
|
| 658 |
+
- [ ] Connected to GitHub repo `dashboard/` directory
|
| 659 |
+
- [ ] Environment variable: `NEXT_PUBLIC_API_URL` pointing to Render backend
|
| 660 |
+
- [ ] Custom domain (optional)
|
| 661 |
+
|
| 662 |
+
### GitHub App
|
| 663 |
+
- [ ] App registered with correct permissions
|
| 664 |
+
- [ ] Webhook URL set to Render endpoint (`/webhook/github`)
|
| 665 |
+
- [ ] Private key (.pem) downloaded and stored securely
|
| 666 |
+
- [ ] App installed on test repo for development
|
| 667 |
+
|
| 668 |
+
### GitHub Actions
|
| 669 |
+
- [ ] CI workflow: lint (ruff) + test (pytest) on push/PR
|
| 670 |
+
- [ ] Pre-warm cron: ping /health every 10 minutes during working hours
|
| 671 |
+
|
| 672 |
+
---
|
| 673 |
+
|
| 674 |
+
## 13. Progress Tracker
|
| 675 |
+
|
| 676 |
+
### Overall Status
|
| 677 |
+
|
| 678 |
+
| Week | Milestone | Status | Notes |
|
| 679 |
+
|------|-----------|--------|-------|
|
| 680 |
+
| 1 | Foundation & Setup | COMPLETE | All services provisioned, project scaffolded |
|
| 681 |
+
| 2 | GitHub Integration | COMPLETE | E2E tested: webhook → fetch → comment on PR #1 |
|
| 682 |
+
| 3 | Security Agent v1 | COMPLETE | Bandit + Llama-3.3-70B, live-tested on PR #3, 4 findings |
|
| 683 |
+
| 4 | Performance Agent v1 | COMPLETE | Radon complexity + Llama-3.3-70B, 3 findings on PR #4 |
|
| 684 |
+
| 5 | Style Agent v1 | COMPLETE | Ruff linter + Llama-3.3-70B, 6 findings on PR #4 |
|
| 685 |
+
| 6 | ChromaDB + RAG Context | COMPLETE | sentence-transformers + ChromaDB, integrated into all agents |
|
| 686 |
+
| 7 | Synthesizer Agent | COMPLETE | Dedup, conflict resolution, Health Score formula, exec summary |
|
| 687 |
+
| 8 | Next.js Dashboard | COMPLETE | Next.js + Tailwind + Recharts, mock data, all pages |
|
| 688 |
+
| 9 | Polish & Evaluation | COMPLETE | Eval harness, metrics, README, DB persistence |
|
| 689 |
+
| 10 | Launch & Promotion | COMPLETE | Render config, Vercel ready, API endpoints for dashboard |
|
| 690 |
+
|
| 691 |
+
### Key Decisions Log
|
| 692 |
+
|
| 693 |
+
| Date | Decision | Rationale |
|
| 694 |
+
|------|----------|-----------|
|
| 695 |
+
| 2026-03-19 | Project plan created | Starting from scratch, PDF spec as source of truth |
|
| 696 |
+
| 2026-03-19 | Project renamed to "Ninja Code Guard" | User's personal branding choice |
|
| 697 |
+
| 2026-03-19 | GitHub App: "Ninja's Code Guard" (ID: 3133457) | Registered and tested with live PR |
|
| 698 |
+
| 2026-03-19 | Test repo: ninjacode911/codeguard-test | Used for e2e webhook testing |
|
| 699 |
+
| 2026-03-19 | Fail-open pattern for Redis cache | Missing a review is worse than duplicating |
|
| 700 |
+
| 2026-03-19 | Background tasks for webhook processing | GitHub's 10s timeout requires async processing |
|
| 701 |
+
|
| 702 |
+
---
|
| 703 |
+
|
| 704 |
+
*Last updated: 2026-03-19*
|
README.md
ADDED
|
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Ninja Code Guard
|
| 2 |
+
|
| 3 |
+
**Multi-agent code review system that reviews GitHub pull requests the way a senior engineering team would.**
|
| 4 |
+
|
| 5 |
+
Three specialized AI agents — Security, Performance, and Style — analyze your code in parallel, then a Synthesizer merges their findings into a single, prioritized, non-overlapping review with inline GitHub comments.
|
| 6 |
+
|
| 7 |
+
## How It Works
|
| 8 |
+
|
| 9 |
+
```
|
| 10 |
+
PR opened on GitHub
|
| 11 |
+
│
|
| 12 |
+
▼
|
| 13 |
+
Webhook received ──→ HMAC-SHA256 validated
|
| 14 |
+
│
|
| 15 |
+
▼
|
| 16 |
+
Redis cache check ──→ Skip if already reviewed
|
| 17 |
+
│
|
| 18 |
+
▼
|
| 19 |
+
Fetch PR data ──→ Diff + full file contents
|
| 20 |
+
│
|
| 21 |
+
▼
|
| 22 |
+
RAG Context ──→ Embed files → ChromaDB → Retrieve related code
|
| 23 |
+
│
|
| 24 |
+
▼
|
| 25 |
+
┌─────────────────────────────────────────┐
|
| 26 |
+
│ 3 Agents run IN PARALLEL │
|
| 27 |
+
│ 🔒 Security ⚡ Performance ✏️ Style │
|
| 28 |
+
│ Bandit+LLM Radon+LLM Ruff+LLM │
|
| 29 |
+
└─────────────┬───────────────────────────┘
|
| 30 |
+
│
|
| 31 |
+
▼
|
| 32 |
+
Synthesizer ──→ Deduplicate → Rank → Score → Summarize
|
| 33 |
+
│
|
| 34 |
+
▼
|
| 35 |
+
Post to GitHub ──→ Inline comments + Summary with Health Score
|
| 36 |
+
```
|
| 37 |
+
|
| 38 |
+
## What Each Agent Does
|
| 39 |
+
|
| 40 |
+
| Agent | Focus | Static Tools | Example Findings |
|
| 41 |
+
|-------|-------|-------------|------------------|
|
| 42 |
+
| 🔒 **Security** | Vulnerabilities, auth, secrets | Bandit, detect-secrets | SQL injection, hardcoded API keys, weak crypto |
|
| 43 |
+
| ⚡ **Performance** | Efficiency, scalability | Radon complexity | N+1 queries, O(n²) loops, blocking I/O |
|
| 44 |
+
| ✏️ **Style** | Readability, maintainability | Ruff linter | Unused imports, bad naming, dead code |
|
| 45 |
+
| 🧠 **Synthesizer** | Merge & prioritize | — | Deduplication, conflict resolution, Health Score |
|
| 46 |
+
|
| 47 |
+
## Tech Stack
|
| 48 |
+
|
| 49 |
+
| Layer | Technology | Why |
|
| 50 |
+
|-------|-----------|-----|
|
| 51 |
+
| LLM | Groq (Llama-3.3-70B) | 500+ tokens/sec, free 14.4K req/day |
|
| 52 |
+
| Agents | LangChain + Structured Output | Typed JSON responses, prompt templates |
|
| 53 |
+
| Backend | FastAPI on Render | Async, auto OpenAPI docs, free tier |
|
| 54 |
+
| Vector DB | ChromaDB + sentence-transformers | RAG context, semantic code search |
|
| 55 |
+
| Cache | Upstash Redis | Prevent duplicate reviews |
|
| 56 |
+
| Database | Neon Postgres | Review history, Health Score trends |
|
| 57 |
+
| Dashboard | Next.js on Vercel | Review history, trend charts |
|
| 58 |
+
| GitHub | GitHub App (webhooks) | Inline PR comments, bot identity |
|
| 59 |
+
|
| 60 |
+
## Quick Start
|
| 61 |
+
|
| 62 |
+
### Prerequisites
|
| 63 |
+
- Python 3.11+
|
| 64 |
+
- Groq API key (free at console.groq.com)
|
| 65 |
+
- GitHub App (registered at github.com/settings/apps)
|
| 66 |
+
|
| 67 |
+
### Setup
|
| 68 |
+
|
| 69 |
+
```bash
|
| 70 |
+
# Clone and setup
|
| 71 |
+
git clone https://github.com/ninjacode911/ninja-code-guard
|
| 72 |
+
cd ninja-code-guard
|
| 73 |
+
python -m venv .venv && source .venv/bin/activate # Windows: .venv\Scripts\activate
|
| 74 |
+
pip install -r requirements.txt
|
| 75 |
+
|
| 76 |
+
# Configure
|
| 77 |
+
cp .env.example .env
|
| 78 |
+
# Edit .env with your API keys
|
| 79 |
+
|
| 80 |
+
# Run
|
| 81 |
+
uvicorn app.main:app --reload --port 8000
|
| 82 |
+
```
|
| 83 |
+
|
| 84 |
+
### Environment Variables
|
| 85 |
+
|
| 86 |
+
```env
|
| 87 |
+
GROQ_API_KEY=gsk_...
|
| 88 |
+
GITHUB_APP_ID=123456
|
| 89 |
+
GITHUB_APP_PRIVATE_KEY_PATH=./keys/app.pem
|
| 90 |
+
GITHUB_WEBHOOK_SECRET=...
|
| 91 |
+
DATABASE_URL=postgresql://...
|
| 92 |
+
UPSTASH_REDIS_URL=rediss://...
|
| 93 |
+
```
|
| 94 |
+
|
| 95 |
+
## Architecture
|
| 96 |
+
|
| 97 |
+
**4 Layers:**
|
| 98 |
+
- **GitHub Layer** — Webhooks, PR events, inline comments
|
| 99 |
+
- **Orchestration Layer** — FastAPI, agent dispatch, asyncio.gather
|
| 100 |
+
- **Agent Layer** — 3 domain agents + synthesizer (LangChain ReAct)
|
| 101 |
+
- **Knowledge Layer** — ChromaDB (RAG), Redis (cache), Postgres (history)
|
| 102 |
+
|
| 103 |
+
**Key Design Patterns:**
|
| 104 |
+
- Template Method — All agents share a base class, override only prompt + tools
|
| 105 |
+
- Structured Output — LLM constrained to return valid JSON (Pydantic schema)
|
| 106 |
+
- Fail-Open Cache — If Redis is down, proceed with analysis
|
| 107 |
+
- Background Tasks — Return 200 to GitHub immediately, review asynchronously
|
| 108 |
+
- Parallel Execution — asyncio.gather runs 3 agents concurrently
|
| 109 |
+
|
| 110 |
+
## Test Results
|
| 111 |
+
|
| 112 |
+
```
|
| 113 |
+
PR #4 on codeguard-test repo:
|
| 114 |
+
Security: 5 findings (SQL injection, weak crypto, hardcoded secrets)
|
| 115 |
+
Performance: 3 findings (O(n²) loop, blocking I/O, high complexity)
|
| 116 |
+
Style: 6 findings (unused imports, magic numbers, bad naming)
|
| 117 |
+
Total: 14 findings
|
| 118 |
+
Health Score: 14/100
|
| 119 |
+
Latency: ~13 seconds (after model load)
|
| 120 |
+
```
|
| 121 |
+
|
| 122 |
+
## Running Tests
|
| 123 |
+
|
| 124 |
+
```bash
|
| 125 |
+
pytest tests/unit/ -v
|
| 126 |
+
```
|
| 127 |
+
|
| 128 |
+
## Project Structure
|
| 129 |
+
|
| 130 |
+
```
|
| 131 |
+
app/
|
| 132 |
+
agents/ # Security, Performance, Style, Synthesizer
|
| 133 |
+
tools/ # Bandit, detect-secrets, Radon, Ruff wrappers
|
| 134 |
+
context/ # RAG pipeline (embedder, indexer, retriever)
|
| 135 |
+
github/ # Webhook validation, API client, comment formatter
|
| 136 |
+
models/ # Pydantic schemas (Finding, SynthesizedReview)
|
| 137 |
+
db/ # Redis cache, Postgres queries
|
| 138 |
+
services/ # Health Score calculator
|
| 139 |
+
dashboard/ # Next.js frontend (Vercel)
|
| 140 |
+
tests/ # Unit tests + evaluation harness
|
| 141 |
+
prompts/ # Agent system prompts (Markdown)
|
| 142 |
+
docs/ # Week-by-week documentation
|
| 143 |
+
```
|
| 144 |
+
|
| 145 |
+
## Documentation
|
| 146 |
+
|
| 147 |
+
Detailed week-by-week documentation is available in `docs/`:
|
| 148 |
+
- [Week 1: Foundation & Setup](docs/WEEK1_FOUNDATION_AND_SETUP.md)
|
| 149 |
+
- [Week 2: GitHub Integration](docs/WEEK2_GITHUB_INTEGRATION.md)
|
| 150 |
+
- [Week 3: Security Agent](docs/WEEK3_SECURITY_AGENT.md)
|
| 151 |
+
- [Week 4: Performance Agent](docs/WEEK4_PERFORMANCE_AGENT.md)
|
| 152 |
+
- [Week 5: Style Agent](docs/WEEK5_STYLE_AGENT.md)
|
| 153 |
+
- [Week 6: RAG & Parallel Execution](docs/WEEK6_RAG_AND_PARALLEL.md)
|
| 154 |
+
|
| 155 |
+
## License
|
| 156 |
+
|
| 157 |
+
MIT
|
| 158 |
+
|
| 159 |
+
---
|
| 160 |
+
|
| 161 |
+
Built by [ninjacode911](https://github.com/ninjacode911)
|
app/__init__.py
ADDED
|
File without changes
|
app/agents/__init__.py
ADDED
|
File without changes
|
app/agents/base_agent.py
ADDED
|
@@ -0,0 +1,295 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Base Agent Interface
|
| 3 |
+
=====================
|
| 4 |
+
|
| 5 |
+
All domain agents (Security, Performance, Style) inherit from this base class.
|
| 6 |
+
It provides shared infrastructure:
|
| 7 |
+
|
| 8 |
+
1. **Groq LLM client** — ChatGroq configured with Llama-3.3-70B (`llama-3.3-70b-versatile`)
|
| 9 |
+
2. **Structured output** — LLM returns typed Finding objects, not raw text
|
| 10 |
+
3. **Error handling** — graceful fallback if the LLM call fails
|
| 11 |
+
4. **Timing** — measures how long each agent takes (for latency metrics)
|
| 12 |
+
|
| 13 |
+
Design pattern: Template Method
|
| 14 |
+
- The base class defines the algorithm skeleton (receive diff → run tools → call LLM → return findings)
|
| 15 |
+
- Subclasses override specific steps (system_prompt, run_static_analysis)
|
| 16 |
+
- This prevents code duplication across 3 agents that follow the same flow
|
| 17 |
+
|
| 18 |
+
Why LangChain?
|
| 19 |
+
- Provides a unified interface across LLM providers (Groq, Gemini, OpenAI)
|
| 20 |
+
- If Groq goes down, we swap to Gemini by changing one line
|
| 21 |
+
- Structured output parsing is built in (with_structured_output)
|
| 22 |
+
- Prompt templates with variable substitution
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
from __future__ import annotations
|
| 26 |
+
|
| 27 |
+
import time
|
| 28 |
+
from abc import ABC, abstractmethod
|
| 29 |
+
|
| 30 |
+
import structlog
|
| 31 |
+
from langchain_core.prompts import ChatPromptTemplate
|
| 32 |
+
from langchain_groq import ChatGroq
|
| 33 |
+
from pydantic import BaseModel, Field
|
| 34 |
+
|
| 35 |
+
from app.config import settings
|
| 36 |
+
from app.github.client import PRData
|
| 37 |
+
from app.models.findings import Finding
|
| 38 |
+
|
| 39 |
+
logger = structlog.get_logger()
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
class AgentFindings(BaseModel):
|
| 43 |
+
"""
|
| 44 |
+
Schema for the LLM's structured output.
|
| 45 |
+
|
| 46 |
+
By wrapping findings in a Pydantic model, we can use LangChain's
|
| 47 |
+
`with_structured_output()` which constrains the LLM to return
|
| 48 |
+
valid JSON matching this exact schema. No more parsing raw text!
|
| 49 |
+
|
| 50 |
+
How with_structured_output() works under the hood:
|
| 51 |
+
1. It adds the JSON schema to the system prompt
|
| 52 |
+
2. It sets response_format to JSON mode (if the model supports it)
|
| 53 |
+
3. It validates the response against the schema
|
| 54 |
+
4. If validation fails, it raises an error (retries are not automatic and must be added explicitly)
|
| 55 |
+
"""
|
| 56 |
+
|
| 57 |
+
findings: list[FindingOutput] = Field(
|
| 58 |
+
default_factory=list,
|
| 59 |
+
description="List of security/performance/style findings",
|
| 60 |
+
)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
class FindingOutput(BaseModel):
|
| 64 |
+
"""
|
| 65 |
+
The schema we ask the LLM to produce for each finding.
|
| 66 |
+
|
| 67 |
+
This is slightly different from our internal Finding model because:
|
| 68 |
+
- The LLM doesn't know which agent it is (we add that after)
|
| 69 |
+
- We give the LLM freedom on field names that match its training
|
| 70 |
+
- We validate and convert to our Finding model post-LLM
|
| 71 |
+
|
| 72 |
+
Note: This class is defined AFTER AgentFindings, which refers to it
|
| 73 |
+
through a forward reference (annotations are deferred). Pydantic
|
| 74 |
+
resolves that forward reference via the model_rebuild() call below.
|
| 75 |
+
"""
|
| 76 |
+
|
| 77 |
+
file_path: str = Field(description="Path to the file (e.g., 'app.py')")
|
| 78 |
+
line_start: int = Field(description="Starting line number of the issue")
|
| 79 |
+
line_end: int = Field(description="Ending line number of the issue")
|
| 80 |
+
severity: str = Field(description="One of: critical, high, medium, low")
|
| 81 |
+
category: str = Field(description="Issue category (e.g., 'sql_injection', 'hardcoded_secret')")
|
| 82 |
+
title: str = Field(description="Short one-line title of the finding")
|
| 83 |
+
description: str = Field(description="Detailed explanation of the issue and its impact")
|
| 84 |
+
suggested_fix: str = Field(default="", description="Corrected code snippet")
|
| 85 |
+
cwe_id: str | None = Field(default=None, description="CWE ID if applicable (e.g., 'CWE-89')")
|
| 86 |
+
confidence: float = Field(description="Confidence score from 0.0 to 1.0")
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
# Rebuild the model to resolve the forward reference
|
| 90 |
+
AgentFindings.model_rebuild()
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
class BaseAgent(ABC):
|
| 94 |
+
"""
|
| 95 |
+
Abstract base class for all domain agents.
|
| 96 |
+
|
| 97 |
+
Subclasses must implement:
|
| 98 |
+
- agent_name: which agent this is ("security", "performance", "style")
|
| 99 |
+
- system_prompt: the detailed system prompt for the LLM
|
| 100 |
+
- run_static_analysis(): optional static tools (Bandit, Semgrep, etc.)
|
| 101 |
+
|
| 102 |
+
Usage:
|
| 103 |
+
agent = SecurityAgent()
|
| 104 |
+
findings = await agent.review(pr_data)
|
| 105 |
+
"""
|
| 106 |
+
|
| 107 |
+
def __init__(self):
|
| 108 |
+
"""
|
| 109 |
+
Initialize the LLM client.
|
| 110 |
+
|
| 111 |
+
ChatGroq connects to Groq's API which runs Llama-3.3-70B at
|
| 112 |
+
500+ tokens/sec — the fastest open-source LLM inference available.
|
| 113 |
+
This speed is critical: we need each agent to complete in 3-8 seconds
|
| 114 |
+
so the full review stays under 15 seconds.
|
| 115 |
+
|
| 116 |
+
Temperature=0.1: We want nearly deterministic output. Code review
|
| 117 |
+
should be consistent — the same code should get the same findings.
|
| 118 |
+
A small temperature (not 0) allows slight variation to avoid
|
| 119 |
+
getting stuck in repetitive patterns.
|
| 120 |
+
"""
|
| 121 |
+
self.llm = ChatGroq(
|
| 122 |
+
model="llama-3.3-70b-versatile",
|
| 123 |
+
api_key=settings.groq_api_key,
|
| 124 |
+
temperature=0.1,
|
| 125 |
+
max_tokens=4096,
|
| 126 |
+
)
|
| 127 |
+
|
| 128 |
+
@property
|
| 129 |
+
@abstractmethod
|
| 130 |
+
def agent_name(self) -> str:
|
| 131 |
+
"""The agent identifier: 'security', 'performance', or 'style'."""
|
| 132 |
+
...
|
| 133 |
+
|
| 134 |
+
@property
|
| 135 |
+
@abstractmethod
|
| 136 |
+
def system_prompt(self) -> str:
|
| 137 |
+
"""The full system prompt for this agent."""
|
| 138 |
+
...
|
| 139 |
+
|
| 140 |
+
async def run_static_analysis(self, pr_data: PRData) -> str:
|
| 141 |
+
"""
|
| 142 |
+
Run static analysis tools on the PR files.
|
| 143 |
+
|
| 144 |
+
Override in subclasses to run agent-specific tools:
|
| 145 |
+
- SecurityAgent: Bandit + detect-secrets
|
| 146 |
+
- PerformanceAgent: radon + AST analysis
|
| 147 |
+
- StyleAgent: Ruff/pylint
|
| 148 |
+
|
| 149 |
+
Returns a string summary of tool findings to include in the LLM prompt.
|
| 150 |
+
Default: no static analysis (LLM-only review).
|
| 151 |
+
"""
|
| 152 |
+
return ""
|
| 153 |
+
|
| 154 |
+
def _build_prompt(self) -> ChatPromptTemplate:
|
| 155 |
+
"""
|
| 156 |
+
Build the LangChain prompt template.
|
| 157 |
+
|
| 158 |
+
ChatPromptTemplate.from_messages() creates a multi-turn prompt:
|
| 159 |
+
- ("system", ...) → the system message (agent persona + instructions)
|
| 160 |
+
- ("human", ...) → the user message (the actual PR data to review)
|
| 161 |
+
|
| 162 |
+
Variables in {curly_braces} are substituted at runtime with .ainvoke().
|
| 163 |
+
"""
|
| 164 |
+
return ChatPromptTemplate.from_messages([
|
| 165 |
+
("system", self.system_prompt),
|
| 166 |
+
("human", (
|
| 167 |
+
"## PR Diff\n"
|
| 168 |
+
"```diff\n{diff}\n```\n\n"
|
| 169 |
+
"## Changed File Contents\n"
|
| 170 |
+
"{file_contents}\n\n"
|
| 171 |
+
"## Static Analysis Results\n"
|
| 172 |
+
"{static_analysis}\n\n"
|
| 173 |
+
"{rag_context}\n\n"
|
| 174 |
+
"Analyze this PR and return your findings as structured JSON."
|
| 175 |
+
)),
|
| 176 |
+
])
|
| 177 |
+
|
| 178 |
+
def _convert_to_findings(self, agent_output: AgentFindings) -> list[Finding]:
|
| 179 |
+
"""
|
| 180 |
+
Convert the LLM's output to our internal Finding model.
|
| 181 |
+
|
| 182 |
+
This adds the agent_name field and validates/clamps values:
|
| 183 |
+
- Severity is lowercased and validated
|
| 184 |
+
- Confidence is clamped to [0.0, 1.0]
|
| 185 |
+
- Invalid findings are skipped (not crashed on)
|
| 186 |
+
"""
|
| 187 |
+
findings = []
|
| 188 |
+
for f in agent_output.findings:
|
| 189 |
+
try:
|
| 190 |
+
severity = f.severity.lower().strip()
|
| 191 |
+
if severity not in ("critical", "high", "medium", "low"):
|
| 192 |
+
severity = "medium" # Default for ambiguous severity
|
| 193 |
+
|
| 194 |
+
confidence = max(0.0, min(1.0, f.confidence))
|
| 195 |
+
|
| 196 |
+
finding = Finding(
|
| 197 |
+
agent=self.agent_name,
|
| 198 |
+
file_path=f.file_path,
|
| 199 |
+
line_start=f.line_start,
|
| 200 |
+
line_end=f.line_end,
|
| 201 |
+
severity=severity,
|
| 202 |
+
category=f.category,
|
| 203 |
+
title=f.title,
|
| 204 |
+
description=f.description,
|
| 205 |
+
suggested_fix=f.suggested_fix,
|
| 206 |
+
cwe_id=f.cwe_id,
|
| 207 |
+
confidence=confidence,
|
| 208 |
+
)
|
| 209 |
+
findings.append(finding)
|
| 210 |
+
except Exception as e:
|
| 211 |
+
logger.warning(
|
| 212 |
+
"Skipping malformed finding",
|
| 213 |
+
agent=self.agent_name,
|
| 214 |
+
error=str(e),
|
| 215 |
+
)
|
| 216 |
+
return findings
|
| 217 |
+
|
| 218 |
+
def _format_file_contents(self, file_contents: dict[str, str]) -> str:
|
| 219 |
+
"""
|
| 220 |
+
Format file contents for the LLM prompt.
|
| 221 |
+
|
| 222 |
+
Each file is wrapped in a code block with its path as a header.
|
| 223 |
+
We truncate very long files to stay within LLM context limits.
|
| 224 |
+
Groq's Llama-3.3-70B has 128K context, so we have plenty of room
|
| 225 |
+
for typical PRs, but we cap each file at 500 lines to be safe.
|
| 226 |
+
"""
|
| 227 |
+
parts = []
|
| 228 |
+
for filepath, content in file_contents.items():
|
| 229 |
+
lines = content.split("\n")
|
| 230 |
+
if len(lines) > 500:
|
| 231 |
+
content = "\n".join(lines[:500]) + "\n... (truncated)"
|
| 232 |
+
parts.append(f"### {filepath}\n```\n{content}\n```")
|
| 233 |
+
return "\n\n".join(parts) if parts else "No file contents available."
|
| 234 |
+
|
| 235 |
+
async def review(self, pr_data: PRData, rag_context: str = "") -> list[Finding]:
|
| 236 |
+
"""
|
| 237 |
+
Main entry point: review a PR and return findings.
|
| 238 |
+
|
| 239 |
+
This is the Template Method:
|
| 240 |
+
1. Run static analysis tools (subclass-specific)
|
| 241 |
+
2. Build the prompt with diff + files + tool output + RAG context
|
| 242 |
+
3. Call the LLM with structured output
|
| 243 |
+
4. Convert to Finding objects
|
| 244 |
+
5. Log timing and return
|
| 245 |
+
|
| 246 |
+
If the LLM call fails, we return an empty list rather than crashing
|
| 247 |
+
the entire pipeline. The other agents can still contribute findings.
|
| 248 |
+
|
| 249 |
+
Args:
|
| 250 |
+
pr_data: The PR diff, file contents, and metadata
|
| 251 |
+
rag_context: Optional RAG context from ChromaDB (related code chunks)
|
| 252 |
+
"""
|
| 253 |
+
start_time = time.time()
|
| 254 |
+
|
| 255 |
+
try:
|
| 256 |
+
# Step 1: Run static analysis tools
|
| 257 |
+
static_results = await self.run_static_analysis(pr_data)
|
| 258 |
+
|
| 259 |
+
# Step 2: Build the prompt
|
| 260 |
+
prompt = self._build_prompt()
|
| 261 |
+
|
| 262 |
+
# Step 3: Create the structured output chain
|
| 263 |
+
structured_llm = self.llm.with_structured_output(AgentFindings)
|
| 264 |
+
chain = prompt | structured_llm
|
| 265 |
+
|
| 266 |
+
# Step 4: Call the LLM
|
| 267 |
+
result = await chain.ainvoke({
|
| 268 |
+
"diff": pr_data.diff[:15000], # Cap diff size for token limits
|
| 269 |
+
"file_contents": self._format_file_contents(pr_data.file_contents),
|
| 270 |
+
"static_analysis": static_results or "No static analysis results.",
|
| 271 |
+
"rag_context": rag_context or "",
|
| 272 |
+
})
|
| 273 |
+
|
| 274 |
+
# Step 5: Convert to Finding objects
|
| 275 |
+
findings = self._convert_to_findings(result)
|
| 276 |
+
|
| 277 |
+
elapsed_ms = int((time.time() - start_time) * 1000)
|
| 278 |
+
logger.info(
|
| 279 |
+
"Agent review completed",
|
| 280 |
+
agent=self.agent_name,
|
| 281 |
+
findings_count=len(findings),
|
| 282 |
+
elapsed_ms=elapsed_ms,
|
| 283 |
+
)
|
| 284 |
+
|
| 285 |
+
return findings
|
| 286 |
+
|
| 287 |
+
except Exception as e:
|
| 288 |
+
elapsed_ms = int((time.time() - start_time) * 1000)
|
| 289 |
+
logger.error(
|
| 290 |
+
"Agent review failed",
|
| 291 |
+
agent=self.agent_name,
|
| 292 |
+
error=str(e),
|
| 293 |
+
elapsed_ms=elapsed_ms,
|
| 294 |
+
)
|
| 295 |
+
return [] # Don't crash the pipeline — other agents can still work
|
app/agents/performance_agent.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Performance Agent
|
| 3 |
+
==================
|
| 4 |
+
|
| 5 |
+
Evaluates code for computational efficiency, memory usage, and scalability.
|
| 6 |
+
Uses radon for complexity metrics and the LLM for semantic analysis of
|
| 7 |
+
query patterns, I/O operations, and algorithmic efficiency.
|
| 8 |
+
|
| 9 |
+
Same architecture as SecurityAgent — inherits from BaseAgent, overrides
|
| 10 |
+
only agent_name, system_prompt, and run_static_analysis().
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
from pathlib import Path
|
| 16 |
+
|
| 17 |
+
import structlog
|
| 18 |
+
|
| 19 |
+
from app.agents.base_agent import BaseAgent
|
| 20 |
+
from app.github.client import PRData
|
| 21 |
+
from app.tools.radon_tool import run_radon
|
| 22 |
+
|
| 23 |
+
logger = structlog.get_logger()
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class PerformanceAgent(BaseAgent):
|
| 27 |
+
|
| 28 |
+
@property
|
| 29 |
+
def agent_name(self) -> str:
|
| 30 |
+
return "performance"
|
| 31 |
+
|
| 32 |
+
@property
|
| 33 |
+
def system_prompt(self) -> str:
|
| 34 |
+
prompt_path = (
|
| 35 |
+
Path(__file__).resolve().parent.parent.parent
|
| 36 |
+
/ "prompts"
|
| 37 |
+
/ "performance_system.md"
|
| 38 |
+
)
|
| 39 |
+
return prompt_path.read_text(encoding="utf-8")
|
| 40 |
+
|
| 41 |
+
async def run_static_analysis(self, pr_data: PRData) -> str:
|
| 42 |
+
"""Run radon complexity analysis on changed Python files."""
|
| 43 |
+
radon_output = await run_radon(pr_data.file_contents)
|
| 44 |
+
return radon_output if radon_output else ""
|
app/agents/security_agent.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Security Agent
|
| 3 |
+
===============
|
| 4 |
+
|
| 5 |
+
The Security Agent acts as a senior application security engineer (AppSec).
|
| 6 |
+
It reviews every changed line through the lens of exploitability, data exposure,
|
| 7 |
+
and authentication integrity.
|
| 8 |
+
|
| 9 |
+
Architecture:
|
| 10 |
+
1. Run static analysis tools (Bandit + detect-secrets) on changed files
|
| 11 |
+
2. Combine static results with PR diff and full file contents
|
| 12 |
+
3. Send everything to Groq's Llama-3.3-70B with a security-focused system prompt
|
| 13 |
+
4. LLM produces structured JSON findings with CWE IDs and suggested fixes
|
| 14 |
+
|
| 15 |
+
Why both static tools AND an LLM?
|
| 16 |
+
|
| 17 |
+
Static tools (Bandit):
|
| 18 |
+
✅ Fast, deterministic, zero false negatives for known patterns
|
| 19 |
+
✅ Free — no API cost
|
| 20 |
+
❌ Can't understand context (doesn't know if input is already sanitized)
|
| 21 |
+
❌ Only catches patterns it has rules for
|
| 22 |
+
|
| 23 |
+
LLM (Llama-3.3-70B):
|
| 24 |
+
✅ Understands context, intent, data flow between functions
|
| 25 |
+
✅ Can catch novel vulnerability patterns
|
| 26 |
+
✅ Provides natural language explanations and fixes
|
| 27 |
+
❌ Can hallucinate findings (false positives)
|
| 28 |
+
❌ Costs API calls (though Groq's free tier is generous)
|
| 29 |
+
|
| 30 |
+
Together: static tools provide HIGH-CONFIDENCE anchors, the LLM provides DEPTH.
|
| 31 |
+
The Synthesizer (Week 7) will merge and deduplicate their outputs.
|
| 32 |
+
"""
|
| 33 |
+
|
| 34 |
+
from __future__ import annotations
|
| 35 |
+
|
| 36 |
+
from pathlib import Path
|
| 37 |
+
|
| 38 |
+
import structlog
|
| 39 |
+
|
| 40 |
+
from app.agents.base_agent import BaseAgent
|
| 41 |
+
from app.github.client import PRData
|
| 42 |
+
from app.tools.bandit_tool import run_bandit
|
| 43 |
+
from app.tools.detect_secrets_tool import run_detect_secrets
|
| 44 |
+
|
| 45 |
+
logger = structlog.get_logger()
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
class SecurityAgent(BaseAgent):
|
| 49 |
+
"""
|
| 50 |
+
Security-focused code review agent.
|
| 51 |
+
|
| 52 |
+
Inherits from BaseAgent which provides:
|
| 53 |
+
- Groq LLM client (ChatGroq with Llama-3.3-70B)
|
| 54 |
+
- Structured output parsing (with_structured_output)
|
| 55 |
+
- Error handling and timing
|
| 56 |
+
- The review() method that orchestrates the flow
|
| 57 |
+
|
| 58 |
+
This class only needs to provide:
|
| 59 |
+
- agent_name: "security"
|
| 60 |
+
- system_prompt: loaded from prompts/security_system.md
|
| 61 |
+
- run_static_analysis(): runs Bandit + detect-secrets
|
| 62 |
+
"""
|
| 63 |
+
|
| 64 |
+
@property
|
| 65 |
+
def agent_name(self) -> str:
|
| 66 |
+
return "security"
|
| 67 |
+
|
| 68 |
+
@property
|
| 69 |
+
def system_prompt(self) -> str:
|
| 70 |
+
"""
|
| 71 |
+
Load the system prompt from the Markdown file.
|
| 72 |
+
|
| 73 |
+
We store prompts as separate files (not inline strings) because:
|
| 74 |
+
1. They're long (50+ lines) — inline strings clutter the code
|
| 75 |
+
2. They change frequently during prompt tuning (Week 9)
|
| 76 |
+
3. Non-engineers (product managers) can review/edit them
|
| 77 |
+
4. Git diff shows prompt changes clearly
|
| 78 |
+
"""
|
| 79 |
+
prompt_path = Path(__file__).resolve().parent.parent.parent / "prompts" / "security_system.md"
|
| 80 |
+
return prompt_path.read_text(encoding="utf-8")
|
| 81 |
+
|
| 82 |
+
async def run_static_analysis(self, pr_data: PRData) -> str:
|
| 83 |
+
"""
|
| 84 |
+
Run security-specific static analysis tools.
|
| 85 |
+
|
| 86 |
+
We run Bandit and detect-secrets in sequence (not parallel) because:
|
| 87 |
+
1. Each takes <5 seconds — parallelism gains are minimal
|
| 88 |
+
2. They both write to temp dirs — simpler to keep sequential
|
| 89 |
+
3. If one fails, the other still runs (independent try/except in each tool)
|
| 90 |
+
|
| 91 |
+
The results are concatenated into a single string that gets injected
|
| 92 |
+
into the LLM prompt. The LLM uses these as high-confidence signals
|
| 93 |
+
to anchor its own analysis.
|
| 94 |
+
"""
|
| 95 |
+
results = []
|
| 96 |
+
|
| 97 |
+
# Run Bandit (Python security linter)
|
| 98 |
+
bandit_output = await run_bandit(pr_data.file_contents)
|
| 99 |
+
if bandit_output:
|
| 100 |
+
results.append(bandit_output)
|
| 101 |
+
|
| 102 |
+
# Run detect-secrets (credential scanner)
|
| 103 |
+
secrets_output = await run_detect_secrets(pr_data.file_contents)
|
| 104 |
+
if secrets_output:
|
| 105 |
+
results.append(secrets_output)
|
| 106 |
+
|
| 107 |
+
return "\n\n".join(results) if results else ""
|
app/agents/style_agent.py
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Style & Maintainability Agent
|
| 3 |
+
===============================
|
| 4 |
+
|
| 5 |
+
Reviews code for readability, naming quality, documentation, test coverage,
|
| 6 |
+
and architectural consistency. Uses Ruff for mechanical lint checks and the
|
| 7 |
+
LLM for deeper maintainability analysis.
|
| 8 |
+
|
| 9 |
+
Same architecture as SecurityAgent and PerformanceAgent.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
|
| 16 |
+
import structlog
|
| 17 |
+
|
| 18 |
+
from app.agents.base_agent import BaseAgent
|
| 19 |
+
from app.github.client import PRData
|
| 20 |
+
from app.tools.linter_tool import run_ruff
|
| 21 |
+
|
| 22 |
+
logger = structlog.get_logger()
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
class StyleAgent(BaseAgent):
|
| 26 |
+
|
| 27 |
+
@property
|
| 28 |
+
def agent_name(self) -> str:
|
| 29 |
+
return "style"
|
| 30 |
+
|
| 31 |
+
@property
|
| 32 |
+
def system_prompt(self) -> str:
|
| 33 |
+
prompt_path = (
|
| 34 |
+
Path(__file__).resolve().parent.parent.parent
|
| 35 |
+
/ "prompts"
|
| 36 |
+
/ "style_system.md"
|
| 37 |
+
)
|
| 38 |
+
return prompt_path.read_text(encoding="utf-8")
|
| 39 |
+
|
| 40 |
+
async def run_static_analysis(self, pr_data: PRData) -> str:
|
| 41 |
+
"""Run Ruff linter on changed Python files."""
|
| 42 |
+
ruff_output = await run_ruff(pr_data.file_contents)
|
| 43 |
+
return ruff_output if ruff_output else ""
|
app/agents/synthesizer.py
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Synthesizer Agent
|
| 3 |
+
==================
|
| 4 |
+
|
| 5 |
+
The Synthesizer is the "senior engineering manager" of Ninja Code Guard.
|
| 6 |
+
It takes findings from all three domain agents (Security, Performance, Style)
|
| 7 |
+
and produces a unified, non-redundant review.
|
| 8 |
+
|
| 9 |
+
Responsibilities:
|
| 10 |
+
1. **Deduplicate** — If Security and Performance flag the same line for
|
| 11 |
+
different reasons, merge them into one finding with both perspectives.
|
| 12 |
+
2. **Resolve conflicts** — If agents disagree on severity, use a precedence
|
| 13 |
+
hierarchy: Security > Performance > Style.
|
| 14 |
+
3. **Re-rank** — Sort findings by severity, then by confidence within each severity.
|
| 15 |
+
4. **Compute Health Score** — 0-100 based on weighted finding density.
|
| 16 |
+
5. **Generate executive summary** — 3-5 sentences summarizing the review.
|
| 17 |
+
6. **Determine recommendation** — approve / request_changes / block.
|
| 18 |
+
|
| 19 |
+
Why a Synthesizer instead of just concatenating findings?
|
| 20 |
+
- Without dedup: the same SQL injection might be flagged by both Security
|
| 21 |
+
(as CWE-89) and Performance (as "unbounded query") — confusing for devs.
|
| 22 |
+
- Without conflict resolution: Security says "critical", Style says "medium"
|
| 23 |
+
for the same issue — which severity should the comment show?
|
| 24 |
+
- Without re-ranking: findings appear in arbitrary order — devs should see
|
| 25 |
+
the most important issues first.
|
| 26 |
+
"""
|
| 27 |
+
|
| 28 |
+
from __future__ import annotations
|
| 29 |
+
|
| 30 |
+
import time
|
| 31 |
+
from collections import defaultdict
|
| 32 |
+
|
| 33 |
+
import structlog
|
| 34 |
+
|
| 35 |
+
from app.models.findings import Finding, SynthesizedReview
|
| 36 |
+
from app.services.health_score import calculate_health_score, determine_recommendation
|
| 37 |
+
|
| 38 |
+
logger = structlog.get_logger()
|
| 39 |
+
|
| 40 |
+
# Agent precedence for severity conflicts (higher = takes priority)
|
| 41 |
+
AGENT_PRECEDENCE = {
|
| 42 |
+
"security": 3,
|
| 43 |
+
"performance": 2,
|
| 44 |
+
"style": 1,
|
| 45 |
+
}
|
| 46 |
+
|
| 47 |
+
SEVERITY_RANK = {
|
| 48 |
+
"critical": 4,
|
| 49 |
+
"high": 3,
|
| 50 |
+
"medium": 2,
|
| 51 |
+
"low": 1,
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def _finding_key(f: Finding) -> str:
|
| 56 |
+
"""
|
| 57 |
+
Generate a deduplication key for a finding.
|
| 58 |
+
|
| 59 |
+
Two findings are considered duplicates if they share the same file,
|
| 60 |
+
starting line, and category. This is a simplified key: it does not
|
| 61 |
+
detect overlapping line ranges, and findings on the same line with
|
| 62 |
+
different categories are kept as separate findings.
|
| 63 |
+
"""
|
| 64 |
+
return f"{f.file_path}:{f.line_start}:{f.category}"
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def deduplicate_findings(findings: list[Finding]) -> list[Finding]:
|
| 68 |
+
"""
|
| 69 |
+
Remove duplicate findings that reference the same code location.
|
| 70 |
+
|
| 71 |
+
When several findings share a file, start line, and category, we keep the finding from
|
| 72 |
+
the highest-precedence agent (Security > Performance > Style) and take
|
| 73 |
+
the maximum severity between them.
|
| 74 |
+
|
| 75 |
+
Example:
|
| 76 |
+
Security flags app.py:5 as "critical" (category: sql_injection)
|
| 77 |
+
Performance flags app.py:5 as "high" (same category)
|
| 78 |
+
→ Keep Security's finding with "critical" severity
|
| 79 |
+
→ Append a note that the Performance agent also flagged it
|
| 80 |
+
"""
|
| 81 |
+
# Group findings by location
|
| 82 |
+
groups: dict[str, list[Finding]] = defaultdict(list)
|
| 83 |
+
for finding in findings:
|
| 84 |
+
key = _finding_key(finding)
|
| 85 |
+
groups[key].append(finding)
|
| 86 |
+
|
| 87 |
+
deduped = []
|
| 88 |
+
duplicates_removed = 0
|
| 89 |
+
|
| 90 |
+
for key, group in groups.items():
|
| 91 |
+
if len(group) == 1:
|
| 92 |
+
deduped.append(group[0])
|
| 93 |
+
continue
|
| 94 |
+
|
| 95 |
+
# Sort by agent precedence (highest first)
|
| 96 |
+
group.sort(
|
| 97 |
+
key=lambda f: AGENT_PRECEDENCE.get(f.agent, 0), reverse=True
|
| 98 |
+
)
|
| 99 |
+
|
| 100 |
+
# Take the primary finding (highest precedence agent)
|
| 101 |
+
primary = group[0]
|
| 102 |
+
|
| 103 |
+
# Take the maximum severity across all agents
|
| 104 |
+
max_severity = max(group, key=lambda f: SEVERITY_RANK.get(f.severity, 0))
|
| 105 |
+
|
| 106 |
+
# Merge: keep primary's structure, upgrade severity if needed
|
| 107 |
+
merged_description = primary.description
|
| 108 |
+
if len(group) > 1:
|
| 109 |
+
other_agents = [f.agent for f in group[1:]]
|
| 110 |
+
merged_description += (
|
| 111 |
+
f"\n\n*Also flagged by: {', '.join(other_agents)} agent(s).*"
|
| 112 |
+
)
|
| 113 |
+
|
| 114 |
+
merged = Finding(
|
| 115 |
+
agent=primary.agent,
|
| 116 |
+
file_path=primary.file_path,
|
| 117 |
+
line_start=primary.line_start,
|
| 118 |
+
line_end=primary.line_end,
|
| 119 |
+
severity=max_severity.severity,
|
| 120 |
+
category=primary.category,
|
| 121 |
+
title=primary.title,
|
| 122 |
+
description=merged_description,
|
| 123 |
+
suggested_fix=primary.suggested_fix,
|
| 124 |
+
cwe_id=primary.cwe_id,
|
| 125 |
+
confidence=max(f.confidence for f in group),
|
| 126 |
+
)
|
| 127 |
+
deduped.append(merged)
|
| 128 |
+
duplicates_removed += len(group) - 1
|
| 129 |
+
|
| 130 |
+
if duplicates_removed > 0:
|
| 131 |
+
logger.info(
|
| 132 |
+
"Deduplicated findings",
|
| 133 |
+
removed=duplicates_removed,
|
| 134 |
+
before=len(findings),
|
| 135 |
+
after=len(deduped),
|
| 136 |
+
)
|
| 137 |
+
|
| 138 |
+
return deduped
|
| 139 |
+
|
| 140 |
+
|
| 141 |
+
def rank_findings(findings: list[Finding]) -> list[Finding]:
|
| 142 |
+
"""
|
| 143 |
+
Sort findings by importance: severity (desc) then confidence (desc).
|
| 144 |
+
|
| 145 |
+
Developers should see the most critical, highest-confidence issues first.
|
| 146 |
+
This matches how a senior engineer would present a review — lead with
|
| 147 |
+
the blocking issues, then the nice-to-haves.
|
| 148 |
+
"""
|
| 149 |
+
return sorted(
|
| 150 |
+
findings,
|
| 151 |
+
key=lambda f: (SEVERITY_RANK.get(f.severity, 0), f.confidence),
|
| 152 |
+
reverse=True,
|
| 153 |
+
)
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def generate_executive_summary(
|
| 157 |
+
findings: list[Finding],
|
| 158 |
+
health_score: int,
|
| 159 |
+
recommendation: str,
|
| 160 |
+
) -> str:
|
| 161 |
+
"""
|
| 162 |
+
Generate a 3-5 sentence executive summary of the review.
|
| 163 |
+
|
| 164 |
+
This appears at the top of the PR comment, giving the author a quick
|
| 165 |
+
overview without needing to read every finding.
|
| 166 |
+
"""
|
| 167 |
+
if not findings:
|
| 168 |
+
return (
|
| 169 |
+
"No issues were found in this pull request. "
|
| 170 |
+
"The code changes look clean across security, performance, and style dimensions. "
|
| 171 |
+
"Safe to merge."
|
| 172 |
+
)
|
| 173 |
+
|
| 174 |
+
# Count by agent
|
| 175 |
+
agent_counts = defaultdict(int)
|
| 176 |
+
for f in findings:
|
| 177 |
+
agent_counts[f.agent] += 1
|
| 178 |
+
|
| 179 |
+
# Count by severity
|
| 180 |
+
sev_counts = defaultdict(int)
|
| 181 |
+
for f in findings:
|
| 182 |
+
sev_counts[f.severity] += 1
|
| 183 |
+
|
| 184 |
+
parts = []
|
| 185 |
+
|
| 186 |
+
# Opening line
|
| 187 |
+
total = len(findings)
|
| 188 |
+
parts.append(
|
| 189 |
+
f"Multi-agent review analyzed this PR across security, performance, and style dimensions, "
|
| 190 |
+
f"finding {total} issue{'s' if total != 1 else ''}."
|
| 191 |
+
)
|
| 192 |
+
|
| 193 |
+
# Severity breakdown
|
| 194 |
+
sev_parts = []
|
| 195 |
+
for sev in ["critical", "high", "medium", "low"]:
|
| 196 |
+
count = sev_counts.get(sev, 0)
|
| 197 |
+
if count > 0:
|
| 198 |
+
sev_parts.append(f"{count} {sev}")
|
| 199 |
+
if sev_parts:
|
| 200 |
+
parts.append(f"Breakdown: {', '.join(sev_parts)}.")
|
| 201 |
+
|
| 202 |
+
# Agent breakdown
|
| 203 |
+
agent_parts = []
|
| 204 |
+
for agent in ["security", "performance", "style"]:
|
| 205 |
+
count = agent_counts.get(agent, 0)
|
| 206 |
+
if count > 0:
|
| 207 |
+
agent_parts.append(f"{agent.capitalize()}: {count}")
|
| 208 |
+
if agent_parts:
|
| 209 |
+
parts.append(f"By domain: {', '.join(agent_parts)}.")
|
| 210 |
+
|
| 211 |
+
# Top issue highlight
|
| 212 |
+
if sev_counts.get("critical", 0) > 0:
|
| 213 |
+
critical_finding = next(f for f in findings if f.severity == "critical")
|
| 214 |
+
parts.append(
|
| 215 |
+
f"Most urgent: {critical_finding.title} in `{critical_finding.file_path}`."
|
| 216 |
+
)
|
| 217 |
+
elif sev_counts.get("high", 0) > 0:
|
| 218 |
+
high_finding = next(f for f in findings if f.severity == "high")
|
| 219 |
+
parts.append(
|
| 220 |
+
f"Top priority: {high_finding.title} in `{high_finding.file_path}`."
|
| 221 |
+
)
|
| 222 |
+
|
| 223 |
+
return " ".join(parts)
|
| 224 |
+
|
| 225 |
+
|
| 226 |
+
def synthesize(
|
| 227 |
+
security_findings: list[Finding],
|
| 228 |
+
performance_findings: list[Finding],
|
| 229 |
+
style_findings: list[Finding],
|
| 230 |
+
) -> SynthesizedReview:
|
| 231 |
+
"""
|
| 232 |
+
Main entry point: synthesize findings from all agents into a unified review.
|
| 233 |
+
|
| 234 |
+
Pipeline:
|
| 235 |
+
1. Combine all findings
|
| 236 |
+
2. Deduplicate (merge overlapping findings)
|
| 237 |
+
3. Rank by severity and confidence
|
| 238 |
+
4. Calculate Health Score
|
| 239 |
+
5. Determine recommendation
|
| 240 |
+
6. Generate executive summary
|
| 241 |
+
|
| 242 |
+
Returns a SynthesizedReview ready for posting to GitHub.
|
| 243 |
+
"""
|
| 244 |
+
start = time.time()
|
| 245 |
+
|
| 246 |
+
# Step 1: Combine
|
| 247 |
+
all_findings = security_findings + performance_findings + style_findings
|
| 248 |
+
|
| 249 |
+
# Step 2: Deduplicate
|
| 250 |
+
deduped = deduplicate_findings(all_findings)
|
| 251 |
+
|
| 252 |
+
# Step 3: Rank
|
| 253 |
+
ranked = rank_findings(deduped)
|
| 254 |
+
|
| 255 |
+
# Step 4: Health Score
|
| 256 |
+
health_score = calculate_health_score(ranked)
|
| 257 |
+
|
| 258 |
+
# Step 5: Recommendation
|
| 259 |
+
recommendation = determine_recommendation(ranked, health_score)
|
| 260 |
+
|
| 261 |
+
# Step 6: Executive summary
|
| 262 |
+
summary = generate_executive_summary(ranked, health_score, recommendation)
|
| 263 |
+
|
| 264 |
+
# Count by severity
|
| 265 |
+
critical = sum(1 for f in ranked if f.severity == "critical")
|
| 266 |
+
high = sum(1 for f in ranked if f.severity == "high")
|
| 267 |
+
medium = sum(1 for f in ranked if f.severity == "medium")
|
| 268 |
+
low = sum(1 for f in ranked if f.severity == "low")
|
| 269 |
+
|
| 270 |
+
elapsed_ms = int((time.time() - start) * 1000)
|
| 271 |
+
|
| 272 |
+
logger.info(
|
| 273 |
+
"Synthesis complete",
|
| 274 |
+
input_findings=len(all_findings),
|
| 275 |
+
after_dedup=len(ranked),
|
| 276 |
+
health_score=health_score,
|
| 277 |
+
recommendation=recommendation,
|
| 278 |
+
elapsed_ms=elapsed_ms,
|
| 279 |
+
)
|
| 280 |
+
|
| 281 |
+
return SynthesizedReview(
|
| 282 |
+
health_score=health_score,
|
| 283 |
+
executive_summary=summary,
|
| 284 |
+
recommendation=recommendation,
|
| 285 |
+
findings=ranked,
|
| 286 |
+
critical_count=critical,
|
| 287 |
+
high_count=high,
|
| 288 |
+
medium_count=medium,
|
| 289 |
+
low_count=low,
|
| 290 |
+
duration_ms=elapsed_ms,
|
| 291 |
+
)
|
app/config.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Application configuration via environment variables."""
|
| 2 |
+
|
| 3 |
+
from pydantic_settings import BaseSettings
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class Settings(BaseSettings):
|
| 7 |
+
"""All configuration loaded from environment variables."""
|
| 8 |
+
|
| 9 |
+
# LLM APIs
|
| 10 |
+
groq_api_key: str = ""
|
| 11 |
+
gemini_api_key: str = ""
|
| 12 |
+
|
| 13 |
+
# GitHub App
|
| 14 |
+
github_app_id: str = ""
|
| 15 |
+
github_app_private_key_path: str = "./keys/app.pem"
|
| 16 |
+
github_webhook_secret: str = ""
|
| 17 |
+
|
| 18 |
+
# Database
|
| 19 |
+
database_url: str = ""
|
| 20 |
+
|
| 21 |
+
# Redis Cache
|
| 22 |
+
upstash_redis_url: str = ""
|
| 23 |
+
|
| 24 |
+
# Embedding
|
| 25 |
+
embedding_model: str = "all-MiniLM-L6-v2"
|
| 26 |
+
|
| 27 |
+
# App Config
|
| 28 |
+
environment: str = "development"
|
| 29 |
+
log_level: str = "INFO"
|
| 30 |
+
confidence_threshold: float = 0.6
|
| 31 |
+
max_repo_files_index: int = 500
|
| 32 |
+
|
| 33 |
+
# Security
|
| 34 |
+
dashboard_api_key: str = "" # Set in production to protect dashboard API
|
| 35 |
+
cors_allowed_origins: str = "" # Comma-separated origins, e.g. "https://myapp.vercel.app"
|
| 36 |
+
|
| 37 |
+
model_config = {"env_file": ".env", "env_file_encoding": "utf-8"}
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
settings = Settings()
|
app/context/__init__.py
ADDED
|
File without changes
|
app/context/embedder.py
ADDED
|
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Code Embedding Pipeline
|
| 3 |
+
========================
|
| 4 |
+
|
| 5 |
+
Converts source code into vector embeddings using sentence-transformers.
|
| 6 |
+
These embeddings are stored in ChromaDB for semantic search.
|
| 7 |
+
|
| 8 |
+
How it works:
|
| 9 |
+
1. Source code is split into chunks (functions, classes, or fixed-size blocks)
|
| 10 |
+
2. Each chunk is embedded into a 384-dimensional vector
|
| 11 |
+
3. Vectors capture semantic meaning — similar code has similar vectors
|
| 12 |
+
4. When reviewing a PR, we query ChromaDB with the diff to find related code
|
| 13 |
+
|
| 14 |
+
Why embeddings for code?
|
| 15 |
+
Consider this diff:
|
| 16 |
+
+ user_id = request.args.get("id")
|
| 17 |
+
+ data = db.query(f"SELECT * FROM users WHERE id = {user_id}")
|
| 18 |
+
|
| 19 |
+
To evaluate this, the agent needs to know:
|
| 20 |
+
- Does `db.query()` parameterize inputs? → Need the DB wrapper's source code
|
| 21 |
+
- Is there middleware that validates `user_id`? → Need the middleware source
|
| 22 |
+
- Are there other similar patterns in the codebase? → Need semantic search
|
| 23 |
+
|
| 24 |
+
Embeddings let us find this related code WITHOUT knowing the exact file paths.
|
| 25 |
+
The query "SQL query with user input" returns relevant code chunks ranked by
|
| 26 |
+
semantic similarity — not keyword matching, but meaning matching.
|
| 27 |
+
|
| 28 |
+
Model: all-MiniLM-L6-v2
|
| 29 |
+
- 384 dimensions, 22M parameters
|
| 30 |
+
- Runs locally on CPU in ~10ms per chunk (GPU: ~1ms)
|
| 31 |
+
- Optimized for semantic similarity tasks
|
| 32 |
+
- Good enough for code — not perfect, but fast and free
|
| 33 |
+
"""
|
| 34 |
+
|
| 35 |
+
from __future__ import annotations
|
| 36 |
+
|
| 37 |
+
import structlog
|
| 38 |
+
|
| 39 |
+
from app.config import settings
|
| 40 |
+
|
| 41 |
+
logger = structlog.get_logger()
|
| 42 |
+
|
| 43 |
+
# Lazy-loaded model to avoid slow import at startup
|
| 44 |
+
_model = None
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def get_embedding_model():
    """
    Return the shared sentence-transformers model, creating it on first call.

    The model is built lazily and kept in the module-level ``_model`` so that
    importing this module stays cheap and later calls reuse the same instance.

    Returns:
        The loaded model, or None when an ImportError is raised while
        importing/constructing it (a warning is logged in that case).
    """
    global _model

    # Fast path: already loaded by an earlier call.
    if _model is not None:
        return _model

    try:
        # Imported here rather than at module level so the dependency is optional.
        from sentence_transformers import SentenceTransformer

        _model = SentenceTransformer(settings.embedding_model)
        logger.info("Loaded embedding model", model=settings.embedding_model)
    except ImportError:
        logger.warning("sentence-transformers not installed — RAG context disabled")
        return None

    return _model
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def embed_texts(texts: list[str]) -> list[list[float]]:
    """
    Embed a list of text strings into vectors.

    Args:
        texts: List of code chunks or queries to embed

    Returns:
        List of embedding vectors (each is a list of floats), one per input
        text. Returns an empty list when ``texts`` is empty or when no
        embedding model is available.
    """
    # Nothing to embed: answer immediately instead of triggering the
    # lazy model load just to encode an empty batch.
    if not texts:
        return []

    model = get_embedding_model()
    if model is None:
        return []

    vectors = model.encode(texts, show_progress_bar=False)
    # encode() returns an array-like; convert to plain nested lists.
    return vectors.tolist()
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def chunk_code(
    content: str, filepath: str, chunk_size: int = 60, overlap: int = 10
) -> list[dict]:
    """
    Split source code into overlapping line-based chunks for embedding.

    Consecutive chunks share ``overlap`` lines so that context is preserved
    across chunk boundaries. Chunks with fewer than 5 non-empty lines are
    dropped. Chunking stops as soon as a chunk reaches the end of the file,
    so no trailing chunk that is wholly contained in the previous one is
    produced.

    Args:
        content: Full file source code
        filepath: The file path (included as metadata and as a header line
            in each chunk's text)
        chunk_size: Lines per chunk (default: 60); values below 1 are
            treated as 1
        overlap: Lines shared between consecutive chunks (default: 10);
            negative values are treated as 0

    Returns:
        List of dicts with 'text', 'filepath', 'start_line', 'end_line'
        (line numbers are 1-based and inclusive)
    """
    lines = content.split("\n")
    total = len(lines)

    # Normalise degenerate arguments so slicing below never sees a
    # non-positive window or a negative overlap.
    chunk_size = max(chunk_size, 1)
    overlap = max(overlap, 0)
    # Always advance by at least one line so the loop terminates even
    # when overlap >= chunk_size.
    step = max(chunk_size - overlap, 1)

    chunks = []
    start = 0
    while start < total:
        end = min(start + chunk_size, total)
        window = lines[start:end]

        # Skip very small chunks (less than 5 non-empty lines)
        non_empty = sum(1 for line in window if line.strip())
        if non_empty >= 5:
            chunks.append({
                "text": f"# File: {filepath}\n" + "\n".join(window),
                "filepath": filepath,
                "start_line": start + 1,
                "end_line": end,
            })

        # This chunk already covers the tail of the file; any further
        # chunk would only repeat lines it contains.
        if end >= total:
            break

        start += step

    return chunks
|
app/context/indexer.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ChromaDB Repo Indexer
|
| 3 |
+
======================
|
| 4 |
+
|
| 5 |
+
Indexes repository source code into ChromaDB for semantic search.
|
| 6 |
+
Each repo gets its own ChromaDB collection, keyed by the repo's full name.
|
| 7 |
+
|
| 8 |
+
How indexing works:
|
| 9 |
+
1. Receive file contents from GitHub API
|
| 10 |
+
2. Chunk each file into ~60-line blocks
|
| 11 |
+
3. Embed each chunk using sentence-transformers
|
| 12 |
+
4. Upsert into ChromaDB collection for this repo
|
| 13 |
+
|
| 14 |
+
ChromaDB is an open-source vector database that:
|
| 15 |
+
- Runs embedded in the Python process (no separate server needed)
|
| 16 |
+
- Stores vectors + metadata + documents together
|
| 17 |
+
- Supports fast approximate nearest neighbor (ANN) search
|
| 18 |
+
- Can persist to disk or run entirely in-memory
|
| 19 |
+
|
| 20 |
+
We use in-memory mode on Render (ephemeral storage) — the index is rebuilt
|
| 21 |
+
on each PR review. This is acceptable because indexing the changed files
|
| 22 |
+
takes <1 second for typical PRs.
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
from __future__ import annotations
|
| 26 |
+
|
| 27 |
+
import chromadb
|
| 28 |
+
import structlog
|
| 29 |
+
|
| 30 |
+
from app.config import settings
|
| 31 |
+
from app.context.embedder import chunk_code, embed_texts
|
| 32 |
+
|
| 33 |
+
logger = structlog.get_logger()
|
| 34 |
+
|
| 35 |
+
# Singleton ChromaDB client (in-memory)
|
| 36 |
+
_chroma_client: chromadb.ClientAPI | None = None
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def _get_chroma_client() -> chromadb.ClientAPI:
    """Return the module-wide ChromaDB client, creating it on first use."""
    global _chroma_client

    if _chroma_client is not None:
        return _chroma_client

    # Default client: in-memory, no persistence
    _chroma_client = chromadb.Client()
    return _chroma_client
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def _collection_name(repo_full_name: str) -> str:
|
| 48 |
+
"""Generate a valid ChromaDB collection name from a repo name."""
|
| 49 |
+
# ChromaDB requires alphanumeric + underscores, 3-63 chars
|
| 50 |
+
name = repo_full_name.replace("/", "_").replace("-", "_")
|
| 51 |
+
return f"repo_{name}"[:63]
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
async def index_repo_files(
    repo_full_name: str, file_contents: dict[str, str]
) -> str:
    """
    Chunk, embed and upsert the given files into this repo's ChromaDB collection.

    Files whose content is longer than 100,000 characters are ignored, and
    at most ``settings.max_repo_files_index`` chunks are indexed. Chunk ids
    are ``"<filepath>:<start_line>"``, so indexing the same chunk again
    overwrites the previous entry.

    Args:
        repo_full_name: "owner/repo" — used to derive the collection name
        file_contents: dict of {filepath: source_code}

    Returns:
        The collection name (for retrieval). It is returned even when
        nothing was indexed because there were no chunks or no embeddings.
    """
    chroma = _get_chroma_client()
    collection_name = _collection_name(repo_full_name)

    # One collection per repository, created on demand.
    collection = chroma.get_or_create_collection(
        name=collection_name,
        metadata={"repo": repo_full_name},
    )

    # Chunk every file that is small enough to be worth indexing.
    pieces = [
        piece
        for path, source in file_contents.items()
        if len(source) <= 100_000
        for piece in chunk_code(source, path)
    ]

    if not pieces:
        logger.info("No chunks to index", repo=repo_full_name)
        return collection_name

    # Cap the total number of chunks that get embedded and stored.
    limit = settings.max_repo_files_index
    if len(pieces) > limit:
        pieces = pieces[:limit]

    documents = [piece["text"] for piece in pieces]
    vectors = embed_texts(documents)

    if not vectors:
        logger.warning("Embedding failed — RAG context unavailable")
        return collection_name

    # Build the ids and metadata rows in step with the documents.
    chunk_ids = []
    chunk_meta = []
    for piece in pieces:
        chunk_ids.append(f"{piece['filepath']}:{piece['start_line']}")
        chunk_meta.append({
            "filepath": piece["filepath"],
            "start_line": piece["start_line"],
            "end_line": piece["end_line"],
        })

    collection.upsert(
        ids=chunk_ids,
        embeddings=vectors,
        documents=documents,
        metadatas=chunk_meta,
    )

    logger.info(
        "Indexed repo files",
        repo=repo_full_name,
        chunks=len(pieces),
        collection=collection_name,
    )

    return collection_name
|
app/context/retriever.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
RAG Context Retriever
|
| 3 |
+
======================
|
| 4 |
+
|
| 5 |
+
Retrieves relevant code context from ChromaDB based on the PR diff.
|
| 6 |
+
This is the "R" in RAG (Retrieval-Augmented Generation).
|
| 7 |
+
|
| 8 |
+
How retrieval works:
|
| 9 |
+
1. Take the PR diff text as a query
|
| 10 |
+
2. Embed the query using the same model used for indexing
|
| 11 |
+
3. Search ChromaDB for the most similar code chunks
|
| 12 |
+
4. Return the top-k chunks as additional context for the LLM
|
| 13 |
+
|
| 14 |
+
Why RAG for code review?
|
| 15 |
+
The PR diff only shows CHANGED lines. But understanding a change often
|
| 16 |
+
requires seeing RELATED code:
|
| 17 |
+
- If a function is called from 5 places, changing it affects all callers
|
| 18 |
+
- If a variable is validated in another file, the validation matters here
|
| 19 |
+
- If the same pattern exists elsewhere, inconsistency is a style issue
|
| 20 |
+
|
| 21 |
+
RAG gives the agents "peripheral vision" — they see not just the change,
|
| 22 |
+
but the surrounding codebase context that makes the change meaningful.
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
from __future__ import annotations
|
| 26 |
+
|
| 27 |
+
import structlog
|
| 28 |
+
|
| 29 |
+
from app.context.embedder import embed_texts
|
| 30 |
+
from app.context.indexer import _get_chroma_client
|
| 31 |
+
|
| 32 |
+
logger = structlog.get_logger()
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
async def retrieve_context(
    collection_name: str,
    query_text: str,
    top_k: int = 5,
) -> str:
    """
    Look up code chunks related to ``query_text`` and format them for a prompt.

    Args:
        collection_name: The ChromaDB collection to search
        query_text: The PR diff or a specific query (only the first 5000
            characters are embedded)
        top_k: Maximum number of chunks to request (default: 5)

    Returns:
        A markdown-formatted string with one section per relevant chunk.
        Returns an empty string when the collection is missing or empty,
        when embedding yields nothing, when no chunk is relevant enough,
        or when any error occurs (errors are logged, never raised).
    """
    header = "## Related Code Context (from repository)\n"

    try:
        chroma = _get_chroma_client()

        # A missing collection simply means there is no context to offer.
        try:
            collection = chroma.get_collection(name=collection_name)
        except Exception:
            logger.debug("Collection not found — no RAG context", collection=collection_name)
            return ""

        if collection.count() == 0:
            return ""

        # Embed the (size-capped) query text.
        query_vectors = embed_texts([query_text[:5000]])
        if not query_vectors:
            return ""

        # Never ask for more neighbours than the collection holds.
        hits = collection.query(
            query_embeddings=query_vectors,
            n_results=min(top_k, collection.count()),
            include=["documents", "metadatas", "distances"],
        )

        if not hits or not hits["documents"] or not hits["documents"][0]:
            return ""

        # Results are nested per query; we sent one query, so take index 0.
        sections = []
        for text, meta, dist in zip(
            hits["documents"][0],
            hits["metadatas"][0],
            hits["distances"][0],
        ):
            # Smaller distance = closer match. Map it onto a rough 0-1
            # relevance score (assumes distances fall in [0, 2] — TODO
            # confirm against the collection's distance metric).
            relevance = max(0, 1 - dist / 2)
            if relevance < 0.3:
                continue  # Skip low-relevance results

            path = meta.get("filepath", "unknown")
            first_line = meta.get("start_line", "?")
            last_line = meta.get("end_line", "?")

            sections.append(
                f"### {path} (lines {first_line}-{last_line}, relevance: {relevance:.0%})\n"
                f"```\n{text}\n```\n"
            )

        if not sections:
            return ""

        context = "\n".join([header, *sections])
        logger.info(
            "Retrieved RAG context",
            collection=collection_name,
            chunks_returned=len(sections),
        )
        return context

    except Exception as exc:
        logger.warning("RAG retrieval failed", error=str(exc))
        return ""
|
app/db/__init__.py
ADDED
|
File without changes
|
app/db/postgres.py
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Neon Postgres Database Client
|
| 3 |
+
===============================
|
| 4 |
+
|
| 5 |
+
Stores PR review history for the dashboard: health scores, finding counts,
|
| 6 |
+
executive summaries, and full findings JSON.
|
| 7 |
+
|
| 8 |
+
Uses a shared asyncpg connection pool for both the async writes from the
|
| 9 |
+
webhook pipeline and the read queries that serve the dashboard.
|
| 10 |
+
|
| 11 |
+
Schema is auto-created on first connection via ensure_tables().
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from __future__ import annotations
|
| 15 |
+
|
| 16 |
+
import json
|
| 17 |
+
from datetime import datetime, timezone
|
| 18 |
+
from uuid import uuid4
|
| 19 |
+
|
| 20 |
+
import structlog
|
| 21 |
+
|
| 22 |
+
from app.config import settings
|
| 23 |
+
from app.models.findings import SynthesizedReview
|
| 24 |
+
|
| 25 |
+
logger = structlog.get_logger()
|
| 26 |
+
|
| 27 |
+
# ── Connection pool (reuse connections instead of connect-per-query) ──────
|
| 28 |
+
_pool = None
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
async def _get_pool():
    """Return the shared asyncpg pool, creating it on the first call."""
    global _pool

    if _pool is not None:
        return _pool

    # Imported lazily so the module can be imported without asyncpg loaded.
    import asyncpg

    _pool = await asyncpg.create_pool(
        settings.database_url,
        min_size=1,
        max_size=5,
        command_timeout=10,
    )
    return _pool
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
CREATE_TABLE_SQL = """
|
| 45 |
+
CREATE TABLE IF NOT EXISTS pr_reviews (
|
| 46 |
+
id TEXT PRIMARY KEY,
|
| 47 |
+
repo_full_name TEXT NOT NULL,
|
| 48 |
+
pr_number INT NOT NULL,
|
| 49 |
+
commit_sha TEXT NOT NULL,
|
| 50 |
+
health_score INT NOT NULL,
|
| 51 |
+
critical_count INT DEFAULT 0,
|
| 52 |
+
high_count INT DEFAULT 0,
|
| 53 |
+
medium_count INT DEFAULT 0,
|
| 54 |
+
low_count INT DEFAULT 0,
|
| 55 |
+
summary TEXT,
|
| 56 |
+
findings JSONB NOT NULL DEFAULT '[]',
|
| 57 |
+
duration_ms INT DEFAULT 0,
|
| 58 |
+
created_at TIMESTAMPTZ DEFAULT NOW()
|
| 59 |
+
);
|
| 60 |
+
|
| 61 |
+
CREATE INDEX IF NOT EXISTS idx_pr_reviews_repo ON pr_reviews(repo_full_name);
|
| 62 |
+
CREATE INDEX IF NOT EXISTS idx_pr_reviews_sha ON pr_reviews(commit_sha);
|
| 63 |
+
"""
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
async def ensure_tables():
    """
    Run the schema DDL so that pr_reviews and its indexes exist.

    Does nothing (beyond logging a warning) when no database URL is
    configured. Failures are logged as warnings and not raised.
    """
    if not settings.database_url:
        logger.warning("DATABASE_URL not set — skipping table creation")
        return

    try:
        db_pool = await _get_pool()
        async with db_pool.acquire() as connection:
            # The DDL uses IF NOT EXISTS, so repeated runs are harmless.
            await connection.execute(CREATE_TABLE_SQL)
        logger.info("Database tables ensured")
    except Exception as exc:
        logger.warning("Database setup failed", error=str(exc))
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
async def save_review(
    repo_full_name: str,
    pr_number: int,
    commit_sha: str,
    review: SynthesizedReview,
) -> None:
    """
    Insert one synthesized review as a new row in pr_reviews.

    A fresh UUID is generated for the row id and the findings are stored as
    a JSON array. Does nothing when no database URL is configured. Failures
    are logged as warnings and not raised.

    Args:
        repo_full_name: "owner/repo" the review belongs to
        pr_number: Pull request number
        commit_sha: Commit the review was produced for
        review: The review whose scores, counts, summary and findings are saved
    """
    if not settings.database_url:
        return

    insert_sql = """
        INSERT INTO pr_reviews (id, repo_full_name, pr_number, commit_sha,
        health_score, critical_count, high_count, medium_count, low_count,
        summary, findings, duration_ms)
        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
        """

    try:
        db_pool = await _get_pool()
        async with db_pool.acquire() as connection:
            # Values are listed in the same order as the $1..$12 placeholders.
            row = (
                str(uuid4()),
                repo_full_name,
                pr_number,
                commit_sha,
                review.health_score,
                review.critical_count,
                review.high_count,
                review.medium_count,
                review.low_count,
                review.executive_summary,
                json.dumps([finding.model_dump() for finding in review.findings]),
                review.duration_ms,
            )
            await connection.execute(insert_sql, *row)
        logger.info("Saved review to database", repo=repo_full_name, pr=pr_number)
    except Exception as exc:
        logger.warning("Database save failed", error=str(exc))
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
async def get_repo_reviews(repo_full_name: str, limit: int = 20) -> list[dict]:
    """
    Get recent reviews for a repo, newest first.

    Args:
        repo_full_name: "owner/repo" whose reviews are requested
        limit: Maximum number of rows to return; clamped to the range 0-100

    Returns:
        A list of dicts, one per review, with the columns selected below
        (the findings JSON is not included). Returns an empty list when no
        database URL is configured or when the query fails (the failure is
        logged as a warning).
    """
    # Cap to prevent excessive queries, and never pass a negative LIMIT
    # to the database.
    limit = max(0, min(limit, 100))

    if not settings.database_url:
        return []

    try:
        pool = await _get_pool()
        async with pool.acquire() as conn:
            rows = await conn.fetch(
                """
                SELECT id, pr_number, commit_sha, health_score,
                critical_count, high_count, medium_count, low_count,
                summary, duration_ms, created_at
                FROM pr_reviews
                WHERE repo_full_name = $1
                ORDER BY created_at DESC
                LIMIT $2
                """,
                repo_full_name,
                limit,
            )
            return [dict(row) for row in rows]
    except Exception as e:
        logger.warning("Database query failed", error=str(e))
        return []
|
app/db/redis_cache.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Redis Cache for PR Review Deduplication
|
| 3 |
+
========================================
|
| 4 |
+
|
| 5 |
+
When a developer pushes multiple commits quickly (or force-pushes), GitHub sends
|
| 6 |
+
a webhook for each push. Without caching, we'd re-analyze the same PR multiple times,
|
| 7 |
+
wasting Groq API quota and spamming the PR with duplicate comments.
|
| 8 |
+
|
| 9 |
+
Solution: Before analyzing a PR, we check Redis: "Have we already reviewed this
|
| 10 |
+
exact commit SHA?" If yes, we skip the analysis entirely.
|
| 11 |
+
|
| 12 |
+
Why Redis (Upstash) instead of in-memory cache?
|
| 13 |
+
- Our Render free tier restarts the server frequently (cold starts)
|
| 14 |
+
- In-memory cache would be lost on every restart
|
| 15 |
+
- Redis persists across restarts and is shared if we scale to multiple workers
|
| 16 |
+
- Upstash's serverless Redis gives us 10K requests/day free — more than enough
|
| 17 |
+
|
| 18 |
+
Cache key structure: "ninjacg:reviewed:{commit_sha}"
|
| 19 |
+
Cache value: "1" (just a flag — we don't store the review result here, that's in Postgres)
|
| 20 |
+
TTL: 7 days (after which re-analysis is allowed)
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
from __future__ import annotations
|
| 24 |
+
|
| 25 |
+
import redis.asyncio as redis
|
| 26 |
+
import structlog
|
| 27 |
+
|
| 28 |
+
from app.config import settings
|
| 29 |
+
|
| 30 |
+
logger = structlog.get_logger()
|
| 31 |
+
|
| 32 |
+
# Connection pool — reused across requests for efficiency.
|
| 33 |
+
# Redis connections are expensive to create (TCP handshake + TLS negotiation).
|
| 34 |
+
# A pool keeps connections open and reuses them.
|
| 35 |
+
_redis_client: redis.Redis | None = None
|
| 36 |
+
|
| 37 |
+
# Cache TTL in seconds (7 days)
|
| 38 |
+
CACHE_TTL = 7 * 24 * 60 * 60
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def _get_redis_client() -> redis.Redis:
    """
    Return the shared Redis client, building it from the configured URL on first use.

    Construction is deferred until the first call so that merely importing
    this module never attempts to set up a client.
    """
    global _redis_client

    if _redis_client is not None:
        return _redis_client

    _redis_client = redis.from_url(
        settings.upstash_redis_url,
        decode_responses=True,
    )
    return _redis_client
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def _cache_key(commit_sha: str) -> str:
|
| 58 |
+
"""Build the Redis key for a commit SHA."""
|
| 59 |
+
return f"ninjacg:reviewed:{commit_sha}"
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
async def is_already_reviewed(commit_sha: str) -> bool:
    """
    Report whether the cache holds a reviewed flag for this commit.

    Args:
        commit_sha: The HEAD commit SHA of the PR

    Returns:
        True if a cache entry exists for the commit. False if it does not,
        or if the Redis lookup fails for any reason — the failure is
        logged and treated as "not reviewed" so analysis still proceeds.
    """
    try:
        found = await _get_redis_client().exists(_cache_key(commit_sha))
        if found:
            # Only the first 8 characters of the SHA are logged.
            logger.info("Cache hit — skipping re-analysis", commit_sha=commit_sha[:8])
        return bool(found)
    except Exception as exc:
        # Fail open: a duplicate review is preferable to a missed one.
        logger.warning("Redis check failed, proceeding with analysis", error=str(exc))
        return False
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
async def mark_as_reviewed(commit_sha: str) -> None:
    """
    Store a reviewed flag for this commit, expiring after CACHE_TTL seconds.

    Args:
        commit_sha: The HEAD commit SHA that was reviewed
    """
    try:
        redis_client = _get_redis_client()
        # The value is only a marker; the expiry removes it automatically.
        await redis_client.set(_cache_key(commit_sha), "1", ex=CACHE_TTL)
        logger.info("Cached review result", commit_sha=commit_sha[:8], ttl_days=7)
    except Exception as exc:
        # Best effort: a failed write is logged and otherwise ignored.
        logger.warning("Redis set failed", error=str(exc))
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
async def invalidate_cache(commit_sha: str) -> None:
    """
    Delete the reviewed flag for this commit so it can be analyzed again.

    Args:
        commit_sha: The commit SHA to invalidate
    """
    try:
        redis_client = _get_redis_client()
        await redis_client.delete(_cache_key(commit_sha))
        logger.info("Cache invalidated", commit_sha=commit_sha[:8])
    except Exception as exc:
        # Best effort: a failed delete is logged and otherwise ignored.
        logger.warning("Redis delete failed", error=str(exc))
|
app/github/__init__.py
ADDED
|
File without changes
|
app/github/auth.py
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
GitHub App Authentication
|
| 3 |
+
==========================
|
| 4 |
+
|
| 5 |
+
GitHub Apps authenticate via a two-step process:
|
| 6 |
+
|
| 7 |
+
1. **JWT Generation**: We create a JSON Web Token (JWT) signed with our private key
|
| 8 |
+
(.pem file). This JWT proves we are the registered GitHub App. It's valid for
|
| 9 |
+
max 10 minutes — intentionally short-lived for security.
|
| 10 |
+
|
| 11 |
+
2. **Installation Access Token**: We exchange the JWT for an installation access token
|
| 12 |
+
via GitHub's API. This token is scoped to a specific installation (a specific set
|
| 13 |
+
of repos where the app is installed) and lasts 1 hour.
|
| 14 |
+
|
| 15 |
+
Why two steps? A GitHub App can be installed on hundreds of orgs/repos. The JWT says
|
| 16 |
+
"I am CodeProbe app" — the installation token says "I have permission to access
|
| 17 |
+
@ninjacode911's repos specifically." This separation of identity vs. authorization
|
| 18 |
+
is a production-grade security pattern (similar to OAuth2 client credentials).
|
| 19 |
+
|
| 20 |
+
We cache the installation token in memory and refresh it when it expires, so we
|
| 21 |
+
don't make unnecessary API calls.
|
| 22 |
+
|
| 23 |
+
Reference: https://docs.github.com/en/apps/creating-github-apps/authenticating-with-a-github-app
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
import asyncio
|
| 27 |
+
import time
|
| 28 |
+
from pathlib import Path
|
| 29 |
+
|
| 30 |
+
import httpx
|
| 31 |
+
import jwt # PyJWT library — used to create JSON Web Tokens
|
| 32 |
+
|
| 33 |
+
from app.config import settings
|
| 34 |
+
|
| 35 |
+
# In-memory cache for installation tokens
|
| 36 |
+
_token_cache: dict[int, dict] = {}
|
| 37 |
+
|
| 38 |
+
# Asyncio lock to prevent race conditions on token cache
|
| 39 |
+
_token_lock = asyncio.Lock()
|
| 40 |
+
|
| 41 |
+
# Cached private key (read from disk once, reused)
|
| 42 |
+
_private_key: str | None = None
|
| 43 |
+
|
| 44 |
+
# GitHub API base URL
|
| 45 |
+
GITHUB_API = "https://api.github.com"
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _generate_jwt() -> str:
    """
    Build a short-lived RS256-signed JWT that identifies this GitHub App.

    The claims are:
      - iat: issue time, back-dated by 60 seconds to tolerate clock drift
      - exp: expiry, 9 minutes after now
      - iss: the configured GitHub App ID

    The private key is read from disk on the first call and then kept in
    the module-level ``_private_key`` for reuse.

    Returns:
        The encoded JWT.
    """
    global _private_key

    issued_at = int(time.time())

    if _private_key is None:
        # The configured key path is joined onto the directory three
        # levels above this file.
        repo_root = Path(__file__).resolve().parent.parent.parent
        key_file = repo_root / settings.github_app_private_key_path
        _private_key = key_file.read_text()

    claims = {
        "iat": issued_at - 60,
        "exp": issued_at + (9 * 60),
        "iss": settings.github_app_id,
    }

    return jwt.encode(claims, _private_key, algorithm="RS256")
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
async def get_installation_token(installation_id: int) -> str:
    """
    Return an installation access token for one GitHub App installation.

    The token is what the rest of the app sends to GitHub's API. Tokens are
    kept in the module-level cache and reused until they are within 60 seconds
    of their recorded expiry; only then is a new one requested by exchanging a
    freshly signed app JWT.

    Args:
        installation_id: The GitHub installation ID (sent in webhook payloads).

    Returns:
        A valid installation access token string.

    Raises:
        httpx.HTTPStatusError: If GitHub answers the token request with an
            error status.
    """

    def _still_valid_token():
        # A cached entry counts only if it has more than 60 seconds left.
        entry = _token_cache.get(installation_id)
        if entry and entry["expires_at"] > time.time() + 60:
            return entry
        return None

    # Fast path: no lock needed when the cache already holds a usable token.
    hit = _still_valid_token()
    if hit:
        return hit["token"]

    # Slow path: take the lock so that concurrent callers don't all request
    # a new token at once.
    async with _token_lock:
        # Re-check inside the lock — another coroutine may have refreshed the
        # cache while we were waiting.
        hit = _still_valid_token()
        if hit:
            return hit["token"]

        request_headers = {
            "Authorization": f"Bearer {_generate_jwt()}",
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        }
        token_url = f"{GITHUB_API}/app/installations/{installation_id}/access_tokens"

        # Exchange the app JWT for an installation-scoped access token.
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(token_url, headers=request_headers)
            response.raise_for_status()
            data = response.json()

        # Record the token with a local expiry of 3500 seconds from now.
        new_token = data["token"]
        _token_cache[installation_id] = {
            "token": new_token,
            "expires_at": time.time() + 3500,
        }
        return new_token
|
app/github/client.py
ADDED
|
@@ -0,0 +1,362 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
GitHub API Client
|
| 3 |
+
==================
|
| 4 |
+
|
| 5 |
+
This module handles all communication with GitHub's REST API. It provides
|
| 6 |
+
methods to:
|
| 7 |
+
|
| 8 |
+
1. Fetch PR diff (the raw unified diff showing what changed)
|
| 9 |
+
2. Fetch file contents (full source code for context/RAG)
|
| 10 |
+
3. Fetch changed file list (which files were modified)
|
| 11 |
+
4. Post a PR review with inline comments (anchored to specific lines)
|
| 12 |
+
5. Post a summary comment on the PR conversation
|
| 13 |
+
|
| 14 |
+
GitHub API Authentication:
|
| 15 |
+
- We authenticate using installation access tokens (from auth.py)
|
| 16 |
+
- Every request includes the token in the Authorization header
|
| 17 |
+
- The token is scoped to the specific repos where our app is installed
|
| 18 |
+
|
| 19 |
+
GitHub API Versioning:
|
| 20 |
+
- We pin to version "2022-11-28" via X-GitHub-Api-Version header
|
| 21 |
+
- This ensures our code doesn't break when GitHub ships API changes
|
| 22 |
+
- This is a best practice for any API integration in production
|
| 23 |
+
|
| 24 |
+
Rate Limits:
|
| 25 |
+
- GitHub Apps get 5,000 requests/hour per installation
|
| 26 |
+
- That's plenty for our use case (~10-20 API calls per PR review)
|
| 27 |
+
|
| 28 |
+
Reference: https://docs.github.com/en/rest
|
| 29 |
+
"""
|
| 30 |
+
|
| 31 |
+
from __future__ import annotations
|
| 32 |
+
|
| 33 |
+
import base64
|
| 34 |
+
from dataclasses import dataclass
|
| 35 |
+
|
| 36 |
+
import httpx
|
| 37 |
+
import structlog
|
| 38 |
+
|
| 39 |
+
from app.github.auth import get_installation_token
|
| 40 |
+
|
| 41 |
+
logger = structlog.get_logger()
|
| 42 |
+
|
| 43 |
+
GITHUB_API = "https://api.github.com"
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
@dataclass
class PRData:
    """
    All the data we fetch about a PR, bundled together.

    Instances are built and returned by GitHubClient.fetch_pr_data().
    This is passed to the agent orchestrator so agents have full context.
    A dataclass (vs a dict) gives us type safety and autocomplete in the IDE.
    """

    repo_full_name: str  # "owner/repo", e.g. "ninjacode911/myapp"
    pr_number: int  # The PR number within the repository
    commit_sha: str  # HEAD commit of the PR (taken from the PR's head.sha)
    title: str  # The PR title as returned by GitHub
    diff: str  # Raw unified diff (the actual code changes)
    changed_files: list[dict]  # List of {filename, status, additions, deletions, patch}
    file_contents: dict[str, str]  # {filepath: full_file_content} for changed files
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
class GitHubClient:
    """
    Async GitHub API client for a specific installation.

    Usage:
        client = GitHubClient(installation_id=12345)
        pr_data = await client.fetch_pr_data("ninjacode911/myapp", 42)
        await client.post_review(...)

    Why a class instead of standalone functions?
    - The installation_id and token are shared across all API calls for one webhook event
    - A class groups these related operations together with shared state
    - Makes it easy to test by mocking one object
    """

    def __init__(self, installation_id: int):
        """Remember which GitHub App installation this client acts for."""
        self.installation_id = installation_id

    async def _get_headers(self) -> dict[str, str]:
        """
        Build the authorization headers for GitHub API requests.

        Delegates to auth.py which handles token caching and refresh.
        No client-level cache — auth.py's cache is the single source of truth.

        Returns:
            Headers carrying the installation token, the JSON media type and
            the pinned API version.
        """
        token = await get_installation_token(self.installation_id)

        return {
            "Authorization": f"token {token}",
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        }

    async def fetch_pr_data(self, repo_full_name: str, pr_number: int) -> PRData:
        """
        Fetch all data needed to review a PR in one method.

        This makes four kinds of API request:
        1. GET /repos/{owner}/{repo}/pulls/{pr_number} — PR metadata (JSON)
        2. GET /repos/{owner}/{repo}/pulls/{pr_number} — raw diff (diff media type)
        3. GET /repos/{owner}/{repo}/pulls/{pr_number}/files — changed files (paginated)
        4. GET /repos/{owner}/{repo}/contents/{path} — full content, once per changed file

        We fetch full file contents (not just the diff) because our agents need
        surrounding context. The diff alone doesn't show imports, class definitions,
        or the rest of the function — all critical for understanding security and
        performance implications.

        Args:
            repo_full_name: "owner/repo" format (e.g. "ninjacode911/myapp")
            pr_number: The PR number

        Returns:
            PRData with diff, changed files, and full file contents

        Raises:
            httpx.HTTPStatusError: If the metadata, diff or file-list request
                fails. Failures fetching an individual file's content are
                logged and skipped instead.
        """
        headers = await self._get_headers()

        async with httpx.AsyncClient(timeout=30.0) as http:
            # --- 1. Fetch PR metadata ---
            pr_response = await http.get(
                f"{GITHUB_API}/repos/{repo_full_name}/pulls/{pr_number}",
                headers=headers,
            )
            pr_response.raise_for_status()
            pr_json = pr_response.json()

            commit_sha = pr_json["head"]["sha"]
            title = pr_json["title"]

            # --- 2. Fetch the raw diff ---
            # By setting Accept to "application/vnd.github.diff", GitHub returns
            # the raw unified diff instead of JSON. This is the same format you
            # see with `git diff` — it's what our agents will analyze.
            diff_response = await http.get(
                f"{GITHUB_API}/repos/{repo_full_name}/pulls/{pr_number}",
                headers={**headers, "Accept": "application/vnd.github.diff"},
            )
            diff_response.raise_for_status()
            diff = diff_response.text

            # --- 3. Fetch list of changed files ---
            # This gives us structured data: filename, status (added/modified/removed),
            # number of additions/deletions, and the patch (per-file diff).
            # We paginate because large PRs can have 100+ files.
            changed_files = []
            page = 1
            while page <= 30:  # Cap at 3000 files to prevent runaway loops
                files_response = await http.get(
                    f"{GITHUB_API}/repos/{repo_full_name}/pulls/{pr_number}/files",
                    headers=headers,
                    params={"per_page": 100, "page": page},
                )
                files_response.raise_for_status()
                batch = files_response.json()
                if not batch:
                    break
                changed_files.extend(batch)
                if len(batch) < 100:
                    # A short page means this was the last one.
                    break
                page += 1

            # --- 4. Fetch full file contents for each changed file ---
            # We need the complete source code (not just the diff) for RAG context.
            # The agents can then understand imports, class hierarchy, etc.
            file_contents = {}
            for file_info in changed_files:
                filename = file_info["filename"]
                status = file_info["status"]

                # Skip deleted files — no content exists at the PR head.
                # (Binary and oversized files are filtered in _fetch_file_content.)
                if status == "removed":
                    continue

                try:
                    content = await self._fetch_file_content(
                        http, headers, repo_full_name, filename, commit_sha
                    )
                    if content is not None:
                        file_contents[filename] = content
                except Exception as e:
                    # Non-fatal: if we can't fetch one file, continue with the rest
                    logger.warning(
                        "Failed to fetch file content",
                        filename=filename,
                        error=str(e),
                    )

        logger.info(
            "Fetched PR data",
            repo=repo_full_name,
            pr=pr_number,
            changed_files=len(changed_files),
            files_with_content=len(file_contents),
        )

        return PRData(
            repo_full_name=repo_full_name,
            pr_number=pr_number,
            commit_sha=commit_sha,
            title=title,
            diff=diff,
            changed_files=changed_files,
            file_contents=file_contents,
        )

    async def _fetch_file_content(
        self,
        http: httpx.AsyncClient,
        headers: dict,
        repo_full_name: str,
        filepath: str,
        ref: str,
    ) -> str | None:
        """
        Fetch the full content of a single file at a specific commit.

        GitHub's Contents API returns file content as base64-encoded string.
        We decode it to get the actual source code text.

        Why base64? Because GitHub's API is JSON-based, and JSON can't safely
        contain arbitrary binary content. Base64 encodes binary as ASCII text.
        This is the same encoding used in email attachments (MIME).

        Args:
            http: The httpx client (reused for connection pooling)
            headers: Auth headers
            repo_full_name: "owner/repo"
            filepath: Path to the file in the repo
            ref: Git ref (commit SHA) to fetch the file at

        Returns:
            The file content as a string, or None if the file is missing,
            not a regular file, larger than 1MB, or not valid UTF-8 text.

        Raises:
            httpx.HTTPStatusError: For error responses other than 404.
        """
        from urllib.parse import quote

        # Percent-encode the path so characters such as "#", "?", "%" or spaces
        # in a filename are not misread as URL syntax. "/" is kept because it
        # separates directories in the repo path.
        encoded_path = quote(filepath, safe="/")

        response = await http.get(
            f"{GITHUB_API}/repos/{repo_full_name}/contents/{encoded_path}",
            headers=headers,
            params={"ref": ref},
        )

        if response.status_code == 404:
            return None

        response.raise_for_status()
        data = response.json()

        # GitHub returns "file" type for regular files.
        # Skip directories (returned as a JSON list rather than an object),
        # symlinks, or submodules.
        if not isinstance(data, dict) or data.get("type") != "file":
            return None

        # Files > 1MB use a different API (Blobs). Skip for now — these are
        # usually auto-generated or binary files, not worth reviewing.
        size = data.get("size", 0)
        if size > 1_000_000:
            logger.info("Skipping large file", filepath=filepath, size=size)
            return None

        # Decode the base64-encoded content
        content_b64 = data.get("content", "")
        try:
            return base64.b64decode(content_b64).decode("utf-8")
        except (ValueError, TypeError):
            # ValueError covers both malformed base64 (binascii.Error) and
            # binary data that isn't UTF-8 (UnicodeDecodeError); TypeError
            # covers a missing/null content field.
            return None

    async def post_review(
        self,
        repo_full_name: str,
        pr_number: int,
        commit_sha: str,
        body: str,
        comments: list[dict],
    ) -> dict:
        """
        Post a pull request review with inline comments.

        This is the core output mechanism of CodeProbe. A "review" in GitHub terms
        is a batch of inline comments submitted together, optionally with a top-level
        body and an event type (APPROVE, REQUEST_CHANGES, COMMENT).

        Each inline comment is anchored to a specific file and line, so it appears
        right next to the relevant code — just like a human reviewer would comment.

        GitHub's review API is atomic: either all comments post successfully, or
        none do. This prevents partial reviews that would confuse developers.

        Args:
            repo_full_name: "owner/repo"
            pr_number: PR number
            commit_sha: The exact commit SHA these comments reference
            body: The top-level review summary (shown above inline comments)
            comments: List of dicts with keys:
                - path: file path (e.g. "src/auth/login.py")
                - line: line number in the diff (the new file's line number)
                - body: the comment text (Markdown supported)

        Returns:
            The GitHub API response as a dict

        Raises:
            httpx.HTTPStatusError: If GitHub rejects the review.
        """
        headers = await self._get_headers()

        # We use "COMMENT" event — this posts the review without approving or
        # requesting changes. Our bot shouldn't block PRs at the GitHub level;
        # instead, we indicate blocking via the Health Score in the summary.
        review_payload = {
            "commit_id": commit_sha,
            "body": body,
            "event": "COMMENT",
            "comments": comments,
        }

        async with httpx.AsyncClient(timeout=30.0) as http:
            response = await http.post(
                f"{GITHUB_API}/repos/{repo_full_name}/pulls/{pr_number}/reviews",
                headers=headers,
                json=review_payload,
            )
            response.raise_for_status()

        logger.info(
            "Posted PR review",
            repo=repo_full_name,
            pr=pr_number,
            inline_comments=len(comments),
        )

        return response.json()

    async def post_comment(
        self, repo_full_name: str, pr_number: int, body: str
    ) -> dict:
        """
        Post a standalone comment on the PR conversation (not inline).

        Used for the summary comment (Health Score, finding counts, executive summary)
        when we don't have inline comments, or as a fallback.

        This uses the Issues API (PRs are issues in GitHub's data model) rather
        than the Pull Request Review API.

        Args:
            repo_full_name: "owner/repo"
            pr_number: PR number
            body: Comment text (Markdown)

        Returns:
            The GitHub API response as a dict

        Raises:
            httpx.HTTPStatusError: If GitHub rejects the comment.
        """
        headers = await self._get_headers()

        async with httpx.AsyncClient(timeout=30.0) as http:
            response = await http.post(
                f"{GITHUB_API}/repos/{repo_full_name}/issues/{pr_number}/comments",
                headers=headers,
                json={"body": body},
            )
            response.raise_for_status()

        logger.info("Posted PR comment", repo=repo_full_name, pr=pr_number)

        return response.json()
|
app/github/comment_formatter.py
ADDED
|
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
GitHub Comment Formatter
|
| 3 |
+
=========================
|
| 4 |
+
|
| 5 |
+
Converts our internal Finding and SynthesizedReview data structures into
|
| 6 |
+
GitHub-flavored Markdown for posting as PR comments.
|
| 7 |
+
|
| 8 |
+
Two types of output:
|
| 9 |
+
1. **Inline comments** — one per finding, anchored to a specific file+line.
|
| 10 |
+
These appear right next to the code, like a human reviewer's comments.
|
| 11 |
+
2. **Summary comment** — a top-level PR comment with the Health Score,
|
| 12 |
+
finding counts by severity, and an executive summary.
|
| 13 |
+
|
| 14 |
+
Design decisions:
|
| 15 |
+
- We use emoji prefixes for severity to make scanning fast (most devs skim reviews)
|
| 16 |
+
- Each inline comment includes the agent name and category for traceability
|
| 17 |
+
- CWE IDs are linked for security findings (so devs can learn about the vulnerability)
|
| 18 |
+
- Suggested fixes use fenced code blocks for easy copy-paste
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
from __future__ import annotations
|
| 22 |
+
|
| 23 |
+
from app.models.findings import Finding, SynthesizedReview
|
| 24 |
+
|
| 25 |
+
# Emoji prefix for each severity level
SEVERITY_EMOJI = {
    "critical": "\U0001f6a8",  # 🚨
    "high": "\U0001f7e0",  # 🟠
    "medium": "\U0001f7e1",  # 🟡
    "low": "\u2139\ufe0f",  # ℹ️
}

# Emoji prefix for each reviewing agent
AGENT_EMOJI = {
    "security": "\U0001f512",  # 🔒
    "performance": "\u26a1",  # ⚡
    "style": "\u270f\ufe0f",  # ✏️
}


def format_inline_comment(finding: Finding) -> str:
    """
    Format a single Finding as a GitHub inline comment body.

    This Markdown will appear anchored to the specific file+line in the PR diff.

    Example output:
        🚨 **[CRITICAL — Security] SQL Injection Risk**

        The query on line 47 constructs SQL via string interpolation.
        User input is directly embedded without sanitization.

        **Suggested fix:**
        ```
        cursor.execute('SELECT * FROM users WHERE id = %s', (user_id,))
        ```

        > 🔒 Security · CWE-89 · Confidence: 0.92

    Args:
        finding: The finding to render. Reads its severity, agent, title,
            description, suggested_fix, cwe_id and confidence attributes.

    Returns:
        The comment body as a single Markdown string.
    """
    severity_emoji = SEVERITY_EMOJI.get(finding.severity, "")
    agent_emoji = AGENT_EMOJI.get(finding.agent, "")
    severity_upper = finding.severity.upper()
    agent_title = finding.agent.capitalize()

    # Build the comment body
    lines = [
        f"{severity_emoji} **[{severity_upper} — {agent_title}] {finding.title}**",
        "",
        finding.description,
    ]

    # Add suggested fix if present
    if finding.suggested_fix:
        lines.extend([
            "",
            "**Suggested fix:**",
            "```",
            finding.suggested_fix,
            "```",
        ])

    # Add metadata footer
    footer_parts = [f"{agent_emoji} {agent_title}"]
    if finding.cwe_id:
        # Link to MITRE only when the ID ends in a numeric part ("CWE-89").
        # An ID in any other shape is shown as plain text instead of raising
        # or producing a dead link.
        cwe_num = finding.cwe_id.rpartition("-")[2]
        if cwe_num.isdigit():
            footer_parts.append(
                f"[{finding.cwe_id}](https://cwe.mitre.org/data/definitions/{cwe_num}.html)"
            )
        else:
            footer_parts.append(finding.cwe_id)
    footer_parts.append(f"Confidence: {finding.confidence:.2f}")

    lines.extend(["", f"> {' · '.join(footer_parts)}"])

    return "\n".join(lines)
|
| 90 |
+
|
| 91 |
+
|
| 92 |
+
def format_summary_comment(review: SynthesizedReview) -> str:
    """
    Format the top-level PR summary comment with Health Score and finding overview.

    This is posted as a regular PR comment (not inline). It gives the PR author
    a quick overview without needing to look at every inline comment.

    The Health Score gauge uses block characters to create a visual progress bar
    in pure Unicode (works in GitHub Markdown without images).

    Args:
        review: The synthesized review. Reads its health_score, the four
            per-severity counts, recommendation, executive_summary and findings.

    Returns:
        The full summary comment as a single Markdown string.
    """
    score = review.health_score

    # Determine overall status
    if score >= 80:
        status_emoji = "\u2705"  # ✅
        status_text = "Healthy"
    elif score >= 60:
        status_emoji = "\u26a0\ufe0f"  # ⚠️
        status_text = "Needs Attention"
    else:
        status_emoji = "\u274c"  # ❌
        status_text = "Action Required"

    # Build the visual health bar (20 segments). The fill is clamped so an
    # out-of-range score can never produce a bar that isn't 20 cells wide.
    filled = max(0, min(20, round(score / 5)))
    bar = "\u2588" * filled + "\u2591" * (20 - filled)

    # Count total findings
    total = (
        review.critical_count
        + review.high_count
        + review.medium_count
        + review.low_count
    )

    lines = [
        f"## {status_emoji} Ninja Code Guard Review — Health Score: {score}/100",
        "",
        f"`{bar}` **{score}**/100 — {status_text}",
        "",
        "### Findings Summary",
        "",
        "| Severity | Count |",
        "|----------|-------|",
        f"| \U0001f6a8 Critical | {review.critical_count} |",
        f"| \U0001f7e0 High | {review.high_count} |",
        f"| \U0001f7e1 Medium | {review.medium_count} |",
        f"| \u2139\ufe0f Low | {review.low_count} |",
        f"| **Total** | **{total}** |",
        "",
    ]

    # Add recommendation (an unrecognized value renders as an empty line)
    rec_map = {
        "approve": "\u2705 **Recommendation: Approve** — No critical issues found.",
        "request_changes": "\u26a0\ufe0f **Recommendation: Request Changes** — Issues found that should be addressed.",
        "block": "\u274c **Recommendation: Block Merge** — Critical issues must be resolved before merging.",
    }
    lines.append(rec_map.get(review.recommendation, ""))
    lines.append("")

    # Add executive summary
    lines.extend([
        "### Executive Summary",
        "",
        review.executive_summary,
        "",
    ])

    # Add detailed findings (so all info is visible even if inline comments fail)
    if review.findings:
        lines.append("### Detailed Findings")
        lines.append("")
        for finding in review.findings:
            severity_emoji = SEVERITY_EMOJI.get(finding.severity, "")
            agent_emoji = AGENT_EMOJI.get(finding.agent, "")
            # Each finding is a collapsible <details> section so the comment
            # stays short until the reader expands an entry.
            lines.append(
                f"<details>\n"
                f"<summary>{severity_emoji} <b>[{finding.severity.upper()}]</b> "
                f"{finding.title} — <code>{finding.file_path}:{finding.line_start}</code></summary>\n\n"
                f"{finding.description}\n"
            )
            if finding.suggested_fix:
                lines.append(f"**Suggested fix:**\n```\n{finding.suggested_fix}\n```\n")
            footer_parts = [f"{agent_emoji} {finding.agent.capitalize()}"]
            if finding.cwe_id:
                # Link to MITRE only when the ID ends in a numeric part;
                # otherwise show the ID as plain text rather than a dead link.
                cwe_num = finding.cwe_id.rpartition("-")[2]
                if cwe_num.isdigit():
                    footer_parts.append(
                        f"[{finding.cwe_id}](https://cwe.mitre.org/data/definitions/{cwe_num}.html)"
                    )
                else:
                    footer_parts.append(finding.cwe_id)
            footer_parts.append(f"Confidence: {finding.confidence:.2f}")
            lines.append(f"> {' · '.join(footer_parts)}\n")
            lines.append("</details>\n")

    lines.extend([
        "---",
        "*Reviewed by [Ninja Code Guard](https://github.com/ninjacode911/ninja-code-guard) — Multi-agent code review*",
    ])

    return "\n".join(lines)
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def findings_to_review_comments(findings: list[Finding]) -> list[dict]:
    """
    Turn Findings into the comment dicts used when creating a GitHub review.

    One dict is produced per finding, in input order, with these keys:
    - path: the finding's file path
    - line: the finding's starting line number
    - side: always "RIGHT" (the new version of the file)
    - body: the Markdown produced by format_inline_comment()

    Note: this function performs no filtering. GitHub requires `line` to fall
    inside a diff hunk, so any finding outside the diff must be removed by the
    caller before the review is posted.
    We use `line` (not `position`) because position-based comments are deprecated.
    """
    return [
        {
            "path": item.file_path,
            "line": item.line_start,
            "side": "RIGHT",  # RIGHT = new version of the file (what the PR introduces)
            "body": format_inline_comment(item),
        }
        for item in findings
    ]
|
app/github/webhook.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
GitHub Webhook Signature Validation
|
| 3 |
+
====================================
|
| 4 |
+
|
| 5 |
+
When GitHub sends a webhook event to our server, it includes a cryptographic
|
| 6 |
+
signature in the `X-Hub-Signature-256` header. This signature proves the request
|
| 7 |
+
genuinely came from GitHub, not from an attacker.
|
| 8 |
+
|
| 9 |
+
The signature is computed as: HMAC-SHA256(webhook_secret, request_body)
|
| 10 |
+
|
| 11 |
+
We recompute the same HMAC on our side and compare. If they match, the request
|
| 12 |
+
is authentic. We use `hmac.compare_digest()` for constant-time comparison to
|
| 13 |
+
prevent timing attacks — where an attacker measures response time differences
|
| 14 |
+
to guess the signature byte by byte.
|
| 15 |
+
|
| 16 |
+
Reference: https://docs.github.com/en/webhooks/using-webhooks/validating-webhook-deliveries
|
| 17 |
+
"""
|
| 18 |
+
|
| 19 |
+
import hashlib
|
| 20 |
+
import hmac
|
| 21 |
+
|
| 22 |
+
from fastapi import Header, HTTPException, Request
|
| 23 |
+
|
| 24 |
+
from app.config import settings
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
async def validate_webhook_signature(
    request: Request,
    x_hub_signature_256: str = Header(..., alias="X-Hub-Signature-256"),
) -> bytes:
    """
    FastAPI dependency that validates the GitHub webhook HMAC-SHA256 signature.

    How this works as a FastAPI dependency:
    - FastAPI calls this function before the endpoint that depends on it runs
    - The X-Hub-Signature-256 header is extracted from the request; because the
      parameter is declared as required (`Header(...)`), a request without the
      header is rejected by FastAPI's own validation before this body executes
    - If validation fails, it raises HTTPException and the endpoint never executes
    - If it passes, it returns the raw request body for further processing

    Args:
        request: The incoming FastAPI request object (injected automatically)
        x_hub_signature_256: The signature header value, expected in the form
            "sha256=<hex_digest>"

    Returns:
        The raw request body bytes (so the endpoint can parse it as JSON)

    Raises:
        HTTPException 500: If no webhook secret is configured
        HTTPException 401: If the header is empty, is not in "sha256=<hex>"
            form, or does not match the HMAC computed over the body
    """
    # Read the raw request body — the HMAC must be computed over the exact
    # bytes that were sent, not over re-serialized JSON.
    body = await request.body()

    # Reject if webhook secret is not configured — empty secret = no security
    if not settings.github_webhook_secret:
        raise HTTPException(status_code=500, detail="Webhook secret not configured")

    if not x_hub_signature_256:
        raise HTTPException(status_code=401, detail="Missing webhook signature header")

    # GitHub sends the signature as "sha256=<hex_digest>"
    prefix = "sha256="
    if not x_hub_signature_256.startswith(prefix):
        raise HTTPException(status_code=401, detail="Invalid signature format")

    received_signature = x_hub_signature_256[len(prefix):]

    # Compute the expected HMAC using our stored webhook secret
    expected_signature = hmac.new(
        key=settings.github_webhook_secret.encode("utf-8"),
        msg=body,
        digestmod=hashlib.sha256,
    ).hexdigest()

    # Constant-time comparison. Both sides are compared as bytes:
    # hmac.compare_digest() raises TypeError for str arguments containing
    # non-ASCII characters, and the received value is attacker-controlled, so a
    # str comparison would turn a forged header into an unhandled 500 instead
    # of a clean 401.
    if not hmac.compare_digest(
        expected_signature.encode("ascii"),
        received_signature.encode("utf-8"),
    ):
        raise HTTPException(status_code=401, detail="Invalid webhook signature")

    return body
|
app/main.py
ADDED
|
@@ -0,0 +1,355 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Ninja Code Guard — FastAPI Application Entry Point
|
| 3 |
+
=============================================
|
| 4 |
+
|
| 5 |
+
This is the main entry point for the Ninja Code Guard backend. It sets up:
|
| 6 |
+
|
| 7 |
+
1. The FastAPI application with CORS middleware
|
| 8 |
+
2. The /health endpoint (used by Render health checks and the pre-warm cron)
|
| 9 |
+
3. The /webhook/github endpoint (receives PR events from GitHub)
|
| 10 |
+
|
| 11 |
+
Request lifecycle for a PR review:
|
| 12 |
+
GitHub webhook → HMAC validation → Redis cache check → fetch PR data
|
| 13 |
+
→ index files for RAG → run agents in parallel → synthesize → post review → save + cache result
|
| 14 |
+
|
| 15 |
+
The webhook handler uses FastAPI's "Background Tasks" feature to process
|
| 16 |
+
the review asynchronously. This means we return 200 to GitHub immediately
|
| 17 |
+
(within their 10-second timeout) and do the heavy lifting in the background.
|
| 18 |
+
Without this, GitHub would retry the webhook if we took too long.
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
import asyncio
|
| 22 |
+
import json
|
| 23 |
+
import traceback
|
| 24 |
+
|
| 25 |
+
from fastapi import (
|
| 26 |
+
BackgroundTasks, Depends, FastAPI, Header, HTTPException,
|
| 27 |
+
Request, Response, Security,
|
| 28 |
+
)
|
| 29 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 30 |
+
from fastapi.security import APIKeyHeader
|
| 31 |
+
import structlog
|
| 32 |
+
|
| 33 |
+
from app.config import settings
|
| 34 |
+
|
| 35 |
+
# ── API Key auth for dashboard endpoints ──────────────────────────────────
|
| 36 |
+
_api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)


async def verify_api_key(api_key: str = Security(_api_key_header)):
    """
    Reject dashboard API requests that don't carry a valid API key.

    Args:
        api_key: Value of the X-API-Key header. Because the header scheme is
            created with auto_error=False, a missing header arrives as None
            instead of raising.

    Raises:
        HTTPException 403: If a dashboard key is configured and the supplied
            key is missing or does not match it.
    """
    # Local import: this module does not import hmac at file level.
    import hmac

    expected_key = settings.dashboard_api_key
    if not expected_key:
        return  # No key configured → allow (dev mode)

    supplied_key = api_key or ""

    # Compare as bytes in constant time so response timing does not reveal how
    # many leading characters of a guessed key were correct.
    keys_match = hmac.compare_digest(
        supplied_key.encode("utf-8"),
        expected_key.encode("utf-8"),
    )
    if not keys_match:
        raise HTTPException(status_code=403, detail="Invalid or missing API key")
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
from app.agents.performance_agent import PerformanceAgent
|
| 48 |
+
from app.agents.security_agent import SecurityAgent
|
| 49 |
+
from app.agents.style_agent import StyleAgent
|
| 50 |
+
from app.agents.synthesizer import synthesize
|
| 51 |
+
from app.context.indexer import index_repo_files
|
| 52 |
+
from app.context.retriever import retrieve_context
|
| 53 |
+
from app.db.postgres import save_review
|
| 54 |
+
from app.db.redis_cache import is_already_reviewed, mark_as_reviewed
|
| 55 |
+
from app.github.client import GitHubClient
|
| 56 |
+
from app.github.comment_formatter import (
|
| 57 |
+
findings_to_review_comments,
|
| 58 |
+
format_inline_comment,
|
| 59 |
+
format_summary_comment,
|
| 60 |
+
)
|
| 61 |
+
from app.github.webhook import validate_webhook_signature
|
| 62 |
+
|
| 63 |
+
logger = structlog.get_logger()

_is_production = settings.environment == "production"

# The interactive docs and the OpenAPI schema expose the API surface, so they
# are only served outside production.
app = FastAPI(
    title="Ninja Code Guard",
    description="Multi-agent PR review system",
    version="0.1.0",
    docs_url="/docs" if not _is_production else None,
    redoc_url="/redoc" if not _is_production else None,
    openapi_url="/openapi.json" if not _is_production else None,
)

# CORS lets the browser-based dashboard call this API from another origin.
# Origins come from a comma-separated setting; when the setting is empty we
# fall back to the local dashboard dev server.
if settings.cors_allowed_origins:
    _allowed_origins = [
        origin.strip()
        for origin in settings.cors_allowed_origins.split(",")
        if origin.strip()
    ]
else:
    _allowed_origins = ["http://localhost:3000"]

app.add_middleware(
    CORSMiddleware,
    allow_origins=_allowed_origins,
    allow_credentials=True,
    allow_methods=["GET", "POST"],
    allow_headers=["Content-Type", "X-API-Key", "X-GitHub-Event", "X-Hub-Signature-256"],
)
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
@app.get("/health")
async def health_check():
    """
    Liveness probe.

    Returns a fixed JSON body and performs no dependency checks, so it answers
    as long as the process is serving requests. Intended consumers are the
    hosting platform's health check, the pre-warm cron job, and the dashboard's
    service-status indicator.
    """
    status_payload = {"status": "ok", "service": "Ninja Code Guard"}
    return status_payload
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
# --- Dashboard API Endpoints ---
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
@app.get("/api/repos/{owner}/{repo}/reviews")
async def get_reviews(owner: str, repo: str, _=Depends(verify_api_key)):
    """
    Return the stored PR reviews for one repository (dashboard endpoint).

    Args:
        owner: Repository owner segment of the URL.
        repo: Repository name segment of the URL.
        _: Placeholder for the API-key dependency; only its side effect
            (rejecting bad keys) matters.

    Returns:
        {"repo": "<owner>/<repo>", "reviews": [...]} where the list is whatever
        get_repo_reviews() returns with its default limit.
    """
    # Imported lazily, as elsewhere in this module's dashboard endpoints.
    from app.db.postgres import get_repo_reviews

    full_name = f"{owner}/{repo}"
    stored_reviews = await get_repo_reviews(full_name)
    return {"repo": full_name, "reviews": stored_reviews}
|
| 117 |
+
|
| 118 |
+
|
| 119 |
+
@app.get("/api/repos/{owner}/{repo}/stats")
async def get_stats(owner: str, repo: str, _=Depends(verify_api_key)):
    """
    Return aggregate stats for one repository (dashboard endpoint).

    The aggregates are computed over at most the 50 reviews requested from
    get_repo_reviews(), so "total_reviews" is capped at 50.

    Args:
        owner: Repository owner segment of the URL.
        repo: Repository name segment of the URL.
        _: Placeholder for the API-key dependency.

    Returns:
        A dict with the same keys whether or not any reviews exist:
        "repo", "total_reviews", "avg_health_score" (rounded to an int) and
        "reviews" (the first 10 fetched reviews; empty when there are none).
    """
    from app.db.postgres import get_repo_reviews

    repo_full_name = f"{owner}/{repo}"
    reviews = await get_repo_reviews(repo_full_name, limit=50)

    if not reviews:
        # Keep the response shape identical to the populated case so clients
        # can read "reviews" unconditionally.
        return {
            "repo": repo_full_name,
            "total_reviews": 0,
            "avg_health_score": 0,
            "reviews": [],
        }

    # `or 0` treats a stored NULL/None score like a missing one instead of
    # letting sum() fail on None.
    avg_score = sum((r.get("health_score") or 0) for r in reviews) / len(reviews)
    return {
        "repo": repo_full_name,
        "total_reviews": len(reviews),
        "avg_health_score": round(avg_score),
        "reviews": reviews[:10],
    }
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
# --- Webhook Actions (what to do for each event type) ---
|
| 137 |
+
|
| 138 |
+
# pull_request actions that trigger a review; every other action
# (labeled, assigned, etc.) is acknowledged and ignored.
RELEVANT_PR_ACTIONS = {
    "opened",
    "synchronize",
    "reopened",
    "ready_for_review",
}
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
async def _process_pr_review(
    repo_full_name: str,
    pr_number: int,
    commit_sha: str,
    installation_id: int,
) -> None:
    """
    Background task: fetch PR data, run the review agents, and post the result.

    Pipeline:
    1. Fetch PR diff and file contents from GitHub
    2. Index files and retrieve RAG context (best-effort; failures are logged
       and the review continues with an empty context)
    3. Run the 3 domain agents concurrently (asyncio.gather)
    4. Synthesize the findings into a single review
    5. Post the review to GitHub (inline review, falling back to a plain
       summary comment if the inline review is rejected)
    6. Save the review to Postgres (best-effort)
    7. Mark the commit as reviewed in Redis

    Args:
        repo_full_name: Repository in "owner/name" form.
        pr_number: Pull request number within the repository.
        commit_sha: Head commit SHA the review applies to.
        installation_id: GitHub App installation ID used to build the client.

    This function never raises: it runs as a background task with no caller
    to handle errors, so any failure is logged with its traceback.
    """
    try:
        logger.info(
            "Starting PR review",
            repo=repo_full_name,
            pr=pr_number,
            sha=commit_sha[:8],
        )

        # Step 1: Fetch PR data
        client = GitHubClient(installation_id)
        pr_data = await client.fetch_pr_data(repo_full_name, pr_number)

        # Step 2: Index files for RAG context. Optional enrichment — if it
        # fails the agents still run, just without retrieved context.
        rag_context = ""
        try:
            collection_name = await index_repo_files(
                repo_full_name, pr_data.file_contents
            )
            rag_context = await retrieve_context(
                collection_name, pr_data.diff[:5000]
            )
        except Exception as rag_err:
            logger.warning("RAG context unavailable", error=str(rag_err))

        # Step 3: Run all 3 domain agents concurrently, so total latency is
        # roughly the slowest agent rather than the sum of all three.
        security_agent = SecurityAgent()
        performance_agent = PerformanceAgent()
        style_agent = StyleAgent()

        security_findings, performance_findings, style_findings = await asyncio.gather(
            security_agent.review(pr_data, rag_context),
            performance_agent.review(pr_data, rag_context),
            style_agent.review(pr_data, rag_context),
        )

        logger.info(
            "All agents completed",
            security=len(security_findings),
            performance=len(performance_findings),
            style=len(style_findings),
            total=len(security_findings) + len(performance_findings) + len(style_findings),
            repo=repo_full_name,
            pr=pr_number,
        )

        # Step 4: Synthesize — deduplicate, rank, score, summarize
        review = synthesize(security_findings, performance_findings, style_findings)

        # Step 5: Post the review to GitHub. The summary body is the same on
        # every path, so format it once.
        summary_body = format_summary_comment(review)

        if review.findings:
            # Post inline comments anchored to specific lines
            review_comments = findings_to_review_comments(review.findings)
            try:
                await client.post_review(
                    repo_full_name,
                    pr_number,
                    commit_sha,
                    body=summary_body,
                    comments=review_comments,
                )
            except Exception as review_err:
                # If inline comments fail (e.g., line not in diff), fall back to summary only
                logger.warning(
                    "Inline review failed, posting summary comment instead",
                    error=str(review_err),
                )
                await client.post_comment(repo_full_name, pr_number, summary_body)
        else:
            # No findings — post a clean bill of health
            await client.post_comment(repo_full_name, pr_number, summary_body)

        # Step 6: Save to Postgres (for dashboard). The review is already
        # visible on GitHub at this point, so a database failure must not stop
        # us from marking the commit as reviewed — otherwise a later event for
        # the same SHA would post the whole review again.
        try:
            await save_review(repo_full_name, pr_number, commit_sha, review)
        except Exception as save_err:
            logger.error(
                "Failed to persist review",
                repo=repo_full_name,
                pr=pr_number,
                error=str(save_err),
                traceback=traceback.format_exc(),
            )

        # Step 7: Mark this commit as reviewed in Redis cache
        await mark_as_reviewed(commit_sha)

        logger.info(
            "PR review completed",
            repo=repo_full_name,
            pr=pr_number,
            sha=commit_sha[:8],
        )

    except Exception as e:
        # Log the full traceback so we can debug failures
        logger.error(
            "PR review failed",
            repo=repo_full_name,
            pr=pr_number,
            error=str(e),
            traceback=traceback.format_exc(),
        )
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
@app.post("/webhook/github")
async def webhook_github(
    request: Request,
    background_tasks: BackgroundTasks,
    x_github_event: str = Header(..., alias="X-GitHub-Event"),
    body: bytes = Depends(validate_webhook_signature),
):
    """
    Receive and process GitHub webhook events.

    How the flow works:
    1. FastAPI calls validate_webhook_signature() BEFORE this function runs
       (it's a Depends() dependency). If HMAC validation fails, we never get here.
    2. Events other than "pull_request" are acknowledged and ignored.
    3. The validated body is parsed; irrelevant actions and draft PRs are ignored.
    4. If the head commit is already marked as reviewed in Redis, we skip it.
    5. Otherwise the review is enqueued as a background task and we return
       immediately, so the response does not wait for the review to finish.

    Args:
        request: The FastAPI request object (unused here; the body is supplied
            by the signature dependency)
        background_tasks: FastAPI's background task queue
        x_github_event: The event type header (e.g., "pull_request")
        body: The validated request body (returned by validate_webhook_signature)

    Returns:
        A small status dict ("ignored", "skipped" or "accepted"), or a 400
        Response when the payload carries no installation ID.

    Raises:
        HTTPException 400: If the body is not a JSON object or is missing the
            pull request fields this handler needs.
    """
    # We only handle pull_request events for now. Checked before parsing so
    # other event types are never rejected because of their body.
    if x_github_event != "pull_request":
        logger.debug("Ignoring non-PR event", github_event=x_github_event)
        return {"status": "ignored", "reason": f"event type: {x_github_event}"}

    # Parse the validated payload. A valid signature only proves who sent the
    # bytes, not that they are JSON (a webhook configured with a form-encoded
    # content type is signed too), so answer with a 400 rather than a 500.
    try:
        payload = json.loads(body)
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        logger.warning("Webhook body is not valid JSON")
        raise HTTPException(status_code=400, detail="Invalid JSON payload")

    if not isinstance(payload, dict):
        logger.warning("Webhook payload is not a JSON object")
        raise HTTPException(status_code=400, detail="Invalid JSON payload")

    action = payload.get("action", "")
    if action not in RELEVANT_PR_ACTIONS:
        logger.debug("Ignoring irrelevant PR action", action=action)
        return {"status": "ignored", "reason": f"action: {action}"}

    # Extract key data from the webhook payload
    try:
        pr = payload["pull_request"]
        repo_full_name = payload["repository"]["full_name"]
        pr_number = payload["number"]
        commit_sha = pr["head"]["sha"]
    except (KeyError, TypeError) as field_err:
        logger.error("Malformed pull_request payload", error=repr(field_err))
        raise HTTPException(status_code=400, detail="Malformed pull_request payload")

    # Skip draft PRs — they're not ready for review
    if pr.get("draft", False):
        logger.info("Skipping draft PR", repo=repo_full_name, pr=pr_number)
        return {"status": "ignored", "reason": "draft PR"}

    # Check Redis cache — have we already reviewed this exact commit?
    if await is_already_reviewed(commit_sha):
        return {"status": "skipped", "reason": "already reviewed", "sha": commit_sha[:8]}

    # Get the installation ID (needed for GitHub App authentication).
    # `or {}` also covers an explicit `"installation": null`.
    installation_id = (payload.get("installation") or {}).get("id")
    if not installation_id:
        logger.error("No installation ID in webhook payload")
        return Response(status_code=400, content="Missing installation ID")

    # Enqueue the review as a background task; it runs after the response
    # below has been returned.
    background_tasks.add_task(
        _process_pr_review,
        repo_full_name=repo_full_name,
        pr_number=pr_number,
        commit_sha=commit_sha,
        installation_id=installation_id,
    )

    logger.info(
        "Webhook received — review enqueued",
        repo=repo_full_name,
        pr=pr_number,
        sha=commit_sha[:8],
        action=action,
    )

    return {
        "status": "accepted",
        "pr": pr_number,
        "sha": commit_sha[:8],
    }
|
app/models/__init__.py
ADDED
|
File without changes
|
app/models/findings.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Core data models for agent findings and PR reviews."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from typing import Literal, Optional
|
| 6 |
+
from uuid import UUID, uuid4
|
| 7 |
+
|
| 8 |
+
from pydantic import BaseModel, Field
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
class Finding(BaseModel):
    """One issue reported by a domain agent, anchored to a file and line range."""

    # Which agent produced the finding.
    agent: Literal["security", "performance", "style"]
    # Path of the affected file.
    file_path: str
    # First and last line of the affected range.
    line_start: int
    line_end: int
    # Severity bucket.
    severity: Literal["critical", "high", "medium", "low"]
    # Free-form category label.
    category: str
    # Short headline and longer explanation.
    title: str
    description: str
    # Proposed remediation; empty when the agent offers none.
    suggested_fix: str = ""
    # Optional CWE identifier.
    cwe_id: Optional[str] = None
    # Agent's confidence in the finding, validated to lie within [0.0, 1.0].
    confidence: float = Field(ge=0.0, le=1.0)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class SynthesizedReview(BaseModel):
    """Combined review produced by the synthesizer from all agents' findings."""

    # Overall score, validated to lie within 0..100.
    health_score: int = Field(ge=0, le=100)
    # Human-readable summary of the review.
    executive_summary: str
    # Merge recommendation.
    recommendation: Literal["approve", "request_changes", "block"]
    # The findings included in the review.
    findings: list[Finding]
    # Number of findings per severity bucket.
    critical_count: int = 0
    high_count: int = 0
    medium_count: int = 0
    low_count: int = 0
    # Review duration in milliseconds.
    duration_ms: int = 0
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
class PRReviewRecord(BaseModel):
    """Persisted form of a completed PR review."""

    # Record identifier; a random UUID is generated when none is supplied.
    id: UUID = Field(default_factory=uuid4)
    # Which repository, pull request and commit the review belongs to.
    repo_full_name: str
    pr_number: int
    commit_sha: str
    # Overall score, validated to lie within 0..100.
    health_score: int = Field(ge=0, le=100)
    # Number of findings per severity bucket.
    critical_count: int = 0
    high_count: int = 0
    medium_count: int = 0
    low_count: int = 0
    # Summary text and the findings themselves.
    summary: str = ""
    findings: list[Finding] = []
    # Review duration in milliseconds.
    duration_ms: int = 0
|
app/models/webhook_payloads.py
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""GitHub webhook event payload schemas."""
|
| 2 |
+
|
| 3 |
+
from __future__ import annotations
|
| 4 |
+
|
| 5 |
+
from typing import Optional
|
| 6 |
+
|
| 7 |
+
from pydantic import BaseModel
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class GitHubUser(BaseModel):
    """Account fields read from a webhook payload."""

    login: str
    id: int


class GitHubRepo(BaseModel):
    """Repository fields read from a webhook payload."""

    id: int
    full_name: str
    private: bool
    # Falls back to "main" when the payload omits the field.
    default_branch: str = "main"


class PullRequestHead(BaseModel):
    """Head of a pull request: commit SHA and ref name."""

    sha: str
    ref: str


class PullRequest(BaseModel):
    """Pull request fields read from a webhook payload."""

    number: int
    title: str
    state: str
    head: PullRequestHead
    draft: bool = False
    # Size statistics are optional and default to None.
    changed_files: Optional[int] = None
    additions: Optional[int] = None
    deletions: Optional[int] = None


class PullRequestEvent(BaseModel):
    """GitHub pull_request webhook event."""

    action: str  # opened, synchronize, reopened, ready_for_review
    number: int
    pull_request: PullRequest
    repository: GitHubRepo
    sender: GitHubUser


class Installation(BaseModel):
    """GitHub App installation reference (ID only)."""

    id: int


class PullRequestEventWithInstallation(PullRequestEvent):
    """Pull request event with GitHub App installation context."""

    # None when the payload carries no installation object.
    installation: Optional[Installation] = None
|
app/services/__init__.py
ADDED
|
File without changes
|
app/services/health_score.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PR Health Score Calculator
|
| 3 |
+
===========================
|
| 4 |
+
|
| 5 |
+
Computes a 0-100 health score for a PR based on finding density and severity.
|
| 6 |
+
|
| 7 |
+
Formula:
|
| 8 |
+
base_score = 100
|
| 9 |
+
penalty = sum(SEVERITY_WEIGHTS[f.severity] * CONFIDENCE_FACTOR(f.confidence) for f in findings)
|
| 10 |
+
health_score = max(0, min(100, base_score - penalty))
|
| 11 |
+
|
| 12 |
+
Severity weights are calibrated so that:
|
| 13 |
+
- 1 critical finding drops the score by 25 points (one critical = action required)
|
| 14 |
+
- 1 high finding drops by 15 points
|
| 15 |
+
- 1 medium finding drops by 7 points
|
| 16 |
+
- 1 low finding drops by 2 points
|
| 17 |
+
|
| 18 |
+
Confidence factor scales the penalty — a finding with 0.5 confidence penalizes
|
| 19 |
+
half as much as one with 1.0 confidence. This rewards agents for being honest
|
| 20 |
+
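about uncertainty. The factor is floored at 0.3, so even a very low-confidence finding still costs 30% of its severity weight.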
|
| 21 |
+
|
| 22 |
+
Score interpretation:
|
| 23 |
+
90-100: Excellent — safe to merge
|
| 24 |
+
70-89: Good — minor issues, merge at discretion
|
| 25 |
+
50-69: Needs attention — address before merging
|
| 26 |
+
30-49: Poor — significant issues found
|
| 27 |
+
0-29: Critical — do not merge
|
| 28 |
+
"""
|
| 29 |
+
|
| 30 |
+
from __future__ import annotations
|
| 31 |
+
|
| 32 |
+
from app.models.findings import Finding
|
| 33 |
+
|
| 34 |
+
# Points deducted per finding at full confidence, keyed by severity.
SEVERITY_WEIGHTS = {
    "critical": 25,
    "high": 15,
    "medium": 7,
    "low": 2,
}


def calculate_health_score(findings: list[Finding]) -> int:
    """
    Compute the 0-100 PR health score.

    Starting from 100, each finding subtracts its severity weight multiplied
    by its confidence. The confidence multiplier never drops below 0.3, and a
    severity missing from SEVERITY_WEIGHTS counts as 5 points. The result is
    rounded and clamped to the 0..100 range; no findings yields 100.

    Args:
        findings: Findings to score; only `severity` and `confidence` are read.

    Returns:
        Integer score between 0 and 100 inclusive.
    """
    accumulated_penalty = 0.0
    for item in findings:
        severity_points = SEVERITY_WEIGHTS.get(item.severity, 5)
        # Floor the multiplier so low-confidence findings still count a little.
        multiplier = item.confidence if item.confidence > 0.3 else 0.3
        accumulated_penalty += severity_points * multiplier

    rounded_score = round(100 - accumulated_penalty)
    if rounded_score < 0:
        return 0
    if rounded_score > 100:
        return 100
    return rounded_score
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def determine_recommendation(
    findings: list[Finding], health_score: int
) -> str:
    """
    Map findings and the health score to a merge recommendation.

    Rules, in priority order:
    - any critical finding → "block", whatever the score
    - score below 50 → "request_changes"
    - score below 70 and at least one high finding → "request_changes"
    - anything else → "approve"

    Args:
        findings: Findings of the review; only `severity` is read.
        health_score: Score to test against the thresholds above.

    Returns:
        One of "block", "request_changes" or "approve".
    """
    present_severities = {item.severity for item in findings}

    if "critical" in present_severities:
        return "block"

    needs_changes = health_score < 50 or (
        health_score < 70 and "high" in present_severities
    )
    return "request_changes" if needs_changes else "approve"
|
app/tools/__init__.py
ADDED
|
File without changes
|
app/tools/bandit_tool.py
ADDED
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Bandit Static Analysis Tool
|
| 3 |
+
=============================
|
| 4 |
+
|
| 5 |
+
Bandit is an open-source Python security linter. It parses Python code into an
|
| 6 |
+
Abstract Syntax Tree (AST) and checks each node against a set of security rules.
|
| 7 |
+
|
| 8 |
+
What Bandit catches:
|
| 9 |
+
- SQL injection patterns (string formatting in SQL calls)
|
| 10 |
+
- Use of eval(), exec(), os.system() (command injection risk)
|
| 11 |
+
- Hardcoded passwords and bind addresses
|
| 12 |
+
- Use of insecure hash functions (MD5, SHA1)
|
| 13 |
+
- Insecure temp file creation
|
| 14 |
+
- SSL/TLS verification disabled (requests.get(verify=False))
|
| 15 |
+
- Use of pickle (deserialization attacks)
|
| 16 |
+
|
| 17 |
+
What Bandit CANNOT catch:
|
| 18 |
+
- Business logic flaws
|
| 19 |
+
- Missing authentication/authorization
|
| 20 |
+
- Cross-file data flow (it analyzes one file at a time)
|
| 21 |
+
- Vulnerabilities in non-Python code
|
| 22 |
+
|
| 23 |
+
That's why we combine Bandit (mechanical pattern matching) with the LLM (semantic
|
| 24 |
+
understanding). Bandit provides high-confidence, low-noise signals that anchor the
|
| 25 |
+
LLM's analysis.
|
| 26 |
+
|
| 27 |
+
How it works:
|
| 28 |
+
1. We write the changed Python files to a temp directory
|
| 29 |
+
2. Run `bandit -r <dir> -f json` as a subprocess
|
| 30 |
+
3. Parse the JSON output into a human-readable summary
|
| 31 |
+
4. Feed this summary into the LLM's prompt as additional context
|
| 32 |
+
"""
|
| 33 |
+
|
| 34 |
+
from __future__ import annotations
|
| 35 |
+
|
| 36 |
+
import json
|
| 37 |
+
import subprocess
|
| 38 |
+
import tempfile
|
| 39 |
+
from pathlib import Path
|
| 40 |
+
|
| 41 |
+
import structlog
|
| 42 |
+
|
| 43 |
+
logger = structlog.get_logger()
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
async def run_bandit(file_contents: dict[str, str]) -> str:
    """
    Run Bandit security analysis on Python files.

    The changed files are written into a throwaway directory, Bandit is run
    over that directory in a worker thread (so the event loop is not blocked
    while the subprocess runs), and its JSON report is turned into a short
    text summary.

    Args:
        file_contents: dict of {filepath: source_code} for changed files.
            Paths are expected to be repository-relative; any path that
            would resolve outside the scratch directory (absolute paths,
            ``..`` components) is skipped and logged.

    Returns:
        A formatted string summarizing Bandit's findings, suitable for
        including in an LLM prompt. Returns empty string if there are no
        Python files, if Bandit is unavailable / fails / times out, or if
        it produced no output.
    """
    # Imported locally so this function does not depend on a module-level
    # import being added elsewhere in the file.
    import asyncio

    # Filter to only Python files — Bandit only understands Python
    python_files = {
        path: content
        for path, content in file_contents.items()
        if path.endswith(".py")
    }

    if not python_files:
        return ""

    try:
        # Create a temp directory and write the Python files there.
        # We need files on disk because Bandit operates on the filesystem.
        # TemporaryDirectory removes the directory again when the block exits.
        with tempfile.TemporaryDirectory(prefix="ninjacg_bandit_") as tmpdir:
            tmpdir_path = Path(tmpdir)
            # Resolved once so the containment check below compares
            # symlink-free paths on both sides.
            sandbox_root = tmpdir_path.resolve()
            written = 0

            for filepath, content in python_files.items():
                # Recreate the directory structure (e.g., src/auth/login.py)
                file_path = tmpdir_path / filepath
                # File paths come from the caller; never write outside the
                # scratch directory.
                if sandbox_root not in file_path.resolve().parents:
                    logger.warning(
                        "Skipping file path outside Bandit scratch directory",
                        path=filepath,
                    )
                    continue
                file_path.parent.mkdir(parents=True, exist_ok=True)
                file_path.write_text(content, encoding="utf-8")
                written += 1

            if not written:
                # Every candidate path was rejected — nothing to scan.
                return ""

            # Run Bandit as a subprocess
            #   -r: recursive (scan all files in directory)
            #   -f json: output as JSON (machine-parseable)
            #   -ll: only report medium severity and above
            #   --quiet: suppress progress bar
            # subprocess.run blocks, so it is pushed to a worker thread.
            result = await asyncio.to_thread(
                subprocess.run,
                [
                    "bandit",
                    "-r", str(tmpdir_path),
                    "-f", "json",
                    "-ll",
                    "--quiet",
                ],
                capture_output=True,
                text=True,
                timeout=30,  # Kill if it takes too long
            )

            # Bandit exit codes:
            #   0 = no issues found
            #   1 = issues found (this is NOT an error)
            #   2+ = actual error
            if result.returncode > 1:
                logger.warning("Bandit returned error", stderr=result.stderr[:500])
                return ""

            if not result.stdout.strip():
                return ""

            # Parse the JSON output
            bandit_output = json.loads(result.stdout)
            findings = bandit_output.get("results", [])

            if not findings:
                return "Bandit static analysis: No security issues detected."

            # Format findings as a human-readable summary for the LLM
            summary_lines = [
                f"Bandit static analysis found {len(findings)} issue(s):\n"
            ]

            for i, finding in enumerate(findings, 1):
                # Map the temp file path back to the original file path
                temp_path = finding.get("filename", "")
                original_path = _map_temp_to_original(temp_path, tmpdir, python_files)

                severity = finding.get("issue_severity", "UNKNOWN")
                confidence = finding.get("issue_confidence", "UNKNOWN")
                text = finding.get("issue_text", "")
                test_id = finding.get("test_id", "")
                line_no = finding.get("line_number", 0)
                code = finding.get("code", "").strip()

                summary_lines.append(
                    f"{i}. [{severity}/{confidence}] {text}\n"
                    f"   File: {original_path}, Line: {line_no}\n"
                    f"   Test: {test_id}\n"
                    f"   Code: {code}\n"
                )

            summary = "\n".join(summary_lines)
            logger.info("Bandit analysis complete", findings_count=len(findings))
            return summary

    except subprocess.TimeoutExpired:
        logger.warning("Bandit timed out after 30 seconds")
        return ""
    except FileNotFoundError:
        # Bandit not installed — this is OK, the LLM can still analyze
        logger.warning("Bandit not found in PATH — skipping static analysis")
        return ""
    except Exception as e:
        # Best-effort tool: any other failure (bad JSON, I/O error) just
        # means no static-analysis context is supplied.
        logger.warning("Bandit analysis failed", error=str(e))
        return ""
|
| 155 |
+
|
| 156 |
+
|
| 157 |
+
def _map_temp_to_original(
|
| 158 |
+
temp_path: str, tmpdir: str, original_files: dict[str, str]
|
| 159 |
+
) -> str:
|
| 160 |
+
"""Map a temp directory path back to the original file path."""
|
| 161 |
+
try:
|
| 162 |
+
# The temp path looks like: /tmp/ninjacg_bandit_xxx/src/auth/login.py
|
| 163 |
+
# We need to strip the tmpdir prefix to get: src/auth/login.py
|
| 164 |
+
relative = str(Path(temp_path).relative_to(tmpdir))
|
| 165 |
+
# Normalize path separators
|
| 166 |
+
relative = relative.replace("\\", "/")
|
| 167 |
+
# Verify it's one of our original files
|
| 168 |
+
if relative in original_files:
|
| 169 |
+
return relative
|
| 170 |
+
except (ValueError, Exception):
|
| 171 |
+
pass
|
| 172 |
+
# Fallback: return the filename only
|
| 173 |
+
return Path(temp_path).name
|
app/tools/detect_secrets_tool.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
detect-secrets Tool
|
| 3 |
+
====================
|
| 4 |
+
|
| 5 |
+
detect-secrets scans code for hardcoded credentials: API keys, passwords,
|
| 6 |
+
database connection strings, AWS access keys, private keys, etc.
|
| 7 |
+
|
| 8 |
+
Why a dedicated tool for secrets?
|
| 9 |
+
- Hardcoded secrets are the #1 most common security finding in code reviews
|
| 10 |
+
- They're easy to detect with regex/entropy analysis but easy to miss manually
|
| 11 |
+
- detect-secrets uses both pattern matching AND Shannon entropy analysis:
|
| 12 |
+
- Pattern matching: finds things that LOOK like API keys (e.g., "sk_live_...")
|
| 13 |
+
- Entropy analysis: finds random-looking strings that might be secrets
|
| 14 |
+
(high entropy = lots of randomness = probably a key, not a variable name)
|
| 15 |
+
|
| 16 |
+
What Shannon entropy means:
|
| 17 |
+
- "hello" has low entropy (~1.9 bits/char) — predictable, probably not a secret
|
| 18 |
+
- "a3f8g2kx9m" has high entropy (~3.3 bits/char) — random, might be a secret
|
| 19 |
+
- detect-secrets flags strings above a configurable entropy threshold
|
| 20 |
+
|
| 21 |
+
We run this on the full contents of the files changed in the PR (not just the
|
| 22 |
+
diff hunks), so findings may include secrets that already existed in those files.
|
| 23 |
+
"""
|
| 24 |
+
|
| 25 |
+
from __future__ import annotations
|
| 26 |
+
|
| 27 |
+
import json
|
| 28 |
+
import subprocess
|
| 29 |
+
import tempfile
|
| 30 |
+
from pathlib import Path
|
| 31 |
+
|
| 32 |
+
import structlog
|
| 33 |
+
|
| 34 |
+
logger = structlog.get_logger()
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
async def run_detect_secrets(file_contents: dict[str, str]) -> str:
    """
    Scan changed files for hardcoded secrets.

    The files are written into a throwaway directory and scanned with
    ``detect-secrets scan --all-files``. The subprocess runs in a worker
    thread so the event loop is not blocked while it executes.

    Args:
        file_contents: dict of {filepath: source_code}. Paths are expected to
            be repository-relative; any path that would resolve outside the
            scratch directory is skipped and logged.

    Returns:
        A formatted string listing detected secrets, suitable for
        including in an LLM prompt. Empty string if there is nothing to
        scan or the tool is unavailable / fails / times out.
    """
    # Imported locally so this function does not depend on a module-level
    # import being added elsewhere in the file.
    import asyncio

    if not file_contents:
        return ""

    try:
        with tempfile.TemporaryDirectory(prefix="ninjacg_secrets_") as tmpdir:
            tmpdir_path = Path(tmpdir)
            # Resolved once so the containment check compares symlink-free paths.
            sandbox_root = tmpdir_path.resolve()
            written = 0

            for filepath, content in file_contents.items():
                file_path = tmpdir_path / filepath
                # File paths come from the caller; never write outside the
                # scratch directory.
                if sandbox_root not in file_path.resolve().parents:
                    logger.warning(
                        "Skipping file path outside detect-secrets scratch directory",
                        path=filepath,
                    )
                    continue
                file_path.parent.mkdir(parents=True, exist_ok=True)
                file_path.write_text(content, encoding="utf-8")
                written += 1

            if not written:
                # Every candidate path was rejected — nothing to scan.
                return ""

            # Run detect-secrets scan
            #   --all-files: scan all file types
            # subprocess.run blocks, so it is pushed to a worker thread.
            result = await asyncio.to_thread(
                subprocess.run,
                [
                    "detect-secrets", "scan",
                    str(tmpdir_path),
                    "--all-files",
                ],
                capture_output=True,
                text=True,
                timeout=30,
            )

            # A non-zero exit with no report at all is treated as a tool error.
            if result.returncode != 0 and not result.stdout:
                logger.warning("detect-secrets error", stderr=result.stderr[:500])
                return ""

            if not result.stdout.strip():
                return ""

            scan_results = json.loads(result.stdout)
            results_map = scan_results.get("results", {})

            # Count total secrets found
            total_secrets = sum(len(secrets) for secrets in results_map.values())

            if total_secrets == 0:
                return "detect-secrets scan: No hardcoded secrets detected."

            # Format findings
            summary_lines = [
                f"detect-secrets found {total_secrets} potential secret(s):\n"
            ]

            for file_path, secrets in results_map.items():
                # Map temp path back to original
                try:
                    relative = str(Path(file_path).relative_to(tmpdir)).replace("\\", "/")
                except ValueError:
                    # Reported path is not under tmpdir — fall back to the bare name.
                    relative = Path(file_path).name

                for secret in secrets:
                    secret_type = secret.get("type", "Unknown")
                    line_no = secret.get("line_number", 0)
                    summary_lines.append(
                        f"- {secret_type} in {relative} at line {line_no}"
                    )

            summary = "\n".join(summary_lines)
            logger.info("detect-secrets scan complete", secrets_found=total_secrets)
            return summary

    except subprocess.TimeoutExpired:
        logger.warning("detect-secrets timed out after 30 seconds")
        return ""
    except FileNotFoundError:
        logger.warning("detect-secrets not found in PATH — skipping")
        return ""
    except Exception as e:
        # Best-effort tool: any other failure just means no secrets context.
        logger.warning("detect-secrets scan failed", error=str(e))
        return ""
|
app/tools/linter_tool.py
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Linter Tool (Ruff)
|
| 3 |
+
===================
|
| 4 |
+
|
| 5 |
+
Ruff is an extremely fast Python linter written in Rust. It replaces
|
| 6 |
+
flake8, isort, pycodestyle, and dozens of other tools in a single binary.
|
| 7 |
+
It runs 10-100x faster than traditional Python linters.
|
| 8 |
+
|
| 9 |
+
What Ruff catches:
|
| 10 |
+
- Unused imports (F401)
|
| 11 |
+
- Undefined names (F821)
|
| 12 |
+
- Unused variables (F841)
|
| 13 |
+
- Import ordering issues (I001)
|
| 14 |
+
- Unnecessary f-strings (F541)
|
| 15 |
+
- Bare except clauses (E722)
|
| 16 |
+
- And 800+ other rules
|
| 17 |
+
|
| 18 |
+
We run Ruff on the changed files and feed the output to the Style Agent
|
| 19 |
+
as additional context. The LLM then combines Ruff's mechanical findings
|
| 20 |
+
with its own understanding of readability and maintainability.
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
+
from __future__ import annotations
|
| 24 |
+
|
| 25 |
+
import json
|
| 26 |
+
import subprocess
|
| 27 |
+
import tempfile
|
| 28 |
+
from pathlib import Path
|
| 29 |
+
|
| 30 |
+
import structlog
|
| 31 |
+
|
| 32 |
+
logger = structlog.get_logger()
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
async def run_ruff(file_contents: dict[str, str]) -> str:
    """
    Run Ruff linter on Python files.

    The ``.py`` entries of ``file_contents`` are written into a throwaway
    directory and checked with ``ruff check``. The subprocess runs in a
    worker thread so the event loop is not blocked while it executes.

    Args:
        file_contents: dict of {filepath: source_code}. Paths are expected to
            be repository-relative; any path that would resolve outside the
            scratch directory is skipped and logged.

    Returns:
        A formatted string of linting issues (at most 20 are listed, with a
        trailing count of the remainder). Empty string if there are no
        Python files, no issues, or Ruff is unavailable / fails / times out.
    """
    # Imported locally so this function does not depend on a module-level
    # import being added elsewhere in the file.
    import asyncio

    python_files = {
        path: content
        for path, content in file_contents.items()
        if path.endswith(".py")
    }

    if not python_files:
        return ""

    try:
        with tempfile.TemporaryDirectory(prefix="ninjacg_ruff_") as tmpdir:
            tmpdir_path = Path(tmpdir)
            # Resolved once so the containment check compares symlink-free paths.
            sandbox_root = tmpdir_path.resolve()
            written = 0

            for filepath, content in python_files.items():
                file_path = tmpdir_path / filepath
                # File paths come from the caller; never write outside the
                # scratch directory.
                if sandbox_root not in file_path.resolve().parents:
                    logger.warning(
                        "Skipping file path outside Ruff scratch directory",
                        path=filepath,
                    )
                    continue
                file_path.parent.mkdir(parents=True, exist_ok=True)
                file_path.write_text(content, encoding="utf-8")
                written += 1

            if not written:
                # Every candidate path was rejected — nothing to lint.
                return ""

            # Run ruff check with JSON output
            #   --output-format json: machine-parseable output
            #   --select F,E,W,I,N,UP,B,A,SIM,RET,ARG: the rule families we report on
            #   --ignore E501,E402: skip line-length and import-position rules
            # subprocess.run blocks, so it is pushed to a worker thread.
            result = await asyncio.to_thread(
                subprocess.run,
                [
                    "ruff", "check",
                    str(tmpdir_path),
                    "--output-format", "json",
                    "--select", "F,E,W,I,N,UP,B,A,SIM,RET,ARG",
                    "--ignore", "E501,E402",
                ],
                capture_output=True,
                text=True,
                timeout=30,
            )

            # Ruff exit code 1 means issues found (not an error)
            if not result.stdout.strip() or result.stdout.strip() == "[]":
                return ""

            issues = json.loads(result.stdout)

            if not issues:
                return ""

            # Format findings
            summary_lines = [f"Ruff linter found {len(issues)} issue(s):\n"]

            for issue in issues[:20]:  # Cap at 20 to avoid prompt bloat
                # `or` fallbacks also cover fields that are present but null.
                code = issue.get("code") or "?"
                message = issue.get("message", "")
                filename = issue.get("filename") or ""
                line = (issue.get("location") or {}).get("row", 0)

                try:
                    relative = str(Path(filename).relative_to(tmpdir)).replace("\\", "/")
                except ValueError:
                    # Reported path is not under tmpdir — fall back to the bare name.
                    relative = Path(filename).name

                summary_lines.append(f"- [{code}] {relative}:{line} — {message}")

            if len(issues) > 20:
                summary_lines.append(f"  ... and {len(issues) - 20} more issues")

            summary = "\n".join(summary_lines)
            logger.info("Ruff analysis complete", issues_count=len(issues))
            return summary

    except subprocess.TimeoutExpired:
        logger.warning("Ruff timed out after 30 seconds")
        return ""
    except FileNotFoundError:
        logger.warning("ruff not found in PATH — skipping lint analysis")
        return ""
    except Exception as e:
        # Best-effort tool: any other failure just means no lint context.
        logger.warning("Ruff analysis failed", error=str(e))
        return ""
|
app/tools/radon_tool.py
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Radon Complexity Analysis Tool
|
| 3 |
+
================================
|
| 4 |
+
|
| 5 |
+
Radon measures cyclomatic complexity — the number of independent execution paths
|
| 6 |
+
through a function. Higher complexity = more branches = harder to test and maintain,
|
| 7 |
+
AND often correlates with performance issues (deeply nested conditionals often
|
| 8 |
+
indicate O(n²) or worse algorithms).
|
| 9 |
+
|
| 10 |
+
Complexity grades:
|
| 11 |
+
A (1-5): Simple, low risk
|
| 12 |
+
B (6-10): Moderate complexity
|
| 13 |
+
C (11-20): High complexity — consider refactoring
|
| 14 |
+
D (21-30): Very high — likely performance and maintenance issues
|
| 15 |
+
E (31-40): Extremely complex
|
| 16 |
+
F (41+): Unmaintainable
|
| 17 |
+
|
| 18 |
+
We report functions with complexity grade C or worse (>10) to the Performance Agent.
|
| 19 |
+
The agent uses this as a signal to look deeper at those functions for algorithmic issues.
|
| 20 |
+
"""
|
| 21 |
+
|
| 22 |
+
from __future__ import annotations
|
| 23 |
+
|
| 24 |
+
import json
|
| 25 |
+
import subprocess
|
| 26 |
+
import tempfile
|
| 27 |
+
from pathlib import Path
|
| 28 |
+
|
| 29 |
+
import structlog
|
| 30 |
+
|
| 31 |
+
logger = structlog.get_logger()
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
async def run_radon(file_contents: dict[str, str]) -> str:
    """
    Run radon cyclomatic complexity analysis on Python files.

    The ``.py`` entries of ``file_contents`` are written into a throwaway
    directory and analysed with ``radon cc``. The subprocess runs in a
    worker thread so the event loop is not blocked while it executes.

    Args:
        file_contents: dict of {filepath: source_code}. Paths are expected to
            be repository-relative; any path that would resolve outside the
            scratch directory is skipped and logged.

    Returns:
        A formatted string summarizing high-complexity functions (grade C or
        worse). Empty string if there are no Python files, nothing was
        reported, or radon is unavailable / fails / times out.
    """
    # Imported locally so this function does not depend on a module-level
    # import being added elsewhere in the file.
    import asyncio

    python_files = {
        path: content
        for path, content in file_contents.items()
        if path.endswith(".py")
    }

    if not python_files:
        return ""

    try:
        with tempfile.TemporaryDirectory(prefix="ninjacg_radon_") as tmpdir:
            tmpdir_path = Path(tmpdir)
            # Resolved once so the containment check compares symlink-free paths.
            sandbox_root = tmpdir_path.resolve()
            written = 0

            for filepath, content in python_files.items():
                file_path = tmpdir_path / filepath
                # File paths come from the caller; never write outside the
                # scratch directory.
                if sandbox_root not in file_path.resolve().parents:
                    logger.warning(
                        "Skipping file path outside radon scratch directory",
                        path=filepath,
                    )
                    continue
                file_path.parent.mkdir(parents=True, exist_ok=True)
                file_path.write_text(content, encoding="utf-8")
                written += 1

            if not written:
                # Every candidate path was rejected — nothing to analyse.
                return ""

            # Run radon cc (cyclomatic complexity) with JSON output
            #   -j: JSON output
            #   -n C: only show grade C or worse (complexity > 10)
            # subprocess.run blocks, so it is pushed to a worker thread.
            result = await asyncio.to_thread(
                subprocess.run,
                ["radon", "cc", "-j", "-n", "C", str(tmpdir_path)],
                capture_output=True,
                text=True,
                timeout=30,
            )

            if not result.stdout.strip() or result.stdout.strip() == "{}":
                return ""

            radon_output = json.loads(result.stdout)

            # Collect high-complexity functions
            findings = []
            for file_path, functions in radon_output.items():
                try:
                    relative = str(Path(file_path).relative_to(tmpdir)).replace("\\", "/")
                except ValueError:
                    # Reported path is not under tmpdir — fall back to the bare name.
                    relative = Path(file_path).name

                for func in functions:
                    # Only dict entries describe a code block; anything else
                    # (e.g. the keys of a non-list per-file value) is skipped.
                    if not isinstance(func, dict):
                        continue
                    name = func.get("name", "unknown")
                    complexity = func.get("complexity", 0)
                    rank = func.get("rank", "?")
                    lineno = func.get("lineno", 0)
                    findings.append(
                        f"- {relative}:{lineno} — `{name}()` complexity={complexity} (grade {rank})"
                    )

            if not findings:
                return ""

            summary = (
                f"Radon complexity analysis found {len(findings)} high-complexity function(s):\n"
                + "\n".join(findings)
            )
            logger.info("Radon analysis complete", high_complexity_count=len(findings))
            return summary

    except subprocess.TimeoutExpired:
        logger.warning("Radon timed out after 30 seconds")
        return ""
    except FileNotFoundError:
        logger.warning("radon not found in PATH — skipping complexity analysis")
        return ""
    except Exception as e:
        # Best-effort tool: any other failure just means no complexity context.
        logger.warning("Radon analysis failed", error=str(e))
        return ""
|
dashboard/.gitignore
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
|
| 2 |
+
|
| 3 |
+
# dependencies
|
| 4 |
+
/node_modules
|
| 5 |
+
/.pnp
|
| 6 |
+
.pnp.*
|
| 7 |
+
.yarn/*
|
| 8 |
+
!.yarn/patches
|
| 9 |
+
!.yarn/plugins
|
| 10 |
+
!.yarn/releases
|
| 11 |
+
!.yarn/versions
|
| 12 |
+
|
| 13 |
+
# testing
|
| 14 |
+
/coverage
|
| 15 |
+
|
| 16 |
+
# next.js
|
| 17 |
+
/.next/
|
| 18 |
+
/out/
|
| 19 |
+
|
| 20 |
+
# production
|
| 21 |
+
/build
|
| 22 |
+
|
| 23 |
+
# misc
|
| 24 |
+
.DS_Store
|
| 25 |
+
*.pem
|
| 26 |
+
|
| 27 |
+
# debug
|
| 28 |
+
npm-debug.log*
|
| 29 |
+
yarn-debug.log*
|
| 30 |
+
yarn-error.log*
|
| 31 |
+
.pnpm-debug.log*
|
| 32 |
+
|
| 33 |
+
# env files (can opt-in for committing if needed)
|
| 34 |
+
.env*
|
| 35 |
+
|
| 36 |
+
# vercel
|
| 37 |
+
.vercel
|
| 38 |
+
|
| 39 |
+
# typescript
|
| 40 |
+
*.tsbuildinfo
|
| 41 |
+
next-env.d.ts
|
dashboard/AGENTS.md
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!-- BEGIN:nextjs-agent-rules -->
|
| 2 |
+
# This is NOT the Next.js you know
|
| 3 |
+
|
| 4 |
+
This version has breaking changes — APIs, conventions, and file structure may all differ from your training data. Read the relevant guide in `node_modules/next/dist/docs/` before writing any code. Heed deprecation notices.
|
| 5 |
+
<!-- END:nextjs-agent-rules -->
|
dashboard/CLAUDE.md
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
@AGENTS.md
|
dashboard/README.md
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
|
| 2 |
+
|
| 3 |
+
## Getting Started
|
| 4 |
+
|
| 5 |
+
First, run the development server:
|
| 6 |
+
|
| 7 |
+
```bash
|
| 8 |
+
npm run dev
|
| 9 |
+
# or
|
| 10 |
+
yarn dev
|
| 11 |
+
# or
|
| 12 |
+
pnpm dev
|
| 13 |
+
# or
|
| 14 |
+
bun dev
|
| 15 |
+
```
|
| 16 |
+
|
| 17 |
+
Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
|
| 18 |
+
|
| 19 |
+
You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
|
| 20 |
+
|
| 21 |
+
This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
|
| 22 |
+
|
| 23 |
+
## Learn More
|
| 24 |
+
|
| 25 |
+
To learn more about Next.js, take a look at the following resources:
|
| 26 |
+
|
| 27 |
+
- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
|
| 28 |
+
- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
|
| 29 |
+
|
| 30 |
+
You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
|
| 31 |
+
|
| 32 |
+
## Deploy on Vercel
|
| 33 |
+
|
| 34 |
+
The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
|
| 35 |
+
|
| 36 |
+
Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
|
dashboard/app/favicon.ico
ADDED
|
|
dashboard/app/globals.css
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@import "tailwindcss";
|
| 2 |
+
|
| 3 |
+
:root {
|
| 4 |
+
--background: #050507;
|
| 5 |
+
--foreground: #f4f4f5;
|
| 6 |
+
--glass-bg: rgba(255, 255, 255, 0.03);
|
| 7 |
+
--glass-border: rgba(255, 255, 255, 0.06);
|
| 8 |
+
--glass-hover: rgba(255, 255, 255, 0.06);
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
@theme inline {
|
| 12 |
+
--color-background: var(--background);
|
| 13 |
+
--color-foreground: var(--foreground);
|
| 14 |
+
--font-sans: var(--font-geist-sans);
|
| 15 |
+
--font-mono: var(--font-geist-mono);
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
body {
|
| 19 |
+
background: var(--background);
|
| 20 |
+
color: var(--foreground);
|
| 21 |
+
font-family: var(--font-sans, system-ui, -apple-system, sans-serif);
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
/* ─── Dot grid background ─── */
|
| 25 |
+
.dot-grid {
|
| 26 |
+
background-image: radial-gradient(circle, rgba(255, 255, 255, 0.04) 1px, transparent 1px);
|
| 27 |
+
background-size: 32px 32px;
|
| 28 |
+
}
|
| 29 |
+
|
| 30 |
+
/* ─── Animated gradient orbs ─── */
|
| 31 |
+
.gradient-orb {
|
| 32 |
+
position: absolute;
|
| 33 |
+
border-radius: 50%;
|
| 34 |
+
filter: blur(120px);
|
| 35 |
+
opacity: 0.15;
|
| 36 |
+
pointer-events: none;
|
| 37 |
+
animation: orbFloat 20s ease-in-out infinite;
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
.gradient-orb-1 {
|
| 41 |
+
width: 600px;
|
| 42 |
+
height: 600px;
|
| 43 |
+
background: linear-gradient(135deg, #7c3aed, #6d28d9);
|
| 44 |
+
top: -200px;
|
| 45 |
+
right: -100px;
|
| 46 |
+
animation-delay: 0s;
|
| 47 |
+
}
|
| 48 |
+
|
| 49 |
+
.gradient-orb-2 {
|
| 50 |
+
width: 500px;
|
| 51 |
+
height: 500px;
|
| 52 |
+
background: linear-gradient(135deg, #06b6d4, #0891b2);
|
| 53 |
+
bottom: -150px;
|
| 54 |
+
left: -100px;
|
| 55 |
+
animation-delay: -7s;
|
| 56 |
+
}
|
| 57 |
+
|
| 58 |
+
.gradient-orb-3 {
|
| 59 |
+
width: 400px;
|
| 60 |
+
height: 400px;
|
| 61 |
+
background: linear-gradient(135deg, #ec4899, #be185d);
|
| 62 |
+
top: 40%;
|
| 63 |
+
left: 50%;
|
| 64 |
+
animation-delay: -14s;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
@keyframes orbFloat {
|
| 68 |
+
0%, 100% { transform: translate(0, 0) scale(1); }
|
| 69 |
+
25% { transform: translate(30px, -40px) scale(1.05); }
|
| 70 |
+
50% { transform: translate(-20px, 20px) scale(0.95); }
|
| 71 |
+
75% { transform: translate(40px, 30px) scale(1.03); }
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
/* ─── Glass card ─── */
|
| 75 |
+
.glass {
|
| 76 |
+
background: var(--glass-bg);
|
| 77 |
+
border: 1px solid var(--glass-border);
|
| 78 |
+
backdrop-filter: blur(20px);
|
| 79 |
+
-webkit-backdrop-filter: blur(20px);
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
.glass-hover:hover {
|
| 83 |
+
background: var(--glass-hover);
|
| 84 |
+
border-color: rgba(255, 255, 255, 0.1);
|
| 85 |
+
}
|
| 86 |
+
|
| 87 |
+
/* ─── Glow effects ─── */
|
| 88 |
+
.glow-violet { box-shadow: 0 0 40px -10px rgba(139, 92, 246, 0.3); }
|
| 89 |
+
.glow-green { box-shadow: 0 0 40px -10px rgba(34, 197, 94, 0.3); }
|
| 90 |
+
.glow-red { box-shadow: 0 0 40px -10px rgba(239, 68, 68, 0.3); }
|
| 91 |
+
.glow-amber { box-shadow: 0 0 40px -10px rgba(245, 158, 11, 0.3); }
|
| 92 |
+
|
| 93 |
+
/* ─── Gradient text ─── */
|
| 94 |
+
.text-gradient {
|
| 95 |
+
background: linear-gradient(135deg, #c4b5fd 0%, #818cf8 50%, #6d28d9 100%);
|
| 96 |
+
-webkit-background-clip: text;
|
| 97 |
+
-webkit-text-fill-color: transparent;
|
| 98 |
+
background-clip: text;
|
| 99 |
+
}
|
| 100 |
+
|
| 101 |
+
.text-gradient-cyan {
|
| 102 |
+
background: linear-gradient(135deg, #67e8f9 0%, #22d3ee 50%, #06b6d4 100%);
|
| 103 |
+
-webkit-background-clip: text;
|
| 104 |
+
-webkit-text-fill-color: transparent;
|
| 105 |
+
background-clip: text;
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
/* ─── Shimmer border animation ─── */
|
| 109 |
+
@keyframes shimmer {
|
| 110 |
+
0% { background-position: 200% 0; }
|
| 111 |
+
100% { background-position: -200% 0; }
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
.shimmer-border {
|
| 115 |
+
background: linear-gradient(
|
| 116 |
+
90deg,
|
| 117 |
+
transparent 0%,
|
| 118 |
+
rgba(139, 92, 246, 0.15) 25%,
|
| 119 |
+
rgba(6, 182, 212, 0.15) 50%,
|
| 120 |
+
rgba(139, 92, 246, 0.15) 75%,
|
| 121 |
+
transparent 100%
|
| 122 |
+
);
|
| 123 |
+
background-size: 200% 100%;
|
| 124 |
+
animation: shimmer 6s ease-in-out infinite;
|
| 125 |
+
}
|
| 126 |
+
|
| 127 |
+
/* ─── Scrollbar ─── */
|
| 128 |
+
::-webkit-scrollbar {
|
| 129 |
+
width: 6px;
|
| 130 |
+
height: 6px;
|
| 131 |
+
}
|
| 132 |
+
::-webkit-scrollbar-track {
|
| 133 |
+
background: transparent;
|
| 134 |
+
}
|
| 135 |
+
::-webkit-scrollbar-thumb {
|
| 136 |
+
background: rgba(113, 113, 122, 0.3);
|
| 137 |
+
border-radius: 3px;
|
| 138 |
+
}
|
| 139 |
+
::-webkit-scrollbar-thumb:hover {
|
| 140 |
+
background: rgba(113, 113, 122, 0.5);
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
/* ─── Noise texture overlay ─── */
|
| 144 |
+
.noise::before {
|
| 145 |
+
content: "";
|
| 146 |
+
position: fixed;
|
| 147 |
+
inset: 0;
|
| 148 |
+
z-index: 100;
|
| 149 |
+
pointer-events: none;
|
| 150 |
+
opacity: 0.015;
|
| 151 |
+
background-image: url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.9' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)'/%3E%3C/svg%3E");
|
| 152 |
+
}
|
dashboard/app/layout.tsx
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import type { Metadata } from "next";
|
| 2 |
+
import { Geist, Geist_Mono } from "next/font/google";
|
| 3 |
+
import Link from "next/link";
|
| 4 |
+
import "./globals.css";
|
| 5 |
+
|
| 6 |
+
const geistSans = Geist({
|
| 7 |
+
variable: "--font-geist-sans",
|
| 8 |
+
subsets: ["latin"],
|
| 9 |
+
});
|
| 10 |
+
|
| 11 |
+
const geistMono = Geist_Mono({
|
| 12 |
+
variable: "--font-geist-mono",
|
| 13 |
+
subsets: ["latin"],
|
| 14 |
+
});
|
| 15 |
+
|
| 16 |
+
export const metadata: Metadata = {
|
| 17 |
+
title: "Ninja Code Guard",
|
| 18 |
+
description:
|
| 19 |
+
"Multi-agent AI code review dashboard — security, performance & style analysis at a glance.",
|
| 20 |
+
};
|
| 21 |
+
|
| 22 |
+
/**
 * Root layout wrapped around every route of the dashboard.
 *
 * Renders the <html>/<body> shell (font variable classes, `dark` class and
 * the `noise` overlay class), three decorative gradient orbs, the sticky
 * header with the brand link and navigation, the routed page inside <main>,
 * and the footer.
 *
 * @param children - The routed page content, rendered inside <main>.
 */
export default function RootLayout({
  children,
}: Readonly<{
  children: React.ReactNode;
}>) {
  // Both header navigation entries share the same pill styling.
  const navLinkClass =
    "px-4 py-2 text-sm text-zinc-400 hover:text-white hover:bg-white/[0.04] rounded-lg transition-all duration-200";

  return (
    <html
      lang="en"
      className={`${geistSans.variable} ${geistMono.variable} h-full antialiased dark`}
    >
      <body className="noise min-h-full flex flex-col bg-[#050507] text-zinc-100">
        {/* ── Gradient orbs (ambient background) ── */}
        {/* Purely decorative, so hidden from assistive technology. */}
        <div
          aria-hidden="true"
          className="fixed inset-0 overflow-hidden pointer-events-none z-0"
        >
          <div className="gradient-orb gradient-orb-1" />
          <div className="gradient-orb gradient-orb-2" />
          <div className="gradient-orb gradient-orb-3" />
        </div>

        {/* ── Navigation ── */}
        <header className="sticky top-0 z-50 border-b border-white/[0.06] bg-[#050507]/70 backdrop-blur-2xl">
          <div className="mx-auto flex h-16 max-w-7xl items-center justify-between px-6 lg:px-8">
            <Link href="/" className="flex items-center gap-3 group">
              <span className="relative flex items-center justify-center w-9 h-9 rounded-xl bg-gradient-to-br from-violet-600 to-violet-800 shadow-lg shadow-violet-900/30 group-hover:shadow-violet-700/40 transition-shadow">
                {/* Decorative shield: the adjacent text already names the link. */}
                <svg
                  xmlns="http://www.w3.org/2000/svg"
                  viewBox="0 0 24 24"
                  fill="currentColor"
                  className="w-5 h-5 text-white"
                  aria-hidden="true"
                >
                  <path
                    fillRule="evenodd"
                    d="M12.516 2.17a.75.75 0 00-1.032 0 11.209 11.209 0 01-7.877 3.08.75.75 0 00-.722.515A12.74 12.74 0 002.25 9.75c0 5.942 4.064 10.933 9.563 12.348a.749.749 0 00.374 0c5.499-1.415 9.563-6.406 9.563-12.348 0-1.39-.223-2.73-.635-3.985a.75.75 0 00-.722-.516 11.209 11.209 0 01-7.877-3.08z"
                    clipRule="evenodd"
                  />
                </svg>
              </span>
              <div className="flex flex-col">
                <span className="text-[15px] font-semibold tracking-tight text-white leading-tight">
                  Ninja Code Guard
                </span>
                <span className="text-[10px] font-medium text-zinc-500 tracking-widest uppercase">
                  AI Review Platform
                </span>
              </div>
            </Link>

            <nav aria-label="Primary" className="flex items-center gap-1">
              <Link href="/" className={navLinkClass}>
                Dashboard
              </Link>
              {/* NOTE(review): points at the GitHub home page, not a project
                  repository — looks like a placeholder; confirm the target. */}
              <a
                href="https://github.com"
                target="_blank"
                rel="noopener noreferrer"
                className={navLinkClass}
              >
                GitHub
              </a>
            </nav>
          </div>
        </header>

        {/* ── Content ── */}
        <main className="relative z-10 flex-1">{children}</main>

        {/* ── Footer ── */}
        <footer className="relative z-10 border-t border-white/[0.04] py-8">
          <div className="mx-auto max-w-7xl px-6 lg:px-8 flex items-center justify-between">
            <p className="text-xs text-zinc-600">
              © {new Date().getFullYear()} Ninja Code Guard
            </p>
            <p className="text-xs text-zinc-700">
              Multi-Agent AI Code Review Platform
            </p>
          </div>
        </footer>
      </body>
    </html>
  );
}
|
dashboard/app/page.tsx
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import Link from "next/link";
|
| 4 |
+
import { motion } from "framer-motion";
|
| 5 |
+
import { MOCK_REPOS } from "@/lib/api";
|
| 6 |
+
import {
|
| 7 |
+
StaggerContainer,
|
| 8 |
+
StaggerItem,
|
| 9 |
+
FadeIn,
|
| 10 |
+
HoverCard,
|
| 11 |
+
} from "@/components/motion";
|
| 12 |
+
import { AnimatedCounter } from "@/components/AnimatedCounter";
|
| 13 |
+
|
| 14 |
+
/** Health-score band that decides which colour family a score is shown in. */
type ScoreTier = "good" | "warn" | "bad";

/** Lowest score that still counts as "good" (emerald). */
const GOOD_SCORE_MIN = 80;
/** Lowest score that still counts as "warn" (amber); anything lower is "bad" (red). */
const WARN_SCORE_MIN = 60;

/**
 * Maps a health score onto its band.
 *
 * Kept in one place so the three class helpers below cannot drift apart.
 * A `NaN` score fails both comparisons and therefore lands in "bad".
 */
function scoreTier(score: number): ScoreTier {
  if (score >= GOOD_SCORE_MIN) return "good";
  if (score >= WARN_SCORE_MIN) return "warn";
  return "bad";
}

// The tables hold complete class names rather than interpolated fragments,
// so each class still appears verbatim in the source text.
const SCORE_TEXT_CLASS: Record<ScoreTier, string> = {
  good: "text-emerald-400",
  warn: "text-amber-400",
  bad: "text-red-400",
};

const SCORE_GLOW_CLASS: Record<ScoreTier, string> = {
  good: "group-hover:shadow-emerald-500/10",
  warn: "group-hover:shadow-amber-500/10",
  bad: "group-hover:shadow-red-500/10",
};

const SCORE_DOT_CLASS: Record<ScoreTier, string> = {
  good: "bg-emerald-400",
  warn: "bg-amber-400",
  bad: "bg-red-400",
};

/**
 * Text colour class for a health score.
 *
 * @param score - Health score to classify.
 * @returns `text-emerald-400` for 80 and above, `text-amber-400` for 60–79,
 *          otherwise `text-red-400`.
 */
function scoreColor(score: number): string {
  return SCORE_TEXT_CLASS[scoreTier(score)];
}

/**
 * Hover shadow class for a card showing the given health score.
 *
 * @param score - Health score to classify.
 * @returns A `group-hover:shadow-*` class in the colour of the score's band.
 */
function scoreGlow(score: number): string {
  return SCORE_GLOW_CLASS[scoreTier(score)];
}

/**
 * Background class for the small status dot next to a health score.
 *
 * @param score - Health score to classify.
 * @returns A `bg-*-400` class in the colour of the score's band.
 */
function scoreDot(score: number): string {
  return SCORE_DOT_CLASS[scoreTier(score)];
}
|
| 31 |
+
|
| 32 |
+
// Number of repositories in the mock data set.
const REPO_COUNT = MOCK_REPOS.length;

// Mean health score rounded to an integer. With no repositories the division
// would be 0 / 0 and yield NaN, so the average falls back to 0 instead.
const AVG_HEALTH_SCORE =
  REPO_COUNT === 0
    ? 0
    : Math.round(
        MOCK_REPOS.reduce((s, r) => s + r.health_score, 0) / REPO_COUNT
      );

/**
 * Headline figures for the stats strip on the home page.
 * Each entry supplies the caption (`label`), the number handed to
 * AnimatedCounter (`value`) and the text appended after it (`suffix`).
 */
const STATS = [
  { label: "Repos Monitored", value: REPO_COUNT, suffix: "" },
  { label: "Avg Health Score", value: AVG_HEALTH_SCORE, suffix: "%" },
  // NOTE(review): the two figures below are hard-coded rather than derived
  // from MOCK_REPOS — presumably placeholders; confirm.
  { label: "PRs Reviewed", value: 47, suffix: "" },
  { label: "Issues Found", value: 132, suffix: "" },
];
|
| 44 |
+
|
| 45 |
+
/**
 * Wraps icon path elements in the <svg> shell shared by all agent icons
 * (24×24 view box, filled with the current text colour, `w-6 h-6`).
 */
function agentIcon(paths: React.ReactNode) {
  return (
    <svg
      xmlns="http://www.w3.org/2000/svg"
      viewBox="0 0 24 24"
      fill="currentColor"
      className="w-6 h-6"
    >
      {paths}
    </svg>
  );
}

/**
 * Cards shown in the "How It Works" section, one per review agent.
 * HomePage reads `icon`, `title`, `desc`, `color`, `iconBg` and `border`;
 * `bg` is defined here but not referenced by HomePage.
 */
const AGENTS = [
  {
    icon: agentIcon(
      <path
        fillRule="evenodd"
        d="M12.516 2.17a.75.75 0 00-1.032 0 11.209 11.209 0 01-7.877 3.08.75.75 0 00-.722.515A12.74 12.74 0 002.25 9.75c0 5.942 4.064 10.933 9.563 12.348a.749.749 0 00.374 0c5.499-1.415 9.563-6.406 9.563-12.348 0-1.39-.223-2.73-.635-3.985a.75.75 0 00-.722-.516 11.209 11.209 0 01-7.877-3.08z"
        clipRule="evenodd"
      />
    ),
    title: "Security Agent",
    desc: "Scans for vulnerabilities, injection flaws, auth issues, and CWE-classified risks using Bandit and detect-secrets.",
    color: "text-red-400",
    bg: "from-red-500/10 via-red-500/5 to-transparent",
    iconBg: "bg-red-500/10 text-red-400",
    border: "border-red-500/10 hover:border-red-500/20",
  },
  {
    icon: agentIcon(
      <path
        fillRule="evenodd"
        d="M14.615 1.595a.75.75 0 01.359.852L12.982 9.75h7.268a.75.75 0 01.548 1.262l-10.5 11.25a.75.75 0 01-1.272-.71l1.992-7.302H3.75a.75.75 0 01-.548-1.262l10.5-11.25a.75.75 0 01.913-.143z"
        clipRule="evenodd"
      />
    ),
    title: "Performance Agent",
    desc: "Detects N+1 queries, memory leaks, blocking operations, and algorithmic inefficiencies with Radon analysis.",
    color: "text-amber-400",
    bg: "from-amber-500/10 via-amber-500/5 to-transparent",
    iconBg: "bg-amber-500/10 text-amber-400",
    border: "border-amber-500/10 hover:border-amber-500/20",
  },
  {
    icon: agentIcon(
      <>
        <path d="M11.7 2.805a.75.75 0 01.6 0A60.65 60.65 0 0122.83 8.72a.75.75 0 01-.231 1.337 49.949 49.949 0 00-9.902 3.912l-.003.002-.34.18a.75.75 0 01-.707 0A50.009 50.009 0 007.5 12.174v-.224c0-.131.067-.248.172-.311a54.614 54.614 0 014.653-2.52.75.75 0 00-.65-1.352 56.129 56.129 0 00-4.78 2.589 1.858 1.858 0 00-.859 1.228 49.803 49.803 0 00-4.634-1.527.75.75 0 01-.231-1.337A60.653 60.653 0 0111.7 2.805z" />
        <path d="M13.06 15.473a48.45 48.45 0 017.666-3.282c.134 1.414.22 2.843.255 4.285a.75.75 0 01-.46.71 47.878 47.878 0 00-8.105 4.342.75.75 0 01-.832 0 47.877 47.877 0 00-8.104-4.342.75.75 0 01-.461-.71c.035-1.442.121-2.87.255-4.286A48.4 48.4 0 016 13.18v1.27a1.5 1.5 0 00-.14 2.508c-.09.38-.222.753-.397 1.11.452.213.901.434 1.346.661a6.729 6.729 0 00.551-1.608 1.5 1.5 0 00.14-2.67v-.645a48.549 48.549 0 013.44 1.668 2.25 2.25 0 002.12 0z" />
        <path d="M4.462 19.462c.42-.419.753-.89 1-1.394.453.213.902.434 1.347.661a6.743 6.743 0 01-1.286 1.794.75.75 0 11-1.06-1.06z" />
      </>
    ),
    title: "Style Agent",
    desc: "Enforces naming conventions, reduces complexity, and ensures code consistency via Ruff linting.",
    color: "text-cyan-400",
    bg: "from-cyan-500/10 via-cyan-500/5 to-transparent",
    iconBg: "bg-cyan-500/10 text-cyan-400",
    border: "border-cyan-500/10 hover:border-cyan-500/20",
  },
];
|
| 88 |
+
|
| 89 |
+
/** Chevron drawn between two steps of the pipeline strip. */
function PipelineArrow() {
  return (
    <svg
      className="w-4 h-4 text-zinc-700"
      fill="none"
      viewBox="0 0 24 24"
      stroke="currentColor"
    >
      <path
        strokeLinecap="round"
        strokeLinejoin="round"
        strokeWidth={2}
        d="M9 5l7 7-7 7"
      />
    </svg>
  );
}

/**
 * Clickable card for one monitored repository: owner/name, health score,
 * an animated score bar, open-PR count and the last-review text.
 */
function RepoCard({ repo }: { repo: (typeof MOCK_REPOS)[number] }) {
  const score = repo.health_score;

  // Bar fill colour uses the same 80 / 60 cut-offs as the score text.
  let barTone = "bg-red-500";
  if (score >= 80) {
    barTone = "bg-emerald-500";
  } else if (score >= 60) {
    barTone = "bg-amber-500";
  }

  return (
    <Link
      href={`/repos/${repo.owner}/${repo.repo}`}
      className={`group block glass glass-hover rounded-2xl p-6 transition-all duration-300 hover:shadow-xl ${scoreGlow(
        score
      )}`}
    >
      <div className="flex items-start justify-between mb-5">
        <div>
          <p className="text-xs text-zinc-600 font-mono mb-1">{repo.owner}/</p>
          <p className="text-base font-semibold text-zinc-200 group-hover:text-white transition-colors">
            {repo.repo}
          </p>
        </div>
        <div className="text-right">
          <span
            className={`text-3xl font-bold tabular-nums ${scoreColor(score)}`}
          >
            {score}
          </span>
        </div>
      </div>

      {/* Mini bar */}
      <div className="w-full h-1.5 rounded-full bg-white/[0.04] mb-4 overflow-hidden">
        <motion.div
          initial={{ width: 0 }}
          animate={{ width: `${score}%` }}
          transition={{
            duration: 1,
            delay: 0.5,
            ease: [0.25, 0.46, 0.45, 0.94],
          }}
          className={`h-full rounded-full ${barTone}`}
        />
      </div>

      <div className="flex items-center justify-between text-xs text-zinc-500">
        <span className="flex items-center gap-1.5">
          <span className={`w-1.5 h-1.5 rounded-full ${scoreDot(score)}`} />
          {repo.open_prs} open PRs
        </span>
        <span>{repo.last_review}</span>
      </div>
    </Link>
  );
}

/** Card describing one review agent in the "How It Works" grid. */
function AgentCard({ agent }: { agent: (typeof AGENTS)[number] }) {
  return (
    <div
      className={`glass rounded-2xl p-6 border ${agent.border} transition-all duration-300 h-full`}
    >
      <div
        className={`w-11 h-11 rounded-xl ${agent.iconBg} flex items-center justify-center mb-4`}
      >
        {agent.icon}
      </div>
      <h3 className={`text-base font-semibold mb-2 ${agent.color}`}>
        {agent.title}
      </h3>
      <p className="text-sm text-zinc-500 leading-relaxed">{agent.desc}</p>
    </div>
  );
}

/**
 * Dashboard landing page.
 *
 * Sections, top to bottom: hero, the STATS strip, one card per entry in
 * MOCK_REPOS, and the "How It Works" explainer (pipeline strip plus one card
 * per entry in AGENTS).
 */
export default function HomePage() {
  return (
    <div className="dot-grid">
      <div className="mx-auto max-w-7xl px-6 lg:px-8 py-16">
        {/* ── Hero ── */}
        <section className="text-center mb-20 pt-8">
          <FadeIn delay={0}>
            <div className="inline-flex items-center gap-2 rounded-full border border-violet-500/20 bg-violet-500/[0.06] px-4 py-1.5 text-sm text-violet-300 mb-8">
              <span className="relative flex h-2 w-2">
                <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-violet-400 opacity-75" />
                <span className="relative inline-flex rounded-full h-2 w-2 bg-violet-500" />
              </span>
              Multi-Agent AI Review Platform
            </div>
          </FadeIn>

          <FadeIn delay={0.1}>
            <h1 className="text-5xl sm:text-7xl font-bold tracking-tight mb-6">
              <span className="text-white">Code reviews,</span>
              <br />
              <span className="text-gradient">reimagined.</span>
            </h1>
          </FadeIn>

          <FadeIn delay={0.2}>
            <p className="text-lg sm:text-xl text-zinc-400 max-w-2xl mx-auto leading-relaxed">
              Three specialised AI agents analyse every pull request for{" "}
              <span className="text-red-400 font-medium">security</span>,{" "}
              <span className="text-amber-400 font-medium">performance</span>,
              and{" "}
              <span className="text-cyan-400 font-medium">style</span>{" "}
              — then synthesise a single, actionable review.
            </p>
          </FadeIn>
        </section>

        {/* ── Stats ── */}
        <FadeIn delay={0.3}>
          <section className="grid grid-cols-2 sm:grid-cols-4 gap-4 mb-20">
            {STATS.map((stat, index) => (
              <div key={stat.label} className="glass rounded-2xl p-5 text-center">
                <p className="text-3xl sm:text-4xl font-bold text-white tabular-nums">
                  {/* Each counter runs 200 ms longer than the one before it. */}
                  <AnimatedCounter
                    value={stat.value}
                    suffix={stat.suffix}
                    duration={1200 + index * 200}
                  />
                </p>
                <p className="text-xs text-zinc-500 mt-2 font-medium tracking-wide uppercase">
                  {stat.label}
                </p>
              </div>
            ))}
          </section>
        </FadeIn>

        {/* ── Repositories ── */}
        <section className="mb-24">
          <FadeIn delay={0.15}>
            <div className="flex items-center justify-between mb-6">
              <h2 className="text-xl font-semibold text-white">Repositories</h2>
              <span className="text-xs text-zinc-600 font-mono">
                {MOCK_REPOS.length} monitored
              </span>
            </div>
          </FadeIn>

          <StaggerContainer className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-4 gap-4">
            {MOCK_REPOS.map((repo) => (
              <StaggerItem key={repo.full_name}>
                <HoverCard>
                  <RepoCard repo={repo} />
                </HoverCard>
              </StaggerItem>
            ))}
          </StaggerContainer>
        </section>

        {/* ── How It Works ── */}
        <section className="mb-12">
          <FadeIn>
            <div className="text-center mb-12">
              <h2 className="text-2xl font-bold text-white mb-3">How It Works</h2>
              <p className="text-sm text-zinc-500 max-w-lg mx-auto">
                Each PR triggers three specialised agents that run in parallel,
                then a synthesizer merges their findings into one review.
              </p>
            </div>
          </FadeIn>

          {/* Pipeline visualization */}
          <FadeIn delay={0.1}>
            <div className="flex items-center justify-center mb-12">
              <div className="flex items-center gap-2 text-xs font-mono text-zinc-500">
                <span className="px-3 py-1.5 rounded-lg glass border border-white/[0.06]">
                  PR Opened
                </span>
                <PipelineArrow />
                <span className="px-3 py-1.5 rounded-lg glass border border-violet-500/20 text-violet-400">
                  3 Agents
                </span>
                <PipelineArrow />
                <span className="px-3 py-1.5 rounded-lg glass border border-cyan-500/20 text-cyan-400">
                  Synthesize
                </span>
                <PipelineArrow />
                <span className="px-3 py-1.5 rounded-lg glass border border-emerald-500/20 text-emerald-400">
                  Review Posted
                </span>
              </div>
            </div>
          </FadeIn>

          <StaggerContainer className="grid grid-cols-1 sm:grid-cols-3 gap-5">
            {AGENTS.map((agent) => (
              <StaggerItem key={agent.title}>
                <HoverCard>
                  <AgentCard agent={agent} />
                </HoverCard>
              </StaggerItem>
            ))}
          </StaggerContainer>
        </section>
      </div>
    </div>
  );
}
|
dashboard/app/repos/[owner]/[repo]/page.tsx
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import Link from "next/link";
|
| 2 |
+
import { getRepoReviews, getRepoStats } from "@/lib/api";
|
| 3 |
+
import HealthScoreRing from "@/components/HealthScoreRing";
|
| 4 |
+
import TrendChart from "@/components/TrendChart";
|
| 5 |
+
import AgentBreakdown from "@/components/AgentBreakdown";
|
| 6 |
+
import SeverityBadge from "@/components/SeverityBadge";
|
| 7 |
+
import type { Severity } from "@/lib/types";
|
| 8 |
+
|
| 9 |
+
/**
 * Repository overview page (`/repos/[owner]/[repo]`).
 *
 * Fetches the repository's reviews and aggregate stats in parallel, then
 * renders the header figures, the latest health score next to the score
 * trend, a per-agent breakdown over the findings of all fetched reviews, and
 * a table with one row per review. When there are no reviews the table shows
 * a single empty-state row instead of a bare header.
 *
 * @param params - Promise resolving to the dynamic route segments `owner`
 *                 and `repo`.
 */
export default async function RepoPage({
  params,
}: {
  params: Promise<{ owner: string; repo: string }>;
}) {
  const { owner, repo } = await params;
  const [reviews, stats] = await Promise.all([
    getRepoReviews(owner, repo),
    getRepoStats(owner, repo),
  ]);

  // Treats reviews[0] as the latest review and reviews[1] as the one before it
  // — presumably the API returns newest first; confirm against getRepoReviews.
  const latestScore = reviews[0]?.health_score ?? 0;
  const previousScore = reviews[1]?.health_score;
  const allFindings = reviews.flatMap((r) => r.findings);

  const headerStats = [
    { label: "Reviews", value: stats.total_reviews },
    { label: "Findings", value: stats.total_findings },
    { label: "Avg Score", value: `${stats.average_health_score}%` },
  ];

  // Score colour bands: 80 and above emerald, 60–79 amber, below 60 red.
  const scoreTextClass = (score: number): string => {
    if (score >= 80) return "text-emerald-400";
    if (score >= 60) return "text-amber-400";
    return "text-red-400";
  };

  // A count cell renders a muted "0", or the count in the given accent classes.
  const countCell = (count: number, accentClass: string) => (
    <td className="px-5 py-3.5">
      {count > 0 ? (
        <span className={accentClass}>{count}</span>
      ) : (
        <span className="text-zinc-700">0</span>
      )}
    </td>
  );

  return (
    <div className="dot-grid">
      <div className="mx-auto max-w-7xl px-6 lg:px-8 py-10">
        {/* ── Breadcrumb ── */}
        <nav className="flex items-center gap-2 text-sm text-zinc-600 mb-8">
          <Link href="/" className="hover:text-zinc-400 transition-colors">
            Dashboard
          </Link>
          <svg
            className="w-3.5 h-3.5 text-zinc-700"
            fill="none"
            viewBox="0 0 24 24"
            stroke="currentColor"
          >
            <path
              strokeLinecap="round"
              strokeLinejoin="round"
              strokeWidth={2}
              d="M9 5l7 7-7 7"
            />
          </svg>
          <span className="text-zinc-400 font-medium">
            {owner}/{repo}
          </span>
        </nav>

        {/* ── Header ── */}
        <div className="flex flex-col sm:flex-row sm:items-end sm:justify-between gap-6 mb-12">
          <div>
            <p className="text-xs text-zinc-600 font-mono mb-1">{owner}/</p>
            <h1 className="text-3xl font-bold text-white">{repo}</h1>
          </div>
          <div className="flex items-center gap-8 text-sm">
            {headerStats.map((s) => (
              <div key={s.label} className="text-center">
                <p className="text-2xl font-bold text-white tabular-nums">
                  {s.value}
                </p>
                <p className="text-[10px] text-zinc-600 uppercase tracking-wider mt-0.5">
                  {s.label}
                </p>
              </div>
            ))}
          </div>
        </div>

        {/* ── Score + Trend ── */}
        <div className="grid grid-cols-1 lg:grid-cols-[200px_1fr] gap-8 mb-12">
          <div className="flex items-center justify-center">
            <HealthScoreRing
              score={latestScore}
              previousScore={previousScore}
              label="Latest Score"
            />
          </div>
          <TrendChart scores={stats.recent_scores} />
        </div>

        {/* ── Agent Breakdown ── */}
        <section className="mb-12">
          <h2 className="text-sm font-semibold text-zinc-400 mb-4 uppercase tracking-wider">
            Agent Breakdown
          </h2>
          <AgentBreakdown findings={allFindings} />
        </section>

        {/* ── PR Reviews Table ── */}
        <section>
          <h2 className="text-sm font-semibold text-zinc-400 mb-4 uppercase tracking-wider">
            Recent PR Reviews
          </h2>
          <div className="overflow-x-auto glass rounded-2xl">
            <table className="w-full text-sm text-left">
              <thead>
                <tr className="border-b border-white/[0.04] text-zinc-500 text-[11px] uppercase tracking-wider">
                  <th className="px-5 py-3.5 font-medium">PR</th>
                  <th className="px-5 py-3.5 font-medium">Score</th>
                  <th className="px-5 py-3.5 font-medium">Critical</th>
                  <th className="px-5 py-3.5 font-medium">High</th>
                  <th className="px-5 py-3.5 font-medium">Medium</th>
                  <th className="px-5 py-3.5 font-medium">Low</th>
                  <th className="px-5 py-3.5 font-medium">Summary</th>
                  <th className="px-5 py-3.5 font-medium">Duration</th>
                </tr>
              </thead>
              <tbody>
                {/* Empty state: colSpan matches the eight header columns. */}
                {reviews.length === 0 && (
                  <tr>
                    <td
                      colSpan={8}
                      className="px-5 py-8 text-center text-xs text-zinc-600"
                    >
                      No reviews recorded for this repository yet.
                    </td>
                  </tr>
                )}
                {reviews.map((r) => (
                  <tr
                    key={r.id}
                    className="border-b border-white/[0.03] hover:bg-white/[0.02] transition-colors"
                  >
                    <td className="px-5 py-3.5">
                      <Link
                        href={`/repos/${owner}/${repo}/prs/${r.pr_number}`}
                        className="text-violet-400 hover:text-violet-300 font-medium transition-colors"
                      >
                        #{r.pr_number}
                      </Link>
                    </td>
                    <td
                      className={`px-5 py-3.5 font-bold tabular-nums ${scoreTextClass(
                        r.health_score
                      )}`}
                    >
                      {r.health_score}
                    </td>
                    {/* NOTE(review): unlike the other severity columns this
                        one shows a badge, so critical_count itself is never
                        displayed — confirm that is intended. */}
                    <td className="px-5 py-3.5">
                      {r.critical_count > 0 ? (
                        <SeverityBadge severity={"critical" as Severity} />
                      ) : (
                        <span className="text-zinc-700">0</span>
                      )}
                    </td>
                    {countCell(
                      r.high_count,
                      "text-orange-400 font-medium tabular-nums"
                    )}
                    {countCell(r.medium_count, "text-amber-400 tabular-nums")}
                    <td className="px-5 py-3.5 text-zinc-600 tabular-nums">
                      {r.low_count}
                    </td>
                    <td className="px-5 py-3.5 text-zinc-500 truncate max-w-[240px] text-xs">
                      {r.summary}
                    </td>
                    {/* duration_ms is converted to seconds, one decimal place. */}
                    <td className="px-5 py-3.5 text-zinc-600 tabular-nums text-xs font-mono">
                      {(r.duration_ms / 1000).toFixed(1)}s
                    </td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        </section>
      </div>
    </div>
  );
}
|
dashboard/app/repos/[owner]/[repo]/prs/[number]/page.tsx
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import Link from "next/link";
|
| 2 |
+
import { getReviewDetail } from "@/lib/api";
|
| 3 |
+
import HealthScoreRing from "@/components/HealthScoreRing";
|
| 4 |
+
import FindingsTable from "@/components/FindingsTable";
|
| 5 |
+
import AgentBreakdown from "@/components/AgentBreakdown";
|
| 6 |
+
import type { Recommendation } from "@/lib/types";
|
| 7 |
+
|
| 8 |
+
/** Classes and caption used to render one recommendation badge. */
type RecommendationStyle = {
  bg: string;
  text: string;
  label: string;
  dot: string;
};

/** Badge styling for each recommendation value. */
const RECOMMENDATION_STYLE: Record<Recommendation, RecommendationStyle> = {
  approve: {
    bg: "bg-emerald-500/10",
    text: "text-emerald-400",
    label: "Approve",
    dot: "bg-emerald-400",
  },
  request_changes: {
    bg: "bg-amber-500/10",
    text: "text-amber-400",
    label: "Request Changes",
    dot: "bg-amber-400",
  },
  block: {
    bg: "bg-red-500/10",
    text: "text-red-400",
    label: "Block",
    dot: "bg-red-400",
  },
};

/**
 * Neutral badge used when the review carries a recommendation value that has
 * no entry in RECOMMENDATION_STYLE, so the page degrades instead of throwing
 * on `rec.bg`.
 */
const UNKNOWN_RECOMMENDATION_STYLE: RecommendationStyle = {
  bg: "bg-zinc-500/10",
  text: "text-zinc-400",
  label: "Unknown",
  dot: "bg-zinc-500",
};

/**
 * Pull-request review page (`/repos/[owner]/[repo]/prs/[number]`).
 *
 * Parses the PR number from the route, loads the review detail and renders
 * the recommendation badge, health score, executive summary, per-severity
 * counts, the per-agent breakdown and the findings table.
 *
 * A `[number]` segment that does not parse to a number renders a short
 * "not found" message instead of querying the API with `NaN`.
 *
 * @param params - Promise resolving to the dynamic route segments `owner`,
 *                 `repo` and `number` (the PR number as a string).
 */
export default async function PRReviewPage({
  params,
}: {
  params: Promise<{ owner: string; repo: string; number: string }>;
}) {
  const { owner, repo, number: prNum } = await params;
  const prNumber = parseInt(prNum, 10);

  // parseInt yields NaN for segments such as "abc"; stop before it reaches the
  // API call or the "PR #…" headings.
  if (Number.isNaN(prNumber)) {
    return (
      <div className="dot-grid">
        <div className="mx-auto max-w-7xl px-6 lg:px-8 py-10">
          <h1 className="text-3xl font-bold text-white mb-4">
            Pull request not found
          </h1>
          <p className="text-sm text-zinc-500 mb-6">
            <span className="font-mono text-zinc-400">{prNum}</span> is not a
            valid pull request number.
          </p>
          <Link
            href={`/repos/${owner}/${repo}`}
            className="text-violet-400 hover:text-violet-300 font-medium transition-colors"
          >
            Back to {owner}/{repo}
          </Link>
        </div>
      </div>
    );
  }

  const { review, record } = await getReviewDetail(owner, repo, prNumber);

  // Fall back to a neutral badge if the value is missing from the table.
  const rec =
    RECOMMENDATION_STYLE[review.recommendation] ?? UNKNOWN_RECOMMENDATION_STYLE;

  // One summary card per severity level, most severe first.
  const severityCards = [
    {
      label: "Critical",
      count: review.critical_count,
      color: "text-red-400",
      border: "border-red-500/[0.08]",
      dot: "bg-red-400",
    },
    {
      label: "High",
      count: review.high_count,
      color: "text-orange-400",
      border: "border-orange-500/[0.08]",
      dot: "bg-orange-400",
    },
    {
      label: "Medium",
      count: review.medium_count,
      color: "text-amber-400",
      border: "border-amber-500/[0.08]",
      dot: "bg-amber-400",
    },
    {
      label: "Low",
      count: review.low_count,
      color: "text-zinc-400",
      border: "border-zinc-700/30",
      dot: "bg-zinc-500",
    },
  ];

  // Chevron separating breadcrumb entries.
  const crumbSeparator = (
    <svg
      className="w-3.5 h-3.5 text-zinc-700"
      fill="none"
      viewBox="0 0 24 24"
      stroke="currentColor"
    >
      <path
        strokeLinecap="round"
        strokeLinejoin="round"
        strokeWidth={2}
        d="M9 5l7 7-7 7"
      />
    </svg>
  );

  return (
    <div className="dot-grid">
      <div className="mx-auto max-w-7xl px-6 lg:px-8 py-10">
        {/* ── Breadcrumb ── */}
        <nav className="flex items-center gap-2 text-sm text-zinc-600 mb-8">
          <Link href="/" className="hover:text-zinc-400 transition-colors">
            Dashboard
          </Link>
          {crumbSeparator}
          <Link
            href={`/repos/${owner}/${repo}`}
            className="hover:text-zinc-400 transition-colors"
          >
            {owner}/{repo}
          </Link>
          {crumbSeparator}
          <span className="text-zinc-400 font-medium">PR #{prNumber}</span>
        </nav>

        {/* ── Header ── */}
        <div className="flex flex-col sm:flex-row sm:items-start sm:justify-between gap-6 mb-12">
          <div>
            <p className="text-xs text-zinc-600 font-mono mb-1">
              {owner}/{repo}
            </p>
            <h1 className="text-3xl font-bold text-white mb-4">
              Pull Request #{prNumber}
            </h1>
            <div className="flex items-center gap-3">
              <span
                className={`inline-flex items-center gap-1.5 rounded-full px-3 py-1 text-xs font-semibold ${rec.bg} ${rec.text}`}
              >
                <span className={`w-1.5 h-1.5 rounded-full ${rec.dot}`} />
                {rec.label}
              </span>
              <span className="text-[11px] text-zinc-600 font-mono">
                {record.commit_sha}
              </span>
              {/* duration_ms is converted to seconds, one decimal place. */}
              <span className="text-[11px] text-zinc-700 font-mono">
                {(record.duration_ms / 1000).toFixed(1)}s
              </span>
            </div>
          </div>
          <HealthScoreRing
            score={review.health_score}
            size={140}
            label="Health Score"
          />
        </div>

        {/* ── Executive Summary ── */}
        <section className="glass rounded-2xl p-6 mb-8">
          <h2 className="text-[10px] text-zinc-600 uppercase tracking-widest font-medium mb-3">
            Executive Summary
          </h2>
          <p className="text-zinc-300 leading-relaxed text-[15px]">
            {review.executive_summary}
          </p>
        </section>

        {/* ── Severity Counts ── */}
        <div className="grid grid-cols-2 sm:grid-cols-4 gap-4 mb-8">
          {severityCards.map((s) => (
            <div
              key={s.label}
              className={`glass rounded-2xl border ${s.border} p-5 text-center`}
            >
              <p className={`text-3xl font-bold tabular-nums ${s.color}`}>
                {s.count}
              </p>
              <p className="text-[10px] text-zinc-600 mt-1 uppercase tracking-wider flex items-center justify-center gap-1.5">
                <span className={`w-1.5 h-1.5 rounded-full ${s.dot}`} />
                {s.label}
              </p>
            </div>
          ))}
        </div>

        {/* ── Agent Breakdown ── */}
        <section className="mb-8">
          <h2 className="text-sm font-semibold text-zinc-400 mb-4 uppercase tracking-wider">
            Agent Breakdown
          </h2>
          <AgentBreakdown findings={review.findings} />
        </section>

        {/* ── Findings ── */}
        <section>
          <h2 className="text-sm font-semibold text-zinc-400 mb-4 uppercase tracking-wider">
            All Findings ({review.findings.length})
          </h2>
          <FindingsTable findings={review.findings} />
        </section>
      </div>
    </div>
  );
}
|
dashboard/components/AgentBreakdown.tsx
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { motion } from "framer-motion";
|
| 4 |
+
import type { Finding, AgentKind } from "@/lib/types";
|
| 5 |
+
|
| 6 |
+
/** Props accepted by the AgentBreakdown component. */
interface AgentBreakdownProps {
  /** Findings to summarize; the component groups them by their `agent` field. */
  findings: Finding[];
}
|
| 9 |
+
|
| 10 |
+
/**
 * Presentation metadata for each agent kind, consumed by AgentBreakdown.
 *
 * Every entry supplies an inline SVG icon, a human-readable label, and the
 * class-name strings applied to the card heading (`color`), the icon tile
 * (`iconBg`) and the card outline (`border`).
 */
const AGENT_META: Record<
  AgentKind,
  {
    icon: React.ReactNode;
    label: string;
    color: string;
    iconBg: string;
    border: string;
  }
> = {
  // Security agent: red accent.
  security: {
    icon: (
      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" className="w-5 h-5">
        <path fillRule="evenodd" d="M12.516 2.17a.75.75 0 00-1.032 0 11.209 11.209 0 01-7.877 3.08.75.75 0 00-.722.515A12.74 12.74 0 002.25 9.75c0 5.942 4.064 10.933 9.563 12.348a.749.749 0 00.374 0c5.499-1.415 9.563-6.406 9.563-12.348 0-1.39-.223-2.73-.635-3.985a.75.75 0 00-.722-.516 11.209 11.209 0 01-7.877-3.08z" clipRule="evenodd" />
      </svg>
    ),
    label: "Security",
    color: "text-red-400",
    iconBg: "bg-red-500/10 text-red-400",
    border: "border-red-500/[0.08]",
  },
  // Performance agent: amber accent.
  performance: {
    icon: (
      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" className="w-5 h-5">
        <path fillRule="evenodd" d="M14.615 1.595a.75.75 0 01.359.852L12.982 9.75h7.268a.75.75 0 01.548 1.262l-10.5 11.25a.75.75 0 01-1.272-.71l1.992-7.302H3.75a.75.75 0 01-.548-1.262l10.5-11.25a.75.75 0 01.913-.143z" clipRule="evenodd" />
      </svg>
    ),
    label: "Performance",
    color: "text-amber-400",
    iconBg: "bg-amber-500/10 text-amber-400",
    border: "border-amber-500/[0.08]",
  },
  // Style agent: cyan accent.
  style: {
    icon: (
      <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" className="w-5 h-5">
        <path d="M11.7 2.805a.75.75 0 01.6 0A60.65 60.65 0 0122.83 8.72a.75.75 0 01-.231 1.337 49.949 49.949 0 00-9.902 3.912l-.003.002-.34.18a.75.75 0 01-.707 0A50.009 50.009 0 007.5 12.174v-.224c0-.131.067-.248.172-.311a54.614 54.614 0 014.653-2.52.75.75 0 00-.65-1.352 56.129 56.129 0 00-4.78 2.589 1.858 1.858 0 00-.859 1.228 49.803 49.803 0 00-4.634-1.527.75.75 0 01-.231-1.337A60.653 60.653 0 0111.7 2.805z" />
        <path d="M13.06 15.473a48.45 48.45 0 017.666-3.282c.134 1.414.22 2.843.255 4.285a.75.75 0 01-.46.71 47.878 47.878 0 00-8.105 4.342.75.75 0 01-.832 0 47.877 47.877 0 00-8.104-4.342.75.75 0 01-.461-.71c.035-1.442.121-2.87.255-4.286A48.4 48.4 0 016 13.18v1.27a1.5 1.5 0 00-.14 2.508c-.09.38-.222.753-.397 1.11.452.213.901.434 1.346.661a6.729 6.729 0 00.551-1.608 1.5 1.5 0 00.14-2.67v-.645a48.549 48.549 0 013.44 1.668 2.25 2.25 0 002.12 0z" />
        <path d="M4.462 19.462c.42-.419.753-.89 1-1.394.453.213.902.434 1.347.661a6.743 6.743 0 01-1.286 1.794.75.75 0 11-1.06-1.06z" />
      </svg>
    ),
    label: "Style",
    color: "text-cyan-400",
    iconBg: "bg-cyan-500/10 text-cyan-400",
    border: "border-cyan-500/[0.08]",
  },
};
|
| 56 |
+
|
| 57 |
+
/**
 * Renders one summary card per agent (security, performance, style).
 *
 * Each card shows how many findings that agent produced and the category that
 * occurs most often among them, or an em dash when the agent has no findings.
 */
export default function AgentBreakdown({ findings }: AgentBreakdownProps) {
  const agentOrder: AgentKind[] = ["security", "performance", "style"];

  const cards = agentOrder.map((agent) => {
    // Count this agent's findings and tally them per category in one pass.
    const perCategory: Record<string, number> = {};
    let total = 0;
    for (const finding of findings) {
      if (finding.agent !== agent) continue;
      total += 1;
      perCategory[finding.category] = (perCategory[finding.category] ?? 0) + 1;
    }

    // Highest tally first; the leading entry (if any) names the top category.
    const ranked = Object.entries(perCategory).sort(
      ([, left], [, right]) => right - left
    );
    const topCategory = ranked[0]?.[0] ?? "—";

    return {
      agent,
      count: total,
      topCategory,
      meta: AGENT_META[agent],
    };
  });

  return (
    <div className="grid grid-cols-1 sm:grid-cols-3 gap-4">
      {cards.map((card, position) => {
        const { meta } = card;
        return (
          <motion.div
            key={card.agent}
            initial={{ opacity: 0, y: 16 }}
            animate={{ opacity: 1, y: 0 }}
            // Stagger the entrance so cards appear left to right.
            transition={{ duration: 0.4, delay: position * 0.08 }}
            whileHover={{ y: -2, transition: { duration: 0.15 } }}
            className={`glass rounded-2xl p-5 border ${meta.border} transition-colors duration-300`}
          >
            <div className="flex items-center gap-3 mb-4">
              <div
                className={`w-9 h-9 rounded-xl ${meta.iconBg} flex items-center justify-center`}
              >
                {meta.icon}
              </div>
              <h3 className={`text-sm font-semibold ${meta.color}`}>
                {meta.label}
              </h3>
            </div>
            <p className="text-3xl font-bold text-white tabular-nums">{card.count}</p>
            <p className="text-[11px] text-zinc-600 mt-0.5 uppercase tracking-wider">
              findings
            </p>
            <div className="mt-4 pt-3 border-t border-white/[0.04]">
              <p className="text-[10px] text-zinc-600 uppercase tracking-wider">
                Top category
              </p>
              <p className="text-xs text-zinc-400 font-medium truncate mt-0.5">
                {card.topCategory}
              </p>
            </div>
          </motion.div>
        );
      })}
    </div>
  );
}
|
dashboard/components/AnimatedCounter.tsx
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { useEffect, useRef, useState } from "react";
|
| 4 |
+
|
| 5 |
+
/** Props accepted by the AnimatedCounter component. */
interface AnimatedCounterProps {
  /** Number the counter animates towards. */
  value: number;
  /** Text rendered immediately after the number; the component defaults it to "". */
  suffix?: string;
  /** Length of the animation in milliseconds; the component defaults it to 1200. */
  duration?: number;
  /** Class name forwarded to the rendered <span>. */
  className?: string;
}
|
| 11 |
+
|
| 12 |
+
/**
 * Displays a number that eases towards `value` with an ease-out-expo curve.
 *
 * On mount the counter runs from 0 to `value`. When `value` (or `duration`)
 * changes later, a new animation starts from whatever number is currently on
 * screen, so the display always ends on the latest `value`.
 *
 * @param value     Target number.
 * @param suffix    Text rendered right after the number (default "").
 * @param duration  Animation length in milliseconds (default 1200). A value
 *                  of 0 or less jumps straight to the target.
 * @param className Class name applied to the wrapping <span>.
 */
export function AnimatedCounter({
  value,
  suffix = "",
  duration = 1200,
  className,
}: AnimatedCounterProps) {
  const [display, setDisplay] = useState(0);
  const ref = useRef<HTMLSpanElement>(null);
  // Last number handed to setDisplay; a restarted animation begins from here.
  const shown = useRef(0);

  useEffect(() => {
    const from = shown.current;
    const start = performance.now();
    let frame = 0;

    function tick(now: number) {
      // Clamp to [0, 1]; a non-positive duration would otherwise divide by
      // zero and could produce NaN.
      const progress =
        duration > 0
          ? Math.min(Math.max((now - start) / duration, 0), 1)
          : 1;
      // ease-out expo
      const ease = progress === 1 ? 1 : 1 - Math.pow(2, -10 * progress);
      const next = Math.round(from + (value - from) * ease);
      shown.current = next;
      setDisplay(next);
      if (progress < 1) frame = requestAnimationFrame(tick);
    }

    frame = requestAnimationFrame(tick);
    // Stop the pending frame on unmount or before the next run of this effect,
    // so no stale animation keeps writing state.
    return () => cancelAnimationFrame(frame);
  }, [value, duration]);

  return (
    <span ref={ref} className={className}>
      {display}
      {suffix}
    </span>
  );
}
|
dashboard/components/FindingsTable.tsx
ADDED
|
@@ -0,0 +1,185 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"use client";
|
| 2 |
+
|
| 3 |
+
import { useState, useMemo } from "react";
|
| 4 |
+
import { motion, AnimatePresence } from "framer-motion";
|
| 5 |
+
import type { Finding, Severity } from "@/lib/types";
|
| 6 |
+
import SeverityBadge from "./SeverityBadge";
|
| 7 |
+
|
| 8 |
+
/**
 * Small inline SVG icon for each known agent name, used in the table's agent
 * column. The map is keyed by plain string; FindingsTable falls back to the
 * raw agent name when no entry exists for it.
 */
const AGENT_ICON: Record<string, React.ReactNode> = {
  security: (
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" className="w-4 h-4 text-red-400">
      <path fillRule="evenodd" d="M12.516 2.17a.75.75 0 00-1.032 0 11.209 11.209 0 01-7.877 3.08.75.75 0 00-.722.515A12.74 12.74 0 002.25 9.75c0 5.942 4.064 10.933 9.563 12.348a.749.749 0 00.374 0c5.499-1.415 9.563-6.406 9.563-12.348 0-1.39-.223-2.73-.635-3.985a.75.75 0 00-.722-.516 11.209 11.209 0 01-7.877-3.08z" clipRule="evenodd" />
    </svg>
  ),
  performance: (
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" className="w-4 h-4 text-amber-400">
      <path fillRule="evenodd" d="M14.615 1.595a.75.75 0 01.359.852L12.982 9.75h7.268a.75.75 0 01.548 1.262l-10.5 11.25a.75.75 0 01-1.272-.71l1.992-7.302H3.75a.75.75 0 01-.548-1.262l10.5-11.25a.75.75 0 01.913-.143z" clipRule="evenodd" />
    </svg>
  ),
  // NOTE(review): this icon has two paths and its second path is shorter than
  // the one in AgentBreakdown's style icon — confirm the difference is intended.
  style: (
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" className="w-4 h-4 text-cyan-400">
      <path d="M11.7 2.805a.75.75 0 01.6 0A60.65 60.65 0 0122.83 8.72a.75.75 0 01-.231 1.337 49.949 49.949 0 00-9.902 3.912l-.003.002-.34.18a.75.75 0 01-.707 0A50.009 50.009 0 007.5 12.174v-.224c0-.131.067-.248.172-.311a54.614 54.614 0 014.653-2.52.75.75 0 00-.65-1.352 56.129 56.129 0 00-4.78 2.589 1.858 1.858 0 00-.859 1.228 49.803 49.803 0 00-4.634-1.527.75.75 0 01-.231-1.337A60.653 60.653 0 0111.7 2.805z" />
      <path d="M13.06 15.473a48.45 48.45 0 017.666-3.282c.134 1.414.22 2.843.255 4.285a.75.75 0 01-.46.71 47.878 47.878 0 00-8.105 4.342.75.75 0 01-.832 0 47.877 47.877 0 00-8.104-4.342.75.75 0 01-.461-.71c.035-1.442.121-2.87.255-4.286z" />
    </svg>
  ),
};
|
| 26 |
+
|
| 27 |
+
/**
 * Sort rank for each severity: a lower number sorts first when the table is
 * ordered ascending by severity, so "critical" leads and "low" trails.
 */
const SEVERITY_ORDER: Record<Severity, number> = {
  critical: 0,
  high: 1,
  medium: 2,
  low: 3,
};
|
| 33 |
+
|
| 34 |
+
/** Finding fields the table can be sorted by; one per column header. */
type SortKey = "severity" | "agent" | "file_path" | "category" | "title";
|
| 35 |
+
|
| 36 |
+
/**
 * Sortable, expandable table of review findings.
 *
 * Clicking a column header sorts by that column (clicking the active column
 * again flips the direction). Clicking a row toggles a detail panel with the
 * finding's description, optional suggested fix, optional CWE id, confidence
 * and line range. At most one row is expanded at a time.
 *
 * Rows are identified by their position in the `findings` prop rather than by
 * their position in the sorted view, so the expanded row and the React keys
 * stay attached to the same finding when the sort order changes.
 *
 * @param findings Findings to display.
 */
export default function FindingsTable({
  findings,
}: {
  findings: Finding[];
}) {
  const [sortKey, setSortKey] = useState<SortKey>("severity");
  const [sortAsc, setSortAsc] = useState(true);
  // Index into the `findings` prop (not the sorted array) of the open row.
  const [expandedIdx, setExpandedIdx] = useState<number | null>(null);

  const sorted = useMemo(() => {
    // Pair each finding with its source index before sorting so identity
    // survives re-ordering.
    const rows = findings.map((finding, sourceIdx) => ({ finding, sourceIdx }));
    rows.sort((a, b) => {
      let cmp = 0;
      if (sortKey === "severity") {
        cmp =
          SEVERITY_ORDER[a.finding.severity] -
          SEVERITY_ORDER[b.finding.severity];
      } else {
        cmp = (a.finding[sortKey] as string).localeCompare(
          b.finding[sortKey] as string
        );
      }
      return sortAsc ? cmp : -cmp;
    });
    return rows;
  }, [findings, sortKey, sortAsc]);

  /** Toggle direction on the active column, otherwise switch column (ascending). */
  function handleSort(key: SortKey) {
    if (key === sortKey) setSortAsc((v) => !v);
    else {
      setSortKey(key);
      setSortAsc(true);
    }
  }

  // Direction marker shown next to the active column's label.
  const arrow = (key: SortKey) =>
    sortKey === key ? (sortAsc ? " \u25B2" : " \u25BC") : "";

  return (
    <motion.div
      initial={{ opacity: 0, y: 12 }}
      animate={{ opacity: 1, y: 0 }}
      transition={{ duration: 0.4, delay: 0.1 }}
      className="overflow-x-auto glass rounded-2xl"
    >
      <table className="w-full text-sm text-left">
        <thead>
          <tr className="border-b border-white/[0.04] text-zinc-500 text-[11px] uppercase tracking-wider">
            {(
              [
                ["severity", "Severity"],
                ["agent", "Agent"],
                ["file_path", "File"],
                ["category", "Category"],
                ["title", "Title"],
              ] as [SortKey, string][]
            ).map(([key, label]) => (
              <th
                key={key}
                onClick={() => handleSort(key)}
                className="px-4 py-3.5 cursor-pointer select-none hover:text-zinc-300 transition-colors font-medium"
              >
                {label}
                <span className="text-violet-400/70">{arrow(key)}</span>
              </th>
            ))}
          </tr>
        </thead>
        <tbody>
          {sorted.map(({ finding: f, sourceIdx }) => {
            const isExpanded = expandedIdx === sourceIdx;
            return (
              <tr key={sourceIdx} className="group">
                <td colSpan={5} className="p-0">
                  <button
                    type="button"
                    aria-expanded={isExpanded}
                    onClick={() => setExpandedIdx(isExpanded ? null : sourceIdx)}
                    className="w-full grid grid-cols-[100px_50px_1fr_130px_1fr] items-center text-left px-4 py-3 border-b border-white/[0.03] hover:bg-white/[0.02] transition-colors cursor-pointer"
                  >
                    <span>
                      <SeverityBadge severity={f.severity} />
                    </span>
                    <span title={f.agent}>
                      {AGENT_ICON[f.agent] ?? f.agent}
                    </span>
                    <span className="font-mono text-zinc-400 text-xs truncate pr-2">
                      {f.file_path}
                      <span className="text-zinc-700 ml-1">
                        :{f.line_start}
                      </span>
                    </span>
                    <span className="text-zinc-500 text-xs">{f.category}</span>
                    <span className="text-zinc-300 text-xs truncate">
                      {f.title}
                    </span>
                  </button>

                  <AnimatePresence>
                    {isExpanded && (
                      <motion.div
                        initial={{ height: 0, opacity: 0 }}
                        animate={{ height: "auto", opacity: 1 }}
                        exit={{ height: 0, opacity: 0 }}
                        transition={{ duration: 0.25, ease: "easeInOut" }}
                        className="overflow-hidden"
                      >
                        <div className="bg-white/[0.01] border-b border-white/[0.04] px-6 py-5 space-y-4">
                          <div>
                            <h4 className="text-[10px] text-zinc-600 uppercase tracking-widest mb-1.5 font-medium">
                              Description
                            </h4>
                            <p className="text-zinc-300 text-sm leading-relaxed">
                              {f.description}
                            </p>
                          </div>
                          {f.suggested_fix && (
                            <div>
                              <h4 className="text-[10px] text-zinc-600 uppercase tracking-widest mb-1.5 font-medium">
                                Suggested Fix
                              </h4>
                              <pre className="text-emerald-400/90 text-xs bg-emerald-500/[0.04] border border-emerald-500/10 rounded-xl px-4 py-3 overflow-x-auto whitespace-pre-wrap font-mono">
                                {f.suggested_fix}
                              </pre>
                            </div>
                          )}
                          <div className="flex gap-5 text-[11px] text-zinc-600 pt-1">
                            {f.cwe_id && (
                              <span className="font-mono">{f.cwe_id}</span>
                            )}
                            <span>
                              Confidence:{" "}
                              <span className="text-zinc-400">
                                {(f.confidence * 100).toFixed(0)}%
                              </span>
                            </span>
                            <span>
                              Lines{" "}
                              <span className="text-zinc-400 font-mono">
                                {f.line_start}–{f.line_end}
                              </span>
                            </span>
                          </div>
                        </div>
                      </motion.div>
                    )}
                  </AnimatePresence>
                </td>
              </tr>
            );
          })}
        </tbody>
      </table>
    </motion.div>
  );
}
|