NinjainPJs committed on
Commit
4b445f6
·
0 Parent(s):

initial - commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .env.example +27 -0
  2. .github/workflows/ci.yml +31 -0
  3. .github/workflows/prewarm.yml +14 -0
  4. .gitignore +67 -0
  5. PROJECT_PLAN.md +704 -0
  6. README.md +161 -0
  7. app/__init__.py +0 -0
  8. app/agents/__init__.py +0 -0
  9. app/agents/base_agent.py +295 -0
  10. app/agents/performance_agent.py +44 -0
  11. app/agents/security_agent.py +107 -0
  12. app/agents/style_agent.py +43 -0
  13. app/agents/synthesizer.py +291 -0
  14. app/config.py +40 -0
  15. app/context/__init__.py +0 -0
  16. app/context/embedder.py +126 -0
  17. app/context/indexer.py +127 -0
  18. app/context/retriever.py +116 -0
  19. app/db/__init__.py +0 -0
  20. app/db/postgres.py +144 -0
  21. app/db/redis_cache.py +121 -0
  22. app/github/__init__.py +0 -0
  23. app/github/auth.py +135 -0
  24. app/github/client.py +362 -0
  25. app/github/comment_formatter.py +215 -0
  26. app/github/webhook.py +84 -0
  27. app/main.py +355 -0
  28. app/models/__init__.py +0 -0
  29. app/models/findings.py +55 -0
  30. app/models/webhook_payloads.py +55 -0
  31. app/services/__init__.py +0 -0
  32. app/services/health_score.py +85 -0
  33. app/tools/__init__.py +0 -0
  34. app/tools/bandit_tool.py +173 -0
  35. app/tools/detect_secrets_tool.py +118 -0
  36. app/tools/linter_tool.py +113 -0
  37. app/tools/radon_tool.py +107 -0
  38. dashboard/.gitignore +41 -0
  39. dashboard/AGENTS.md +5 -0
  40. dashboard/CLAUDE.md +1 -0
  41. dashboard/README.md +36 -0
  42. dashboard/app/favicon.ico +0 -0
  43. dashboard/app/globals.css +152 -0
  44. dashboard/app/layout.tsx +104 -0
  45. dashboard/app/page.tsx +291 -0
  46. dashboard/app/repos/[owner]/[repo]/page.tsx +170 -0
  47. dashboard/app/repos/[owner]/[repo]/prs/[number]/page.tsx +168 -0
  48. dashboard/components/AgentBreakdown.tsx +113 -0
  49. dashboard/components/AnimatedCounter.tsx +44 -0
  50. dashboard/components/FindingsTable.tsx +185 -0
.env.example ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # === LLM APIs ===
2
+ GROQ_API_KEY=gsk_your_groq_api_key_here
3
+ GEMINI_API_KEY=AIza_your_gemini_api_key_here
4
+
5
+ # === GitHub App ===
6
+ GITHUB_APP_ID=123456
7
+ GITHUB_APP_PRIVATE_KEY_PATH=./keys/app.pem
8
+ GITHUB_WEBHOOK_SECRET=your_webhook_secret_here
9
+
10
+ # === Database ===
11
+ DATABASE_URL=postgresql://user:pass@host.neon.tech/sentinel_ai?sslmode=require
12
+
13
+ # === Redis Cache ===
14
+ UPSTASH_REDIS_URL=rediss://default:your_token@your-endpoint.upstash.io:6379
15
+
16
+ # === Embedding Model ===
17
+ EMBEDDING_MODEL=all-MiniLM-L6-v2
18
+
19
+ # === App Config ===
20
+ ENVIRONMENT=development
21
+ LOG_LEVEL=INFO
22
+ CONFIDENCE_THRESHOLD=0.6
23
+ MAX_REPO_FILES_INDEX=500
24
+
25
+ # === Security ===
26
+ DASHBOARD_API_KEY=generate-a-random-key-here
27
+ CORS_ALLOWED_ORIGINS=http://localhost:3000
.github/workflows/ci.yml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint-and-test:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+
15
+ - name: Set up Python
16
+ uses: actions/setup-python@v5
17
+ with:
18
+ python-version: "3.11"
19
+
20
+ - name: Install dependencies
21
+ run: pip install -r requirements-dev.txt
22
+
23
+ - name: Lint with ruff
24
+ run: ruff check app/ tests/
25
+
26
+ - name: Type check with mypy
27
+ run: mypy app/ --ignore-missing-imports
28
+ continue-on-error: true
29
+
30
+ - name: Run tests
31
+ run: pytest tests/ -v --tb=short
.github/workflows/prewarm.yml ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Pre-warm Render
2
+
3
+ on:
4
+ schedule:
5
+ # Ping every 10 minutes between 06:00 and 20:50 UTC, Monday–Friday
6
+ - cron: "*/10 6-20 * * 1-5"
7
+
8
+ jobs:
9
+ ping:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - name: Ping health endpoint
13
+ run: |
14
+ curl -sf "${{ secrets.RENDER_HEALTH_URL }}/health" || echo "Service cold — will wake on next request"
.gitignore ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Project planning docs (confidential)
2
+ *.pdf
3
+
4
+ # Python
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+ *.so
9
+ *.egg-info/
10
+ dist/
11
+ build/
12
+ .eggs/
13
+ *.egg
14
+
15
+ # Virtual environments
16
+ .venv/
17
+ venv/
18
+ env/
19
+
20
+ # Environment variables
21
+ .env
22
+ .env.local
23
+ .env.production
24
+
25
+ # Keys & secrets
26
+ keys/
27
+ *.pem
28
+ *.key
29
+
30
+ # IDE
31
+ .vscode/
32
+ .idea/
33
+ *.swp
34
+ *.swo
35
+ *~
36
+
37
+ # OS
38
+ .DS_Store
39
+ Thumbs.db
40
+
41
+ # ChromaDB persistence
42
+ chroma_data/
43
+ chromadb/
44
+
45
+ # Test & coverage
46
+ .pytest_cache/
47
+ htmlcov/
48
+ .coverage
49
+ coverage.xml
50
+
51
+ # Node (dashboard)
52
+ dashboard/node_modules/
53
+ dashboard/.next/
54
+ dashboard/out/
55
+
56
+ # Render
57
+ .render/
58
+
59
+ # Claude Code
60
+ .claude/
61
+
62
+ # Screenshots (local only)
63
+ *.png
64
+
65
+ # Misc
66
+ *.log
67
+ *.tmp
PROJECT_PLAN.md ADDED
@@ -0,0 +1,704 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # CodeProbe — Complete Project Plan & Progress Tracker
2
+
3
+ > **Multi-Agent Code Review System**
4
+ > Author: Ninjacode911 | Started: March 2026 | Target: 10 Weeks
5
+
6
+ ---
7
+
8
+ ## Table of Contents
9
+
10
+ 1. [Project Overview](#1-project-overview)
11
+ 2. [Architecture Deep Dive](#2-architecture-deep-dive)
12
+ 3. [Complete Tech Stack](#3-complete-tech-stack)
13
+ 4. [Directory Structure](#4-directory-structure)
14
+ 5. [Week-by-Week Implementation Plan](#5-week-by-week-implementation-plan)
15
+ 6. [Non-Coding Tasks](#6-non-coding-tasks)
16
+ 7. [GPU / WSL Tasks](#7-gpu--wsl-tasks)
17
+ 8. [Data Models & Schemas](#8-data-models--schemas)
18
+ 9. [API Endpoints](#9-api-endpoints)
19
+ 10. [Agent Prompt Design](#10-agent-prompt-design)
20
+ 11. [Evaluation Plan](#11-evaluation-plan)
21
+ 12. [Deployment Checklist](#12-deployment-checklist)
22
+ 13. [Progress Tracker](#13-progress-tracker)
23
+
24
+ ---
25
+
26
+ ## 1. Project Overview
27
+
28
+ **What:** A multi-agent PR review system that reviews GitHub pull requests using 4 specialized LangChain agents (Security, Performance, Style, Synthesizer), posts inline GitHub comments, and tracks code health via a Next.js dashboard.
29
+
30
+ **Why:** AI-generated code (41% of GitHub commits) introduces 1.7x more issues. Existing tools use single-pass LLM calls. Sentinel AI uses domain-specialized agents with debate/consensus, RAG context, and static analysis tools.
31
+
32
+ **Core Thesis:** Separate security, performance, and style review into specialized agents — each with distinct prompts, tools, and context — then merge via a Synthesizer into a coherent, ranked, deduplicated review.
33
+
34
+ **Key Differentiators:**
35
+ - Multi-agent specialization (3 domain + 1 synthesizer)
36
+ - Debate & consensus protocol (agents challenge each other before synthesis)
37
+ - Repo-aware RAG context (ChromaDB indexes full repo, not just diff)
38
+ - $0/month architecture (all free tiers)
39
+ - Structured severity scoring (Critical/High/Medium/Low with CWE IDs)
40
+ - Auto-fix suggestions (corrected code snippets inline)
41
+
42
+ ---
43
+
44
+ ## 2. Architecture Deep Dive
45
+
46
+ ### 2.1 Four Layers
47
+
48
+ ```
49
+ ┌─────────────────────────────────────────────────────┐
50
+ │ GITHUB LAYER │
51
+ │ Webhooks · PR Events · Inline Comments │
52
+ └──────────────────────┬──────────────────────────────┘
53
+ │ pull_request webhook
54
+ ┌──────────────────────▼──────────────────────────────┐
55
+ │ ORCHESTRATION LAYER (FastAPI on Render) │
56
+ │ Webhook receiver · HMAC validation · Redis cache │
57
+ │ Agent dispatcher · GitHub API client │
58
+ └──────────────────────┬──────────────────────────────┘
59
+ │ asyncio.gather()
60
+ ┌──────────────────────▼──────────────────────────────┐
61
+ │ AGENT LAYER (LangChain ReAct Agents) │
62
+ │ ┌──────────┐ ┌──────────────┐ ┌─────────┐ │
63
+ │ │ Security │ │ Performance │ │ Style │ PARALLEL │
64
+ │ │ Agent │ │ Agent │ │ Agent │ │
65
+ │ └────┬─────┘ └──────┬───────┘ └────┬────┘ │
66
+ │ └──────────────┼───────────────┘ │
67
+ │ ▼ │
68
+ │ ┌──────────────────┐ │
69
+ │ │ Synthesizer │ SEQUENTIAL │
70
+ │ │ Agent │ │
71
+ │ └──────────────────┘ │
72
+ └──────────────────────┬──────────────────────────────┘
73
+
74
+ ┌──────────────────────▼──────────────────────────────┐
75
+ │ KNOWLEDGE LAYER │
76
+ │ ChromaDB (vector store) · Upstash Redis (cache) │
77
+ │ Neon Postgres (history) · sentence-transformers │
78
+ └─────────────────────────────────────────────────────┘
79
+ ```
80
+
81
+ ### 2.2 Data Flow (11 Steps)
82
+
83
+ 1. GitHub fires `pull_request` webhook → Render FastAPI endpoint
84
+ 2. FastAPI validates HMAC-SHA256 signature (GitHub App secret)
85
+ 3. Check Upstash Redis: commit SHA already reviewed? → return cached
86
+ 4. Fetch via GitHub API: PR diff, changed files, full contents, commit history
87
+ 5. Build repo context: embed chunks with sentence-transformers → upsert ChromaDB
88
+ 6. Dispatch 3 parallel agents: `asyncio.gather(security, performance, style)`
89
+ 7. Each agent: system prompt + RAG context → Groq API → static tools → typed findings
90
+ 8. Synthesizer: deduplicate + resolve conflicts + Health Score + executive summary
91
+ 9. GitHub API: post inline comment per finding + PR summary comment
92
+ 10. Write review to Neon Postgres + set Redis cache (TTL: 7 days)
93
+ 11. Next.js dashboard fetches from Neon and updates Health Score chart
94
+
95
+ ### 2.3 Context Loading (5 Layers per Agent)
96
+
97
+ 1. Raw PR diff (changed lines, file paths, additions/deletions)
98
+ 2. Relevant file sections from full repo (ChromaDB semantic search on diff)
99
+ 3. Recent commit history for changed files (pattern detection)
100
+ 4. Repo configuration (language, framework, linter rules, test coverage)
101
+ 5. Domain-specific knowledge base (OWASP Top 10, DDIA patterns, style guides)
102
+
103
+ ---
104
+
105
+ ## 3. Complete Tech Stack
106
+
107
+ ### 3.1 LLM & AI
108
+
109
+ | Tool | Free Tier | Purpose |
110
+ |------|-----------|---------|
111
+ | **Groq API** (Llama-3.1-70B) | 14,400 req/day, 500 tok/sec | Primary LLM for all agents |
112
+ | **Gemini 1.5 Flash** | 1M tokens/day | Fallback when Groq exhausted |
113
+ | **LangChain** | OSS | Agent orchestration, LCEL, ReAct framework |
114
+ | **sentence-transformers** | Local (GPU) | Embeddings for ChromaDB — runs on RTX 5070 via WSL |
115
+
116
+ ### 3.2 Backend & APIs
117
+
118
+ | Tool | Free Tier | Purpose |
119
+ |------|-----------|---------|
120
+ | **FastAPI** | OSS | Webhook receiver, agent dispatcher, REST API |
121
+ | **Render.com** | Free web service | Hosts backend (30s cold start after 15min idle) |
122
+ | **GitHub Apps API** | Free | Webhooks, PR comments, file fetching |
123
+ | **Upstash Redis** | 10K req/day | Cache PR analysis by commit SHA |
124
+ | **Neon.tech** | Free Postgres 512MB | Review history, Health Score trends |
125
+
126
+ ### 3.3 Knowledge & Static Analysis
127
+
128
+ | Tool | Free Tier | Purpose |
129
+ |------|-----------|---------|
130
+ | **ChromaDB** | OSS, in-memory/persisted | Vector store for RAG context retrieval |
131
+ | **Semgrep OSS** | Free, 3K+ rules | SAST rules for Security Agent |
132
+ | **Bandit** | Free | Python AST security analysis |
133
+ | **detect-secrets** | Free | Credential/API key scanning |
134
+ | **radon** | Free | Cyclomatic complexity & maintainability index |
135
+ | **pylint/ESLint/Ruff** | Free | Linting for Style Agent |
136
+
137
+ ### 3.4 Frontend & Deployment
138
+
139
+ | Tool | Free Tier | Purpose |
140
+ |------|-----------|---------|
141
+ | **Vercel** | Free hobby tier | Hosts Next.js dashboard |
142
+ | **Next.js** | OSS | Dashboard UI |
143
+ | **Recharts** | OSS | Health Score trend charts, pie charts |
144
+ | **GitHub Actions** | 2K min/month | CI/CD for Sentinel AI itself |
145
+
146
+ ---
147
+
148
+ ## 4. Directory Structure
149
+
150
+ ```
151
+ sentinel-ai/
152
+ ├── app/
153
+ │ ├── __init__.py
154
+ │ ├── main.py # FastAPI app, webhook endpoint, lifespan
155
+ │ ├── config.py # Settings via pydantic-settings (env vars)
156
+ │ ├── agents/
157
+ │ │ ├── __init__.py
158
+ │ │ ├── base_agent.py # Shared agent interface / base class
159
+ │ │ ├── security_agent.py # Security ReAct agent
160
+ │ │ ├── performance_agent.py # Performance ReAct agent
161
+ │ │ ├── style_agent.py # Style & Maintainability agent
162
+ │ │ └── synthesizer.py # Synthesizer + Health Score + dedup
163
+ │ ├── tools/
164
+ │ │ ├── __init__.py
165
+ │ │ ├── semgrep_tool.py # LangChain tool wrapper for Semgrep
166
+ │ │ ├── bandit_tool.py # LangChain tool wrapper for Bandit
167
+ │ │ ├── detect_secrets_tool.py # Credential scanner tool
168
+ │ │ ├── radon_tool.py # Complexity metrics tool
169
+ │ │ ├── ast_analyzer.py # Python AST analysis (N+1, patterns)
170
+ │ │ └── linter_tool.py # Ruff/ESLint/pylint subprocess tool
171
+ │ ├── context/
172
+ │ │ ├── __init__.py
173
+ │ │ ├── embedder.py # sentence-transformers embedding pipeline
174
+ │ │ ├── indexer.py # ChromaDB repo indexer (upsert chunks)
175
+ │ │ └── retriever.py # RAG retriever (query ChromaDB for context)
176
+ │ ├── github/
177
+ │ │ ├── __init__.py
178
+ │ │ ├── webhook.py # Webhook validation (HMAC-SHA256)
179
+ │ │ ├── client.py # GitHub API client (fetch diff, post comments)
180
+ │ │ └── comment_formatter.py # Format findings as GitHub Markdown comments
181
+ │ ├── models/
182
+ │ │ ├── __init__.py
183
+ │ │ ├── findings.py # Finding, PRReview Pydantic schemas
184
+ │ │ └── webhook_payloads.py # GitHub webhook event schemas
185
+ │ ├── db/
186
+ │ │ ├── __init__.py
187
+ │ │ ├── postgres.py # Neon Postgres connection + queries
188
+ │ │ └── redis_cache.py # Upstash Redis cache logic
189
+ │ └── services/
190
+ │ ├── __init__.py
191
+ │ ├── orchestrator.py # Main orchestration: dispatch agents, synthesize
192
+ │ └── health_score.py # Health Score calculation formula
193
+ ├── dashboard/ # Next.js app (deployed to Vercel)
194
+ │ ├── package.json
195
+ │ ├── next.config.js
196
+ │ ├── tsconfig.json
197
+ │ ├── app/
198
+ │ │ ├── layout.tsx
199
+ │ │ ├── page.tsx # / — Repository Overview
200
+ │ │ ├── repos/
201
+ │ │ │ └── [owner]/
202
+ │ │ │ └── [repo]/
203
+ │ │ │ ├── page.tsx # Repo Detail (trends, charts)
204
+ │ │ │ └── prs/
205
+ │ │ │ └── [number]/
206
+ │ │ │ └── page.tsx # PR Review Detail
207
+ │ │ └── api/
208
+ │ │ ├── repos/
209
+ │ │ │ └── route.ts # API proxy to FastAPI backend
210
+ │ │ └── health/
211
+ │ │ └── route.ts
212
+ │ ├── components/
213
+ │ │ ├── HealthScoreRing.tsx # Circular gauge 0-100
214
+ │ │ ├── FindingsTable.tsx # Sortable, filterable findings
215
+ │ │ ├── TrendChart.tsx # Recharts LineChart
216
+ │ │ ├── AgentBreakdown.tsx # 3-column agent summary cards
217
+ │ │ ├── SeverityBadge.tsx # Color-coded severity pill
218
+ │ │ └── Navbar.tsx
219
+ │ └── lib/
220
+ │ ├── api.ts # Fetch wrapper for backend API
221
+ │ └── types.ts # TypeScript types matching backend schemas
222
+ ├── tests/
223
+ │ ├── __init__.py
224
+ │ ├── conftest.py # Shared fixtures
225
+ │ ├── unit/
226
+ │ │ ├── test_findings_schema.py
227
+ │ │ ├── test_synthesizer_dedup.py
228
+ │ │ ├── test_webhook_validation.py
229
+ │ │ ├── test_redis_cache.py
230
+ │ │ └── test_health_score.py
231
+ │ ├── integration/
232
+ │ │ ├── test_full_pipeline.py
233
+ │ │ └── test_github_posting.py
234
+ │ └── eval/
235
+ │ ├── dataset/ # 20-PR benchmark dataset (JSON fixtures)
236
+ │ ├── run_eval.py # Evaluation harness
237
+ │ └── metrics.py # Precision, recall, latency tracking
238
+ ├── prompts/
239
+ │ ├── security_system.md # Security Agent system prompt
240
+ │ ├── performance_system.md # Performance Agent system prompt
241
+ │ ├── style_system.md # Style Agent system prompt
242
+ │ └── synthesizer_system.md # Synthesizer system prompt
243
+ ├── knowledge/
244
+ │ ├── owasp_top10_2025.md # OWASP cheat sheet for Security RAG
245
+ │ ├── ddia_patterns.md # DDIA patterns for Performance RAG
246
+ │ └── style_guides/ # Language style guides for Style RAG
247
+ ├── .env.example # Template for env vars (no secrets)
248
+ ├── .gitignore
249
+ ├── requirements.txt # Python dependencies
250
+ ├── requirements-dev.txt # Dev/test dependencies
251
+ ├── render.yaml # Render deployment config
252
+ ├── sentinel.yml.example # Per-repo config template
253
+ ├── Dockerfile # For Render deployment
254
+ ├── pyproject.toml # Project metadata + tool configs
255
+ └── README.md # Installation, usage, architecture docs
256
+ ```
257
+
258
+ ---
259
+
260
+ ## 5. Week-by-Week Implementation Plan
261
+
262
+ ### WEEK 1: Foundation & Setup
263
+ **Goal:** Project skeleton running locally, all external services provisioned.
264
+
265
+ | # | Task | Type | Status |
266
+ |---|------|------|--------|
267
+ | 1.1 | Initialize git repo, create directory structure | Code | [ ] |
268
+ | 1.2 | Set up Python virtual environment + requirements.txt | Code | [ ] |
269
+ | 1.3 | Register GitHub App (github.com/settings/apps/new) | Config | [ ] |
270
+ | 1.4 | Provision Neon.tech Postgres database + create `pr_reviews` table | Config | [ ] |
271
+ | 1.5 | Provision Upstash Redis instance | Config | [ ] |
272
+ | 1.6 | Get Groq API key (console.groq.com) | Config | [ ] |
273
+ | 1.7 | Get Gemini API key (aistudio.google.com) | Config | [ ] |
274
+ | 1.8 | Create FastAPI skeleton (`app/main.py`) with health endpoint | Code | [ ] |
275
+ | 1.9 | Create `app/config.py` with pydantic-settings (all env vars) | Code | [ ] |
276
+ | 1.10 | Create Pydantic models (`Finding`, `PRReview` schemas) | Code | [ ] |
277
+ | 1.11 | Set up .env.example, .gitignore, pyproject.toml | Code | [ ] |
278
+ | 1.12 | Deploy FastAPI skeleton to Render (verify /health works) | Deploy | [ ] |
279
+ | 1.13 | Write unit tests for Finding schema validation | Test | [ ] |
280
+ | 1.14 | Set up GitHub Actions CI (lint + test on push) | CI/CD | [ ] |
281
+
282
+ ### WEEK 2: GitHub Integration
283
+ **Goal:** Receive webhooks, validate signatures, fetch PR data, post dummy comment.
284
+
285
+ | # | Task | Type | Status |
286
+ |---|------|------|--------|
287
+ | 2.1 | Implement HMAC-SHA256 webhook validation (`app/github/webhook.py`) | Code | [ ] |
288
+ | 2.2 | Implement GitHub API client — fetch PR diff (`app/github/client.py`) | Code | [ ] |
289
+ | 2.3 | Implement GitHub API client — fetch file contents | Code | [ ] |
290
+ | 2.4 | Implement GitHub API client — fetch commit history | Code | [ ] |
291
+ | 2.5 | Implement GitHub API client — post inline review comments | Code | [ ] |
292
+ | 2.6 | Implement GitHub API client — post PR summary comment | Code | [ ] |
293
+ | 2.7 | Create webhook endpoint (`POST /webhook/github`) in main.py | Code | [ ] |
294
+ | 2.8 | Implement comment formatter (`app/github/comment_formatter.py`) | Code | [ ] |
295
+ | 2.9 | Set up ngrok for local webhook testing | Config | [ ] |
296
+ | 2.10 | End-to-end test: open PR on test repo → dummy comment posted | Test | [ ] |
297
+ | 2.11 | Implement Redis cache check (skip if commit SHA already reviewed) | Code | [ ] |
298
+ | 2.12 | Write unit tests for HMAC validation (valid + invalid signatures) | Test | [ ] |
299
+ | 2.13 | Write unit tests for Redis cache hit/miss logic | Test | [ ] |
300
+
301
+ ### WEEK 3: Security Agent v1
302
+ **Goal:** Security Agent analyzes diffs, returns structured findings with CWE IDs.
303
+
304
+ | # | Task | Type | Status |
305
+ |---|------|------|--------|
306
+ | 3.1 | Install & configure Semgrep OSS with security rulesets | Config | [ ] |
307
+ | 3.2 | Create Semgrep LangChain tool (`app/tools/semgrep_tool.py`) | Code | [ ] |
308
+ | 3.3 | Install & configure Bandit for Python AST security analysis | Config | [ ] |
309
+ | 3.4 | Create Bandit LangChain tool (`app/tools/bandit_tool.py`) | Code | [ ] |
310
+ | 3.5 | Install & configure detect-secrets | Config | [ ] |
311
+ | 3.6 | Create detect-secrets LangChain tool (`app/tools/detect_secrets_tool.py`) | Code | [ ] |
312
+ | 3.7 | Write Security Agent system prompt (`prompts/security_system.md`) | Prompt | [ ] |
313
+ | 3.8 | Prepare OWASP Top 10 (2025) knowledge base (`knowledge/owasp_top10_2025.md`) | Data | [ ] |
314
+ | 3.9 | Implement Security Agent ReAct loop (`app/agents/security_agent.py`) | Code | [ ] |
315
+ | 3.10 | Implement base agent interface (`app/agents/base_agent.py`) | Code | [ ] |
316
+ | 3.11 | Set up Groq LLM client via LangChain (`ChatGroq`) | Code | [ ] |
317
+ | 3.12 | Implement structured output parsing (JSON → Finding objects) | Code | [ ] |
318
+ | 3.13 | Create 10 synthetic security-vulnerable PRs for testing | Data | [ ] |
319
+ | 3.14 | Evaluate Security Agent on synthetic dataset — measure precision/recall | Eval | [ ] |
320
+ | 3.15 | Iterate on system prompt based on eval results | Prompt | [ ] |
321
+
322
+ ### WEEK 4: Performance Agent v1
323
+ **Goal:** Performance Agent detects N+1 queries, complexity issues, returns findings.
324
+
325
+ | # | Task | Type | Status |
326
+ |---|------|------|--------|
327
+ | 4.1 | Create Python AST analyzer tool (`app/tools/ast_analyzer.py`) | Code | [ ] |
328
+ | 4.2 | Implement N+1 query pattern detector (Django/SQLAlchemy ORM patterns) | Code | [ ] |
329
+ | 4.3 | Create radon complexity tool (`app/tools/radon_tool.py`) | Code | [ ] |
330
+ | 4.4 | Write Performance Agent system prompt (`prompts/performance_system.md`) | Prompt | [ ] |
331
+ | 4.5 | Prepare DDIA patterns knowledge base (`knowledge/ddia_patterns.md`) | Data | [ ] |
332
+ | 4.6 | Implement Performance Agent ReAct loop (`app/agents/performance_agent.py`) | Code | [ ] |
333
+ | 4.7 | Fetch 10 Django PRs with known performance issues for testing | Data | [ ] |
334
+ | 4.8 | Evaluate Performance Agent on Django PR dataset | Eval | [ ] |
335
+ | 4.9 | Iterate on system prompt based on eval results | Prompt | [ ] |
336
+
337
+ ### WEEK 5: Style Agent v1
338
+ **Goal:** Style Agent checks naming, complexity, dead code, test coverage gaps.
339
+
340
+ | # | Task | Type | Status |
341
+ |---|------|------|--------|
342
+ | 5.1 | Create linter tool wrapper — Ruff/ESLint/pylint (`app/tools/linter_tool.py`) | Code | [ ] |
343
+ | 5.2 | Implement dead code detector (unused imports, unreachable branches) | Code | [ ] |
344
+ | 5.3 | Write Style Agent system prompt (`prompts/style_system.md`) | Prompt | [ ] |
345
+ | 5.4 | Prepare language style guides knowledge base (`knowledge/style_guides/`) | Data | [ ] |
346
+ | 5.5 | Implement Style Agent ReAct loop (`app/agents/style_agent.py`) | Code | [ ] |
347
+ | 5.6 | Fetch 10 Exercism PRs with style/refactoring issues | Data | [ ] |
348
+ | 5.7 | Evaluate Style Agent on Exercism dataset | Eval | [ ] |
349
+ | 5.8 | Iterate on system prompt based on eval results | Prompt | [ ] |
350
+
351
+ ### WEEK 6: ChromaDB + RAG Context
352
+ **Goal:** Full RAG pipeline — embed repo, retrieve context, inject into agents.
353
+
354
+ | # | Task | Type | Status |
355
+ |---|------|------|--------|
356
+ | 6.1 | Set up sentence-transformers embedding pipeline (`app/context/embedder.py`) | Code | [ ] |
357
+ | 6.2 | **Run embedding model on RTX 5070 via WSL** — benchmark speed | GPU | [ ] |
358
+ | 6.3 | Implement ChromaDB repo indexer (`app/context/indexer.py`) — chunk files, upsert | Code | [ ] |
359
+ | 6.4 | Implement RAG retriever (`app/context/retriever.py`) — query by diff content | Code | [ ] |
360
+ | 6.5 | Integrate RAG context into Security Agent | Code | [ ] |
361
+ | 6.6 | Integrate RAG context into Performance Agent | Code | [ ] |
362
+ | 6.7 | Integrate RAG context into Style Agent | Code | [ ] |
363
+ | 6.8 | Evaluate: does cross-file RAG context improve recall vs. diff-only? | Eval | [ ] |
364
+ | 6.9 | Optimize chunk size and retrieval top-k for quality vs. latency | Code | [ ] |
365
+ | 6.10 | Limit repo index to 500 most recently changed files (Render memory constraint) | Code | [ ] |
366
+
367
+ ### WEEK 7: Synthesizer Agent
368
+ **Goal:** Deduplication, conflict resolution, Health Score, executive summary, full pipeline.
369
+
370
+ | # | Task | Type | Status |
371
+ |---|------|------|--------|
372
+ | 7.1 | Write Synthesizer system prompt (`prompts/synthesizer_system.md`) | Prompt | [ ] |
373
+ | 7.2 | Implement deduplication logic (cosine similarity on findings via ChromaDB) | Code | [ ] |
374
+ | 7.3 | Implement severity conflict resolution (Security > Performance > Style precedence) | Code | [ ] |
375
+ | 7.4 | Implement composite re-ranking: severity × exploitability × fix_complexity | Code | [ ] |
376
+ | 7.5 | Implement PR Health Score formula (0-100) (`app/services/health_score.py`) | Code | [ ] |
377
+ | 7.6 | Implement executive summary generation (3-5 sentences) | Code | [ ] |
378
+ | 7.7 | Implement auto-block logic (Critical findings → block merge recommendation) | Code | [ ] |
379
+ | 7.8 | Implement Synthesizer Agent (`app/agents/synthesizer.py`) | Code | [ ] |
380
+ | 7.9 | Build main orchestrator (`app/services/orchestrator.py`) — ties everything together | Code | [ ] |
381
+ | 7.10 | Implement Gemini Flash fallback when Groq quota exhausted | Code | [ ] |
382
+ | 7.11 | Full end-to-end pipeline test: PR → agents → synthesizer → GitHub comments | Test | [ ] |
383
+ | 7.12 | Write unit tests for Health Score formula | Test | [ ] |
384
+ | 7.13 | Write unit tests for deduplication with synthetic conflicting findings | Test | [ ] |
385
+ | 7.14 | Implement Neon Postgres write (store review record) | Code | [ ] |
386
+
387
+ ### WEEK 8: Next.js Dashboard
388
+ **Goal:** Dashboard on Vercel showing review history, Health Scores, charts.
389
+
390
+ | # | Task | Type | Status |
391
+ |---|------|------|--------|
392
+ | 8.1 | Initialize Next.js app in `dashboard/` with TypeScript | Code | [ ] |
393
+ | 8.2 | Deploy to Vercel (connect GitHub repo) | Deploy | [ ] |
394
+ | 8.3 | Create TypeScript types matching backend schemas (`lib/types.ts`) | Code | [ ] |
395
+ | 8.4 | Create API fetch wrapper (`lib/api.ts`) — calls FastAPI backend | Code | [ ] |
396
+ | 8.5 | Build `HealthScoreRing` component (circular gauge, animated) | Code | [ ] |
397
+ | 8.6 | Build `SeverityBadge` component (color-coded pills) | Code | [ ] |
398
+ | 8.7 | Build `TrendChart` component (Recharts LineChart, 30-day trend) | Code | [ ] |
399
+ | 8.8 | Build `FindingsTable` component (sortable, filterable) | Code | [ ] |
400
+ | 8.9 | Build `AgentBreakdown` component (3-column cards) | Code | [ ] |
401
+ | 8.10 | Build `/` page — Repository Overview (connected repos, avg scores) | Code | [ ] |
402
+ | 8.11 | Build `/repos/[owner]/[repo]` page — Repo Detail (charts, PR list) | Code | [ ] |
403
+ | 8.12 | Build `/repos/[owner]/[repo]/prs/[number]` page — PR Review Detail | Code | [ ] |
404
+ | 8.13 | Add FastAPI CORS middleware for Vercel domain | Code | [ ] |
405
+ | 8.14 | Implement REST API endpoints on FastAPI side for dashboard | Code | [ ] |
406
+
407
+ ### WEEK 9: Polish & Evaluation
408
+ **Goal:** Full benchmark, prompt tuning, latency optimization, documentation.
409
+
410
+ | # | Task | Type | Status |
411
+ |---|------|------|--------|
412
+ | 9.1 | Curate full 20-PR benchmark dataset (Django, Next.js, synthetic, Exercism) | Data | [ ] |
413
+ | 9.2 | Build evaluation harness (`tests/eval/run_eval.py`) | Code | [ ] |
414
+ | 9.3 | Run full benchmark — measure precision, recall, latency per agent | Eval | [ ] |
415
+ | 9.4 | Tune agent prompts to reduce false positives (target: <30% FP rate) | Prompt | [ ] |
416
+ | 9.5 | Implement confidence threshold: findings <0.6 shown as 'Suggestions' | Code | [ ] |
417
+ | 9.6 | Latency optimization: measure p50/p95/p99 per PR size bucket | Eval | [ ] |
418
+ | 9.7 | Optimize Groq API calls (reduce token usage, cache prompts) | Code | [ ] |
419
+ | 9.8 | Write comprehensive README.md | Docs | [ ] |
420
+ | 9.9 | Write installation guide in README | Docs | [ ] |
421
+ | 9.10 | Add GitHub Actions pre-warm cron (ping /health every 10min) | CI/CD | [ ] |
422
+
423
+ ### WEEK 10: Launch & Promotion
424
+ **Goal:** Live on GitHub Marketplace, installed on public repos, launch posts published.
425
+
426
+ | # | Task | Type | Status |
427
+ |---|------|------|--------|
428
+ | 10.1 | Install Sentinel AI on 3 public open-source repos | Launch | [ ] |
429
+ | 10.2 | Record demo video (screen recording: PR opened → comments posted) | Content | [ ] |
430
+ | 10.3 | Write Dev.to / HackerNews launch post | Content | [ ] |
431
+ | 10.4 | Write LinkedIn demo post | Content | [ ] |
432
+ | 10.5 | Submit to GitHub Marketplace (needs privacy policy, logo, description) | Launch | [ ] |
433
+ | 10.6 | Create sentinel.yml.example per-repo config template | Code | [ ] |
434
+ | 10.7 | Monitor first 48 hours — fix any production bugs | Ops | [ ] |
435
+
436
+ ---
437
+
438
+ ## 6. Non-Coding Tasks
439
+
440
+ These tasks don't involve writing code, but they are essential to delivering the project:
441
+
442
+ ### 6.1 External Service Provisioning
443
+
444
+ | Service | Action | URL | Notes |
445
+ |---------|--------|-----|-------|
446
+ | **GitHub App** | Register new app | github.com/settings/apps/new | Need: App ID, Private Key (.pem), Webhook Secret |
447
+ | **Groq** | Get API key | console.groq.com | Free: 14,400 req/day |
448
+ | **Google AI Studio** | Get Gemini key | aistudio.google.com | Free: 1M tokens/day |
449
+ | **Neon.tech** | Create Postgres DB | console.neon.tech | Free: 512MB, create `pr_reviews` table |
450
+ | **Upstash** | Create Redis instance | console.upstash.com | Free: 10K req/day |
451
+ | **Render** | Create web service | dashboard.render.com | Free tier, connect GitHub repo |
452
+ | **Vercel** | Create project | vercel.com/new | Free hobby tier, connect dashboard/ |
453
+ | **ngrok** | Install for local testing | ngrok.com | Free: 1 tunnel |
454
+
455
+ ### 6.2 GitHub App Configuration
456
+
457
+ **Permissions required:**
458
+ - Pull requests: Read & Write
459
+ - Contents: Read
460
+ - Metadata: Read
461
+ - Commit statuses: Write (optional)
462
+
463
+ **Webhook events to subscribe:**
464
+ - `pull_request` (opened, synchronize, reopened, ready_for_review)
465
+ - `pull_request_review_comment` (for @sentinel-ai re-review)
466
+
467
+ ### 6.3 Data Curation Tasks
468
+
469
+ | Dataset | Source | Count | Purpose |
470
+ |---------|--------|-------|---------|
471
+ | Synthetic security PRs | Hand-crafted | 10 PRs | SQL injection, XSS, IDOR, hardcoded secrets |
472
+ | Django security PRs | github.com/django/django | 5 PRs | Real-world Python security fixes |
473
+ | Next.js performance PRs | github.com/vercel/next.js | 5 PRs | JS/TS performance changes |
474
+ | Exercism style PRs | github.com/exercism | 5 PRs | Naming, complexity, documentation issues |
475
+ | Mixed benchmark set | All above | 20 PRs | Full evaluation benchmark |
476
+
477
+ ### 6.4 Knowledge Base Curation
478
+
479
+ | Document | Source | For Agent |
480
+ |----------|--------|-----------|
481
+ | OWASP Top 10 (2025) | owasp.org | Security Agent RAG |
482
+ | DDIA performance patterns | "Designing Data-Intensive Applications" | Performance Agent RAG |
483
+ | Python style guide (PEP 8) | python.org | Style Agent RAG |
484
+ | JavaScript style guide | Various (Airbnb, Google) | Style Agent RAG |
485
+ | TypeScript best practices | typescript-eslint.io | Style Agent RAG |
486
+
487
+ ---
488
+
489
+ ## 7. GPU / WSL Tasks
490
+
491
+ Your **RTX 5070** with WSL will be used for:
492
+
493
+ ### 7.1 sentence-transformers Embedding (Required)
494
+
495
+ **No training needed** — these are pre-trained models used for embedding generation.
496
+
497
+ ```
498
+ Model: all-MiniLM-L6-v2 (or all-mpnet-base-v2 for higher quality)
499
+ Task: Embed code chunks for ChromaDB indexing
500
+ Where: Runs locally during repo indexing (can also run on Render CPU, slower)
501
+ GPU benefit: ~10-50x faster embedding generation vs CPU
502
+ ```
503
+
504
+ **Setup steps:**
505
+ 1. Ensure CUDA toolkit installed in WSL (`nvidia-smi` should show RTX 5070)
506
+ 2. `pip install sentence-transformers torch` (with CUDA support)
507
+ 3. Benchmark: embed 1000 code chunks, measure time GPU vs CPU
508
+ 4. Decision: if embedding is fast enough on CPU, skip GPU for deployment simplicity
509
+
510
+ ### 7.2 Local LLM Testing (Optional, Recommended)
511
+
512
+ Running a local LLM for testing avoids burning Groq API quota during development:
513
+
514
+ ```
515
+ Model: Llama-3.1-8B-Instruct (via Ollama or vLLM)
516
+ Task: Test agent prompts locally before hitting Groq
517
+ GPU benefit: Full inference locally, no API calls, no quota burn
518
+ ```
519
+
520
+ **Setup steps:**
521
+ 1. Install Ollama in WSL: `curl -fsSL https://ollama.com/install.sh | sh`
522
+ 2. Pull model: `ollama pull llama3.1:8b`
523
+ 3. Use for prompt iteration — switch to Groq (70B) for production quality
524
+
525
+ ### 7.3 What You Do NOT Need to Train
526
+
527
+ | Item | Reason |
528
+ |------|--------|
529
+ | LLM (Llama-3.3-70B) | Used via Groq API — inference only, no fine-tuning |
530
+ | sentence-transformers | Pre-trained model, no fine-tuning needed for code embeddings |
531
+ | Semgrep/Bandit/radon | Rule-based tools, no ML training |
532
+ | Agent prompts | Iterative prompt engineering, not model training |
533
+
534
+ **Bottom line:** This project is an **inference and orchestration** project, not a training project. Your GPU is used for fast local embeddings and optional local LLM testing — no model training required.
535
+
536
+ ---
537
+
538
+ ## 8. Data Models & Schemas
539
+
540
+ ### 8.1 Finding (per agent output)
541
+
542
+ ```python
543
+ class Finding(BaseModel):
544
+ agent: Literal['security', 'performance', 'style']
545
+ file_path: str # e.g. 'src/auth/login.py'
546
+ line_start: int
547
+ line_end: int
548
+ severity: Literal['critical', 'high', 'medium', 'low']
549
+ category: str # e.g. 'sql_injection', 'n+1_query', 'naming'
550
+ title: str # Short one-liner
551
+ description: str # Full explanation
552
+ suggested_fix: str # Corrected code snippet
553
+ cwe_id: Optional[str] # For security findings (e.g. 'CWE-89')
554
+ confidence: float # 0.0 – 1.0
555
+ ```
556
+
557
+ ### 8.2 SynthesizedReview (Synthesizer output)
558
+
559
+ ```python
560
+ class SynthesizedReview(BaseModel):
561
+ health_score: int # 0-100
562
+ executive_summary: str # 3-5 sentences
563
+ recommendation: Literal['approve', 'request_changes', 'block']
564
+ findings: List[Finding] # Deduplicated, re-ranked
565
+ critical_count: int
566
+ high_count: int
567
+ medium_count: int
568
+ low_count: int
569
+ duration_ms: int
570
+ ```
571
+
572
+ ### 8.3 PR Review Record (Neon Postgres)
573
+
574
+ ```sql
575
+ CREATE TABLE pr_reviews (
576
+ id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
577
+ repo_full_name TEXT NOT NULL,
578
+ pr_number INT NOT NULL,
579
+ commit_sha TEXT NOT NULL,
580
+ health_score INT NOT NULL,
581
+ critical_count INT DEFAULT 0,
582
+ high_count INT DEFAULT 0,
583
+ medium_count INT DEFAULT 0,
584
+ low_count INT DEFAULT 0,
585
+ summary TEXT,
586
+ findings JSONB NOT NULL,
587
+ duration_ms INT,
588
+ created_at TIMESTAMPTZ DEFAULT NOW()
589
+ );
590
+
591
+ CREATE INDEX idx_pr_reviews_repo ON pr_reviews(repo_full_name);
592
+ CREATE INDEX idx_pr_reviews_sha ON pr_reviews(commit_sha);
593
+ ```
594
+
595
+ ---
596
+
597
+ ## 9. API Endpoints
598
+
599
+ | Endpoint | Method | Description |
600
+ |----------|--------|-------------|
601
+ | `POST /webhook/github` | POST | Receive GitHub webhook, validate HMAC, enqueue analysis |
602
+ | `GET /api/repos/{owner}/{repo}/reviews` | GET | Paginated PR review list + Health Score trend |
603
+ | `GET /api/repos/{owner}/{repo}/reviews/{pr_number}` | GET | Full findings for specific PR |
604
+ | `GET /api/repos/{owner}/{repo}/stats` | GET | Aggregate stats: avg score, top categories, 30-day trend |
605
+ | `POST /api/repos/{owner}/{repo}/reanalyze/{pr_number}` | POST | Re-trigger analysis (bypass cache) |
606
+ | `GET /health` | GET | Health check: agent status, Groq quota remaining |
607
+
608
+ ---
609
+
610
+ ## 10. Agent Prompt Design
611
+
612
+ Each agent prompt must include:
613
+
614
+ 1. **Role definition** — who the agent is (e.g., "senior AppSec engineer")
615
+ 2. **Scope boundaries** — what to look for and what to ignore
616
+ 3. **Output schema** — exact JSON structure expected
617
+ 4. **Severity guidelines** — when to use Critical vs. High vs. Medium vs. Low
618
+ 5. **Confidence scoring** — how to self-assess confidence (0.0-1.0)
619
+ 6. **Examples** — 2-3 few-shot examples of good findings
620
+ 7. **Anti-patterns** — common false positives to avoid
621
+
622
+ Prompts are stored in `prompts/` as Markdown files and loaded at agent initialization.
623
+
624
+ ---
625
+
626
+ ## 11. Evaluation Plan
627
+
628
+ ### 11.1 Metrics
629
+
630
+ | Metric | Target | Formula |
631
+ |--------|--------|---------|
632
+ | Security precision | >70% | true_positives / (true_positives + false_positives) |
633
+ | Performance recall | >60% | true_positives / (true_positives + false_negatives) |
634
+ | Deduplication rate | >15% | duplicates_removed / total_findings |
635
+ | e2e latency (p95) | <20s | Time from webhook to first comment posted |
636
+ | Groq quota usage | <10K/day | Total API calls per day |
637
+ | System uptime | >95% | (total_time - downtime) / total_time |
638
+
639
+ ### 11.2 Evaluation Harness
640
+
641
+ Located in `tests/eval/`:
642
+ - `dataset/` — 20 PRs as JSON fixtures (diff, expected findings, ground truth labels)
643
+ - `run_eval.py` — Runs each PR through full pipeline, compares output vs ground truth
644
+ - `metrics.py` — Computes precision, recall, F1, latency percentiles
645
+ - Results logged to console + optionally to LangSmith (free tier)
646
+
647
+ ---
648
+
649
+ ## 12. Deployment Checklist
650
+
651
+ ### Render (FastAPI Backend)
652
+ - [ ] `render.yaml` configured with build + start commands
653
+ - [ ] Environment variables set in Render dashboard
654
+ - [ ] Health check endpoint (`/health`) configured
655
+ - [ ] Auto-deploy from `main` branch enabled
656
+
657
+ ### Vercel (Next.js Dashboard)
658
+ - [ ] Connected to GitHub repo `dashboard/` directory
659
+ - [ ] Environment variable: `NEXT_PUBLIC_API_URL` pointing to Render backend
660
+ - [ ] Custom domain (optional)
661
+
662
+ ### GitHub App
663
+ - [ ] App registered with correct permissions
664
+ - [ ] Webhook URL set to Render endpoint (`/webhook/github`)
665
+ - [ ] Private key (.pem) downloaded and stored securely
666
+ - [ ] App installed on test repo for development
667
+
668
+ ### GitHub Actions
669
+ - [ ] CI workflow: lint (ruff) + test (pytest) on push/PR
670
+ - [ ] Pre-warm cron: ping /health every 10 minutes during working hours
671
+
672
+ ---
673
+
674
+ ## 13. Progress Tracker
675
+
676
+ ### Overall Status
677
+
678
+ | Week | Milestone | Status | Notes |
679
+ |------|-----------|--------|-------|
680
+ | 1 | Foundation & Setup | COMPLETE | All services provisioned, project scaffolded |
681
+ | 2 | GitHub Integration | COMPLETE | E2E tested: webhook → fetch → comment on PR #1 |
682
+ | 3 | Security Agent v1 | COMPLETE | Bandit + Llama-3.3-70B, live-tested on PR #3, 4 findings |
683
+ | 4 | Performance Agent v1 | COMPLETE | Radon complexity + Llama-3.3-70B, 3 findings on PR #4 |
684
+ | 5 | Style Agent v1 | COMPLETE | Ruff linter + Llama-3.3-70B, 6 findings on PR #4 |
685
+ | 6 | ChromaDB + RAG Context | COMPLETE | sentence-transformers + ChromaDB, integrated into all agents |
686
+ | 7 | Synthesizer Agent | COMPLETE | Dedup, conflict resolution, Health Score formula, exec summary |
687
+ | 8 | Next.js Dashboard | COMPLETE | Next.js + Tailwind + Recharts, mock data, all pages |
688
+ | 9 | Polish & Evaluation | COMPLETE | Eval harness, metrics, README, DB persistence |
689
+ | 10 | Launch & Promotion | COMPLETE | Render config, Vercel ready, API endpoints for dashboard |
690
+
691
+ ### Key Decisions Log
692
+
693
+ | Date | Decision | Rationale |
694
+ |------|----------|-----------|
695
+ | 2026-03-19 | Project plan created | Starting from scratch, PDF spec as source of truth |
696
+ | 2026-03-19 | Project renamed to "Ninja Code Guard" | User's personal branding choice |
697
+ | 2026-03-19 | GitHub App: "Ninja's Code Guard" (ID: 3133457) | Registered and tested with live PR |
698
+ | 2026-03-19 | Test repo: ninjacode911/codeguard-test | Used for e2e webhook testing |
699
+ | 2026-03-19 | Fail-open pattern for Redis cache | Missing a review is worse than duplicating |
700
+ | 2026-03-19 | Background tasks for webhook processing | GitHub's 10s timeout requires async processing |
701
+
702
+ ---
703
+
704
+ *Last updated: 2026-03-19*
README.md ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Ninja Code Guard
2
+
3
+ **Multi-agent code review system that reviews GitHub pull requests the way a senior engineering team would.**
4
+
5
+ Three specialized AI agents — Security, Performance, and Style — analyze your code in parallel, then a Synthesizer merges their findings into a single, prioritized, non-overlapping review with inline GitHub comments.
6
+
7
+ ## How It Works
8
+
9
+ ```
10
+ PR opened on GitHub
11
+
12
+
13
+ Webhook received ──→ HMAC-SHA256 validated
14
+
15
+
16
+ Redis cache check ──→ Skip if already reviewed
17
+
18
+
19
+ Fetch PR data ──→ Diff + full file contents
20
+
21
+
22
+ RAG Context ──→ Embed files → ChromaDB → Retrieve related code
23
+
24
+
25
+ ┌─────────────────────────────────────────┐
26
+ │ 3 Agents run IN PARALLEL │
27
+ │ 🔒 Security ⚡ Performance ✏️ Style │
28
+ │ Bandit+LLM Radon+LLM Ruff+LLM │
29
+ └─────────────┬───────────────────────────┘
30
+
31
+
32
+ Synthesizer ──→ Deduplicate → Rank → Score → Summarize
33
+
34
+
35
+ Post to GitHub ──→ Inline comments + Summary with Health Score
36
+ ```
37
+
38
+ ## What Each Agent Does
39
+
40
+ | Agent | Focus | Static Tools | Example Findings |
41
+ |-------|-------|-------------|------------------|
42
+ | 🔒 **Security** | Vulnerabilities, auth, secrets | Bandit, detect-secrets | SQL injection, hardcoded API keys, weak crypto |
43
+ | ⚡ **Performance** | Efficiency, scalability | Radon complexity | N+1 queries, O(n²) loops, blocking I/O |
44
+ | ✏️ **Style** | Readability, maintainability | Ruff linter | Unused imports, bad naming, dead code |
45
+ | 🧠 **Synthesizer** | Merge & prioritize | — | Deduplication, conflict resolution, Health Score |
46
+
47
+ ## Tech Stack
48
+
49
+ | Layer | Technology | Why |
50
+ |-------|-----------|-----|
51
+ | LLM | Groq (Llama-3.3-70B) | 500+ tokens/sec, free 14.4K req/day |
52
+ | Agents | LangChain + Structured Output | Typed JSON responses, prompt templates |
53
+ | Backend | FastAPI on Render | Async, auto OpenAPI docs, free tier |
54
+ | Vector DB | ChromaDB + sentence-transformers | RAG context, semantic code search |
55
+ | Cache | Upstash Redis | Prevent duplicate reviews |
56
+ | Database | Neon Postgres | Review history, Health Score trends |
57
+ | Dashboard | Next.js on Vercel | Review history, trend charts |
58
+ | GitHub | GitHub App (webhooks) | Inline PR comments, bot identity |
59
+
60
+ ## Quick Start
61
+
62
+ ### Prerequisites
63
+ - Python 3.11+
64
+ - Groq API key (free at console.groq.com)
65
+ - GitHub App (registered at github.com/settings/apps)
66
+
67
+ ### Setup
68
+
69
+ ```bash
70
+ # Clone and setup
71
+ git clone https://github.com/ninjacode911/ninja-code-guard
72
+ cd ninja-code-guard
73
+ python -m venv .venv && source .venv/bin/activate # Windows: .venv\Scripts\activate
74
+ pip install -r requirements.txt
75
+
76
+ # Configure
77
+ cp .env.example .env
78
+ # Edit .env with your API keys
79
+
80
+ # Run
81
+ uvicorn app.main:app --reload --port 8000
82
+ ```
83
+
84
+ ### Environment Variables
85
+
86
+ ```env
87
+ GROQ_API_KEY=gsk_...
88
+ GITHUB_APP_ID=123456
89
+ GITHUB_APP_PRIVATE_KEY_PATH=./keys/app.pem
90
+ GITHUB_WEBHOOK_SECRET=...
91
+ DATABASE_URL=postgresql://...
92
+ UPSTASH_REDIS_URL=rediss://...
93
+ ```
94
+
95
+ ## Architecture
96
+
97
+ **4 Layers:**
98
+ - **GitHub Layer** — Webhooks, PR events, inline comments
99
+ - **Orchestration Layer** — FastAPI, agent dispatch, asyncio.gather
100
+ - **Agent Layer** — 3 domain agents + synthesizer (LangChain prompt → structured-output chains)
101
+ - **Knowledge Layer** — ChromaDB (RAG), Redis (cache), Postgres (history)
102
+
103
+ **Key Design Patterns:**
104
+ - Template Method — All agents share a base class, override only prompt + tools
105
+ - Structured Output — LLM constrained to return valid JSON (Pydantic schema)
106
+ - Fail-Open Cache — If Redis is down, proceed with analysis
107
+ - Background Tasks — Return 200 to GitHub immediately, review asynchronously
108
+ - Parallel Execution — asyncio.gather runs 3 agents concurrently
109
+
110
+ ## Test Results
111
+
112
+ ```
113
+ PR #4 on codeguard-test repo:
114
+ Security: 5 findings (SQL injection, weak crypto, hardcoded secrets)
115
+ Performance: 3 findings (O(n²) loop, blocking I/O, high complexity)
116
+ Style: 6 findings (unused imports, magic numbers, bad naming)
117
+ Total: 14 findings
118
+ Health Score: 14/100
119
+ Latency: ~13 seconds (after model load)
120
+ ```
121
+
122
+ ## Running Tests
123
+
124
+ ```bash
125
+ pytest tests/unit/ -v
126
+ ```
127
+
128
+ ## Project Structure
129
+
130
+ ```
131
+ app/
132
+ agents/ # Security, Performance, Style, Synthesizer
133
+ tools/ # Bandit, detect-secrets, Radon, Ruff wrappers
134
+ context/ # RAG pipeline (embedder, indexer, retriever)
135
+ github/ # Webhook validation, API client, comment formatter
136
+ models/ # Pydantic schemas (Finding, SynthesizedReview)
137
+ db/ # Redis cache, Postgres queries
138
+ services/ # Health Score calculator
139
+ dashboard/ # Next.js frontend (Vercel)
140
+ tests/ # Unit tests + evaluation harness
141
+ prompts/ # Agent system prompts (Markdown)
142
+ docs/ # Week-by-week documentation
143
+ ```
144
+
145
+ ## Documentation
146
+
147
+ Detailed week-by-week documentation available in `docs/`:
148
+ - [Week 1: Foundation & Setup](docs/WEEK1_FOUNDATION_AND_SETUP.md)
149
+ - [Week 2: GitHub Integration](docs/WEEK2_GITHUB_INTEGRATION.md)
150
+ - [Week 3: Security Agent](docs/WEEK3_SECURITY_AGENT.md)
151
+ - [Week 4: Performance Agent](docs/WEEK4_PERFORMANCE_AGENT.md)
152
+ - [Week 5: Style Agent](docs/WEEK5_STYLE_AGENT.md)
153
+ - [Week 6: RAG & Parallel Execution](docs/WEEK6_RAG_AND_PARALLEL.md)
154
+
155
+ ## License
156
+
157
+ MIT
158
+
159
+ ---
160
+
161
+ Built by [ninjacode911](https://github.com/ninjacode911)
app/__init__.py ADDED
File without changes
app/agents/__init__.py ADDED
File without changes
app/agents/base_agent.py ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Base Agent Interface
3
+ =====================
4
+
5
+ All domain agents (Security, Performance, Style) inherit from this base class.
6
+ It provides shared infrastructure:
7
+
8
+ 1. **Groq LLM client** — ChatGroq configured with Llama-3.3-70B (`llama-3.3-70b-versatile`)
9
+ 2. **Structured output** — LLM returns typed Finding objects, not raw text
10
+ 3. **Error handling** — graceful fallback if the LLM call fails
11
+ 4. **Timing** — measures how long each agent takes (for latency metrics)
12
+
13
+ Design pattern: Template Method
14
+ - The base class defines the algorithm skeleton (receive diff → run tools → call LLM → return findings)
15
+ - Subclasses override specific steps (agent_name, system_prompt, run_static_analysis)
16
+ - This prevents code duplication across 3 agents that follow the same flow
17
+
18
+ Why LangChain?
19
+ - Provides a unified interface across LLM providers (Groq, Gemini, OpenAI)
20
+ - If Groq goes down, we swap to Gemini by changing one line
21
+ - Structured output parsing is built in (with_structured_output)
22
+ - Prompt templates with variable substitution
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import time
28
+ from abc import ABC, abstractmethod
29
+
30
+ import structlog
31
+ from langchain_core.prompts import ChatPromptTemplate
32
+ from langchain_groq import ChatGroq
33
+ from pydantic import BaseModel, Field
34
+
35
+ from app.config import settings
36
+ from app.github.client import PRData
37
+ from app.models.findings import Finding
38
+
39
+ logger = structlog.get_logger()
40
+
41
+
42
class FindingOutput(BaseModel):
    """
    The schema we ask the LLM to produce for each finding.

    This is slightly different from our internal Finding model because:
    - The LLM doesn't know which agent it is (the agent name is added
      afterwards, in BaseAgent._convert_to_findings)
    - `severity` is a free-form string here; it is normalized and
      validated when converting to the internal Finding model
    - `confidence` is accepted as any float and clamped to [0.0, 1.0]
      during conversion

    This class is defined BEFORE AgentFindings so that the
    `list[FindingOutput]` annotation there can be resolved as soon as
    AgentFindings is created.
    """

    file_path: str = Field(description="Path to the file (e.g., 'app.py')")
    line_start: int = Field(description="Starting line number of the issue")
    line_end: int = Field(description="Ending line number of the issue")
    severity: str = Field(description="One of: critical, high, medium, low")
    category: str = Field(description="Issue category (e.g., 'sql_injection', 'hardcoded_secret')")
    title: str = Field(description="Short one-line title of the finding")
    description: str = Field(description="Detailed explanation of the issue and its impact")
    suggested_fix: str = Field(default="", description="Corrected code snippet")
    cwe_id: str | None = Field(default=None, description="CWE ID if applicable (e.g., 'CWE-89')")
    confidence: float = Field(description="Confidence score from 0.0 to 1.0")


class AgentFindings(BaseModel):
    """
    Schema for the LLM's structured output.

    By wrapping findings in a Pydantic model, we can use LangChain's
    `with_structured_output()`, which asks the model for a response
    matching this schema and parses it into an AgentFindings instance.
    No more parsing raw text!

    Notes:
    - How the schema is conveyed to the model (tool calling vs. JSON
      mode) is decided by the provider integration, not by this class.
    - No retry-on-validation-failure is configured in this module; a
      response that fails validation surfaces as an exception to the
      caller (BaseAgent.review catches it and returns no findings).
    """

    findings: list[FindingOutput] = Field(
        default_factory=list,
        description="List of security/performance/style findings",
    )


# FindingOutput is already defined above, so the annotation is normally
# resolved at class creation. The rebuild is kept as a cheap safeguard in
# case the definition order changes again.
AgentFindings.model_rebuild()
91
+
92
+
93
class BaseAgent(ABC):
    """
    Abstract base class for all domain agents.

    Subclasses must implement:
    - agent_name: which agent this is ("security", "performance", "style")
    - system_prompt: the detailed system prompt for the LLM

    Subclasses may override:
    - run_static_analysis(): optional static tools (Bandit, radon, Ruff, ...).
      The default implementation runs nothing (LLM-only review).

    Usage:
        agent = SecurityAgent()
        findings = await agent.review(pr_data)
    """

    # Prompt-size limits. Kept as class attributes so a subclass (or a test)
    # can tune them without re-implementing the methods that use them.
    MAX_FILE_LINES = 500      # lines kept per file before truncation
    MAX_DIFF_CHARS = 15000    # characters of the diff sent to the LLM

    # Severities accepted by the internal Finding model, and the value
    # substituted when the LLM returns anything else.
    VALID_SEVERITIES = ("critical", "high", "medium", "low")
    DEFAULT_SEVERITY = "medium"

    def __init__(self):
        """
        Initialize the LLM client.

        ChatGroq connects to Groq's API, here using the
        `llama-3.3-70b-versatile` model. Fast inference matters: each
        agent should finish in a few seconds so the full review stays
        within the latency budget.

        Temperature=0.1: We want nearly deterministic output. Code review
        should be consistent — the same code should get the same findings.
        A small temperature (not 0) allows slight variation to avoid
        getting stuck in repetitive patterns.
        """
        self.llm = ChatGroq(
            model="llama-3.3-70b-versatile",
            api_key=settings.groq_api_key,
            temperature=0.1,
            max_tokens=4096,
        )

    @property
    @abstractmethod
    def agent_name(self) -> str:
        """The agent identifier: 'security', 'performance', or 'style'."""
        ...

    @property
    @abstractmethod
    def system_prompt(self) -> str:
        """The full system prompt for this agent."""
        ...

    async def run_static_analysis(self, pr_data: PRData) -> str:
        """
        Run static analysis tools on the PR files.

        Override in subclasses to run agent-specific tools:
        - SecurityAgent: Bandit + detect-secrets
        - PerformanceAgent: radon
        - StyleAgent: Ruff

        Returns a string summary of tool findings to include in the LLM
        prompt. Default: no static analysis (LLM-only review), i.e. "".
        """
        return ""

    def _build_prompt(self) -> ChatPromptTemplate:
        """
        Build the LangChain prompt template.

        ChatPromptTemplate.from_messages() creates a two-message prompt:
        - ("system", ...) → the system message (agent persona + instructions)
        - ("human", ...)  → the user message (the actual PR data to review)

        Variables in {curly_braces} are substituted at runtime with .ainvoke().
        NOTE(review): the system prompt is passed through the same template
        engine, so literal braces in a prompt file would presumably need to
        be escaped — confirm against the files in prompts/.
        """
        return ChatPromptTemplate.from_messages([
            ("system", self.system_prompt),
            ("human", (
                "## PR Diff\n"
                "```diff\n{diff}\n```\n\n"
                "## Changed File Contents\n"
                "{file_contents}\n\n"
                "## Static Analysis Results\n"
                "{static_analysis}\n\n"
                "{rag_context}\n\n"
                "Analyze this PR and return your findings as structured JSON."
            )),
        ])

    def _convert_to_findings(self, agent_output: AgentFindings) -> list[Finding]:
        """
        Convert the LLM's output to our internal Finding model.

        This adds the agent name and validates/clamps values:
        - Severity is lowercased, stripped, and replaced by
          DEFAULT_SEVERITY when it is not one of VALID_SEVERITIES
        - Confidence is clamped to [0.0, 1.0]
        - A finding that fails conversion is logged and skipped, so one
          malformed item does not discard the rest
        """
        findings: list[Finding] = []
        for f in agent_output.findings:
            try:
                severity = f.severity.lower().strip()
                if severity not in self.VALID_SEVERITIES:
                    severity = self.DEFAULT_SEVERITY  # Default for ambiguous severity

                confidence = max(0.0, min(1.0, f.confidence))

                findings.append(
                    Finding(
                        agent=self.agent_name,
                        file_path=f.file_path,
                        line_start=f.line_start,
                        line_end=f.line_end,
                        severity=severity,
                        category=f.category,
                        title=f.title,
                        description=f.description,
                        suggested_fix=f.suggested_fix,
                        cwe_id=f.cwe_id,
                        confidence=confidence,
                    )
                )
            except Exception as e:
                # Deliberately broad: this is a best-effort conversion of
                # untrusted LLM output, and the failure is logged.
                logger.warning(
                    "Skipping malformed finding",
                    agent=self.agent_name,
                    error=str(e),
                )
        return findings

    def _format_file_contents(self, file_contents: dict[str, str]) -> str:
        """
        Format file contents for the LLM prompt.

        Each file is wrapped in a code block with its path as a header.
        Files longer than MAX_FILE_LINES lines are cut to that many lines
        and marked with "... (truncated)" to keep the prompt bounded.

        Returns "No file contents available." when the mapping is empty.
        """
        parts = []
        for filepath, content in file_contents.items():
            lines = content.split("\n")
            if len(lines) > self.MAX_FILE_LINES:
                content = "\n".join(lines[:self.MAX_FILE_LINES]) + "\n... (truncated)"
            parts.append(f"### {filepath}\n```\n{content}\n```")
        return "\n\n".join(parts) if parts else "No file contents available."

    @staticmethod
    def _elapsed_ms(started: float) -> int:
        """Return whole milliseconds elapsed since a time.perf_counter() reading."""
        return int((time.perf_counter() - started) * 1000)

    async def review(self, pr_data: PRData, rag_context: str = "") -> list[Finding]:
        """
        Main entry point: review a PR and return findings.

        This is the Template Method:
        1. Run static analysis tools (subclass-specific)
        2. Build the prompt with diff + files + tool output + RAG context
        3. Call the LLM with structured output
        4. Convert to Finding objects
        5. Log timing and return

        If any step fails, we log the error and return an empty list rather
        than crashing the entire pipeline. The other agents can still
        contribute findings.

        Args:
            pr_data: The PR diff, file contents, and metadata
            rag_context: Optional RAG context (related code chunks)

        Returns:
            The converted findings; an empty list on failure or when the
            LLM produced no structured output.
        """
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by wall-clock adjustments.
        started = time.perf_counter()

        try:
            # Step 1: Run static analysis tools
            static_results = await self.run_static_analysis(pr_data)

            # Steps 2-3: Build the prompt and the structured output chain
            chain = self._build_prompt() | self.llm.with_structured_output(AgentFindings)

            # Step 4: Call the LLM
            result = await chain.ainvoke({
                "diff": pr_data.diff[:self.MAX_DIFF_CHARS],  # Cap diff size for token limits
                "file_contents": self._format_file_contents(pr_data.file_contents),
                "static_analysis": static_results or "No static analysis results.",
                "rag_context": rag_context or "",
            })

            # Step 5: Convert to Finding objects. The chain may yield None
            # when the model returns nothing parseable; treat that as
            # "no findings" instead of letting it fail further down.
            if result is None:
                logger.warning(
                    "Agent returned no structured output",
                    agent=self.agent_name,
                )
                findings: list[Finding] = []
            else:
                findings = self._convert_to_findings(result)

            logger.info(
                "Agent review completed",
                agent=self.agent_name,
                findings_count=len(findings),
                elapsed_ms=self._elapsed_ms(started),
            )
            return findings

        except Exception as e:
            # Top-level boundary for this agent: log and fail open.
            logger.error(
                "Agent review failed",
                agent=self.agent_name,
                error=str(e),
                elapsed_ms=self._elapsed_ms(started),
            )
            return []  # Don't crash the pipeline — other agents can still work
app/agents/performance_agent.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Performance Agent
3
+ ==================
4
+
5
+ Evaluates code for computational efficiency, memory usage, and scalability.
6
+ Uses radon for complexity metrics and the LLM for semantic analysis of
7
+ query patterns, I/O operations, and algorithmic efficiency.
8
+
9
+ Same architecture as SecurityAgent — inherits from BaseAgent, overrides
10
+ only agent_name, system_prompt, and run_static_analysis().
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from pathlib import Path
16
+
17
+ import structlog
18
+
19
+ from app.agents.base_agent import BaseAgent
20
+ from app.github.client import PRData
21
+ from app.tools.radon_tool import run_radon
22
+
23
+ logger = structlog.get_logger()
24
+
25
+
26
class PerformanceAgent(BaseAgent):
    """
    Performance-focused review agent.

    Supplies the three BaseAgent hooks: the agent identifier, the system
    prompt loaded from prompts/performance_system.md, and a static
    analysis step backed by radon.
    """

    @property
    def agent_name(self) -> str:
        """Identifier attached to every finding this agent produces."""
        return "performance"

    @property
    def system_prompt(self) -> str:
        """Read the prompt from <repo root>/prompts/performance_system.md."""
        # This file lives at app/agents/, so parents[2] is the repo root.
        repo_root = Path(__file__).resolve().parents[2]
        prompt_file = repo_root / "prompts" / "performance_system.md"
        return prompt_file.read_text(encoding="utf-8")

    async def run_static_analysis(self, pr_data: PRData) -> str:
        """Run radon on the PR's file contents; return its report or ""."""
        report = await run_radon(pr_data.file_contents)
        return report or ""
app/agents/security_agent.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Security Agent
3
+ ===============
4
+
5
+ The Security Agent acts as a senior application security engineer (AppSec).
6
+ It reviews every changed line through the lens of exploitability, data exposure,
7
+ and authentication integrity.
8
+
9
+ Architecture:
10
+ 1. Run static analysis tools (Bandit + detect-secrets) on changed files
11
+ 2. Combine static results with PR diff and full file contents
12
+ 3. Send everything to Groq's Llama-3.3-70B with a security-focused system prompt
13
+ 4. LLM produces structured JSON findings with CWE IDs and suggested fixes
14
+
15
+ Why both static tools AND an LLM?
16
+
17
+ Static tools (Bandit):
18
+ ✅ Fast, deterministic, zero false negatives for known patterns
19
+ ✅ Free — no API cost
20
+ ❌ Can't understand context (doesn't know if input is already sanitized)
21
+ ❌ Only catches patterns it has rules for
22
+
23
+ LLM (Llama-3.3-70B):
24
+ ✅ Understands context, intent, data flow between functions
25
+ ✅ Can catch novel vulnerability patterns
26
+ ✅ Provides natural language explanations and fixes
27
+ ❌ Can hallucinate findings (false positives)
28
+ ❌ Costs API calls (though Groq's free tier is generous)
29
+
30
+ Together: static tools provide HIGH-CONFIDENCE anchors, the LLM provides DEPTH.
31
+ The Synthesizer (Week 7) will merge and deduplicate their outputs.
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ from pathlib import Path
37
+
38
+ import structlog
39
+
40
+ from app.agents.base_agent import BaseAgent
41
+ from app.github.client import PRData
42
+ from app.tools.bandit_tool import run_bandit
43
+ from app.tools.detect_secrets_tool import run_detect_secrets
44
+
45
+ logger = structlog.get_logger()
46
+
47
+
48
class SecurityAgent(BaseAgent):
    """
    Security-focused code review agent.

    BaseAgent supplies the LLM client, structured output parsing, error
    handling, timing, and the review() orchestration. This class only
    contributes:
    - agent_name: "security"
    - system_prompt: loaded from prompts/security_system.md
    - run_static_analysis(): runs Bandit + detect-secrets
    """

    @property
    def agent_name(self) -> str:
        """Identifier attached to every finding this agent produces."""
        return "security"

    @property
    def system_prompt(self) -> str:
        """
        Load the system prompt from its Markdown file.

        Prompts live in separate files rather than inline strings because
        they are long, change often during prompt tuning, can be reviewed
        by non-engineers, and produce readable Git diffs.
        """
        # This file lives at app/agents/, so parents[2] is the repo root.
        repo_root = Path(__file__).resolve().parents[2]
        prompt_file = repo_root / "prompts" / "security_system.md"
        return prompt_file.read_text(encoding="utf-8")

    async def run_static_analysis(self, pr_data: PRData) -> str:
        """
        Run the security-specific static analysis tools.

        Bandit (Python security linter) runs first, then detect-secrets
        (credential scanner). They run one after the other, not in
        parallel, to keep the flow simple. Non-empty reports are joined
        with a blank line; the result is "" when neither tool reports
        anything. The combined text is injected into the LLM prompt.
        """
        reports = []
        for scanner in (run_bandit, run_detect_secrets):
            report = await scanner(pr_data.file_contents)
            if report:
                reports.append(report)
        # Joining an empty list yields "", matching the no-results case.
        return "\n\n".join(reports)
app/agents/style_agent.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Style & Maintainability Agent
3
+ ===============================
4
+
5
+ Reviews code for readability, naming quality, documentation, test coverage,
6
+ and architectural consistency. Uses Ruff for mechanical lint checks and the
7
+ LLM for deeper maintainability analysis.
8
+
9
+ Same architecture as SecurityAgent and PerformanceAgent.
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from pathlib import Path
15
+
16
+ import structlog
17
+
18
+ from app.agents.base_agent import BaseAgent
19
+ from app.github.client import PRData
20
+ from app.tools.linter_tool import run_ruff
21
+
22
+ logger = structlog.get_logger()
23
+
24
+
25
class StyleAgent(BaseAgent):
    """
    Style and maintainability review agent.

    Supplies the three agent-specific pieces that ``BaseAgent`` needs: the
    agent name, the system prompt, and the static-analysis step (Ruff).
    """

    @property
    def agent_name(self) -> str:
        """Return the fixed identifier of this agent: ``"style"``."""
        return "style"

    @property
    def system_prompt(self) -> str:
        """
        Read the style system prompt from ``prompts/style_system.md``.

        The file sits three directory levels above this module and is read
        from disk (UTF-8) on every access.
        """
        agents_dir = Path(__file__).resolve().parent
        prompt_file = agents_dir.parent.parent / "prompts" / "style_system.md"
        return prompt_file.read_text(encoding="utf-8")

    async def run_static_analysis(self, pr_data: PRData) -> str:
        """Run Ruff on the PR's file contents; return its output or ``""``."""
        lint_report = await run_ruff(pr_data.file_contents)
        return lint_report or ""
app/agents/synthesizer.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Synthesizer Agent
3
+ ==================
4
+
5
+ The Synthesizer is the "senior engineering manager" of Ninja Code Guard.
6
+ It takes findings from all three domain agents (Security, Performance, Style)
7
+ and produces a unified, non-redundant review.
8
+
9
+ Responsibilities:
10
+ 1. **Deduplicate** — If Security and Performance flag the same line for
11
+ different reasons, merge them into one finding with both perspectives.
12
+ 2. **Resolve conflicts** — If agents disagree on severity, use a precedence
13
+ hierarchy: Security > Performance > Style.
14
+ 3. **Re-rank** — Sort findings by severity, then by confidence (both descending).
15
+ 4. **Compute Health Score** — 0-100 based on weighted finding density.
16
+ 5. **Generate executive summary** — 3-5 sentences summarizing the review.
17
+ 6. **Determine recommendation** — approve / request_changes / block.
18
+
19
+ Why a Synthesizer instead of just concatenating findings?
20
+ - Without dedup: the same SQL injection might be flagged by both Security
21
+ (as CWE-89) and Performance (as "unbounded query") — confusing for devs.
22
+ - Without conflict resolution: Security says "critical", Style says "medium"
23
+ for the same issue — which severity should the comment show?
24
+ - Without re-ranking: findings appear in arbitrary order — devs should see
25
+ the most important issues first.
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import time
31
+ from collections import defaultdict
32
+
33
+ import structlog
34
+
35
+ from app.models.findings import Finding, SynthesizedReview
36
+ from app.services.health_score import calculate_health_score, determine_recommendation
37
+
38
+ logger = structlog.get_logger()
39
+
40
+ # Agent precedence for severity conflicts (higher = takes priority)
41
+ AGENT_PRECEDENCE = {
42
+ "security": 3,
43
+ "performance": 2,
44
+ "style": 1,
45
+ }
46
+
47
+ SEVERITY_RANK = {
48
+ "critical": 4,
49
+ "high": 3,
50
+ "medium": 2,
51
+ "low": 1,
52
+ }
53
+
54
+
55
+ def _finding_key(f: Finding) -> str:
56
+ """
57
+ Generate a deduplication key for a finding.
58
+
59
+ Two findings are considered duplicates if they reference the same
60
+ file and overlapping line ranges. We use a simplified key based on
61
+ file_path and line_start — findings on the same line from different
62
+ agents are candidates for merging.
63
+ """
64
+ return f"{f.file_path}:{f.line_start}:{f.category}"
65
+
66
+
67
def deduplicate_findings(findings: list[Finding]) -> list[Finding]:
    """
    Merge findings that share the same deduplication key.

    Findings are grouped by ``_finding_key``. A group with one member is
    passed through untouched. For a larger group:

    - the finding from the highest-precedence agent (``AGENT_PRECEDENCE``,
      unknown agents rank lowest) is the primary and supplies the location,
      category, title, description, suggested fix and CWE id;
    - the severity is the highest one in the group (``SEVERITY_RANK``);
    - the confidence is the maximum in the group;
    - a note naming the *other* agents is appended to the description.

    The note lists each other agent once and never the primary's own agent,
    so several findings from a single agent merge without a misleading
    "also flagged by" line.

    Example:
        Security flags app.py:5 as "critical", Performance flags the same
        key as "high" → Security's finding is kept with "critical" severity
        and a note that the performance agent also flagged it.

    Args:
        findings: Findings from all agents, in any order.

    Returns:
        A new list with one finding per key, in first-seen key order.
    """
    groups: dict[str, list[Finding]] = defaultdict(list)
    for finding in findings:
        groups[_finding_key(finding)].append(finding)

    deduped: list[Finding] = []
    duplicates_removed = 0

    for group in groups.values():
        if len(group) == 1:
            deduped.append(group[0])
            continue

        # Stable sort: agents with equal precedence keep their input order.
        by_precedence = sorted(
            group,
            key=lambda f: AGENT_PRECEDENCE.get(f.agent, 0),
            reverse=True,
        )
        primary = by_precedence[0]

        # Highest severity reported by any agent in the group.
        top_severity = max(
            by_precedence, key=lambda f: SEVERITY_RANK.get(f.severity, 0)
        ).severity

        # Distinct agents other than the primary's, in precedence order.
        other_agents = list(
            dict.fromkeys(
                f.agent for f in by_precedence[1:] if f.agent != primary.agent
            )
        )
        merged_description = primary.description
        if other_agents:
            merged_description += (
                f"\n\n*Also flagged by: {', '.join(other_agents)} agent(s).*"
            )

        deduped.append(
            Finding(
                agent=primary.agent,
                file_path=primary.file_path,
                line_start=primary.line_start,
                line_end=primary.line_end,
                severity=top_severity,
                category=primary.category,
                title=primary.title,
                description=merged_description,
                suggested_fix=primary.suggested_fix,
                cwe_id=primary.cwe_id,
                confidence=max(f.confidence for f in group),
            )
        )
        duplicates_removed += len(group) - 1

    if duplicates_removed > 0:
        logger.info(
            "Deduplicated findings",
            removed=duplicates_removed,
            before=len(findings),
            after=len(deduped),
        )

    return deduped
139
+
140
+
141
def rank_findings(findings: list[Finding]) -> list[Finding]:
    """
    Return the findings ordered from most to least important.

    Ordering is by severity rank (``SEVERITY_RANK``, unknown severities rank
    lowest) and then by confidence, both descending, so blocking issues come
    before nice-to-haves. The input list is not modified.
    """

    def importance(finding: Finding) -> tuple[int, float]:
        return SEVERITY_RANK.get(finding.severity, 0), finding.confidence

    ordered = list(findings)
    ordered.sort(key=importance, reverse=True)
    return ordered
154
+
155
+
156
def generate_executive_summary(
    findings: list[Finding],
    health_score: int,
    recommendation: str,
) -> str:
    """
    Build the short summary shown at the top of the PR comment.

    The summary is up to four sentences joined by spaces: the total issue
    count, a severity breakdown, a per-agent breakdown, and a highlight of
    the first critical finding (or, failing that, the first high one).
    With no findings, a fixed "no issues" message is returned instead.

    Args:
        findings: Findings to summarize; the highlight picks the first match
            in list order.
        health_score: Accepted for interface compatibility; not used here.
        recommendation: Accepted for interface compatibility; not used here.

    Returns:
        The summary text.
    """
    if not findings:
        return (
            "No issues were found in this pull request. "
            "The code changes look clean across security, performance, and style dimensions. "
            "Safe to merge."
        )

    # Tally per severity and per agent in a single pass.
    per_severity: dict[str, int] = {}
    per_agent: dict[str, int] = {}
    for item in findings:
        per_severity[item.severity] = per_severity.get(item.severity, 0) + 1
        per_agent[item.agent] = per_agent.get(item.agent, 0) + 1

    total = len(findings)
    sentences = [
        f"Multi-agent review analyzed this PR across security, performance, and style dimensions, "
        f"finding {total} issue{'s' if total != 1 else ''}."
    ]

    # Only the four known severities are reported, most severe first.
    severity_bits = [
        f"{per_severity[level]} {level}"
        for level in ("critical", "high", "medium", "low")
        if per_severity.get(level, 0) > 0
    ]
    if severity_bits:
        sentences.append(f"Breakdown: {', '.join(severity_bits)}.")

    # Only the three known agents are reported, in precedence order.
    domain_bits = [
        f"{name.capitalize()}: {per_agent[name]}"
        for name in ("security", "performance", "style")
        if per_agent.get(name, 0) > 0
    ]
    if domain_bits:
        sentences.append(f"By domain: {', '.join(domain_bits)}.")

    # Highlight the single most pressing finding, if any is critical or high.
    if per_severity.get("critical", 0) > 0:
        critical_finding = next(f for f in findings if f.severity == "critical")
        sentences.append(
            f"Most urgent: {critical_finding.title} in `{critical_finding.file_path}`."
        )
    elif per_severity.get("high", 0) > 0:
        high_finding = next(f for f in findings if f.severity == "high")
        sentences.append(
            f"Top priority: {high_finding.title} in `{high_finding.file_path}`."
        )

    return " ".join(sentences)
224
+
225
+
226
def synthesize(
    security_findings: list[Finding],
    performance_findings: list[Finding],
    style_findings: list[Finding],
) -> SynthesizedReview:
    """
    Combine the findings of all agents into one unified review.

    Pipeline:
    1. Concatenate the three finding lists
    2. Deduplicate (``deduplicate_findings``)
    3. Rank by severity and confidence (``rank_findings``)
    4. Compute the health score (``calculate_health_score``)
    5. Determine the recommendation (``determine_recommendation``)
    6. Generate the executive summary

    Args:
        security_findings: Findings from the security agent.
        performance_findings: Findings from the performance agent.
        style_findings: Findings from the style agent.

    Returns:
        A ``SynthesizedReview`` with the ranked findings, per-severity
        counts, health score, recommendation, summary, and the synthesis
        duration in milliseconds.
    """
    # perf_counter is monotonic, so the measured duration cannot go negative
    # or jump when the system clock is adjusted.
    start = time.perf_counter()

    all_findings = security_findings + performance_findings + style_findings
    ranked = rank_findings(deduplicate_findings(all_findings))

    health_score = calculate_health_score(ranked)
    recommendation = determine_recommendation(ranked, health_score)
    summary = generate_executive_summary(ranked, health_score, recommendation)

    # One pass over the findings; severities outside the four known levels
    # are not counted.
    severity_totals = dict.fromkeys(("critical", "high", "medium", "low"), 0)
    for finding in ranked:
        if finding.severity in severity_totals:
            severity_totals[finding.severity] += 1

    elapsed_ms = int((time.perf_counter() - start) * 1000)

    logger.info(
        "Synthesis complete",
        input_findings=len(all_findings),
        after_dedup=len(ranked),
        health_score=health_score,
        recommendation=recommendation,
        elapsed_ms=elapsed_ms,
    )

    return SynthesizedReview(
        health_score=health_score,
        executive_summary=summary,
        recommendation=recommendation,
        findings=ranked,
        critical_count=severity_totals["critical"],
        high_count=severity_totals["high"],
        medium_count=severity_totals["medium"],
        low_count=severity_totals["low"],
        duration_ms=elapsed_ms,
    )
app/config.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Application configuration via environment variables."""
2
+
3
+ from pydantic_settings import BaseSettings
4
+
5
+
6
class Settings(BaseSettings):
    """
    Application configuration.

    Values are loaded by pydantic-settings from the environment and from a
    UTF-8 ``.env`` file (see ``model_config``). Every field has a default.
    """

    # --- LLM provider API keys ---
    groq_api_key: str = ""
    gemini_api_key: str = ""

    # --- GitHub App credentials ---
    github_app_id: str = ""
    github_app_private_key_path: str = "./keys/app.pem"
    github_webhook_secret: str = ""

    # --- Postgres connection string ---
    database_url: str = ""

    # --- Redis cache (Upstash) ---
    upstash_redis_url: str = ""

    # --- Embedding model name ---
    embedding_model: str = "all-MiniLM-L6-v2"

    # --- General application settings ---
    environment: str = "development"
    log_level: str = "INFO"
    confidence_threshold: float = 0.6
    max_repo_files_index: int = 500

    # --- Security ---
    dashboard_api_key: str = ""  # Set in production to protect dashboard API
    cors_allowed_origins: str = ""  # Comma-separated origins, e.g. "https://myapp.vercel.app"

    model_config = {"env_file": ".env", "env_file_encoding": "utf-8"}
38
+
39
+
40
+ settings = Settings()
app/context/__init__.py ADDED
File without changes
app/context/embedder.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Code Embedding Pipeline
3
+ ========================
4
+
5
+ Converts source code into vector embeddings using sentence-transformers.
6
+ These embeddings are stored in ChromaDB for semantic search.
7
+
8
+ How it works:
9
+ 1. Source code is split into chunks (functions, classes, or fixed-size blocks)
10
+ 2. Each chunk is embedded into a 384-dimensional vector
11
+ 3. Vectors capture semantic meaning — similar code has similar vectors
12
+ 4. When reviewing a PR, we query ChromaDB with the diff to find related code
13
+
14
+ Why embeddings for code?
15
+ Consider this diff:
16
+ + user_id = request.args.get("id")
17
+ + data = db.query(f"SELECT * FROM users WHERE id = {user_id}")
18
+
19
+ To evaluate this, the agent needs to know:
20
+ - Does `db.query()` parameterize inputs? → Need the DB wrapper's source code
21
+ - Is there middleware that validates `user_id`? → Need the middleware source
22
+ - Are there other similar patterns in the codebase? → Need semantic search
23
+
24
+ Embeddings let us find this related code WITHOUT knowing the exact file paths.
25
+ The query "SQL query with user input" returns relevant code chunks ranked by
26
+ semantic similarity — not keyword matching, but meaning matching.
27
+
28
+ Model: all-MiniLM-L6-v2
29
+ - 384 dimensions, 22M parameters
30
+ - Runs locally on CPU in ~10ms per chunk (GPU: ~1ms)
31
+ - Optimized for semantic similarity tasks
32
+ - Good enough for code — not perfect, but fast and free
33
+ """
34
+
35
+ from __future__ import annotations
36
+
37
+ import structlog
38
+
39
+ from app.config import settings
40
+
41
+ logger = structlog.get_logger()
42
+
43
+ # Lazy-loaded model to avoid slow import at startup
44
+ _model = None
45
+
46
+
47
def get_embedding_model():
    """
    Return the shared sentence-transformers model, loading it on first use.

    The model is cached in the module-level ``_model``. Loading is deferred
    until first use rather than done at import time because:
    1. Loading the model is slow (about two seconds, per the original notes)
    2. Not every request needs embeddings
    3. Tests should not have to load a real ML model

    Returns:
        The loaded model, or ``None`` when an ``ImportError`` occurs while
        importing or constructing it (a warning is logged in that case).
    """
    global _model
    if _model is not None:
        return _model

    try:
        from sentence_transformers import SentenceTransformer
        _model = SentenceTransformer(settings.embedding_model)
        logger.info("Loaded embedding model", model=settings.embedding_model)
    except ImportError:
        logger.warning("sentence-transformers not installed — RAG context disabled")
        return None

    return _model
66
+
67
+
68
def embed_texts(texts: list[str]) -> list[list[float]]:
    """
    Embed a list of text strings into vectors.

    Args:
        texts: Code chunks or queries to embed.

    Returns:
        One embedding (a list of floats) per input text. An empty list is
        returned when ``texts`` is empty or when the embedding model is
        unavailable.
    """
    # Nothing to embed: skip loading the model entirely.
    if not texts:
        return []

    model = get_embedding_model()
    if model is None:
        return []

    vectors = model.encode(texts, show_progress_bar=False)
    return vectors.tolist()
84
+
85
+
86
def chunk_code(
    content: str,
    filepath: str,
    chunk_size: int = 60,
    overlap: int = 10,
) -> list[dict]:
    """
    Split source code into overlapping line-based chunks for embedding.

    Consecutive chunks start ``chunk_size - overlap`` lines apart (at least
    one line), so neighbouring chunks share ``overlap`` lines of context.
    Chunking stops as soon as a chunk reaches the end of the file, which
    avoids emitting a tail chunk that is entirely contained in the previous
    one. Chunks with fewer than five non-empty lines are skipped.

    Why about 60 lines? It is roughly one function or class. Much smaller
    chunks lose context; much larger ones dilute the embedding signal.

    Args:
        content: Full file source code.
        filepath: The file path; stored as metadata and prefixed to the text.
        chunk_size: Lines per chunk (default: 60). Non-positive values yield
            no chunks.
        overlap: Lines shared between consecutive chunks (default: 10).
            Negative values are treated as 0.

    Returns:
        List of dicts with 'text', 'filepath', 'start_line' and 'end_line'
        (1-based, inclusive).
    """
    if chunk_size <= 0:
        return []

    lines = content.split("\n")
    total = len(lines)
    step = max(chunk_size - max(overlap, 0), 1)

    chunks = []
    start = 0
    while start < total:
        end = min(start + chunk_size, total)
        window = lines[start:end]

        # Skip very small chunks (fewer than 5 non-empty lines).
        if sum(1 for line in window if line.strip()) >= 5:
            chunks.append({
                "text": "# File: {}\n{}".format(filepath, "\n".join(window)),
                "filepath": filepath,
                "start_line": start + 1,
                "end_line": end,
            })

        # This chunk already covers the end of the file; any further chunk
        # would only repeat lines that are already included.
        if end >= total:
            break

        start += step

    return chunks
app/context/indexer.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ChromaDB Repo Indexer
3
+ ======================
4
+
5
+ Indexes repository source code into ChromaDB for semantic search.
6
+ Each repo gets its own ChromaDB collection, keyed by the repo's full name.
7
+
8
+ How indexing works:
9
+ 1. Receive file contents from GitHub API
10
+ 2. Chunk each file into ~60-line blocks
11
+ 3. Embed each chunk using sentence-transformers
12
+ 4. Upsert into ChromaDB collection for this repo
13
+
14
+ ChromaDB is an open-source vector database that:
15
+ - Runs embedded in the Python process (no separate server needed)
16
+ - Stores vectors + metadata + documents together
17
+ - Supports fast approximate nearest neighbor (ANN) search
18
+ - Can persist to disk or run entirely in-memory
19
+
20
+ We use in-memory mode on Render (ephemeral storage) — the index is rebuilt
21
+ on each PR review. This is acceptable because indexing the changed files
22
+ takes <1 second for typical PRs.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import chromadb
28
+ import structlog
29
+
30
+ from app.config import settings
31
+ from app.context.embedder import chunk_code, embed_texts
32
+
33
+ logger = structlog.get_logger()
34
+
35
+ # Singleton ChromaDB client (in-memory)
36
+ _chroma_client: chromadb.ClientAPI | None = None
37
+
38
+
39
def _get_chroma_client() -> chromadb.ClientAPI:
    """Return the module-wide in-memory ChromaDB client, creating it once."""
    global _chroma_client
    if _chroma_client is not None:
        return _chroma_client

    # chromadb.Client() with no arguments: in-memory, no persistence.
    _chroma_client = chromadb.Client()
    return _chroma_client
45
+
46
+
47
+ def _collection_name(repo_full_name: str) -> str:
48
+ """Generate a valid ChromaDB collection name from a repo name."""
49
+ # ChromaDB requires alphanumeric + underscores, 3-63 chars
50
+ name = repo_full_name.replace("/", "_").replace("-", "_")
51
+ return f"repo_{name}"[:63]
52
+
53
+
54
async def index_repo_files(
    repo_full_name: str, file_contents: dict[str, str]
) -> str:
    """
    Index repository files into ChromaDB for RAG retrieval.

    Each file is chunked, the chunks are embedded, and the result is
    upserted into the repository's collection. Before upserting, chunks
    previously stored for the same file paths are deleted: upsert alone only
    overwrites matching ids (``<filepath>:<start_line>``), so chunks from an
    older, longer version of a file would otherwise linger in the
    process-wide in-memory collection and be retrieved as stale context.

    Args:
        repo_full_name: "owner/repo"; determines the collection name.
        file_contents: dict of {filepath: source_code}.

    Returns:
        The collection name (for retrieval). It is returned even when
        nothing was indexed, either because there were no chunks or because
        embedding was unavailable.
    """
    client = _get_chroma_client()
    collection_name = _collection_name(repo_full_name)

    # Get or create a collection for this repo.
    collection = client.get_or_create_collection(
        name=collection_name,
        metadata={"repo": repo_full_name},
    )

    # Chunk all files, skipping very large ones (binary, generated code, ...).
    all_chunks = []
    for filepath, content in file_contents.items():
        if len(content) > 100_000:
            continue
        all_chunks.extend(chunk_code(content, filepath))

    if not all_chunks:
        logger.info("No chunks to index", repo=repo_full_name)
        return collection_name

    # Cap the total number of chunks. Note that the setting is named
    # max_repo_files_index but is applied as a chunk limit here.
    max_chunks = settings.max_repo_files_index
    if len(all_chunks) > max_chunks:
        all_chunks = all_chunks[:max_chunks]

    texts = [chunk["text"] for chunk in all_chunks]
    embeddings = embed_texts(texts)

    if not embeddings:
        logger.warning("Embedding failed — RAG context unavailable")
        return collection_name

    # Remove stale chunks of the files being re-indexed. Done only after
    # embedding succeeded, so a failed run leaves existing context intact.
    for filepath in dict.fromkeys(chunk["filepath"] for chunk in all_chunks):
        collection.delete(where={"filepath": filepath})

    ids = [f"{chunk['filepath']}:{chunk['start_line']}" for chunk in all_chunks]
    metadatas = [
        {
            "filepath": chunk["filepath"],
            "start_line": chunk["start_line"],
            "end_line": chunk["end_line"],
        }
        for chunk in all_chunks
    ]

    collection.upsert(
        ids=ids,
        embeddings=embeddings,
        documents=texts,
        metadatas=metadatas,
    )

    logger.info(
        "Indexed repo files",
        repo=repo_full_name,
        chunks=len(all_chunks),
        collection=collection_name,
    )

    return collection_name
app/context/retriever.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ RAG Context Retriever
3
+ ======================
4
+
5
+ Retrieves relevant code context from ChromaDB based on the PR diff.
6
+ This is the "R" in RAG (Retrieval-Augmented Generation).
7
+
8
+ How retrieval works:
9
+ 1. Take the PR diff text as a query
10
+ 2. Embed the query using the same model used for indexing
11
+ 3. Search ChromaDB for the most similar code chunks
12
+ 4. Return the top-k chunks as additional context for the LLM
13
+
14
+ Why RAG for code review?
15
+ The PR diff only shows CHANGED lines. But understanding a change often
16
+ requires seeing RELATED code:
17
+ - If a function is called from 5 places, changing it affects all callers
18
+ - If a variable is validated in another file, the validation matters here
19
+ - If the same pattern exists elsewhere, inconsistency is a style issue
20
+
21
+ RAG gives the agents "peripheral vision" — they see not just the change,
22
+ but the surrounding codebase context that makes the change meaningful.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import structlog
28
+
29
+ from app.context.embedder import embed_texts
30
+ from app.context.indexer import _get_chroma_client
31
+
32
+ logger = structlog.get_logger()
33
+
34
+
35
async def retrieve_context(
    collection_name: str,
    query_text: str,
    top_k: int = 5,
) -> str:
    """
    Look up code related to a query in a ChromaDB collection.

    The query is capped at 5000 characters, embedded, and used to fetch up
    to ``top_k`` nearest chunks. Each hit's distance is converted to a rough
    0-1 relevance score; hits scoring below 0.3 are dropped.

    Args:
        collection_name: The ChromaDB collection to search.
        query_text: The PR diff or a specific query.
        top_k: Maximum number of chunks to fetch (default: 5).

    Returns:
        A Markdown-formatted block of relevant code chunks for the LLM
        prompt, or an empty string when the collection is missing or empty,
        embedding is unavailable, nothing is relevant enough, or any error
        occurs (errors are logged, never raised).
    """
    try:
        chroma = _get_chroma_client()

        # A missing collection simply means there is no context to offer.
        try:
            collection = chroma.get_collection(name=collection_name)
        except Exception:
            logger.debug("Collection not found — no RAG context", collection=collection_name)
            return ""

        if collection.count() == 0:
            return ""

        capped_query = query_text[:5000]  # Cap query size
        query_vectors = embed_texts([capped_query])
        if not query_vectors:
            return ""

        # Never ask for more results than the collection holds.
        hits = collection.query(
            query_embeddings=query_vectors,
            n_results=min(top_k, collection.count()),
            include=["documents", "metadatas", "distances"],
        )

        if not hits or not hits["documents"] or not hits["documents"][0]:
            return ""

        sections = []
        rows = zip(
            hits["documents"][0],
            hits["metadatas"][0],
            hits["distances"][0],
        )
        for document, meta, dist in rows:
            path = meta.get("filepath", "unknown")
            first_line = meta.get("start_line", "?")
            last_line = meta.get("end_line", "?")
            # Lower distance = more similar; rough conversion to a 0-1 score.
            relevance = max(0, 1 - dist / 2)

            if relevance < 0.3:
                continue  # Skip low-relevance results

            sections.append(
                f"### {path} (lines {first_line}-{last_line}, relevance: {relevance:.0%})\n"
                f"```\n{document}\n```\n"
            )

        if not sections:
            return ""

        context = "\n".join(["## Related Code Context (from repository)\n", *sections])
        logger.info(
            "Retrieved RAG context",
            collection=collection_name,
            chunks_returned=len(sections),
        )
        return context

    except Exception as e:
        logger.warning("RAG retrieval failed", error=str(e))
        return ""
app/db/__init__.py ADDED
File without changes
app/db/postgres.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Neon Postgres Database Client
3
+ ===============================
4
+
5
+ Stores PR review history for the dashboard: health scores, finding counts,
6
+ executive summaries, and full findings JSON.
7
+
8
+ Uses a shared asyncpg connection pool for both the async writes from the
9
+ webhook pipeline and the dashboard reads.
10
+
11
+ Schema is auto-created on first connection via ensure_tables().
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ from datetime import datetime, timezone
18
+ from uuid import uuid4
19
+
20
+ import structlog
21
+
22
+ from app.config import settings
23
+ from app.models.findings import SynthesizedReview
24
+
25
+ logger = structlog.get_logger()
26
+
27
+ # ── Connection pool (reuse connections instead of connect-per-query) ──────
28
+ _pool = None
29
+
30
+
31
async def _get_pool():
    """
    Return the shared asyncpg connection pool, creating it on first use.

    The pool connects to ``settings.database_url`` with 1-5 connections and
    a 10-second command timeout. ``asyncpg`` is imported lazily, only when
    the pool is first created.
    """
    global _pool
    if _pool is not None:
        return _pool

    import asyncpg
    _pool = await asyncpg.create_pool(
        settings.database_url,
        min_size=1,
        max_size=5,
        command_timeout=10,
    )
    return _pool
42
+
43
+
44
+ CREATE_TABLE_SQL = """
45
+ CREATE TABLE IF NOT EXISTS pr_reviews (
46
+ id TEXT PRIMARY KEY,
47
+ repo_full_name TEXT NOT NULL,
48
+ pr_number INT NOT NULL,
49
+ commit_sha TEXT NOT NULL,
50
+ health_score INT NOT NULL,
51
+ critical_count INT DEFAULT 0,
52
+ high_count INT DEFAULT 0,
53
+ medium_count INT DEFAULT 0,
54
+ low_count INT DEFAULT 0,
55
+ summary TEXT,
56
+ findings JSONB NOT NULL DEFAULT '[]',
57
+ duration_ms INT DEFAULT 0,
58
+ created_at TIMESTAMPTZ DEFAULT NOW()
59
+ );
60
+
61
+ CREATE INDEX IF NOT EXISTS idx_pr_reviews_repo ON pr_reviews(repo_full_name);
62
+ CREATE INDEX IF NOT EXISTS idx_pr_reviews_sha ON pr_reviews(commit_sha);
63
+ """
64
+
65
+
66
async def ensure_tables():
    """
    Create the ``pr_reviews`` table and its indexes if they do not exist.

    Does nothing (beyond a warning) when ``DATABASE_URL`` is not configured.
    Database errors are logged as warnings and not raised.
    """
    database_configured = bool(settings.database_url)
    if not database_configured:
        logger.warning("DATABASE_URL not set — skipping table creation")
        return

    try:
        pool = await _get_pool()
        async with pool.acquire() as connection:
            await connection.execute(CREATE_TABLE_SQL)
        logger.info("Database tables ensured")
    except Exception as exc:
        logger.warning("Database setup failed", error=str(exc))
79
+
80
+
81
async def save_review(
    repo_full_name: str,
    pr_number: int,
    commit_sha: str,
    review: SynthesizedReview,
) -> None:
    """
    Insert one PR review row into ``pr_reviews``.

    A fresh UUID is used as the row id and the findings are stored as a JSON
    array. Does nothing when ``DATABASE_URL`` is not configured. Database
    errors are logged as warnings and not raised.

    Args:
        repo_full_name: Repository in ``owner/repo`` form.
        pr_number: Pull request number.
        commit_sha: Commit SHA the review was produced for.
        review: The synthesized review to persist.
    """
    if not settings.database_url:
        return

    insert_sql = """
        INSERT INTO pr_reviews (id, repo_full_name, pr_number, commit_sha,
            health_score, critical_count, high_count, medium_count, low_count,
            summary, findings, duration_ms)
        VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)
    """

    try:
        pool = await _get_pool()
        async with pool.acquire() as connection:
            # Positional parameters, in the column order of the INSERT.
            row = (
                str(uuid4()),
                repo_full_name,
                pr_number,
                commit_sha,
                review.health_score,
                review.critical_count,
                review.high_count,
                review.medium_count,
                review.low_count,
                review.executive_summary,
                json.dumps([f.model_dump() for f in review.findings]),
                review.duration_ms,
            )
            await connection.execute(insert_sql, *row)
        logger.info("Saved review to database", repo=repo_full_name, pr=pr_number)
    except Exception as exc:
        logger.warning("Database save failed", error=str(exc))
117
+
118
+
119
async def get_repo_reviews(repo_full_name: str, limit: int = 20) -> list[dict]:
    """
    Get recent reviews for a repo, newest first.

    Args:
        repo_full_name: Repository in ``owner/repo`` form.
        limit: Maximum number of rows to return. Clamped to the range 0-100
            to prevent excessive queries and invalid negative limits.

    Returns:
        One dict per review with the summary columns (the findings JSON is
        not included). An empty list is returned when ``DATABASE_URL`` is
        not configured or when the query fails (failures are logged as
        warnings, not raised).
    """
    if not settings.database_url:
        return []

    limit = max(0, min(limit, 100))

    try:
        pool = await _get_pool()
        async with pool.acquire() as conn:
            rows = await conn.fetch(
                """
                SELECT id, pr_number, commit_sha, health_score,
                       critical_count, high_count, medium_count, low_count,
                       summary, duration_ms, created_at
                FROM pr_reviews
                WHERE repo_full_name = $1
                ORDER BY created_at DESC
                LIMIT $2
                """,
                repo_full_name,
                limit,
            )
        return [dict(row) for row in rows]
    except Exception as e:
        logger.warning("Database query failed", error=str(e))
        return []
app/db/redis_cache.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Redis Cache for PR Review Deduplication
3
+ ========================================
4
+
5
+ When a developer pushes multiple commits quickly (or force-pushes), GitHub sends
6
+ a webhook for each push. Without caching, we'd re-analyze the same PR multiple times,
7
+ wasting Groq API quota and spamming the PR with duplicate comments.
8
+
9
+ Solution: Before analyzing a PR, we check Redis: "Have we already reviewed this
10
+ exact commit SHA?" If yes, we skip the analysis entirely.
11
+
12
+ Why Redis (Upstash) instead of in-memory cache?
13
+ - Our Render free tier restarts the server frequently (cold starts)
14
+ - In-memory cache would be lost on every restart
15
+ - Redis persists across restarts and is shared if we scale to multiple workers
16
+ - Upstash's serverless Redis gives us 10K requests/day free — more than enough
17
+
18
+ Cache key structure: "ninjacg:reviewed:{commit_sha}"
19
+ Cache value: "1" (just a flag — we don't store the review result here, that's in Postgres)
20
+ TTL: 7 days (after which re-analysis is allowed)
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import redis.asyncio as redis
26
+ import structlog
27
+
28
+ from app.config import settings
29
+
30
+ logger = structlog.get_logger()
31
+
32
+ # Connection pool — reused across requests for efficiency.
33
+ # Redis connections are expensive to create (TCP handshake + TLS negotiation).
34
+ # A pool keeps connections open and reuses them.
35
+ _redis_client: redis.Redis | None = None
36
+
37
+ # Cache TTL in seconds (7 days)
38
+ CACHE_TTL = 7 * 24 * 60 * 60
39
+
40
+
41
def _get_redis_client() -> redis.Redis:
    """
    Return the module-wide Redis client, building it on first call.

    The client is created lazily from ``settings.upstash_redis_url`` (with
    ``decode_responses=True``) rather than at import time, so merely importing
    this module never constructs a client. Later calls return the same object.
    """
    global _redis_client
    if _redis_client is not None:
        return _redis_client

    _redis_client = redis.from_url(
        settings.upstash_redis_url,
        decode_responses=True,
    )
    return _redis_client
55
+
56
+
57
+ def _cache_key(commit_sha: str) -> str:
58
+ """Build the Redis key for a commit SHA."""
59
+ return f"ninjacg:reviewed:{commit_sha}"
60
+
61
+
62
async def is_already_reviewed(commit_sha: str) -> bool:
    """
    Report whether the review flag for this commit is present in Redis.

    Args:
        commit_sha: The commit SHA to look up.

    Returns:
        True when the cache key for ``commit_sha`` exists, otherwise False.
        Fails open: if the Redis lookup raises, the error is logged as a
        warning and False is returned, so the caller proceeds with analysis.
    """
    try:
        seen = bool(await _get_redis_client().exists(_cache_key(commit_sha)))
        if seen:
            logger.info("Cache hit — skipping re-analysis", commit_sha=commit_sha[:8])
        return seen
    except Exception as exc:
        # Reviewing a PR twice is preferable to silently missing a review.
        logger.warning("Redis check failed, proceeding with analysis", error=str(exc))
        return False
86
+
87
+
88
async def mark_as_reviewed(commit_sha: str) -> None:
    """
    Flag a commit as reviewed by setting its cache key with a TTL.

    The key is written with value "1" and expires after ``CACHE_TTL`` seconds.
    A Redis failure is logged as a warning and otherwise ignored.

    Args:
        commit_sha: The commit SHA that was reviewed.
    """
    key = _cache_key(commit_sha)
    try:
        await _get_redis_client().set(key, "1", ex=CACHE_TTL)
        logger.info("Cached review result", commit_sha=commit_sha[:8], ttl_days=7)
    except Exception as exc:
        # Non-fatal: without the flag the commit is simply analyzed again.
        logger.warning("Redis set failed", error=str(exc))
105
+
106
+
107
async def invalidate_cache(commit_sha: str) -> None:
    """
    Delete the reviewed-flag for a commit so it can be analyzed again.

    A Redis failure is logged as a warning and otherwise ignored.

    Args:
        commit_sha: The commit SHA whose cache entry should be removed.
    """
    key = _cache_key(commit_sha)
    try:
        await _get_redis_client().delete(key)
        logger.info("Cache invalidated", commit_sha=commit_sha[:8])
    except Exception as exc:
        logger.warning("Redis delete failed", error=str(exc))
app/github/__init__.py ADDED
File without changes
app/github/auth.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ GitHub App Authentication
3
+ ==========================
4
+
5
+ GitHub Apps authenticate via a two-step process:
6
+
7
+ 1. **JWT Generation**: We create a JSON Web Token (JWT) signed with our private key
8
+ (.pem file). This JWT proves we are the registered GitHub App. It's valid for
9
+ max 10 minutes — intentionally short-lived for security.
10
+
11
+ 2. **Installation Access Token**: We exchange the JWT for an installation access token
12
+ via GitHub's API. This token is scoped to a specific installation (a specific set
13
+ of repos where the app is installed) and lasts 1 hour.
14
+
15
+ Why two steps? A GitHub App can be installed on hundreds of orgs/repos. The JWT says
16
+ "I am the Ninja Code Guard app" — the installation token says "I have permission to access
17
+ @ninjacode911's repos specifically." This separation of identity vs. authorization
18
+ is a production-grade security pattern (similar to OAuth2 client credentials).
19
+
20
+ We cache the installation token in memory and refresh it when it expires, so we
21
+ don't make unnecessary API calls.
22
+
23
+ Reference: https://docs.github.com/en/apps/creating-github-apps/authenticating-with-a-github-app
24
+ """
25
+
26
+ import asyncio
27
+ import time
28
+ from pathlib import Path
29
+
30
+ import httpx
31
+ import jwt # PyJWT library — used to create JSON Web Tokens
32
+
33
+ from app.config import settings
34
+
35
+ # In-memory cache for installation tokens
36
+ _token_cache: dict[int, dict] = {}
37
+
38
+ # Asyncio lock to prevent race conditions on token cache
39
+ _token_lock = asyncio.Lock()
40
+
41
+ # Cached private key (read from disk once, reused)
42
+ _private_key: str | None = None
43
+
44
+ # GitHub API base URL
45
+ GITHUB_API = "https://api.github.com"
46
+
47
+
48
def _generate_jwt() -> str:
    """
    Build an RS256-signed JWT that identifies this GitHub App.

    The private key is read from disk once — from
    ``settings.github_app_private_key_path`` resolved against the directory
    three levels above this file — and kept in the module-level
    ``_private_key`` for later calls.

    Claims:
        iat: current time minus 60 seconds (tolerates clock drift).
        exp: current time plus 9 minutes (under GitHub's 10-minute maximum).
        iss: ``settings.github_app_id``.

    Returns:
        The encoded JWT as returned by ``jwt.encode``.
    """
    global _private_key

    issued_at = int(time.time())

    if _private_key is None:
        # Read the PEM once; subsequent calls reuse the in-memory copy.
        repo_root = Path(__file__).resolve().parents[2]
        _private_key = (repo_root / settings.github_app_private_key_path).read_text()

    claims = {
        "iat": issued_at - 60,
        "exp": issued_at + (9 * 60),
        "iss": settings.github_app_id,
    }
    return jwt.encode(claims, _private_key, algorithm="RS256")
83
+
84
+
85
async def get_installation_token(installation_id: int) -> str:
    """
    Return an installation access token, using the in-memory cache when possible.

    A cached token is reused while it has more than 60 seconds left before its
    recorded expiry. Otherwise a fresh app JWT is generated and exchanged with
    GitHub for a new token, which is cached with an expiry 3500 seconds from now.

    Args:
        installation_id: The GitHub App installation to get a token for.

    Returns:
        The installation access token string.

    Raises:
        httpx.HTTPStatusError: If GitHub answers the token request with an
            error status (via ``raise_for_status``).
    """

    def _fresh_entry():
        # A cache entry counts as usable only with at least a 60 s safety margin.
        entry = _token_cache.get(installation_id)
        return entry if entry and entry["expires_at"] > time.time() + 60 else None

    # Fast path: no lock needed when the cache already holds a usable token.
    if hit := _fresh_entry():
        return hit["token"]

    async with _token_lock:
        # Re-check under the lock: another coroutine may have refreshed the
        # token while this one was waiting.
        if hit := _fresh_entry():
            return hit["token"]

        app_jwt = _generate_jwt()
        request_headers = {
            "Authorization": f"Bearer {app_jwt}",
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        }

        # Exchange the app-level JWT for an installation-scoped token.
        async with httpx.AsyncClient(timeout=30.0) as client:
            response = await client.post(
                f"{GITHUB_API}/app/installations/{installation_id}/access_tokens",
                headers=request_headers,
            )
            response.raise_for_status()
            data = response.json()

        _token_cache[installation_id] = {
            "token": data["token"],
            "expires_at": time.time() + 3500,
        }

        return data["token"]
app/github/client.py ADDED
@@ -0,0 +1,362 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ GitHub API Client
3
+ ==================
4
+
5
+ This module handles all communication with GitHub's REST API. It provides
6
+ methods to:
7
+
8
+ 1. Fetch PR diff (the raw unified diff showing what changed)
9
+ 2. Fetch file contents (full source code for context/RAG)
10
+ 3. Fetch changed file list (which files were modified)
11
+ 4. Post a PR review with inline comments (anchored to specific lines)
12
+ 5. Post a summary comment on the PR conversation
13
+
14
+ GitHub API Authentication:
15
+ - We authenticate using installation access tokens (from auth.py)
16
+ - Every request includes the token in the Authorization header
17
+ - The token is scoped to the specific repos where our app is installed
18
+
19
+ GitHub API Versioning:
20
+ - We pin to version "2022-11-28" via X-GitHub-Api-Version header
21
+ - This ensures our code doesn't break when GitHub ships API changes
22
+ - This is a best practice for any API integration in production
23
+
24
+ Rate Limits:
25
+ - GitHub Apps get 5,000 requests/hour per installation
26
+ - That's plenty for our use case (~10-20 API calls per PR review)
27
+
28
+ Reference: https://docs.github.com/en/rest
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ import base64
34
+ from dataclasses import dataclass
35
+
36
+ import httpx
37
+ import structlog
38
+
39
+ from app.github.auth import get_installation_token
40
+
41
+ logger = structlog.get_logger()
42
+
43
+ GITHUB_API = "https://api.github.com"
44
+
45
+
46
@dataclass
class PRData:
    """
    Bundle of everything fetched about one pull request.

    Attributes:
        repo_full_name: Repository in "owner/repo" form, e.g. "ninjacode911/myapp".
        pr_number: The pull request number.
        commit_sha: HEAD commit of the PR.
        title: The PR title.
        diff: Raw unified diff of the PR (the actual code changes).
        changed_files: One dict per changed file
            ({filename, status, additions, deletions, patch}).
        file_contents: Mapping of file path to full file content for
            changed files.
    """

    repo_full_name: str
    pr_number: int
    commit_sha: str
    title: str
    diff: str
    changed_files: list[dict]
    file_contents: dict[str, str]
62
+
63
+
64
class GitHubClient:
    """
    Async GitHub REST API client bound to one GitHub App installation.

    Usage:
        client = GitHubClient(installation_id=12345)
        pr_data = await client.fetch_pr_data("ninjacode911/myapp", 42)
        await client.post_review(...)
        await client.post_comment(...)

    Why a class instead of standalone functions?
    - The installation_id is shared across all API calls for one webhook event
    - A class groups these related operations together with shared state
    - Makes it easy to test by mocking one object
    """

    def __init__(self, installation_id: int):
        """Remember which installation every request is authenticated for."""
        self.installation_id = installation_id

    async def _get_headers(self) -> dict[str, str]:
        """
        Build the authorization headers for GitHub API requests.

        The token comes from ``get_installation_token``; this client keeps no
        token cache of its own.
        """
        token = await get_installation_token(self.installation_id)

        return {
            "Authorization": f"token {token}",
            "Accept": "application/vnd.github+json",
            "X-GitHub-Api-Version": "2022-11-28",
        }

    async def fetch_pr_data(self, repo_full_name: str, pr_number: int) -> PRData:
        """
        Fetch all data needed to review a PR in one method.

        This performs four kinds of API request:
          1. GET /repos/{owner}/{repo}/pulls/{pr_number} — PR metadata (JSON)
          2. GET /repos/{owner}/{repo}/pulls/{pr_number} — raw diff
             (same URL, ``Accept: application/vnd.github.diff``)
          3. GET /repos/{owner}/{repo}/pulls/{pr_number}/files — changed files,
             paginated 100 per page, at most 30 pages
          4. GET /repos/{owner}/{repo}/contents/{path} — once per non-removed file

        Full file contents are fetched (not just the diff) so that reviewers
        downstream can see surrounding context such as imports and class
        definitions.

        Args:
            repo_full_name: "owner/repo" format (e.g. "ninjacode911/myapp")
            pr_number: The PR number

        Returns:
            PRData with diff, changed files, and full file contents. A file
            whose content could not be fetched is logged and simply absent
            from ``file_contents``.

        Raises:
            httpx.HTTPStatusError: If the metadata, diff or file-list request
                returns an error status.
        """
        headers = await self._get_headers()
        pr_url = f"{GITHUB_API}/repos/{repo_full_name}/pulls/{pr_number}"

        async with httpx.AsyncClient(timeout=30.0) as http:
            # --- 1. PR metadata ---
            pr_response = await http.get(pr_url, headers=headers)
            pr_response.raise_for_status()
            pr_json = pr_response.json()

            commit_sha = pr_json["head"]["sha"]
            title = pr_json["title"]

            # --- 2. Raw diff ---
            # Overriding Accept makes GitHub return the unified diff as text
            # instead of the JSON representation of the PR.
            diff_response = await http.get(
                pr_url,
                headers={**headers, "Accept": "application/vnd.github.diff"},
            )
            diff_response.raise_for_status()
            diff = diff_response.text

            # --- 3. Changed files (paginated) ---
            changed_files = []
            page = 1
            while page <= 30:  # Cap at 3000 files to prevent runaway loops
                files_response = await http.get(
                    f"{pr_url}/files",
                    headers=headers,
                    params={"per_page": 100, "page": page},
                )
                files_response.raise_for_status()
                batch = files_response.json()
                if not batch:
                    break
                changed_files.extend(batch)
                if len(batch) < 100:
                    # A short page is the last page.
                    break
                page += 1

            # --- 4. Full content of each changed file ---
            file_contents = {}
            for file_info in changed_files:
                filename = file_info["filename"]

                # Removed files have no content at the PR head.
                if file_info["status"] == "removed":
                    continue

                try:
                    content = await self._fetch_file_content(
                        http, headers, repo_full_name, filename, commit_sha
                    )
                    if content is not None:
                        file_contents[filename] = content
                except Exception as e:
                    # Non-fatal: if one file can't be fetched, continue with the rest.
                    logger.warning(
                        "Failed to fetch file content",
                        filename=filename,
                        error=str(e),
                    )

        logger.info(
            "Fetched PR data",
            repo=repo_full_name,
            pr=pr_number,
            changed_files=len(changed_files),
            files_with_content=len(file_contents),
        )

        return PRData(
            repo_full_name=repo_full_name,
            pr_number=pr_number,
            commit_sha=commit_sha,
            title=title,
            diff=diff,
            changed_files=changed_files,
            file_contents=file_contents,
        )

    @staticmethod
    def _decode_content(content_b64: str | None) -> str | None:
        """
        Decode a base64 payload into UTF-8 text.

        Args:
            content_b64: Base64 text; embedded newlines are tolerated.

        Returns:
            The decoded text, or None when the input is not a string, is not
            valid base64, or does not decode as UTF-8 (e.g. a binary file).
        """
        if not isinstance(content_b64, str):
            return None
        try:
            return base64.b64decode(content_b64).decode("utf-8")
        except ValueError:
            # binascii.Error (bad base64) and UnicodeDecodeError (binary data)
            # are both ValueError subclasses.
            return None

    async def _fetch_file_content(
        self,
        http: httpx.AsyncClient,
        headers: dict,
        repo_full_name: str,
        filepath: str,
        ref: str,
    ) -> str | None:
        """
        Fetch the full content of a single file at a specific commit.

        Uses the Contents API, which returns the file body base64-encoded
        inside a JSON document.

        Args:
            http: The httpx client (reused for connection pooling)
            headers: Auth headers
            repo_full_name: "owner/repo"
            filepath: Path to the file in the repo
            ref: Git ref (commit SHA) to fetch the file at

        Returns:
            The file content as a string, or None if the path is missing (404),
            is not a regular file, is larger than 1 MB, or is not UTF-8 text.

        Raises:
            httpx.HTTPStatusError: For error statuses other than 404.
        """
        # Local import so this block does not depend on the file's import header.
        from urllib.parse import quote

        # Percent-encode the path ("/" is kept) so characters such as "#" or
        # "?" in a filename are not parsed as a URL fragment or query string.
        response = await http.get(
            f"{GITHUB_API}/repos/{repo_full_name}/contents/{quote(filepath)}",
            headers=headers,
            params={"ref": ref},
        )

        if response.status_code == 404:
            return None

        response.raise_for_status()
        data = response.json()

        # A directory comes back as a JSON list; symlinks and submodules carry
        # a different "type". Only regular files are reviewed.
        if not isinstance(data, dict) or data.get("type") != "file":
            return None

        # Files > 1MB use a different API (Blobs). Skip for now — these are
        # usually auto-generated or binary files, not worth reviewing.
        if data.get("size", 0) > 1_000_000:
            logger.info("Skipping large file", filepath=filepath, size=data["size"])
            return None

        return self._decode_content(data.get("content", ""))

    async def post_review(
        self,
        repo_full_name: str,
        pr_number: int,
        commit_sha: str,
        body: str,
        comments: list[dict],
    ) -> dict:
        """
        Post a pull request review with inline comments.

        A "review" in GitHub terms is a batch of inline comments submitted
        together with a top-level body and an event type. The event is always
        "COMMENT": the review neither approves nor requests changes, so it
        never blocks the PR at the GitHub level.

        Args:
            repo_full_name: "owner/repo"
            pr_number: PR number
            commit_sha: The exact commit SHA these comments reference
            body: The top-level review summary (shown above inline comments)
            comments: List of dicts with keys:
                - path: file path (e.g. "src/auth/login.py")
                - line: line number in the new version of the file
                - body: the comment text (Markdown supported)

        Returns:
            The GitHub API response as a dict

        Raises:
            httpx.HTTPStatusError: If GitHub rejects the review.
        """
        headers = await self._get_headers()

        review_payload = {
            "commit_id": commit_sha,
            "body": body,
            "event": "COMMENT",
            "comments": comments,
        }

        async with httpx.AsyncClient(timeout=30.0) as http:
            response = await http.post(
                f"{GITHUB_API}/repos/{repo_full_name}/pulls/{pr_number}/reviews",
                headers=headers,
                json=review_payload,
            )
            response.raise_for_status()

        logger.info(
            "Posted PR review",
            repo=repo_full_name,
            pr=pr_number,
            inline_comments=len(comments),
        )

        return response.json()

    async def post_comment(
        self, repo_full_name: str, pr_number: int, body: str
    ) -> dict:
        """
        Post a standalone comment on the PR conversation (not inline).

        This goes through the Issues comments endpoint (PRs are issues in
        GitHub's data model) rather than the Pull Request Review API.

        Args:
            repo_full_name: "owner/repo"
            pr_number: PR number
            body: Comment text (Markdown)

        Returns:
            The GitHub API response as a dict

        Raises:
            httpx.HTTPStatusError: If GitHub rejects the comment.
        """
        headers = await self._get_headers()

        async with httpx.AsyncClient(timeout=30.0) as http:
            response = await http.post(
                f"{GITHUB_API}/repos/{repo_full_name}/issues/{pr_number}/comments",
                headers=headers,
                json={"body": body},
            )
            response.raise_for_status()

        logger.info("Posted PR comment", repo=repo_full_name, pr=pr_number)

        return response.json()
app/github/comment_formatter.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ GitHub Comment Formatter
3
+ =========================
4
+
5
+ Converts our internal Finding and SynthesizedReview data structures into
6
+ GitHub-flavored Markdown for posting as PR comments.
7
+
8
+ Two types of output:
9
+ 1. **Inline comments** — one per finding, anchored to a specific file+line.
10
+ These appear right next to the code, like a human reviewer's comments.
11
+ 2. **Summary comment** — a top-level PR comment with the Health Score,
12
+ finding counts by severity, and an executive summary.
13
+
14
+ Design decisions:
15
+ - We use emoji prefixes for severity to make scanning fast (most devs skim reviews)
16
+ - Each inline comment includes the agent name and category for traceability
17
+ - CWE IDs are linked for security findings (so devs can learn about the vulnerability)
18
+ - Suggested fixes use fenced code blocks for easy copy-paste
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ from app.models.findings import Finding, SynthesizedReview
24
+
25
# Emoji prefix for each severity level
SEVERITY_EMOJI = {
    "critical": "\U0001f6a8",  # 🚨
    "high": "\U0001f7e0",  # 🟠
    "medium": "\U0001f7e1",  # 🟡
    "low": "\u2139\ufe0f",  # ℹ️
}

# Emoji prefix for each reviewing agent
AGENT_EMOJI = {
    "security": "\U0001f512",  # 🔒
    "performance": "\u26a1",  # ⚡
    "style": "\u270f\ufe0f",  # ✏️
}


def format_inline_comment(finding: Finding) -> str:
    """
    Format a single Finding as a GitHub inline comment body.

    Example output:
        🚨 **[CRITICAL — Security] SQL Injection Risk**

        The query on line 47 constructs SQL via string interpolation.

        **Suggested fix:**
        ```
        cursor.execute('SELECT * FROM users WHERE id = %s', (user_id,))
        ```

        > 🔒 Security · CWE-89 · Confidence: 0.92

    Args:
        finding: The finding to render. Reads ``severity``, ``agent``,
            ``title``, ``description``, ``suggested_fix``, ``cwe_id`` and
            ``confidence``.

    Returns:
        The Markdown comment body. A CWE id is rendered as a link to
        cwe.mitre.org when it contains a numeric part (e.g. "CWE-89"); an id
        without one is shown as plain text instead of raising.
    """
    severity_emoji = SEVERITY_EMOJI.get(finding.severity, "")
    agent_emoji = AGENT_EMOJI.get(finding.agent, "")
    severity_upper = finding.severity.upper()
    agent_title = finding.agent.capitalize()

    # Headline and description
    lines = [
        f"{severity_emoji} **[{severity_upper} — {agent_title}] {finding.title}**",
        "",
        finding.description,
    ]

    # Suggested fix, fenced for easy copy-paste
    if finding.suggested_fix:
        lines.extend([
            "",
            "**Suggested fix:**",
            "```",
            finding.suggested_fix,
            "```",
        ])

    # Metadata footer
    footer_parts = [f"{agent_emoji} {agent_title}"]
    if finding.cwe_id:
        # Use the first all-digit segment as the CWE number; a malformed id
        # (no dash / no digits) must not crash comment formatting.
        cwe_num = next(
            (part for part in finding.cwe_id.split("-") if part.isdigit()), ""
        )
        if cwe_num:
            footer_parts.append(
                f"[{finding.cwe_id}](https://cwe.mitre.org/data/definitions/{cwe_num}.html)"
            )
        else:
            footer_parts.append(finding.cwe_id)
    footer_parts.append(f"Confidence: {finding.confidence:.2f}")

    lines.extend(["", f"> {' · '.join(footer_parts)}"])

    return "\n".join(lines)
90
+
91
+
92
def format_summary_comment(review: SynthesizedReview) -> str:
    """
    Format the top-level PR summary comment with Health Score and finding overview.

    The comment contains, in order: a headline with the score, a 20-segment
    Unicode progress bar, a severity count table, the recommendation (when it
    is one of "approve", "request_changes", "block"), the executive summary,
    one collapsible ``<details>`` section per finding, and a footer.

    Args:
        review: The synthesized review. Reads ``health_score``, the four
            ``*_count`` fields, ``recommendation``, ``executive_summary`` and
            ``findings``.

    Returns:
        The Markdown body of the summary comment.
    """
    score = review.health_score

    # Overall status from the score thresholds
    if score >= 80:
        status_emoji = "\u2705"  # ✅
        status_text = "Healthy"
    elif score >= 60:
        status_emoji = "\u26a0\ufe0f"  # ⚠️
        status_text = "Needs Attention"
    else:
        status_emoji = "\u274c"  # ❌
        status_text = "Action Required"

    # Visual health bar (20 segments). Clamp so an out-of-range score can
    # never produce a bar that is shorter or longer than 20 characters.
    filled = max(0, min(20, round(score / 5)))
    bar = "\u2588" * filled + "\u2591" * (20 - filled)

    total = (
        review.critical_count
        + review.high_count
        + review.medium_count
        + review.low_count
    )

    lines = [
        f"## {status_emoji} Ninja Code Guard Review — Health Score: {score}/100",
        "",
        f"`{bar}` **{score}**/100 — {status_text}",
        "",
        "### Findings Summary",
        "",
        "| Severity | Count |",
        "|----------|-------|",
        f"| \U0001f6a8 Critical | {review.critical_count} |",
        f"| \U0001f7e0 High | {review.high_count} |",
        f"| \U0001f7e1 Medium | {review.medium_count} |",
        f"| \u2139\ufe0f Low | {review.low_count} |",
        f"| **Total** | **{total}** |",
        "",
    ]

    # Recommendation line; an unrecognized value is omitted rather than
    # rendered as an empty paragraph.
    rec_map = {
        "approve": "\u2705 **Recommendation: Approve** — No critical issues found.",
        "request_changes": "\u26a0\ufe0f **Recommendation: Request Changes** — Issues found that should be addressed.",
        "block": "\u274c **Recommendation: Block Merge** — Critical issues must be resolved before merging.",
    }
    recommendation_line = rec_map.get(review.recommendation)
    if recommendation_line:
        lines.extend([recommendation_line, ""])

    lines.extend([
        "### Executive Summary",
        "",
        review.executive_summary,
        "",
    ])

    # Detailed findings, so all information is visible in this comment itself
    if review.findings:
        lines.append("### Detailed Findings")
        lines.append("")
        for finding in review.findings:
            severity_emoji = SEVERITY_EMOJI.get(finding.severity, "")
            agent_emoji = AGENT_EMOJI.get(finding.agent, "")
            lines.append(
                f"<details>\n"
                f"<summary>{severity_emoji} <b>[{finding.severity.upper()}]</b> "
                f"{finding.title} — <code>{finding.file_path}:{finding.line_start}</code></summary>\n\n"
                f"{finding.description}\n"
            )
            if finding.suggested_fix:
                lines.append(f"**Suggested fix:**\n```\n{finding.suggested_fix}\n```\n")
            footer_parts = [f"{agent_emoji} {finding.agent.capitalize()}"]
            if finding.cwe_id:
                # Link only when the id has a numeric part; otherwise show the
                # raw id instead of a broken ".../definitions/.html" link.
                cwe_num = next(
                    (part for part in finding.cwe_id.split("-") if part.isdigit()), ""
                )
                if cwe_num:
                    footer_parts.append(
                        f"[{finding.cwe_id}](https://cwe.mitre.org/data/definitions/{cwe_num}.html)"
                    )
                else:
                    footer_parts.append(finding.cwe_id)
            footer_parts.append(f"Confidence: {finding.confidence:.2f}")
            lines.append(f"> {' · '.join(footer_parts)}\n")
            lines.append("</details>\n")

    lines.extend([
        "---",
        "*Reviewed by [Ninja Code Guard](https://github.com/ninjacode911/ninja-code-guard) — Multi-agent code review*",
    ])

    return "\n".join(lines)
190
+
191
+
192
def findings_to_review_comments(findings: list[Finding]) -> list[dict]:
    """
    Turn Findings into the comment dicts used when creating a GitHub review.

    Every finding yields exactly one dict, in input order, with:
      - path: ``finding.file_path``
      - line: ``finding.line_start``
      - side: always "RIGHT" (the new version of the file)
      - body: the Markdown from ``format_inline_comment``

    No filtering happens here: a finding is emitted even if its line is not
    part of the PR diff.
    # NOTE(review): GitHub presumably rejects comments on lines outside the
    # diff hunk — confirm whether the caller filters these beforehand.
    """
    return [
        {
            "path": item.file_path,
            "line": item.line_start,
            "side": "RIGHT",  # RIGHT = new version of the file (what the PR introduces)
            "body": format_inline_comment(item),
        }
        for item in findings
    ]
app/github/webhook.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ GitHub Webhook Signature Validation
3
+ ====================================
4
+
5
+ When GitHub sends a webhook event to our server, it includes a cryptographic
6
+ signature in the `X-Hub-Signature-256` header. This signature proves the request
7
+ genuinely came from GitHub, not from an attacker.
8
+
9
+ The signature is computed as: HMAC-SHA256(webhook_secret, request_body)
10
+
11
+ We recompute the same HMAC on our side and compare. If they match, the request
12
+ is authentic. We use `hmac.compare_digest()` for constant-time comparison to
13
+ prevent timing attacks — where an attacker measures response time differences
14
+ to guess the signature byte by byte.
15
+
16
+ Reference: https://docs.github.com/en/webhooks/using-webhooks/validating-webhook-deliveries
17
+ """
18
+
19
+ import hashlib
20
+ import hmac
21
+
22
+ from fastapi import Header, HTTPException, Request
23
+
24
+ from app.config import settings
25
+
26
+
27
async def validate_webhook_signature(
    request: Request,
    x_hub_signature_256: str = Header("", alias="X-Hub-Signature-256"),
) -> bytes:
    """
    FastAPI dependency that validates the GitHub webhook HMAC-SHA256 signature.

    How this works as a FastAPI dependency:
    - FastAPI's dependency injection system calls this function before your endpoint runs
    - It automatically extracts the X-Hub-Signature-256 header from the request
    - If validation fails, it raises HTTPException and the endpoint never executes
    - If it passes, it returns the raw request body for further processing

    The header defaults to an empty string (instead of being required) so that a
    missing header reaches our own check and yields the documented 401 rather
    than FastAPI's generic 422 validation error.

    Args:
        request: The incoming FastAPI request object (injected automatically)
        x_hub_signature_256: The signature header from GitHub (extracted by FastAPI)

    Returns:
        The raw request body bytes (so the endpoint can parse it as JSON)

    Raises:
        HTTPException 500: If no webhook secret is configured on our side
        HTTPException 401: If the signature is missing, malformed or invalid
    """
    # Reject if webhook secret is not configured — empty secret = no security
    if not settings.github_webhook_secret:
        raise HTTPException(status_code=500, detail="Webhook secret not configured")

    if not x_hub_signature_256:
        raise HTTPException(status_code=401, detail="Missing webhook signature header")

    # GitHub sends the signature as "sha256=<hex_digest>"
    # We need to strip the "sha256=" prefix to get just the hex digest
    prefix = "sha256="
    if not x_hub_signature_256.startswith(prefix):
        raise HTTPException(status_code=401, detail="Invalid signature format")

    received_signature = x_hub_signature_256[len(prefix):]

    # Read the raw request body — we need the exact bytes GitHub used to compute the HMAC.
    # Important: we read raw bytes, NOT parsed JSON, because even a single whitespace
    # difference would produce a completely different HMAC hash.
    body = await request.body()

    # Compute the expected HMAC using our stored webhook secret
    # hmac.new() takes: key (bytes), message (bytes), hash algorithm
    expected_signature = hmac.new(
        key=settings.github_webhook_secret.encode("utf-8"),
        msg=body,
        digestmod=hashlib.sha256,
    ).hexdigest()

    # Constant-time comparison — this is critical for security.
    # A naive `==` comparison short-circuits on the first different byte,
    # which leaks timing information.
    # Both sides are compared as bytes: compare_digest() raises TypeError for
    # str arguments containing non-ASCII characters, which would turn a
    # garbage header into a 500 instead of a clean 401.
    if not hmac.compare_digest(
        expected_signature.encode("utf-8"),
        received_signature.encode("utf-8"),
    ):
        raise HTTPException(status_code=401, detail="Invalid webhook signature")

    return body
app/main.py ADDED
@@ -0,0 +1,355 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Ninja Code Guard — FastAPI Application Entry Point
3
+ =============================================
4
+
5
+ This is the main entry point for the Ninja Code Guard backend. It sets up:
6
+
7
+ 1. The FastAPI application with CORS middleware
8
+ 2. The /health endpoint (used by Render health checks and the pre-warm cron)
9
+ 3. The /webhook/github endpoint (receives PR events from GitHub)
10
+
11
+ Request lifecycle for a PR review:
12
+ GitHub webhook → HMAC validation → Redis cache check → fetch PR data
13
+ → (Week 3+: run agents) → post review comments → cache result
14
+
15
+ The webhook handler uses FastAPI's "Background Tasks" feature to process
16
+ the review asynchronously. This means we return 200 to GitHub immediately
17
+ (within their 10-second timeout) and do the heavy lifting in the background.
18
+ Without this, GitHub would retry the webhook if we took too long.
19
+ """
20
+
21
+ import asyncio
22
+ import json
23
+ import traceback
24
+
25
+ from fastapi import (
26
+ BackgroundTasks, Depends, FastAPI, Header, HTTPException,
27
+ Request, Response, Security,
28
+ )
29
+ from fastapi.middleware.cors import CORSMiddleware
30
+ from fastapi.security import APIKeyHeader
31
+ import structlog
32
+
33
+ from app.config import settings
34
+
35
+ # ── API Key auth for dashboard endpoints ──────────────────────────────────
36
+ _api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)
37
+
38
+
39
async def verify_api_key(api_key: str = Security(_api_key_header)):
    """
    Reject dashboard API requests that don't carry a valid API key.

    Args:
        api_key: Value of the X-API-Key header, or None when the header is
            absent (the header scheme is declared with auto_error=False).

    Raises:
        HTTPException 403: If a key is configured and the request's key is
            missing or does not match.
    """
    # Local import so this block does not depend on the module's import list.
    import hmac

    expected_key = settings.dashboard_api_key
    if not expected_key:
        # NOTE(review): this fails open when the key is unset, including in
        # production — confirm that is intended.
        return  # No key configured → allow (dev mode)

    supplied_key = api_key or ""
    # Constant-time comparison on bytes: `!=` leaks how many leading characters
    # matched, and compare_digest() rejects non-ASCII str arguments.
    if not hmac.compare_digest(
        supplied_key.encode("utf-8"), expected_key.encode("utf-8")
    ):
        raise HTTPException(status_code=403, detail="Invalid or missing API key")
45
+
46
+
47
+ from app.agents.performance_agent import PerformanceAgent
48
+ from app.agents.security_agent import SecurityAgent
49
+ from app.agents.style_agent import StyleAgent
50
+ from app.agents.synthesizer import synthesize
51
+ from app.context.indexer import index_repo_files
52
+ from app.context.retriever import retrieve_context
53
+ from app.db.postgres import save_review
54
+ from app.db.redis_cache import is_already_reviewed, mark_as_reviewed
55
+ from app.github.client import GitHubClient
56
+ from app.github.comment_formatter import (
57
+ findings_to_review_comments,
58
+ format_inline_comment,
59
+ format_summary_comment,
60
+ )
61
+ from app.github.webhook import validate_webhook_signature
62
+
63
+ logger = structlog.get_logger()
64
+
65
# True only when the configured environment is exactly "production".
_is_production = settings.environment == "production"

# The ASGI application object; route decorators below register against it.
app = FastAPI(
    title="Ninja Code Guard",
    description="Multi-agent PR review system",
    version="0.1.0",
    # Disable auto-generated docs in production (exposes API schema)
    docs_url=None if _is_production else "/docs",
    redoc_url=None if _is_production else "/redoc",
    openapi_url=None if _is_production else "/openapi.json",
)

# CORS middleware allows the Next.js dashboard (on Vercel) to call our API.
# In production, restrict origins to your actual Vercel domain.
# The setting is a comma-separated string; blank entries are dropped. When the
# setting is empty we fall back to the local dashboard dev server.
_allowed_origins = (
    [o.strip() for o in settings.cors_allowed_origins.split(",") if o.strip()]
    if settings.cors_allowed_origins
    else ["http://localhost:3000"]
)

# Only GET/POST and the headers the dashboard and GitHub webhooks send are allowed.
app.add_middleware(
    CORSMiddleware,
    allow_origins=_allowed_origins,
    allow_credentials=True,
    allow_methods=["GET", "POST"],
    allow_headers=["Content-Type", "X-API-Key", "X-GitHub-Event", "X-Hub-Signature-256"],
)
92
+
93
+
94
@app.get("/health")
async def health_check():
    """
    Liveness probe: always answers with a fixed "ok" payload.

    Consumers:
    - Render.com health checks (healthCheckPath in render.yaml)
    - The GitHub Actions pre-warm cron that keeps the service from going cold
    - The Next.js dashboard's service status indicator
    """
    status_payload = {"status": "ok", "service": "Ninja Code Guard"}
    return status_payload
105
+
106
+
107
+ # --- Dashboard API Endpoints ---
108
+
109
+
110
@app.get("/api/repos/{owner}/{repo}/reviews")
async def get_reviews(owner: str, repo: str, _=Depends(verify_api_key)):
    """
    Return the stored PR reviews for one repository (dashboard endpoint).

    The response carries the "owner/repo" name under "repo" and whatever
    get_repo_reviews() returns for it under "reviews".
    """
    # Imported lazily, as in the rest of the dashboard endpoints.
    from app.db.postgres import get_repo_reviews

    full_name = f"{owner}/{repo}"
    stored_reviews = await get_repo_reviews(full_name)
    return {"repo": full_name, "reviews": stored_reviews}
117
+
118
+
119
@app.get("/api/repos/{owner}/{repo}/stats")
async def get_stats(owner: str, repo: str, _=Depends(verify_api_key)):
    """
    Get aggregate stats for a repo (used by dashboard).

    Stats are computed over at most the 50 reviews requested from
    get_repo_reviews(), so "total_reviews" is capped at 50.

    Returns:
        A dict with "repo", "total_reviews", "avg_health_score" (rounded to an
        int) and "reviews" (the first 10 fetched reviews). The same keys are
        present when there are no reviews, so clients can rely on one shape.
    """
    from app.db.postgres import get_repo_reviews

    repo_full_name = f"{owner}/{repo}"
    reviews = await get_repo_reviews(repo_full_name, limit=50)
    if not reviews:
        return {
            "repo": repo_full_name,
            "total_reviews": 0,
            "avg_health_score": 0,
            "reviews": [],
        }

    # `or 0` also covers a stored NULL score, which .get()'s default does not.
    scores = [r.get("health_score") or 0 for r in reviews]
    avg_score = sum(scores) / len(scores)
    return {
        "repo": repo_full_name,
        "total_reviews": len(reviews),
        "avg_health_score": round(avg_score),
        "reviews": reviews[:10],
    }
134
+
135
+
136
+ # --- Webhook Actions (what to do for each event type) ---
137
+
138
+ # We only process these PR actions. Others (labeled, assigned, etc.) are irrelevant.
139
+ RELEVANT_PR_ACTIONS = {"opened", "synchronize", "reopened", "ready_for_review"}
140
+
141
+
142
async def _process_pr_review(
    repo_full_name: str,
    pr_number: int,
    commit_sha: str,
    installation_id: int,
) -> None:
    """
    Background task that reviews one pull request end to end.

    Steps, in order:
    1. Fetch the PR diff and file contents from GitHub
    2. Index the files and retrieve RAG context (best effort)
    3. Run the three domain agents concurrently with asyncio.gather
    4. Synthesize the findings into a single review
    5. Post the review (inline comments, or a summary comment as fallback)
    6. Persist the review and mark the commit as reviewed

    Any exception is logged with its traceback and swallowed, since nothing
    awaits the result of a background task.
    """
    try:
        logger.info(
            "Starting PR review",
            repo=repo_full_name,
            pr=pr_number,
            sha=commit_sha[:8],
        )

        # --- 1. Pull request data ---
        client = GitHubClient(installation_id)
        pr_data = await client.fetch_pr_data(repo_full_name, pr_number)

        # --- 2. RAG context ---
        # Failure here is not fatal: agents simply run with an empty context.
        rag_context = ""
        try:
            collection_name = await index_repo_files(
                repo_full_name, pr_data.file_contents
            )
            # Only the first 5000 characters of the diff are used as the query.
            diff_query = pr_data.diff[:5000]
            rag_context = await retrieve_context(collection_name, diff_query)
        except Exception as rag_err:
            logger.warning("RAG context unavailable", error=str(rag_err))

        # --- 3. Domain agents, run concurrently ---
        # gather() returns results in argument order, so the unpacking below
        # lines up with the agent tuple.
        agents = (SecurityAgent(), PerformanceAgent(), StyleAgent())
        security_findings, performance_findings, style_findings = await asyncio.gather(
            *(agent.review(pr_data, rag_context) for agent in agents)
        )

        n_security = len(security_findings)
        n_performance = len(performance_findings)
        n_style = len(style_findings)
        logger.info(
            "All agents completed",
            security=n_security,
            performance=n_performance,
            style=n_style,
            total=n_security + n_performance + n_style,
            repo=repo_full_name,
            pr=pr_number,
        )

        # --- 4. Synthesis: deduplicate, rank, score, summarize ---
        review = synthesize(security_findings, performance_findings, style_findings)

        # --- 5. Publish to GitHub ---
        if not review.findings:
            # Nothing to anchor — post the summary as a plain comment.
            await client.post_comment(
                repo_full_name,
                pr_number,
                format_summary_comment(review),
            )
        else:
            review_comments = findings_to_review_comments(review.findings)
            try:
                await client.post_review(
                    repo_full_name,
                    pr_number,
                    commit_sha,
                    body=format_summary_comment(review),
                    comments=review_comments,
                )
            except Exception as review_err:
                # Inline comments can be rejected (e.g., line not in diff);
                # degrade to a summary-only comment.
                logger.warning(
                    "Inline review failed, posting summary comment instead",
                    error=str(review_err),
                )
                await client.post_comment(
                    repo_full_name, pr_number, format_summary_comment(review)
                )

        # --- 6. Persist (Postgres, for the dashboard) and cache (Redis) ---
        await save_review(repo_full_name, pr_number, commit_sha, review)
        await mark_as_reviewed(commit_sha)

        logger.info(
            "PR review completed",
            repo=repo_full_name,
            pr=pr_number,
            sha=commit_sha[:8],
        )

    except Exception as e:
        # Keep the full traceback in the log so failures can be debugged.
        logger.error(
            "PR review failed",
            repo=repo_full_name,
            pr=pr_number,
            error=str(e),
            traceback=traceback.format_exc(),
        )
264
+
265
+
266
@app.post("/webhook/github")
async def webhook_github(
    request: Request,
    background_tasks: BackgroundTasks,
    x_github_event: str = Header(..., alias="X-GitHub-Event"),
    body: bytes = Depends(validate_webhook_signature),
):
    """
    Receive and process GitHub webhook events.

    This endpoint is called by GitHub whenever a PR event occurs on repos
    where Ninja Code Guard is installed.

    How the flow works:
    1. FastAPI calls validate_webhook_signature() BEFORE this function runs
       (it's a Depends() dependency). If HMAC validation fails, we never get here.
    2. We check the event type, then parse the validated payload.
    3. If it's a PR event we care about, we check Redis cache.
    4. If not cached, we enqueue the review as a background task.
    5. We return 200 immediately — GitHub expects a response within 10 seconds.

    Why background tasks?
    - GitHub has a 10-second webhook timeout. If we don't respond in time,
      GitHub marks the delivery as failed and may retry (causing duplicates).
    - Our actual review takes 15-20 seconds (agent calls + synthesis).
    - So we acknowledge receipt immediately and process in the background.

    Args:
        request: The FastAPI request object
        background_tasks: FastAPI's background task queue
        x_github_event: The event type header (e.g., "pull_request")
        body: The validated request body (returned by validate_webhook_signature)

    Raises:
        HTTPException 400: If the body is not a JSON object or lacks the
            fields a pull_request event must carry.
    """
    # We only handle pull_request events for now. The header alone decides
    # this, so irrelevant events are dismissed without parsing the body.
    if x_github_event != "pull_request":
        logger.debug("Ignoring non-PR event", github_event=x_github_event)
        return {"status": "ignored", "reason": f"event type: {x_github_event}"}

    # Parse the validated JSON payload. ValueError covers both JSONDecodeError
    # and UnicodeDecodeError (e.g. a webhook configured as form-encoded).
    try:
        payload = json.loads(body)
    except ValueError as exc:
        logger.warning("Webhook payload is not valid JSON", error=str(exc))
        raise HTTPException(status_code=400, detail="Invalid JSON payload") from exc
    if not isinstance(payload, dict):
        raise HTTPException(status_code=400, detail="Invalid JSON payload")

    action = payload.get("action", "")
    if action not in RELEVANT_PR_ACTIONS:
        logger.debug("Ignoring irrelevant PR action", action=action)
        return {"status": "ignored", "reason": f"action: {action}"}

    # Extract key data from the webhook payload. A malformed payload is the
    # sender's problem (400), not an internal error (500).
    try:
        pr = payload["pull_request"]
        repo_full_name = payload["repository"]["full_name"]
        pr_number = payload["number"]
        commit_sha = pr["head"]["sha"]
    except (KeyError, TypeError) as exc:
        logger.warning("Malformed pull_request payload", error=repr(exc))
        raise HTTPException(
            status_code=400, detail="Malformed pull_request payload"
        ) from exc

    # Skip draft PRs — they're not ready for review
    if pr.get("draft", False):
        logger.info("Skipping draft PR", repo=repo_full_name, pr=pr_number)
        return {"status": "ignored", "reason": "draft PR"}

    # Check Redis cache — have we already reviewed this exact commit?
    if await is_already_reviewed(commit_sha):
        return {"status": "skipped", "reason": "already reviewed", "sha": commit_sha[:8]}

    # Get the installation ID (needed for GitHub App authentication).
    # `or {}` also handles an explicit `"installation": null`.
    installation_id = (payload.get("installation") or {}).get("id")
    if not installation_id:
        logger.error("No installation ID in webhook payload")
        return Response(status_code=400, content="Missing installation ID")

    # Enqueue the review as a background task
    # This returns 200 to GitHub immediately while processing continues
    background_tasks.add_task(
        _process_pr_review,
        repo_full_name=repo_full_name,
        pr_number=pr_number,
        commit_sha=commit_sha,
        installation_id=installation_id,
    )

    logger.info(
        "Webhook received — review enqueued",
        repo=repo_full_name,
        pr=pr_number,
        sha=commit_sha[:8],
        action=action,
    )

    return {
        "status": "accepted",
        "pr": pr_number,
        "sha": commit_sha[:8],
    }
app/models/__init__.py ADDED
File without changes
app/models/findings.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Core data models for agent findings and PR reviews."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Literal, Optional
6
+ from uuid import UUID, uuid4
7
+
8
+ from pydantic import BaseModel, Field
9
+
10
+
11
class Finding(BaseModel):
    """A single finding produced by a domain agent."""

    # Which domain agent produced the finding.
    agent: Literal["security", "performance", "style"]
    # File the finding refers to; used as the `path` of GitHub review comments.
    file_path: str
    # First affected line; used as the `line` of GitHub review comments.
    line_start: int
    # Last affected line. NOTE(review): nothing here enforces line_end >= line_start.
    line_end: int
    severity: Literal["critical", "high", "medium", "low"]
    category: str
    title: str
    description: str
    # Optional fix text; an empty string means no fix is offered.
    suggested_fix: str = ""
    # Optional CWE identifier — presumably of the form "CWE-<number>"; verify against the agents.
    cwe_id: Optional[str] = None
    # Agent's confidence in the finding, validated to lie in [0.0, 1.0].
    confidence: float = Field(ge=0.0, le=1.0)
25
+
26
+
27
class SynthesizedReview(BaseModel):
    """Final synthesized review output from the Synthesizer Agent."""

    # Overall PR score, validated to lie in [0, 100].
    health_score: int = Field(ge=0, le=100)
    executive_summary: str
    recommendation: Literal["approve", "request_changes", "block"]
    # The findings included in the review.
    findings: list[Finding]
    # Per-severity counts; presumably tallies of `findings` — they are not derived here.
    critical_count: int = 0
    high_count: int = 0
    medium_count: int = 0
    low_count: int = 0
    # Review duration in milliseconds (per the field name).
    duration_ms: int = 0
39
+
40
+
41
class PRReviewRecord(BaseModel):
    """Database record for a completed PR review."""

    # Record identifier; a fresh random UUID is generated when none is supplied.
    id: UUID = Field(default_factory=uuid4)
    # Repository in "owner/repo" form, as used by the dashboard endpoints.
    repo_full_name: str
    pr_number: int
    # SHA of the commit that was reviewed.
    commit_sha: str
    # Overall PR score, validated to lie in [0, 100].
    health_score: int = Field(ge=0, le=100)
    critical_count: int = 0
    high_count: int = 0
    medium_count: int = 0
    low_count: int = 0
    summary: str = ""
    findings: list[Finding] = []
    # Review duration in milliseconds (per the field name).
    duration_ms: int = 0
app/models/webhook_payloads.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """GitHub webhook event payload schemas."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional
6
+
7
+ from pydantic import BaseModel
8
+
9
+
10
class GitHubUser(BaseModel):
    """Subset of a GitHub user object; only the fields declared here are modeled."""

    login: str
    id: int
13
+
14
+
15
class GitHubRepo(BaseModel):
    """Subset of a GitHub repository object carried in webhook payloads."""

    id: int
    # "owner/repo" form, matching how the webhook handler reads repository.full_name.
    full_name: str
    private: bool
    # Falls back to "main" when the payload omits the field.
    default_branch: str = "main"
20
+
21
+
22
class PullRequestHead(BaseModel):
    """Head of a pull request: the commit SHA and the ref it belongs to."""

    # The webhook handler uses pull_request.head.sha as the commit to review.
    sha: str
    ref: str
25
+
26
+
27
class PullRequest(BaseModel):
    """Subset of a GitHub pull request object carried in webhook payloads."""

    number: int
    title: str
    state: str
    head: PullRequestHead
    # Treated as a non-draft PR when the payload omits the flag.
    draft: bool = False
    # Size statistics; optional because they default to None when absent.
    changed_files: Optional[int] = None
    additions: Optional[int] = None
    deletions: Optional[int] = None
36
+
37
+
38
class PullRequestEvent(BaseModel):
    """GitHub pull_request webhook event."""

    action: str  # opened, synchronize, reopened, ready_for_review
    # Pull request number at the top level of the payload.
    number: int
    pull_request: PullRequest
    repository: GitHubRepo
    sender: GitHubUser
46
+
47
+
48
class Installation(BaseModel):
    """GitHub App installation reference; only its id is modeled."""

    id: int
50
+
51
+
52
class PullRequestEventWithInstallation(PullRequestEvent):
    """Pull request event with GitHub App installation context."""

    # None when the payload carries no installation object.
    installation: Optional[Installation] = None
app/services/__init__.py ADDED
File without changes
app/services/health_score.py ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ PR Health Score Calculator
3
+ ===========================
4
+
5
+ Computes a 0-100 health score for a PR based on finding density and severity.
6
+
7
+ Formula:
8
+ base_score = 100
9
+ penalty = sum(SEVERITY_WEIGHTS[f.severity] * CONFIDENCE_FACTOR(f.confidence) for f in findings)
10
+ health_score = max(0, min(100, base_score - penalty))
11
+
12
+ Severity weights are calibrated so that:
13
+ - 1 critical finding drops the score by 25 points (one critical = action required)
14
+ - 1 high finding drops by 15 points
15
+ - 1 medium finding drops by 7 points
16
+ - 1 low finding drops by 2 points
17
+
18
+ Confidence factor scales the penalty — a finding with 0.5 confidence penalizes
19
+ half as much as one with 1.0 confidence. This rewards agents for being honest
20
+ about uncertainty.
21
+
22
+ Score interpretation:
23
+ 90-100: Excellent — safe to merge
24
+ 70-89: Good — minor issues, merge at discretion
25
+ 50-69: Needs attention — address before merging
26
+ 30-49: Poor — significant issues found
27
+ 0-29: Critical — do not merge
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ from app.models.findings import Finding
33
+
34
# Points deducted per finding at full confidence, keyed by severity.
SEVERITY_WEIGHTS = {
    "critical": 25,
    "high": 15,
    "medium": 7,
    "low": 2,
}


def calculate_health_score(findings: list[Finding]) -> int:
    """
    Compute the 0-100 PR Health Score.

    Starting from 100, each finding subtracts its severity weight scaled by
    its confidence. Confidence is floored at 0.3 so that a low-confidence
    finding still counts for something; severities missing from
    SEVERITY_WEIGHTS weigh 5. The result is rounded and clamped to [0, 100].
    An empty list therefore scores 100.
    """
    penalty = 0.0
    for item in findings:
        per_finding = SEVERITY_WEIGHTS.get(item.severity, 5)
        # Floor the multiplier at 0.3.
        scale = item.confidence if item.confidence > 0.3 else 0.3
        penalty += per_finding * scale

    remaining = round(100 - penalty)
    if remaining < 0:
        return 0
    if remaining > 100:
        return 100
    return remaining
62
+
63
+
64
def determine_recommendation(
    findings: list[Finding], health_score: int
) -> str:
    """
    Map findings and health score to a merge recommendation.

    Rules, first match wins:
    - at least one critical finding → "block", whatever the score
    - score below 50 → "request_changes"
    - score below 70 and at least one high finding → "request_changes"
    - anything else → "approve"
    """
    severities = {f.severity for f in findings}

    if "critical" in severities:
        return "block"

    needs_changes = health_score < 50 or (
        health_score < 70 and "high" in severities
    )
    return "request_changes" if needs_changes else "approve"
app/tools/__init__.py ADDED
File without changes
app/tools/bandit_tool.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Bandit Static Analysis Tool
3
+ =============================
4
+
5
+ Bandit is an open-source Python security linter. It parses Python code into an
6
+ Abstract Syntax Tree (AST) and checks each node against a set of security rules.
7
+
8
+ What Bandit catches:
9
+ - SQL injection patterns (string formatting in SQL calls)
10
+ - Use of eval(), exec(), os.system() (command injection risk)
11
+ - Hardcoded passwords and bind addresses
12
+ - Use of insecure hash functions (MD5, SHA1)
13
+ - Insecure temp file creation
14
+ - SSL/TLS verification disabled (requests.get(verify=False))
15
+ - Use of pickle (deserialization attacks)
16
+
17
+ What Bandit CANNOT catch:
18
+ - Business logic flaws
19
+ - Missing authentication/authorization
20
+ - Cross-file data flow (it analyzes one file at a time)
21
+ - Vulnerabilities in non-Python code
22
+
23
+ That's why we combine Bandit (mechanical pattern matching) with the LLM (semantic
24
+ understanding). Bandit provides high-confidence, low-noise signals that anchor the
25
+ LLM's analysis.
26
+
27
+ How it works:
28
+ 1. We write the changed Python files to a temp directory
29
+ 2. Run `bandit -r <dir> -f json` as a subprocess
30
+ 3. Parse the JSON output into a human-readable summary
31
+ 4. Feed this summary into the LLM's prompt as additional context
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import json
37
+ import subprocess
38
+ import tempfile
39
+ from pathlib import Path
40
+
41
+ import structlog
42
+
43
+ logger = structlog.get_logger()
44
+
45
+
46
async def run_bandit(file_contents: dict[str, str]) -> str:
    """
    Run Bandit security analysis on Python files.

    The Bandit subprocess is executed in a worker thread so that this
    coroutine does not block the event loop while Bandit runs (up to the
    30-second timeout); other agents and incoming requests keep being served.

    Args:
        file_contents: dict of {filepath: source_code} for changed files

    Returns:
        A formatted string summarizing Bandit's findings, suitable for
        including in an LLM prompt. Returns empty string if there are no
        Python files, if Bandit is unavailable, or if it fails or times out.
    """
    # Local imports: only this function needs them.
    import asyncio
    import functools

    # Filter to only Python files — Bandit only understands Python
    python_files = {
        path: content
        for path, content in file_contents.items()
        if path.endswith(".py")
    }

    if not python_files:
        return ""

    try:
        # Create a temp directory and write the Python files there.
        # We need files on disk because Bandit operates on the filesystem.
        # TemporaryDirectory removes the directory again when the block exits.
        with tempfile.TemporaryDirectory(prefix="ninjacg_bandit_") as tmpdir:
            tmpdir_path = Path(tmpdir)

            for filepath, content in python_files.items():
                # Recreate the directory structure (e.g., src/auth/login.py)
                file_path = tmpdir_path / filepath
                file_path.parent.mkdir(parents=True, exist_ok=True)
                file_path.write_text(content, encoding="utf-8")

            # Run Bandit as a subprocess
            # -r: recursive (scan all files in directory)
            # -f json: output as JSON (machine-parseable)
            # -ll: only report medium severity and above
            # --quiet: suppress progress bar
            run_bandit_cmd = functools.partial(
                subprocess.run,
                [
                    "bandit",
                    "-r", str(tmpdir_path),
                    "-f", "json",
                    "-ll",
                    "--quiet",
                ],
                capture_output=True,
                text=True,
                timeout=30,  # Kill if it takes too long
            )
            # subprocess.run() blocks; hand it to the default executor.
            # Exceptions it raises (TimeoutExpired, FileNotFoundError, ...)
            # propagate through the await and are handled below.
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(None, run_bandit_cmd)

            # Bandit exit codes:
            # 0 = no issues found
            # 1 = issues found (this is NOT an error)
            # 2+ = actual error
            if result.returncode > 1:
                logger.warning("Bandit returned error", stderr=result.stderr[:500])
                return ""

            if not result.stdout.strip():
                return ""

            # Parse the JSON output
            bandit_output = json.loads(result.stdout)
            findings = bandit_output.get("results", [])

            if not findings:
                return "Bandit static analysis: No security issues detected."

            # Format findings as a human-readable summary for the LLM
            summary_lines = [
                f"Bandit static analysis found {len(findings)} issue(s):\n"
            ]

            for i, finding in enumerate(findings, 1):
                # Map the temp file path back to the original file path
                temp_path = finding.get("filename", "")
                original_path = _map_temp_to_original(temp_path, tmpdir, python_files)

                severity = finding.get("issue_severity", "UNKNOWN")
                confidence = finding.get("issue_confidence", "UNKNOWN")
                text = finding.get("issue_text", "")
                test_id = finding.get("test_id", "")
                line_no = finding.get("line_number", 0)
                code = finding.get("code", "").strip()

                summary_lines.append(
                    f"{i}. [{severity}/{confidence}] {text}\n"
                    f"   File: {original_path}, Line: {line_no}\n"
                    f"   Test: {test_id}\n"
                    f"   Code: {code}\n"
                )

            summary = "\n".join(summary_lines)
            logger.info("Bandit analysis complete", findings_count=len(findings))
            return summary

    except subprocess.TimeoutExpired:
        logger.warning("Bandit timed out after 30 seconds")
        return ""
    except FileNotFoundError:
        # Bandit not installed — this is OK, the LLM can still analyze
        logger.warning("Bandit not found in PATH — skipping static analysis")
        return ""
    except Exception as e:
        # Best effort: static analysis must never break the review.
        logger.warning("Bandit analysis failed", error=str(e))
        return ""
155
+
156
+
157
+ def _map_temp_to_original(
158
+ temp_path: str, tmpdir: str, original_files: dict[str, str]
159
+ ) -> str:
160
+ """Map a temp directory path back to the original file path."""
161
+ try:
162
+ # The temp path looks like: /tmp/ninjacg_bandit_xxx/src/auth/login.py
163
+ # We need to strip the tmpdir prefix to get: src/auth/login.py
164
+ relative = str(Path(temp_path).relative_to(tmpdir))
165
+ # Normalize path separators
166
+ relative = relative.replace("\\", "/")
167
+ # Verify it's one of our original files
168
+ if relative in original_files:
169
+ return relative
170
+ except (ValueError, Exception):
171
+ pass
172
+ # Fallback: return the filename only
173
+ return Path(temp_path).name
app/tools/detect_secrets_tool.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ detect-secrets Tool
3
+ ====================
4
+
5
+ detect-secrets scans code for hardcoded credentials: API keys, passwords,
6
+ database connection strings, AWS access keys, private keys, etc.
7
+
8
+ Why a dedicated tool for secrets?
9
+ - Hardcoded secrets are the #1 most common security finding in code reviews
10
+ - They're easy to detect with regex/entropy analysis but easy to miss manually
11
+ - detect-secrets uses both pattern matching AND Shannon entropy analysis:
12
+ - Pattern matching: finds things that LOOK like API keys (e.g., "sk_live_...")
13
+ - Entropy analysis: finds random-looking strings that might be secrets
14
+ (high entropy = lots of randomness = probably a key, not a variable name)
15
+
16
+ What Shannon entropy means:
17
+ - "hello" has low entropy (~2.8 bits/char) — predictable, probably not a secret
18
+ - "a3f8g2kx9m" has high entropy (~3.9 bits/char) — random, might be a secret
19
+ - detect-secrets flags strings above a configurable entropy threshold
20
+
21
+ We run this on the PR diff specifically (not full files) because we only care
22
+ about NEWLY introduced secrets, not pre-existing ones.
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ import json
28
+ import subprocess
29
+ import tempfile
30
+ from pathlib import Path
31
+
32
+ import structlog
33
+
34
+ logger = structlog.get_logger()
35
+
36
+
37
async def run_detect_secrets(file_contents: dict[str, str]) -> str:
    """
    Scan changed files for hardcoded secrets.

    The detect-secrets subprocess is executed in a worker thread so that this
    coroutine does not block the event loop while the scan runs (up to the
    30-second timeout).

    Args:
        file_contents: dict of {filepath: source_code}

    Returns:
        A formatted string listing detected secrets (type, file and line —
        never the secret value itself), suitable for including in an LLM
        prompt. Empty string if there is nothing to scan, if detect-secrets
        is unavailable, or if the scan fails or times out.
    """
    # Local imports: only this function needs them.
    import asyncio
    import functools

    if not file_contents:
        return ""

    try:
        with tempfile.TemporaryDirectory(prefix="ninjacg_secrets_") as tmpdir:
            tmpdir_path = Path(tmpdir)

            # detect-secrets works on files, so materialize the contents on disk.
            for filepath, content in file_contents.items():
                file_path = tmpdir_path / filepath
                file_path.parent.mkdir(parents=True, exist_ok=True)
                file_path.write_text(content, encoding="utf-8")

            # Run detect-secrets scan
            # --all-files: scan all file types
            run_scan_cmd = functools.partial(
                subprocess.run,
                [
                    "detect-secrets", "scan",
                    str(tmpdir_path),
                    "--all-files",
                ],
                capture_output=True,
                text=True,
                timeout=30,
            )
            # subprocess.run() blocks; hand it to the default executor.
            # Exceptions it raises propagate through the await and are
            # handled below.
            loop = asyncio.get_running_loop()
            result = await loop.run_in_executor(None, run_scan_cmd)

            # A non-zero exit with output is still parsed; only a failure
            # that produced no output is treated as an error.
            if result.returncode != 0 and not result.stdout:
                logger.warning("detect-secrets error", stderr=result.stderr[:500])
                return ""

            if not result.stdout.strip():
                return ""

            scan_results = json.loads(result.stdout)
            results_map = scan_results.get("results", {})

            # Count total secrets found
            total_secrets = sum(len(secrets) for secrets in results_map.values())

            if total_secrets == 0:
                return "detect-secrets scan: No hardcoded secrets detected."

            # Format findings
            summary_lines = [
                f"detect-secrets found {total_secrets} potential secret(s):\n"
            ]

            for file_path, secrets in results_map.items():
                # Map temp path back to original
                try:
                    relative = str(Path(file_path).relative_to(tmpdir)).replace("\\", "/")
                except ValueError:
                    # Reported path is not under tmpdir — fall back to the file name.
                    relative = Path(file_path).name

                for secret in secrets:
                    secret_type = secret.get("type", "Unknown")
                    line_no = secret.get("line_number", 0)
                    summary_lines.append(
                        f"- {secret_type} in {relative} at line {line_no}"
                    )

            summary = "\n".join(summary_lines)
            logger.info("detect-secrets scan complete", secrets_found=total_secrets)
            return summary

    except FileNotFoundError:
        logger.warning("detect-secrets not found in PATH — skipping")
        return ""
    except Exception as e:
        # Best effort (this also covers subprocess.TimeoutExpired): the scan
        # must never break the review.
        logger.warning("detect-secrets scan failed", error=str(e))
        return ""
app/tools/linter_tool.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Linter Tool (Ruff)
3
+ ===================
4
+
5
+ Ruff is an extremely fast Python linter written in Rust. It replaces
6
+ flake8, isort, pycodestyle, and dozens of other tools in a single binary.
7
+ It runs 10-100x faster than traditional Python linters.
8
+
9
+ What Ruff catches:
10
+ - Unused imports (F401)
11
+ - Undefined names (F821)
12
+ - Unused variables (F841)
13
+ - Import ordering issues (I001)
14
+ - Unnecessary f-strings (F541)
15
+ - Bare except clauses (E722)
16
+ - And 800+ other rules
17
+
18
+ We run Ruff on the changed files and feed the output to the Style Agent
19
+ as additional context. The LLM then combines Ruff's mechanical findings
20
+ with its own understanding of readability and maintainability.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import json
26
+ import subprocess
27
+ import tempfile
28
+ from pathlib import Path
29
+
30
+ import structlog
31
+
32
+ logger = structlog.get_logger()
33
+
34
+
35
async def run_ruff(file_contents: dict[str, str]) -> str:
    """
    Run Ruff linter on Python files.

    Only entries of ``file_contents`` whose path ends with ``.py`` are linted.
    The blocking work (writing temp files and running the ``ruff`` subprocess)
    is delegated to a worker thread so that awaiting this coroutine does not
    stall the event loop.

    Args:
        file_contents: Mapping of file path to file text.

    Returns:
        A formatted string of linting issues (at most 20 are listed), or an
        empty string when there is nothing to lint, Ruff reports no issues,
        or Ruff could not be run.
    """
    # Local import: asyncio is not part of this module's import block.
    import asyncio

    python_files = {
        path: content
        for path, content in file_contents.items()
        if path.endswith(".py")
    }

    if not python_files:
        return ""

    # subprocess.run() can block for up to 30 s; asyncio.to_thread
    # (Python 3.9+) keeps the event loop responsive while Ruff runs.
    return await asyncio.to_thread(_run_ruff_blocking, python_files)


def _run_ruff_blocking(python_files: dict[str, str]) -> str:
    """Write ``python_files`` to a temp directory, run Ruff on it, and format the findings."""
    try:
        with tempfile.TemporaryDirectory(prefix="ninjacg_ruff_") as tmpdir:
            # Resolve symlinks up front so the containment check below and the
            # later relative_to() call compare like with like.
            tmpdir_path = Path(tmpdir).resolve()

            files_written = 0
            for filepath, content in python_files.items():
                file_path = (tmpdir_path / filepath).resolve()
                # An absolute path or one containing ".." would land outside
                # the temp directory; never write there.
                if tmpdir_path not in file_path.parents:
                    logger.warning(
                        "Skipping file path outside scan directory", path=filepath
                    )
                    continue
                file_path.parent.mkdir(parents=True, exist_ok=True)
                file_path.write_text(content, encoding="utf-8")
                files_written += 1

            if not files_written:
                return ""

            # Run ruff check with JSON output
            # --output-format json: machine-parseable output
            # --select ...: an explicit list of rule families (not ALL)
            # --ignore E501,E402: skip line-length (too noisy, not actionable)
            #   and the import-position rule
            result = subprocess.run(
                [
                    "ruff", "check",
                    str(tmpdir_path),
                    "--output-format", "json",
                    "--select", "F,E,W,I,N,UP,B,A,SIM,RET,ARG",
                    "--ignore", "E501,E402",
                ],
                capture_output=True,
                text=True,
                timeout=30,
            )

            # Ruff exit code 1 means issues found (not an error), so the
            # return code is deliberately not checked here.
            stdout = result.stdout.strip()
            if not stdout or stdout == "[]":
                return ""

            issues = json.loads(stdout)

            if not issues:
                return ""

            # Format findings
            summary_lines = [f"Ruff linter found {len(issues)} issue(s):\n"]

            for issue in issues[:20]:  # Cap at 20 to avoid prompt bloat
                # A missing or null code would otherwise render as "[None]".
                code = issue.get("code") or "?"
                message = issue.get("message", "")
                filename = issue.get("filename", "")
                line = (issue.get("location") or {}).get("row", 0)

                # Map the temp path back to the original relative path.
                try:
                    relative = (
                        Path(filename).resolve().relative_to(tmpdir_path).as_posix()
                    )
                except ValueError:
                    relative = Path(filename).name

                summary_lines.append(f"- [{code}] {relative}:{line} — {message}")

            if len(issues) > 20:
                summary_lines.append(f"  ... and {len(issues) - 20} more issues")

            summary = "\n".join(summary_lines)
            logger.info("Ruff analysis complete", issues_count=len(issues))
            return summary

    except FileNotFoundError:
        logger.warning("ruff not found in PATH — skipping lint analysis")
        return ""
    except Exception as e:
        # Best-effort tool: a timeout, bad JSON, or I/O error must not break
        # the review; log it and report nothing.
        logger.warning("Ruff analysis failed", error=str(e))
        return ""
app/tools/radon_tool.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Radon Complexity Analysis Tool
3
+ ================================
4
+
5
+ Radon measures cyclomatic complexity — the number of independent execution paths
6
+ through a function. Higher complexity = more branches = harder to test and maintain,
7
+ AND often correlates with performance issues (deeply nested conditionals often
8
+ indicate O(n²) or worse algorithms).
9
+
10
+ Complexity grades:
11
+ A (1-5): Simple, low risk
12
+ B (6-10): Moderate complexity
13
+ C (11-20): High complexity — consider refactoring
13
+ D (21-30): Very high — likely performance and maintenance issues
14
+ E (31-40): Extremely complex
15
+ F (41+): Unmaintainable
17
+
18
+ We report functions with complexity grade C or worse (>10) to the Performance Agent.
19
+ The agent uses this as a signal to look deeper at those functions for algorithmic issues.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import json
25
+ import subprocess
26
+ import tempfile
27
+ from pathlib import Path
28
+
29
+ import structlog
30
+
31
+ logger = structlog.get_logger()
32
+
33
+
34
async def run_radon(file_contents: dict[str, str]) -> str:
    """
    Run radon cyclomatic complexity analysis on Python files.

    Only entries of ``file_contents`` whose path ends with ``.py`` are
    analysed. The blocking work (writing temp files and running the ``radon``
    subprocess) is delegated to a worker thread so that awaiting this
    coroutine does not stall the event loop.

    Args:
        file_contents: Mapping of file path to file text.

    Returns:
        A formatted string summarizing high-complexity functions, or an empty
        string when there is nothing to analyse, nothing is reported, or
        radon could not be run.
    """
    # Local import: asyncio is not part of this module's import block.
    import asyncio

    python_files = {
        path: content
        for path, content in file_contents.items()
        if path.endswith(".py")
    }

    if not python_files:
        return ""

    # subprocess.run() can block for up to 30 s; asyncio.to_thread
    # (Python 3.9+) keeps the event loop responsive while radon runs.
    return await asyncio.to_thread(_run_radon_blocking, python_files)


def _run_radon_blocking(python_files: dict[str, str]) -> str:
    """Write ``python_files`` to a temp directory, run radon on it, and format the findings."""
    try:
        with tempfile.TemporaryDirectory(prefix="ninjacg_radon_") as tmpdir:
            # Resolve symlinks up front so the containment check below and the
            # later relative_to() call compare like with like.
            tmpdir_path = Path(tmpdir).resolve()

            files_written = 0
            for filepath, content in python_files.items():
                file_path = (tmpdir_path / filepath).resolve()
                # An absolute path or one containing ".." would land outside
                # the temp directory; never write there.
                if tmpdir_path not in file_path.parents:
                    logger.warning(
                        "Skipping file path outside scan directory", path=filepath
                    )
                    continue
                file_path.parent.mkdir(parents=True, exist_ok=True)
                file_path.write_text(content, encoding="utf-8")
                files_written += 1

            if not files_written:
                return ""

            # Run radon cc (cyclomatic complexity) with JSON output
            # -j: JSON output
            # -n C: only show grade C or worse (complexity > 10)
            result = subprocess.run(
                ["radon", "cc", "-j", "-n", "C", str(tmpdir_path)],
                capture_output=True,
                text=True,
                timeout=30,
            )

            stdout = result.stdout.strip()
            if not stdout or stdout == "{}":
                return ""

            radon_output = json.loads(stdout)

            # Collect high-complexity functions
            findings = []
            for file_path, functions in radon_output.items():
                # Map the temp path back to the original relative path.
                try:
                    relative = (
                        Path(file_path).resolve().relative_to(tmpdir_path).as_posix()
                    )
                except ValueError:
                    relative = Path(file_path).name

                for func in functions:
                    # Skip anything that is not a per-function record
                    # (iterating a non-list value yields non-dict items).
                    if not isinstance(func, dict):
                        continue
                    name = func.get("name", "unknown")
                    complexity = func.get("complexity", 0)
                    rank = func.get("rank", "?")
                    lineno = func.get("lineno", 0)
                    findings.append(
                        f"- {relative}:{lineno} — `{name}()` complexity={complexity} (grade {rank})"
                    )

            if not findings:
                return ""

            summary = (
                f"Radon complexity analysis found {len(findings)} high-complexity function(s):\n"
                + "\n".join(findings)
            )
            logger.info("Radon analysis complete", high_complexity_count=len(findings))
            return summary

    except FileNotFoundError:
        logger.warning("radon not found in PATH — skipping complexity analysis")
        return ""
    except Exception as e:
        # Best-effort tool: a timeout, bad JSON, or I/O error must not break
        # the review; log it and report nothing.
        logger.warning("Radon analysis failed", error=str(e))
        return ""
dashboard/.gitignore ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2
+
3
+ # dependencies
4
+ /node_modules
5
+ /.pnp
6
+ .pnp.*
7
+ .yarn/*
8
+ !.yarn/patches
9
+ !.yarn/plugins
10
+ !.yarn/releases
11
+ !.yarn/versions
12
+
13
+ # testing
14
+ /coverage
15
+
16
+ # next.js
17
+ /.next/
18
+ /out/
19
+
20
+ # production
21
+ /build
22
+
23
+ # misc
24
+ .DS_Store
25
+ *.pem
26
+
27
+ # debug
28
+ npm-debug.log*
29
+ yarn-debug.log*
30
+ yarn-error.log*
31
+ .pnpm-debug.log*
32
+
33
+ # env files (can opt-in for committing if needed)
34
+ .env*
35
+
36
+ # vercel
37
+ .vercel
38
+
39
+ # typescript
40
+ *.tsbuildinfo
41
+ next-env.d.ts
dashboard/AGENTS.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ <!-- BEGIN:nextjs-agent-rules -->
2
+ # This is NOT the Next.js you know
3
+
4
+ This version has breaking changes — APIs, conventions, and file structure may all differ from your training data. Read the relevant guide in `node_modules/next/dist/docs/` before writing any code. Heed deprecation notices.
5
+ <!-- END:nextjs-agent-rules -->
dashboard/CLAUDE.md ADDED
@@ -0,0 +1 @@
 
 
1
+ @AGENTS.md
dashboard/README.md ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ This is a [Next.js](https://nextjs.org) project bootstrapped with [`create-next-app`](https://nextjs.org/docs/app/api-reference/cli/create-next-app).
2
+
3
+ ## Getting Started
4
+
5
+ First, run the development server:
6
+
7
+ ```bash
8
+ npm run dev
9
+ # or
10
+ yarn dev
11
+ # or
12
+ pnpm dev
13
+ # or
14
+ bun dev
15
+ ```
16
+
17
+ Open [http://localhost:3000](http://localhost:3000) with your browser to see the result.
18
+
19
+ You can start editing the page by modifying `app/page.tsx`. The page auto-updates as you edit the file.
20
+
21
+ This project uses [`next/font`](https://nextjs.org/docs/app/building-your-application/optimizing/fonts) to automatically optimize and load [Geist](https://vercel.com/font), a new font family for Vercel.
22
+
23
+ ## Learn More
24
+
25
+ To learn more about Next.js, take a look at the following resources:
26
+
27
+ - [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API.
28
+ - [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial.
29
+
30
+ You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js) - your feedback and contributions are welcome!
31
+
32
+ ## Deploy on Vercel
33
+
34
+ The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js.
35
+
36
+ Check out our [Next.js deployment documentation](https://nextjs.org/docs/app/building-your-application/deploying) for more details.
dashboard/app/favicon.ico ADDED
dashboard/app/globals.css ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ @import "tailwindcss";
2
+
3
+ :root {
4
+ --background: #050507;
5
+ --foreground: #f4f4f5;
6
+ --glass-bg: rgba(255, 255, 255, 0.03);
7
+ --glass-border: rgba(255, 255, 255, 0.06);
8
+ --glass-hover: rgba(255, 255, 255, 0.06);
9
+ }
10
+
11
+ @theme inline {
12
+ --color-background: var(--background);
13
+ --color-foreground: var(--foreground);
14
+ --font-sans: var(--font-geist-sans);
15
+ --font-mono: var(--font-geist-mono);
16
+ }
17
+
18
+ body {
19
+ background: var(--background);
20
+ color: var(--foreground);
21
+ font-family: var(--font-sans, system-ui, -apple-system, sans-serif);
22
+ }
23
+
24
+ /* ─── Dot grid background ─── */
25
+ .dot-grid {
26
+ background-image: radial-gradient(circle, rgba(255, 255, 255, 0.04) 1px, transparent 1px);
27
+ background-size: 32px 32px;
28
+ }
29
+
30
+ /* ─── Animated gradient orbs ─── */
31
+ .gradient-orb {
32
+ position: absolute;
33
+ border-radius: 50%;
34
+ filter: blur(120px);
35
+ opacity: 0.15;
36
+ pointer-events: none;
37
+ animation: orbFloat 20s ease-in-out infinite;
38
+ }
39
+
40
+ .gradient-orb-1 {
41
+ width: 600px;
42
+ height: 600px;
43
+ background: linear-gradient(135deg, #7c3aed, #6d28d9);
44
+ top: -200px;
45
+ right: -100px;
46
+ animation-delay: 0s;
47
+ }
48
+
49
+ .gradient-orb-2 {
50
+ width: 500px;
51
+ height: 500px;
52
+ background: linear-gradient(135deg, #06b6d4, #0891b2);
53
+ bottom: -150px;
54
+ left: -100px;
55
+ animation-delay: -7s;
56
+ }
57
+
58
+ .gradient-orb-3 {
59
+ width: 400px;
60
+ height: 400px;
61
+ background: linear-gradient(135deg, #ec4899, #be185d);
62
+ top: 40%;
63
+ left: 50%;
64
+ animation-delay: -14s;
65
+ }
66
+
67
+ @keyframes orbFloat {
68
+ 0%, 100% { transform: translate(0, 0) scale(1); }
69
+ 25% { transform: translate(30px, -40px) scale(1.05); }
70
+ 50% { transform: translate(-20px, 20px) scale(0.95); }
71
+ 75% { transform: translate(40px, 30px) scale(1.03); }
72
+ }
73
+
74
+ /* ─── Glass card ─── */
75
+ .glass {
76
+ background: var(--glass-bg);
77
+ border: 1px solid var(--glass-border);
78
+ backdrop-filter: blur(20px);
79
+ -webkit-backdrop-filter: blur(20px);
80
+ }
81
+
82
+ .glass-hover:hover {
83
+ background: var(--glass-hover);
84
+ border-color: rgba(255, 255, 255, 0.1);
85
+ }
86
+
87
+ /* ─── Glow effects ─── */
88
+ .glow-violet { box-shadow: 0 0 40px -10px rgba(139, 92, 246, 0.3); }
89
+ .glow-green { box-shadow: 0 0 40px -10px rgba(34, 197, 94, 0.3); }
90
+ .glow-red { box-shadow: 0 0 40px -10px rgba(239, 68, 68, 0.3); }
91
+ .glow-amber { box-shadow: 0 0 40px -10px rgba(245, 158, 11, 0.3); }
92
+
93
+ /* ─── Gradient text ─── */
94
+ .text-gradient {
95
+ background: linear-gradient(135deg, #c4b5fd 0%, #818cf8 50%, #6d28d9 100%);
96
+ -webkit-background-clip: text;
97
+ -webkit-text-fill-color: transparent;
98
+ background-clip: text;
99
+ }
100
+
101
+ .text-gradient-cyan {
102
+ background: linear-gradient(135deg, #67e8f9 0%, #22d3ee 50%, #06b6d4 100%);
103
+ -webkit-background-clip: text;
104
+ -webkit-text-fill-color: transparent;
105
+ background-clip: text;
106
+ }
107
+
108
+ /* ─── Shimmer border animation ─── */
109
+ @keyframes shimmer {
110
+ 0% { background-position: 200% 0; }
111
+ 100% { background-position: -200% 0; }
112
+ }
113
+
114
+ .shimmer-border {
115
+ background: linear-gradient(
116
+ 90deg,
117
+ transparent 0%,
118
+ rgba(139, 92, 246, 0.15) 25%,
119
+ rgba(6, 182, 212, 0.15) 50%,
120
+ rgba(139, 92, 246, 0.15) 75%,
121
+ transparent 100%
122
+ );
123
+ background-size: 200% 100%;
124
+ animation: shimmer 6s ease-in-out infinite;
125
+ }
126
+
127
+ /* ─── Scrollbar ─── */
128
+ ::-webkit-scrollbar {
129
+ width: 6px;
130
+ height: 6px;
131
+ }
132
+ ::-webkit-scrollbar-track {
133
+ background: transparent;
134
+ }
135
+ ::-webkit-scrollbar-thumb {
136
+ background: rgba(113, 113, 122, 0.3);
137
+ border-radius: 3px;
138
+ }
139
+ ::-webkit-scrollbar-thumb:hover {
140
+ background: rgba(113, 113, 122, 0.5);
141
+ }
142
+
143
+ /* ─── Noise texture overlay ─── */
144
+ .noise::before {
145
+ content: "";
146
+ position: fixed;
147
+ inset: 0;
148
+ z-index: 100;
149
+ pointer-events: none;
150
+ opacity: 0.015;
151
+ background-image: url("data:image/svg+xml,%3Csvg viewBox='0 0 256 256' xmlns='http://www.w3.org/2000/svg'%3E%3Cfilter id='n'%3E%3CfeTurbulence type='fractalNoise' baseFrequency='0.9' numOctaves='4' stitchTiles='stitch'/%3E%3C/filter%3E%3Crect width='100%25' height='100%25' filter='url(%23n)'/%3E%3C/svg%3E");
152
+ }
dashboard/app/layout.tsx ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import type { Metadata } from "next";
2
+ import { Geist, Geist_Mono } from "next/font/google";
3
+ import Link from "next/link";
4
+ import "./globals.css";
5
+
6
+ const geistSans = Geist({
7
+ variable: "--font-geist-sans",
8
+ subsets: ["latin"],
9
+ });
10
+
11
+ const geistMono = Geist_Mono({
12
+ variable: "--font-geist-mono",
13
+ subsets: ["latin"],
14
+ });
15
+
16
+ export const metadata: Metadata = {
17
+ title: "Ninja Code Guard",
18
+ description:
19
+ "Multi-agent AI code review dashboard — security, performance & style analysis at a glance.",
20
+ };
21
+
22
+ export default function RootLayout({
23
+ children,
24
+ }: Readonly<{
25
+ children: React.ReactNode;
26
+ }>) {
27
+ return (
28
+ <html
29
+ lang="en"
30
+ className={`${geistSans.variable} ${geistMono.variable} h-full antialiased dark`}
31
+ >
32
+ <body className="noise min-h-full flex flex-col bg-[#050507] text-zinc-100">
33
+ {/* ── Gradient orbs (ambient background) ── */}
34
+ <div className="fixed inset-0 overflow-hidden pointer-events-none z-0">
35
+ <div className="gradient-orb gradient-orb-1" />
36
+ <div className="gradient-orb gradient-orb-2" />
37
+ <div className="gradient-orb gradient-orb-3" />
38
+ </div>
39
+
40
+ {/* ── Navigation ── */}
41
+ <header className="sticky top-0 z-50 border-b border-white/[0.06] bg-[#050507]/70 backdrop-blur-2xl">
42
+ <div className="mx-auto flex h-16 max-w-7xl items-center justify-between px-6 lg:px-8">
43
+ <Link href="/" className="flex items-center gap-3 group">
44
+ <span className="relative flex items-center justify-center w-9 h-9 rounded-xl bg-gradient-to-br from-violet-600 to-violet-800 shadow-lg shadow-violet-900/30 group-hover:shadow-violet-700/40 transition-shadow">
45
+ <svg
46
+ xmlns="http://www.w3.org/2000/svg"
47
+ viewBox="0 0 24 24"
48
+ fill="currentColor"
49
+ className="w-5 h-5 text-white"
50
+ >
51
+ <path
52
+ fillRule="evenodd"
53
+ d="M12.516 2.17a.75.75 0 00-1.032 0 11.209 11.209 0 01-7.877 3.08.75.75 0 00-.722.515A12.74 12.74 0 002.25 9.75c0 5.942 4.064 10.933 9.563 12.348a.749.749 0 00.374 0c5.499-1.415 9.563-6.406 9.563-12.348 0-1.39-.223-2.73-.635-3.985a.75.75 0 00-.722-.516 11.209 11.209 0 01-7.877-3.08z"
54
+ clipRule="evenodd"
55
+ />
56
+ </svg>
57
+ </span>
58
+ <div className="flex flex-col">
59
+ <span className="text-[15px] font-semibold tracking-tight text-white leading-tight">
60
+ Ninja Code Guard
61
+ </span>
62
+ <span className="text-[10px] font-medium text-zinc-500 tracking-widest uppercase">
63
+ AI Review Platform
64
+ </span>
65
+ </div>
66
+ </Link>
67
+
68
+ <nav className="flex items-center gap-1">
69
+ <Link
70
+ href="/"
71
+ className="px-4 py-2 text-sm text-zinc-400 hover:text-white hover:bg-white/[0.04] rounded-lg transition-all duration-200"
72
+ >
73
+ Dashboard
74
+ </Link>
75
+ <a
76
+ href="https://github.com"
77
+ target="_blank"
78
+ rel="noopener noreferrer"
79
+ className="px-4 py-2 text-sm text-zinc-400 hover:text-white hover:bg-white/[0.04] rounded-lg transition-all duration-200"
80
+ >
81
+ GitHub
82
+ </a>
83
+ </nav>
84
+ </div>
85
+ </header>
86
+
87
+ {/* ── Content ── */}
88
+ <main className="relative z-10 flex-1">{children}</main>
89
+
90
+ {/* ── Footer ── */}
91
+ <footer className="relative z-10 border-t border-white/[0.04] py-8">
92
+ <div className="mx-auto max-w-7xl px-6 lg:px-8 flex items-center justify-between">
93
+ <p className="text-xs text-zinc-600">
94
+ &copy; {new Date().getFullYear()} Ninja Code Guard
95
+ </p>
96
+ <p className="text-xs text-zinc-700">
97
+ Multi-Agent AI Code Review Platform
98
+ </p>
99
+ </div>
100
+ </footer>
101
+ </body>
102
+ </html>
103
+ );
104
+ }
dashboard/app/page.tsx ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client";
2
+
3
+ import Link from "next/link";
4
+ import { motion } from "framer-motion";
5
+ import { MOCK_REPOS } from "@/lib/api";
6
+ import {
7
+ StaggerContainer,
8
+ StaggerItem,
9
+ FadeIn,
10
+ HoverCard,
11
+ } from "@/components/motion";
12
+ import { AnimatedCounter } from "@/components/AnimatedCounter";
13
+
14
/**
 * Map a health score to a Tailwind text-colour class:
 * 80 and above is emerald, 60 and above is amber, anything else is red.
 */
function scoreColor(score: number): string {
  const isHealthy = score >= 80;
  const isFair = score >= 60;
  return isHealthy
    ? "text-emerald-400"
    : isFair
      ? "text-amber-400"
      : "text-red-400";
}
19
+
20
/**
 * Map a health score to the hover shadow class applied through the parent
 * `group`: 80 and above is emerald, 60 and above is amber, anything else is red.
 */
function scoreGlow(score: number): string {
  const isHealthy = score >= 80;
  const isFair = score >= 60;
  return isHealthy
    ? "group-hover:shadow-emerald-500/10"
    : isFair
      ? "group-hover:shadow-amber-500/10"
      : "group-hover:shadow-red-500/10";
}
25
+
26
/**
 * Map a health score to a Tailwind background class for the status dot:
 * 80 and above is emerald, 60 and above is amber, anything else is red.
 */
function scoreDot(score: number): string {
  const isHealthy = score >= 80;
  const isFair = score >= 60;
  return isHealthy
    ? "bg-emerald-400"
    : isFair
      ? "bg-amber-400"
      : "bg-red-400";
}
31
+
32
// Headline figures shown in the stats strip on the home page.
// The first two are derived from MOCK_REPOS; the PR and issue counts are
// hard-coded values.
const STATS = [
  { label: "Repos Monitored", value: MOCK_REPOS.length, suffix: "" },
  {
    label: "Avg Health Score",
    // Guard the division: with an empty repo list the average would be NaN
    // and render as "NaN%". Fall back to 0 instead.
    value:
      MOCK_REPOS.length > 0
        ? Math.round(
            MOCK_REPOS.reduce((s, r) => s + r.health_score, 0) /
              MOCK_REPOS.length
          )
        : 0,
    suffix: "%",
  },
  { label: "PRs Reviewed", value: 47, suffix: "" },
  { label: "Issues Found", value: 132, suffix: "" },
];
44
+
45
+ const AGENTS = [
46
+ {
47
+ icon: (
48
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" className="w-6 h-6">
49
+ <path fillRule="evenodd" d="M12.516 2.17a.75.75 0 00-1.032 0 11.209 11.209 0 01-7.877 3.08.75.75 0 00-.722.515A12.74 12.74 0 002.25 9.75c0 5.942 4.064 10.933 9.563 12.348a.749.749 0 00.374 0c5.499-1.415 9.563-6.406 9.563-12.348 0-1.39-.223-2.73-.635-3.985a.75.75 0 00-.722-.516 11.209 11.209 0 01-7.877-3.08z" clipRule="evenodd" />
50
+ </svg>
51
+ ),
52
+ title: "Security Agent",
53
+ desc: "Scans for vulnerabilities, injection flaws, auth issues, and CWE-classified risks using Bandit and detect-secrets.",
54
+ color: "text-red-400",
55
+ bg: "from-red-500/10 via-red-500/5 to-transparent",
56
+ iconBg: "bg-red-500/10 text-red-400",
57
+ border: "border-red-500/10 hover:border-red-500/20",
58
+ },
59
+ {
60
+ icon: (
61
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" className="w-6 h-6">
62
+ <path fillRule="evenodd" d="M14.615 1.595a.75.75 0 01.359.852L12.982 9.75h7.268a.75.75 0 01.548 1.262l-10.5 11.25a.75.75 0 01-1.272-.71l1.992-7.302H3.75a.75.75 0 01-.548-1.262l10.5-11.25a.75.75 0 01.913-.143z" clipRule="evenodd" />
63
+ </svg>
64
+ ),
65
+ title: "Performance Agent",
66
+ desc: "Detects N+1 queries, memory leaks, blocking operations, and algorithmic inefficiencies with Radon analysis.",
67
+ color: "text-amber-400",
68
+ bg: "from-amber-500/10 via-amber-500/5 to-transparent",
69
+ iconBg: "bg-amber-500/10 text-amber-400",
70
+ border: "border-amber-500/10 hover:border-amber-500/20",
71
+ },
72
+ {
73
+ icon: (
74
+ <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" className="w-6 h-6">
75
+ <path d="M11.7 2.805a.75.75 0 01.6 0A60.65 60.65 0 0122.83 8.72a.75.75 0 01-.231 1.337 49.949 49.949 0 00-9.902 3.912l-.003.002-.34.18a.75.75 0 01-.707 0A50.009 50.009 0 007.5 12.174v-.224c0-.131.067-.248.172-.311a54.614 54.614 0 014.653-2.52.75.75 0 00-.65-1.352 56.129 56.129 0 00-4.78 2.589 1.858 1.858 0 00-.859 1.228 49.803 49.803 0 00-4.634-1.527.75.75 0 01-.231-1.337A60.653 60.653 0 0111.7 2.805z" />
76
+ <path d="M13.06 15.473a48.45 48.45 0 017.666-3.282c.134 1.414.22 2.843.255 4.285a.75.75 0 01-.46.71 47.878 47.878 0 00-8.105 4.342.75.75 0 01-.832 0 47.877 47.877 0 00-8.104-4.342.75.75 0 01-.461-.71c.035-1.442.121-2.87.255-4.286A48.4 48.4 0 016 13.18v1.27a1.5 1.5 0 00-.14 2.508c-.09.38-.222.753-.397 1.11.452.213.901.434 1.346.661a6.729 6.729 0 00.551-1.608 1.5 1.5 0 00.14-2.67v-.645a48.549 48.549 0 013.44 1.668 2.25 2.25 0 002.12 0z" />
77
+ <path d="M4.462 19.462c.42-.419.753-.89 1-1.394.453.213.902.434 1.347.661a6.743 6.743 0 01-1.286 1.794.75.75 0 11-1.06-1.06z" />
78
+ </svg>
79
+ ),
80
+ title: "Style Agent",
81
+ desc: "Enforces naming conventions, reduces complexity, and ensures code consistency via Ruff linting.",
82
+ color: "text-cyan-400",
83
+ bg: "from-cyan-500/10 via-cyan-500/5 to-transparent",
84
+ iconBg: "bg-cyan-500/10 text-cyan-400",
85
+ border: "border-cyan-500/10 hover:border-cyan-500/20",
86
+ },
87
+ ];
88
+
89
+ export default function HomePage() {
90
+ return (
91
+ <div className="dot-grid">
92
+ <div className="mx-auto max-w-7xl px-6 lg:px-8 py-16">
93
+ {/* ── Hero ── */}
94
+ <section className="text-center mb-20 pt-8">
95
+ <FadeIn delay={0}>
96
+ <div className="inline-flex items-center gap-2 rounded-full border border-violet-500/20 bg-violet-500/[0.06] px-4 py-1.5 text-sm text-violet-300 mb-8">
97
+ <span className="relative flex h-2 w-2">
98
+ <span className="animate-ping absolute inline-flex h-full w-full rounded-full bg-violet-400 opacity-75" />
99
+ <span className="relative inline-flex rounded-full h-2 w-2 bg-violet-500" />
100
+ </span>
101
+ Multi-Agent AI Review Platform
102
+ </div>
103
+ </FadeIn>
104
+
105
+ <FadeIn delay={0.1}>
106
+ <h1 className="text-5xl sm:text-7xl font-bold tracking-tight mb-6">
107
+ <span className="text-white">Code reviews,</span>
108
+ <br />
109
+ <span className="text-gradient">reimagined.</span>
110
+ </h1>
111
+ </FadeIn>
112
+
113
+ <FadeIn delay={0.2}>
114
+ <p className="text-lg sm:text-xl text-zinc-400 max-w-2xl mx-auto leading-relaxed">
115
+ Three specialised AI agents analyse every pull request for{" "}
116
+ <span className="text-red-400 font-medium">security</span>,{" "}
117
+ <span className="text-amber-400 font-medium">performance</span>,
118
+ and{" "}
119
+ <span className="text-cyan-400 font-medium">style</span>{" "}
120
+ — then synthesise a single, actionable review.
121
+ </p>
122
+ </FadeIn>
123
+ </section>
124
+
125
+ {/* ── Stats ── */}
126
+ <FadeIn delay={0.3}>
127
+ <section className="grid grid-cols-2 sm:grid-cols-4 gap-4 mb-20">
128
+ {STATS.map((s, i) => (
129
+ <div
130
+ key={s.label}
131
+ className="glass rounded-2xl p-5 text-center"
132
+ >
133
+ <p className="text-3xl sm:text-4xl font-bold text-white tabular-nums">
134
+ <AnimatedCounter
135
+ value={s.value}
136
+ suffix={s.suffix}
137
+ duration={1200 + i * 200}
138
+ />
139
+ </p>
140
+ <p className="text-xs text-zinc-500 mt-2 font-medium tracking-wide uppercase">
141
+ {s.label}
142
+ </p>
143
+ </div>
144
+ ))}
145
+ </section>
146
+ </FadeIn>
147
+
148
+ {/* ── Repositories ── */}
149
+ <section className="mb-24">
150
+ <FadeIn delay={0.15}>
151
+ <div className="flex items-center justify-between mb-6">
152
+ <h2 className="text-xl font-semibold text-white">
153
+ Repositories
154
+ </h2>
155
+ <span className="text-xs text-zinc-600 font-mono">
156
+ {MOCK_REPOS.length} monitored
157
+ </span>
158
+ </div>
159
+ </FadeIn>
160
+
161
+ <StaggerContainer className="grid grid-cols-1 sm:grid-cols-2 lg:grid-cols-4 gap-4">
162
+ {MOCK_REPOS.map((repo) => (
163
+ <StaggerItem key={repo.full_name}>
164
+ <HoverCard>
165
+ <Link
166
+ href={`/repos/${repo.owner}/${repo.repo}`}
167
+ className={`group block glass glass-hover rounded-2xl p-6 transition-all duration-300 hover:shadow-xl ${scoreGlow(
168
+ repo.health_score
169
+ )}`}
170
+ >
171
+ <div className="flex items-start justify-between mb-5">
172
+ <div>
173
+ <p className="text-xs text-zinc-600 font-mono mb-1">
174
+ {repo.owner}/
175
+ </p>
176
+ <p className="text-base font-semibold text-zinc-200 group-hover:text-white transition-colors">
177
+ {repo.repo}
178
+ </p>
179
+ </div>
180
+ <div className="text-right">
181
+ <span
182
+ className={`text-3xl font-bold tabular-nums ${scoreColor(
183
+ repo.health_score
184
+ )}`}
185
+ >
186
+ {repo.health_score}
187
+ </span>
188
+ </div>
189
+ </div>
190
+
191
+ {/* Mini bar */}
192
+ <div className="w-full h-1.5 rounded-full bg-white/[0.04] mb-4 overflow-hidden">
193
+ <motion.div
194
+ initial={{ width: 0 }}
195
+ animate={{ width: `${repo.health_score}%` }}
196
+ transition={{
197
+ duration: 1,
198
+ delay: 0.5,
199
+ ease: [0.25, 0.46, 0.45, 0.94],
200
+ }}
201
+ className={`h-full rounded-full ${
202
+ repo.health_score >= 80
203
+ ? "bg-emerald-500"
204
+ : repo.health_score >= 60
205
+ ? "bg-amber-500"
206
+ : "bg-red-500"
207
+ }`}
208
+ />
209
+ </div>
210
+
211
+ <div className="flex items-center justify-between text-xs text-zinc-500">
212
+ <span className="flex items-center gap-1.5">
213
+ <span className={`w-1.5 h-1.5 rounded-full ${scoreDot(repo.health_score)}`} />
214
+ {repo.open_prs} open PRs
215
+ </span>
216
+ <span>{repo.last_review}</span>
217
+ </div>
218
+ </Link>
219
+ </HoverCard>
220
+ </StaggerItem>
221
+ ))}
222
+ </StaggerContainer>
223
+ </section>
224
+
225
+ {/* ── How It Works ── */}
226
+ <section className="mb-12">
227
+ <FadeIn>
228
+ <div className="text-center mb-12">
229
+ <h2 className="text-2xl font-bold text-white mb-3">
230
+ How It Works
231
+ </h2>
232
+ <p className="text-sm text-zinc-500 max-w-lg mx-auto">
233
+ Each PR triggers three specialised agents that run in parallel,
234
+ then a synthesizer merges their findings into one review.
235
+ </p>
236
+ </div>
237
+ </FadeIn>
238
+
239
+ {/* Pipeline visualization */}
240
+ <FadeIn delay={0.1}>
241
+ <div className="flex items-center justify-center mb-12">
242
+ <div className="flex items-center gap-2 text-xs font-mono text-zinc-500">
243
+ <span className="px-3 py-1.5 rounded-lg glass border border-white/[0.06]">
244
+ PR Opened
245
+ </span>
246
+ <svg className="w-4 h-4 text-zinc-700" fill="none" viewBox="0 0 24 24" stroke="currentColor"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" /></svg>
247
+ <span className="px-3 py-1.5 rounded-lg glass border border-violet-500/20 text-violet-400">
248
+ 3 Agents
249
+ </span>
250
+ <svg className="w-4 h-4 text-zinc-700" fill="none" viewBox="0 0 24 24" stroke="currentColor"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" /></svg>
251
+ <span className="px-3 py-1.5 rounded-lg glass border border-cyan-500/20 text-cyan-400">
252
+ Synthesize
253
+ </span>
254
+ <svg className="w-4 h-4 text-zinc-700" fill="none" viewBox="0 0 24 24" stroke="currentColor"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" /></svg>
255
+ <span className="px-3 py-1.5 rounded-lg glass border border-emerald-500/20 text-emerald-400">
256
+ Review Posted
257
+ </span>
258
+ </div>
259
+ </div>
260
+ </FadeIn>
261
+
262
+ <StaggerContainer className="grid grid-cols-1 sm:grid-cols-3 gap-5">
263
+ {AGENTS.map((agent) => (
264
+ <StaggerItem key={agent.title}>
265
+ <HoverCard>
266
+ <div
267
+ className={`glass rounded-2xl p-6 border ${agent.border} transition-all duration-300 h-full`}
268
+ >
269
+ <div
270
+ className={`w-11 h-11 rounded-xl ${agent.iconBg} flex items-center justify-center mb-4`}
271
+ >
272
+ {agent.icon}
273
+ </div>
274
+ <h3
275
+ className={`text-base font-semibold mb-2 ${agent.color}`}
276
+ >
277
+ {agent.title}
278
+ </h3>
279
+ <p className="text-sm text-zinc-500 leading-relaxed">
280
+ {agent.desc}
281
+ </p>
282
+ </div>
283
+ </HoverCard>
284
+ </StaggerItem>
285
+ ))}
286
+ </StaggerContainer>
287
+ </section>
288
+ </div>
289
+ </div>
290
+ );
291
+ }
dashboard/app/repos/[owner]/[repo]/page.tsx ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import Link from "next/link";
2
+ import { getRepoReviews, getRepoStats } from "@/lib/api";
3
+ import HealthScoreRing from "@/components/HealthScoreRing";
4
+ import TrendChart from "@/components/TrendChart";
5
+ import AgentBreakdown from "@/components/AgentBreakdown";
6
+ import SeverityBadge from "@/components/SeverityBadge";
7
+ import type { Severity } from "@/lib/types";
8
+
9
/**
 * Repository overview page (async server component).
 *
 * Awaits the `owner` / `repo` route params, loads the repo's reviews and
 * aggregate stats concurrently, and renders: breadcrumb, headline stats,
 * the latest health score next to a trend chart, the per-agent breakdown
 * across every loaded review, and a table with one row per review.
 */
export default async function RepoPage({
  params,
}: {
  params: Promise<{ owner: string; repo: string }>;
}) {
  const { owner, repo } = await params;

  // The two requests are independent, so they are issued together.
  const [reviews, stats] = await Promise.all([
    getRepoReviews(owner, repo),
    getRepoStats(owner, repo),
  ]);

  // NOTE(review): treats index 0 as the most recent review — confirm the
  // ordering returned by getRepoReviews.
  const latestScore = reviews[0]?.health_score ?? 0;
  const previousScore = reviews[1]?.health_score;
  const allFindings = reviews.flatMap((review) => review.findings);

  const repoPath = `/repos/${owner}/${repo}`;

  // Headline numbers rendered on the right-hand side of the header.
  const headlineStats = [
    { label: "Reviews", value: stats.total_reviews },
    { label: "Findings", value: stats.total_findings },
    { label: "Avg Score", value: `${stats.average_health_score}%` },
  ];

  // Column headings of the reviews table, in display order.
  const columnHeadings = [
    "PR",
    "Score",
    "Critical",
    "High",
    "Medium",
    "Low",
    "Summary",
    "Duration",
  ];

  // Text colour for a health score: >= 80 green, >= 60 amber, otherwise red.
  const toneForScore = (score: number): string => {
    if (score >= 80) return "text-emerald-400";
    if (score >= 60) return "text-amber-400";
    return "text-red-400";
  };

  return (
    <div className="dot-grid">
      <div className="mx-auto max-w-7xl px-6 lg:px-8 py-10">
        {/* Breadcrumb: Dashboard > owner/repo */}
        <nav className="flex items-center gap-2 text-sm text-zinc-600 mb-8">
          <Link href="/" className="hover:text-zinc-400 transition-colors">
            Dashboard
          </Link>
          <svg className="w-3.5 h-3.5 text-zinc-700" fill="none" viewBox="0 0 24 24" stroke="currentColor"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" /></svg>
          <span className="text-zinc-400 font-medium">
            {owner}/{repo}
          </span>
        </nav>

        {/* Header: repo name plus headline stats */}
        <div className="flex flex-col sm:flex-row sm:items-end sm:justify-between gap-6 mb-12">
          <div>
            <p className="text-xs text-zinc-600 font-mono mb-1">{owner}/</p>
            <h1 className="text-3xl font-bold text-white">{repo}</h1>
          </div>
          <div className="flex items-center gap-8 text-sm">
            {headlineStats.map((tile) => (
              <div key={tile.label} className="text-center">
                <p className="text-2xl font-bold text-white tabular-nums">
                  {tile.value}
                </p>
                <p className="text-[10px] text-zinc-600 uppercase tracking-wider mt-0.5">
                  {tile.label}
                </p>
              </div>
            ))}
          </div>
        </div>

        {/* Latest score ring beside the score trend */}
        <div className="grid grid-cols-1 lg:grid-cols-[200px_1fr] gap-8 mb-12">
          <div className="flex items-center justify-center">
            <HealthScoreRing
              score={latestScore}
              previousScore={previousScore}
              label="Latest Score"
            />
          </div>
          <TrendChart scores={stats.recent_scores} />
        </div>

        {/* Findings grouped by agent, over all loaded reviews */}
        <section className="mb-12">
          <h2 className="text-sm font-semibold text-zinc-400 mb-4 uppercase tracking-wider">
            Agent Breakdown
          </h2>
          <AgentBreakdown findings={allFindings} />
        </section>

        {/* One table row per review */}
        <section>
          <h2 className="text-sm font-semibold text-zinc-400 mb-4 uppercase tracking-wider">
            Recent PR Reviews
          </h2>
          <div className="overflow-x-auto glass rounded-2xl">
            <table className="w-full text-sm text-left">
              <thead>
                <tr className="border-b border-white/[0.04] text-zinc-500 text-[11px] uppercase tracking-wider">
                  {columnHeadings.map((heading) => (
                    <th key={heading} className="px-5 py-3.5 font-medium">
                      {heading}
                    </th>
                  ))}
                </tr>
              </thead>
              <tbody>
                {reviews.map((r) => (
                  <tr
                    key={r.id}
                    className="border-b border-white/[0.03] hover:bg-white/[0.02] transition-colors"
                  >
                    <td className="px-5 py-3.5">
                      <Link
                        href={`${repoPath}/prs/${r.pr_number}`}
                        className="text-violet-400 hover:text-violet-300 font-medium transition-colors"
                      >
                        #{r.pr_number}
                      </Link>
                    </td>
                    <td
                      className={`px-5 py-3.5 font-bold tabular-nums ${toneForScore(r.health_score)}`}
                    >
                      {r.health_score}
                    </td>
                    {/* Critical findings show a badge rather than a number */}
                    <td className="px-5 py-3.5">
                      {r.critical_count > 0 ? (
                        <SeverityBadge severity={"critical" as Severity} />
                      ) : (
                        <span className="text-zinc-700">0</span>
                      )}
                    </td>
                    <td className="px-5 py-3.5">
                      {r.high_count > 0 ? (
                        <span className="text-orange-400 font-medium tabular-nums">
                          {r.high_count}
                        </span>
                      ) : (
                        <span className="text-zinc-700">0</span>
                      )}
                    </td>
                    <td className="px-5 py-3.5">
                      {r.medium_count > 0 ? (
                        <span className="text-amber-400 tabular-nums">
                          {r.medium_count}
                        </span>
                      ) : (
                        <span className="text-zinc-700">0</span>
                      )}
                    </td>
                    <td className="px-5 py-3.5 text-zinc-600 tabular-nums">
                      {r.low_count}
                    </td>
                    <td className="px-5 py-3.5 text-zinc-500 truncate max-w-[240px] text-xs">
                      {r.summary}
                    </td>
                    {/* duration_ms is shown in seconds with one decimal */}
                    <td className="px-5 py-3.5 text-zinc-600 tabular-nums text-xs font-mono">
                      {(r.duration_ms / 1000).toFixed(1)}s
                    </td>
                  </tr>
                ))}
              </tbody>
            </table>
          </div>
        </section>
      </div>
    </div>
  );
}
dashboard/app/repos/[owner]/[repo]/prs/[number]/page.tsx ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import Link from "next/link";
2
+ import { getReviewDetail } from "@/lib/api";
3
+ import HealthScoreRing from "@/components/HealthScoreRing";
4
+ import FindingsTable from "@/components/FindingsTable";
5
+ import AgentBreakdown from "@/components/AgentBreakdown";
6
+ import type { Recommendation } from "@/lib/types";
7
+
8
/** Pill styling (Tailwind classes) and display label for each known recommendation. */
const RECOMMENDATION_STYLE: Record<
  Recommendation,
  { bg: string; text: string; label: string; dot: string }
> = {
  approve: {
    bg: "bg-emerald-500/10",
    text: "text-emerald-400",
    label: "Approve",
    dot: "bg-emerald-400",
  },
  request_changes: {
    bg: "bg-amber-500/10",
    text: "text-amber-400",
    label: "Request Changes",
    dot: "bg-amber-400",
  },
  block: {
    bg: "bg-red-500/10",
    text: "text-red-400",
    label: "Block",
    dot: "bg-red-400",
  },
};

/**
 * Detail page for a single pull-request review (async server component).
 *
 * Awaits the `owner` / `repo` / `number` route params, loads the review via
 * `getReviewDetail`, and renders: breadcrumb, header with the recommendation
 * pill, commit SHA, duration and health score, the executive summary, the
 * four severity counters, the per-agent breakdown and the findings table.
 */
export default async function PRReviewPage({
  params,
}: {
  params: Promise<{ owner: string; repo: string; number: string }>;
}) {
  const { owner, repo, number: prNum } = await params;
  const prNumber = parseInt(prNum, 10);
  const { review, record } = await getReviewDetail(owner, repo, prNumber);

  // The recommendation comes from the API at runtime and is not validated
  // against the `Recommendation` type. Fall back to a neutral pill so an
  // unexpected or missing value cannot crash the render on `rec.bg`.
  const rec = RECOMMENDATION_STYLE[review.recommendation] ?? {
    bg: "bg-zinc-500/10",
    text: "text-zinc-400",
    label: review.recommendation ? String(review.recommendation) : "Unknown",
    dot: "bg-zinc-400",
  };

  // Severity counters, in the order they are displayed.
  const severityTiles = [
    {
      label: "Critical",
      count: review.critical_count,
      color: "text-red-400",
      border: "border-red-500/[0.08]",
      dot: "bg-red-400",
    },
    {
      label: "High",
      count: review.high_count,
      color: "text-orange-400",
      border: "border-orange-500/[0.08]",
      dot: "bg-orange-400",
    },
    {
      label: "Medium",
      count: review.medium_count,
      color: "text-amber-400",
      border: "border-amber-500/[0.08]",
      dot: "bg-amber-400",
    },
    {
      label: "Low",
      count: review.low_count,
      color: "text-zinc-400",
      border: "border-zinc-700/30",
      dot: "bg-zinc-500",
    },
  ];

  return (
    <div className="dot-grid">
      <div className="mx-auto max-w-7xl px-6 lg:px-8 py-10">
        {/* ── Breadcrumb ── */}
        <nav className="flex items-center gap-2 text-sm text-zinc-600 mb-8">
          <Link href="/" className="hover:text-zinc-400 transition-colors">
            Dashboard
          </Link>
          <svg className="w-3.5 h-3.5 text-zinc-700" fill="none" viewBox="0 0 24 24" stroke="currentColor"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" /></svg>
          <Link
            href={`/repos/${owner}/${repo}`}
            className="hover:text-zinc-400 transition-colors"
          >
            {owner}/{repo}
          </Link>
          <svg className="w-3.5 h-3.5 text-zinc-700" fill="none" viewBox="0 0 24 24" stroke="currentColor"><path strokeLinecap="round" strokeLinejoin="round" strokeWidth={2} d="M9 5l7 7-7 7" /></svg>
          <span className="text-zinc-400 font-medium">PR #{prNumber}</span>
        </nav>

        {/* ── Header ── */}
        <div className="flex flex-col sm:flex-row sm:items-start sm:justify-between gap-6 mb-12">
          <div>
            <p className="text-xs text-zinc-600 font-mono mb-1">
              {owner}/{repo}
            </p>
            <h1 className="text-3xl font-bold text-white mb-4">
              Pull Request #{prNumber}
            </h1>
            <div className="flex items-center gap-3">
              <span
                className={`inline-flex items-center gap-1.5 rounded-full px-3 py-1 text-xs font-semibold ${rec.bg} ${rec.text}`}
              >
                <span className={`w-1.5 h-1.5 rounded-full ${rec.dot}`} />
                {rec.label}
              </span>
              <span className="text-[11px] text-zinc-600 font-mono">
                {record.commit_sha}
              </span>
              {/* duration_ms is shown in seconds with one decimal */}
              <span className="text-[11px] text-zinc-700 font-mono">
                {(record.duration_ms / 1000).toFixed(1)}s
              </span>
            </div>
          </div>
          <HealthScoreRing
            score={review.health_score}
            size={140}
            label="Health Score"
          />
        </div>

        {/* ── Executive Summary ── */}
        <section className="glass rounded-2xl p-6 mb-8">
          <h2 className="text-[10px] text-zinc-600 uppercase tracking-widest font-medium mb-3">
            Executive Summary
          </h2>
          <p className="text-zinc-300 leading-relaxed text-[15px]">
            {review.executive_summary}
          </p>
        </section>

        {/* ── Severity Counts ── */}
        <div className="grid grid-cols-2 sm:grid-cols-4 gap-4 mb-8">
          {severityTiles.map((s) => (
            <div
              key={s.label}
              className={`glass rounded-2xl border ${s.border} p-5 text-center`}
            >
              <p className={`text-3xl font-bold tabular-nums ${s.color}`}>
                {s.count}
              </p>
              <p className="text-[10px] text-zinc-600 mt-1 uppercase tracking-wider flex items-center justify-center gap-1.5">
                <span className={`w-1.5 h-1.5 rounded-full ${s.dot}`} />
                {s.label}
              </p>
            </div>
          ))}
        </div>

        {/* ── Agent Breakdown ── */}
        <section className="mb-8">
          <h2 className="text-sm font-semibold text-zinc-400 mb-4 uppercase tracking-wider">
            Agent Breakdown
          </h2>
          <AgentBreakdown findings={review.findings} />
        </section>

        {/* ── Findings ── */}
        <section>
          <h2 className="text-sm font-semibold text-zinc-400 mb-4 uppercase tracking-wider">
            All Findings ({review.findings.length})
          </h2>
          <FindingsTable findings={review.findings} />
        </section>
      </div>
    </div>
  );
}
dashboard/components/AgentBreakdown.tsx ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client";
2
+
3
+ import { motion } from "framer-motion";
4
+ import type { Finding, AgentKind } from "@/lib/types";
5
+
6
/** Props for {@link AgentBreakdown}. */
interface AgentBreakdownProps {
  // Findings to summarise; each is attributed to an agent via `finding.agent`.
  findings: Finding[];
}

// Icons are built once at module load and shared by every render.
const SHIELD_ICON = (
  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" className="w-5 h-5">
    <path fillRule="evenodd" d="M12.516 2.17a.75.75 0 00-1.032 0 11.209 11.209 0 01-7.877 3.08.75.75 0 00-.722.515A12.74 12.74 0 002.25 9.75c0 5.942 4.064 10.933 9.563 12.348a.749.749 0 00.374 0c5.499-1.415 9.563-6.406 9.563-12.348 0-1.39-.223-2.73-.635-3.985a.75.75 0 00-.722-.516 11.209 11.209 0 01-7.877-3.08z" clipRule="evenodd" />
  </svg>
);

const BOLT_ICON = (
  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" className="w-5 h-5">
    <path fillRule="evenodd" d="M14.615 1.595a.75.75 0 01.359.852L12.982 9.75h7.268a.75.75 0 01.548 1.262l-10.5 11.25a.75.75 0 01-1.272-.71l1.992-7.302H3.75a.75.75 0 01-.548-1.262l10.5-11.25a.75.75 0 01.913-.143z" clipRule="evenodd" />
  </svg>
);

const CAP_ICON = (
  <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" className="w-5 h-5">
    <path d="M11.7 2.805a.75.75 0 01.6 0A60.65 60.65 0 0122.83 8.72a.75.75 0 01-.231 1.337 49.949 49.949 0 00-9.902 3.912l-.003.002-.34.18a.75.75 0 01-.707 0A50.009 50.009 0 007.5 12.174v-.224c0-.131.067-.248.172-.311a54.614 54.614 0 014.653-2.52.75.75 0 00-.65-1.352 56.129 56.129 0 00-4.78 2.589 1.858 1.858 0 00-.859 1.228 49.803 49.803 0 00-4.634-1.527.75.75 0 01-.231-1.337A60.653 60.653 0 0111.7 2.805z" />
    <path d="M13.06 15.473a48.45 48.45 0 017.666-3.282c.134 1.414.22 2.843.255 4.285a.75.75 0 01-.46.71 47.878 47.878 0 00-8.105 4.342.75.75 0 01-.832 0 47.877 47.877 0 00-8.104-4.342.75.75 0 01-.461-.71c.035-1.442.121-2.87.255-4.286A48.4 48.4 0 016 13.18v1.27a1.5 1.5 0 00-.14 2.508c-.09.38-.222.753-.397 1.11.452.213.901.434 1.346.661a6.729 6.729 0 00.551-1.608 1.5 1.5 0 00.14-2.67v-.645a48.549 48.549 0 013.44 1.668 2.25 2.25 0 002.12 0z" />
    <path d="M4.462 19.462c.42-.419.753-.89 1-1.394.453.213.902.434 1.347.661a6.743 6.743 0 01-1.286 1.794.75.75 0 11-1.06-1.06z" />
  </svg>
);

/** Icon, label and Tailwind colour classes for each agent's card. */
const AGENT_META: Record<
  AgentKind,
  {
    icon: React.ReactNode;
    label: string;
    color: string;
    iconBg: string;
    border: string;
  }
> = {
  security: {
    icon: SHIELD_ICON,
    label: "Security",
    color: "text-red-400",
    iconBg: "bg-red-500/10 text-red-400",
    border: "border-red-500/[0.08]",
  },
  performance: {
    icon: BOLT_ICON,
    label: "Performance",
    color: "text-amber-400",
    iconBg: "bg-amber-500/10 text-amber-400",
    border: "border-amber-500/[0.08]",
  },
  style: {
    icon: CAP_ICON,
    label: "Style",
    color: "text-cyan-400",
    iconBg: "bg-cyan-500/10 text-cyan-400",
    border: "border-cyan-500/[0.08]",
  },
};

/**
 * Renders one animated card per agent (security, performance, style) showing
 * how many of the given findings belong to that agent and which category
 * occurs most often among them ("—" when the agent has no findings).
 */
export default function AgentBreakdown({ findings }: AgentBreakdownProps) {
  const agents: AgentKind[] = ["security", "performance", "style"];

  const cards = agents.map((agent) => {
    // Single pass: count this agent's findings and tally them per category.
    const tally: Record<string, number> = {};
    let count = 0;
    for (const finding of findings) {
      if (finding.agent !== agent) continue;
      count += 1;
      tally[finding.category] = (tally[finding.category] ?? 0) + 1;
    }

    // Highest tally wins; on a tie the earlier entry is kept (strict `>`).
    let topCategory = "—";
    let best = -Infinity;
    for (const [category, occurrences] of Object.entries(tally)) {
      if (occurrences > best) {
        best = occurrences;
        topCategory = category;
      }
    }

    return { agent, count, topCategory, meta: AGENT_META[agent] };
  });

  return (
    <div className="grid grid-cols-1 sm:grid-cols-3 gap-4">
      {cards.map(({ agent, count, topCategory, meta }, position) => (
        <motion.div
          key={agent}
          initial={{ opacity: 0, y: 16 }}
          animate={{ opacity: 1, y: 0 }}
          // Each card starts slightly later than the one before it.
          transition={{ duration: 0.4, delay: position * 0.08 }}
          whileHover={{ y: -2, transition: { duration: 0.15 } }}
          className={`glass rounded-2xl p-5 border ${meta.border} transition-colors duration-300`}
        >
          <div className="flex items-center gap-3 mb-4">
            <div
              className={`w-9 h-9 rounded-xl ${meta.iconBg} flex items-center justify-center`}
            >
              {meta.icon}
            </div>
            <h3 className={`text-sm font-semibold ${meta.color}`}>
              {meta.label}
            </h3>
          </div>
          <p className="text-3xl font-bold text-white tabular-nums">{count}</p>
          <p className="text-[11px] text-zinc-600 mt-0.5 uppercase tracking-wider">
            findings
          </p>
          <div className="mt-4 pt-3 border-t border-white/[0.04]">
            <p className="text-[10px] text-zinc-600 uppercase tracking-wider">
              Top category
            </p>
            <p className="text-xs text-zinc-400 font-medium truncate mt-0.5">
              {topCategory}
            </p>
          </div>
        </motion.div>
      ))}
    </div>
  );
}
dashboard/components/AnimatedCounter.tsx ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client";
2
+
3
+ import { useEffect, useRef, useState } from "react";
4
+
5
/** Props for {@link AnimatedCounter}. */
interface AnimatedCounterProps {
  // Number the counter animates towards.
  value: number;
  // Text rendered directly after the number (defaults to "").
  suffix?: string;
  // Animation length in milliseconds (defaults to 1200).
  duration?: number;
  // Class name applied to the wrapping <span>.
  className?: string;
}

/**
 * Renders a number that counts up to `value` with an ease-out-expo curve.
 *
 * The first animation starts from 0. When `value` or `duration` changes, the
 * counter animates again from the number currently shown, so the display
 * never goes stale. The pending animation frame is cancelled on cleanup, so
 * no state update is scheduled after unmount or after a newer run starts.
 */
export function AnimatedCounter({
  value,
  suffix = "",
  duration = 1200,
  className,
}: AnimatedCounterProps) {
  const [display, setDisplay] = useState(0);
  // Mirrors the last number handed to setDisplay, so a new run can start
  // from it without making `display` an effect dependency.
  const shown = useRef(0);

  useEffect(() => {
    const from = shown.current;
    const start = performance.now();
    let frame = 0;

    function tick(now: number) {
      // The frame timestamp may be slightly earlier than `start`; clamp so
      // progress never goes negative.
      const elapsed = Math.max(now - start, 0);
      // A non-positive duration jumps straight to the end instead of
      // producing NaN or Infinity.
      const progress = duration > 0 ? Math.min(elapsed / duration, 1) : 1;
      // ease-out expo
      const ease = progress === 1 ? 1 : 1 - Math.pow(2, -10 * progress);
      const next = Math.round(from + ease * (value - from));
      shown.current = next;
      setDisplay(next);
      if (progress < 1) frame = requestAnimationFrame(tick);
    }

    frame = requestAnimationFrame(tick);
    // Stop the loop when the component unmounts or the inputs change.
    return () => cancelAnimationFrame(frame);
  }, [value, duration]);

  return (
    <span className={className}>
      {display}
      {suffix}
    </span>
  );
}
dashboard/components/FindingsTable.tsx ADDED
@@ -0,0 +1,185 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ "use client";
2
+
3
+ import { useState, useMemo } from "react";
4
+ import { motion, AnimatePresence } from "framer-motion";
5
+ import type { Finding, Severity } from "@/lib/types";
6
+ import SeverityBadge from "./SeverityBadge";
7
+
8
/** Small icon per agent name; agents without an entry fall back to their name as text. */
const AGENT_ICON: Record<string, React.ReactNode> = {
  security: (
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" className="w-4 h-4 text-red-400">
      <path fillRule="evenodd" d="M12.516 2.17a.75.75 0 00-1.032 0 11.209 11.209 0 01-7.877 3.08.75.75 0 00-.722.515A12.74 12.74 0 002.25 9.75c0 5.942 4.064 10.933 9.563 12.348a.749.749 0 00.374 0c5.499-1.415 9.563-6.406 9.563-12.348 0-1.39-.223-2.73-.635-3.985a.75.75 0 00-.722-.516 11.209 11.209 0 01-7.877-3.08z" clipRule="evenodd" />
    </svg>
  ),
  performance: (
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" className="w-4 h-4 text-amber-400">
      <path fillRule="evenodd" d="M14.615 1.595a.75.75 0 01.359.852L12.982 9.75h7.268a.75.75 0 01.548 1.262l-10.5 11.25a.75.75 0 01-1.272-.71l1.992-7.302H3.75a.75.75 0 01-.548-1.262l10.5-11.25a.75.75 0 01.913-.143z" clipRule="evenodd" />
    </svg>
  ),
  style: (
    <svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 24 24" fill="currentColor" className="w-4 h-4 text-cyan-400">
      <path d="M11.7 2.805a.75.75 0 01.6 0A60.65 60.65 0 0122.83 8.72a.75.75 0 01-.231 1.337 49.949 49.949 0 00-9.902 3.912l-.003.002-.34.18a.75.75 0 01-.707 0A50.009 50.009 0 007.5 12.174v-.224c0-.131.067-.248.172-.311a54.614 54.614 0 014.653-2.52.75.75 0 00-.65-1.352 56.129 56.129 0 00-4.78 2.589 1.858 1.858 0 00-.859 1.228 49.803 49.803 0 00-4.634-1.527.75.75 0 01-.231-1.337A60.653 60.653 0 0111.7 2.805z" />
      <path d="M13.06 15.473a48.45 48.45 0 017.666-3.282c.134 1.414.22 2.843.255 4.285a.75.75 0 01-.46.71 47.878 47.878 0 00-8.105 4.342.75.75 0 01-.832 0 47.877 47.877 0 00-8.104-4.342.75.75 0 01-.461-.71c.035-1.442.121-2.87.255-4.286z" />
    </svg>
  ),
};

/** Sort rank per severity; a lower rank sorts first when ascending. */
const SEVERITY_ORDER: Record<Severity, number> = {
  critical: 0,
  high: 1,
  medium: 2,
  low: 3,
};

/** Finding fields the table can be sorted by. */
type SortKey = "severity" | "agent" | "file_path" | "category" | "title";

/**
 * Sortable table of findings with click-to-expand detail rows.
 *
 * Clicking a column header sorts by that column (clicking the active column
 * flips the direction). Clicking a row toggles a detail panel with the
 * description, optional suggested fix, optional CWE id, confidence and line
 * range. The expanded row and the React keys are tied to each finding's
 * position in the `findings` prop, not to its position in the sorted view,
 * so re-sorting keeps the same finding expanded.
 */
export default function FindingsTable({
  findings,
}: {
  findings: Finding[];
}) {
  const [sortKey, setSortKey] = useState<SortKey>("severity");
  const [sortAsc, setSortAsc] = useState(true);
  // Index into the unsorted `findings` prop (null = nothing expanded).
  const [expandedIdx, setExpandedIdx] = useState<number | null>(null);

  const sorted = useMemo(() => {
    // Severities missing from SEVERITY_ORDER rank after "low" instead of
    // producing a NaN comparison.
    const rank = (severity: Severity) =>
      SEVERITY_ORDER[severity] ?? Number.MAX_SAFE_INTEGER;
    // Missing (null/undefined) fields compare as empty strings instead of
    // throwing on `.localeCompare`.
    const text = (value: unknown) => String(value ?? "");

    // Remember each finding's source position so it survives the sort.
    const rows = findings.map((finding, sourceIdx) => ({ finding, sourceIdx }));
    rows.sort((a, b) => {
      const cmp =
        sortKey === "severity"
          ? rank(a.finding.severity) - rank(b.finding.severity)
          : text(a.finding[sortKey]).localeCompare(text(b.finding[sortKey]));
      return sortAsc ? cmp : -cmp;
    });
    return rows;
  }, [findings, sortKey, sortAsc]);

  // Same column: flip direction. New column: switch to it, ascending.
  function handleSort(key: SortKey) {
    if (key === sortKey) setSortAsc((v) => !v);
    else {
      setSortKey(key);
      setSortAsc(true);
    }
  }

  // Up/down triangle shown next to the active column's label.
  const arrow = (key: SortKey) =>
    sortKey === key ? (sortAsc ? " \u25B2" : " \u25BC") : "";

  return (
    <motion.div
      initial={{ opacity: 0, y: 12 }}
      animate={{ opacity: 1, y: 0 }}
      transition={{ duration: 0.4, delay: 0.1 }}
      className="overflow-x-auto glass rounded-2xl"
    >
      <table className="w-full text-sm text-left">
        <thead>
          <tr className="border-b border-white/[0.04] text-zinc-500 text-[11px] uppercase tracking-wider">
            {(
              [
                ["severity", "Severity"],
                ["agent", "Agent"],
                ["file_path", "File"],
                ["category", "Category"],
                ["title", "Title"],
              ] as [SortKey, string][]
            ).map(([key, label]) => (
              <th
                key={key}
                onClick={() => handleSort(key)}
                aria-sort={
                  sortKey === key
                    ? sortAsc
                      ? "ascending"
                      : "descending"
                    : "none"
                }
                className="px-4 py-3.5 cursor-pointer select-none hover:text-zinc-300 transition-colors font-medium"
              >
                {label}
                <span className="text-violet-400/70">{arrow(key)}</span>
              </th>
            ))}
          </tr>
        </thead>
        <tbody>
          {sorted.map(({ finding: f, sourceIdx }) => {
            const isExpanded = expandedIdx === sourceIdx;
            return (
              <tr key={sourceIdx} className="group">
                {/* One cell spans all columns; the button lays the columns out with CSS grid. */}
                <td colSpan={5} className="p-0">
                  <button
                    type="button"
                    aria-expanded={isExpanded}
                    onClick={() =>
                      setExpandedIdx(isExpanded ? null : sourceIdx)
                    }
                    className="w-full grid grid-cols-[100px_50px_1fr_130px_1fr] items-center text-left px-4 py-3 border-b border-white/[0.03] hover:bg-white/[0.02] transition-colors cursor-pointer"
                  >
                    <span>
                      <SeverityBadge severity={f.severity} />
                    </span>
                    <span title={f.agent}>
                      {AGENT_ICON[f.agent] ?? f.agent}
                    </span>
                    <span className="font-mono text-zinc-400 text-xs truncate pr-2">
                      {f.file_path}
                      <span className="text-zinc-700 ml-1">
                        :{f.line_start}
                      </span>
                    </span>
                    <span className="text-zinc-500 text-xs">{f.category}</span>
                    <span className="text-zinc-300 text-xs truncate">
                      {f.title}
                    </span>
                  </button>

                  <AnimatePresence>
                    {isExpanded && (
                      <motion.div
                        initial={{ height: 0, opacity: 0 }}
                        animate={{ height: "auto", opacity: 1 }}
                        exit={{ height: 0, opacity: 0 }}
                        transition={{ duration: 0.25, ease: "easeInOut" }}
                        className="overflow-hidden"
                      >
                        <div className="bg-white/[0.01] border-b border-white/[0.04] px-6 py-5 space-y-4">
                          <div>
                            <h4 className="text-[10px] text-zinc-600 uppercase tracking-widest mb-1.5 font-medium">
                              Description
                            </h4>
                            <p className="text-zinc-300 text-sm leading-relaxed">
                              {f.description}
                            </p>
                          </div>
                          {f.suggested_fix && (
                            <div>
                              <h4 className="text-[10px] text-zinc-600 uppercase tracking-widest mb-1.5 font-medium">
                                Suggested Fix
                              </h4>
                              <pre className="text-emerald-400/90 text-xs bg-emerald-500/[0.04] border border-emerald-500/10 rounded-xl px-4 py-3 overflow-x-auto whitespace-pre-wrap font-mono">
                                {f.suggested_fix}
                              </pre>
                            </div>
                          )}
                          <div className="flex gap-5 text-[11px] text-zinc-600 pt-1">
                            {f.cwe_id && (
                              <span className="font-mono">{f.cwe_id}</span>
                            )}
                            <span>
                              Confidence:{" "}
                              <span className="text-zinc-400">
                                {(f.confidence * 100).toFixed(0)}%
                              </span>
                            </span>
                            <span>
                              Lines{" "}
                              <span className="text-zinc-400 font-mono">
                                {f.line_start}–{f.line_end}
                              </span>
                            </span>
                          </div>
                        </div>
                      </motion.div>
                    )}
                  </AnimatePresence>
                </td>
              </tr>
            );
          })}
        </tbody>
      </table>
    </motion.div>
  );
}