amine-yagoub commited on
Commit
d5341cc
·
1 Parent(s): 38cd7bb

feat: Add initial CodeTribunal implementation

Browse files
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Amine Yagoub
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -1,3 +1,4 @@
 
1
  ---
2
  title: CodeTribunal
3
  emoji: 💻
@@ -10,3 +11,31 @@ short_description: The AI Courtroom That Exposes Bad Freelance Code
10
  ---
11
 
12
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <<<<<<< HEAD
2
  ---
3
  title: CodeTribunal
4
  emoji: 💻
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
14
+ =======
15
+ # CodeTribunal
16
+
17
+ The AI courtroom that exposes bad freelance code.
18
+
19
+ Multi-agent forensic investigation powered by GLM 5.1. Instead of guessing code quality, CodeTribunal puts it on trial — a live-streaming debate where an AI Prosecutor and Defense Attorney clash over real, deterministic technical evidence.
20
+
21
+ ## Install
22
+
23
+ ```bash
24
+ pip install -e .
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ ```bash
30
+ code-tribunal ./path/to/codebase
31
+ ```
32
+
33
+ ## How it works
34
+
35
+ 1. **Evidence Gathering** — Deterministic scans (security, code smells, hardcoded secrets, TODOs)
36
+ 2. **Investigation** — GLM 5.1 agents analyze the evidence
37
+ 3. **The Trial** — Prosecutor and Defense debate in a live-streamed courtroom
38
+ 4. **Verdict** — The Judge delivers a final ruling
39
+
40
+ Built for the [Build with GLM 5.1](https://build-with-glm-5-1-challenge.devpost.com) hackathon.
41
+ >>>>>>> b4fcdee (feat: Add initial CodeTribunal implementation)
pyproject.toml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["hatchling"]
3
+ build-backend = "hatchling.build"
4
+
5
[project]
name = "code-tribunal"
version = "0.1.0"
description = "AI courtroom that exposes bad freelance code through multi-agent forensic investigation"
readme = "README.md"
license = "MIT"
# The repository ships an extension-less `LICENSE` file, so the glob must not
# require a dot after the name (the previous "LICEN[CS]E.*" matched nothing).
license-files = ["LICEN[CS]E*"]
requires-python = ">=3.11"
authors = [
    {name = "Amine Yagoub"},
]
keywords = ["ai", "code-review", "forensic", "multi-agent", "glm"]
classifiers = [
    "Development Status :: 3 - Alpha",
    "Intended Audience :: Developers",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Programming Language :: Python :: 3.13",
]
dependencies = [
    "crewai[litellm]",
    "gritql>=0.2.0",
    "gradio>=5.0.0",
    "rich>=13.0.0",
    "click>=8.0.0",
    "httpx>=0.27.0",
    "python-dotenv>=1.0.0",
    "fpdf2>=2.7.0",
]
35
+
36
+ [project.optional-dependencies]
37
+ dev = [
38
+ "pytest>=8.0.0",
39
+ "ruff>=0.9.0",
40
+ ]
41
+
42
+ [project.urls]
43
+ Repository = "https://github.com/amineyagoub/CodeTribunal"
44
+
45
+ [project.scripts]
46
+ code-tribunal = "code_tribunal.cli:main"
src/code_tribunal/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """CodeTribunal: AI courtroom that exposes bad freelance code."""
src/code_tribunal/agents.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Courtroom agent definitions for CodeTribunal."""
2
+
3
+ import os
4
+ from crewai import Agent, LLM
5
+ from pathlib import Path
6
+ from dotenv import load_dotenv
7
+
8
+ load_dotenv(Path(__file__).resolve().parent.parent.parent / ".env")
9
+
10
+
11
def _get_llm() -> LLM:
    """Build the LLM handle used by the courtroom agents.

    The model name is read from ``MODEL_NAME`` (default ``zai/glm-5.1``) and
    the API key from ``ZAI_API_KEY``; an unset key is passed through as
    ``None``.
    """
    env = os.environ
    model_name = env.get("MODEL_NAME", "zai/glm-5.1")
    api_key = env.get("ZAI_API_KEY")
    return LLM(model=model_name, api_key=api_key, temperature=0.3)
17
+
18
+
19
+ # ---------------------------------------------------------------------------
20
+ # Phase 2: Investigators
21
+ # ---------------------------------------------------------------------------
22
+
23
def security_investigator() -> Agent:
    """Create the Phase 2 agent that investigates security evidence."""
    goal = (
        "Analyze security-related code evidence and produce a detailed investigation report. "
        "Identify every vulnerability, rank by severity, explain the attack vector, "
        "and describe the potential impact if exploited in production."
    )
    backstory = (
        "You are a former penetration tester turned code auditor. "
        "You've found hardcoded AWS keys in Fortune 500 repos, SQL injection in banking APIs, "
        "and deserialization bugs that would have cost millions. "
        "You don't guess — you follow the evidence and build an airtight case. "
        "You treat every hardcoded secret as a loaded weapon and every eval() as an open door."
    )
    settings = {
        "role": "Security Forensic Investigator",
        "goal": goal,
        "backstory": backstory,
        "llm": _get_llm(),
        "verbose": True,
    }
    return Agent(**settings)
41
+
42
+
43
def quality_investigator() -> Agent:
    """Create the Phase 2 agent that investigates code-quality evidence."""
    goal = (
        "Analyze code quality evidence and produce a detailed investigation report. "
        "Identify technical debt, abandoned code, missing error handling, and developer negligence indicators. "
        "Focus on patterns that suggest rushed or careless development."
    )
    backstory = (
        "You are a principal engineer who has inherited nightmares from freelance developers. "
        "You've seen TODO comments that are 5 years old, dead code that accounts for 40% of a codebase, "
        "and functions so complex they defied testing. "
        "You can spot the difference between 'agile iteration' and 'lazy corner-cutting' from a mile away."
    )
    settings = {
        "role": "Code Quality Forensic Investigator",
        "goal": goal,
        "backstory": backstory,
        "llm": _get_llm(),
        "verbose": True,
    }
    return Agent(**settings)
60
+
61
+
62
def architecture_investigator() -> Agent:
    """Create the Phase 2 agent that investigates architectural evidence."""
    goal = (
        "Analyze architectural evidence and produce a detailed investigation report. "
        "Identify structural problems: tight coupling, missing abstractions, "
        "hardcoded configuration that should be externalized, and patterns that won't scale."
    )
    backstory = (
        "You are a systems architect with 20 years of experience across startups and enterprises. "
        "You can look at a codebase and tell whether it was built to last or built to invoice. "
        "You identify patterns that indicate the developer didn't understand the domain "
        "or deliberately cut corners to finish faster."
    )
    settings = {
        "role": "Architecture Forensic Investigator",
        "goal": goal,
        "backstory": backstory,
        "llm": _get_llm(),
        "verbose": True,
    }
    return Agent(**settings)
79
+
80
+
81
+ # ---------------------------------------------------------------------------
82
+ # Phase 3: The Trial
83
+ # ---------------------------------------------------------------------------
84
+
85
def prosecutor() -> Agent:
    """Create the Phase 3 agent that argues the case against the code."""
    goal = (
        "Build the strongest possible case that this code is negligent, dangerous, or fraudulent. "
        "Use the investigation reports as evidence. Argue with precision and force. "
        "Cite specific file paths, line numbers, and vulnerability types. "
        "Make the jury understand why this code should never have been delivered."
    )
    backstory = (
        "You are a ruthless courtroom prosecutor specializing in technology fraud cases. "
        "You've won cases against developers who delivered insecure code to non-technical clients. "
        "You know how to take technical evidence and make it devastatingly clear. "
        "You don't exaggerate — the facts are damning enough. "
        "Your weapon is specificity: every claim backed by line numbers and evidence."
    )
    settings = {
        "role": "The Prosecutor",
        "goal": goal,
        "backstory": backstory,
        "llm": _get_llm(),
        "verbose": True,
    }
    return Agent(**settings)
104
+
105
+
106
def defense_attorney() -> Agent:
    """Create the Phase 3 agent that argues in defense of the code."""
    goal = (
        "Mount the best possible defense of this code. "
        "Challenge the prosecution's claims. Argue mitigating circumstances. "
        "Point out that some patterns are acceptable in certain contexts. "
        "Argue proportionality — not every issue is a catastrophe. "
        "Be honest but vigorous in your defense."
    )
    backstory = (
        "You are a defense attorney who specializes in technology cases. "
        "You believe everyone deserves a fair hearing, even bad code. "
        "You're not dishonest — you argue context, proportionality, and intent. "
        "A TODO comment isn't negligence, it's a roadmap. "
        "An eval() in a private script isn't the same as eval() in a web server. "
        "You force the prosecution to prove every claim."
    )
    settings = {
        "role": "The Defense Attorney",
        "goal": goal,
        "backstory": backstory,
        "llm": _get_llm(),
        "verbose": True,
    }
    return Agent(**settings)
127
+
128
+
129
+ # ---------------------------------------------------------------------------
130
+ # Phase 4: The Verdict
131
+ # ---------------------------------------------------------------------------
132
+
133
def judge() -> Agent:
    """Create the Phase 4 agent that delivers the final verdict."""
    goal = (
        "Review all evidence, investigation reports, and the trial transcript. "
        "Deliver a final, structured verdict. "
        "For each finding: severity, impact, and recommended remediation. "
        "End with an overall assessment: GUILTY (negligent), MIXED (some issues), or NOT GUILTY (acceptable). "
        "Include a 'reputational risk score' from 0-100 for the developer who wrote this code."
    )
    backstory = (
        "You are a senior judge who has presided over hundreds of technology disputes. "
        "You are impartial, precise, and thorough. "
        "You don't let the prosecution's rhetoric sway you — you follow the evidence. "
        "But you also don't let the defense minimize real harm. "
        "Your verdicts are known for being fair, detailed, and impossible to appeal."
    )
    settings = {
        "role": "The Judge",
        "goal": goal,
        "backstory": backstory,
        "llm": _get_llm(),
        "verbose": True,
    }
    return Agent(**settings)
src/code_tribunal/app.py ADDED
@@ -0,0 +1,720 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Gradio streaming UI for the CodeTribunal courtroom."""
2
+
3
+ import os
4
+ import tempfile
5
+ import time
6
+ import zipfile
7
+ from pathlib import Path
8
+
9
+ import gradio as gr
10
+ from gradio import ChatMessage
11
+
12
+ from code_tribunal.evidence import (
13
+ EvidenceReport,
14
+ gather_evidence_streaming,
15
+ )
16
+ from code_tribunal.courtroom import (
17
+ StreamResult,
18
+ phase_investigation_stream,
19
+ phase_trial_stream,
20
+ phase_verdict_stream,
21
+ )
22
+
23
+
24
+ # ---------------------------------------------------------------------------
25
+ # Constants
26
+ # ---------------------------------------------------------------------------
27
+
28
# Emoji shown next to each speaker in the transcript, keyed by agent role
# string. Roles missing from this table fall back to a default in _agent_icon.
AGENT_AVATARS = {
    "Security Forensic Investigator": "🛡️",
    "Code Quality Forensic Investigator": "📋",
    "Architecture Forensic Investigator": "🏗️",
    "The Prosecutor": "⚖️",
    "The Defense Attorney": "🛡️",
    "The Judge": "🔨",
    "Investigator": "🔍",
}

# Badge background colour (CSS hex) per severity label; unknown labels get a
# fallback colour in _severity_badge.
SEVERITY_COLORS = {
    "CRITICAL": "#dc2626",
    "HIGH": "#ea580c",
    "MEDIUM": "#ca8a04",
    "LOW": "#2563eb",
}

# Rotating flavour text per pipeline phase. run_courtroom cycles through the
# "evidence", "investigation", "trial" and "verdict" lists while streaming.
STATUS_MESSAGES = {
    "extracting": [
        "Unpacking the evidence...",
        "Extracting source files...",
        "Cataloging submitted code...",
    ],
    "evidence": [
        "Scanning with GritQL forensic patterns...",
        "Searching for hardcoded secrets...",
        "Analyzing code for dangerous functions...",
        "Checking for SQL injection vectors...",
        "Cataloging technical debt markers...",
        "Building the evidence dossier...",
    ],
    "investigation": [
        "🔍 Security Investigator analyzing vulnerabilities...",
        "📋 Quality Investigator assessing code standards...",
        "🏗️ Architecture Investigator reviewing structure...",
        "Cross-referencing findings across domains...",
        "Compiling investigation reports...",
    ],
    "trial": [
        "Court is now in session...",
        "The Prosecutor is building the case...",
        "Examining the evidence in detail...",
        "The Defense is cross-examining...",
        "Hearing rebuttal arguments...",
        "Closing arguments underway...",
    ],
    "verdict": [
        "The Judge is reviewing all evidence...",
        "Weighing prosecution arguments...",
        "Considering defense testimony...",
        "Preparing the final ruling...",
        "The gavel is about to fall...",
    ],
}
82
+
83
+ CUSTOM_CSS = """
84
+ /* ─── Global ─── */
85
+ .gradio-container {
86
+ max-width: 960px !important;
87
+ margin: 0 auto !important;
88
+ }
89
+ body {
90
+ background: #0a0a14 !important;
91
+ }
92
+ .dark {
93
+ background: #0f0f1a !important;
94
+ }
95
+
96
+ /* ─── Hero ─── */
97
+ .hero-logo {
98
+ display: block !important;
99
+ margin: 0 auto 12px auto !important;
100
+ border-radius: 16px !important;
101
+ }
102
+ .hero-title {
103
+ text-align: center !important;
104
+ color: #fbbf24 !important;
105
+ font-family: 'Georgia', serif !important;
106
+ font-size: 2.4em !important;
107
+ font-weight: 700 !important;
108
+ margin-bottom: 4px !important;
109
+ }
110
+ .hero-subtitle {
111
+ text-align: center !important;
112
+ color: #94a3b8 !important;
113
+ font-size: 1.1em !important;
114
+ margin-top: 0 !important;
115
+ }
116
+
117
+ /* ─── Upload area ─── */
118
+ .upload-area .file-preview {
119
+ min-height: 220px !important;
120
+ border: 2px dashed #fbbf2440 !important;
121
+ border-radius: 16px !important;
122
+ background: #1a1a2e !important;
123
+ transition: border-color 0.3s !important;
124
+ }
125
+ .upload-area .file-preview:hover {
126
+ border-color: #fbbf24 !important;
127
+ }
128
+
129
+ /* ─── Status ─── */
130
+ .status-phase {
131
+ text-align: center !important;
132
+ color: #fbbf24 !important;
133
+ font-size: 1.1em !important;
134
+ font-weight: 600 !important;
135
+ }
136
+ .status-detail {
137
+ text-align: center !important;
138
+ color: #94a3b8 !important;
139
+ font-style: italic !important;
140
+ }
141
+
142
+ /* ─── Evidence table ─── */
143
+ .evidence-table {
144
+ font-family: 'JetBrains Mono', 'Fira Code', monospace !important;
145
+ font-size: 13px !important;
146
+ color: #d4d4d4 !important;
147
+ }
148
+
149
+ /* ─── Chatbot ─── */
150
+ .chatbot-panel {
151
+ border: 1px solid #2a2a40 !important;
152
+ border-radius: 12px !important;
153
+ background: #12121f !important;
154
+ }
155
+
156
+ /* ─── Verdict ─── */
157
+ .verdict-box {
158
+ border: 2px solid #fbbf24 !important;
159
+ border-radius: 12px !important;
160
+ background: linear-gradient(135deg, #1a1a2e, #0f172a) !important;
161
+ padding: 24px !important;
162
+ color: #e2e8f0 !important;
163
+ }
164
+
165
+ /* ─── Export buttons ─── */
166
+ .export-btn {
167
+ border: 1px solid #fbbf2440 !important;
168
+ border-radius: 8px !important;
169
+ color: #fbbf24 !important;
170
+ background: #1a1a2e !important;
171
+ }
172
+ .export-btn:hover {
173
+ background: #2a2a40 !important;
174
+ border-color: #fbbf24 !important;
175
+ }
176
+
177
+ /* ─── Scrollbar ─── */
178
+ ::-webkit-scrollbar { width: 8px; }
179
+ ::-webkit-scrollbar-track { background: #0f0f1a; }
180
+ ::-webkit-scrollbar-thumb { background: #2a2a40; border-radius: 4px; }
181
+ ::-webkit-scrollbar-thumb:hover { background: #3a3a50; }
182
+ """
183
+
184
+
185
+ # ---------------------------------------------------------------------------
186
+ # Helpers
187
+ # ---------------------------------------------------------------------------
188
+
189
def _esc(text: str) -> str:
    """Escape ``&``, ``<`` and ``>`` so *text* can be placed inside HTML markup."""
    escaped = text
    # "&" goes first so the entities produced for "<" and ">" are not re-escaped.
    for raw, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
        escaped = escaped.replace(raw, entity)
    return escaped
191
+
192
+
193
def _severity_badge(sev: str) -> str:
    """Return an inline-styled ``<span>`` badge for the severity label *sev*.

    Labels that are not in ``SEVERITY_COLORS`` are drawn on a grey background.
    """
    background = SEVERITY_COLORS.get(sev, "#6b7280")
    style = (
        f"background:{background};color:white;padding:2px 8px;"
        "border-radius:4px;font-size:12px;font-weight:bold"
    )
    return f'<span style="{style}">{sev}</span>'
196
+
197
+
198
def _evidence_html(report) -> str:
    """Render an evidence report as styled HTML.

    Args:
        report: Object exposing ``file_count``, ``findings`` and
            ``findings_by_domain`` (a mapping of domain name to findings).
            Each finding is read for ``severity_hint``, ``file``, ``line``
            and ``code``.

    Returns:
        One HTML string: a summary line followed by one table per domain.
    """
    header_row = (
        '<tr style="border-bottom:1px solid #333">'
        '<th style="text-align:left;padding:4px">Severity</th>'
        '<th style="text-align:left;padding:4px">File</th>'
        '<th style="text-align:left;padding:4px">Line</th>'
        '<th style="text-align:left;padding:4px">Code</th>'
        '</tr>'
    )
    lines = ["<h3>Evidence Report</h3>"]
    lines.append(
        f"<p>Files scanned: <b>{report.file_count}</b> | "
        f"Total findings: <b>{len(report.findings)}</b></p>"
    )

    for domain, findings in report.findings_by_domain.items():
        lines.append(
            f'<h4 style="margin-top:16px">{_esc(domain.title())} Evidence '
            f'({len(findings)} findings)</h4>'
        )
        lines.append('<table style="width:100%;border-collapse:collapse">')
        lines.append(header_row)
        for f in findings:
            # File names originate from the uploaded archive, so they are
            # escaped just like the code snippet to keep markup out of the page.
            file_name = _esc(Path(f.file).name)
            lines.append(
                f'<tr style="border-bottom:1px solid #222">'
                f'<td style="padding:4px">{_severity_badge(f.severity_hint)}</td>'
                f'<td style="padding:4px;font-family:monospace;font-size:13px">{file_name}</td>'
                f'<td style="padding:4px;font-family:monospace">{f.line}</td>'
                f'<td style="padding:4px;font-family:monospace;font-size:13px;color:#a0a0a0">{_esc(f.code)}</td>'
                f'</tr>'
            )
        lines.append('</table>')

    return "\n".join(lines)
219
+
220
+
221
def _agent_icon(role: str) -> str:
    """Look up the emoji avatar for *role*, defaulting to a memo icon."""
    try:
        return AGENT_AVATARS[role]
    except KeyError:
        return "📝"
223
+
224
+
225
def _yield(
    hero_vis, upload_vis, proc_vis,
    status, evidence, chat, verdict, export_vis,
):
    """Assemble the 8-tuple that every ``run_courtroom`` yield must produce.

    The three leading flags and the trailing ``export_vis`` become visibility
    updates; ``status``, ``evidence``, ``chat`` and ``verdict`` are passed
    through unchanged, in that order.
    """
    hero_update, upload_update, processing_update = (
        gr.update(visible=flag) for flag in (hero_vis, upload_vis, proc_vis)
    )
    export_update = gr.update(visible=export_vis)
    return (
        hero_update,
        upload_update,
        processing_update,
        status,
        evidence,
        chat,
        verdict,
        export_update,
    )
240
+
241
+
242
+ # ---------------------------------------------------------------------------
243
+ # Pipeline runner with progressive streaming updates
244
+ # ---------------------------------------------------------------------------
245
+
246
+ # Yield throttle — max ~20 updates/sec to prevent browser lag
247
+ _MIN_YIELD_INTERVAL = 0.05
248
+
249
+
250
def _extend_chat(chat_history, delta, title=None):
    """Stream *delta* into *chat_history* (mutated in place).

    With a *title*, a new assistant message carrying that title is started;
    without one, the text is appended to the most recent message.
    """
    if title is not None:
        chat_history.append(ChatMessage(
            role="assistant",
            content=delta,
            metadata={"title": title},
        ))
        return
    previous = chat_history[-1]
    chat_history[-1] = ChatMessage(
        role="assistant",
        content=previous.content + delta,
        metadata=previous.metadata,
    )


def _run_phases(tmpdir):
    """Yield UI updates for phases 1-4 over the code extracted into *tmpdir*."""
    chat_history = []
    last_yield = 0.0

    # ===================================================================
    # Phase 1: Evidence — stream per-pattern progress
    # ===================================================================
    status_msgs = STATUS_MESSAGES["evidence"]
    status_idx = 0
    report = None

    for update in gather_evidence_streaming(tmpdir):
        if isinstance(update, str):
            # Plain strings are progress messages from the evidence scan.
            status_idx = (status_idx + 1) % len(status_msgs)
            yield _yield(
                False, False, True,
                f"### Phase 1/4: Forensic Evidence\n{update}\n\n*{status_msgs[status_idx]}*",
                None, [], None, False,
            )
        elif isinstance(update, EvidenceReport):
            report = update

    if report is None or not report.findings:
        yield _yield(
            False, False, True,
            "### Phase 1/4: Evidence Complete\nNo findings detected. **Case dismissed** — code appears clean.",
            None, [], None, False,
        )
        return

    evidence_html_val = _evidence_html(report)
    evidence_text = report.to_text()

    chat_history.append(ChatMessage(
        role="user",
        content=(
            f"**Case Filed**: Code submitted for forensic analysis.\n\n"
            f"**{report.file_count}** files scanned — **{len(report.findings)}** findings detected "
            f"across **{len(report.findings_by_domain)}** domains."
        ),
        metadata={"title": "Court Clerk"},
    ))

    yield _yield(
        False, False, True,
        f"### Phase 1/4: Evidence Complete\n**{len(report.findings)}** findings detected. Proceeding to investigation...",
        evidence_html_val, chat_history, None, False,
    )

    # ===================================================================
    # Phase 2: Investigation — stream agent output
    # ===================================================================
    inv_result = StreamResult()
    status_msgs = STATUS_MESSAGES["investigation"]
    status_idx = 0
    current_task_idx = -1
    inv_labels = ["Security", "Quality", "Architecture"]

    yield _yield(
        False, False, True,
        f"### Phase 2/4: Investigation\n*{status_msgs[0]}*",
        evidence_html_val, chat_history, None, False,
    )

    for role, delta, task_idx in phase_investigation_stream(report, inv_result):
        if task_idx != current_task_idx:
            # A new task index means a new speaker: open a fresh message.
            label = inv_labels[task_idx] if task_idx < len(inv_labels) else f"Agent {task_idx}"
            _extend_chat(chat_history, delta, f"{_agent_icon(role)} {label} Investigation")
            current_task_idx = task_idx
        else:
            _extend_chat(chat_history, delta)

        now = time.time()
        if now - last_yield >= _MIN_YIELD_INTERVAL:
            status_idx = (status_idx + 1) % len(status_msgs)
            yield _yield(
                False, False, True,
                f"### Phase 2/4: Investigation\n*{status_msgs[status_idx]}*",
                evidence_html_val, chat_history, None, False,
            )
            last_yield = now

    investigation_reports = inv_result.metadata.get("reports", {})
    investigation_text = inv_result.text

    # Unthrottled yield so the last streamed tokens are always shown.
    yield _yield(
        False, False, True,
        "### Phase 2/4: Investigation Complete\n**3 reports** generated. Court is now in session...",
        evidence_html_val, chat_history, None, False,
    )

    # ===================================================================
    # Phase 3: Trial — stream prosecutor / defense / rebuttal
    # ===================================================================
    trial_result = StreamResult()
    status_msgs = STATUS_MESSAGES["trial"]
    status_idx = 0
    current_task_idx = -1
    last_yield = 0.0

    yield _yield(
        False, False, True,
        f"### Phase 3/4: The Trial\n*{status_msgs[0]}*",
        evidence_html_val, chat_history, None, False,
    )

    for role, delta, round_name, task_idx in phase_trial_stream(
        evidence_text, investigation_reports, trial_result
    ):
        if task_idx != current_task_idx:
            _extend_chat(chat_history, delta, f"{_agent_icon(role)} {round_name}")
            current_task_idx = task_idx
        else:
            _extend_chat(chat_history, delta)

        now = time.time()
        if now - last_yield >= _MIN_YIELD_INTERVAL:
            status_idx = (status_idx + 1) % len(status_msgs)
            yield _yield(
                False, False, True,
                f"### Phase 3/4: The Trial\n*{status_msgs[status_idx]}*",
                evidence_html_val, chat_history, None, False,
            )
            last_yield = now

    trial_transcript = trial_result.text

    yield _yield(
        False, False, True,
        "### Phase 3/4: Trial Complete\nThe Judge is now deliberating...",
        evidence_html_val, chat_history, None, False,
    )

    # ===================================================================
    # Phase 4: Verdict — stream judge
    # ===================================================================
    verdict_result = StreamResult()
    status_msgs = STATUS_MESSAGES["verdict"]
    status_idx = 0
    verdict_started = False
    last_yield = 0.0

    for role, delta in phase_verdict_stream(
        evidence_text, investigation_text, trial_transcript, verdict_result
    ):
        if not verdict_started:
            _extend_chat(chat_history, delta, f"{_agent_icon(role)} Verdict")
            verdict_started = True
        else:
            _extend_chat(chat_history, delta)

        now = time.time()
        if now - last_yield >= _MIN_YIELD_INTERVAL:
            status_idx = (status_idx + 1) % len(status_msgs)
            yield _yield(
                False, False, True,
                f"### Phase 4/4: Verdict\n*{status_msgs[status_idx]}*",
                evidence_html_val, chat_history, None, False,
            )
            last_yield = now

    verdict_text = verdict_result.text

    # Final yield — fill the verdict panel and reveal the export buttons.
    yield _yield(
        False, False, True,
        "### Trial Complete\nThe verdict has been delivered.",
        evidence_html_val,
        chat_history,
        f"## 🔨 Judge's Verdict\n\n{verdict_text}",
        True,
    )


def run_courtroom(code_input):
    """Run the full pipeline, yielding progressive updates for the UI.

    Args:
        code_input: The uploaded archive. Either an object whose ``name``
            attribute is the file path, or the path itself. ``None`` means
            nothing was uploaded.

    Yields:
        8-tuples built by ``_yield``: three section-visibility updates, the
        status markdown, the evidence HTML, the chat history, the verdict
        markdown and the export-row visibility update.
    """
    import shutil  # local import: only needed to remove the extraction dir

    # --- Validate input ---
    if code_input is None:
        yield _yield(
            True, True, False,
            "Please upload a .zip file containing the code to investigate.",
            None, [], None, False,
        )
        return

    # --- Hide hero/upload, show processing ---
    yield _yield(False, False, True, "### Extracting files...", None, [], None, False)

    archive_path = str(getattr(code_input, "name", code_input))
    if not archive_path.lower().endswith(".zip"):
        yield _yield(
            False, False, True,
            "Please upload a .zip file.",
            None, [], None, False,
        )
        return

    # Checked before touching the disk so a misconfigured deployment does not
    # extract archives it can never analyse.
    if not os.environ.get("ZAI_API_KEY"):
        yield _yield(
            False, False, True,
            "ZAI_API_KEY not set. Configure .env file.",
            None, [], None, False,
        )
        return

    tmpdir = tempfile.mkdtemp()
    try:
        try:
            with zipfile.ZipFile(archive_path, "r") as zf:
                zf.extractall(tmpdir)
        except (zipfile.BadZipFile, OSError) as exc:
            yield _yield(
                False, False, True,
                f"Could not read the uploaded archive: {exc}",
                None, [], None, False,
            )
            return

        yield from _run_phases(tmpdir)
    finally:
        # Also runs when the generator is closed early, so extracted uploads
        # never accumulate on disk.
        shutil.rmtree(tmpdir, ignore_errors=True)
493
+
494
+
495
+ # ---------------------------------------------------------------------------
496
+ # Export helpers
497
+ # ---------------------------------------------------------------------------
498
+
499
def _build_results_dict(
    evidence_html_val, chat_history, verdict_text,
) -> dict:
    """Split the courtroom transcript into export sections.

    Messages are assigned to a section by their metadata title: titles
    containing "Investigation" open the investigation section, titles
    containing "Prosecution", "Defense" or "Rebuttal" open the trial section,
    and titles containing "Verdict" open the verdict section. A message with
    any other title stays in the section currently open; messages seen before
    the first section opens are not exported.

    Args:
        evidence_html_val: Evidence HTML, passed through unchanged.
        chat_history: Iterable of chat messages, or ``None``. Each message may
            be an object with ``content``/``metadata`` attributes or a dict
            with those keys.
        verdict_text: Verdict markdown; when empty, the verdict messages from
            the transcript are used instead.

    Returns:
        Dict with the keys ``evidence_html``, ``investigation``,
        ``transcript``, ``verdict`` and ``timestamp``.
    """

    def _field(msg, key):
        # Accept both dict-style and attribute-style messages.
        if isinstance(msg, dict):
            return msg.get(key)
        return getattr(msg, key, None)

    def _as_text(content) -> str:
        # NOTE(review): content may arrive as a plain string or as a list of
        # {"type": "text", "text": ...} parts depending on the Gradio version
        # — confirm against the installed release.
        if content is None:
            return ""
        if isinstance(content, str):
            return content
        if isinstance(content, dict):
            text = content.get("text")
            return text if isinstance(text, str) else ""
        if isinstance(content, (list, tuple)):
            return "".join(_as_text(part) for part in content)
        return str(content)

    sections = {"investigation": [], "trial": [], "verdict": []}
    current_section = "evidence"

    for msg in chat_history or []:
        title = (_field(msg, "metadata") or {}).get("title", "") or ""
        content = _as_text(_field(msg, "content"))

        if "Investigation" in title:
            current_section = "investigation"
        elif "Prosecution" in title or "Defense" in title or "Rebuttal" in title:
            current_section = "trial"
        elif "Verdict" in title:
            current_section = "verdict"

        if current_section == "verdict":
            sections["verdict"].append(content)
        elif current_section in sections:
            sections[current_section].append(f"### {title}\n{content}")

    return {
        "evidence_html": evidence_html_val,
        "investigation": "\n\n".join(sections["investigation"]),
        "transcript": "\n\n".join(sections["trial"]),
        "verdict": verdict_text or "\n\n".join(sections["verdict"]),
        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
    }
535
+
536
+
537
def generate_markdown_export(results_state: dict) -> str:
    """Write the trial report as Markdown and return the file path.

    Args:
        results_state: Mapping read for the keys ``timestamp``,
            ``investigation``, ``transcript`` and ``verdict``. Missing or
            ``None`` values render as empty sections; a missing timestamp
            renders as ``N/A``.

    Returns:
        Path of a newly created temporary ``.md`` file. The caller owns the
        file; it is not deleted automatically.
    """
    md_lines = [
        "# CodeTribunal — Trial Report\n",
        f"**Generated**: {results_state.get('timestamp', 'N/A')}\n",
        "---\n",
        "## Investigation Reports\n",
        results_state.get("investigation") or "",
        "\n---\n",
        "## Trial Transcript\n",
        results_state.get("transcript") or "",
        "\n---\n",
        "## Verdict\n",
        results_state.get("verdict") or "",
        "\n",
    ]
    content = "\n".join(md_lines)
    # NamedTemporaryFile creates the file atomically (unlike the deprecated
    # mktemp). UTF-8 is explicit because the report contains em-dashes and
    # emoji that a platform default encoding may not be able to represent.
    with tempfile.NamedTemporaryFile(
        "w",
        suffix="_CodeTribunal_Report.md",
        delete=False,
        encoding="utf-8",
    ) as handle:
        handle.write(content)
    return handle.name
557
+
558
+
559
def generate_pdf_export(results_state: dict) -> str:
    """Write the trial report as a PDF and return the file path.

    Args:
        results_state: Mapping read for the keys ``timestamp``,
            ``investigation``, ``transcript`` and ``verdict``. Missing or
            ``None`` values render as empty sections.

    Returns:
        Path of a newly created temporary ``.pdf`` file. The caller owns the
        file; it is not deleted automatically.
    """
    from fpdf import FPDF

    # After each cell, move to the left margin of the next line. multi_cell
    # defaults to leaving the cursor at the right edge, which leaves no
    # horizontal room for the following full-width cell.
    next_line = {"new_x": "LMARGIN", "new_y": "NEXT"}

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()

    # Title
    pdf.set_font("Helvetica", "B", 24)
    pdf.set_text_color(200, 160, 30)
    pdf.cell(0, 15, "CodeTribunal - Trial Report", align="C", **next_line)
    pdf.set_font("Helvetica", "", 10)
    pdf.set_text_color(120, 120, 140)
    pdf.cell(0, 8, f"Generated: {results_state.get('timestamp', 'N/A')}", align="C", **next_line)
    pdf.ln(10)

    def _add_section(title: str, content: str):
        """Append a gold heading followed by *content*, one cell per line."""
        pdf.set_font("Helvetica", "B", 14)
        pdf.set_text_color(200, 160, 30)
        pdf.cell(0, 10, title, **next_line)
        pdf.set_text_color(50, 50, 60)
        pdf.set_font("Helvetica", "", 10)
        for line in (content or "").split("\n"):
            # Characters outside Latin-1 (emoji, em-dashes) are substituted
            # with "?" before being handed to the built-in Helvetica font.
            clean = line.encode("latin-1", "replace").decode("latin-1")
            if clean.strip():
                pdf.multi_cell(0, 5, clean, **next_line)
            else:
                pdf.ln(3)
        pdf.ln(6)

    _add_section("Investigation Reports", results_state.get("investigation", ""))
    _add_section("Trial Transcript", results_state.get("transcript", ""))
    _add_section("Verdict", results_state.get("verdict", ""))

    # mkstemp creates the file atomically, unlike the deprecated mktemp.
    fd, filepath = tempfile.mkstemp(suffix="_CodeTribunal_Report.pdf")
    os.close(fd)
    pdf.output(filepath)
    return filepath
597
+
598
+
599
+ # ---------------------------------------------------------------------------
600
+ # Gradio App
601
+ # ---------------------------------------------------------------------------
602
+
603
def create_app() -> gr.Blocks:
    """Build the CodeTribunal Gradio interface.

    The layout has a hero section, an upload section and an initially hidden
    processing section (status, evidence table, transcript, verdict, export
    controls). Uploading a file starts ``run_courtroom``; the export buttons
    write a report and reveal it in the download component.

    Returns:
        The assembled ``gr.Blocks`` app, not yet launched.
    """
    logo_path = Path(__file__).resolve().parent.parent.parent / "assets" / "logo.png"

    with gr.Blocks(title="CodeTribunal — The AI Courtroom") as app:

        # --- Hero Section ---
        with gr.Column(visible=True) as hero_section:
            # The logo is optional: it is shown only when the asset exists.
            if logo_path.exists():
                gr.Image(
                    value=str(logo_path),
                    show_label=False,
                    height=160,
                    container=False,
                    elem_classes=["hero-logo"],
                )
            gr.Markdown(
                "# CodeTribunal\n### The AI Courtroom That Exposes Bad Freelance Code",
                elem_classes=["hero-title"],
            )
            gr.Markdown(
                "Upload a .zip of code and watch a multi-agent forensic investigation unfold.\n"
                "Powered by GLM 5 + GritQL + CrewAI.",
                elem_classes=["hero-subtitle"],
            )

        # --- Upload Section ---
        with gr.Column(visible=True, elem_classes=["upload-area"]) as upload_section:
            code_input = gr.File(
                label="Drop your .zip here or click to upload",
                file_types=[".zip"],
                interactive=True,
            )

        # --- Processing Section ---
        with gr.Column(visible=False) as processing_section:
            status_md = gr.Markdown(
                "Initializing...",
                elem_classes=["status-phase"],
            )
            evidence_html = gr.HTML(
                value="",
                visible=True,
            )
            chatbot = gr.Chatbot(
                label="Courtroom Transcript",
                height=600,
                elem_classes=["chatbot-panel"],
            )
            verdict_md = gr.Markdown(
                value="",
                visible=True,
                elem_classes=["verdict-box"],
            )
            with gr.Row(visible=False) as export_row:
                export_md_btn = gr.Button(
                    "Export as Markdown",
                    elem_classes=["export-btn"],
                )
                export_pdf_btn = gr.Button(
                    "Export as PDF",
                    elem_classes=["export-btn"],
                )
            # Hidden until an export has produced a file to download.
            export_file = gr.File(label="Download Report", visible=False)

        # Hidden state for export (not wired to any event at present).
        results_state = gr.State(value={})

        # --- Wire events ---

        # Auto-trigger on file upload; the outputs match the 8-tuple that
        # run_courtroom yields.
        code_input.upload(
            fn=run_courtroom,
            inputs=[code_input],
            outputs=[
                hero_section, upload_section, processing_section,
                status_md, evidence_html, chatbot, verdict_md, export_row,
            ],
        )

        # Export callbacks: returning only a path would set the value but
        # leave the download component invisible, so visibility is updated
        # together with the value.
        def _do_export_md(ev_html, chat, verdict):
            results = _build_results_dict(ev_html, chat, verdict)
            return gr.update(value=generate_markdown_export(results), visible=True)

        def _do_export_pdf(ev_html, chat, verdict):
            results = _build_results_dict(ev_html, chat, verdict)
            return gr.update(value=generate_pdf_export(results), visible=True)

        export_md_btn.click(
            fn=_do_export_md,
            inputs=[evidence_html, chatbot, verdict_md],
            outputs=[export_file],
        )
        export_pdf_btn.click(
            fn=_do_export_pdf,
            inputs=[evidence_html, chatbot, verdict_md],
            outputs=[export_file],
        )

    return app
703
+
704
+
705
def main():
    """Build the Gradio app and serve it on all interfaces, port 7860."""
    app = create_app()
    theme = gr.themes.Base(
        primary_hue="amber",
        secondary_hue="slate",
        neutral_hue="slate",
    )
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "css": CUSTOM_CSS,
        "theme": theme,
    }
    app.launch(**launch_options)
717
+
718
+
719
+ if __name__ == "__main__":
720
+ main()
src/code_tribunal/cli.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """CLI entry point for CodeTribunal."""
2
+
3
+ import json
4
+ import click
5
+ from pathlib import Path
6
+
7
+ from code_tribunal.courtroom import run_trial
8
+
9
+
10
@click.command()
@click.argument("path", type=click.Path(exists=True))
@click.option("--output", "-o", type=click.Path(), help="Save full report to file (JSON)")
@click.option("--evidence-only", is_flag=True, help="Only run Phase 1 (GritQL evidence), skip trial")
def main(path: str, output: str | None, evidence_only: bool) -> None:
    """Put your code on trial. PATH is the directory or zip to investigate."""
    # NOTE: the docstring above doubles as the click help text, so it is
    # runtime-visible and intentionally kept short.

    if not evidence_only:
        # Full pipeline: evidence -> investigation -> trial -> verdict.
        trial_result = run_trial(path)

        if output:
            # default=str stringifies anything json cannot serialise natively.
            serialized = json.dumps(trial_result, indent=2, default=str)
            Path(output).write_text(serialized)
            click.echo(f"\nFull report saved to {output}")

        # The verdict is always echoed, whether or not a report file was written.
        click.echo("\n" + trial_result.get("verdict", "No verdict generated."))
        return

    # Evidence-only mode: the evidence module is imported only on this path.
    from code_tribunal.evidence import gather_evidence

    evidence = gather_evidence(path)
    click.echo(evidence.to_text())

    if output:
        payload = {
            "findings": [str(finding) for finding in evidence.findings],
            "stats": {
                "files": evidence.file_count,
                "total": len(evidence.findings),
                "by_severity": {
                    level: len(hits)
                    for level, hits in evidence.findings_by_severity.items()
                },
            },
        }
        Path(output).write_text(json.dumps(payload, indent=2))
        click.echo(f"\nReport saved to {output}")
41
+
42
+
43
+ if __name__ == "__main__":
44
+ main()
src/code_tribunal/courtroom.py ADDED
@@ -0,0 +1,618 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Courtroom pipeline orchestrator — wires evidence → investigation → trial → verdict."""
2
+
3
+ import json
4
+ import time
5
+ from dataclasses import asdict, dataclass, field
6
+
7
+ from crewai import Task, Crew, Process
8
+
9
+ from code_tribunal.agents import (
10
+ security_investigator,
11
+ quality_investigator,
12
+ architecture_investigator,
13
+ prosecutor,
14
+ defense_attorney,
15
+ judge,
16
+ )
17
+ from code_tribunal.evidence import gather_evidence, EvidenceReport
18
+
19
+
20
+ # ---------------------------------------------------------------------------
21
+ # Phase 1: Evidence (deterministic, no LLM)
22
+ # ---------------------------------------------------------------------------
23
+
24
def phase_evidence(target_dir: str) -> EvidenceReport:
    """Gather evidence for ``target_dir`` and print a short summary.

    Args:
        target_dir: Path handed straight to ``gather_evidence``.

    Returns:
        The ``EvidenceReport`` produced by ``gather_evidence``.
    """
    print("\n[Phase 1] Gathering evidence with GritQL...")
    evidence = gather_evidence(target_dir)

    print(f" Files scanned: {evidence.file_count}")
    print(f" Findings: {len(evidence.findings)}")

    # One "SEVERITY=count" entry per severity, ordered alphabetically by label.
    severity_counts = [
        f"{level}={len(hits)}"
        for level, hits in sorted(evidence.findings_by_severity.items())
    ]
    print(" By severity: " + ", ".join(severity_counts))

    return evidence
34
+
35
+
36
+ # ---------------------------------------------------------------------------
37
+ # Phase 2: Investigation (3 specialist investigators, run sequentially)
38
+ # ---------------------------------------------------------------------------
39
+
40
def _domain_evidence_text(report: EvidenceReport, domain: str) -> str:
    """Render the findings of one domain as newline-separated text.

    Args:
        report: Evidence report whose ``findings_by_domain`` mapping is read.
        domain: Domain key to look up (e.g. "security").

    Returns:
        One ``str(finding)`` per line, or a "No <domain> findings detected."
        sentence when the domain has no findings.
    """
    domain_findings = report.findings_by_domain.get(domain, [])
    if domain_findings:
        return "\n".join(map(str, domain_findings))
    return f"No {domain} findings detected."
46
+
47
+
48
def phase_investigation(report: EvidenceReport) -> dict[str, str]:
    """Run the security, quality and architecture investigators on the evidence.

    The three tasks share a single crew configured with
    ``Process.sequential``, so they execute one after another.

    Args:
        report: Evidence collected in phase 1.

    Returns:
        A dict with the keys "security", "quality" and "architecture", each
        mapped to that task's raw output (or "" when the crew result has no
        output at that position).
    """
    print("\n[Phase 2] Investigation — deploying specialist agents...")

    sec_agent = security_investigator()
    qual_agent = quality_investigator()
    arch_agent = architecture_investigator()

    full_evidence = report.to_text()

    # --- Security task -----------------------------------------------------
    security_findings = _domain_evidence_text(report, "security")
    security_brief = (
        "You are investigating a codebase for security vulnerabilities.\n\n"
        "SECURITY EVIDENCE:\n"
        f"{security_findings}\n\n"
        "FULL EVIDENCE REPORT FOR CONTEXT:\n"
        f"{full_evidence}\n\n"
        "Produce a detailed security investigation report. For each finding:\n"
        "- What the vulnerability is\n"
        "- The attack vector (how it could be exploited)\n"
        "- Severity: CRITICAL / HIGH / MEDIUM / LOW\n"
        "- Potential business impact\n"
        "- Recommended fix"
    )
    sec_task = Task(
        description=security_brief,
        agent=sec_agent,
        expected_output="A structured security investigation report with severity-ranked findings and remediation.",
    )

    # --- Quality task ------------------------------------------------------
    quality_findings = _domain_evidence_text(report, "quality")
    quality_brief = (
        "You are investigating a codebase for quality and negligence indicators.\n\n"
        "QUALITY EVIDENCE:\n"
        f"{quality_findings}\n\n"
        "FULL EVIDENCE REPORT FOR CONTEXT:\n"
        f"{full_evidence}\n\n"
        "Produce a quality investigation report. Assess:\n"
        "- Technical debt indicators (TODOs, FIXMEs, HACKs)\n"
        "- Dead code / unused functions\n"
        "- Missing error handling\n"
        "- Signs of rushed or careless development\n"
        "- Whether the code was production-ready when delivered"
    )
    qual_task = Task(
        description=quality_brief,
        agent=qual_agent,
        expected_output="A structured quality investigation report identifying negligence indicators and technical debt.",
    )

    # --- Architecture task (works from the full report only) ---------------
    architecture_brief = (
        "You are investigating a codebase for architectural problems.\n\n"
        "FULL EVIDENCE REPORT:\n"
        f"{full_evidence}\n\n"
        "Produce an architecture investigation report. Assess:\n"
        "- Hardcoded configuration that should be externalized\n"
        "- Tight coupling and missing abstractions\n"
        "- Whether the architecture supports the intended use case\n"
        "- Scalability concerns\n"
        "- Whether this looks like professional work or amateur delivery"
    )
    arch_task = Task(
        description=architecture_brief,
        agent=arch_agent,
        expected_output="A structured architecture investigation report assessing structural soundness.",
    )

    # A single crew runs the three tasks in order; truly parallel execution
    # would need separate Crew instances kicked off concurrently.
    investigation_crew = Crew(
        agents=[sec_agent, qual_agent, arch_agent],
        tasks=[sec_task, qual_task, arch_task],
        process=Process.sequential,
        verbose=True,
    )
    crew_result = investigation_crew.kickoff()

    # Map each task output back to its domain label by position.
    outputs = crew_result.tasks_output if hasattr(crew_result, "tasks_output") else []
    reports = {}
    for position, label in enumerate(("security", "quality", "architecture")):
        if position >= len(outputs):
            reports[label] = ""
            continue
        output = outputs[position]
        reports[label] = output.raw if hasattr(output, "raw") else str(output)

    print(f" Investigation complete: {len(reports)} reports generated.")
    return reports
134
+
135
+
136
+ # ---------------------------------------------------------------------------
137
+ # Phase 3: The Trial (prosecutor vs defense)
138
+ # ---------------------------------------------------------------------------
139
+
140
def phase_trial(evidence_text: str, investigation_reports: dict[str, str]) -> str:
    """Stage the three-round debate: prosecution, defense, then rebuttal.

    Args:
        evidence_text: Raw evidence report text given to both sides.
        investigation_reports: Investigation reports keyed by domain name.

    Returns:
        The transcript: one "=== PROSECUTION ===" / "=== DEFENSE ===" /
        "=== REBUTTAL ===" section per available task output, separated by
        blank lines.
    """
    print("\n[Phase 3] The Trial — Prosecutor vs Defense Attorney...")

    # Every round gets its own agent instance; the rebuttal does not reuse
    # the opening prosecutor.
    pros_agent = prosecutor()
    def_agent = defense_attorney()
    pros_rebuttal_agent = prosecutor()

    report_sections = [
        f"=== {domain.upper()} INVESTIGATION ===\n{body}"
        for domain, body in investigation_reports.items()
    ]
    investigation_text = "\n\n".join(report_sections)

    # Round 1: the prosecution opens.
    prosecution_brief = (
        "PRESENT THE PROSECUTION'S CASE\n\n"
        "You are presenting evidence against a freelance developer who delivered this code to a paying client.\n\n"
        "RAW EVIDENCE:\n"
        f"{evidence_text}\n\n"
        "INVESTIGATION REPORTS:\n"
        f"{investigation_text}\n\n"
        "Build your case. Be specific. Cite findings by category, severity, and potential impact. "
        "Argue that this code represents negligence, not mere imperfection."
    )
    prosecution_task = Task(
        description=prosecution_brief,
        agent=pros_agent,
        expected_output="A compelling prosecution argument citing specific evidence and arguing negligence.",
    )

    # Round 2: the defense answers; the prosecution task is supplied as context.
    defense_brief = (
        "PRESENT THE DEFENSE\n\n"
        "The prosecution has presented their case against this code. "
        "Below is the PROSECUTION'S ARGUMENT — read it carefully, then mount your defense.\n\n"
        "RAW EVIDENCE:\n"
        f"{evidence_text}\n\n"
        "INVESTIGATION REPORTS:\n"
        f"{investigation_text}\n\n"
        "Challenge the prosecution's specific claims. Argue context, proportionality, and intent. "
        "Not every issue is negligence. Some patterns are acceptable in certain contexts. "
        "Be honest but vigorous."
    )
    defense_task = Task(
        description=defense_brief,
        agent=def_agent,
        context=[prosecution_task],
        expected_output="A vigorous defense argument challenging the prosecution's claims with context and proportionality.",
    )

    # Round 3: rebuttal; both earlier tasks are supplied as context.
    rebuttal_brief = (
        "REBUTTAL\n\n"
        "The defense has responded to your case. Below is the DEFENSE'S ARGUMENT. "
        "Now deliver your rebuttal.\n\n"
        "Address their strongest points. Where are they wrong? "
        "Where are they minimizing real harm? "
        "End with a closing argument for the judge."
    )
    rebuttal_task = Task(
        description=rebuttal_brief,
        agent=pros_rebuttal_agent,
        context=[prosecution_task, defense_task],
        expected_output="A sharp rebuttal addressing the defense's arguments and closing the prosecution's case.",
    )

    trial_crew = Crew(
        agents=[pros_agent, def_agent, pros_rebuttal_agent],
        tasks=[prosecution_task, defense_task, rebuttal_task],
        process=Process.sequential,
        verbose=True,
    )
    crew_result = trial_crew.kickoff()

    # Assemble the transcript from whichever rounds produced an output.
    outputs = crew_result.tasks_output if hasattr(crew_result, "tasks_output") else []
    transcript_parts = []
    for position, heading in enumerate(("PROSECUTION", "DEFENSE", "REBUTTAL")):
        if position >= len(outputs):
            continue
        output = outputs[position]
        argument = output.raw if hasattr(output, "raw") else str(output)
        transcript_parts.append(f"=== {heading} ===\n{argument}")

    print(" Trial complete: 3 rounds of argument.")
    return "\n\n".join(transcript_parts)
226
+
227
+
228
+ # ---------------------------------------------------------------------------
229
+ # Phase 4: The Verdict
230
+ # ---------------------------------------------------------------------------
231
+
232
def phase_verdict(evidence_text: str, investigation_text: str, trial_transcript: str) -> str:
    """Ask the judge agent for the final structured verdict.

    Args:
        evidence_text: Raw evidence report text.
        investigation_text: Combined investigation reports.
        trial_transcript: Transcript of the prosecution/defense debate.

    Returns:
        The crew result's ``raw`` text when present, otherwise ``str(result)``.
    """
    print("\n[Phase 4] The Verdict — Judge deliberating...")

    judge_agent = judge()

    # The prompt embeds all three inputs, then spells out the verdict layout.
    verdict_brief = (
        "DELIVER YOUR VERDICT\n\n"
        "You have reviewed all evidence, investigation reports, and the full trial transcript.\n\n"
        "RAW EVIDENCE:\n"
        f"{evidence_text}\n\n"
        "INVESTIGATION REPORTS:\n"
        f"{investigation_text}\n\n"
        "TRIAL TRANSCRIPT:\n"
        f"{trial_transcript}\n\n"
        "Deliver a structured verdict:\n\n"
        "## VERDICT\n"
        "Overall: [GUILTY / MIXED / NOT GUILTY]\n"
        "Reputational Risk Score: [0-100]\n\n"
        "## FINDINGS SUMMARY\n"
        "For each finding: severity, impact, remediation\n\n"
        "## SENTENCE\n"
        "Your final assessment and recommendations for the client."
    )
    expected = (
        "A structured verdict with overall ruling, reputational risk score (0-100), "
        "findings summary, and final sentence."
    )
    verdict_task = Task(
        description=verdict_brief,
        agent=judge_agent,
        expected_output=expected,
    )

    verdict_crew = Crew(
        agents=[judge_agent],
        tasks=[verdict_task],
        verbose=True,
    )
    crew_result = verdict_crew.kickoff()

    if hasattr(crew_result, "raw"):
        verdict = crew_result.raw
    else:
        verdict = str(crew_result)

    print(" Verdict delivered.")
    return verdict
275
+
276
+
277
+ # ---------------------------------------------------------------------------
278
+ # Full Pipeline
279
+ # ---------------------------------------------------------------------------
280
+
281
def run_trial(target_dir: str) -> dict:
    """Run every CodeTribunal phase against ``target_dir``.

    Args:
        target_dir: Path passed to the evidence phase.

    Returns:
        When evidence has findings: a dict with "evidence", "investigation",
        "transcript", "verdict" and "stats". When there are no findings the
        later phases are skipped and only "verdict" ("DISMISSED") and
        "reason" are returned.
    """
    banner = "=" * 60
    print(banner)
    print(" CODETRIBUNAL — THE AI COURTROOM")
    print(banner)

    # Phase 1: evidence gathering.
    evidence_report = phase_evidence(target_dir)
    evidence_text = evidence_report.to_text()

    # Nothing to argue about: stop before any agent is created.
    if not evidence_report.findings:
        print("\nNo findings detected. Case dismissed — code appears clean.")
        return {"verdict": "DISMISSED", "reason": "No evidence of issues found."}

    # Phase 2: specialist investigations, flattened into one text block.
    investigation_reports = phase_investigation(evidence_report)
    report_sections = [
        f"=== {domain.upper()} INVESTIGATION ===\n{body}"
        for domain, body in investigation_reports.items()
    ]
    investigation_text = "\n\n".join(report_sections)

    # Phase 3: the debate.
    trial_transcript = phase_trial(evidence_text, investigation_reports)

    # Phase 4: the judge's ruling.
    verdict = phase_verdict(evidence_text, investigation_text, trial_transcript)

    print("\n" + banner)
    print(" TRIAL COMPLETE")
    print(banner)

    severity_counts = {
        level: len(hits)
        for level, hits in evidence_report.findings_by_severity.items()
    }
    stats = {
        "files_scanned": evidence_report.file_count,
        "total_findings": len(evidence_report.findings),
        "by_severity": severity_counts,
    }
    return {
        "evidence": evidence_text,
        "investigation": investigation_text,
        "transcript": trial_transcript,
        "verdict": verdict,
        "stats": stats,
    }
326
+
327
+
328
+ # ---------------------------------------------------------------------------
329
+ # Streaming variants (for Gradio UI — preserve existing functions for CLI)
330
+ # ---------------------------------------------------------------------------
331
+
332
@dataclass
class StreamResult:
    """Holder that the streaming phase generators fill in as they finish.

    A generator cannot hand back a return value to a plain ``for`` loop, so
    callers pass one of these in and read it after iteration ends.
    """

    # Final text produced by the phase.
    text: str = ""
    # Extra per-phase data; each instance gets its own fresh dict.
    metadata: dict = field(default_factory=dict)
337
+
338
+
339
+ def _simulate_stream(text: str, role: str, chunk_size: int = 4):
340
+ """Fallback: simulate token-by-token streaming from a complete text."""
341
+ for i in range(0, len(text), chunk_size):
342
+ yield role, text[i : i + chunk_size]
343
+ time.sleep(0.01)
344
+
345
+
346
def phase_investigation_stream(report: "EvidenceReport", result: StreamResult):
    """Streaming variant of ``phase_investigation``.

    Builds the same three investigator tasks, runs them in one sequential
    crew created with ``stream=True`` and forwards every chunk as it arrives.
    If the streaming run raises, the failure is logged and the blocking
    ``phase_investigation`` is run instead, with its reports replayed through
    ``_simulate_stream``.

    Args:
        report: Evidence collected in phase 1.
        result: Accumulator populated once iteration finishes: ``text`` gets
            the combined "=== <DOMAIN> INVESTIGATION ===" sections and
            ``metadata["reports"]`` the per-domain dict.

    Yields:
        ``(agent_role, delta, task_index)`` tuples.
    """

    sec_agent = security_investigator()
    qual_agent = quality_investigator()
    arch_agent = architecture_investigator()

    full_evidence = report.to_text()

    sec_task = Task(
        description=(
            "You are investigating a codebase for security vulnerabilities.\n\n"
            "SECURITY EVIDENCE:\n"
            f"{_domain_evidence_text(report, 'security')}\n\n"
            "FULL EVIDENCE REPORT FOR CONTEXT:\n"
            f"{full_evidence}\n\n"
            "Produce a detailed security investigation report. For each finding:\n"
            "- What the vulnerability is\n"
            "- The attack vector (how it could be exploited)\n"
            "- Severity: CRITICAL / HIGH / MEDIUM / LOW\n"
            "- Potential business impact\n"
            "- Recommended fix"
        ),
        agent=sec_agent,
        expected_output="A structured security investigation report with severity-ranked findings and remediation.",
    )

    qual_task = Task(
        description=(
            "You are investigating a codebase for quality and negligence indicators.\n\n"
            "QUALITY EVIDENCE:\n"
            f"{_domain_evidence_text(report, 'quality')}\n\n"
            "FULL EVIDENCE REPORT FOR CONTEXT:\n"
            f"{full_evidence}\n\n"
            "Produce a quality investigation report. Assess:\n"
            "- Technical debt indicators (TODOs, FIXMEs, HACKs)\n"
            "- Dead code / unused functions\n"
            "- Missing error handling\n"
            "- Signs of rushed or careless development\n"
            "- Whether the code was production-ready when delivered"
        ),
        agent=qual_agent,
        expected_output="A structured quality investigation report identifying negligence indicators and technical debt.",
    )

    arch_task = Task(
        description=(
            "You are investigating a codebase for architectural problems.\n\n"
            "FULL EVIDENCE REPORT:\n"
            f"{full_evidence}\n\n"
            "Produce an architecture investigation report. Assess:\n"
            "- Hardcoded configuration that should be externalized\n"
            "- Tight coupling and missing abstractions\n"
            "- Whether the architecture supports the intended use case\n"
            "- Scalability concerns\n"
            "- Whether this looks like professional work or amateur delivery"
        ),
        agent=arch_agent,
        expected_output="A structured architecture investigation report assessing structural soundness.",
    )

    labels = ["security", "quality", "architecture"]

    try:
        investigation_crew = Crew(
            agents=[sec_agent, qual_agent, arch_agent],
            tasks=[sec_task, qual_task, arch_task],
            process=Process.sequential,
            verbose=True,
            stream=True,
        )

        accumulated = {"security": "", "quality": "", "architecture": ""}

        streaming_output = investigation_crew.kickoff()
        for chunk in streaming_output:
            delta = chunk.content or ""
            task_idx = chunk.task_index or 0
            role = chunk.agent_role or "Investigator"
            # Chunks for unknown task indices are forwarded but not accumulated.
            if task_idx < len(labels):
                accumulated[labels[task_idx]] += delta
            yield (role, delta, task_idx)

        # Prefer the crew's final task outputs over the concatenated deltas.
        crew_result = streaming_output.result
        task_outputs = crew_result.tasks_output if hasattr(crew_result, "tasks_output") else []
        for i, label in enumerate(labels):
            if i < len(task_outputs):
                raw = task_outputs[i].raw if hasattr(task_outputs[i], "raw") else str(task_outputs[i])
                accumulated[label] = raw

        result.text = "\n\n".join(
            f"=== {k.upper()} INVESTIGATION ===\n{v}"
            for k, v in accumulated.items()
        )
        result.metadata["reports"] = accumulated

    except Exception:
        # Best-effort fallback, but record why streaming failed instead of
        # hiding it: the blocking re-run below repeats every LLM call.
        # NOTE: if chunks were already yielded before the failure, the
        # replay below re-emits the reports from the beginning.
        import logging

        logging.getLogger(__name__).exception(
            "Streaming investigation failed; falling back to blocking run"
        )

        reports = phase_investigation(report)
        investigation_text = "\n\n".join(
            f"=== {k.upper()} INVESTIGATION ===\n{v}"
            for k, v in reports.items()
        )
        roles = ["Security Forensic Investigator", "Code Quality Forensic Investigator", "Architecture Forensic Investigator"]
        for i, text in enumerate(reports.values()):
            for role, delta in _simulate_stream(text, roles[i]):
                yield (role, delta, i)
        result.text = investigation_text
        result.metadata["reports"] = reports
455
+
456
+
457
def _split_trial_rounds(transcript: str, round_names: list) -> list:
    """Split a ``phase_trial`` transcript into ``(round_index, body)`` pairs.

    ``phase_trial`` writes upper-case headers ("=== PROSECUTION ===") at the
    start of a line; everything up to the next header belongs to that round,
    including blank-line-separated paragraphs.
    """
    found = []
    for idx, name in enumerate(round_names):
        header = f"=== {name.upper()} ==="
        if transcript.startswith(header):
            start = 0
        else:
            hit = transcript.find("\n" + header)
            if hit == -1:
                continue  # this round is missing from the transcript
            start = hit + 1
        found.append((start, start + len(header), idx))
    found.sort()

    sections = []
    for pos, (_, body_start, idx) in enumerate(found):
        body_end = found[pos + 1][0] if pos + 1 < len(found) else len(transcript)
        sections.append((idx, transcript[body_start:body_end].strip()))
    return sections


def phase_trial_stream(evidence_text: str, investigation_reports: dict, result: StreamResult):
    """Streaming variant of ``phase_trial``.

    Builds the prosecution, defense and rebuttal tasks, runs them in one
    sequential crew created with ``stream=True`` and forwards every chunk.
    If the streaming run raises, the failure is logged and the blocking
    ``phase_trial`` is run instead, with each round of its transcript
    replayed through ``_simulate_stream``.

    Args:
        evidence_text: Raw evidence report text.
        investigation_reports: Investigation reports keyed by domain name.
        result: Accumulator; ``text`` receives the transcript once iteration
            finishes.

    Yields:
        ``(agent_role, delta, round_name, task_index)`` tuples.
    """

    pros_agent = prosecutor()
    def_agent = defense_attorney()
    pros_rebuttal_agent = prosecutor()

    investigation_text = "\n\n".join(
        f"=== {k.upper()} INVESTIGATION ===\n{v}"
        for k, v in investigation_reports.items()
    )

    prosecution_task = Task(
        description=(
            "PRESENT THE PROSECUTION'S CASE\n\n"
            "You are presenting evidence against a freelance developer who delivered this code to a paying client.\n\n"
            "RAW EVIDENCE:\n"
            f"{evidence_text}\n\n"
            "INVESTIGATION REPORTS:\n"
            f"{investigation_text}\n\n"
            "Build your case. Be specific. Cite findings by category, severity, and potential impact. "
            "Argue that this code represents negligence, not mere imperfection."
        ),
        agent=pros_agent,
        expected_output="A compelling prosecution argument citing specific evidence and arguing negligence.",
    )

    defense_task = Task(
        description=(
            "PRESENT THE DEFENSE\n\n"
            "The prosecution has presented their case against this code. "
            "Below is the PROSECUTION'S ARGUMENT — read it carefully, then mount your defense.\n\n"
            "RAW EVIDENCE:\n"
            f"{evidence_text}\n\n"
            "INVESTIGATION REPORTS:\n"
            f"{investigation_text}\n\n"
            "Challenge the prosecution's specific claims. Argue context, proportionality, and intent. "
            "Not every issue is negligence. Some patterns are acceptable in certain contexts. "
            "Be honest but vigorous."
        ),
        agent=def_agent,
        context=[prosecution_task],
        expected_output="A vigorous defense argument challenging the prosecution's claims with context and proportionality.",
    )

    rebuttal_task = Task(
        description=(
            "REBUTTAL\n\n"
            "The defense has responded to your case. Below is the DEFENSE'S ARGUMENT. "
            "Now deliver your rebuttal.\n\n"
            "Address their strongest points. Where are they wrong? "
            "Where are they minimizing real harm? "
            "End with a closing argument for the judge."
        ),
        agent=pros_rebuttal_agent,
        context=[prosecution_task, defense_task],
        expected_output="A sharp rebuttal addressing the defense's arguments and closing the prosecution's case.",
    )

    round_names = ["Prosecution", "Defense", "Rebuttal"]

    try:
        trial_crew = Crew(
            agents=[pros_agent, def_agent, pros_rebuttal_agent],
            tasks=[prosecution_task, defense_task, rebuttal_task],
            process=Process.sequential,
            verbose=True,
            stream=True,
        )

        accumulated_rounds = ["", "", ""]

        streaming_output = trial_crew.kickoff()
        for chunk in streaming_output:
            delta = chunk.content or ""
            task_idx = chunk.task_index or 0
            role = chunk.agent_role or "Unknown"
            if 0 <= task_idx < len(round_names):
                round_name = round_names[task_idx]
                accumulated_rounds[task_idx] += delta
            else:
                # Unknown round: still forward the chunk, but do not index
                # past the accumulator (that IndexError used to trigger a
                # full blocking re-run via the except below).
                round_name = f"Round {task_idx}"
            yield (role, delta, round_name, task_idx)

        # Prefer the crew's final task outputs over the concatenated deltas.
        crew_result = streaming_output.result
        task_outputs = crew_result.tasks_output if hasattr(crew_result, "tasks_output") else []
        for i in range(min(len(round_names), len(task_outputs))):
            raw = task_outputs[i].raw if hasattr(task_outputs[i], "raw") else str(task_outputs[i])
            accumulated_rounds[i] = raw

        transcript = "\n\n".join(
            f"=== {name} ===\n{text}"
            for name, text in zip(round_names, accumulated_rounds)
        )
        result.text = transcript

    except Exception:
        # Best-effort fallback, but record why streaming failed instead of
        # hiding it: the blocking re-run below repeats every LLM call.
        # NOTE: if chunks were already yielded before the failure, the
        # replay below re-emits the rounds from the beginning.
        import logging

        logging.getLogger(__name__).exception(
            "Streaming trial failed; falling back to blocking run"
        )

        transcript = phase_trial(evidence_text, investigation_reports)
        roles = ["The Prosecutor", "The Defense Attorney", "The Prosecutor"]
        # Replay each round in full. The transcript headers are upper-case
        # and a round may span several paragraphs, so it is split by header
        # rather than by blank lines.
        for task_idx, content in _split_trial_rounds(transcript, round_names):
            round_name = round_names[task_idx]
            for role, delta in _simulate_stream(content, roles[task_idx]):
                yield (role, delta, round_name, task_idx)
        result.text = transcript
563
+
564
+
565
def phase_verdict_stream(evidence_text: str, investigation_text: str, trial_transcript: str, result: StreamResult):
    """Streaming variant of ``phase_verdict``.

    Runs the judge task in a crew created with ``stream=True`` and forwards
    every chunk. If the streaming run raises, the failure is logged and the
    blocking ``phase_verdict`` is run instead, with its text replayed through
    ``_simulate_stream``.

    Args:
        evidence_text: Raw evidence report text.
        investigation_text: Combined investigation reports.
        trial_transcript: Transcript of the prosecution/defense debate.
        result: Accumulator; ``text`` receives the verdict once iteration
            finishes.

    Yields:
        ``(agent_role, delta)`` tuples.
    """

    judge_agent = judge()

    verdict_task = Task(
        description=(
            "DELIVER YOUR VERDICT\n\n"
            "You have reviewed all evidence, investigation reports, and the full trial transcript.\n\n"
            "RAW EVIDENCE:\n"
            f"{evidence_text}\n\n"
            "INVESTIGATION REPORTS:\n"
            f"{investigation_text}\n\n"
            "TRIAL TRANSCRIPT:\n"
            f"{trial_transcript}\n\n"
            "Deliver a structured verdict:\n\n"
            "## VERDICT\n"
            "Overall: [GUILTY / MIXED / NOT GUILTY]\n"
            "Reputational Risk Score: [0-100]\n\n"
            "## FINDINGS SUMMARY\n"
            "For each finding: severity, impact, remediation\n\n"
            "## SENTENCE\n"
            "Your final assessment and recommendations for the client."
        ),
        agent=judge_agent,
        expected_output=(
            "A structured verdict with overall ruling, reputational risk score (0-100), "
            "findings summary, and final sentence."
        ),
    )

    try:
        verdict_crew = Crew(
            agents=[judge_agent],
            tasks=[verdict_task],
            verbose=True,
            stream=True,
        )

        streaming_output = verdict_crew.kickoff()
        for chunk in streaming_output:
            delta = chunk.content or ""
            role = chunk.agent_role or "The Judge"
            yield (role, delta)

        # The final text comes from the crew result, not the joined deltas.
        crew_result = streaming_output.result
        result.text = crew_result.raw if hasattr(crew_result, "raw") else str(crew_result)

    except Exception:
        # Best-effort fallback, but record why streaming failed instead of
        # hiding it: the blocking re-run below repeats the LLM call.
        # NOTE: if chunks were already yielded before the failure, the
        # replay below re-emits the verdict from the beginning.
        import logging

        logging.getLogger(__name__).exception(
            "Streaming verdict failed; falling back to blocking run"
        )

        verdict = phase_verdict(evidence_text, investigation_text, trial_transcript)
        for role, delta in _simulate_stream(verdict, "The Judge"):
            yield (role, delta)
        result.text = verdict
src/code_tribunal/evidence.py ADDED
@@ -0,0 +1,337 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Evidence gathering layer using GritQL for deterministic code analysis."""
2
+
3
+ import os
4
+ import subprocess
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+
8
+ from dotenv import load_dotenv
9
+
10
+ load_dotenv(Path(__file__).resolve().parent.parent.parent / ".env")
11
+
12
+ GRITQL_PATTERNS = [
13
+ # --- Hardcoded secrets (specific var names that reliably match) ---
14
+ {
15
+ "category": "secret_password",
16
+ "pattern": 'or { `DB_PASSWORD = $_`, `PASSWORD = $_`, `$PASS = $_` where { $PASS <: r"(?i).*password" } }',
17
+ "language": "python",
18
+ "severity_hint": "CRITICAL",
19
+ "domain": "security",
20
+ },
21
+ {
22
+ "category": "secret_api_key",
23
+ "pattern": 'or { `API_KEY = $_`, `SECRET_KEY = $_`, `STRIPE_KEY = $_` }',
24
+ "language": "python",
25
+ "severity_hint": "CRITICAL",
26
+ "domain": "security",
27
+ },
28
+ {
29
+ "category": "secret_aws",
30
+ "pattern": '`AWS_SECRET = $_`',
31
+ "language": "python",
32
+ "severity_hint": "CRITICAL",
33
+ "domain": "security",
34
+ },
35
+ {
36
+ "category": "secret_js",
37
+ "pattern": 'or { `STRIPE_KEY = $_`, `JWT_SECRET = $_` }',
38
+ "language": None,
39
+ "severity_hint": "CRITICAL",
40
+ "domain": "security",
41
+ },
42
+ {
43
+ "category": "connection_string",
44
+ "pattern": '`self.connection_string = "$CONN"` where { $CONN <: r"mysql://.+" }',
45
+ "language": "python",
46
+ "severity_hint": "CRITICAL",
47
+ "domain": "security",
48
+ },
49
+ # --- TODO / FIXME / HACK ---
50
+ {
51
+ "category": "todo_py",
52
+ "pattern": "`# TODO: $_`",
53
+ "language": "python",
54
+ "severity_hint": "LOW",
55
+ "domain": "quality",
56
+ },
57
+ {
58
+ "category": "todo_js",
59
+ "pattern": "`// TODO: $_`",
60
+ "language": None,
61
+ "severity_hint": "LOW",
62
+ "domain": "quality",
63
+ },
64
+ {
65
+ "category": "fixme_py",
66
+ "pattern": "`# FIXME: $_`",
67
+ "language": "python",
68
+ "severity_hint": "MEDIUM",
69
+ "domain": "quality",
70
+ },
71
+ {
72
+ "category": "fixme_js",
73
+ "pattern": "`// FIXME: $_`",
74
+ "language": None,
75
+ "severity_hint": "MEDIUM",
76
+ "domain": "quality",
77
+ },
78
+ {
79
+ "category": "hack_py",
80
+ "pattern": "`# HACK: $_`",
81
+ "language": "python",
82
+ "severity_hint": "MEDIUM",
83
+ "domain": "quality",
84
+ },
85
+ {
86
+ "category": "hack_js",
87
+ "pattern": "`// HACK: $_`",
88
+ "language": None,
89
+ "severity_hint": "MEDIUM",
90
+ "domain": "quality",
91
+ },
92
+ # --- Dangerous functions ---
93
+ {
94
+ "category": "eval_usage",
95
+ "pattern": "`eval($_)`",
96
+ "language": "python",
97
+ "severity_hint": "CRITICAL",
98
+ "domain": "security",
99
+ },
100
+ {
101
+ "category": "pickle_load",
102
+ "pattern": "`pickle.load($_)`",
103
+ "language": "python",
104
+ "severity_hint": "CRITICAL",
105
+ "domain": "security",
106
+ },
107
+ {
108
+ "category": "os_system",
109
+ "pattern": "`os.system($_)`",
110
+ "language": "python",
111
+ "severity_hint": "CRITICAL",
112
+ "domain": "security",
113
+ },
114
+ {
115
+ "category": "subprocess_shell",
116
+ "pattern": "`subprocess.call($_, shell=True)`",
117
+ "language": "python",
118
+ "severity_hint": "CRITICAL",
119
+ "domain": "security",
120
+ },
121
+ {
122
+ "category": "md5_hash",
123
+ "pattern": "`hashlib.md5($_)`",
124
+ "language": "python",
125
+ "severity_hint": "HIGH",
126
+ "domain": "security",
127
+ },
128
+ # --- SQL injection ---
129
+ {
130
+ "category": "sql_injection_fstring",
131
+ "pattern": r'`$S` where { $S <: r"f\"SELECT.*\{.*\}\"" }',
132
+ "language": "python",
133
+ "severity_hint": "CRITICAL",
134
+ "domain": "security",
135
+ },
136
+ {
137
+ "category": "sql_injection_js",
138
+ "pattern": r'`$STR` where { $STR <: r"`SELECT.*\$\{.*\}`" }',
139
+ "language": None,
140
+ "severity_hint": "CRITICAL",
141
+ "domain": "security",
142
+ },
143
+ ]
144
+
145
+
146
@dataclass
class Finding:
    """One match reported by the evidence layer."""

    # Pattern category label (presumably a GRITQL_PATTERNS "category" value;
    # verify against the code that builds findings).
    category: str
    # Path of the file the match was found in.
    file: str
    # Line number, kept as text; surrounding whitespace is stripped on display.
    line: str
    # Matched source snippet; surrounding whitespace is stripped on display.
    code: str
    # Severity label shown in brackets, e.g. "CRITICAL".
    severity_hint: str
    # Grouping key used by EvidenceReport, e.g. "security" or "quality".
    domain: str

    def __str__(self) -> str:
        """Format as ``[SEVERITY] file:line — code``."""
        location = f"{self.file}:{self.line.strip()}"
        snippet = self.code.strip()
        return f"[{self.severity_hint}] {location} — {snippet}"
159
+
160
+
161
@dataclass
class EvidenceReport:
    """Collected findings for one scanned target, plus scan counters."""

    # Path that was scanned; echoed in the text report.
    target_path: str
    # Every finding, in the order it was recorded.
    findings: list[Finding] = field(default_factory=list)
    # Counters; all default to 0 and are not modified by this class.
    file_count: int = 0
    total_patterns: int = 0
    patterns_with_hits: int = 0

    @property
    def findings_by_domain(self) -> dict[str, list[Finding]]:
        """Findings bucketed by ``domain``, in first-seen order."""
        buckets: dict[str, list[Finding]] = {}
        for finding in self.findings:
            bucket = buckets.get(finding.domain)
            if bucket is None:
                bucket = buckets[finding.domain] = []
            bucket.append(finding)
        return buckets

    @property
    def findings_by_severity(self) -> dict[str, list[Finding]]:
        """Findings bucketed by ``severity_hint``, in first-seen order."""
        buckets: dict[str, list[Finding]] = {}
        for finding in self.findings:
            bucket = buckets.get(finding.severity_hint)
            if bucket is None:
                bucket = buckets[finding.severity_hint] = []
            bucket.append(finding)
        return buckets

    def to_text(self) -> str:
        """Render the report as plain text: a header, then one section per domain."""
        header = [
            "=== FORENSIC EVIDENCE REPORT ===",
            f"Target: {self.target_path}",
            f"Files scanned: {self.file_count}",
            f"Total findings: {len(self.findings)}",
            "",
        ]

        sections = []
        for domain, hits in self.findings_by_domain.items():
            sections.append(f"--- {domain.upper()} EVIDENCE ({len(hits)} findings) ---")
            sections.extend(str(hit) for hit in hits)
            # Blank line closes each domain section.
            sections.append("")

        return "\n".join(header + sections)
200
+
201
+
202
+ def _parse_gritql_output(raw: str) -> list[tuple[str, str, str]]:
203
+ """Parse grit CLI output into (file, line_number, code_snippet) tuples."""
204
+ results = []
205
+ current_file = None
206
+ for line in raw.splitlines():
207
+ stripped = line.rstrip()
208
+ if not stripped:
209
+ continue
210
+ # Skip summary lines like "Processed X files and found Y matches"
211
+ if stripped.startswith("Processed") and "files" in stripped:
212
+ continue
213
+ # File paths: no leading whitespace, contain a dot or slash
214
+ if stripped and not stripped[0].isspace() and ("." in stripped or "/" in stripped):
215
+ current_file = stripped
216
+ elif current_file and stripped and stripped[0].isspace():
217
+ # Indented line = finding: " 80 return eval(expression)"
218
+ content = stripped.strip()
219
+ if content and content[0].isdigit():
220
+ parts = content.split(None, 1)
221
+ if parts:
222
+ line_num = parts[0]
223
+ code = parts[1] if len(parts) > 1 else ""
224
+ results.append((current_file, line_num, code))
225
+ return results
226
+
227
+
228
def run_gritql_scan(pattern_def: dict, target_dir: str) -> list[Finding]:
    """Run a single GritQL pattern and return structured findings.

    Args:
        pattern_def: Pattern entry with "pattern", "category", "severity_hint"
            and "domain" keys, plus an optional "language" key.
        target_dir: Directory handed to the grit CLI.

    Returns:
        One Finding per parsed match. The list is empty when grit times out,
        prints nothing to stdout, or reports "found 0 matches".

    Raises:
        RuntimeError: If the 'grit' executable cannot be found.
    """
    import logging

    logger = logging.getLogger(__name__)

    # --dry-run ensures no files are modified; --language overrides auto-detection
    cmd = ["grit", "apply", "--dry-run", pattern_def["pattern"], target_dir]
    language = pattern_def.get("language")
    if language:
        cmd += ["--language", language]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    except FileNotFoundError as exc:
        raise RuntimeError(
            "'grit' CLI not found. Install with: npm install -g @getgrit/cli"
        ) from exc
    except subprocess.TimeoutExpired:
        # Still best-effort, but leave a trace: a timed-out pattern would
        # otherwise be indistinguishable from a pattern with no matches.
        logger.warning(
            "grit timed out scanning %s for pattern %r",
            target_dir,
            pattern_def.get("category"),
        )
        return []

    output = result.stdout.strip()
    if not output:
        if result.returncode != 0:
            # Surface CLI failures (e.g. a rejected pattern) instead of
            # reporting them as a clean scan.
            logger.warning(
                "grit exited with code %s for pattern %r: %s",
                result.returncode,
                pattern_def.get("category"),
                result.stderr.strip(),
            )
        return []

    # "found 0 matches" is in stdout — bail if no actual matches
    if "found 0 matches" in output:
        return []

    return [
        Finding(
            category=pattern_def["category"],
            file=file_path,
            line=line_num,
            code=code,
            severity_hint=pattern_def["severity_hint"],
            domain=pattern_def["domain"],
        )
        for file_path, line_num, code in _parse_gritql_output(output)
    ]
264
+
265
+
266
+ def _ensure_grit_initialized(target_dir: str) -> None:
267
+ """Run 'grit init' if no .grit directory exists, to enable standard library patterns."""
268
+ grit_dir = Path(target_dir) / ".grit"
269
+ if not grit_dir.exists():
270
+ try:
271
+ subprocess.run(
272
+ ["grit", "init"],
273
+ cwd=target_dir,
274
+ capture_output=True,
275
+ timeout=15,
276
+ )
277
+ except Exception:
278
+ pass # Non-critical; some patterns may still work without init
279
+
280
+
281
# File suffixes that count as source files in the report's "Files scanned" figure.
_SOURCE_EXTENSIONS = frozenset(
    {".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".go", ".rb", ".php", ".c", ".cpp"}
)


def _count_source_files(target_dir: str) -> int:
    """Count regular files under target_dir whose suffix is a source extension.

    The tree is walked once. Directories are skipped, so a directory whose
    name merely ends in a source extension (e.g. "chart.js/") is not counted.
    """
    return sum(
        1
        for path in Path(target_dir).rglob("*")
        if path.suffix in _SOURCE_EXTENSIONS and path.is_file()
    )


def gather_evidence(target_dir: str) -> EvidenceReport:
    """Run all GritQL patterns and return a structured evidence report.

    This drains gather_evidence_streaming and keeps only its final item,
    so both entry points share one scanning loop.
    """
    *_, report = gather_evidence_streaming(target_dir)
    return report


def gather_evidence_streaming(target_dir: str):
    """Run GritQL patterns one by one, yielding status after each pattern.

    Yields status strings during scanning, then yields the final EvidenceReport.
    """
    _ensure_grit_initialized(target_dir)

    file_count = _count_source_files(target_dir)

    all_findings: list[Finding] = []
    patterns_with_hits = 0

    total = len(GRITQL_PATTERNS)
    for i, p in enumerate(GRITQL_PATTERNS):
        yield f"Scanning pattern {i + 1}/{total}: **{p['category']}**..."
        findings = run_gritql_scan(p, target_dir)
        if findings:
            patterns_with_hits += 1
            all_findings.extend(findings)

    yield EvidenceReport(
        target_path=target_dir,
        findings=all_findings,
        file_count=file_count,
        total_patterns=total,
        patterns_with_hits=patterns_with_hits,
    )
tests/fixtures/bad_code.zip ADDED
Binary file (2.04 kB). View file
 
tests/fixtures/locale/app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Bad freelance code - deliberately terrible for testing
2
+ import os
3
+ import sys
4
+ import json
5
+ import pickle
6
+ import hashlib
7
+ import subprocess
8
+
9
+ # Hardcoded credentials
10
+ DB_PASSWORD = "super_secret_123"
11
+ API_KEY = "sk-abc123def456ghi789"
12
+ AWS_SECRET = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
13
+
14
+ # TODO: fix this later
15
+ # HACK: temporary workaround, will remove before production
16
+ # FIXME: this entire function is broken
17
+
18
+
19
+ def get_user(user_id):
20
+ # SQL injection vulnerability
21
+ query = f"SELECT * FROM users WHERE id = {user_id}"
22
+ return query
23
+
24
+
25
+ def execute_command(user_input):
26
+ # Command injection
27
+ os.system("ping " + user_input)
28
+ subprocess.call(user_input, shell=True)
29
+
30
+
31
+ def load_data(filename):
32
+ # Insecure deserialization
33
+ with open(filename, "rb") as f:
34
+ data = pickle.load(f)
35
+ return data
36
+
37
+
38
+ def hash_password(password):
39
+ # Weak hashing
40
+ return hashlib.md5(password.encode()).hexdigest()
41
+
42
+
43
+ def process_payment(card_number, cvv, amount):
44
+ # Logging sensitive data
45
+ print(f"Processing payment: card={card_number}, cvv={cvv}")
46
+ # No encryption, no validation
47
+ return True
48
+
49
+
50
+ class DatabaseConnection:
51
+ def __init__(self):
52
+ # Connection string with hardcoded credentials
53
+ self.connection_string = "mysql://admin:password123@localhost:3306/prod"
54
+ self.connected = False
55
+
56
+ def connect(self):
57
+ # No error handling
58
+ pass
59
+
60
+ def query(self, sql):
61
+ # Another SQL injection point
62
+ cursor = self.connection_string
63
+ return cursor
64
+
65
+
66
+ def unused_function_one():
67
+ pass
68
+
69
+
70
+ def unused_function_two():
71
+ pass
72
+
73
+
74
+ def unused_function_three():
75
+ pass
76
+
77
+
78
+ # eval on user input
79
+ def calculate(expression):
80
+ return eval(expression)
tests/fixtures/locale/utils.js ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // More bad freelance code
2
+
3
+ const express = require('express');
4
+ const app = express();
5
+
6
+ // Hardcoded secrets
7
+ const STRIPE_KEY = "sk_live_51HxxxxxXXXXXX";
8
+ const JWT_SECRET = "my-super-secret-jwt-key-12345";
9
+
10
+ // TODO: add authentication middleware
11
+ // FIXME: this is not secure at all
12
+
13
+ app.get('/api/users/:id', (req, res) => {
14
+ // SQL injection via string concatenation
15
+ const query = `SELECT * FROM users WHERE id = ${req.params.id}`;
16
+ db.query(query);
17
+ });
18
+
19
+ app.post('/api/login', (req, res) => {
20
+ // No password hashing comparison - plain text
21
+ const user = users.find(u => u.password === req.body.password);
22
+ if (user) {
23
+ // Exposing sensitive data in response
24
+ res.json({ user: user, token: generateToken(user) });
25
+ }
26
+ });
27
+
28
+ app.listen(3000, () => {
29
+ console.log("Server running on port 3000");
30
+ console.log("API_KEY:", process.env.API_KEY); // logging secrets
31
+ });
32
+
33
+ function generateToken(user) {
34
+ // Weak token generation
35
+ return Buffer.from(JSON.stringify(user)).toString('base64');
36
+ }
37
+
38
+ // Dead code - never called
39
+ function legacyHandler(req, res) {
40
+ console.log("This function is never used");
41
+ }
42
+
43
+ function oldMiddleware(req, res, next) {
44
+ console.log("Deprecated middleware");
45
+ next();
46
+ }
tests/test_integration.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Test integration: GritQL evidence → CrewAI agent analysis."""
2
+
3
+ import os
4
+ import subprocess
5
+ from pathlib import Path
6
+
7
+ from dotenv import load_dotenv
8
+ from crewai import Agent, Task, Crew, LLM
9
+
10
+ # Load .env from project root
11
+ load_dotenv(Path(__file__).resolve().parent.parent / ".env")
12
+
13
+ # --- Configuration ---
14
+ LOCALE_DIR = os.path.join(os.path.dirname(__file__), "fixtures", "locale")
15
+
16
+ # Patterns verified against test fixtures.
17
+ # JS patterns use // comments, Python patterns use # comments.
18
+ # Some patterns target Python specifically via --language flag.
19
+ GRITQL_PATTERNS = [
20
+ # --- Cross-language: hardcoded secrets ---
21
+ {
22
+ "category": "hardcoded_secrets_js",
23
+ "pattern": '`$VAR = "$VAL"` where { $VAR <: r"(?i).*(password|key|secret|token).*" }',
24
+ "language": None, # auto-detect (JS works natively)
25
+ },
26
+ {
27
+ "category": "hardcoded_secrets_py",
28
+ "pattern": '`$VAR = $VAL` where { $VAR <: r"(?i).*(PASSWORD|KEY|SECRET|TOKEN).*" }',
29
+ "language": "python",
30
+ },
31
+ # --- Connection strings ---
32
+ {
33
+ "category": "connection_strings",
34
+ "pattern": '`"$CONN"` where { $CONN <: r"mysql://.+" }',
35
+ "language": None,
36
+ },
37
+ # --- TODO / FIXME / HACK comments ---
38
+ {
39
+ "category": "todo_py",
40
+ "pattern": "`# TODO: $_`",
41
+ "language": "python",
42
+ },
43
+ {
44
+ "category": "todo_js",
45
+ "pattern": "`// TODO: $_`",
46
+ "language": None,
47
+ },
48
+ {
49
+ "category": "fixme_py",
50
+ "pattern": "`# FIXME: $_`",
51
+ "language": "python",
52
+ },
53
+ {
54
+ "category": "fixme_js",
55
+ "pattern": "`// FIXME: $_`",
56
+ "language": None,
57
+ },
58
+ {
59
+ "category": "hack_py",
60
+ "pattern": "`# HACK: $_`",
61
+ "language": "python",
62
+ },
63
+ {
64
+ "category": "hack_js",
65
+ "pattern": "`// HACK: $_`",
66
+ "language": None,
67
+ },
68
+ # --- Dangerous function calls ---
69
+ {
70
+ "category": "eval_usage",
71
+ "pattern": "`eval($_)`",
72
+ "language": "python",
73
+ },
74
+ {
75
+ "category": "pickle_load",
76
+ "pattern": "`pickle.load($_)`",
77
+ "language": "python",
78
+ },
79
+ {
80
+ "category": "os_system",
81
+ "pattern": "`os.system($_)`",
82
+ "language": "python",
83
+ },
84
+ {
85
+ "category": "subprocess_shell",
86
+ "pattern": "`subprocess.call($_, shell=True)`",
87
+ "language": "python",
88
+ },
89
+ {
90
+ "category": "md5_hash",
91
+ "pattern": "`hashlib.md5($_)`",
92
+ "language": "python",
93
+ },
94
+ # --- SQL injection ---
95
+ {
96
+ "category": "sql_injection_fstring",
97
+ "pattern": r'`$S` where { $S <: r"f\"SELECT.*\{.*\}\"" }',
98
+ "language": "python",
99
+ },
100
+ {
101
+ "category": "sql_injection_js",
102
+ "pattern": r'`$STR` where { $STR <: r"`SELECT.*\$\{.*\}`" }',
103
+ "language": None,
104
+ },
105
+ ]
106
+
107
+
108
+ def run_gritql(pattern: str, target_dir: str, language: str | None = None) -> dict:
109
+ """Run a single GritQL pattern and return structured results."""
110
+ cmd = ["grit", "apply", pattern, target_dir]
111
+ if language:
112
+ cmd += ["--language", language]
113
+
114
+ try:
115
+ result = subprocess.run(
116
+ cmd,
117
+ capture_output=True,
118
+ text=True,
119
+ timeout=30,
120
+ )
121
+ output = result.stdout.strip()
122
+ errors = result.stderr.strip()
123
+ # Grit prints "Processed X files and found Y matches" to stderr
124
+ match_line = [l for l in errors.splitlines() if "found" in l]
125
+ return {
126
+ "pattern": pattern,
127
+ "findings": output or None,
128
+ "summary": match_line[0] if match_line else None,
129
+ "returncode": result.returncode,
130
+ }
131
+ except FileNotFoundError:
132
+ return {"pattern": pattern, "findings": None, "error": "'grit' CLI not found. Run: npm install -g @getgrit/cli"}
133
+ except Exception as e:
134
+ return {"pattern": pattern, "findings": None, "error": str(e)}
135
+
136
+
137
def gather_evidence(target_dir: str) -> list[dict]:
    """Scan target_dir with every entry of GRITQL_PATTERNS, in list order.

    A progress line is printed before each scan. Each result dict from
    run_gritql is tagged with the pattern's "category" before being collected.
    """
    collected = []
    for spec in GRITQL_PATTERNS:
        print(f" Scanning: {spec['category']}...")
        outcome = run_gritql(spec["pattern"], target_dir, spec.get("language"))
        outcome["category"] = spec["category"]
        collected.append(outcome)
    return collected
146
+
147
+
148
def format_evidence_for_agent(evidence: list[dict]) -> str:
    """Build the plain-text evidence report handed to the LLM agent.

    Only entries with truthy "findings" get a section. The second line
    reports how many entries had findings out of the total.
    """
    sections = []
    hit_count = 0
    for entry in evidence:
        if not entry.get("findings"):
            continue
        hit_count += 1
        sections.append(f"--- {entry['category'].upper()} ---")
        sections.append(f"Pattern: {entry['pattern']}")
        sections.append(f"Findings:\n{entry['findings']}")
        sections.append("")

    # The totals line is only known after the loop, so the preamble is built last.
    preamble = [
        "=== FORENSIC EVIDENCE REPORT ===\n",
        f"Total categories with findings: {hit_count} / {len(evidence)}\n",
    ]
    return "\n".join(preamble + sections)
161
+
162
+
163
def run_crewai_analysis(evidence_report: str) -> str:
    """Hand the evidence report to a single-agent CrewAI crew and return its answer.

    The model name comes from MODEL_NAME (default "zai/glm-5.1") and the API
    key from ZAI_API_KEY. Returns the crew result's ``raw`` attribute when it
    has one, otherwise ``str()`` of the result.
    """
    model_name = os.environ.get("MODEL_NAME", "zai/glm-5.1")
    llm = LLM(model=model_name, api_key=os.environ.get("ZAI_API_KEY"))

    backstory = (
        "You are a veteran code auditor with 15 years of experience. "
        "You've seen every trick in the book — from hardcoded credentials to SQL injection. "
        "You analyze deterministic scan results and provide clear, severity-ranked findings."
    )
    investigator = Agent(
        role="Senior Code Forensic Investigator",
        goal="Analyze code evidence and identify critical security vulnerabilities and code quality issues",
        backstory=backstory,
        llm=llm,
        verbose=True,
    )

    # The evidence text is appended to the instructions after a blank line.
    task_description = (
        "Analyze the following forensic evidence report from a codebase scan. "
        "For each finding, assess severity (CRITICAL / HIGH / MEDIUM / LOW), "
        "explain the risk, and suggest a fix.\n\n"
        f"{evidence_report}"
    )
    analysis_task = Task(
        description=task_description,
        agent=investigator,
        expected_output="A structured forensic analysis report with severity-ranked findings.",
    )

    outcome = Crew(
        agents=[investigator],
        tasks=[analysis_task],
        verbose=True,
    ).kickoff()

    if hasattr(outcome, "raw"):
        return outcome.raw
    return str(outcome)
202
+
203
+
204
def main():
    """Run the two-phase integration check and print progress to stdout.

    Phase 1 scans LOCALE_DIR with GritQL and prints the formatted evidence.
    Phase 2 passes that evidence to the CrewAI agent; it is skipped when
    ZAI_API_KEY is unset or empty.
    """
    rule = "=" * 60
    print(rule)
    print("CodeTribunal Integration Test")
    print(rule)

    # Phase 1: GritQL evidence gathering
    print("\n[Phase 1] Gathering evidence with GritQL...")
    evidence = gather_evidence(LOCALE_DIR)

    hit_total = len([item for item in evidence if item.get("findings")])
    print(f"\n Patterns scanned: {len(evidence)}")
    print(f" Hits: {hit_total}")

    evidence_report = format_evidence_for_agent(evidence)
    print("\n" + evidence_report)

    # Phase 2: CrewAI analysis (needs credentials)
    if not os.environ.get("ZAI_API_KEY"):
        print("\n[Phase 2] SKIPPED — set ZAI_API_KEY to test CrewAI integration")
        return

    print("\n[Phase 2] Running CrewAI analysis with GLM 5.1...")
    report = run_crewai_analysis(evidence_report)
    print("\n" + rule)
    print("AGENT REPORT")
    print(rule)
    print(report)
232
+
233
+
234
+ if __name__ == "__main__":
235
+ main()