Spaces:
Sleeping
Sleeping
Commit ·
d5341cc
1
Parent(s): 38cd7bb
feat: Add initial CodeTribunal implementation
Browse files- LICENSE +21 -0
- README.md +29 -0
- pyproject.toml +46 -0
- src/code_tribunal/__init__.py +1 -0
- src/code_tribunal/agents.py +152 -0
- src/code_tribunal/app.py +720 -0
- src/code_tribunal/cli.py +44 -0
- src/code_tribunal/courtroom.py +618 -0
- src/code_tribunal/evidence.py +337 -0
- tests/fixtures/bad_code.zip +0 -0
- tests/fixtures/locale/app.py +80 -0
- tests/fixtures/locale/utils.js +46 -0
- tests/test_integration.py +235 -0
LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2026 Amine Yagoub
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
README.md
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
---
|
| 2 |
title: CodeTribunal
|
| 3 |
emoji: 💻
|
|
@@ -10,3 +11,31 @@ short_description: The AI Courtroom That Exposes Bad Freelance Code
|
|
| 10 |
---
|
| 11 |
|
| 12 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<<<<<<< HEAD
|
| 2 |
---
|
| 3 |
title: CodeTribunal
|
| 4 |
emoji: 💻
|
|
|
|
| 11 |
---
|
| 12 |
|
| 13 |
Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
|
| 14 |
+
=======
|
| 15 |
+
# CodeTribunal
|
| 16 |
+
|
| 17 |
+
The AI courtroom that exposes bad freelance code.
|
| 18 |
+
|
| 19 |
+
Multi-agent forensic investigation powered by GLM 5.1. Instead of guessing code quality, CodeTribunal puts it on trial — a live-streaming debate where an AI Prosecutor and Defense Attorney clash over real, deterministic technical evidence.
|
| 20 |
+
|
| 21 |
+
## Install
|
| 22 |
+
|
| 23 |
+
```bash
|
| 24 |
+
pip install -e .
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
## Usage
|
| 28 |
+
|
| 29 |
+
```bash
|
| 30 |
+
code-tribunal ./path/to/codebase
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
## How it works
|
| 34 |
+
|
| 35 |
+
1. **Evidence Gathering** — Deterministic scans (security, code smells, hardcoded secrets, TODOs)
|
| 36 |
+
2. **Investigation** — GLM 5.1 agents analyze the evidence
|
| 37 |
+
3. **The Trial** — Prosecutor and Defense debate in a live-streamed courtroom
|
| 38 |
+
4. **Verdict** — The Judge delivers a final ruling
|
| 39 |
+
|
| 40 |
+
Built for the [Build with GLM 5.1](https://build-with-glm-5-1-challenge.devpost.com) hackathon.
|
| 41 |
+
>>>>>>> b4fcdee (feat: Add initial CodeTribunal implementation)
|
pyproject.toml
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
requires = ["hatchling"]
|
| 3 |
+
build-backend = "hatchling.build"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "code-tribunal"
|
| 7 |
+
version = "0.1.0"
|
| 8 |
+
description = "AI courtroom that exposes bad freelance code through multi-agent forensic investigation"
|
| 9 |
+
readme = "README.md"
|
| 10 |
+
license = "MIT"
|
| 11 |
+
# Glob must match the extension-less LICENSE file shipped in the repository root;
# the previous pattern "LICEN[CS]E.*" required a dot and therefore matched nothing.
license-files = ["LICEN[CS]E*"]
|
| 12 |
+
requires-python = ">=3.11"
|
| 13 |
+
authors = [
|
| 14 |
+
{name = "Amine Yagoub"},
|
| 15 |
+
]
|
| 16 |
+
keywords = ["ai", "code-review", "forensic", "multi-agent", "glm"]
|
| 17 |
+
classifiers = [
|
| 18 |
+
"Development Status :: 3 - Alpha",
|
| 19 |
+
"Intended Audience :: Developers",
|
| 20 |
+
"Programming Language :: Python :: 3",
|
| 21 |
+
"Programming Language :: Python :: 3.11",
|
| 22 |
+
"Programming Language :: Python :: 3.12",
|
| 23 |
+
"Programming Language :: Python :: 3.13",
|
| 24 |
+
]
|
| 25 |
+
dependencies = [
|
| 26 |
+
"crewai[litellm]",
|
| 27 |
+
"gritql>=0.2.0",
|
| 28 |
+
"gradio>=5.0.0",
|
| 29 |
+
"rich>=13.0.0",
|
| 30 |
+
"click>=8.0.0",
|
| 31 |
+
"httpx>=0.27.0",
|
| 32 |
+
"python-dotenv>=1.0.0",
|
| 33 |
+
"fpdf2>=2.7.0",
|
| 34 |
+
]
|
| 35 |
+
|
| 36 |
+
[project.optional-dependencies]
|
| 37 |
+
dev = [
|
| 38 |
+
"pytest>=8.0.0",
|
| 39 |
+
"ruff>=0.9.0",
|
| 40 |
+
]
|
| 41 |
+
|
| 42 |
+
[project.urls]
|
| 43 |
+
Repository = "https://github.com/amineyagoub/CodeTribunal"
|
| 44 |
+
|
| 45 |
+
[project.scripts]
|
| 46 |
+
code-tribunal = "code_tribunal.cli:main"
|
src/code_tribunal/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
"""CodeTribunal: AI courtroom that exposes bad freelance code."""
|
src/code_tribunal/agents.py
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Courtroom agent definitions for CodeTribunal."""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
from crewai import Agent, LLM
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
from dotenv import load_dotenv
|
| 7 |
+
|
| 8 |
+
load_dotenv(Path(__file__).resolve().parent.parent.parent / ".env")
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def _get_llm() -> LLM:
    """Create the LLM handle used by the courtroom agents.

    The model identifier is read from ``MODEL_NAME`` (default ``zai/glm-5.1``)
    and the API key from ``ZAI_API_KEY``; the temperature is fixed at 0.3.
    """
    env = os.environ
    model_name = env.get("MODEL_NAME", "zai/glm-5.1")
    api_key = env.get("ZAI_API_KEY")
    return LLM(model=model_name, api_key=api_key, temperature=0.3)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
# ---------------------------------------------------------------------------
|
| 20 |
+
# Phase 2: Investigators
|
| 21 |
+
# ---------------------------------------------------------------------------
|
| 22 |
+
|
| 23 |
+
def security_investigator() -> Agent:
    """Return the investigator agent that reports on security evidence."""
    goal = (
        "Analyze security-related code evidence and produce a detailed investigation report. "
        "Identify every vulnerability, rank by severity, explain the attack vector, "
        "and describe the potential impact if exploited in production."
    )
    backstory = (
        "You are a former penetration tester turned code auditor. "
        "You've found hardcoded AWS keys in Fortune 500 repos, SQL injection in banking APIs, "
        "and deserialization bugs that would have cost millions. "
        "You don't guess — you follow the evidence and build an airtight case. "
        "You treat every hardcoded secret as a loaded weapon and every eval() as an open door."
    )
    return Agent(
        role="Security Forensic Investigator",
        goal=goal,
        backstory=backstory,
        llm=_get_llm(),
        verbose=True,
    )
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def quality_investigator() -> Agent:
    """Return the investigator agent that reports on code-quality evidence."""
    goal = (
        "Analyze code quality evidence and produce a detailed investigation report. "
        "Identify technical debt, abandoned code, missing error handling, and developer negligence indicators. "
        "Focus on patterns that suggest rushed or careless development."
    )
    backstory = (
        "You are a principal engineer who has inherited nightmares from freelance developers. "
        "You've seen TODO comments that are 5 years old, dead code that accounts for 40% of a codebase, "
        "and functions so complex they defied testing. "
        "You can spot the difference between 'agile iteration' and 'lazy corner-cutting' from a mile away."
    )
    return Agent(
        role="Code Quality Forensic Investigator",
        goal=goal,
        backstory=backstory,
        llm=_get_llm(),
        verbose=True,
    )
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def architecture_investigator() -> Agent:
    """Return the investigator agent that reports on architectural evidence."""
    goal = (
        "Analyze architectural evidence and produce a detailed investigation report. "
        "Identify structural problems: tight coupling, missing abstractions, "
        "hardcoded configuration that should be externalized, and patterns that won't scale."
    )
    backstory = (
        "You are a systems architect with 20 years of experience across startups and enterprises. "
        "You can look at a codebase and tell whether it was built to last or built to invoice. "
        "You identify patterns that indicate the developer didn't understand the domain "
        "or deliberately cut corners to finish faster."
    )
    return Agent(
        role="Architecture Forensic Investigator",
        goal=goal,
        backstory=backstory,
        llm=_get_llm(),
        verbose=True,
    )
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
# ---------------------------------------------------------------------------
|
| 82 |
+
# Phase 3: The Trial
|
| 83 |
+
# ---------------------------------------------------------------------------
|
| 84 |
+
|
| 85 |
+
def prosecutor() -> Agent:
    """Return the trial agent that argues the case against the code."""
    goal = (
        "Build the strongest possible case that this code is negligent, dangerous, or fraudulent. "
        "Use the investigation reports as evidence. Argue with precision and force. "
        "Cite specific file paths, line numbers, and vulnerability types. "
        "Make the jury understand why this code should never have been delivered."
    )
    backstory = (
        "You are a ruthless courtroom prosecutor specializing in technology fraud cases. "
        "You've won cases against developers who delivered insecure code to non-technical clients. "
        "You know how to take technical evidence and make it devastatingly clear. "
        "You don't exaggerate — the facts are damning enough. "
        "Your weapon is specificity: every claim backed by line numbers and evidence."
    )
    return Agent(
        role="The Prosecutor",
        goal=goal,
        backstory=backstory,
        llm=_get_llm(),
        verbose=True,
    )
|
| 104 |
+
|
| 105 |
+
|
| 106 |
+
def defense_attorney() -> Agent:
    """Return the trial agent that argues in defense of the code."""
    goal = (
        "Mount the best possible defense of this code. "
        "Challenge the prosecution's claims. Argue mitigating circumstances. "
        "Point out that some patterns are acceptable in certain contexts. "
        "Argue proportionality — not every issue is a catastrophe. "
        "Be honest but vigorous in your defense."
    )
    backstory = (
        "You are a defense attorney who specializes in technology cases. "
        "You believe everyone deserves a fair hearing, even bad code. "
        "You're not dishonest — you argue context, proportionality, and intent. "
        "A TODO comment isn't negligence, it's a roadmap. "
        "An eval() in a private script isn't the same as eval() in a web server. "
        "You force the prosecution to prove every claim."
    )
    return Agent(
        role="The Defense Attorney",
        goal=goal,
        backstory=backstory,
        llm=_get_llm(),
        verbose=True,
    )
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
# ---------------------------------------------------------------------------
|
| 130 |
+
# Phase 4: The Verdict
|
| 131 |
+
# ---------------------------------------------------------------------------
|
| 132 |
+
|
| 133 |
+
def judge() -> Agent:
    """Return the agent that delivers the final structured verdict."""
    goal = (
        "Review all evidence, investigation reports, and the trial transcript. "
        "Deliver a final, structured verdict. "
        "For each finding: severity, impact, and recommended remediation. "
        "End with an overall assessment: GUILTY (negligent), MIXED (some issues), or NOT GUILTY (acceptable). "
        "Include a 'reputational risk score' from 0-100 for the developer who wrote this code."
    )
    backstory = (
        "You are a senior judge who has presided over hundreds of technology disputes. "
        "You are impartial, precise, and thorough. "
        "You don't let the prosecution's rhetoric sway you — you follow the evidence. "
        "But you also don't let the defense minimize real harm. "
        "Your verdicts are known for being fair, detailed, and impossible to appeal."
    )
    return Agent(
        role="The Judge",
        goal=goal,
        backstory=backstory,
        llm=_get_llm(),
        verbose=True,
    )
|
src/code_tribunal/app.py
ADDED
|
@@ -0,0 +1,720 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Gradio streaming UI for the CodeTribunal courtroom."""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import tempfile
|
| 5 |
+
import time
|
| 6 |
+
import zipfile
|
| 7 |
+
from pathlib import Path
|
| 8 |
+
|
| 9 |
+
import gradio as gr
|
| 10 |
+
from gradio import ChatMessage
|
| 11 |
+
|
| 12 |
+
from code_tribunal.evidence import (
|
| 13 |
+
EvidenceReport,
|
| 14 |
+
gather_evidence_streaming,
|
| 15 |
+
)
|
| 16 |
+
from code_tribunal.courtroom import (
|
| 17 |
+
StreamResult,
|
| 18 |
+
phase_investigation_stream,
|
| 19 |
+
phase_trial_stream,
|
| 20 |
+
phase_verdict_stream,
|
| 21 |
+
)
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
# ---------------------------------------------------------------------------
|
| 25 |
+
# Constants
|
| 26 |
+
# ---------------------------------------------------------------------------
|
| 27 |
+
|
| 28 |
+
# Emoji shown next to each agent role; keys are the exact role strings.
AGENT_AVATARS = {
    # Investigators
    "Security Forensic Investigator": "🛡️",
    "Code Quality Forensic Investigator": "📋",
    "Architecture Forensic Investigator": "🏗️",
    # Trial participants
    "The Prosecutor": "⚖️",
    "The Defense Attorney": "🛡️",
    "The Judge": "🔨",
    # Generic investigator label
    "Investigator": "🔍",
}
|
| 37 |
+
|
| 38 |
+
# Badge background colour (hex) for each severity label, most severe first.
SEVERITY_COLORS = dict(
    CRITICAL="#dc2626",
    HIGH="#ea580c",
    MEDIUM="#ca8a04",
    LOW="#2563eb",
)
|
| 44 |
+
|
| 45 |
+
# Rotating status lines, grouped by pipeline phase name.
STATUS_MESSAGES = dict(
    extracting=[
        "Unpacking the evidence...",
        "Extracting source files...",
        "Cataloging submitted code...",
    ],
    evidence=[
        "Scanning with GritQL forensic patterns...",
        "Searching for hardcoded secrets...",
        "Analyzing code for dangerous functions...",
        "Checking for SQL injection vectors...",
        "Cataloging technical debt markers...",
        "Building the evidence dossier...",
    ],
    investigation=[
        "🔍 Security Investigator analyzing vulnerabilities...",
        "📋 Quality Investigator assessing code standards...",
        "🏗️ Architecture Investigator reviewing structure...",
        "Cross-referencing findings across domains...",
        "Compiling investigation reports...",
    ],
    trial=[
        "Court is now in session...",
        "The Prosecutor is building the case...",
        "Examining the evidence in detail...",
        "The Defense is cross-examining...",
        "Hearing rebuttal arguments...",
        "Closing arguments underway...",
    ],
    verdict=[
        "The Judge is reviewing all evidence...",
        "Weighing prosecution arguments...",
        "Considering defense testimony...",
        "Preparing the final ruling...",
        "The gavel is about to fall...",
    ],
)
|
| 82 |
+
|
| 83 |
+
CUSTOM_CSS = """
|
| 84 |
+
/* ─── Global ─── */
|
| 85 |
+
.gradio-container {
|
| 86 |
+
max-width: 960px !important;
|
| 87 |
+
margin: 0 auto !important;
|
| 88 |
+
}
|
| 89 |
+
body {
|
| 90 |
+
background: #0a0a14 !important;
|
| 91 |
+
}
|
| 92 |
+
.dark {
|
| 93 |
+
background: #0f0f1a !important;
|
| 94 |
+
}
|
| 95 |
+
|
| 96 |
+
/* ─── Hero ─── */
|
| 97 |
+
.hero-logo {
|
| 98 |
+
display: block !important;
|
| 99 |
+
margin: 0 auto 12px auto !important;
|
| 100 |
+
border-radius: 16px !important;
|
| 101 |
+
}
|
| 102 |
+
.hero-title {
|
| 103 |
+
text-align: center !important;
|
| 104 |
+
color: #fbbf24 !important;
|
| 105 |
+
font-family: 'Georgia', serif !important;
|
| 106 |
+
font-size: 2.4em !important;
|
| 107 |
+
font-weight: 700 !important;
|
| 108 |
+
margin-bottom: 4px !important;
|
| 109 |
+
}
|
| 110 |
+
.hero-subtitle {
|
| 111 |
+
text-align: center !important;
|
| 112 |
+
color: #94a3b8 !important;
|
| 113 |
+
font-size: 1.1em !important;
|
| 114 |
+
margin-top: 0 !important;
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
/* ─── Upload area ─── */
|
| 118 |
+
.upload-area .file-preview {
|
| 119 |
+
min-height: 220px !important;
|
| 120 |
+
border: 2px dashed #fbbf2440 !important;
|
| 121 |
+
border-radius: 16px !important;
|
| 122 |
+
background: #1a1a2e !important;
|
| 123 |
+
transition: border-color 0.3s !important;
|
| 124 |
+
}
|
| 125 |
+
.upload-area .file-preview:hover {
|
| 126 |
+
border-color: #fbbf24 !important;
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
/* ─── Status ─── */
|
| 130 |
+
.status-phase {
|
| 131 |
+
text-align: center !important;
|
| 132 |
+
color: #fbbf24 !important;
|
| 133 |
+
font-size: 1.1em !important;
|
| 134 |
+
font-weight: 600 !important;
|
| 135 |
+
}
|
| 136 |
+
.status-detail {
|
| 137 |
+
text-align: center !important;
|
| 138 |
+
color: #94a3b8 !important;
|
| 139 |
+
font-style: italic !important;
|
| 140 |
+
}
|
| 141 |
+
|
| 142 |
+
/* ─── Evidence table ─── */
|
| 143 |
+
.evidence-table {
|
| 144 |
+
font-family: 'JetBrains Mono', 'Fira Code', monospace !important;
|
| 145 |
+
font-size: 13px !important;
|
| 146 |
+
color: #d4d4d4 !important;
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
/* ─── Chatbot ─── */
|
| 150 |
+
.chatbot-panel {
|
| 151 |
+
border: 1px solid #2a2a40 !important;
|
| 152 |
+
border-radius: 12px !important;
|
| 153 |
+
background: #12121f !important;
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
/* ─── Verdict ─── */
|
| 157 |
+
.verdict-box {
|
| 158 |
+
border: 2px solid #fbbf24 !important;
|
| 159 |
+
border-radius: 12px !important;
|
| 160 |
+
background: linear-gradient(135deg, #1a1a2e, #0f172a) !important;
|
| 161 |
+
padding: 24px !important;
|
| 162 |
+
color: #e2e8f0 !important;
|
| 163 |
+
}
|
| 164 |
+
|
| 165 |
+
/* ─── Export buttons ─── */
|
| 166 |
+
.export-btn {
|
| 167 |
+
border: 1px solid #fbbf2440 !important;
|
| 168 |
+
border-radius: 8px !important;
|
| 169 |
+
color: #fbbf24 !important;
|
| 170 |
+
background: #1a1a2e !important;
|
| 171 |
+
}
|
| 172 |
+
.export-btn:hover {
|
| 173 |
+
background: #2a2a40 !important;
|
| 174 |
+
border-color: #fbbf24 !important;
|
| 175 |
+
}
|
| 176 |
+
|
| 177 |
+
/* ─── Scrollbar ─── */
|
| 178 |
+
::-webkit-scrollbar { width: 8px; }
|
| 179 |
+
::-webkit-scrollbar-track { background: #0f0f1a; }
|
| 180 |
+
::-webkit-scrollbar-thumb { background: #2a2a40; border-radius: 4px; }
|
| 181 |
+
::-webkit-scrollbar-thumb:hover { background: #3a3a50; }
|
| 182 |
+
"""
|
| 183 |
+
|
| 184 |
+
|
| 185 |
+
# ---------------------------------------------------------------------------
|
| 186 |
+
# Helpers
|
| 187 |
+
# ---------------------------------------------------------------------------
|
| 188 |
+
|
| 189 |
+
def _esc(text: str) -> str:
|
| 190 |
+
return text.replace("&", "&").replace("<", "<").replace(">", ">")
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
def _severity_badge(sev: str) -> str:
    """Return an inline-styled ``<span>`` badge for a severity label.

    Args:
        sev: Severity label; looked up in ``SEVERITY_COLORS`` for the badge
            background, falling back to grey (``#6b7280``) when unknown.

    Returns:
        HTML for the badge. The label is HTML-escaped before interpolation so
        an unexpected value cannot inject markup.
    """
    import html  # local import keeps this fix self-contained

    color = SEVERITY_COLORS.get(sev, "#6b7280")
    label = html.escape(str(sev))
    return f'<span style="background:{color};color:white;padding:2px 8px;border-radius:4px;font-size:12px;font-weight:bold">{label}</span>'
|
| 196 |
+
|
| 197 |
+
|
| 198 |
+
def _evidence_html(report) -> str:
    """Render an evidence report as an HTML fragment.

    Args:
        report: Object exposing ``file_count``, a sized ``findings`` collection
            and ``findings_by_domain`` (mapping of domain name to findings,
            each with ``severity_hint``, ``file``, ``line`` and ``code``).

    Returns:
        A heading, a summary line and one table per domain, joined by newlines.
        Domain names, file names, line values and code snippets are
        HTML-escaped because they originate from the analysed code base.
    """
    import html  # local import keeps this fix self-contained

    def text(value) -> str:
        # Escape &, < and > for use inside element content.
        return html.escape(str(value), quote=False)

    cell = 'style="text-align:left;padding:4px"'
    header_row = (
        '<tr style="border-bottom:1px solid #333">'
        f'<th {cell}>Severity</th><th {cell}>File</th>'
        f'<th {cell}>Line</th><th {cell}>Code</th></tr>'
    )

    lines = [
        "<h3>Evidence Report</h3>",
        f"<p>Files scanned: <b>{report.file_count}</b> | Total findings: <b>{len(report.findings)}</b></p>",
    ]

    for domain, findings in report.findings_by_domain.items():
        lines.append(
            f'<h4 style="margin-top:16px">{text(domain.title())} Evidence ({len(findings)} findings)</h4>'
        )
        lines.append('<table style="width:100%;border-collapse:collapse">')
        lines.append(header_row)
        for f in findings:
            lines.append(
                f'<tr style="border-bottom:1px solid #222">'
                f'<td style="padding:4px">{_severity_badge(f.severity_hint)}</td>'
                f'<td style="padding:4px;font-family:monospace;font-size:13px">{text(Path(f.file).name)}</td>'
                f'<td style="padding:4px;font-family:monospace">{text(f.line)}</td>'
                f'<td style="padding:4px;font-family:monospace;font-size:13px;color:#a0a0a0">{text(f.code)}</td>'
                f'</tr>'
            )
        lines.append('</table>')

    return "\n".join(lines)
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
def _agent_icon(role: str) -> str:
    """Look up the avatar emoji for *role*, defaulting to a memo icon."""
    try:
        return AGENT_AVATARS[role]
    except KeyError:
        return "📝"
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
def _yield(
    hero_vis, upload_vis, proc_vis,
    status, evidence, chat, verdict, export_vis,
):
    """Assemble the fixed 8-element tuple that every UI update yields.

    The three leading flags and the trailing flag become visibility updates;
    the four middle values are passed through unchanged.
    """
    hero, upload, processing = (
        gr.update(visible=flag) for flag in (hero_vis, upload_vis, proc_vis)
    )
    export = gr.update(visible=export_vis)
    return hero, upload, processing, status, evidence, chat, verdict, export
|
| 240 |
+
|
| 241 |
+
|
| 242 |
+
# ---------------------------------------------------------------------------
|
| 243 |
+
# Pipeline runner with progressive streaming updates
|
| 244 |
+
# ---------------------------------------------------------------------------
|
| 245 |
+
|
| 246 |
+
# Yield throttle — max ~20 updates/sec to prevent browser lag
|
| 247 |
+
_MIN_YIELD_INTERVAL = 0.05
|
| 248 |
+
|
| 249 |
+
|
| 250 |
+
def _append_stream_delta(chat_history, delta, title=None):
    """Add a streamed text fragment to the transcript.

    With *title* set, a new assistant message carrying that title is started.
    Otherwise *delta* is appended to the most recent message; the last entry
    is replaced by a fresh ``ChatMessage`` rather than mutated in place.
    """
    if title is not None:
        chat_history.append(ChatMessage(
            role="assistant",
            content=delta,
            metadata={"title": title},
        ))
        return
    previous = chat_history[-1]
    chat_history[-1] = ChatMessage(
        role="assistant",
        content=previous.content + delta,
        metadata=previous.metadata,
    )


def _stream_trial_phases(target_dir):
    """Run evidence, investigation, trial and verdict over *target_dir*.

    Generator yielding the 8-tuples built by ``_yield``. Streaming updates in
    phases 2-4 are throttled to one per ``_MIN_YIELD_INTERVAL`` seconds; each
    phase ends with an unthrottled yield so no text is lost.
    """
    chat_history = []
    evidence_html_val = ""

    def _frame(status):
        # Processing view with the current evidence table and transcript.
        return _yield(
            False, False, True,
            status,
            evidence_html_val, chat_history, None, False,
        )

    # ===================================================================
    # Phase 1: Evidence — stream per-pattern progress
    # ===================================================================
    status_msgs = STATUS_MESSAGES["evidence"]
    status_idx = 0
    report = None

    for update in gather_evidence_streaming(target_dir):
        if isinstance(update, str):
            # Status update from evidence streaming
            status_idx = (status_idx + 1) % len(status_msgs)
            yield _yield(
                False, False, True,
                f"### Phase 1/4: Forensic Evidence\n{update}\n\n*{status_msgs[status_idx]}*",
                None, [], None, False,
            )
        elif isinstance(update, EvidenceReport):
            report = update

    if report is None or not report.findings:
        yield _yield(
            False, False, True,
            "### Phase 1/4: Evidence Complete\nNo findings detected. **Case dismissed** — code appears clean.",
            None, [], None, False,
        )
        return

    evidence_html_val = _evidence_html(report)
    evidence_text = report.to_text()

    # Add evidence message to chat
    chat_history.append(ChatMessage(
        role="user",
        content=(
            f"**Case Filed**: Code submitted for forensic analysis.\n\n"
            f"**{report.file_count}** files scanned — **{len(report.findings)}** findings detected "
            f"across **{len(report.findings_by_domain)}** domains."
        ),
        metadata={"title": "Court Clerk"},
    ))

    yield _frame(
        f"### Phase 1/4: Evidence Complete\n**{len(report.findings)}** findings detected. Proceeding to investigation..."
    )

    # ===================================================================
    # Phase 2: Investigation — stream agent output
    # ===================================================================
    inv_result = StreamResult()
    status_msgs = STATUS_MESSAGES["investigation"]
    status_idx = 0
    current_task_idx = -1
    last_yield = 0.0
    inv_labels = ["Security", "Quality", "Architecture"]

    yield _frame(f"### Phase 2/4: Investigation\n*{status_msgs[0]}*")

    for role, delta, task_idx in phase_investigation_stream(report, inv_result):
        if task_idx != current_task_idx:
            # New speaker: start a new ChatMessage
            label = inv_labels[task_idx] if task_idx < len(inv_labels) else f"Agent {task_idx}"
            _append_stream_delta(
                chat_history, delta, f"{_agent_icon(role)} {label} Investigation"
            )
            current_task_idx = task_idx
        else:
            _append_stream_delta(chat_history, delta)

        now = time.time()
        if now - last_yield >= _MIN_YIELD_INTERVAL:
            status_idx = (status_idx + 1) % len(status_msgs)
            yield _frame(f"### Phase 2/4: Investigation\n*{status_msgs[status_idx]}*")
            last_yield = now

    investigation_reports = inv_result.metadata.get("reports", {})
    investigation_text = inv_result.text

    yield _frame(
        "### Phase 2/4: Investigation Complete\n**3 reports** generated. Court is now in session..."
    )

    # ===================================================================
    # Phase 3: Trial — stream prosecutor / defense / rebuttal
    # ===================================================================
    trial_result = StreamResult()
    status_msgs = STATUS_MESSAGES["trial"]
    status_idx = 0
    current_task_idx = -1
    last_yield = 0.0

    yield _frame(f"### Phase 3/4: The Trial\n*{status_msgs[0]}*")

    for role, delta, round_name, task_idx in phase_trial_stream(
        evidence_text, investigation_reports, trial_result
    ):
        if task_idx != current_task_idx:
            _append_stream_delta(
                chat_history, delta, f"{_agent_icon(role)} {round_name}"
            )
            current_task_idx = task_idx
        else:
            _append_stream_delta(chat_history, delta)

        now = time.time()
        if now - last_yield >= _MIN_YIELD_INTERVAL:
            status_idx = (status_idx + 1) % len(status_msgs)
            yield _frame(f"### Phase 3/4: The Trial\n*{status_msgs[status_idx]}*")
            last_yield = now

    trial_transcript = trial_result.text

    yield _frame("### Phase 3/4: Trial Complete\nThe Judge is now deliberating...")

    # ===================================================================
    # Phase 4: Verdict — stream judge
    # ===================================================================
    verdict_result = StreamResult()
    status_msgs = STATUS_MESSAGES["verdict"]
    status_idx = 0
    verdict_started = False
    last_yield = 0.0

    for role, delta in phase_verdict_stream(
        evidence_text, investigation_text, trial_transcript, verdict_result
    ):
        if not verdict_started:
            _append_stream_delta(chat_history, delta, f"{_agent_icon(role)} Verdict")
            verdict_started = True
        else:
            _append_stream_delta(chat_history, delta)

        now = time.time()
        if now - last_yield >= _MIN_YIELD_INTERVAL:
            status_idx = (status_idx + 1) % len(status_msgs)
            yield _frame(f"### Phase 4/4: Verdict\n*{status_msgs[status_idx]}*")
            last_yield = now

    verdict_text = verdict_result.text

    # Final yield — show verdict panel and export buttons
    yield _yield(
        False, False, True,
        "### Trial Complete\nThe verdict has been delivered.",
        evidence_html_val,
        chat_history,
        f"## 🔨 Judge's Verdict\n\n{verdict_text}",
        True,
    )


def run_courtroom(code_input):
    """Run the full pipeline, yielding progressive updates for the UI.

    Args:
        code_input: The uploaded archive: either an object exposing the file
            path as ``.name`` or the path itself. ``None`` means nothing was
            uploaded.

    Yields:
        8-tuples built by ``_yield`` (section visibility, status text,
        evidence HTML, chat history, verdict text, export-row visibility).

    The archive is unpacked into a temporary directory that is removed when
    the generator finishes or is closed. Input and configuration are checked
    before anything is written to disk.
    """
    # --- Validate input ---
    if code_input is None:
        yield _yield(
            True, True, False,
            "Please upload a .zip file containing the code to investigate.",
            None, [], None, False,
        )
        return

    # --- Hide hero/upload, show processing ---
    yield _yield(False, False, True, "### Extracting files...", None, [], None, False)

    zip_path = str(getattr(code_input, "name", None) or code_input)
    if not zip_path.lower().endswith(".zip"):
        yield _yield(
            False, False, True,
            "Please upload a .zip file.",
            None, [], None, False,
        )
        return

    # Check the API key before extracting so a misconfigured deployment does
    # not unpack archives it can never analyse.
    if not os.environ.get("ZAI_API_KEY"):
        yield _yield(
            False, False, True,
            "ZAI_API_KEY not set. Configure .env file.",
            None, [], None, False,
        )
        return

    # --- Extract zip ---
    # NOTE(review): the archive is user-supplied; extractall() has no size
    # limit, so consider capping the total uncompressed size.
    with tempfile.TemporaryDirectory() as tmpdir:
        try:
            with zipfile.ZipFile(zip_path, "r") as zf:
                zf.extractall(tmpdir)
        except (zipfile.BadZipFile, OSError):
            yield _yield(
                False, False, True,
                "The uploaded file could not be read as a .zip archive.",
                None, [], None, False,
            )
            return

        yield from _stream_trial_phases(tmpdir)
|
| 493 |
+
|
| 494 |
+
|
| 495 |
+
# ---------------------------------------------------------------------------
|
| 496 |
+
# Export helpers
|
| 497 |
+
# ---------------------------------------------------------------------------
|
| 498 |
+
|
| 499 |
+
def _build_results_dict(
|
| 500 |
+
evidence_html_val, chat_history, verdict_text,
|
| 501 |
+
) -> dict:
|
| 502 |
+
"""Extract results from the state for export."""
|
| 503 |
+
# Collect evidence text from chat messages
|
| 504 |
+
evidence_parts = []
|
| 505 |
+
investigation_parts = []
|
| 506 |
+
trial_parts = []
|
| 507 |
+
verdict_parts = []
|
| 508 |
+
current_section = "evidence"
|
| 509 |
+
|
| 510 |
+
for msg in chat_history:
|
| 511 |
+
title = (msg.metadata or {}).get("title", "")
|
| 512 |
+
content = msg.content or ""
|
| 513 |
+
|
| 514 |
+
if "Investigation" in title:
|
| 515 |
+
current_section = "investigation"
|
| 516 |
+
elif "Prosecution" in title or "Defense" in title or "Rebuttal" in title:
|
| 517 |
+
current_section = "trial"
|
| 518 |
+
elif "Verdict" in title:
|
| 519 |
+
current_section = "verdict"
|
| 520 |
+
|
| 521 |
+
if current_section == "investigation":
|
| 522 |
+
investigation_parts.append(f"### {title}\n{content}")
|
| 523 |
+
elif current_section == "trial":
|
| 524 |
+
trial_parts.append(f"### {title}\n{content}")
|
| 525 |
+
elif current_section == "verdict":
|
| 526 |
+
verdict_parts.append(content)
|
| 527 |
+
|
| 528 |
+
return {
|
| 529 |
+
"evidence_html": evidence_html_val,
|
| 530 |
+
"investigation": "\n\n".join(investigation_parts),
|
| 531 |
+
"transcript": "\n\n".join(trial_parts),
|
| 532 |
+
"verdict": verdict_text or "\n\n".join(verdict_parts),
|
| 533 |
+
"timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
|
| 534 |
+
}
|
| 535 |
+
|
| 536 |
+
|
| 537 |
+
def generate_markdown_export(results_state: dict) -> str:
    """Generate a Markdown report and return the filepath.

    Args:
        results_state: Mapping that may contain ``timestamp``,
            ``investigation``, ``transcript`` and ``verdict``. Missing or
            ``None`` sections are rendered empty; a missing timestamp is
            shown as ``N/A``.

    Returns:
        Path of a newly created file ending in ``_CodeTribunal_Report.md``.
        The file is not deleted automatically.
    """
    md_lines = [
        "# CodeTribunal — Trial Report\n",
        f"**Generated**: {results_state.get('timestamp', 'N/A')}\n",
        "---\n",
        "## Investigation Reports\n",
        results_state.get("investigation") or "",
        "\n---\n",
        "## Trial Transcript\n",
        results_state.get("transcript") or "",
        "\n---\n",
        "## Verdict\n",
        results_state.get("verdict") or "",
        "\n",
    ]
    content = "\n".join(md_lines)

    # NamedTemporaryFile creates the file atomically (mktemp only returns a
    # name, leaving a race window). UTF-8 is forced because the report
    # contains non-ASCII characters regardless of the host locale.
    with tempfile.NamedTemporaryFile(
        mode="w",
        suffix="_CodeTribunal_Report.md",
        delete=False,
        encoding="utf-8",
    ) as handle:
        handle.write(content)
    return handle.name
|
| 557 |
+
|
| 558 |
+
|
| 559 |
+
def generate_pdf_export(results_state: dict) -> str:
    """Generate a PDF report and return the filepath.

    Args:
        results_state: Mapping that may contain ``timestamp``,
            ``investigation``, ``transcript`` and ``verdict``. Missing or
            ``None`` sections are rendered empty.

    Returns:
        Path of a newly created file ending in ``_CodeTribunal_Report.pdf``.
        The file is not deleted automatically.

    Body text is written with the core Helvetica font, so characters outside
    Latin-1 are replaced with ``?``.
    """
    import os

    from fpdf import FPDF

    pdf = FPDF()
    pdf.set_auto_page_break(auto=True, margin=15)
    pdf.add_page()

    # Title
    pdf.set_font("Helvetica", "B", 24)
    pdf.set_text_color(200, 160, 30)
    pdf.cell(0, 15, "CodeTribunal - Trial Report", ln=True, align="C")
    pdf.set_font("Helvetica", "", 10)
    pdf.set_text_color(120, 120, 140)
    pdf.cell(0, 8, f"Generated: {results_state.get('timestamp', 'N/A')}", ln=True, align="C")
    pdf.ln(10)

    def _add_section(title: str, content: str):
        """Write a coloured heading followed by *content*, line by line."""
        pdf.set_font("Helvetica", "B", 14)
        pdf.set_text_color(200, 160, 30)
        pdf.cell(0, 10, title, ln=True)
        pdf.set_text_color(50, 50, 60)
        pdf.set_font("Helvetica", "", 10)
        for line in (content or "").split("\n"):
            clean = line.encode("latin-1", "replace").decode("latin-1")
            if clean.strip():
                # Return to the left margin first: some FPDF versions leave
                # the cursor at the right edge after multi_cell, which makes
                # the next full-width multi_cell fail for lack of space.
                pdf.set_x(pdf.l_margin)
                pdf.multi_cell(0, 5, clean)
            else:
                pdf.ln(3)
        pdf.ln(6)

    _add_section("Investigation Reports", results_state.get("investigation", ""))
    _add_section("Trial Transcript", results_state.get("transcript", ""))
    _add_section("Verdict", results_state.get("verdict", ""))

    # mkstemp creates the file atomically; mktemp only returns a name and
    # leaves a window in which another process could claim it.
    fd, filepath = tempfile.mkstemp(suffix="_CodeTribunal_Report.pdf")
    os.close(fd)
    pdf.output(filepath)
    return filepath
|
| 597 |
+
|
| 598 |
+
|
| 599 |
+
# ---------------------------------------------------------------------------
|
| 600 |
+
# Gradio App
|
| 601 |
+
# ---------------------------------------------------------------------------
|
| 602 |
+
|
| 603 |
+
def create_app() -> gr.Blocks:
    """Build the CodeTribunal Gradio interface and wire its events.

    Layout: a hero section, an upload section and an initially hidden
    processing section (status, evidence table, transcript, verdict, export
    controls). Uploading a file starts ``run_courtroom``; the export buttons
    build a report from the current evidence, transcript and verdict and
    reveal it in the download component.

    Returns:
        The assembled ``gr.Blocks`` app (not yet launched).
    """
    logo_path = Path(__file__).resolve().parent.parent.parent / "assets" / "logo.png"

    with gr.Blocks(title="CodeTribunal — The AI Courtroom") as app:

        # --- Hero Section ---
        with gr.Column(visible=True) as hero_section:
            if logo_path.exists():
                gr.Image(
                    value=str(logo_path),
                    show_label=False,
                    height=160,
                    container=False,
                    elem_classes=["hero-logo"],
                )
            gr.Markdown(
                "# CodeTribunal\n### The AI Courtroom That Exposes Bad Freelance Code",
                elem_classes=["hero-title"],
            )
            gr.Markdown(
                "Upload a .zip of code and watch a multi-agent forensic investigation unfold.\n"
                "Powered by GLM 5 + GritQL + CrewAI.",
                elem_classes=["hero-subtitle"],
            )

        # --- Upload Section ---
        with gr.Column(visible=True, elem_classes=["upload-area"]) as upload_section:
            code_input = gr.File(
                label="Drop your .zip here or click to upload",
                file_types=[".zip"],
                interactive=True,
            )

        # --- Processing Section ---
        with gr.Column(visible=False) as processing_section:
            status_md = gr.Markdown(
                "Initializing...",
                elem_classes=["status-phase"],
            )
            evidence_html = gr.HTML(
                value="",
                visible=True,
            )
            chatbot = gr.Chatbot(
                label="Courtroom Transcript",
                height=600,
                elem_classes=["chatbot-panel"],
            )
            verdict_md = gr.Markdown(
                value="",
                visible=True,
                elem_classes=["verdict-box"],
            )
            with gr.Row(visible=False) as export_row:
                export_md_btn = gr.Button(
                    "Export as Markdown",
                    elem_classes=["export-btn"],
                )
                export_pdf_btn = gr.Button(
                    "Export as PDF",
                    elem_classes=["export-btn"],
                )
            # Starts hidden; the export callbacks reveal it once a report exists.
            export_file = gr.File(label="Download Report", visible=False)

        # Hidden state for export
        results_state = gr.State(value={})

        # --- Wire events ---

        # Auto-trigger on file upload
        code_input.upload(
            fn=run_courtroom,
            inputs=[code_input],
            outputs=[
                hero_section, upload_section, processing_section,
                status_md, evidence_html, chatbot, verdict_md, export_row,
            ],
        )

        # Export callbacks. Each returns an update that sets the file AND
        # makes the download component visible; returning only the path
        # would leave the component hidden.
        def _do_export_md(ev_html, chat, verdict):
            results = _build_results_dict(ev_html, chat, verdict)
            return gr.update(value=generate_markdown_export(results), visible=True)

        def _do_export_pdf(ev_html, chat, verdict):
            results = _build_results_dict(ev_html, chat, verdict)
            return gr.update(value=generate_pdf_export(results), visible=True)

        export_md_btn.click(
            fn=_do_export_md,
            inputs=[evidence_html, chatbot, verdict_md],
            outputs=[export_file],
        )
        export_pdf_btn.click(
            fn=_do_export_pdf,
            inputs=[evidence_html, chatbot, verdict_md],
            outputs=[export_file],
        )

    return app
|
| 703 |
+
|
| 704 |
+
|
| 705 |
+
def main():
    """Build the Gradio app and serve it on every interface, port 7860."""
    app = create_app()
    courtroom_theme = gr.themes.Base(
        primary_hue="amber",
        secondary_hue="slate",
        neutral_hue="slate",
    )
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "css": CUSTOM_CSS,
        "theme": courtroom_theme,
    }
    app.launch(**launch_options)


if __name__ == "__main__":
    main()
|
src/code_tribunal/cli.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""CLI entry point for CodeTribunal."""
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import click
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
from code_tribunal.courtroom import run_trial
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
@click.command()
@click.argument("path", type=click.Path(exists=True))
@click.option("--output", "-o", type=click.Path(), help="Save full report to file (JSON)")
@click.option("--evidence-only", is_flag=True, help="Only run Phase 1 (GritQL evidence), skip trial")
def main(path: str, output: str | None, evidence_only: bool) -> None:
    """Put your code on trial. PATH is the directory or zip to investigate."""
    # The docstring above is also the text click shows for --help.

    if not evidence_only:
        # Full pipeline: optionally dump everything as JSON, then print the verdict.
        result = run_trial(path)

        if output:
            serialized = json.dumps(result, indent=2, default=str)
            Path(output).write_text(serialized)
            click.echo(f"\nFull report saved to {output}")

        click.echo("\n" + result.get("verdict", "No verdict generated."))
        return

    # Evidence-only mode: run the scan, print it, optionally save a summary.
    from code_tribunal.evidence import gather_evidence

    report = gather_evidence(path)
    click.echo(report.to_text())
    if not output:
        return

    summary = {
        "findings": [str(finding) for finding in report.findings],
        "stats": {
            "files": report.file_count,
            "total": len(report.findings),
            "by_severity": {
                level: len(hits)
                for level, hits in report.findings_by_severity.items()
            },
        },
    }
    Path(output).write_text(json.dumps(summary, indent=2))
    click.echo(f"\nReport saved to {output}")


if __name__ == "__main__":
    main()
|
src/code_tribunal/courtroom.py
ADDED
|
@@ -0,0 +1,618 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Courtroom pipeline orchestrator — wires evidence → investigation → trial → verdict."""
|
| 2 |
+
|
| 3 |
+
import json
|
| 4 |
+
import time
|
| 5 |
+
from dataclasses import asdict, dataclass, field
|
| 6 |
+
|
| 7 |
+
from crewai import Task, Crew, Process
|
| 8 |
+
|
| 9 |
+
from code_tribunal.agents import (
|
| 10 |
+
security_investigator,
|
| 11 |
+
quality_investigator,
|
| 12 |
+
architecture_investigator,
|
| 13 |
+
prosecutor,
|
| 14 |
+
defense_attorney,
|
| 15 |
+
judge,
|
| 16 |
+
)
|
| 17 |
+
from code_tribunal.evidence import gather_evidence, EvidenceReport
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
# ---------------------------------------------------------------------------
|
| 21 |
+
# Phase 1: Evidence (deterministic, no LLM)
|
| 22 |
+
# ---------------------------------------------------------------------------
|
| 23 |
+
|
| 24 |
+
def phase_evidence(target_dir: str) -> EvidenceReport:
    """Gather evidence for *target_dir* and print a short summary.

    Calls ``gather_evidence`` and prints the scanned-file count, the number
    of findings and a per-severity breakdown (sorted by severity key) before
    returning the report.
    """
    print("\n[Phase 1] Gathering evidence with GritQL...")
    report = gather_evidence(target_dir)

    print(f" Files scanned: {report.file_count}")
    print(f" Findings: {len(report.findings)}")

    severity_counts = [
        f"{level}={len(hits)}"
        for level, hits in sorted(report.findings_by_severity.items())
    ]
    print(f" By severity: " + ", ".join(severity_counts))
    return report
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
# ---------------------------------------------------------------------------
|
| 37 |
+
# Phase 2: Investigation (3 specialist investigators, run sequentially)
|
| 38 |
+
# ---------------------------------------------------------------------------
|
| 39 |
+
|
| 40 |
+
def _domain_evidence_text(report: EvidenceReport, domain: str) -> str:
|
| 41 |
+
"""Extract findings for a specific domain as text."""
|
| 42 |
+
findings = report.findings_by_domain.get(domain, [])
|
| 43 |
+
if not findings:
|
| 44 |
+
return f"No {domain} findings detected."
|
| 45 |
+
return "\n".join(str(f) for f in findings)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def phase_investigation(report: EvidenceReport) -> dict[str, str]:
    """Run the security, quality and architecture investigators over *report*.

    The three tasks are placed in a single crew with
    ``Process.sequential``, so they execute one after another.

    Returns:
        Dict keyed by ``"security"``, ``"quality"`` and ``"architecture"``
        holding each task's output (its ``raw`` attribute when present,
        otherwise ``str()`` of it); ``""`` for a task with no output.
    """
    print("\n[Phase 2] Investigation — deploying specialist agents...")

    sec_agent = security_investigator()
    qual_agent = quality_investigator()
    arch_agent = architecture_investigator()

    full_evidence = report.to_text()

    security_brief = (
        "You are investigating a codebase for security vulnerabilities.\n\n"
        "SECURITY EVIDENCE:\n"
        f"{_domain_evidence_text(report, 'security')}\n\n"
        "FULL EVIDENCE REPORT FOR CONTEXT:\n"
        f"{full_evidence}\n\n"
        "Produce a detailed security investigation report. For each finding:\n"
        "- What the vulnerability is\n"
        "- The attack vector (how it could be exploited)\n"
        "- Severity: CRITICAL / HIGH / MEDIUM / LOW\n"
        "- Potential business impact\n"
        "- Recommended fix"
    )
    sec_task = Task(
        description=security_brief,
        agent=sec_agent,
        expected_output="A structured security investigation report with severity-ranked findings and remediation.",
    )

    quality_brief = (
        "You are investigating a codebase for quality and negligence indicators.\n\n"
        "QUALITY EVIDENCE:\n"
        f"{_domain_evidence_text(report, 'quality')}\n\n"
        "FULL EVIDENCE REPORT FOR CONTEXT:\n"
        f"{full_evidence}\n\n"
        "Produce a quality investigation report. Assess:\n"
        "- Technical debt indicators (TODOs, FIXMEs, HACKs)\n"
        "- Dead code / unused functions\n"
        "- Missing error handling\n"
        "- Signs of rushed or careless development\n"
        "- Whether the code was production-ready when delivered"
    )
    qual_task = Task(
        description=quality_brief,
        agent=qual_agent,
        expected_output="A structured quality investigation report identifying negligence indicators and technical debt.",
    )

    architecture_brief = (
        "You are investigating a codebase for architectural problems.\n\n"
        "FULL EVIDENCE REPORT:\n"
        f"{full_evidence}\n\n"
        "Produce an architecture investigation report. Assess:\n"
        "- Hardcoded configuration that should be externalized\n"
        "- Tight coupling and missing abstractions\n"
        "- Whether the architecture supports the intended use case\n"
        "- Scalability concerns\n"
        "- Whether this looks like professional work or amateur delivery"
    )
    arch_task = Task(
        description=architecture_brief,
        agent=arch_agent,
        expected_output="A structured architecture investigation report assessing structural soundness.",
    )

    # One crew, sequential process: the tasks run one after another.
    # (Running them concurrently would need separate Crew instances.)
    investigation_crew = Crew(
        agents=[sec_agent, qual_agent, arch_agent],
        tasks=[sec_task, qual_task, arch_task],
        process=Process.sequential,
        verbose=True,
    )
    result = investigation_crew.kickoff()

    # Map task outputs to report labels by position.
    outputs = getattr(result, "tasks_output", [])

    def _as_text(output):
        return output.raw if hasattr(output, "raw") else str(output)

    reports = {
        label: (_as_text(outputs[position]) if position < len(outputs) else "")
        for position, label in enumerate(("security", "quality", "architecture"))
    }

    print(f" Investigation complete: {len(reports)} reports generated.")
    return reports
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
# ---------------------------------------------------------------------------
|
| 137 |
+
# Phase 3: The Trial (prosecutor vs defense)
|
| 138 |
+
# ---------------------------------------------------------------------------
|
| 139 |
+
|
| 140 |
+
def phase_trial(evidence_text: str, investigation_reports: dict[str, str]) -> str:
    """Stage the prosecutor/defense debate and return it as a single transcript.

    Three tasks (prosecution, defense, rebuttal) are run by one sequential
    crew. Each task output that comes back is placed under a
    ``=== PROSECUTION ===`` / ``=== DEFENSE ===`` / ``=== REBUTTAL ===`` header.

    Args:
        evidence_text: Evidence report text embedded in the first two prompts.
        investigation_reports: Mapping of investigation label to report text.

    Returns:
        The collected rounds joined with a blank line between them.
    """

    def _raw_text(task_output):
        # Use the output's ``raw`` attribute when it has one, else stringify it.
        return task_output.raw if hasattr(task_output, "raw") else str(task_output)

    print("\n[Phase 3] The Trial — Prosecutor vs Defense Attorney...")

    # Every task gets its own agent object; the rebuttal uses a second prosecutor.
    opening_prosecutor = prosecutor()
    defender = defense_attorney()
    closing_prosecutor = prosecutor()

    report_sections = []
    for label, body in investigation_reports.items():
        report_sections.append(f"=== {label.upper()} INVESTIGATION ===\n{body}")
    investigation_text = "\n\n".join(report_sections)

    # The same evidence/report section appears in the prosecution and defense prompts.
    case_file = (
        "RAW EVIDENCE:\n"
        f"{evidence_text}\n\n"
        "INVESTIGATION REPORTS:\n"
        f"{investigation_text}\n\n"
    )

    # Round 1: the prosecutor opens.
    prosecution_brief = (
        "PRESENT THE PROSECUTION'S CASE\n\n"
        "You are presenting evidence against a freelance developer who delivered this code to a paying client.\n\n"
        + case_file
        + "Build your case. Be specific. Cite findings by category, severity, and potential impact. "
        "Argue that this code represents negligence, not mere imperfection."
    )
    prosecution_task = Task(
        description=prosecution_brief,
        agent=opening_prosecutor,
        expected_output="A compelling prosecution argument citing specific evidence and arguing negligence.",
    )

    # Round 2: the defense answers; the prosecution task is passed as context.
    defense_brief = (
        "PRESENT THE DEFENSE\n\n"
        "The prosecution has presented their case against this code. "
        "Below is the PROSECUTION'S ARGUMENT — read it carefully, then mount your defense.\n\n"
        + case_file
        + "Challenge the prosecution's specific claims. Argue context, proportionality, and intent. "
        "Not every issue is negligence. Some patterns are acceptable in certain contexts. "
        "Be honest but vigorous."
    )
    defense_task = Task(
        description=defense_brief,
        agent=defender,
        context=[prosecution_task],
        expected_output="A vigorous defense argument challenging the prosecution's claims with context and proportionality.",
    )

    # Round 3: the rebuttal; both earlier tasks are passed as context.
    rebuttal_brief = (
        "REBUTTAL\n\n"
        "The defense has responded to your case. Below is the DEFENSE'S ARGUMENT. "
        "Now deliver your rebuttal.\n\n"
        "Address their strongest points. Where are they wrong? "
        "Where are they minimizing real harm? "
        "End with a closing argument for the judge."
    )
    rebuttal_task = Task(
        description=rebuttal_brief,
        agent=closing_prosecutor,
        context=[prosecution_task, defense_task],
        expected_output="A sharp rebuttal addressing the defense's arguments and closing the prosecution's case.",
    )

    trial_crew = Crew(
        agents=[opening_prosecutor, defender, closing_prosecutor],
        tasks=[prosecution_task, defense_task, rebuttal_task],
        process=Process.sequential,
        verbose=True,
    )
    result = trial_crew.kickoff()

    # Pair each header with the task output at the same position; a round
    # with no output is left out of the transcript.
    outputs = result.tasks_output if hasattr(result, "tasks_output") else []
    headers = ("PROSECUTION", "DEFENSE", "REBUTTAL")
    transcript = "\n\n".join(
        f"=== {header} ===\n{_raw_text(output)}"
        for header, output in zip(headers, outputs)
    )

    print(" Trial complete: 3 rounds of argument.")
    return transcript
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
# ---------------------------------------------------------------------------
|
| 229 |
+
# Phase 4: The Verdict
|
| 230 |
+
# ---------------------------------------------------------------------------
|
| 231 |
+
|
| 232 |
+
def phase_verdict(evidence_text: str, investigation_text: str, trial_transcript: str) -> str:
    """Have the judge agent produce the final verdict text.

    Args:
        evidence_text: Evidence report text embedded in the prompt.
        investigation_text: Combined investigation reports embedded in the prompt.
        trial_transcript: Trial transcript embedded in the prompt.

    Returns:
        The crew result's ``raw`` attribute when present, otherwise ``str(result)``.
    """
    print("\n[Phase 4] The Verdict — Judge deliberating...")

    presiding_judge = judge()

    # Everything the judge is asked to weigh, in prompt order.
    case_material = "".join(
        [
            "RAW EVIDENCE:\n",
            f"{evidence_text}\n\n",
            "INVESTIGATION REPORTS:\n",
            f"{investigation_text}\n\n",
            "TRIAL TRANSCRIPT:\n",
            f"{trial_transcript}\n\n",
        ]
    )
    # The output layout requested from the judge.
    verdict_layout = (
        "Deliver a structured verdict:\n\n"
        "## VERDICT\n"
        "Overall: [GUILTY / MIXED / NOT GUILTY]\n"
        "Reputational Risk Score: [0-100]\n\n"
        "## FINDINGS SUMMARY\n"
        "For each finding: severity, impact, remediation\n\n"
        "## SENTENCE\n"
        "Your final assessment and recommendations for the client."
    )
    instructions = (
        "DELIVER YOUR VERDICT\n\n"
        "You have reviewed all evidence, investigation reports, and the full trial transcript.\n\n"
        + case_material
        + verdict_layout
    )

    verdict_task = Task(
        description=instructions,
        agent=presiding_judge,
        expected_output=(
            "A structured verdict with overall ruling, reputational risk score (0-100), "
            "findings summary, and final sentence."
        ),
    )

    verdict_crew = Crew(
        agents=[presiding_judge],
        tasks=[verdict_task],
        verbose=True,
    )
    result = verdict_crew.kickoff()

    if hasattr(result, "raw"):
        verdict = result.raw
    else:
        verdict = str(result)

    print(" Verdict delivered.")
    return verdict
|
| 275 |
+
|
| 276 |
+
|
| 277 |
+
# ---------------------------------------------------------------------------
|
| 278 |
+
# Full Pipeline
|
| 279 |
+
# ---------------------------------------------------------------------------
|
| 280 |
+
|
| 281 |
+
def run_trial(target_dir: str) -> dict:
    """Run evidence, investigation, trial and verdict phases on ``target_dir``.

    Args:
        target_dir: Directory handed to ``phase_evidence``.

    Returns:
        When the evidence report has no findings, a dict with only ``verdict``
        (``"DISMISSED"``) and ``reason``. Otherwise a dict with ``evidence``,
        ``investigation``, ``transcript``, ``verdict`` and ``stats``.
    """
    rule = "=" * 60
    print(rule)
    print(" CODETRIBUNAL — THE AI COURTROOM")
    print(rule)

    # Phase 1: Evidence
    evidence_report = phase_evidence(target_dir)
    evidence_text = evidence_report.to_text()

    # Nothing to argue about: stop before any agent is created.
    if not evidence_report.findings:
        print("\nNo findings detected. Case dismissed — code appears clean.")
        return {"verdict": "DISMISSED", "reason": "No evidence of issues found."}

    # Phase 2: Investigation
    investigation_reports = phase_investigation(evidence_report)
    sections = []
    for label, body in investigation_reports.items():
        sections.append(f"=== {label.upper()} INVESTIGATION ===\n{body}")
    investigation_text = "\n\n".join(sections)

    # Phase 3: Trial
    trial_transcript = phase_trial(evidence_text, investigation_reports)

    # Phase 4: Verdict
    verdict = phase_verdict(evidence_text, investigation_text, trial_transcript)

    print("\n" + rule)
    print(" TRIAL COMPLETE")
    print(rule)

    stats = {
        "files_scanned": evidence_report.file_count,
        "total_findings": len(evidence_report.findings),
    }
    severity_counts = {}
    for severity, items in evidence_report.findings_by_severity.items():
        severity_counts[severity] = len(items)
    stats["by_severity"] = severity_counts

    return {
        "evidence": evidence_text,
        "investigation": investigation_text,
        "transcript": trial_transcript,
        "verdict": verdict,
        "stats": stats,
    }
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
# ---------------------------------------------------------------------------
|
| 329 |
+
# Streaming variants (for Gradio UI — preserve existing functions for CLI)
|
| 330 |
+
# ---------------------------------------------------------------------------
|
| 331 |
+
|
| 332 |
+
@dataclass
class StreamResult:
    """Holder that a streaming phase fills in once its generator has finished."""

    # Final text produced by the phase (set by the phase_*_stream functions).
    text: str = ""
    # Additional structured output; each instance gets its own fresh dict.
    metadata: dict = field(default_factory=dict)
|
| 337 |
+
|
| 338 |
+
|
| 339 |
+
def _simulate_stream(text: str, role: str, chunk_size: int = 4):
    """Replay an already complete text as a sequence of small chunks.

    Args:
        text: The full text to replay.
        role: Label yielded alongside every chunk.
        chunk_size: Number of characters per chunk (the last may be shorter).

    Yields:
        ``(role, chunk)`` pairs in text order, pausing 10 ms after each one.
    """
    offsets = range(0, len(text), chunk_size)
    for start in offsets:
        piece = text[start : start + chunk_size]
        yield role, piece
        time.sleep(0.01)
|
| 344 |
+
|
| 345 |
+
|
| 346 |
+
def phase_investigation_stream(report: "EvidenceReport", result: StreamResult):
    """Generator form of ``phase_investigation`` for incremental display.

    Builds the security, quality and architecture tasks, runs them in a
    sequential crew created with ``stream=True`` and forwards each chunk.
    If that path raises any ``Exception``, ``phase_investigation`` is run
    instead and its reports are replayed through ``_simulate_stream``.

    Args:
        report: Evidence report the investigators are asked to analyse.
        result: Accumulator; ``text`` receives the combined reports and
            ``metadata["reports"]`` the per-label mapping.

    Yields:
        ``(agent_role, delta, task_index)`` tuples.
    """
    security_agent = security_investigator()
    quality_agent = quality_investigator()
    architecture_agent = architecture_investigator()

    full_evidence = report.to_text()

    # Shared by the security and quality prompts.
    context_section = (
        "FULL EVIDENCE REPORT FOR CONTEXT:\n"
        f"{full_evidence}\n\n"
    )

    security_evidence = _domain_evidence_text(report, "security")
    security_checklist = "\n".join(
        [
            "- What the vulnerability is",
            "- The attack vector (how it could be exploited)",
            "- Severity: CRITICAL / HIGH / MEDIUM / LOW",
            "- Potential business impact",
            "- Recommended fix",
        ]
    )
    sec_task = Task(
        description=(
            "You are investigating a codebase for security vulnerabilities.\n\n"
            "SECURITY EVIDENCE:\n"
            f"{security_evidence}\n\n"
            + context_section
            + "Produce a detailed security investigation report. For each finding:\n"
            + security_checklist
        ),
        agent=security_agent,
        expected_output="A structured security investigation report with severity-ranked findings and remediation.",
    )

    quality_evidence = _domain_evidence_text(report, "quality")
    quality_checklist = "\n".join(
        [
            "- Technical debt indicators (TODOs, FIXMEs, HACKs)",
            "- Dead code / unused functions",
            "- Missing error handling",
            "- Signs of rushed or careless development",
            "- Whether the code was production-ready when delivered",
        ]
    )
    qual_task = Task(
        description=(
            "You are investigating a codebase for quality and negligence indicators.\n\n"
            "QUALITY EVIDENCE:\n"
            f"{quality_evidence}\n\n"
            + context_section
            + "Produce a quality investigation report. Assess:\n"
            + quality_checklist
        ),
        agent=quality_agent,
        expected_output="A structured quality investigation report identifying negligence indicators and technical debt.",
    )

    architecture_checklist = "\n".join(
        [
            "- Hardcoded configuration that should be externalized",
            "- Tight coupling and missing abstractions",
            "- Whether the architecture supports the intended use case",
            "- Scalability concerns",
            "- Whether this looks like professional work or amateur delivery",
        ]
    )
    arch_task = Task(
        description=(
            "You are investigating a codebase for architectural problems.\n\n"
            "FULL EVIDENCE REPORT:\n"
            f"{full_evidence}\n\n"
            "Produce an architecture investigation report. Assess:\n"
            + architecture_checklist
        ),
        agent=architecture_agent,
        expected_output="A structured architecture investigation report assessing structural soundness.",
    )

    labels = ["security", "quality", "architecture"]

    try:
        investigation_crew = Crew(
            agents=[security_agent, quality_agent, architecture_agent],
            tasks=[sec_task, qual_task, arch_task],
            process=Process.sequential,
            verbose=True,
            stream=True,
        )

        accumulated = {label: "" for label in labels}

        streaming_output = investigation_crew.kickoff()
        for chunk in streaming_output:
            delta = chunk.content or ""
            task_idx = chunk.task_index or 0
            role = chunk.agent_role or "Investigator"
            # Chunks for an unknown task index are forwarded but not accumulated.
            if task_idx < len(labels):
                accumulated[labels[task_idx]] += delta
            yield (role, delta, task_idx)

        # Replace the streamed text with the final task outputs where available.
        crew_result = streaming_output.result
        if hasattr(crew_result, "tasks_output"):
            task_outputs = crew_result.tasks_output
        else:
            task_outputs = []
        for label, output in zip(labels, task_outputs):
            accumulated[label] = output.raw if hasattr(output, "raw") else str(output)

        combined = []
        for label, body in accumulated.items():
            combined.append(f"=== {label.upper()} INVESTIGATION ===\n{body}")
        result.text = "\n\n".join(combined)
        result.metadata["reports"] = accumulated

    except Exception:
        # Fallback: run the blocking phase, then replay its reports chunk by chunk.
        reports = phase_investigation(report)
        investigation_text = "\n\n".join(
            f"=== {label.upper()} INVESTIGATION ===\n{body}"
            for label, body in reports.items()
        )
        fallback_roles = [
            "Security Forensic Investigator",
            "Code Quality Forensic Investigator",
            "Architecture Forensic Investigator",
        ]
        for index, body in enumerate(reports.values()):
            speaker = fallback_roles[index]
            for role, delta in _simulate_stream(body, speaker):
                yield (role, delta, index)
        result.text = investigation_text
        result.metadata["reports"] = reports
|
| 455 |
+
|
| 456 |
+
|
| 457 |
+
def _split_transcript_rounds(transcript: str, round_names: list) -> list:
    """Cut a ``phase_trial`` transcript into ``(round_index, content)`` pairs.

    ``phase_trial`` writes each round as an upper-case ``=== NAME ===`` header
    line followed by the round's text, so headers are located in upper case
    and a round's content runs up to the next header (or the end of the
    transcript). Rounds whose header is absent are omitted.
    """
    markers = []
    cursor = 0
    for index, name in enumerate(round_names):
        header = f"=== {name.upper()} ===\n"
        start = transcript.find(header, cursor)
        if start == -1:
            continue
        cursor = start + len(header)
        markers.append((index, start, cursor))

    sections = []
    for position, (index, _, body_start) in enumerate(markers):
        if position + 1 < len(markers):
            body_end = markers[position + 1][1]
        else:
            body_end = len(transcript)
        sections.append((index, transcript[body_start:body_end].strip()))
    return sections


def phase_trial_stream(evidence_text: str, investigation_reports: dict, result: StreamResult):
    """Streaming variant of phase_trial. Yields (agent_role, delta, round_name, task_index).

    Runs the prosecution, defense and rebuttal tasks in a sequential crew
    created with ``stream=True`` and forwards every chunk. If that path raises
    any ``Exception``, ``phase_trial`` is run instead and each round of its
    transcript is replayed through ``_simulate_stream``.

    Args:
        evidence_text: Evidence report text embedded in the prompts.
        investigation_reports: Mapping of investigation label to report text.
        result: Accumulator whose ``text`` receives the final transcript.

    Yields:
        ``(agent_role, delta, round_name, task_index)`` tuples.
    """

    pros_agent = prosecutor()
    def_agent = defense_attorney()
    pros_rebuttal_agent = prosecutor()

    investigation_text = "\n\n".join(
        f"=== {k.upper()} INVESTIGATION ===\n{v}"
        for k, v in investigation_reports.items()
    )

    prosecution_task = Task(
        description=(
            "PRESENT THE PROSECUTION'S CASE\n\n"
            "You are presenting evidence against a freelance developer who delivered this code to a paying client.\n\n"
            "RAW EVIDENCE:\n"
            f"{evidence_text}\n\n"
            "INVESTIGATION REPORTS:\n"
            f"{investigation_text}\n\n"
            "Build your case. Be specific. Cite findings by category, severity, and potential impact. "
            "Argue that this code represents negligence, not mere imperfection."
        ),
        agent=pros_agent,
        expected_output="A compelling prosecution argument citing specific evidence and arguing negligence.",
    )

    defense_task = Task(
        description=(
            "PRESENT THE DEFENSE\n\n"
            "The prosecution has presented their case against this code. "
            "Below is the PROSECUTION'S ARGUMENT — read it carefully, then mount your defense.\n\n"
            "RAW EVIDENCE:\n"
            f"{evidence_text}\n\n"
            "INVESTIGATION REPORTS:\n"
            f"{investigation_text}\n\n"
            "Challenge the prosecution's specific claims. Argue context, proportionality, and intent. "
            "Not every issue is negligence. Some patterns are acceptable in certain contexts. "
            "Be honest but vigorous."
        ),
        agent=def_agent,
        context=[prosecution_task],
        expected_output="A vigorous defense argument challenging the prosecution's claims with context and proportionality.",
    )

    rebuttal_task = Task(
        description=(
            "REBUTTAL\n\n"
            "The defense has responded to your case. Below is the DEFENSE'S ARGUMENT. "
            "Now deliver your rebuttal.\n\n"
            "Address their strongest points. Where are they wrong? "
            "Where are they minimizing real harm? "
            "End with a closing argument for the judge."
        ),
        agent=pros_rebuttal_agent,
        context=[prosecution_task, defense_task],
        expected_output="A sharp rebuttal addressing the defense's arguments and closing the prosecution's case.",
    )

    round_names = ["Prosecution", "Defense", "Rebuttal"]

    try:
        trial_crew = Crew(
            agents=[pros_agent, def_agent, pros_rebuttal_agent],
            tasks=[prosecution_task, defense_task, rebuttal_task],
            process=Process.sequential,
            verbose=True,
            stream=True,
        )

        accumulated_rounds = ["", "", ""]

        streaming_output = trial_crew.kickoff()
        for chunk in streaming_output:
            delta = chunk.content or ""
            task_idx = chunk.task_index or 0
            role = chunk.agent_role or "Unknown"
            round_name = round_names[task_idx] if task_idx < len(round_names) else f"Round {task_idx}"
            # A chunk with an out-of-range index is still forwarded under its
            # "Round N" name, but there is no slot to accumulate it in.
            if task_idx < len(accumulated_rounds):
                accumulated_rounds[task_idx] += delta
            yield (role, delta, round_name, task_idx)

        # Prefer the final task outputs over the text pieced together from chunks.
        crew_result = streaming_output.result
        task_outputs = crew_result.tasks_output if hasattr(crew_result, "tasks_output") else []
        for i in range(min(len(round_names), len(task_outputs))):
            raw = task_outputs[i].raw if hasattr(task_outputs[i], "raw") else str(task_outputs[i])
            accumulated_rounds[i] = raw

        transcript = "\n\n".join(
            f"=== {name} ===\n{text}"
            for name, text in zip(round_names, accumulated_rounds)
        )
        result.text = transcript

    except Exception:
        # Fallback: run the blocking trial and replay each round in full.
        # Splitting is done on the round headers, not on blank lines, so a
        # round's later paragraphs are replayed as well.
        transcript = phase_trial(evidence_text, investigation_reports)
        roles = ["The Prosecutor", "The Defense Attorney", "The Prosecutor"]
        for task_idx, content in _split_transcript_rounds(transcript, round_names):
            round_name = round_names[task_idx]
            for role, delta in _simulate_stream(content, roles[task_idx]):
                yield (role, delta, round_name, task_idx)
        result.text = transcript
|
| 563 |
+
|
| 564 |
+
|
| 565 |
+
def phase_verdict_stream(evidence_text: str, investigation_text: str, trial_transcript: str, result: StreamResult):
    """Generator form of ``phase_verdict`` for incremental display.

    Runs the judge task in a crew created with ``stream=True`` and forwards
    each chunk. If that path raises any ``Exception``, ``phase_verdict`` is
    run instead and its text is replayed through ``_simulate_stream``.

    Args:
        evidence_text: Evidence report text embedded in the prompt.
        investigation_text: Combined investigation reports embedded in the prompt.
        trial_transcript: Trial transcript embedded in the prompt.
        result: Accumulator whose ``text`` receives the final verdict.

    Yields:
        ``(agent_role, delta)`` tuples.
    """
    presiding_judge = judge()

    # Everything the judge is asked to weigh, in prompt order.
    case_material = "".join(
        [
            "RAW EVIDENCE:\n",
            f"{evidence_text}\n\n",
            "INVESTIGATION REPORTS:\n",
            f"{investigation_text}\n\n",
            "TRIAL TRANSCRIPT:\n",
            f"{trial_transcript}\n\n",
        ]
    )
    # The output layout requested from the judge.
    verdict_layout = (
        "Deliver a structured verdict:\n\n"
        "## VERDICT\n"
        "Overall: [GUILTY / MIXED / NOT GUILTY]\n"
        "Reputational Risk Score: [0-100]\n\n"
        "## FINDINGS SUMMARY\n"
        "For each finding: severity, impact, remediation\n\n"
        "## SENTENCE\n"
        "Your final assessment and recommendations for the client."
    )
    instructions = (
        "DELIVER YOUR VERDICT\n\n"
        "You have reviewed all evidence, investigation reports, and the full trial transcript.\n\n"
        + case_material
        + verdict_layout
    )

    verdict_task = Task(
        description=instructions,
        agent=presiding_judge,
        expected_output=(
            "A structured verdict with overall ruling, reputational risk score (0-100), "
            "findings summary, and final sentence."
        ),
    )

    try:
        verdict_crew = Crew(
            agents=[presiding_judge],
            tasks=[verdict_task],
            verbose=True,
            stream=True,
        )

        streaming_output = verdict_crew.kickoff()
        for chunk in streaming_output:
            delta = chunk.content or ""
            role = chunk.agent_role or "The Judge"
            yield (role, delta)

        crew_result = streaming_output.result
        if hasattr(crew_result, "raw"):
            result.text = crew_result.raw
        else:
            result.text = str(crew_result)

    except Exception:
        # Fallback: produce the verdict with the blocking phase and replay it.
        verdict = phase_verdict(evidence_text, investigation_text, trial_transcript)
        for pair in _simulate_stream(verdict, "The Judge"):
            role, delta = pair
            yield (role, delta)
        result.text = verdict
|
src/code_tribunal/evidence.py
ADDED
|
@@ -0,0 +1,337 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Evidence gathering layer using GritQL for deterministic code analysis."""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import subprocess
|
| 5 |
+
from dataclasses import dataclass, field
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
|
| 8 |
+
from dotenv import load_dotenv
|
| 9 |
+
|
| 10 |
+
load_dotenv(Path(__file__).resolve().parent.parent.parent / ".env")
|
| 11 |
+
|
| 12 |
+
# Keys of every pattern definition, in the order the row tuples below use.
_PATTERN_FIELDS = ("category", "pattern", "language", "severity_hint", "domain")

# One row per scan: (category, GritQL pattern, language, severity hint, domain).
# A language of None means run_gritql_scan passes no --language flag to grit.
_PATTERN_ROWS = [
    # --- Hardcoded secrets (specific var names that reliably match) ---
    (
        "secret_password",
        'or { `DB_PASSWORD = $_`, `PASSWORD = $_`, `$PASS = $_` where { $PASS <: r"(?i).*password" } }',
        "python",
        "CRITICAL",
        "security",
    ),
    (
        "secret_api_key",
        'or { `API_KEY = $_`, `SECRET_KEY = $_`, `STRIPE_KEY = $_` }',
        "python",
        "CRITICAL",
        "security",
    ),
    ("secret_aws", '`AWS_SECRET = $_`', "python", "CRITICAL", "security"),
    (
        "secret_js",
        'or { `STRIPE_KEY = $_`, `JWT_SECRET = $_` }',
        None,
        "CRITICAL",
        "security",
    ),
    (
        "connection_string",
        '`self.connection_string = "$CONN"` where { $CONN <: r"mysql://.+" }',
        "python",
        "CRITICAL",
        "security",
    ),
    # --- TODO / FIXME / HACK ---
    ("todo_py", "`# TODO: $_`", "python", "LOW", "quality"),
    ("todo_js", "`// TODO: $_`", None, "LOW", "quality"),
    ("fixme_py", "`# FIXME: $_`", "python", "MEDIUM", "quality"),
    ("fixme_js", "`// FIXME: $_`", None, "MEDIUM", "quality"),
    ("hack_py", "`# HACK: $_`", "python", "MEDIUM", "quality"),
    ("hack_js", "`// HACK: $_`", None, "MEDIUM", "quality"),
    # --- Dangerous functions ---
    ("eval_usage", "`eval($_)`", "python", "CRITICAL", "security"),
    ("pickle_load", "`pickle.load($_)`", "python", "CRITICAL", "security"),
    ("os_system", "`os.system($_)`", "python", "CRITICAL", "security"),
    (
        "subprocess_shell",
        "`subprocess.call($_, shell=True)`",
        "python",
        "CRITICAL",
        "security",
    ),
    ("md5_hash", "`hashlib.md5($_)`", "python", "HIGH", "security"),
    # --- SQL injection ---
    (
        "sql_injection_fstring",
        r'`$S` where { $S <: r"f\"SELECT.*\{.*\}\"" }',
        "python",
        "CRITICAL",
        "security",
    ),
    (
        "sql_injection_js",
        r'`$STR` where { $STR <: r"`SELECT.*\$\{.*\}`" }',
        None,
        "CRITICAL",
        "security",
    ),
]

# The list of pattern-definition dicts consumed by the scanner.
GRITQL_PATTERNS = [dict(zip(_PATTERN_FIELDS, row)) for row in _PATTERN_ROWS]
|
| 144 |
+
|
| 145 |
+
|
| 146 |
+
@dataclass
class Finding:
    """One match reported by a pattern scan."""

    category: str  # category of the pattern definition that matched
    file: str  # file the match was reported in
    line: str  # line number, kept as the text the scanner printed
    code: str  # matched source snippet
    severity_hint: str  # severity taken from the pattern definition
    domain: str  # domain taken from the pattern definition

    def __str__(self) -> str:
        """Render as ``[SEVERITY] file:line — code`` with line and code stripped."""
        location = f"{self.file}:{self.line.strip()}"
        snippet = self.code.strip()
        return f"[{self.severity_hint}] {location} — {snippet}"
|
| 159 |
+
|
| 160 |
+
|
| 161 |
+
@dataclass
class EvidenceReport:
    """Collected findings for one scanned target, plus scan counters."""

    target_path: str
    findings: list[Finding] = field(default_factory=list)
    file_count: int = 0
    total_patterns: int = 0
    patterns_with_hits: int = 0

    @property
    def findings_by_domain(self) -> dict[str, list[Finding]]:
        """Findings bucketed by ``domain``, buckets in first-seen order."""
        buckets: dict[str, list[Finding]] = {}
        for finding in self.findings:
            if finding.domain not in buckets:
                buckets[finding.domain] = []
            buckets[finding.domain].append(finding)
        return buckets

    @property
    def findings_by_severity(self) -> dict[str, list[Finding]]:
        """Findings bucketed by ``severity_hint``, buckets in first-seen order."""
        buckets: dict[str, list[Finding]] = {}
        for finding in self.findings:
            if finding.severity_hint not in buckets:
                buckets[finding.severity_hint] = []
            buckets[finding.severity_hint].append(finding)
        return buckets

    def to_text(self) -> str:
        """Render the report as plain text: a header, then one section per domain."""
        out = [
            "=== FORENSIC EVIDENCE REPORT ===",
            f"Target: {self.target_path}",
            f"Files scanned: {self.file_count}",
            f"Total findings: {len(self.findings)}",
            "",
        ]

        for domain, group in self.findings_by_domain.items():
            out.append(f"--- {domain.upper()} EVIDENCE ({len(group)} findings) ---")
            out.extend(str(finding) for finding in group)
            # Blank line closes each domain section.
            out.append("")

        return "\n".join(out)
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
def _parse_gritql_output(raw: str) -> list[tuple[str, str, str]]:
    """Turn grit CLI text output into ``(file, line_number, code_snippet)`` tuples.

    An unindented line containing ``.`` or ``/`` is taken as the current file.
    An indented line that starts with a digit is a match in that file: its
    first whitespace-separated token is the line number and the remainder is
    the code. Blank lines and ``Processed ... files`` summary lines are ignored.
    """
    matches = []
    active_file = None
    for raw_line in raw.splitlines():
        text = raw_line.rstrip()
        if not text:
            continue
        # Summary line, e.g. "Processed X files and found Y matches".
        if text.startswith("Processed") and "files" in text:
            continue

        if not text[0].isspace():
            # Unindented: a file path if it looks like one, otherwise ignored.
            if "." in text or "/" in text:
                active_file = text
            continue

        # Indented lines only count once a file has been seen.
        if not active_file:
            continue

        # Match lines look like "   80   return eval(expression)".
        body = text.lstrip()
        if not body[0].isdigit():
            continue
        number, *remainder = body.split(None, 1)
        snippet = remainder[0] if remainder else ""
        matches.append((active_file, number, snippet))
    return matches
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
def run_gritql_scan(pattern_def: dict, target_dir: str) -> list[Finding]:
    """Run a single GritQL pattern and return structured findings.

    Args:
        pattern_def: Pattern description. Reads the keys ``"pattern"``,
            ``"category"``, ``"severity_hint"`` and ``"domain"``, plus the
            optional ``"language"``.
        target_dir: Directory handed to ``grit apply``.

    Returns:
        One ``Finding`` per match parsed from grit's stdout. The list is empty
        when grit times out (30 s), prints nothing to stdout, or reports
        ``found 0 matches``.

    Raises:
        RuntimeError: If the ``grit`` executable cannot be found. The original
            ``FileNotFoundError`` is attached as ``__cause__``.
    """
    # --dry-run ensures no files are modified; --language overrides auto-detection
    cmd = ["grit", "apply", "--dry-run", pattern_def["pattern"], target_dir]
    if pattern_def.get("language"):
        cmd += ["--language", pattern_def["language"]]

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    except FileNotFoundError as exc:
        # Chain explicitly so the traceback shows the missing-binary error as the cause.
        raise RuntimeError(
            "'grit' CLI not found. Install with: npm install -g @getgrit/cli"
        ) from exc
    except subprocess.TimeoutExpired:
        # Best effort: a pattern that takes too long contributes no findings.
        return []

    # NOTE(review): the return code and stderr are not inspected, so a failing
    # grit run is indistinguishable from "no matches" here.
    output = result.stdout.strip()
    if not output:
        return []

    # "found 0 matches" is in stdout — bail if no actual matches
    if "found 0 matches" in output:
        return []

    return [
        Finding(
            category=pattern_def["category"],
            file=file_path,
            line=line_num,
            code=code,
            severity_hint=pattern_def["severity_hint"],
            domain=pattern_def["domain"],
        )
        for file_path, line_num, code in _parse_gritql_output(output)
    ]
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
def _ensure_grit_initialized(target_dir: str) -> None:
|
| 267 |
+
"""Run 'grit init' if no .grit directory exists, to enable standard library patterns."""
|
| 268 |
+
grit_dir = Path(target_dir) / ".grit"
|
| 269 |
+
if not grit_dir.exists():
|
| 270 |
+
try:
|
| 271 |
+
subprocess.run(
|
| 272 |
+
["grit", "init"],
|
| 273 |
+
cwd=target_dir,
|
| 274 |
+
capture_output=True,
|
| 275 |
+
timeout=15,
|
| 276 |
+
)
|
| 277 |
+
except Exception:
|
| 278 |
+
pass # Non-critical; some patterns may still work without init
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
def gather_evidence(target_dir: str) -> EvidenceReport:
    """Run all GritQL patterns and return a structured evidence report.

    Args:
        target_dir: Root directory to scan.

    Returns:
        An ``EvidenceReport`` holding every finding from every pattern in
        ``GRITQL_PATTERNS``, the number of source files under *target_dir*,
        the number of patterns run and the number that produced at least one
        finding.

    Raises:
        RuntimeError: Propagated from ``run_gritql_scan`` when the ``grit``
            CLI is not installed.
    """
    _ensure_grit_initialized(target_dir)

    # Count source files in a single walk of the tree instead of one full
    # recursive glob per extension. The suffix comparison is case-sensitive.
    source_suffixes = (
        ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".go", ".rb", ".php", ".c", ".cpp",
    )
    file_count = sum(
        1 for entry in Path(target_dir).rglob("*") if entry.name.endswith(source_suffixes)
    )

    all_findings: list[Finding] = []
    patterns_with_hits = 0

    for pattern_def in GRITQL_PATTERNS:
        findings = run_gritql_scan(pattern_def, target_dir)
        if findings:
            patterns_with_hits += 1
            all_findings.extend(findings)

    return EvidenceReport(
        target_path=target_dir,
        findings=all_findings,
        file_count=file_count,
        total_patterns=len(GRITQL_PATTERNS),
        patterns_with_hits=patterns_with_hits,
    )
|
| 306 |
+
|
| 307 |
+
|
| 308 |
+
def gather_evidence_streaming(target_dir: str):
    """Run GritQL patterns one by one, yielding status after each pattern.

    Yields status strings during scanning, then yields the final EvidenceReport.

    Args:
        target_dir: Root directory to scan.

    Yields:
        A ``"Scanning pattern i/total: **category**..."`` string before each
        pattern is run, and, as the last item, the ``EvidenceReport`` built
        from all collected findings.

    Raises:
        RuntimeError: Propagated from ``run_gritql_scan`` when the ``grit``
            CLI is not installed.
    """
    _ensure_grit_initialized(target_dir)

    # Count source files in a single walk of the tree instead of one full
    # recursive glob per extension. The suffix comparison is case-sensitive.
    source_suffixes = (
        ".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".go", ".rb", ".php", ".c", ".cpp",
    )
    file_count = sum(
        1 for entry in Path(target_dir).rglob("*") if entry.name.endswith(source_suffixes)
    )

    all_findings: list[Finding] = []
    patterns_with_hits = 0

    total = len(GRITQL_PATTERNS)
    for position, pattern_def in enumerate(GRITQL_PATTERNS, start=1):
        yield f"Scanning pattern {position}/{total}: **{pattern_def['category']}**..."
        findings = run_gritql_scan(pattern_def, target_dir)
        if findings:
            patterns_with_hits += 1
            all_findings.extend(findings)

    yield EvidenceReport(
        target_path=target_dir,
        findings=all_findings,
        file_count=file_count,
        total_patterns=total,
        patterns_with_hits=patterns_with_hits,
    )
|
tests/fixtures/bad_code.zip
ADDED
|
Binary file (2.04 kB). View file
|
|
|
tests/fixtures/locale/app.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Bad freelance code - deliberately terrible for testing
|
| 2 |
+
import os
|
| 3 |
+
import sys
|
| 4 |
+
import json
|
| 5 |
+
import pickle
|
| 6 |
+
import hashlib
|
| 7 |
+
import subprocess
|
| 8 |
+
|
| 9 |
+
# Hardcoded credentials
|
| 10 |
+
DB_PASSWORD = "super_secret_123"
|
| 11 |
+
API_KEY = "sk-abc123def456ghi789"
|
| 12 |
+
AWS_SECRET = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
|
| 13 |
+
|
| 14 |
+
# TODO: fix this later
|
| 15 |
+
# HACK: temporary workaround, will remove before production
|
| 16 |
+
# FIXME: this entire function is broken
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
def get_user(user_id):
    # Scanner fixture: intentionally unsafe, keep as is.
    # Interpolates user_id straight into the SQL text and returns the string;
    # no query is executed here.
    # SQL injection vulnerability
    query = f"SELECT * FROM users WHERE id = {user_id}"
    return query
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
def execute_command(user_input):
    # Scanner fixture: intentionally unsafe, keep as is.
    # user_input reaches a shell twice: concatenated into an os.system command
    # and passed verbatim to subprocess.call with shell=True.
    # Command injection
    os.system("ping " + user_input)
    subprocess.call(user_input, shell=True)
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def load_data(filename):
    # Scanner fixture: intentionally unsafe, keep as is.
    # Opens filename in binary mode and unpickles its content without any
    # validation, then returns the resulting object.
    # Insecure deserialization
    with open(filename, "rb") as f:
        data = pickle.load(f)
    return data
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
def hash_password(password):
    # Scanner fixture: intentionally weak, keep as is.
    # Returns the hex MD5 digest of the encoded password; no salt is applied.
    # Weak hashing
    return hashlib.md5(password.encode()).hexdigest()
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def process_payment(card_number, cvv, amount):
    # Scanner fixture: intentionally unsafe, keep as is.
    # Prints the card number and CVV, ignores amount, and always returns True.
    # Logging sensitive data
    print(f"Processing payment: card={card_number}, cvv={cvv}")
    # No encryption, no validation
    return True
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
class DatabaseConnection:
    # Scanner fixture: intentionally bad, keep as is. The class never opens a
    # real connection; it only stores a credential-bearing URL.

    def __init__(self):
        # Connection string with hardcoded credentials
        self.connection_string = "mysql://admin:password123@localhost:3306/prod"
        self.connected = False

    def connect(self):
        # Placeholder: does nothing and never sets self.connected.
        # No error handling
        pass

    def query(self, sql):
        # The sql argument is unused; the method just returns the connection string.
        # Another SQL injection point
        cursor = self.connection_string
        return cursor
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def unused_function_one():
    # Empty placeholder in a deliberately bad fixture; presumably dead-code bait
    # for the scanner (no caller is visible in this file).
    pass
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def unused_function_two():
    # Empty placeholder in a deliberately bad fixture; presumably dead-code bait
    # for the scanner (no caller is visible in this file).
    pass
|
| 72 |
+
|
| 73 |
+
|
| 74 |
+
def unused_function_three():
    # Empty placeholder in a deliberately bad fixture; presumably dead-code bait
    # for the scanner (no caller is visible in this file).
    pass
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# eval on user input
|
| 79 |
+
def calculate(expression):
    # Scanner fixture: intentionally unsafe, keep as is.
    # Evaluates the given expression with eval() and returns the result;
    # never call this with untrusted input.
    return eval(expression)
|
tests/fixtures/locale/utils.js
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
// More bad freelance code
|
| 2 |
+
|
| 3 |
+
const express = require('express');
|
| 4 |
+
const app = express();
|
| 5 |
+
|
| 6 |
+
// Hardcoded secrets
|
| 7 |
+
const STRIPE_KEY = "sk_live_51HxxxxxXXXXXX";
|
| 8 |
+
const JWT_SECRET = "my-super-secret-jwt-key-12345";
|
| 9 |
+
|
| 10 |
+
// TODO: add authentication middleware
|
| 11 |
+
// FIXME: this is not secure at all
|
| 12 |
+
|
| 13 |
+
app.get('/api/users/:id', (req, res) => {
  // Scanner fixture: intentionally unsafe, keep as is.
  // The route parameter is interpolated into the SQL text. `db` is not
  // defined in this file and no response is ever sent on `res`.
  // SQL injection via string concatenation
  const query = `SELECT * FROM users WHERE id = ${req.params.id}`;
  db.query(query);
});
|
| 18 |
+
|
| 19 |
+
app.post('/api/login', (req, res) => {
  // Scanner fixture: intentionally unsafe, keep as is.
  // `users` is not defined in this file. The lookup matches on password only,
  // and nothing is sent back when no user matches.
  // No password hashing comparison - plain text
  const user = users.find(u => u.password === req.body.password);
  if (user) {
    // Exposing sensitive data in response
    res.json({ user: user, token: generateToken(user) });
  }
});
|
| 27 |
+
|
| 28 |
+
// Scanner fixture: starts the server on port 3000 and, on startup, prints the
// API_KEY environment variable to the console (intentional bad practice).
app.listen(3000, () => {
  console.log("Server running on port 3000");
  console.log("API_KEY:", process.env.API_KEY); // logging secrets
});
|
| 32 |
+
|
| 33 |
+
// Scanner fixture: intentionally weak, keep as is.
// Returns the base64 encoding of the JSON-serialized user object; no secret
// or signature is involved, so the "token" is trivially decodable.
function generateToken(user) {
  // Weak token generation
  return Buffer.from(JSON.stringify(user)).toString('base64');
}
|
| 37 |
+
|
| 38 |
+
// Dead code - never called
|
| 39 |
+
// Scanner fixture: dead code kept on purpose; it only logs a message and
// never touches req or res.
function legacyHandler(req, res) {
  console.log("This function is never used");
}
|
| 42 |
+
|
| 43 |
+
// Scanner fixture: middleware that logs a message and passes control on via
// next(); it is not registered on `app` anywhere in this file.
function oldMiddleware(req, res, next) {
  console.log("Deprecated middleware");
  next();
}
|
tests/test_integration.py
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Test integration: GritQL evidence → CrewAI agent analysis."""
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
import subprocess
|
| 5 |
+
from pathlib import Path
|
| 6 |
+
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
+
from crewai import Agent, Task, Crew, LLM
|
| 9 |
+
|
| 10 |
+
# Load .env from project root
|
| 11 |
+
load_dotenv(Path(__file__).resolve().parent.parent / ".env")
|
| 12 |
+
|
| 13 |
+
# --- Configuration ---
|
| 14 |
+
LOCALE_DIR = os.path.join(os.path.dirname(__file__), "fixtures", "locale")
|
| 15 |
+
|
| 16 |
+
# Patterns verified against test fixtures.
|
| 17 |
+
# JS patterns use // comments, Python patterns use # comments.
|
| 18 |
+
# Some patterns target Python specifically via --language flag.
|
| 19 |
+
GRITQL_PATTERNS = [
|
| 20 |
+
# --- Cross-language: hardcoded secrets ---
|
| 21 |
+
{
|
| 22 |
+
"category": "hardcoded_secrets_js",
|
| 23 |
+
"pattern": '`$VAR = "$VAL"` where { $VAR <: r"(?i).*(password|key|secret|token).*" }',
|
| 24 |
+
"language": None, # auto-detect (JS works natively)
|
| 25 |
+
},
|
| 26 |
+
{
|
| 27 |
+
"category": "hardcoded_secrets_py",
|
| 28 |
+
"pattern": '`$VAR = $VAL` where { $VAR <: r"(?i).*(PASSWORD|KEY|SECRET|TOKEN).*" }',
|
| 29 |
+
"language": "python",
|
| 30 |
+
},
|
| 31 |
+
# --- Connection strings ---
|
| 32 |
+
{
|
| 33 |
+
"category": "connection_strings",
|
| 34 |
+
"pattern": '`"$CONN"` where { $CONN <: r"mysql://.+" }',
|
| 35 |
+
"language": None,
|
| 36 |
+
},
|
| 37 |
+
# --- TODO / FIXME / HACK comments ---
|
| 38 |
+
{
|
| 39 |
+
"category": "todo_py",
|
| 40 |
+
"pattern": "`# TODO: $_`",
|
| 41 |
+
"language": "python",
|
| 42 |
+
},
|
| 43 |
+
{
|
| 44 |
+
"category": "todo_js",
|
| 45 |
+
"pattern": "`// TODO: $_`",
|
| 46 |
+
"language": None,
|
| 47 |
+
},
|
| 48 |
+
{
|
| 49 |
+
"category": "fixme_py",
|
| 50 |
+
"pattern": "`# FIXME: $_`",
|
| 51 |
+
"language": "python",
|
| 52 |
+
},
|
| 53 |
+
{
|
| 54 |
+
"category": "fixme_js",
|
| 55 |
+
"pattern": "`// FIXME: $_`",
|
| 56 |
+
"language": None,
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"category": "hack_py",
|
| 60 |
+
"pattern": "`# HACK: $_`",
|
| 61 |
+
"language": "python",
|
| 62 |
+
},
|
| 63 |
+
{
|
| 64 |
+
"category": "hack_js",
|
| 65 |
+
"pattern": "`// HACK: $_`",
|
| 66 |
+
"language": None,
|
| 67 |
+
},
|
| 68 |
+
# --- Dangerous function calls ---
|
| 69 |
+
{
|
| 70 |
+
"category": "eval_usage",
|
| 71 |
+
"pattern": "`eval($_)`",
|
| 72 |
+
"language": "python",
|
| 73 |
+
},
|
| 74 |
+
{
|
| 75 |
+
"category": "pickle_load",
|
| 76 |
+
"pattern": "`pickle.load($_)`",
|
| 77 |
+
"language": "python",
|
| 78 |
+
},
|
| 79 |
+
{
|
| 80 |
+
"category": "os_system",
|
| 81 |
+
"pattern": "`os.system($_)`",
|
| 82 |
+
"language": "python",
|
| 83 |
+
},
|
| 84 |
+
{
|
| 85 |
+
"category": "subprocess_shell",
|
| 86 |
+
"pattern": "`subprocess.call($_, shell=True)`",
|
| 87 |
+
"language": "python",
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"category": "md5_hash",
|
| 91 |
+
"pattern": "`hashlib.md5($_)`",
|
| 92 |
+
"language": "python",
|
| 93 |
+
},
|
| 94 |
+
# --- SQL injection ---
|
| 95 |
+
{
|
| 96 |
+
"category": "sql_injection_fstring",
|
| 97 |
+
"pattern": r'`$S` where { $S <: r"f\"SELECT.*\{.*\}\"" }',
|
| 98 |
+
"language": "python",
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"category": "sql_injection_js",
|
| 102 |
+
"pattern": r'`$STR` where { $STR <: r"`SELECT.*\$\{.*\}`" }',
|
| 103 |
+
"language": None,
|
| 104 |
+
},
|
| 105 |
+
]
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
def run_gritql(pattern: str, target_dir: str, language: str | None = None) -> dict:
|
| 109 |
+
"""Run a single GritQL pattern and return structured results."""
|
| 110 |
+
cmd = ["grit", "apply", pattern, target_dir]
|
| 111 |
+
if language:
|
| 112 |
+
cmd += ["--language", language]
|
| 113 |
+
|
| 114 |
+
try:
|
| 115 |
+
result = subprocess.run(
|
| 116 |
+
cmd,
|
| 117 |
+
capture_output=True,
|
| 118 |
+
text=True,
|
| 119 |
+
timeout=30,
|
| 120 |
+
)
|
| 121 |
+
output = result.stdout.strip()
|
| 122 |
+
errors = result.stderr.strip()
|
| 123 |
+
# Grit prints "Processed X files and found Y matches" to stderr
|
| 124 |
+
match_line = [l for l in errors.splitlines() if "found" in l]
|
| 125 |
+
return {
|
| 126 |
+
"pattern": pattern,
|
| 127 |
+
"findings": output or None,
|
| 128 |
+
"summary": match_line[0] if match_line else None,
|
| 129 |
+
"returncode": result.returncode,
|
| 130 |
+
}
|
| 131 |
+
except FileNotFoundError:
|
| 132 |
+
return {"pattern": pattern, "findings": None, "error": "'grit' CLI not found. Run: npm install -g @getgrit/cli"}
|
| 133 |
+
except Exception as e:
|
| 134 |
+
return {"pattern": pattern, "findings": None, "error": str(e)}
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def gather_evidence(target_dir: str) -> list[dict]:
    """Run every entry of ``GRITQL_PATTERNS`` against *target_dir*.

    A progress line is printed before each scan.

    Returns:
        One result dict per pattern, in pattern order: the dict returned by
        ``run_gritql`` with the pattern's ``"category"`` added.
    """
    collected = []
    for spec in GRITQL_PATTERNS:
        label = spec["category"]
        print(f" Scanning: {label}...")
        outcome = run_gritql(spec["pattern"], target_dir, spec.get("language"))
        outcome["category"] = label
        collected.append(outcome)
    return collected
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
def format_evidence_for_agent(evidence: list[dict]) -> str:
    """Render scan results as a text report for the LLM agent.

    Only items with a truthy ``"findings"`` value get a section (category in
    upper case, the pattern, then the raw findings). The second line of the
    report states how many items had findings out of the total.

    Args:
        evidence: Result dicts carrying ``"category"``, ``"pattern"`` and
            ``"findings"``.

    Returns:
        The report, with its parts joined by newlines.
    """
    with_hits = [entry for entry in evidence if entry.get("findings")]

    report = [
        "=== FORENSIC EVIDENCE REPORT ===\n",
        f"Total categories with findings: {len(with_hits)} / {len(evidence)}\n",
    ]
    for entry in with_hits:
        report += [
            f"--- {entry['category'].upper()} ---",
            f"Pattern: {entry['pattern']}",
            f"Findings:\n{entry['findings']}",
            "",
        ]
    return "\n".join(report)
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def run_crewai_analysis(evidence_report: str) -> str:
    """Hand the evidence report to a single CrewAI agent and return its analysis.

    The model name is read from ``MODEL_NAME`` (default ``"zai/glm-5.1"``) and
    the API key from ``ZAI_API_KEY``.

    Args:
        evidence_report: Text report appended to the task description.

    Returns:
        ``result.raw`` when the kickoff result has a ``raw`` attribute,
        otherwise ``str(result)``.
    """
    model_client = LLM(
        model=os.environ.get("MODEL_NAME", "zai/glm-5.1"),
        api_key=os.environ.get("ZAI_API_KEY"),
    )

    persona = (
        "You are a veteran code auditor with 15 years of experience. "
        "You've seen every trick in the book — from hardcoded credentials to SQL injection. "
        "You analyze deterministic scan results and provide clear, severity-ranked findings."
    )
    auditor = Agent(
        role="Senior Code Forensic Investigator",
        goal="Analyze code evidence and identify critical security vulnerabilities and code quality issues",
        backstory=persona,
        llm=model_client,
        verbose=True,
    )

    instructions = (
        "Analyze the following forensic evidence report from a codebase scan. "
        "For each finding, assess severity (CRITICAL / HIGH / MEDIUM / LOW), "
        "explain the risk, and suggest a fix.\n\n"
        f"{evidence_report}"
    )
    review = Task(
        description=instructions,
        agent=auditor,
        expected_output="A structured forensic analysis report with severity-ranked findings.",
    )

    outcome = Crew(agents=[auditor], tasks=[review], verbose=True).kickoff()
    if hasattr(outcome, "raw"):
        return outcome.raw
    return str(outcome)
|
| 202 |
+
|
| 203 |
+
|
| 204 |
+
def main():
    """Run the two-phase integration check and print its output.

    Phase 1 scans ``LOCALE_DIR`` with GritQL and prints the evidence report.
    Phase 2 sends that report to the CrewAI agent; it is skipped when
    ``ZAI_API_KEY`` is not set.
    """
    rule = "=" * 60
    print(rule)
    print("CodeTribunal Integration Test")
    print(rule)

    # Phase 1: GritQL evidence gathering
    print("\n[Phase 1] Gathering evidence with GritQL...")
    evidence = gather_evidence(LOCALE_DIR)

    hit_count = len([item for item in evidence if item.get("findings")])
    print(f"\n Patterns scanned: {len(evidence)}")
    print(f" Hits: {hit_count}")

    evidence_report = format_evidence_for_agent(evidence)
    print("\n" + evidence_report)

    # Phase 2: CrewAI analysis (needs an API key)
    if not os.environ.get("ZAI_API_KEY"):
        print("\n[Phase 2] SKIPPED — set ZAI_API_KEY to test CrewAI integration")
        return

    print("\n[Phase 2] Running CrewAI analysis with GLM 5.1...")
    report = run_crewai_analysis(evidence_report)
    print("\n" + rule)
    print("AGENT REPORT")
    print(rule)
    print(report)
|
| 232 |
+
|
| 233 |
+
|
| 234 |
+
if __name__ == "__main__":
|
| 235 |
+
main()
|