Spaces:

amine-yagoub
/

CodeTribunal

Sleeping

App Files Files Community

amine-yagoub committed on Apr 1

Commit

d5341cc

1 Parent(s): 38cd7bb

feat: Add initial CodeTribunal implementation

Browse files

Files changed (13) hide show

LICENSE +21 -0
README.md +29 -0
pyproject.toml +46 -0
src/code_tribunal/__init__.py +1 -0
src/code_tribunal/agents.py +152 -0
src/code_tribunal/app.py +720 -0
src/code_tribunal/cli.py +44 -0
src/code_tribunal/courtroom.py +618 -0
src/code_tribunal/evidence.py +337 -0
tests/fixtures/bad_code.zip +0 -0
tests/fixtures/locale/app.py +80 -0
tests/fixtures/locale/utils.js +46 -0
tests/test_integration.py +235 -0

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2026 Amine Yagoub
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

@@ -1,3 +1,4 @@
 ---
 title: CodeTribunal
 emoji: 💻
@@ -10,3 +11,31 @@ short_description: The AI Courtroom That Exposes Bad Freelance Code
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+<<<<<<< HEAD
 ---
 title: CodeTribunal
 emoji: 💻
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+=======
+# CodeTribunal
+The AI courtroom that exposes bad freelance code.
+Multi-agent forensic investigation powered by GLM 5.1. Instead of guessing code quality, CodeTribunal puts it on trial — a live-streaming debate where an AI Prosecutor and Defense Attorney clash over real, deterministic technical evidence.
+## Install
+```bash
+pip install -e .
+```
+## Usage
+```bash
+code-tribunal ./path/to/codebase
+```
+## How it works
+1. **Evidence Gathering** — Deterministic scans (security, code smells, hardcoded secrets, TODOs)
+2. **Investigation** — GLM 5.1 agents analyze the evidence
+3. **The Trial** — Prosecutor and Defense debate in a live-streamed courtroom
+4. **Verdict** — The Judge delivers a final ruling
+Built for the [Build with GLM 5.1](https://build-with-glm-5-1-challenge.devpost.com) hackathon.
+>>>>>>> b4fcdee (feat: Add initial CodeTribunal implementation)

pyproject.toml ADDED Viewed

	@@ -0,0 +1,46 @@

+[build-system]
+requires = ["hatchling"]
+build-backend = "hatchling.build"
+[project]
+name = "code-tribunal"
+version = "0.1.0"
+description = "AI courtroom that exposes bad freelance code through multi-agent forensic investigation"
+readme = "README.md"
+license = "MIT"
+# The repository ships a plain "LICENSE" file with no extension, so the glob
+# must not require a dot; this pattern matches LICENSE, LICENCE and suffixed forms.
+license-files = ["LICEN[CS]E*"]
+requires-python = ">=3.11"
+authors = [
+    {name = "Amine Yagoub"},
+]
+keywords = ["ai", "code-review", "forensic", "multi-agent", "glm"]
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+]
+dependencies = [
+    "crewai[litellm]",
+    "gritql>=0.2.0",
+    "gradio>=5.0.0",
+    "rich>=13.0.0",
+    "click>=8.0.0",
+    "httpx>=0.27.0",
+    "python-dotenv>=1.0.0",
+    "fpdf2>=2.7.0",
+]
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.0.0",
+    "ruff>=0.9.0",
+]
+[project.urls]
+Repository = "https://github.com/amineyagoub/CodeTribunal"
+[project.scripts]
+code-tribunal = "code_tribunal.cli:main"

src/code_tribunal/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


+"""CodeTribunal: AI courtroom that exposes bad freelance code."""

src/code_tribunal/agents.py ADDED Viewed

	@@ -0,0 +1,152 @@

+"""Courtroom agent definitions for CodeTribunal."""
+import os
+from crewai import Agent, LLM
+from pathlib import Path
+from dotenv import load_dotenv
+load_dotenv(Path(__file__).resolve().parent.parent.parent / ".env")
+def _get_llm() -> LLM:
+    """Create the LLM handle passed to the courtroom agents.
+
+    The model name comes from ``MODEL_NAME`` (falling back to
+    ``zai/glm-5.1``) and the API key from ``ZAI_API_KEY``.
+    """
+    env = os.environ
+    model_name = env.get("MODEL_NAME", "zai/glm-5.1")
+    api_key = env.get("ZAI_API_KEY")
+    return LLM(model=model_name, api_key=api_key, temperature=0.3)
+# ---------------------------------------------------------------------------
+# Phase 2: Investigators
+# ---------------------------------------------------------------------------
+def security_investigator() -> Agent:
+    """Build the investigator agent that reports on security evidence."""
+    mission = (
+        "Analyze security-related code evidence and produce a detailed investigation report. "
+        "Identify every vulnerability, rank by severity, explain the attack vector, "
+        "and describe the potential impact if exploited in production."
+    )
+    persona = (
+        "You are a former penetration tester turned code auditor. "
+        "You've found hardcoded AWS keys in Fortune 500 repos, SQL injection in banking APIs, "
+        "and deserialization bugs that would have cost millions. "
+        "You don't guess — you follow the evidence and build an airtight case. "
+        "You treat every hardcoded secret as a loaded weapon and every eval() as an open door."
+    )
+    return Agent(
+        role="Security Forensic Investigator",
+        goal=mission,
+        backstory=persona,
+        llm=_get_llm(),
+        verbose=True,
+    )
+def quality_investigator() -> Agent:
+    """Build the investigator agent that reports on code-quality evidence."""
+    mission = (
+        "Analyze code quality evidence and produce a detailed investigation report. "
+        "Identify technical debt, abandoned code, missing error handling, and developer negligence indicators. "
+        "Focus on patterns that suggest rushed or careless development."
+    )
+    persona = (
+        "You are a principal engineer who has inherited nightmares from freelance developers. "
+        "You've seen TODO comments that are 5 years old, dead code that accounts for 40% of a codebase, "
+        "and functions so complex they defied testing. "
+        "You can spot the difference between 'agile iteration' and 'lazy corner-cutting' from a mile away."
+    )
+    return Agent(
+        role="Code Quality Forensic Investigator",
+        goal=mission,
+        backstory=persona,
+        llm=_get_llm(),
+        verbose=True,
+    )
+def architecture_investigator() -> Agent:
+    """Build the investigator agent that reports on architectural evidence."""
+    mission = (
+        "Analyze architectural evidence and produce a detailed investigation report. "
+        "Identify structural problems: tight coupling, missing abstractions, "
+        "hardcoded configuration that should be externalized, and patterns that won't scale."
+    )
+    persona = (
+        "You are a systems architect with 20 years of experience across startups and enterprises. "
+        "You can look at a codebase and tell whether it was built to last or built to invoice. "
+        "You identify patterns that indicate the developer didn't understand the domain "
+        "or deliberately cut corners to finish faster."
+    )
+    return Agent(
+        role="Architecture Forensic Investigator",
+        goal=mission,
+        backstory=persona,
+        llm=_get_llm(),
+        verbose=True,
+    )
+# ---------------------------------------------------------------------------
+# Phase 3: The Trial
+# ---------------------------------------------------------------------------
+def prosecutor() -> Agent:
+    """Build the trial agent that argues the case against the code."""
+    mission = (
+        "Build the strongest possible case that this code is negligent, dangerous, or fraudulent. "
+        "Use the investigation reports as evidence. Argue with precision and force. "
+        "Cite specific file paths, line numbers, and vulnerability types. "
+        "Make the jury understand why this code should never have been delivered."
+    )
+    persona = (
+        "You are a ruthless courtroom prosecutor specializing in technology fraud cases. "
+        "You've won cases against developers who delivered insecure code to non-technical clients. "
+        "You know how to take technical evidence and make it devastatingly clear. "
+        "You don't exaggerate — the facts are damning enough. "
+        "Your weapon is specificity: every claim backed by line numbers and evidence."
+    )
+    return Agent(
+        role="The Prosecutor",
+        goal=mission,
+        backstory=persona,
+        llm=_get_llm(),
+        verbose=True,
+    )
+def defense_attorney() -> Agent:
+    """Build the trial agent that argues in defense of the code."""
+    mission = (
+        "Mount the best possible defense of this code. "
+        "Challenge the prosecution's claims. Argue mitigating circumstances. "
+        "Point out that some patterns are acceptable in certain contexts. "
+        "Argue proportionality — not every issue is a catastrophe. "
+        "Be honest but vigorous in your defense."
+    )
+    persona = (
+        "You are a defense attorney who specializes in technology cases. "
+        "You believe everyone deserves a fair hearing, even bad code. "
+        "You're not dishonest — you argue context, proportionality, and intent. "
+        "A TODO comment isn't negligence, it's a roadmap. "
+        "An eval() in a private script isn't the same as eval() in a web server. "
+        "You force the prosecution to prove every claim."
+    )
+    return Agent(
+        role="The Defense Attorney",
+        goal=mission,
+        backstory=persona,
+        llm=_get_llm(),
+        verbose=True,
+    )
+# ---------------------------------------------------------------------------
+# Phase 4: The Verdict
+# ---------------------------------------------------------------------------
+def judge() -> Agent:
+    """Build the agent that delivers the final structured verdict."""
+    mission = (
+        "Review all evidence, investigation reports, and the trial transcript. "
+        "Deliver a final, structured verdict. "
+        "For each finding: severity, impact, and recommended remediation. "
+        "End with an overall assessment: GUILTY (negligent), MIXED (some issues), or NOT GUILTY (acceptable). "
+        "Include a 'reputational risk score' from 0-100 for the developer who wrote this code."
+    )
+    persona = (
+        "You are a senior judge who has presided over hundreds of technology disputes. "
+        "You are impartial, precise, and thorough. "
+        "You don't let the prosecution's rhetoric sway you — you follow the evidence. "
+        "But you also don't let the defense minimize real harm. "
+        "Your verdicts are known for being fair, detailed, and impossible to appeal."
+    )
+    return Agent(
+        role="The Judge",
+        goal=mission,
+        backstory=persona,
+        llm=_get_llm(),
+        verbose=True,
+    )

src/code_tribunal/app.py ADDED Viewed

	@@ -0,0 +1,720 @@

+"""Gradio streaming UI for the CodeTribunal courtroom."""
+import os
+import tempfile
+import time
+import zipfile
+from pathlib import Path
+import gradio as gr
+from gradio import ChatMessage
+from code_tribunal.evidence import (
+    EvidenceReport,
+    gather_evidence_streaming,
+)
+from code_tribunal.courtroom import (
+    StreamResult,
+    phase_investigation_stream,
+    phase_trial_stream,
+    phase_verdict_stream,
+)
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+# Emoji shown for each agent role; looked up by _agent_icon(), which falls
+# back to a default icon for roles not listed here.
+AGENT_AVATARS = {
+    "Security Forensic Investigator": "🛡️",
+    "Code Quality Forensic Investigator": "📋",
+    "Architecture Forensic Investigator": "🏗️",
+    "The Prosecutor": "⚖️",
+    "The Defense Attorney": "🛡️",
+    "The Judge": "🔨",
+    "Investigator": "🔍",
+}
+# Badge background colour (CSS hex) per severity label; _severity_badge()
+# uses a grey fallback for labels not listed here.
+SEVERITY_COLORS = {
+    "CRITICAL": "#dc2626",
+    "HIGH": "#ea580c",
+    "MEDIUM": "#ca8a04",
+    "LOW": "#2563eb",
+}
+# Status lines keyed by pipeline phase. run_courtroom() cycles through the
+# list for the active phase while that phase streams its output.
+STATUS_MESSAGES = {
+    "extracting": [
+        "Unpacking the evidence...",
+        "Extracting source files...",
+        "Cataloging submitted code...",
+    ],
+    "evidence": [
+        "Scanning with GritQL forensic patterns...",
+        "Searching for hardcoded secrets...",
+        "Analyzing code for dangerous functions...",
+        "Checking for SQL injection vectors...",
+        "Cataloging technical debt markers...",
+        "Building the evidence dossier...",
+    ],
+    "investigation": [
+        "🔍 Security Investigator analyzing vulnerabilities...",
+        "📋 Quality Investigator assessing code standards...",
+        "🏗️ Architecture Investigator reviewing structure...",
+        "Cross-referencing findings across domains...",
+        "Compiling investigation reports...",
+    ],
+    "trial": [
+        "Court is now in session...",
+        "The Prosecutor is building the case...",
+        "Examining the evidence in detail...",
+        "The Defense is cross-examining...",
+        "Hearing rebuttal arguments...",
+        "Closing arguments underway...",
+    ],
+    "verdict": [
+        "The Judge is reviewing all evidence...",
+        "Weighing prosecution arguments...",
+        "Considering defense testimony...",
+        "Preparing the final ruling...",
+        "The gavel is about to fall...",
+    ],
+}
+CUSTOM_CSS = """
+/* ─── Global ─── */
+.gradio-container {
+    max-width: 960px !important;
+    margin: 0 auto !important;
+}
+body {
+    background: #0a0a14 !important;
+}
+.dark {
+    background: #0f0f1a !important;
+}
+/* ─── Hero ─── */
+.hero-logo {
+    display: block !important;
+    margin: 0 auto 12px auto !important;
+    border-radius: 16px !important;
+}
+.hero-title {
+    text-align: center !important;
+    color: #fbbf24 !important;
+    font-family: 'Georgia', serif !important;
+    font-size: 2.4em !important;
+    font-weight: 700 !important;
+    margin-bottom: 4px !important;
+}
+.hero-subtitle {
+    text-align: center !important;
+    color: #94a3b8 !important;
+    font-size: 1.1em !important;
+    margin-top: 0 !important;
+}
+/* ─── Upload area ─── */
+.upload-area .file-preview {
+    min-height: 220px !important;
+    border: 2px dashed #fbbf2440 !important;
+    border-radius: 16px !important;
+    background: #1a1a2e !important;
+    transition: border-color 0.3s !important;
+}
+.upload-area .file-preview:hover {
+    border-color: #fbbf24 !important;
+}
+/* ─── Status ─── */
+.status-phase {
+    text-align: center !important;
+    color: #fbbf24 !important;
+    font-size: 1.1em !important;
+    font-weight: 600 !important;
+}
+.status-detail {
+    text-align: center !important;
+    color: #94a3b8 !important;
+    font-style: italic !important;
+}
+/* ─── Evidence table ─── */
+.evidence-table {
+    font-family: 'JetBrains Mono', 'Fira Code', monospace !important;
+    font-size: 13px !important;
+    color: #d4d4d4 !important;
+}
+/* ─── Chatbot ─── */
+.chatbot-panel {
+    border: 1px solid #2a2a40 !important;
+    border-radius: 12px !important;
+    background: #12121f !important;
+}
+/* ─── Verdict ─── */
+.verdict-box {
+    border: 2px solid #fbbf24 !important;
+    border-radius: 12px !important;
+    background: linear-gradient(135deg, #1a1a2e, #0f172a) !important;
+    padding: 24px !important;
+    color: #e2e8f0 !important;
+}
+/* ─── Export buttons ─── */
+.export-btn {
+    border: 1px solid #fbbf2440 !important;
+    border-radius: 8px !important;
+    color: #fbbf24 !important;
+    background: #1a1a2e !important;
+}
+.export-btn:hover {
+    background: #2a2a40 !important;
+    border-color: #fbbf24 !important;
+}
+/* ─── Scrollbar ─── */
+::-webkit-scrollbar { width: 8px; }
+::-webkit-scrollbar-track { background: #0f0f1a; }
+::-webkit-scrollbar-thumb { background: #2a2a40; border-radius: 4px; }
+::-webkit-scrollbar-thumb:hover { background: #3a3a50; }
+"""
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+def _esc(text: str) -> str:
+    """Escape ``&``, ``<`` and ``>`` so *text* can be placed in HTML content."""
+    # A single translate pass gives the same result as replacing "&" first
+    # and then the angle brackets.
+    return text.translate({ord("&"): "&amp;", ord("<"): "&lt;", ord(">"): "&gt;"})
+def _severity_badge(sev: str) -> str:
+    """Return an inline HTML badge for a severity label.
+
+    Labels missing from ``SEVERITY_COLORS`` get a grey background.
+    """
+    background = SEVERITY_COLORS.get(sev, "#6b7280")
+    css = (
+        f"background:{background};color:white;padding:2px 8px;"
+        "border-radius:4px;font-size:12px;font-weight:bold"
+    )
+    return f'<span style="{css}">{sev}</span>'
+def _evidence_html(report) -> str:
+    """Render evidence report as styled HTML.
+
+    Args:
+        report: Object exposing ``file_count``, ``findings`` and
+            ``findings_by_domain`` (a mapping of domain name to findings,
+            each with ``severity_hint``, ``file``, ``line`` and ``code``).
+
+    Returns:
+        An HTML fragment: a summary line followed by one table per domain.
+    """
+    lines = ["<h3>Evidence Report</h3>"]
+    lines.append(f"<p>Files scanned: <b>{report.file_count}</b> | Total findings: <b>{len(report.findings)}</b></p>")
+    for domain, findings in report.findings_by_domain.items():
+        lines.append(f'<h4 style="margin-top:16px">{_esc(domain.title())} Evidence ({len(findings)} findings)</h4>')
+        lines.append('<table style="width:100%;border-collapse:collapse">')
+        lines.append('<tr style="border-bottom:1px solid #333"><th style="text-align:left;padding:4px">Severity</th><th style="text-align:left;padding:4px">File</th><th style="text-align:left;padding:4px">Line</th><th style="text-align:left;padding:4px">Code</th></tr>')
+        for finding in findings:
+            # File names come from the uploaded archive, so they are escaped
+            # just like the code snippet before being placed in the markup.
+            file_name = _esc(Path(finding.file).name)
+            line_no = _esc(str(finding.line))
+            lines.append(
+                f'<tr style="border-bottom:1px solid #222">'
+                f'<td style="padding:4px">{_severity_badge(finding.severity_hint)}</td>'
+                f'<td style="padding:4px;font-family:monospace;font-size:13px">{file_name}</td>'
+                f'<td style="padding:4px;font-family:monospace">{line_no}</td>'
+                f'<td style="padding:4px;font-family:monospace;font-size:13px;color:#a0a0a0">{_esc(finding.code)}</td>'
+                f'</tr>'
+            )
+        lines.append('</table>')
+    return "\n".join(lines)
+def _agent_icon(role: str) -> str:
+    """Look up the emoji for an agent role, defaulting to a memo icon."""
+    try:
+        return AGENT_AVATARS[role]
+    except KeyError:
+        return "📝"
+def _yield(
+    hero_vis, upload_vis, proc_vis,
+    status, evidence, chat, verdict, export_vis,
+):
+    """Assemble the 8-tuple that every UI update yields.
+
+    The three leading flags and the trailing flag become visibility
+    updates; the four values in between are passed through unchanged.
+    """
+    hero_update, upload_update, processing_update = (
+        gr.update(visible=flag) for flag in (hero_vis, upload_vis, proc_vis)
+    )
+    export_update = gr.update(visible=export_vis)
+    return (
+        hero_update,
+        upload_update,
+        processing_update,
+        status,
+        evidence,
+        chat,
+        verdict,
+        export_update,
+    )
+# ---------------------------------------------------------------------------
+# Pipeline runner with progressive streaming updates
+# ---------------------------------------------------------------------------
+# Yield throttle — max ~20 updates/sec to prevent browser lag
+_MIN_YIELD_INTERVAL = 0.05
+def run_courtroom(code_input):
+    """Run the full pipeline, yielding progressive updates for the UI.
+
+    Args:
+        code_input: The uploaded archive: either an object whose ``name``
+            attribute is the path of a ``.zip`` file, or that path itself.
+            ``None`` yields a prompt to upload a file.
+
+    Yields:
+        The 8-tuple built by ``_yield``: visibility updates for the hero,
+        upload and processing sections, then status text, evidence HTML,
+        chat history, verdict text and the export-row visibility update.
+    """
+    # --- Validate input ---
+    if code_input is None:
+        yield _yield(
+            True, True, False,
+            "Please upload a .zip file containing the code to investigate.",
+            None, [], None, False,
+        )
+        return
+    # --- Hide hero/upload, show processing ---
+    yield _yield(False, False, True, "### Extracting files...", None, [], None, False)
+    archive_path = str(getattr(code_input, "name", code_input))
+    if not archive_path.lower().endswith(".zip"):
+        yield _yield(
+            False, False, True,
+            "Please upload a .zip file.",
+            None, [], None, False,
+        )
+        return
+    # Check the API key before unpacking so a misconfigured run does no work.
+    if not os.environ.get("ZAI_API_KEY"):
+        yield _yield(
+            False, False, True,
+            "ZAI_API_KEY not set. Configure .env file.",
+            None, [], None, False,
+        )
+        return
+    # The extracted sources live in a temporary directory that is removed
+    # when the pipeline finishes, returns early, or the generator is closed.
+    with tempfile.TemporaryDirectory() as tmpdir:
+        try:
+            with zipfile.ZipFile(archive_path, "r") as zf:
+                zf.extractall(tmpdir)
+        except (zipfile.BadZipFile, OSError) as exc:
+            yield _yield(
+                False, False, True,
+                f"Could not read the uploaded archive: {exc}",
+                None, [], None, False,
+            )
+            return
+        yield from _run_phases(tmpdir)
+def _append_delta(chat_history, delta, title=None):
+    """Add a streamed text chunk to the courtroom transcript.
+
+    Args:
+        chat_history: List of ``ChatMessage`` objects, changed in place.
+        delta: Text chunk produced by the agent currently speaking.
+        title: When given, a new assistant message with this title is
+            started; when ``None``, *delta* is appended to the last message.
+    """
+    if title is not None:
+        chat_history.append(ChatMessage(
+            role="assistant",
+            content=delta,
+            metadata={"title": title},
+        ))
+        return
+    previous = chat_history[-1]
+    # Swap in a rebuilt message carrying the accumulated text.
+    chat_history[-1] = ChatMessage(
+        role="assistant",
+        content=previous.content + delta,
+        metadata=previous.metadata,
+    )
+def _run_phases(tmpdir):
+    """Run evidence, investigation, trial and verdict on the code in *tmpdir*.
+
+    Yields the same 8-tuples as ``run_courtroom``; returns early with a
+    "case dismissed" update when the evidence scan reports no findings.
+    """
+    chat_history = []
+    # ===================================================================
+    # Phase 1: Evidence — stream per-pattern progress
+    # ===================================================================
+    status_msgs = STATUS_MESSAGES["evidence"]
+    status_idx = 0
+    report = None
+    for update in gather_evidence_streaming(tmpdir):
+        if isinstance(update, str):
+            # Status update from evidence streaming
+            status_idx = (status_idx + 1) % len(status_msgs)
+            yield _yield(
+                False, False, True,
+                f"### Phase 1/4: Forensic Evidence\n{update}\n\n*{status_msgs[status_idx]}*",
+                None, [], None, False,
+            )
+        elif isinstance(update, EvidenceReport):
+            report = update
+    if report is None or not report.findings:
+        yield _yield(
+            False, False, True,
+            "### Phase 1/4: Evidence Complete\nNo findings detected. **Case dismissed** — code appears clean.",
+            None, [], None, False,
+        )
+        return
+    evidence_html_val = _evidence_html(report)
+    evidence_text = report.to_text()
+    # Add evidence message to chat
+    chat_history.append(ChatMessage(
+        role="user",
+        content=(
+            f"**Case Filed**: Code submitted for forensic analysis.\n\n"
+            f"**{report.file_count}** files scanned — **{len(report.findings)}** findings detected "
+            f"across **{len(report.findings_by_domain)}** domains."
+        ),
+        metadata={"title": "Court Clerk"},
+    ))
+    yield _yield(
+        False, False, True,
+        f"### Phase 1/4: Evidence Complete\n**{len(report.findings)}** findings detected. Proceeding to investigation...",
+        evidence_html_val, chat_history, None, False,
+    )
+    # ===================================================================
+    # Phase 2: Investigation — stream agent output
+    # ===================================================================
+    inv_result = StreamResult()
+    status_msgs = STATUS_MESSAGES["investigation"]
+    status_idx = 0
+    current_task_idx = -1
+    last_yield = 0.0
+    inv_labels = ["Security", "Quality", "Architecture"]
+    yield _yield(
+        False, False, True,
+        f"### Phase 2/4: Investigation\n*{status_msgs[0]}*",
+        evidence_html_val, chat_history, None, False,
+    )
+    for role, delta, task_idx in phase_investigation_stream(report, inv_result):
+        # New speaker? Start a new ChatMessage
+        if task_idx != current_task_idx:
+            label = inv_labels[task_idx] if task_idx < len(inv_labels) else f"Agent {task_idx}"
+            _append_delta(chat_history, delta, f"{_agent_icon(role)} {label} Investigation")
+            current_task_idx = task_idx
+        else:
+            _append_delta(chat_history, delta)
+        # Throttle UI updates to at most one per _MIN_YIELD_INTERVAL seconds.
+        now = time.time()
+        if now - last_yield >= _MIN_YIELD_INTERVAL:
+            status_idx = (status_idx + 1) % len(status_msgs)
+            yield _yield(
+                False, False, True,
+                f"### Phase 2/4: Investigation\n*{status_msgs[status_idx]}*",
+                evidence_html_val, chat_history, None, False,
+            )
+            last_yield = now
+    # Final yield for phase 2
+    investigation_reports = inv_result.metadata.get("reports", {})
+    investigation_text = inv_result.text
+    yield _yield(
+        False, False, True,
+        "### Phase 2/4: Investigation Complete\n**3 reports** generated. Court is now in session...",
+        evidence_html_val, chat_history, None, False,
+    )
+    # ===================================================================
+    # Phase 3: Trial — stream prosecutor / defense / rebuttal
+    # ===================================================================
+    trial_result = StreamResult()
+    status_msgs = STATUS_MESSAGES["trial"]
+    status_idx = 0
+    current_task_idx = -1
+    last_yield = 0.0
+    yield _yield(
+        False, False, True,
+        f"### Phase 3/4: The Trial\n*{status_msgs[0]}*",
+        evidence_html_val, chat_history, None, False,
+    )
+    for role, delta, round_name, task_idx in phase_trial_stream(
+        evidence_text, investigation_reports, trial_result
+    ):
+        if task_idx != current_task_idx:
+            _append_delta(chat_history, delta, f"{_agent_icon(role)} {round_name}")
+            current_task_idx = task_idx
+        else:
+            _append_delta(chat_history, delta)
+        now = time.time()
+        if now - last_yield >= _MIN_YIELD_INTERVAL:
+            status_idx = (status_idx + 1) % len(status_msgs)
+            yield _yield(
+                False, False, True,
+                f"### Phase 3/4: The Trial\n*{status_msgs[status_idx]}*",
+                evidence_html_val, chat_history, None, False,
+            )
+            last_yield = now
+    trial_transcript = trial_result.text
+    yield _yield(
+        False, False, True,
+        "### Phase 3/4: Trial Complete\nThe Judge is now deliberating...",
+        evidence_html_val, chat_history, None, False,
+    )
+    # ===================================================================
+    # Phase 4: Verdict — stream judge
+    # ===================================================================
+    verdict_result = StreamResult()
+    status_msgs = STATUS_MESSAGES["verdict"]
+    status_idx = 0
+    verdict_started = False
+    last_yield = 0.0
+    for role, delta in phase_verdict_stream(
+        evidence_text, investigation_text, trial_transcript, verdict_result
+    ):
+        if not verdict_started:
+            _append_delta(chat_history, delta, f"{_agent_icon(role)} Verdict")
+            verdict_started = True
+        else:
+            _append_delta(chat_history, delta)
+        now = time.time()
+        if now - last_yield >= _MIN_YIELD_INTERVAL:
+            status_idx = (status_idx + 1) % len(status_msgs)
+            yield _yield(
+                False, False, True,
+                f"### Phase 4/4: Verdict\n*{status_msgs[status_idx]}*",
+                evidence_html_val, chat_history, None, False,
+            )
+            last_yield = now
+    verdict_text = verdict_result.text
+    # Final yield — show verdict panel and export buttons
+    yield _yield(
+        False, False, True,
+        "### Trial Complete\nThe verdict has been delivered.",
+        evidence_html_val,
+        chat_history,
+        f"## 🔨 Judge's Verdict\n\n{verdict_text}",
+        True,
+    )
+# ---------------------------------------------------------------------------
+# Export helpers
+# ---------------------------------------------------------------------------
+def _build_results_dict(
+    evidence_html_val, chat_history, verdict_text,
+) -> dict:
+    """Extract results from the state for export.
+
+    Messages are sorted into sections by keywords in their title; a message
+    whose title has no keyword stays in the section of the message before
+    it. Messages seen before the first investigation title are not exported.
+
+    Args:
+        evidence_html_val: Rendered evidence HTML, passed through as-is.
+        chat_history: Transcript messages, either objects with ``metadata``
+            and ``content`` attributes or dicts with those keys.
+        verdict_text: Final verdict; when empty, the verdict is rebuilt
+            from the transcript's verdict messages.
+
+    Returns:
+        Dict with ``evidence_html``, ``investigation``, ``transcript``,
+        ``verdict`` and ``timestamp`` entries.
+    """
+    def _field(msg, key):
+        # The transcript may hold message objects or plain dicts.
+        if isinstance(msg, dict):
+            return msg.get(key)
+        return getattr(msg, key, None)
+    investigation_parts = []
+    trial_parts = []
+    verdict_parts = []
+    current_section = "evidence"
+    for msg in chat_history:
+        title = (_field(msg, "metadata") or {}).get("title", "")
+        content = _field(msg, "content") or ""
+        if "Investigation" in title:
+            current_section = "investigation"
+        elif "Prosecution" in title or "Defense" in title or "Rebuttal" in title:
+            current_section = "trial"
+        elif "Verdict" in title:
+            current_section = "verdict"
+        if current_section == "investigation":
+            investigation_parts.append(f"### {title}\n{content}")
+        elif current_section == "trial":
+            trial_parts.append(f"### {title}\n{content}")
+        elif current_section == "verdict":
+            verdict_parts.append(content)
+    return {
+        "evidence_html": evidence_html_val,
+        "investigation": "\n\n".join(investigation_parts),
+        "transcript": "\n\n".join(trial_parts),
+        "verdict": verdict_text or "\n\n".join(verdict_parts),
+        "timestamp": time.strftime("%Y-%m-%d %H:%M:%S"),
+    }
+def generate_markdown_export(results_state: dict) -> str:
+    """Generate a Markdown report and return the filepath.
+
+    Args:
+        results_state: Mapping with optional ``timestamp``,
+            ``investigation``, ``transcript`` and ``verdict`` entries;
+            missing or ``None`` sections are rendered empty.
+
+    Returns:
+        Path of a newly created ``*_CodeTribunal_Report.md`` file, which
+        the caller is responsible for deleting.
+    """
+    md_lines = [
+        "# CodeTribunal — Trial Report\n",
+        f"**Generated**: {results_state.get('timestamp', 'N/A')}\n",
+        "---\n",
+        "## Investigation Reports\n",
+        results_state.get("investigation") or "",
+        "\n---\n",
+        "## Trial Transcript\n",
+        results_state.get("transcript") or "",
+        "\n---\n",
+        "## Verdict\n",
+        results_state.get("verdict") or "",
+        "\n",
+    ]
+    content = "\n".join(md_lines)
+    # Create the file atomically instead of reserving a name with mktemp(),
+    # and write UTF-8 explicitly because the report contains emoji and
+    # em dashes that a locale-default encoding may not be able to encode.
+    with tempfile.NamedTemporaryFile(
+        "w", suffix="_CodeTribunal_Report.md", delete=False, encoding="utf-8",
+    ) as report_file:
+        report_file.write(content)
+    return report_file.name
+def generate_pdf_export(results_state: dict) -> str:
+    """Generate a PDF report and return the filepath.
+
+    Args:
+        results_state: Mapping with optional ``timestamp``,
+            ``investigation``, ``transcript`` and ``verdict`` entries;
+            missing or ``None`` sections are rendered empty.
+
+    Returns:
+        Path of a newly created ``*_CodeTribunal_Report.pdf`` file, which
+        the caller is responsible for deleting.
+    """
+    from fpdf import FPDF
+    pdf = FPDF()
+    pdf.set_auto_page_break(auto=True, margin=15)
+    pdf.add_page()
+    # Title
+    pdf.set_font("Helvetica", "B", 24)
+    pdf.set_text_color(200, 160, 30)
+    pdf.cell(0, 15, "CodeTribunal - Trial Report", new_x="LMARGIN", new_y="NEXT", align="C")
+    pdf.set_font("Helvetica", "", 10)
+    pdf.set_text_color(120, 120, 140)
+    pdf.cell(0, 8, f"Generated: {results_state.get('timestamp', 'N/A')}", new_x="LMARGIN", new_y="NEXT", align="C")
+    pdf.ln(10)
+    def _add_section(title: str, content: str):
+        pdf.set_font("Helvetica", "B", 14)
+        pdf.set_text_color(200, 160, 30)
+        pdf.cell(0, 10, title, new_x="LMARGIN", new_y="NEXT")
+        pdf.set_text_color(50, 50, 60)
+        pdf.set_font("Helvetica", "", 10)
+        for line in (content or "").split("\n"):
+            # The built-in Helvetica font is latin-1 only; unsupported
+            # characters (emoji, etc.) are replaced with "?".
+            clean = line.encode("latin-1", "replace").decode("latin-1")
+            if clean.strip():
+                # Return to the left margin after each paragraph; by default
+                # multi_cell leaves the cursor at the right edge, which gives
+                # the next full-width cell no room to render.
+                pdf.multi_cell(0, 5, clean, new_x="LMARGIN", new_y="NEXT")
+            else:
+                pdf.ln(3)
+        pdf.ln(6)
+    _add_section("Investigation Reports", results_state.get("investigation", ""))
+    _add_section("Trial Transcript", results_state.get("transcript", ""))
+    _add_section("Verdict", results_state.get("verdict", ""))
+    # Create the output file atomically instead of reserving a name with
+    # the deprecated, race-prone mktemp().
+    with tempfile.NamedTemporaryFile(suffix="_CodeTribunal_Report.pdf", delete=False) as tmp:
+        filepath = tmp.name
+    pdf.output(filepath)
+    return filepath
+# ---------------------------------------------------------------------------
+# Gradio App
+# ---------------------------------------------------------------------------
+def create_app() -> gr.Blocks:
+    logo_path = Path(__file__).resolve().parent.parent.parent / "assets" / "logo.png"
+    with gr.Blocks(title="CodeTribunal — The AI Courtroom") as app:
+        # --- Hero Section ---
+        with gr.Column(visible=True) as hero_section:
+            if logo_path.exists():
+                gr.Image(
+                    value=str(logo_path),
+                    show_label=False,
+                    height=160,
+                    container=False,
+                    elem_classes=["hero-logo"],
+                )
+            gr.Markdown(
+                "# CodeTribunal\n### The AI Courtroom That Exposes Bad Freelance Code",
+                elem_classes=["hero-title"],
+            )
+            gr.Markdown(
+                "Upload a .zip of code and watch a multi-agent forensic investigation unfold.\n"
+                "Powered by GLM 5 + GritQL + CrewAI.",
+                elem_classes=["hero-subtitle"],
+            )
+        # --- Upload Section ---
+        with gr.Column(visible=True, elem_classes=["upload-area"]) as upload_section:
+            code_input = gr.File(
+                label="Drop your .zip here or click to upload",
+                file_types=[".zip"],
+                interactive=True,
+            )
+        # --- Processing Section ---
+        with gr.Column(visible=False) as processing_section:
+            status_md = gr.Markdown(
+                "Initializing...",
+                elem_classes=["status-phase"],
+            )
+            evidence_html = gr.HTML(
+                value="",
+                visible=True,
+            )
+            chatbot = gr.Chatbot(
+                label="Courtroom Transcript",
+                height=600,
+                elem_classes=["chatbot-panel"],
+            )
+            verdict_md = gr.Markdown(
+                value="",
+                visible=True,
+                elem_classes=["verdict-box"],
+            )
+            with gr.Row(visible=False) as export_row:
+                export_md_btn = gr.Button(
+                    "Export as Markdown",
+                    elem_classes=["export-btn"],
+                )
+                export_pdf_btn = gr.Button(
+                    "Export as PDF",
+                    elem_classes=["export-btn"],
+                )
+            export_file = gr.File(label="Download Report", visible=False)
+        # Hidden state for export
+        results_state = gr.State(value={})
+        # --- Wire events ---
+        # Auto-trigger on file upload
+        code_input.upload(
+            fn=run_courtroom,
+            inputs=[code_input],
+            outputs=[
+                hero_section, upload_section, processing_section,
+                status_md, evidence_html, chatbot, verdict_md, export_row,
+            ],
+        )
+        # Export callbacks
+        def _do_export_md(ev_html, chat, verdict):
+            results = _build_results_dict(ev_html, chat, verdict)
+            return generate_markdown_export(results)
+        def _do_export_pdf(ev_html, chat, verdict):
+            results = _build_results_dict(ev_html, chat, verdict)
+            return generate_pdf_export(results)
+        export_md_btn.click(
+            fn=_do_export_md,
+            inputs=[evidence_html, chatbot, verdict_md],
+            outputs=[export_file],
+        )
+        export_pdf_btn.click(
+            fn=_do_export_pdf,
+            inputs=[evidence_html, chatbot, verdict_md],
+            outputs=[export_file],
+        )
+    return app
+def main():
+    app = create_app()
+    app.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        css=CUSTOM_CSS,
+        theme=gr.themes.Base(
+            primary_hue="amber",
+            secondary_hue="slate",
+            neutral_hue="slate",
+        ),
+    )
+if __name__ == "__main__":
+    main()

src/code_tribunal/cli.py ADDED Viewed

	@@ -0,0 +1,44 @@

+"""CLI entry point for CodeTribunal."""
+import json
+import click
+from pathlib import Path
+from code_tribunal.courtroom import run_trial
+@click.command()
+@click.argument("path", type=click.Path(exists=True))
+@click.option("--output", "-o", type=click.Path(), help="Save full report to file (JSON)")
+@click.option("--evidence-only", is_flag=True, help="Only run Phase 1 (GritQL evidence), skip trial")
+def main(path: str, output: str | None, evidence_only: bool) -> None:
+    """Put your code on trial. PATH is the directory or zip to investigate."""
+    if evidence_only:
+        from code_tribunal.evidence import gather_evidence
+        report = gather_evidence(path)
+        click.echo(report.to_text())
+        if output:
+            Path(output).write_text(json.dumps({
+                "findings": [str(f) for f in report.findings],
+                "stats": {
+                    "files": report.file_count,
+                    "total": len(report.findings),
+                    "by_severity": {s: len(i) for s, i in report.findings_by_severity.items()},
+                },
+            }, indent=2))
+            click.echo(f"\nReport saved to {output}")
+        return
+    result = run_trial(path)
+    if output:
+        Path(output).write_text(json.dumps(result, indent=2, default=str))
+        click.echo(f"\nFull report saved to {output}")
+    # Print verdict
+    click.echo("\n" + result.get("verdict", "No verdict generated."))
+if __name__ == "__main__":
+    main()

src/code_tribunal/courtroom.py ADDED Viewed

	@@ -0,0 +1,618 @@

+"""Courtroom pipeline orchestrator — wires evidence → investigation → trial → verdict."""
+import json
+import time
+from dataclasses import asdict, dataclass, field
+from crewai import Task, Crew, Process
+from code_tribunal.agents import (
+    security_investigator,
+    quality_investigator,
+    architecture_investigator,
+    prosecutor,
+    defense_attorney,
+    judge,
+)
+from code_tribunal.evidence import gather_evidence, EvidenceReport
+# ---------------------------------------------------------------------------
+# Phase 1: Evidence (deterministic, no LLM)
+# ---------------------------------------------------------------------------
+def phase_evidence(target_dir: str) -> EvidenceReport:
+    """Run GritQL scans and return structured evidence."""
+    print("\n[Phase 1] Gathering evidence with GritQL...")
+    report = gather_evidence(target_dir)
+    print(f"  Files scanned: {report.file_count}")
+    print(f"  Findings: {len(report.findings)}")
+    print(f"  By severity: " + ", ".join(
+        f"{sev}={len(items)}" for sev, items in sorted(report.findings_by_severity.items())
+    ))
+    return report
+# ---------------------------------------------------------------------------
+# Phase 2: Investigation (3 specialist investigators, run sequentially)
+# ---------------------------------------------------------------------------
+def _domain_evidence_text(report: EvidenceReport, domain: str) -> str:
+    """Extract findings for a specific domain as text."""
+    findings = report.findings_by_domain.get(domain, [])
+    if not findings:
+        return f"No {domain} findings detected."
+    return "\n".join(str(f) for f in findings)
+def phase_investigation(report: EvidenceReport) -> dict[str, str]:
+    """Run three specialist investigators in parallel."""
+    print("\n[Phase 2] Investigation — deploying specialist agents...")
+    sec_agent = security_investigator()
+    qual_agent = quality_investigator()
+    arch_agent = architecture_investigator()
+    full_evidence = report.to_text()
+    sec_task = Task(
+        description=(
+            "You are investigating a codebase for security vulnerabilities.\n\n"
+            "SECURITY EVIDENCE:\n"
+            f"{_domain_evidence_text(report, 'security')}\n\n"
+            "FULL EVIDENCE REPORT FOR CONTEXT:\n"
+            f"{full_evidence}\n\n"
+            "Produce a detailed security investigation report. For each finding:\n"
+            "- What the vulnerability is\n"
+            "- The attack vector (how it could be exploited)\n"
+            "- Severity: CRITICAL / HIGH / MEDIUM / LOW\n"
+            "- Potential business impact\n"
+            "- Recommended fix"
+        ),
+        agent=sec_agent,
+        expected_output="A structured security investigation report with severity-ranked findings and remediation.",
+    )
+    qual_task = Task(
+        description=(
+            "You are investigating a codebase for quality and negligence indicators.\n\n"
+            "QUALITY EVIDENCE:\n"
+            f"{_domain_evidence_text(report, 'quality')}\n\n"
+            "FULL EVIDENCE REPORT FOR CONTEXT:\n"
+            f"{full_evidence}\n\n"
+            "Produce a quality investigation report. Assess:\n"
+            "- Technical debt indicators (TODOs, FIXMEs, HACKs)\n"
+            "- Dead code / unused functions\n"
+            "- Missing error handling\n"
+            "- Signs of rushed or careless development\n"
+            "- Whether the code was production-ready when delivered"
+        ),
+        agent=qual_agent,
+        expected_output="A structured quality investigation report identifying negligence indicators and technical debt.",
+    )
+    arch_task = Task(
+        description=(
+            "You are investigating a codebase for architectural problems.\n\n"
+            "FULL EVIDENCE REPORT:\n"
+            f"{full_evidence}\n\n"
+            "Produce an architecture investigation report. Assess:\n"
+            "- Hardcoded configuration that should be externalized\n"
+            "- Tight coupling and missing abstractions\n"
+            "- Whether the architecture supports the intended use case\n"
+            "- Scalability concerns\n"
+            "- Whether this looks like professional work or amateur delivery"
+        ),
+        agent=arch_agent,
+        expected_output="A structured architecture investigation report assessing structural soundness.",
+    )
+    # Run investigators in a single crew — CrewAI will execute tasks sequentially
+    # (parallel crews would require separate Crew instances kicked off concurrently)
+    investigation_crew = Crew(
+        agents=[sec_agent, qual_agent, arch_agent],
+        tasks=[sec_task, qual_task, arch_task],
+        process=Process.sequential,
+        verbose=True,
+    )
+    result = investigation_crew.kickoff()
+    # Extract individual task outputs
+    reports = {}
+    task_outputs = result.tasks_output if hasattr(result, "tasks_output") else []
+    labels = ["security", "quality", "architecture"]
+    for i, label in enumerate(labels):
+        if i < len(task_outputs):
+            reports[label] = task_outputs[i].raw if hasattr(task_outputs[i], "raw") else str(task_outputs[i])
+        else:
+            reports[label] = ""
+    print(f"  Investigation complete: {len(reports)} reports generated.")
+    return reports
+# ---------------------------------------------------------------------------
+# Phase 3: The Trial (prosecutor vs defense)
+# ---------------------------------------------------------------------------
+def phase_trial(evidence_text: str, investigation_reports: dict[str, str]) -> str:
+    """Run the courtroom debate between prosecutor and defense attorney."""
+    print("\n[Phase 3] The Trial — Prosecutor vs Defense Attorney...")
+    # Separate agent instances for each role (no reuse)
+    pros_agent = prosecutor()
+    def_agent = defense_attorney()
+    pros_rebuttal_agent = prosecutor()  # fresh instance for rebuttal
+    investigation_text = "\n\n".join(
+        f"=== {k.upper()} INVESTIGATION ===\n{v}"
+        for k, v in investigation_reports.items()
+    )
+    # Round 1: Prosecutor presents the case
+    prosecution_task = Task(
+        description=(
+            "PRESENT THE PROSECUTION'S CASE\n\n"
+            "You are presenting evidence against a freelance developer who delivered this code to a paying client.\n\n"
+            "RAW EVIDENCE:\n"
+            f"{evidence_text}\n\n"
+            "INVESTIGATION REPORTS:\n"
+            f"{investigation_text}\n\n"
+            "Build your case. Be specific. Cite findings by category, severity, and potential impact. "
+            "Argue that this code represents negligence, not mere imperfection."
+        ),
+        agent=pros_agent,
+        expected_output="A compelling prosecution argument citing specific evidence and arguing negligence.",
+    )
+    # Round 2: Defense cross-examines — receives prosecution output via context
+    defense_task = Task(
+        description=(
+            "PRESENT THE DEFENSE\n\n"
+            "The prosecution has presented their case against this code. "
+            "Below is the PROSECUTION'S ARGUMENT — read it carefully, then mount your defense.\n\n"
+            "RAW EVIDENCE:\n"
+            f"{evidence_text}\n\n"
+            "INVESTIGATION REPORTS:\n"
+            f"{investigation_text}\n\n"
+            "Challenge the prosecution's specific claims. Argue context, proportionality, and intent. "
+            "Not every issue is negligence. Some patterns are acceptable in certain contexts. "
+            "Be honest but vigorous."
+        ),
+        agent=def_agent,
+        context=[prosecution_task],  # Defense sees the prosecution's output
+        expected_output="A vigorous defense argument challenging the prosecution's claims with context and proportionality.",
+    )
+    # Round 3: Prosecutor rebuttal — receives defense output via context
+    rebuttal_task = Task(
+        description=(
+            "REBUTTAL\n\n"
+            "The defense has responded to your case. Below is the DEFENSE'S ARGUMENT. "
+            "Now deliver your rebuttal.\n\n"
+            "Address their strongest points. Where are they wrong? "
+            "Where are they minimizing real harm? "
+            "End with a closing argument for the judge."
+        ),
+        agent=pros_rebuttal_agent,
+        context=[prosecution_task, defense_task],  # Rebuttal sees both prior arguments
+        expected_output="A sharp rebuttal addressing the defense's arguments and closing the prosecution's case.",
+    )
+    trial_crew = Crew(
+        agents=[pros_agent, def_agent, pros_rebuttal_agent],
+        tasks=[prosecution_task, defense_task, rebuttal_task],
+        process=Process.sequential,
+        verbose=True,
+    )
+    result = trial_crew.kickoff()
+    # Collect the full trial transcript
+    transcript_parts = []
+    task_outputs = result.tasks_output if hasattr(result, "tasks_output") else []
+    round_names = ["PROSECUTION", "DEFENSE", "REBUTTAL"]
+    for i, name in enumerate(round_names):
+        if i < len(task_outputs):
+            raw = task_outputs[i].raw if hasattr(task_outputs[i], "raw") else str(task_outputs[i])
+            transcript_parts.append(f"=== {name} ===\n{raw}")
+    transcript = "\n\n".join(transcript_parts)
+    print("  Trial complete: 3 rounds of argument.")
+    return transcript
+# ---------------------------------------------------------------------------
+# Phase 4: The Verdict
+# ---------------------------------------------------------------------------
+def phase_verdict(evidence_text: str, investigation_text: str, trial_transcript: str) -> str:
+    """Judge delivers the final verdict."""
+    print("\n[Phase 4] The Verdict — Judge deliberating...")
+    judge_agent = judge()
+    verdict_task = Task(
+        description=(
+            "DELIVER YOUR VERDICT\n\n"
+            "You have reviewed all evidence, investigation reports, and the full trial transcript.\n\n"
+            "RAW EVIDENCE:\n"
+            f"{evidence_text}\n\n"
+            "INVESTIGATION REPORTS:\n"
+            f"{investigation_text}\n\n"
+            "TRIAL TRANSCRIPT:\n"
+            f"{trial_transcript}\n\n"
+            "Deliver a structured verdict:\n\n"
+            "## VERDICT\n"
+            "Overall: [GUILTY / MIXED / NOT GUILTY]\n"
+            "Reputational Risk Score: [0-100]\n\n"
+            "## FINDINGS SUMMARY\n"
+            "For each finding: severity, impact, remediation\n\n"
+            "## SENTENCE\n"
+            "Your final assessment and recommendations for the client."
+        ),
+        agent=judge_agent,
+        expected_output=(
+            "A structured verdict with overall ruling, reputational risk score (0-100), "
+            "findings summary, and final sentence."
+        ),
+    )
+    verdict_crew = Crew(
+        agents=[judge_agent],
+        tasks=[verdict_task],
+        verbose=True,
+    )
+    result = verdict_crew.kickoff()
+    verdict = result.raw if hasattr(result, "raw") else str(result)
+    print("  Verdict delivered.")
+    return verdict
+# ---------------------------------------------------------------------------
+# Full Pipeline
+# ---------------------------------------------------------------------------
+def run_trial(target_dir: str) -> dict:
+    """Execute the full CodeTribunal pipeline on a target directory."""
+    print("=" * 60)
+    print("  CODETRIBUNAL — THE AI COURTROOM")
+    print("=" * 60)
+    # Phase 1: Evidence
+    evidence_report = phase_evidence(target_dir)
+    evidence_text = evidence_report.to_text()
+    if not evidence_report.findings:
+        print("\nNo findings detected. Case dismissed — code appears clean.")
+        return {"verdict": "DISMISSED", "reason": "No evidence of issues found."}
+    # Phase 2: Investigation
+    investigation_reports = phase_investigation(evidence_report)
+    investigation_text = "\n\n".join(
+        f"=== {k.upper()} INVESTIGATION ===\n{v}"
+        for k, v in investigation_reports.items()
+    )
+    # Phase 3: Trial
+    trial_transcript = phase_trial(evidence_text, investigation_reports)
+    # Phase 4: Verdict
+    verdict = phase_verdict(evidence_text, investigation_text, trial_transcript)
+    print("\n" + "=" * 60)
+    print("  TRIAL COMPLETE")
+    print("=" * 60)
+    return {
+        "evidence": evidence_text,
+        "investigation": investigation_text,
+        "transcript": trial_transcript,
+        "verdict": verdict,
+        "stats": {
+            "files_scanned": evidence_report.file_count,
+            "total_findings": len(evidence_report.findings),
+            "by_severity": {
+                sev: len(items) for sev, items in evidence_report.findings_by_severity.items()
+            },
+        },
+    }
+# ---------------------------------------------------------------------------
+# Streaming variants (for Gradio UI — preserve existing functions for CLI)
+# ---------------------------------------------------------------------------
+@dataclass
+class StreamResult:
+    """Mutable accumulator — populated after streaming completes."""
+    text: str = ""
+    metadata: dict = field(default_factory=dict)
+def _simulate_stream(text: str, role: str, chunk_size: int = 4):
+    """Fallback: simulate token-by-token streaming from a complete text."""
+    for i in range(0, len(text), chunk_size):
+        yield role, text[i : i + chunk_size]
+        time.sleep(0.01)
+def phase_investigation_stream(report: "EvidenceReport", result: StreamResult):
+    """Streaming variant of phase_investigation. Yields (agent_role, delta, task_index).
+    Builds the same three investigation tasks as ``phase_investigation`` and
+    runs them in a crew created with ``stream=True``, yielding one tuple per
+    streamed chunk. If anything in the streaming path raises ``Exception``,
+    the blocking ``phase_investigation`` is run instead and its finished
+    reports are replayed through ``_simulate_stream``.
+    Args:
+        report: Evidence report supplying the prompt text.
+        result: Accumulator filled in once the generator is exhausted:
+            ``result.text`` gets the combined "=== <DOMAIN> INVESTIGATION ==="
+            sections and ``result.metadata["reports"]`` the per-domain dict.
+    """
+    sec_agent = security_investigator()
+    qual_agent = quality_investigator()
+    arch_agent = architecture_investigator()
+    full_evidence = report.to_text()
+    sec_task = Task(
+        description=(
+            "You are investigating a codebase for security vulnerabilities.\n\n"
+            "SECURITY EVIDENCE:\n"
+            f"{_domain_evidence_text(report, 'security')}\n\n"
+            "FULL EVIDENCE REPORT FOR CONTEXT:\n"
+            f"{full_evidence}\n\n"
+            "Produce a detailed security investigation report. For each finding:\n"
+            "- What the vulnerability is\n"
+            "- The attack vector (how it could be exploited)\n"
+            "- Severity: CRITICAL / HIGH / MEDIUM / LOW\n"
+            "- Potential business impact\n"
+            "- Recommended fix"
+        ),
+        agent=sec_agent,
+        expected_output="A structured security investigation report with severity-ranked findings and remediation.",
+    )
+    qual_task = Task(
+        description=(
+            "You are investigating a codebase for quality and negligence indicators.\n\n"
+            "QUALITY EVIDENCE:\n"
+            f"{_domain_evidence_text(report, 'quality')}\n\n"
+            "FULL EVIDENCE REPORT FOR CONTEXT:\n"
+            f"{full_evidence}\n\n"
+            "Produce a quality investigation report. Assess:\n"
+            "- Technical debt indicators (TODOs, FIXMEs, HACKs)\n"
+            "- Dead code / unused functions\n"
+            "- Missing error handling\n"
+            "- Signs of rushed or careless development\n"
+            "- Whether the code was production-ready when delivered"
+        ),
+        agent=qual_agent,
+        expected_output="A structured quality investigation report identifying negligence indicators and technical debt.",
+    )
+    arch_task = Task(
+        description=(
+            "You are investigating a codebase for architectural problems.\n\n"
+            "FULL EVIDENCE REPORT:\n"
+            f"{full_evidence}\n\n"
+            "Produce an architecture investigation report. Assess:\n"
+            "- Hardcoded configuration that should be externalized\n"
+            "- Tight coupling and missing abstractions\n"
+            "- Whether the architecture supports the intended use case\n"
+            "- Scalability concerns\n"
+            "- Whether this looks like professional work or amateur delivery"
+        ),
+        agent=arch_agent,
+        expected_output="A structured architecture investigation report assessing structural soundness.",
+    )
+    # NOTE(review): the try block covers the whole streaming loop, so an
+    # exception raised after some chunks were already yielded still triggers
+    # the fallback, which re-runs the investigation and yields its text again.
+    try:
+        investigation_crew = Crew(
+            agents=[sec_agent, qual_agent, arch_agent],
+            tasks=[sec_task, qual_task, arch_task],
+            process=Process.sequential,
+            verbose=True,
+            stream=True,
+        )
+        accumulated = {"security": "", "quality": "", "architecture": ""}
+        labels = ["security", "quality", "architecture"]
+        streaming_output = investigation_crew.kickoff()
+        for chunk in streaming_output:
+            # Missing/falsy chunk fields fall back to neutral defaults.
+            delta = chunk.content or ""
+            task_idx = chunk.task_index or 0
+            role = chunk.agent_role or "Investigator"
+            # Only indices that map to a known label are accumulated; the
+            # chunk is yielded either way.
+            if task_idx < len(labels):
+                accumulated[labels[task_idx]] += delta
+            yield (role, delta, task_idx)
+        # Extract final task outputs
+        # These replace the text pieced together from streamed deltas.
+        crew_result = streaming_output.result
+        task_outputs = crew_result.tasks_output if hasattr(crew_result, "tasks_output") else []
+        for i, label in enumerate(labels):
+            if i < len(task_outputs):
+                raw = task_outputs[i].raw if hasattr(task_outputs[i], "raw") else str(task_outputs[i])
+                accumulated[label] = raw
+        result.text = "\n\n".join(
+            f"=== {k.upper()} INVESTIGATION ===\n{v}"
+            for k, v in accumulated.items()
+        )
+        result.metadata["reports"] = accumulated
+    except Exception:
+        # Fallback: run blocking and simulate streaming
+        # NOTE(review): the exception itself is not logged or re-raised here.
+        reports = phase_investigation(report)
+        investigation_text = "\n\n".join(
+            f"=== {k.upper()} INVESTIGATION ===\n{v}"
+            for k, v in reports.items()
+        )
+        # Role labels are matched to reports by position (security, quality, architecture).
+        roles = ["Security Forensic Investigator", "Code Quality Forensic Investigator", "Architecture Forensic Investigator"]
+        for i, (domain, text) in enumerate(reports.items()):
+            for role, delta in _simulate_stream(text, roles[i]):
+                yield (role, delta, i)
+        result.text = investigation_text
+        result.metadata["reports"] = reports
+def phase_trial_stream(evidence_text: str, investigation_reports: dict, result: StreamResult):
+    """Streaming variant of phase_trial. Yields (agent_role, delta, round_name, task_index)."""
+    pros_agent = prosecutor()
+    def_agent = defense_attorney()
+    pros_rebuttal_agent = prosecutor()
+    investigation_text = "\n\n".join(
+        f"=== {k.upper()} INVESTIGATION ===\n{v}"
+        for k, v in investigation_reports.items()
+    )
+    prosecution_task = Task(
+        description=(
+            "PRESENT THE PROSECUTION'S CASE\n\n"
+            "You are presenting evidence against a freelance developer who delivered this code to a paying client.\n\n"
+            "RAW EVIDENCE:\n"
+            f"{evidence_text}\n\n"
+            "INVESTIGATION REPORTS:\n"
+            f"{investigation_text}\n\n"
+            "Build your case. Be specific. Cite findings by category, severity, and potential impact. "
+            "Argue that this code represents negligence, not mere imperfection."
+        ),
+        agent=pros_agent,
+        expected_output="A compelling prosecution argument citing specific evidence and arguing negligence.",
+    )
+    defense_task = Task(
+        description=(
+            "PRESENT THE DEFENSE\n\n"
+            "The prosecution has presented their case against this code. "
+            "Below is the PROSECUTION'S ARGUMENT — read it carefully, then mount your defense.\n\n"
+            "RAW EVIDENCE:\n"
+            f"{evidence_text}\n\n"
+            "INVESTIGATION REPORTS:\n"
+            f"{investigation_text}\n\n"
+            "Challenge the prosecution's specific claims. Argue context, proportionality, and intent. "
+            "Not every issue is negligence. Some patterns are acceptable in certain contexts. "
+            "Be honest but vigorous."
+        ),
+        agent=def_agent,
+        context=[prosecution_task],
+        expected_output="A vigorous defense argument challenging the prosecution's claims with context and proportionality.",
+    )
+    rebuttal_task = Task(
+        description=(
+            "REBUTTAL\n\n"
+            "The defense has responded to your case. Below is the DEFENSE'S ARGUMENT. "
+            "Now deliver your rebuttal.\n\n"
+            "Address their strongest points. Where are they wrong? "
+            "Where are they minimizing real harm? "
+            "End with a closing argument for the judge."
+        ),
+        agent=pros_rebuttal_agent,
+        context=[prosecution_task, defense_task],
+        expected_output="A sharp rebuttal addressing the defense's arguments and closing the prosecution's case.",
+    )
+    round_names = ["Prosecution", "Defense", "Rebuttal"]
+    try:
+        trial_crew = Crew(
+            agents=[pros_agent, def_agent, pros_rebuttal_agent],
+            tasks=[prosecution_task, defense_task, rebuttal_task],
+            process=Process.sequential,
+            verbose=True,
+            stream=True,
+        )
+        accumulated_rounds = ["", "", ""]
+        streaming_output = trial_crew.kickoff()
+        for chunk in streaming_output:
+            delta = chunk.content or ""
+            task_idx = chunk.task_index or 0
+            role = chunk.agent_role or "Unknown"
+            round_name = round_names[task_idx] if task_idx < len(round_names) else f"Round {task_idx}"
+            accumulated_rounds[task_idx] += delta
+            yield (role, delta, round_name, task_idx)
+        crew_result = streaming_output.result
+        task_outputs = crew_result.tasks_output if hasattr(crew_result, "tasks_output") else []
+        for i in range(min(len(round_names), len(task_outputs))):
+            raw = task_outputs[i].raw if hasattr(task_outputs[i], "raw") else str(task_outputs[i])
+            accumulated_rounds[i] = raw
+        transcript = "\n\n".join(
+            f"=== {name} ===\n{text}"
+            for name, text in zip(round_names, accumulated_rounds)
+        )
+        result.text = transcript
+    except Exception:
+        # Fallback
+        transcript = phase_trial(evidence_text, investigation_reports)
+        roles = ["The Prosecutor", "The Defense Attorney", "The Prosecutor"]
+        for section in transcript.split("\n\n"):
+            for round_name in round_names:
+                if section.startswith(f"=== {round_name}"):
+                    content = section.replace(f"=== {round_name} ===", "").strip()
+                    task_idx = round_names.index(round_name)
+                    for role, delta in _simulate_stream(content, roles[task_idx]):
+                        yield (role, delta, round_name, task_idx)
+                    break
+        result.text = transcript
+def phase_verdict_stream(evidence_text: str, investigation_text: str, trial_transcript: str, result: StreamResult):
+    """Streaming variant of phase_verdict. Yields (agent_role, delta)."""
+    judge_agent = judge()
+    verdict_task = Task(
+        description=(
+            "DELIVER YOUR VERDICT\n\n"
+            "You have reviewed all evidence, investigation reports, and the full trial transcript.\n\n"
+            "RAW EVIDENCE:\n"
+            f"{evidence_text}\n\n"
+            "INVESTIGATION REPORTS:\n"
+            f"{investigation_text}\n\n"
+            "TRIAL TRANSCRIPT:\n"
+            f"{trial_transcript}\n\n"
+            "Deliver a structured verdict:\n\n"
+            "## VERDICT\n"
+            "Overall: [GUILTY / MIXED / NOT GUILTY]\n"
+            "Reputational Risk Score: [0-100]\n\n"
+            "## FINDINGS SUMMARY\n"
+            "For each finding: severity, impact, remediation\n\n"
+            "## SENTENCE\n"
+            "Your final assessment and recommendations for the client."
+        ),
+        agent=judge_agent,
+        expected_output=(
+            "A structured verdict with overall ruling, reputational risk score (0-100), "
+            "findings summary, and final sentence."
+        ),
+    )
+    try:
+        verdict_crew = Crew(
+            agents=[judge_agent],
+            tasks=[verdict_task],
+            verbose=True,
+            stream=True,
+        )
+        streaming_output = verdict_crew.kickoff()
+        for chunk in streaming_output:
+            delta = chunk.content or ""
+            role = chunk.agent_role or "The Judge"
+            yield (role, delta)
+        crew_result = streaming_output.result
+        result.text = crew_result.raw if hasattr(crew_result, "raw") else str(crew_result)
+    except Exception:
+        # Fallback
+        verdict = phase_verdict(evidence_text, investigation_text, trial_transcript)
+        for role, delta in _simulate_stream(verdict, "The Judge"):
+            yield (role, delta)
+        result.text = verdict

src/code_tribunal/evidence.py ADDED Viewed

	@@ -0,0 +1,337 @@

+"""Evidence gathering layer using GritQL for deterministic code analysis."""
+import os
+import subprocess
+from dataclasses import dataclass, field
+from pathlib import Path
+from dotenv import load_dotenv
+# Read the `.env` file located two levels above this module's own directory
+# (the repository root when this file lives at src/code_tribunal/evidence.py).
+load_dotenv(Path(__file__).resolve().parent.parent.parent / ".env")
+# Catalogue of GritQL scans executed one at a time by run_gritql_scan().
+# Every entry is a dict with these keys:
+#   category      - identifier copied onto each Finding the pattern produces
+#   pattern       - GritQL pattern text handed to `grit apply`
+#   language      - value for the `--language` flag; None means the flag is omitted
+#   severity_hint - severity label copied onto each Finding (CRITICAL/HIGH/MEDIUM/LOW)
+#   domain        - grouping key ("security" or "quality") copied onto each Finding
+GRITQL_PATTERNS = [
+    # --- Hardcoded secrets (specific var names that reliably match) ---
+    {
+        "category": "secret_password",
+        "pattern": 'or { `DB_PASSWORD = $_`, `PASSWORD = $_`, `$PASS = $_` where { $PASS <: r"(?i).*password" } }',
+        "language": "python",
+        "severity_hint": "CRITICAL",
+        "domain": "security",
+    },
+    {
+        "category": "secret_api_key",
+        "pattern": 'or { `API_KEY = $_`, `SECRET_KEY = $_`, `STRIPE_KEY = $_` }',
+        "language": "python",
+        "severity_hint": "CRITICAL",
+        "domain": "security",
+    },
+    {
+        "category": "secret_aws",
+        "pattern": '`AWS_SECRET = $_`',
+        "language": "python",
+        "severity_hint": "CRITICAL",
+        "domain": "security",
+    },
+    # language is None here, so no --language flag is passed for this scan.
+    {
+        "category": "secret_js",
+        "pattern": 'or { `STRIPE_KEY = $_`, `JWT_SECRET = $_` }',
+        "language": None,
+        "severity_hint": "CRITICAL",
+        "domain": "security",
+    },
+    {
+        "category": "connection_string",
+        "pattern": '`self.connection_string = "$CONN"` where { $CONN <: r"mysql://.+" }',
+        "language": "python",
+        "severity_hint": "CRITICAL",
+        "domain": "security",
+    },
+    # --- TODO / FIXME / HACK ---
+    # Each marker has a `#`-comment variant (language "python") and a
+    # `//`-comment variant (language None).
+    {
+        "category": "todo_py",
+        "pattern": "`# TODO: $_`",
+        "language": "python",
+        "severity_hint": "LOW",
+        "domain": "quality",
+    },
+    {
+        "category": "todo_js",
+        "pattern": "`// TODO: $_`",
+        "language": None,
+        "severity_hint": "LOW",
+        "domain": "quality",
+    },
+    {
+        "category": "fixme_py",
+        "pattern": "`# FIXME: $_`",
+        "language": "python",
+        "severity_hint": "MEDIUM",
+        "domain": "quality",
+    },
+    {
+        "category": "fixme_js",
+        "pattern": "`// FIXME: $_`",
+        "language": None,
+        "severity_hint": "MEDIUM",
+        "domain": "quality",
+    },
+    {
+        "category": "hack_py",
+        "pattern": "`# HACK: $_`",
+        "language": "python",
+        "severity_hint": "MEDIUM",
+        "domain": "quality",
+    },
+    {
+        "category": "hack_js",
+        "pattern": "`// HACK: $_`",
+        "language": None,
+        "severity_hint": "MEDIUM",
+        "domain": "quality",
+    },
+    # --- Dangerous functions ---
+    {
+        "category": "eval_usage",
+        "pattern": "`eval($_)`",
+        "language": "python",
+        "severity_hint": "CRITICAL",
+        "domain": "security",
+    },
+    {
+        "category": "pickle_load",
+        "pattern": "`pickle.load($_)`",
+        "language": "python",
+        "severity_hint": "CRITICAL",
+        "domain": "security",
+    },
+    {
+        "category": "os_system",
+        "pattern": "`os.system($_)`",
+        "language": "python",
+        "severity_hint": "CRITICAL",
+        "domain": "security",
+    },
+    {
+        "category": "subprocess_shell",
+        "pattern": "`subprocess.call($_, shell=True)`",
+        "language": "python",
+        "severity_hint": "CRITICAL",
+        "domain": "security",
+    },
+    {
+        "category": "md5_hash",
+        "pattern": "`hashlib.md5($_)`",
+        "language": "python",
+        "severity_hint": "HIGH",
+        "domain": "security",
+    },
+    # --- SQL injection ---
+    # These two use raw Python strings so the regex backslashes stay literal
+    # in the pattern text handed to grit.
+    {
+        "category": "sql_injection_fstring",
+        "pattern": r'`$S` where { $S <: r"f\"SELECT.*\{.*\}\"" }',
+        "language": "python",
+        "severity_hint": "CRITICAL",
+        "domain": "security",
+    },
+    {
+        "category": "sql_injection_js",
+        "pattern": r'`$STR` where { $STR <: r"`SELECT.*\$\{.*\}`" }',
+        "language": None,
+        "severity_hint": "CRITICAL",
+        "domain": "security",
+    },
+]
+@dataclass
+class Finding:
+    """One match reported by a GritQL scan, tagged with its pattern's metadata."""
+    # Pattern identifier (the "category" of the pattern that matched).
+    category: str
+    # File path as reported in the grit output.
+    file: str
+    # Line number kept as text, exactly as parsed from the grit output.
+    line: str
+    # Source snippet printed next to the line number.
+    code: str
+    # Severity label taken from the pattern definition.
+    severity_hint: str
+    # Grouping key taken from the pattern definition.
+    domain: str
+    def __str__(self) -> str:
+        """Render as "[SEVERITY] file:line — code" with line and code trimmed."""
+        location = f"{self.file}:{self.line.strip()}"
+        snippet = self.code.strip()
+        return f"[{self.severity_hint}] {location} — {snippet}"
+@dataclass
+class EvidenceReport:
+    """Everything the GritQL scans produced for one target directory."""
+    # Directory that was scanned.
+    target_path: str
+    # All findings, in the order the patterns produced them.
+    findings: list[Finding] = field(default_factory=list)
+    # Number of source files counted under the target.
+    file_count: int = 0
+    # Number of patterns that were run.
+    total_patterns: int = 0
+    # Number of patterns that produced at least one finding.
+    patterns_with_hits: int = 0
+    def _bucket(self, attr: str) -> dict[str, list[Finding]]:
+        """Group findings by the named Finding attribute, keeping first-seen key order."""
+        buckets: dict[str, list[Finding]] = {}
+        for finding in self.findings:
+            key = getattr(finding, attr)
+            if key not in buckets:
+                buckets[key] = []
+            buckets[key].append(finding)
+        return buckets
+    @property
+    def findings_by_domain(self) -> dict[str, list[Finding]]:
+        """Findings keyed by their ``domain`` value."""
+        return self._bucket("domain")
+    @property
+    def findings_by_severity(self) -> dict[str, list[Finding]]:
+        """Findings keyed by their ``severity_hint`` value."""
+        return self._bucket("severity_hint")
+    def to_text(self) -> str:
+        """Render the report as plain text: a header, then one section per domain."""
+        out = [
+            "=== FORENSIC EVIDENCE REPORT ===",
+            f"Target: {self.target_path}",
+            f"Files scanned: {self.file_count}",
+            f"Total findings: {len(self.findings)}",
+            "",
+        ]
+        for domain, members in self.findings_by_domain.items():
+            out.append(f"--- {domain.upper()} EVIDENCE ({len(members)} findings) ---")
+            out.extend(str(member) for member in members)
+            # Empty entry so each section ends with a newline in the joined text.
+            out.append("")
+        return "\n".join(out)
+def _parse_gritql_output(raw: str) -> list[tuple[str, str, str]]:
+    """Turn grit CLI text output into (file, line_number, code_snippet) tuples.
+    Non-indented lines containing a dot or slash are taken as file headers;
+    indented lines that begin with a digit are taken as matches in the most
+    recent file. Everything else is ignored.
+    """
+    matches = []
+    active_file = None
+    for raw_line in raw.splitlines():
+        text = raw_line.rstrip()
+        if not text:
+            continue
+        # Summary lines such as "Processed X files and found Y matches" carry no match.
+        if text.startswith("Processed") and "files" in text:
+            continue
+        if not text[0].isspace():
+            # Unindented line: only a path-looking one changes the current file.
+            if "." in text or "/" in text:
+                active_file = text
+            continue
+        if not active_file:
+            # Indented line seen before any file header.
+            continue
+        # Indented match line, e.g. "    80      return eval(expression)"
+        body = text.strip()
+        if not body or not body[0].isdigit():
+            continue
+        number, *remainder = body.split(None, 1)
+        snippet = remainder[0] if remainder else ""
+        matches.append((active_file, number, snippet))
+    return matches
+def run_gritql_scan(pattern_def: dict, target_dir: str) -> list[Finding]:
+    """Execute one GritQL pattern against ``target_dir`` and wrap its matches.
+    Args:
+        pattern_def: Entry with "pattern", "category", "severity_hint" and
+            "domain" keys, plus an optional "language".
+        target_dir: Directory handed to ``grit apply``.
+    Returns:
+        One Finding per parsed match; an empty list when grit prints nothing,
+        reports zero matches, or exceeds the 30 second timeout.
+    Raises:
+        RuntimeError: If the ``grit`` executable cannot be found.
+    """
+    # --dry-run ensures no files are modified; --language overrides auto-detection
+    language = pattern_def.get("language")
+    command = ["grit", "apply", "--dry-run", pattern_def["pattern"], target_dir]
+    if language:
+        command.extend(["--language", language])
+    try:
+        completed = subprocess.run(command, capture_output=True, text=True, timeout=30)
+    except FileNotFoundError:
+        raise RuntimeError("'grit' CLI not found. Install with: npm install -g @getgrit/cli")
+    except subprocess.TimeoutExpired:
+        return []
+    stdout_text = completed.stdout.strip()
+    # Nothing printed, or the summary in stdout says "found 0 matches": no findings.
+    if not stdout_text or "found 0 matches" in stdout_text:
+        return []
+    return [
+        Finding(
+            category=pattern_def["category"],
+            file=file_path,
+            line=line_num,
+            code=code,
+            severity_hint=pattern_def["severity_hint"],
+            domain=pattern_def["domain"],
+        )
+        for file_path, line_num, code in _parse_gritql_output(stdout_text)
+    ]
+def _ensure_grit_initialized(target_dir: str) -> None:
+    """Run ``grit init`` in ``target_dir`` unless a ``.grit`` directory already exists.
+    Initialisation is best-effort: failures to launch or finish the command
+    (missing executable, bad working directory, 15 second timeout) are ignored
+    so that scanning can still be attempted.
+    Args:
+        target_dir: Directory that will be scanned.
+    """
+    if (Path(target_dir) / ".grit").exists():
+        return
+    try:
+        subprocess.run(
+            ["grit", "init"],
+            cwd=target_dir,
+            capture_output=True,
+            timeout=15,
+        )
+    except (OSError, subprocess.SubprocessError):
+        # Non-critical; some patterns may still work without init. Only
+        # process-launch and timeout errors are swallowed, so unrelated
+        # programming errors are no longer hidden.
+        pass
+def gather_evidence(target_dir: str) -> EvidenceReport:
+    """Run all GritQL patterns and return a structured evidence report.
+    This is the non-streaming entry point. It drains
+    gather_evidence_streaming(), discarding the progress strings, so both
+    entry points share a single implementation of the scan.
+    Args:
+        target_dir: Directory to scan.
+    Returns:
+        The EvidenceReport yielded by gather_evidence_streaming().
+    Raises:
+        RuntimeError: Propagated from run_gritql_scan when the grit CLI is missing.
+    """
+    report = None
+    for item in gather_evidence_streaming(target_dir):
+        # Progress updates are plain strings; only the report object is kept.
+        if isinstance(item, EvidenceReport):
+            report = item
+    return report
+def gather_evidence_streaming(target_dir: str):
+    """Run GritQL patterns one by one, reporting progress along the way.
+    Yields a status string just before each pattern is scanned, then yields the
+    final EvidenceReport as the last item.
+    Args:
+        target_dir: Directory to scan.
+    Raises:
+        RuntimeError: Propagated from run_gritql_scan when the grit CLI is missing.
+    """
+    _ensure_grit_initialized(target_dir)
+    # Count source files. rglob() also returns directories whose names end in
+    # one of these extensions (e.g. a package folder named "chart.js"), so
+    # only real files are counted.
+    source_extensions = (".py", ".js", ".ts", ".jsx", ".tsx", ".java", ".go", ".rb", ".php", ".c", ".cpp")
+    root = Path(target_dir)
+    file_count = sum(
+        1
+        for ext in source_extensions
+        for path in root.rglob(f"*{ext}")
+        if path.is_file()
+    )
+    all_findings: list[Finding] = []
+    patterns_with_hits = 0
+    total = len(GRITQL_PATTERNS)
+    for i, p in enumerate(GRITQL_PATTERNS):
+        yield f"Scanning pattern {i + 1}/{total}: **{p['category']}**..."
+        findings = run_gritql_scan(p, target_dir)
+        if findings:
+            patterns_with_hits += 1
+            all_findings.extend(findings)
+    yield EvidenceReport(
+        target_path=target_dir,
+        findings=all_findings,
+        file_count=file_count,
+        total_patterns=total,
+        patterns_with_hits=patterns_with_hits,
+    )

tests/fixtures/bad_code.zip ADDED Viewed

Binary file (2.04 kB). View file

tests/fixtures/locale/app.py ADDED Viewed

	@@ -0,0 +1,80 @@

+# Bad freelance code - deliberately terrible for testing
+# NOTE: every flaw in this fixture is intentional. It exists as scan input for
+# the GritQL patterns, so the code below must not be cleaned up.
+import os
+import sys
+import json
+import pickle
+import hashlib
+import subprocess
+# Hardcoded credentials
+# (assignments of the kind targeted by the secret_password, secret_api_key
+# and secret_aws patterns)
+DB_PASSWORD = "super_secret_123"
+API_KEY = "sk-abc123def456ghi789"
+AWS_SECRET = "wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY"
+# The three marker comments below are input for the todo_py, hack_py and
+# fixme_py patterns.
+# TODO: fix this later
+# HACK: temporary workaround, will remove before production
+# FIXME: this entire function is broken
+def get_user(user_id):
+    # SQL injection vulnerability
+    # (user_id is interpolated straight into the query text; the query is only
+    # returned, never executed)
+    query = f"SELECT * FROM users WHERE id = {user_id}"
+    return query
+def execute_command(user_input):
+    # Command injection
+    # (unsanitised input reaches the shell twice: via os.system and via
+    # subprocess.call with shell=True)
+    os.system("ping " + user_input)
+    subprocess.call(user_input, shell=True)
+def load_data(filename):
+    # Insecure deserialization
+    # (unpickles whatever the named file contains)
+    with open(filename, "rb") as f:
+        data = pickle.load(f)
+    return data
+def hash_password(password):
+    # Weak hashing
+    # (unsalted MD5 hex digest of the password)
+    return hashlib.md5(password.encode()).hexdigest()
+def process_payment(card_number, cvv, amount):
+    # Logging sensitive data
+    print(f"Processing payment: card={card_number}, cvv={cvv}")
+    # No encryption, no validation
+    # (amount is never used; the function always reports success)
+    return True
+class DatabaseConnection:
+    def __init__(self):
+        # Connection string with hardcoded credentials
+        self.connection_string = "mysql://admin:password123@localhost:3306/prod"
+        self.connected = False
+    def connect(self):
+        # No error handling
+        # (placeholder: does nothing)
+        pass
+    def query(self, sql):
+        # Another SQL injection point
+        # (sql is ignored; the connection string itself is returned)
+        cursor = self.connection_string
+        return cursor
+# Dead code: the three functions below are empty and never called in this file.
+def unused_function_one():
+    pass
+def unused_function_two():
+    pass
+def unused_function_three():
+    pass
+# eval on user input
+def calculate(expression):
+    return eval(expression)

tests/fixtures/locale/utils.js ADDED Viewed

	@@ -0,0 +1,46 @@

+// More bad freelance code
+// NOTE: the flaws in this fixture are intentional. It is scan input for the
+// GritQL patterns and must not be cleaned up.
+const express = require('express');
+const app = express();
+// Hardcoded secrets
+// (the two names listed in the secret_js pattern)
+const STRIPE_KEY = "sk_live_51HxxxxxXXXXXX";
+const JWT_SECRET = "my-super-secret-jwt-key-12345";
+// The two marker comments below are input for the todo_js and fixme_js patterns.
+// TODO: add authentication middleware
+// FIXME: this is not secure at all
+app.get('/api/users/:id', (req, res) => {
+    // SQL injection via string concatenation
+    // (the route parameter is interpolated into the template literal; `db` is
+    // not defined anywhere in this file and no response is sent)
+    const query = `SELECT * FROM users WHERE id = ${req.params.id}`;
+    db.query(query);
+});
+app.post('/api/login', (req, res) => {
+    // No password hashing comparison - plain text
+    // (`users` is not defined anywhere in this file)
+    const user = users.find(u => u.password === req.body.password);
+    if (user) {
+        // Exposing sensitive data in response
+        res.json({ user: user, token: generateToken(user) });
+    }
+    // No response is sent when no user matches.
+});
+app.listen(3000, () => {
+    console.log("Server running on port 3000");
+    console.log("API_KEY:", process.env.API_KEY); // logging secrets
+});
+function generateToken(user) {
+    // Weak token generation
+    // (base64 of the JSON-serialised user object; nothing is signed or encrypted)
+    return Buffer.from(JSON.stringify(user)).toString('base64');
+}
+// Dead code - never called
+function legacyHandler(req, res) {
+    console.log("This function is never used");
+}
+function oldMiddleware(req, res, next) {
+    console.log("Deprecated middleware");
+    next();
+}

tests/test_integration.py ADDED Viewed

	@@ -0,0 +1,235 @@

+"""Test integration: GritQL evidence → CrewAI agent analysis."""
+import os
+import subprocess
+from pathlib import Path
+from dotenv import load_dotenv
+from crewai import Agent, Task, Crew, LLM
+# Load .env from project root
+# (one level above the directory containing this test file)
+load_dotenv(Path(__file__).resolve().parent.parent / ".env")
+# --- Configuration ---
+# Directory of fixture files that main() scans.
+LOCALE_DIR = os.path.join(os.path.dirname(__file__), "fixtures", "locale")
+# Patterns verified against test fixtures.
+# JS patterns use // comments, Python patterns use # comments.
+# Some patterns target Python specifically via --language flag.
+# Each entry has three keys: "category" (label attached to the result),
+# "pattern" (GritQL text passed to `grit apply`) and "language" (value for
+# --language; None means the flag is not passed).
+GRITQL_PATTERNS = [
+    # --- Cross-language: hardcoded secrets ---
+    {
+        "category": "hardcoded_secrets_js",
+        "pattern": '`$VAR = "$VAL"` where { $VAR <: r"(?i).*(password|key|secret|token).*" }',
+        "language": None,  # auto-detect (JS works natively)
+    },
+    {
+        "category": "hardcoded_secrets_py",
+        "pattern": '`$VAR = $VAL` where { $VAR <: r"(?i).*(PASSWORD|KEY|SECRET|TOKEN).*" }',
+        "language": "python",
+    },
+    # --- Connection strings ---
+    {
+        "category": "connection_strings",
+        "pattern": '`"$CONN"` where { $CONN <: r"mysql://.+" }',
+        "language": None,
+    },
+    # --- TODO / FIXME / HACK comments ---
+    {
+        "category": "todo_py",
+        "pattern": "`# TODO: $_`",
+        "language": "python",
+    },
+    {
+        "category": "todo_js",
+        "pattern": "`// TODO: $_`",
+        "language": None,
+    },
+    {
+        "category": "fixme_py",
+        "pattern": "`# FIXME: $_`",
+        "language": "python",
+    },
+    {
+        "category": "fixme_js",
+        "pattern": "`// FIXME: $_`",
+        "language": None,
+    },
+    {
+        "category": "hack_py",
+        "pattern": "`# HACK: $_`",
+        "language": "python",
+    },
+    {
+        "category": "hack_js",
+        "pattern": "`// HACK: $_`",
+        "language": None,
+    },
+    # --- Dangerous function calls ---
+    {
+        "category": "eval_usage",
+        "pattern": "`eval($_)`",
+        "language": "python",
+    },
+    {
+        "category": "pickle_load",
+        "pattern": "`pickle.load($_)`",
+        "language": "python",
+    },
+    {
+        "category": "os_system",
+        "pattern": "`os.system($_)`",
+        "language": "python",
+    },
+    {
+        "category": "subprocess_shell",
+        "pattern": "`subprocess.call($_, shell=True)`",
+        "language": "python",
+    },
+    {
+        "category": "md5_hash",
+        "pattern": "`hashlib.md5($_)`",
+        "language": "python",
+    },
+    # --- SQL injection ---
+    # Raw Python strings keep the regex backslashes literal in the pattern text.
+    {
+        "category": "sql_injection_fstring",
+        "pattern": r'`$S` where { $S <: r"f\"SELECT.*\{.*\}\"" }',
+        "language": "python",
+    },
+    {
+        "category": "sql_injection_js",
+        "pattern": r'`$STR` where { $STR <: r"`SELECT.*\$\{.*\}`" }',
+        "language": None,
+    },
+]
+def run_gritql(pattern: str, target_dir: str, language: str | None = None) -> dict:
+    """Invoke ``grit apply`` for one pattern and package the outcome as a dict.
+    Args:
+        pattern: GritQL pattern text.
+        target_dir: Directory handed to grit.
+        language: Optional value for ``--language``; omitted when falsy.
+    Returns:
+        On a completed run: "pattern", "findings" (stripped stdout, or None
+        when empty), "summary" (first stderr line containing "found", or None)
+        and "returncode". On failure: "pattern", "findings" set to None, and
+        an "error" message.
+    """
+    command = ["grit", "apply", pattern, target_dir]
+    if language:
+        command.extend(["--language", language])
+    try:
+        completed = subprocess.run(
+            command,
+            capture_output=True,
+            text=True,
+            timeout=30,
+        )
+        stdout_text = completed.stdout.strip()
+        stderr_text = completed.stderr.strip()
+        # Grit prints "Processed X files and found Y matches" to stderr
+        summary = next(
+            (entry for entry in stderr_text.splitlines() if "found" in entry),
+            None,
+        )
+        return {
+            "pattern": pattern,
+            "findings": stdout_text or None,
+            "summary": summary,
+            "returncode": completed.returncode,
+        }
+    except FileNotFoundError:
+        return {"pattern": pattern, "findings": None, "error": "'grit' CLI not found. Run: npm install -g @getgrit/cli"}
+    except Exception as e:
+        # Any other failure (including a timeout) is reported via its message.
+        return {"pattern": pattern, "findings": None, "error": str(e)}
+def gather_evidence(target_dir: str) -> list[dict]:
+    """Scan ``target_dir`` with every entry in GRITQL_PATTERNS.
+    Prints a progress line per pattern and returns the run_gritql() result
+    dicts in pattern order, each tagged with its "category".
+    """
+    collected = []
+    for spec in GRITQL_PATTERNS:
+        category = spec["category"]
+        print(f"  Scanning: {category}...")
+        outcome = run_gritql(spec["pattern"], target_dir, spec.get("language"))
+        outcome["category"] = category
+        collected.append(outcome)
+    return collected
+def format_evidence_for_agent(evidence: list[dict]) -> str:
+    """Build the plain-text evidence report passed to the LLM agent.
+    Only items with truthy "findings" get a section; the second line of the
+    report states how many of the scanned categories produced findings.
+    """
+    sections = []
+    hits = 0
+    for item in evidence:
+        found = item.get("findings")
+        if not found:
+            continue
+        hits += 1
+        sections += [
+            f"--- {item['category'].upper()} ---",
+            f"Pattern: {item['pattern']}",
+            f"Findings:\n{found}",
+            "",
+        ]
+    header = [
+        "=== FORENSIC EVIDENCE REPORT ===\n",
+        f"Total categories with findings: {hits} / {len(evidence)}\n",
+    ]
+    return "\n".join(header + sections)
+def run_crewai_analysis(evidence_report: str) -> str:
+    """Send the evidence report to a single-agent CrewAI crew and return its answer.
+    The model name comes from MODEL_NAME (default "zai/glm-5.1") and the API
+    key from ZAI_API_KEY.
+    Returns:
+        The ``raw`` attribute of the kickoff result when it has one, otherwise
+        ``str()`` of the result.
+    """
+    model_name = os.environ.get("MODEL_NAME", "zai/glm-5.1")
+    llm = LLM(model=model_name, api_key=os.environ.get("ZAI_API_KEY"))
+    backstory = (
+        "You are a veteran code auditor with 15 years of experience. "
+        "You've seen every trick in the book — from hardcoded credentials to SQL injection. "
+        "You analyze deterministic scan results and provide clear, severity-ranked findings."
+    )
+    investigator = Agent(
+        role="Senior Code Forensic Investigator",
+        goal="Analyze code evidence and identify critical security vulnerabilities and code quality issues",
+        backstory=backstory,
+        llm=llm,
+        verbose=True,
+    )
+    # The whole evidence report is appended to the task instructions.
+    task_description = (
+        "Analyze the following forensic evidence report from a codebase scan. "
+        "For each finding, assess severity (CRITICAL / HIGH / MEDIUM / LOW), "
+        "explain the risk, and suggest a fix.\n\n"
+        f"{evidence_report}"
+    )
+    analysis_task = Task(
+        description=task_description,
+        agent=investigator,
+        expected_output="A structured forensic analysis report with severity-ranked findings.",
+    )
+    outcome = Crew(
+        agents=[investigator],
+        tasks=[analysis_task],
+        verbose=True,
+    ).kickoff()
+    if hasattr(outcome, "raw"):
+        return outcome.raw
+    return str(outcome)
+def main():
+    """Run the two-phase check: GritQL scan first, then the optional CrewAI analysis.
+    Phase 2 is skipped with a message when ZAI_API_KEY is not set.
+    """
+    banner = "=" * 60
+    print(banner)
+    print("CodeTribunal Integration Test")
+    print(banner)
+    # Phase 1: GritQL evidence gathering
+    print("\n[Phase 1] Gathering evidence with GritQL...")
+    evidence = gather_evidence(LOCALE_DIR)
+    hit_count = len([entry for entry in evidence if entry.get("findings")])
+    print(f"\n  Patterns scanned: {len(evidence)}")
+    print(f"  Hits: {hit_count}")
+    evidence_report = format_evidence_for_agent(evidence)
+    print("\n" + evidence_report)
+    # Phase 2: CrewAI analysis
+    if not os.environ.get("ZAI_API_KEY"):
+        print("\n[Phase 2] SKIPPED — set ZAI_API_KEY to test CrewAI integration")
+        return
+    print("\n[Phase 2] Running CrewAI analysis with GLM 5.1...")
+    report = run_crewai_analysis(evidence_report)
+    print("\n" + banner)
+    print("AGENT REPORT")
+    print(banner)
+    print(report)
+if __name__ == "__main__":
+    main()