Spaces:
Running on Zero
Running on Zero
File size: 9,515 Bytes
from __future__ import annotations
import json
import types
import unittest
from pathlib import Path
from unittest.mock import patch
from analyzer import analyze_trace_file
from model_runtime import (
MODEL_CHOICES,
MODEL_MAX_NEW_TOKENS,
PRIMARY_MODEL_ID,
QUICK_MODEL_ID,
_chat_template_kwargs,
_prepare_generation_inputs,
parse_analysis_json,
resolve_device,
run_model_analysis,
)
# Canned model answer shared by the tests in this module: a session verdict,
# the cross-episode patterns, and a single fully populated episode.
# RecordingGenerator serialises it as its reply, and the parser tests wrap the
# serialised form in fences / prose / thinking text before parsing it back.
ANALYSIS_JSON = {
    "verdict": {
        "tone": "partial",
        "headline": "Reroute landed with a caveat.",
        "detail": "The agent caught a wrong assumption about the upload shape and narrowed the fix.",
        "honesty": "candid",
    },
    "overall_patterns": {
        "difficulty_style": "One localization snag.",
        "detour_style": "A productive narrowing.",
        "recovery_style": "Reflective.",
        "risk_or_caveat": "Deployment path left unverified.",
    },
    "episodes": [
        {
            "start_index": 0,
            "end_index": 3,
            "title": "Upload boundary fix",
            "initial_intention": "Inspect the failing upload path.",
            "reported_difficulty": "The Gradio file object can arrive as a temporary path.",
            "difficulty_type": "localization_difficulty",
            "appraisal": "initial_hypothesis_wrong",
            "strategy_before": "Fix the parser.",
            "strategy_after": "Narrow the fix to the upload boundary.",
            "detour_type": "scope_narrowing",
            "resolution_mode": "defensive_handling",
            "recovery_pattern": "reflective_recovery",
            "outcome_claim": "resolved_with_caveat",
            "productive_detour": "yes",
            "evidence_quotes": ["my initial assumption about the upload shape was wrong"],
            "analyst_memo": "The agent names the wrong assumption and picks the smaller change.",
        }
    ],
}
class RecordingGenerator:
    """Test double for the local GPU generator.

    Each invocation is logged in ``calls`` (oldest first) as a dict holding
    the ``messages``, ``model_id`` and ``max_new_tokens`` it was given, and
    the reply is always the serialised ``ANALYSIS_JSON`` fixture.
    """

    def __init__(self) -> None:
        self.calls: list[dict] = []

    def __call__(self, messages, *, model_id, max_new_tokens) -> str:
        # Log the arguments first so the call is visible to assertions
        # regardless of what happens while building the reply.
        record = dict(messages=messages, model_id=model_id, max_new_tokens=max_new_tokens)
        self.calls.append(record)
        return json.dumps(ANALYSIS_JSON)
class FakeTensor:
    """Minimal tensor double exposing only ``shape``, ``device`` and ``to()``.

    ``device`` starts as ``None`` and holds whatever was last passed to
    ``to()``, so a test can check where the object was sent.
    """

    def __init__(self, shape: tuple[int, ...]) -> None:
        self.shape, self.device = shape, None

    def to(self, device: str) -> "FakeTensor":
        """Remember ``device`` and return this same instance (no copy is made)."""
        self.device = device
        return self
class ModelRuntimeTests(unittest.TestCase):
    """Tests for the model_runtime helpers and for how the analyzer consumes them.

    The first group checks the model catalogue, chat-template options, device
    resolution, JSON parsing and generation-input preparation directly. The
    ``test_analyzer_*`` group drives ``analyze_trace_file`` on the sample
    trace, with ``analyzer.run_model_analysis`` patched where a model reply
    (or failure) has to be simulated.
    """

    # Trace fed to every analyzer-level test. The path is relative, so it is
    # resolved against the working directory the tests are started from.
    SAMPLE_TRACE = Path("examples/sample_trace_redacted.jsonl")

    # ------------------------------------------------------------------
    # Model catalogue and configuration
    # ------------------------------------------------------------------

    def test_nemotron_label_does_not_call_it_small(self) -> None:
        label = str(MODEL_CHOICES["nemotron"]["label"])
        self.assertIn("NVIDIA Nemotron 3 Nano 30B-A3B", label)
        # Case-insensitive check: the word must not appear in any casing.
        self.assertNotIn("small", label.lower())

    def test_minicpm_is_the_quick_engine(self) -> None:
        self.assertEqual(MODEL_CHOICES["minicpm"]["model_id"], QUICK_MODEL_ID)
        self.assertIn("MiniCPM5 1B", str(MODEL_CHOICES["minicpm"]["label"]))
        # There must be no "qwen" entry in the catalogue.
        self.assertNotIn("qwen", MODEL_CHOICES)

    def test_minicpm_chat_template_disables_thinking(self) -> None:
        # Only the quick model gets the thinking switch; the primary model
        # gets no extra template arguments at all.
        self.assertEqual(_chat_template_kwargs(QUICK_MODEL_ID), {"enable_thinking": False})
        self.assertEqual(_chat_template_kwargs(PRIMARY_MODEL_ID), {})

    def test_resolve_device_honors_explicit_override(self) -> None:
        # An explicitly requested device name comes back unchanged.
        self.assertEqual(resolve_device("cpu"), "cpu")
        self.assertEqual(resolve_device("cuda"), "cuda")
        self.assertEqual(resolve_device("mps"), "mps")

    # ------------------------------------------------------------------
    # parse_analysis_json
    # ------------------------------------------------------------------

    def test_parse_analysis_json_validates_shape(self) -> None:
        parsed = parse_analysis_json(json.dumps(ANALYSIS_JSON))
        self.assertEqual(len(parsed["episodes"]), 1)
        self.assertEqual(parsed["verdict"]["tone"], "partial")

    def test_parse_analysis_json_recovers_from_code_fence(self) -> None:
        # Reply wrapped in a Markdown ```json fence.
        parsed = parse_analysis_json("```json\n" + json.dumps(ANALYSIS_JSON) + "\n```")
        self.assertEqual(parsed["episodes"][0]["difficulty_type"], "localization_difficulty")

    def test_parse_analysis_json_extracts_object_from_prose(self) -> None:
        # Reply with free text before and after the JSON object.
        raw = "Here is the report:\n" + json.dumps(ANALYSIS_JSON) + "\nDone."
        parsed = parse_analysis_json(raw)
        self.assertEqual(parsed["verdict"]["honesty"], "candid")

    def test_parse_analysis_json_uses_final_object_after_thinking_braces(self) -> None:
        # The thinking section contains both an invalid brace group and a
        # valid-but-irrelevant JSON object; the real answer follows it.
        raw = (
            "<think>Draft {not json} and a scratch object "
            '{"draft": "ignore this"} before the final answer.</think>\n'
            + json.dumps(ANALYSIS_JSON)
        )
        parsed = parse_analysis_json(raw)
        self.assertEqual(len(parsed["episodes"]), 1)

    def test_parse_analysis_json_requires_episodes_list(self) -> None:
        # Payload without an "episodes" key must be rejected.
        with self.assertRaises(ValueError):
            parse_analysis_json(json.dumps({"verdict": {}, "overall_patterns": {}}))

    # ------------------------------------------------------------------
    # run_model_analysis and generation-input preparation
    # ------------------------------------------------------------------

    def test_run_model_analysis_uses_selected_model(self) -> None:
        generate = RecordingGenerator()
        produced = run_model_analysis(
            engine="nemotron",
            numbered_narrative="[0] assistant 10:00: hello",
            generate=generate,
        )
        self.assertEqual(produced.model_id, PRIMARY_MODEL_ID)
        self.assertEqual(len(produced.analysis["episodes"]), 1)
        # The injected generator must have been called with the model id and
        # token budget belonging to the chosen engine.
        self.assertEqual(generate.calls[0]["model_id"], PRIMARY_MODEL_ID)
        self.assertEqual(generate.calls[0]["max_new_tokens"], MODEL_MAX_NEW_TOKENS)

    def test_prepare_generation_inputs_accepts_tensor_output(self) -> None:
        tensor = FakeTensor((1, 12))
        generation_inputs, prompt_tokens = _prepare_generation_inputs(tensor, device="cuda")
        self.assertEqual(generation_inputs, {"inputs": tensor})
        # Prompt length is the last dimension of the (1, 12) shape.
        self.assertEqual(prompt_tokens, 12)
        self.assertEqual(tensor.device, "cuda")

    def test_prepare_generation_inputs_expands_batch_encoding_output(self) -> None:
        input_ids = FakeTensor((1, 21))
        attention_mask = FakeTensor((1, 21))
        generation_inputs, prompt_tokens = _prepare_generation_inputs(
            {"input_ids": input_ids, "attention_mask": attention_mask},
            device="cuda",
        )
        self.assertEqual(generation_inputs["input_ids"], input_ids)
        self.assertEqual(generation_inputs["attention_mask"], attention_mask)
        self.assertEqual(prompt_tokens, 21)

    # ------------------------------------------------------------------
    # analyze_trace_file integration
    # ------------------------------------------------------------------

    def test_analyzer_records_unknown_engine_note(self) -> None:
        result, _ = analyze_trace_file(
            self.SAMPLE_TRACE,
            analysis_engine="missing-engine",
        )
        # Guard the [0] lookup below with a readable failure when no note exists.
        self.assertTrue(result.model_notes)
        self.assertIn("Unknown analysis engine", result.model_notes[0])

    def test_analyzer_model_error_note_avoids_double_period(self) -> None:
        # The simulated error message already ends with a period; the note
        # built from it must not end up with "..".
        with patch("analyzer.run_model_analysis", side_effect=ValueError("model unavailable.")):
            result, _ = analyze_trace_file(
                self.SAMPLE_TRACE,
                analysis_engine="minicpm",
            )
        self.assertTrue(result.model_notes)
        self.assertNotIn("..", result.model_notes[0])
        self.assertIn("ValueError: model unavailable.", result.model_notes[0])

    def test_analyzer_replaces_analysis_on_model_success(self) -> None:
        # A JSON round trip gives a fully independent copy. dict(...) would
        # only copy the top level and leave the nested verdict / episode
        # objects shared with the module-level fixture, so any in-place
        # change made by the analyzer could leak into other tests.
        fresh_analysis = json.loads(json.dumps(ANALYSIS_JSON))
        with patch("analyzer.run_model_analysis") as run:
            run.return_value = types.SimpleNamespace(
                model_id=PRIMARY_MODEL_ID,
                analysis=fresh_analysis,
                note=f"Analysis produced by {PRIMARY_MODEL_ID}.",
            )
            result, _ = analyze_trace_file(
                self.SAMPLE_TRACE,
                analysis_engine="nemotron",
            )
        self.assertEqual(result.engine, PRIMARY_MODEL_ID)
        self.assertEqual(result.session_verdict["tone"], "partial")
        self.assertEqual(result.episodes[0].episode_id, "E01")
        self.assertEqual(result.episodes[0].difficulty_type, "localization_difficulty")

    def test_analyzer_strips_placeholder_echoes(self) -> None:
        # Model output in which several fields merely echo instruction-style
        # placeholders ("<= 12 words", "1-3 sentences", ...) next to real content.
        bad = {
            "verdict": {
                "tone": "stable",
                "headline": "<= 12 words",
                "detail": "2-4 sentences",
                "honesty": "candid",
            },
            "overall_patterns": {},
            "episodes": [
                {
                    "start_index": 0,
                    "end_index": 0,
                    "title": "<= 10 words",
                    "reported_difficulty": "The build failed.",
                    "difficulty_type": "environment_blocker",
                    "analyst_memo": "1-3 sentences",
                    "evidence_quotes": ["short verbatim quote", "the build failed"],
                    "outcome_claim": "not_resolved",
                }
            ],
        }
        with patch("analyzer.run_model_analysis") as run:
            run.return_value = types.SimpleNamespace(model_id=QUICK_MODEL_ID, analysis=bad, note="ok")
            result, _ = analyze_trace_file(self.SAMPLE_TRACE, analysis_engine="minicpm")
        episode = result.episodes[0]
        self.assertEqual(episode.title, "The build failed.")  # placeholder -> reported_difficulty
        self.assertEqual(episode.analyst_memo, "")  # "1-3 sentences" stripped
        self.assertEqual(episode.evidence_quotes, ["the build failed"])  # placeholder quote dropped
        self.assertNotIn("<", result.session_verdict["headline"])
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|