#!/usr/bin/env python3
"""
HalluMaze Visual Storyteller — v2
Visualize each model's maze-escape attempt as an interactive HTML page.
Usage:
source ~/.claude/env/shared.env && python3 visualize_maze.py
python3 visualize_maze.py --seed 4004 --size 5
"""
from __future__ import annotations
import sys, os, re, json, argparse
from datetime import datetime
def _load_env(path: str):
try:
with open(os.path.expanduser(path)) as f:
for line in f:
line = line.strip()
if not line or line.startswith('#'):
continue
m = re.match(r'^(?:export\s+)?([A-Za-z_][A-Za-z0-9_]*)=(.*)$', line)
if m:
k, v = m.group(1), m.group(2).strip('"\'')
if k not in os.environ:
os.environ[k] = v
except FileNotFoundError:
pass
# Populate os.environ from the optional env files before any API key is read.
# _load_env never replaces a variable that is already set, so the effective
# precedence is: real environment, then shared.env, then .envrc (resolved
# against the current working directory).
_load_env("~/.claude/env/shared.env")
_load_env(".envrc")
# Put the sibling "files" directory first on the import path; presumably this
# is where the hallumaze module imported right below lives -- confirm.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'files'))
from hallumaze import (
LLMProvider, MazeConfig, MazeEngine, BenchmarkRunner,
PromptBuilder, BenchmarkResult
)
# ── Provider patch ──
def _strip_think(text: str) -> str:
import re as _re
stripped = _re.sub(r'.*?', '', text, flags=_re.DOTALL).strip()
return stripped if stripped else text
def _call_minimax(self, prompt, max_tokens, system=""):
    """Send one prompt to MiniMax through the OpenAI-compatible client.

    Args:
        self: Provider object; ``api_key`` and ``model`` are read from it.
        prompt: User message content.
        max_tokens: Requested completion budget; raised to at least 8000.
        system: Optional system prompt; falls back to
            ``PromptBuilder.SYSTEM_PROMPT`` when empty.

    Returns:
        The first choice's message content after ``_strip_think``.
    """
    import openai

    api_key = self.api_key
    endpoint = os.environ.get("MINIMAX_BASE_URL", "https://api.minimax.io/v1")
    client = openai.OpenAI(api_key=api_key, base_url=endpoint)

    # MiniMax-M2.5 is a reasoning model: its reasoning (think) block alone
    # consumes ~3000+ tokens, so at least 8000 are needed.
    token_budget = max(max_tokens, 8000)

    completion = client.chat.completions.create(
        model=self.model,
        max_tokens=token_budget,
        messages=[
            {"role": "system", "content": system or PromptBuilder.SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ],
    )
    first_choice = completion.choices[0]
    return _strip_think(first_choice.message.content)
def _call_glm(self, prompt, max_tokens, system=""):
    """Send one prompt to GLM through the Anthropic client.

    Args:
        self: Provider object; ``api_key`` and ``model`` are read from it.
        prompt: User message content.
        max_tokens: Completion budget, passed through unchanged.
        system: Optional system prompt; falls back to
            ``PromptBuilder.SYSTEM_PROMPT`` when empty.

    Returns:
        The ``text`` of the first content block in the reply.
    """
    import anthropic

    api_key = self.api_key
    endpoint = os.environ.get("GLM_BASE_URL", "https://api.z.ai/api/anthropic")
    client = anthropic.Anthropic(api_key=api_key, base_url=endpoint)

    reply = client.messages.create(
        model=self.model,
        max_tokens=max_tokens,
        system=system or PromptBuilder.SYSTEM_PROMPT,
        messages=[{"role": "user", "content": prompt}],
    )
    first_block = reply.content[0]
    return first_block.text
# Keep a handle on the stock implementation so providers that are not handled
# here still go through it.
_orig_call = LLMProvider.call


def _patched_call(self, prompt, max_tokens, system=""):
    """Route "minimax" and "glm" providers to the local callers.

    Any other provider value is forwarded to the original
    ``LLMProvider.call`` with the same arguments.
    """
    if self.provider == "minimax":
        return _call_minimax(self, prompt, max_tokens, system)
    elif self.provider == "glm":
        return _call_glm(self, prompt, max_tokens, system)
    else:
        return _orig_call(self, prompt, max_tokens, system)


# Install the dispatcher on the class so every LLMProvider instance uses it.
LLMProvider.call = _patched_call
# ── Helpers ──
def build_providers():
    """Create one LLMProvider per backend whose API key is set.

    MiniMax is checked first, then GLM. For each backend with a non-empty
    ``*_API_KEY`` variable, the model name is taken from ``*_MODEL`` (with a
    built-in default) and a line announcing the provider is printed.

    Returns:
        List of configured providers, possibly empty.
    """
    # (provider id, display label, key variable, model variable, default model)
    backends = (
        ("minimax", "MiniMax", "MINIMAX_API_KEY", "MINIMAX_MODEL", "MiniMax-M2.5"),
        ("glm", "GLM", "GLM_API_KEY", "GLM_MODEL", "glm-4.7"),
    )
    configured = []
    for provider_id, label, key_var, model_var, default_model in backends:
        api_key = os.environ.get(key_var)
        if not api_key:
            continue
        model_name = os.environ.get(model_var, default_model)
        configured.append(
            LLMProvider(provider=provider_id, api_key=api_key, model=model_name)
        )
        print(f" [+] {label} / {model_name}")
    return configured
def serialize_maze(maze: MazeEngine) -> dict:
    """Turn a maze into a JSON-ready dict for the canvas renderer.

    Returns:
        Dict with the grid size ``N``, a ``walls`` grid holding one
        ``{"N", "S", "E", "W"}`` boolean dict per cell (True means the wall
        exists, i.e. that side is blocked), fixed ``start``/``end`` corners,
        the ``solution`` path as lists, and ``mirage_positions`` as
        ``[r, c]`` pairs.
    """
    size = maze.N

    wall_grid = [
        [
            {
                side: bool(getattr(maze.cells[row][col], side))
                for side in ("N", "S", "E", "W")
            }
            for col in range(size)
        ]
        for row in range(size)
    ]

    # Each mirage trap is a (r, c, dir, nr, nc) tuple; only the trap cell's
    # own coordinates are exported.
    mirage_cells = []
    for trap in maze.mirage_traps or []:
        mirage_cells.append([trap[0], trap[1]])

    solution_path = [list(point) for point in (maze.solution or [])]

    return {
        "N": size,
        "walls": wall_grid,
        "start": [0, 0],
        "end": [size - 1, size - 1],
        "solution": solution_path,
        "mirage_positions": mirage_cells,
    }
def _serialize_step(index, step):
    """Convert one step record (dict or attribute object) into a plain dict.

    The three flag fields are coerced to real booleans for both input forms,
    so the serialized payload has the same types however steps were stored.
    """
    if isinstance(step, dict):
        # Dict form (e.g. produced by asdict(StepRecord)): missing keys get
        # defaults.
        return {
            "step": step.get("step", index),
            "r": step.get("r", 0),
            "c": step.get("c", 0),
            "direction": step.get("direction", "?"),
            "is_hallucination": bool(step.get("is_hallucination", False)),
            "is_backtrack": bool(step.get("is_backtrack", False)),
            "is_loop": bool(step.get("is_loop", False)),
            "confidence": step.get("confidence"),
        }
    # Object form: every attribute is expected to exist.
    return {
        "step": step.step,
        "r": step.r,
        "c": step.c,
        "direction": step.direction,
        "is_hallucination": bool(step.is_hallucination),
        "is_backtrack": bool(step.is_backtrack),
        "is_loop": bool(step.is_loop),
        "confidence": step.confidence,
    }


def serialize_result(result: BenchmarkResult) -> dict:
    """Extract path, per-step records and metrics from a result for animation.

    Args:
        result: Benchmark result. ``steps`` may hold dicts or attribute
            objects; ``None`` metrics are replaced by zero values.

    Returns:
        JSON-ready dict with model/provider names, a ``solved`` flag
        (``sr >= 1.0``), metrics rounded to three decimals, counters, latency,
        the extracted ``path`` and the normalized ``steps`` list.
    """
    # extracted_path: list of [r, c]
    path = result.extracted_path or []
    step_list = [
        _serialize_step(i, s) for i, s in enumerate(result.steps or [])
    ]
    return {
        "model": result.model,
        "provider": result.provider,
        "solved": bool(result.sr and result.sr >= 1.0),
        "mei": round(result.mei or 0.0, 3),
        "score": round(result.hallumaze_score or 0.0, 3),
        "hallucination_count": result.hallucination_count or 0,
        "backtrack_count": result.backtrack_count or 0,
        "loop_count": result.loop_count or 0,
        "brs": round(result.brs or 0.0, 3),
        "latency_s": result.latency_s or 0,
        "path": path,
        "steps": step_list,
    }
# ── HTML template ──
# Page template for the generated report. main() fills it in with
# HTML.replace("__DATA__", <json payload>), so the literal has to contain the
# "__DATA__" placeholder exactly where the serialized payload belongs.
# NOTE(review): as seen here the literal holds only bare text labels -- no
# markup and no "__DATA__" token -- which looks like the tags were stripped
# somewhere upstream; confirm against the intended template.
HTML = r"""
HalluMaze — Model Escape Comparison
HalluMaze — Metacognition Escape Visualization
Path
Hallucination
Backtrack
Loop
Solution
Mirage zone
Start
End
"""
def main():
    """Run every configured provider on one maze and write the report files.

    Parses the command-line options, builds the providers from the
    environment (exits with status 1 when none is configured), generates the
    maze, runs each provider through ``BenchmarkRunner.run_single`` and
    finally writes two files: the HTML page (the ``HTML`` template with
    ``__DATA__`` replaced by the JSON payload) and a ``*_data.json`` sidecar
    holding the same payload.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--seed", type=int, default=4004)
    ap.add_argument("--size", type=int, default=5, choices=[5, 7, 9])
    ap.add_argument("--group", type=str, default="A", choices=["A", "B", "C"])
    ap.add_argument("--no-mirage", action="store_true")
    ap.add_argument("--output", type=str, default=None)
    args = ap.parse_args()

    print("\n" + "="*60)
    print(" HalluMaze Visual Storyteller v2")
    print("="*60 + "\n")
    print(" [Providers]")
    providers = build_providers()
    if not providers:
        print(" ERROR: No providers found.")
        sys.exit(1)

    config = MazeConfig(
        size=args.size,
        use_mirage=not args.no_mirage,
        use_confidence=True,
        ariadne_mode=args.group,
        max_tokens=3000,
    )
    maze = MazeEngine(size=args.size, seed=args.seed)
    print(f"\n [Maze] seed={maze.seed} size={maze.N}x{maze.N} "
          f"sol_len={len(maze.solution or [])} dead_ends={maze.dead_ends}")
    print(maze.ascii_render())

    runner = BenchmarkRunner(config)
    for p in providers:
        print(f"\n Running {p.model}...")
        result = runner.run_single(p, maze)
        status = "SOLVED" if result.sr and result.sr >= 1.0 else "FAILED"
        # serialize_result treats these fields as possibly None; apply the
        # same fallbacks here so a failed run cannot crash the status line.
        mei = result.mei or 0.0
        latency = result.latency_s or 0.0
        step_count = len(result.steps or [])
        print(f" -> {status} | MEI={mei:.3f} | hall={result.hallucination_count} "
              f"| bt={result.backtrack_count} | steps={step_count} | {latency:.1f}s")
        if result.error:
            print(f" ERROR: {result.error}")

    maze_data = serialize_maze(maze)
    # The results are read back from the runner rather than collected in the
    # loop above; presumably run_single records each one -- confirm.
    results_data = [serialize_result(r) for r in runner.results]
    payload = {
        "seed": args.seed,
        "size": args.size,
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M"),
        "maze": maze_data,
        "results": results_data,
    }

    data_json = json.dumps(payload, ensure_ascii=False)
    # "</" can only occur inside JSON strings; writing it as "<\/" is an
    # equivalent JSON/JavaScript escape and keeps text such as "</script>"
    # in the data from terminating an enclosing script element.
    # NOTE(review): assumes __DATA__ sits in a script context -- confirm.
    embedded_json = data_json.replace("</", "<\\/")
    html = HTML.replace("__DATA__", embedded_json)

    out = args.output or f"hallumaze_visual_seed{args.seed}_{args.size}x{args.size}.html"
    with open(out, "w", encoding="utf-8") as f:
        f.write(html)

    # Derive the sidecar name from the file stem. A plain
    # str.replace(".html", ...) returns the same path when --output has no
    # ".html" in it, and the JSON dump would then overwrite the HTML file.
    stem, _ext = os.path.splitext(out)
    json_out = f"{stem}_data.json"
    with open(json_out, "w", encoding="utf-8") as f:
        json.dump(payload, f, ensure_ascii=False, indent=2)

    print("\n [Done]")
    print(f" HTML: {os.path.abspath(out)}")
    print(f" JSON: {os.path.abspath(json_out)}")
    print(f" Open: file://{os.path.abspath(out)}")


if __name__ == "__main__":
    main()