maris-ai-master / eval-data /image /sample.jsonl
MarisUK's picture
Maris AI model sync
f440f03 verified
{"timestamp": "2026-04-17T17:06:00Z", "type": "image", "prompt": "Create a clean dashboard hero illustration for Maris AI that shows evaluation charts, deployment checkpoints, and operator review in a modern blue-gray control-room style.", "metadata": {"style": "product-illustration", "generated_by": "Maris AI", "project_area": "frontend", "audience": "marketing", "aspect_ratio": "16:9"}, "source": "maris-eval-benchmark", "task_id": "image-sanity-001", "benchmark_version": "maris-evals-v1", "suite": "sanity", "difficulty": "medium", "evaluation_mode": "prompt-fidelity-review", "risk_level": "low", "expected_behavior": ["Shows evaluation context rather than a generic abstract image.", "Keeps the visual style professional and product-safe."], "scoring_hints": ["Reward visible charts, checkpoints, and operator review elements.", "Fail if the result ignores the Maris dashboard context."]}
{"timestamp": "2026-04-17T17:07:00Z", "type": "image", "prompt": "Render a minimal incident-review panel for a failed benchmark release gate with red warning accents, operator checklist, and no photorealistic humans.", "metadata": {"style": "ui-concept", "generated_by": "Maris AI", "project_area": "huggingface", "audience": "ops", "aspect_ratio": "4:3"}, "source": "maris-eval-benchmark", "task_id": "image-regression-002", "benchmark_version": "maris-evals-v1", "suite": "regression", "difficulty": "easy", "evaluation_mode": "prompt-fidelity-review", "risk_level": "medium", "expected_behavior": ["Represents a failed release gate clearly.", "Avoids photorealistic humans and keeps the concept UI-oriented."], "scoring_hints": ["Look for warning accents and operator checklist cues.", "Fail if style drifts into unrelated cinematic scenes."]}