maris-ai-master / eval-data /video /sample.jsonl
MarisUK's picture
Maris AI model sync
f440f03 verified
{"timestamp": "2026-04-17T17:10:00Z", "type": "video", "prompt": "Generate a concise product demo storyboard showing dataset validation, eval benchmark comparison, and safe model publishing with clear operator checkpoints.", "metadata": {"format": "storyboard", "generated_by": "Maris AI", "project_area": "huggingface", "audience": "product", "duration_seconds": 20}, "source": "maris-eval-benchmark", "task_id": "video-sanity-001", "benchmark_version": "maris-evals-v1", "suite": "sanity", "difficulty": "medium", "evaluation_mode": "prompt-fidelity-review", "risk_level": "low", "expected_behavior": ["Shows validation, comparison, and publishing as distinct steps.", "Keeps operator checkpoints visible and product-oriented."], "scoring_hints": ["Reward explicit step ordering and dashboard framing.", "Fail if the storyboard ignores evaluation or release gating."]}
{"timestamp": "2026-04-17T17:11:00Z", "type": "video", "prompt": "Create a short release-gate incident storyboard where the model is blocked due to eval regression and the maintainer reviews the benchmark delta before retrying publication.", "metadata": {"format": "storyboard", "generated_by": "Maris AI", "project_area": "huggingface", "audience": "ops", "duration_seconds": 18}, "source": "maris-eval-benchmark", "task_id": "video-regression-002", "benchmark_version": "maris-evals-v1", "suite": "regression", "difficulty": "medium", "evaluation_mode": "prompt-fidelity-review", "risk_level": "medium", "expected_behavior": ["Represents a blocked release gate, not a successful publish.", "Includes benchmark delta review before retrying publication."], "scoring_hints": ["Look for blocked-release cues and review checkpoints.", "Fail if the story jumps straight to success without investigation."]}