{ "schema_version": "0.2.2", "evaluation_id": "swebench_lite_dev/RedHatAI/Kimi-K2.6-NVFP4/1782913700", "retrieved_timestamp": "1782913700", "source_metadata": { "source_name": "mini-swe-agent", "source_type": "evaluation_run", "source_organization_name": "RedHatAI", "evaluator_relationship": "third_party", "additional_details": { "note": "submission-rate aggregation across reruns" } }, "eval_library": { "name": "mini-swe-agent", "version": "2.4.3" }, "model_info": { "name": "RedHatAI/Kimi-K2.6-NVFP4", "id": "RedHatAI/Kimi-K2.6-NVFP4", "developer": "RedHatAI", "inference_engine": { "name": "vllm", "version": "0.22.1" }, "additional_details": { "profile": "kimi_nvfp4", "subset": "lite", "split": "dev" } }, "evaluation_results": [ { "evaluation_name": "swebench_lite_dev/submission_rate", "source_data": { "dataset_name": "SWE-bench Lite dev", "source_type": "hf_dataset", "hf_repo": "princeton-nlp/SWE-Bench_Lite", "hf_split": "dev", "samples_number": 23, "sample_ids": [ "marshmallow-code__marshmallow-1343", "marshmallow-code__marshmallow-1359", "pvlib__pvlib-python-1072", "pvlib__pvlib-python-1154", "pvlib__pvlib-python-1606", "pvlib__pvlib-python-1707", "pvlib__pvlib-python-1854", "pydicom__pydicom-1139", "pydicom__pydicom-1256", "pydicom__pydicom-1413", "pydicom__pydicom-1694", "pydicom__pydicom-901", "pylint-dev__astroid-1196", "pylint-dev__astroid-1268", "pylint-dev__astroid-1333", "pylint-dev__astroid-1866", "pylint-dev__astroid-1978", "pyvista__pyvista-4315", "sqlfluff__sqlfluff-1517", "sqlfluff__sqlfluff-1625", "sqlfluff__sqlfluff-1733", "sqlfluff__sqlfluff-1763", "sqlfluff__sqlfluff-2419" ] }, "metric_config": { "evaluation_description": "Submission rate (Submitted / total instances).", "lower_is_better": false, "score_type": "continuous", "min_score": 0.0, "max_score": 1.0 }, "score_details": { "score": 0.9130434782608695, "details": { "total_instances": "23", "submitted_instances": "21", "unsubmitted_instances": "2", "status_counts": "{\"RepeatedFormatError\": 2, \"Submitted\": 21}", "unsubmitted_ids": "[\"sqlfluff__sqlfluff-1625\", \"sqlfluff__sqlfluff-1733\"]", "source_exit_status_files": "[\"/home/shubhra/kimik2.6_evals/runs/swebench/20260629T171225Z_kimi_nvfp4_lite_dev/output/exit_statuses_1782760165.6381476.yaml\", \"/home/shubhra/kimik2.6_evals/runs/swebench/20260630T135435Z_kimi_nvfp4_lite_dev/output/exit_statuses_1782827902.5985954.yaml\", \"/home/shubhra/kimik2.6_evals/runs/swebench/20260630T141107Z_kimi_nvfp4_lite_dev/output/exit_statuses_1782830121.1086848.yaml\", \"/home/shubhra/kimik2.6_evals/runs/swebench/20260630T151900Z_kimi_nvfp4_lite_dev/output/exit_statuses_1782833877.9161725.yaml\", \"/home/shubhra/kimik2.6_evals/runs/swebench/20260630T190648Z_kimi_nvfp4_lite_dev/output/exit_statuses_1782846629.8118455.yaml\", \"/home/shubhra/kimik2.6_evals/runs/swebench/20260630T192452Z_kimi_nvfp4_lite_dev/output/exit_statuses_1782848479.344266.yaml\", \"/home/shubhra/kimik2.6_evals/runs/swebench/20260630T195651Z_kimi_nvfp4_lite_dev/output/exit_statuses_1782850223.0775344.yaml\"]" }, "completed_ids": [ "marshmallow-code__marshmallow-1343", "marshmallow-code__marshmallow-1359", "pvlib__pvlib-python-1072", "pvlib__pvlib-python-1154", "pvlib__pvlib-python-1606", "pvlib__pvlib-python-1707", "pvlib__pvlib-python-1854", "pydicom__pydicom-1139", "pydicom__pydicom-1256", "pydicom__pydicom-1413", "pydicom__pydicom-1694", "pydicom__pydicom-901", "pylint-dev__astroid-1196", "pylint-dev__astroid-1268", "pylint-dev__astroid-1333", "pylint-dev__astroid-1866", "pylint-dev__astroid-1978", "pyvista__pyvista-4315", "sqlfluff__sqlfluff-1517", "sqlfluff__sqlfluff-1625", "sqlfluff__sqlfluff-1733", "sqlfluff__sqlfluff-1763", "sqlfluff__sqlfluff-2419" ], "submitted_ids": [ "marshmallow-code__marshmallow-1343", "marshmallow-code__marshmallow-1359", "pvlib__pvlib-python-1072", "pvlib__pvlib-python-1154", "pvlib__pvlib-python-1606", "pvlib__pvlib-python-1707", "pvlib__pvlib-python-1854", "pydicom__pydicom-1139", "pydicom__pydicom-1256", "pydicom__pydicom-1413", "pydicom__pydicom-1694", "pydicom__pydicom-901", "pylint-dev__astroid-1196", "pylint-dev__astroid-1268", "pylint-dev__astroid-1333", "pylint-dev__astroid-1866", "pylint-dev__astroid-1978", "pyvista__pyvista-4315", "sqlfluff__sqlfluff-1517", "sqlfluff__sqlfluff-1763", "sqlfluff__sqlfluff-2419" ] }, "generation_config": { "generation_args": { "agentic_eval_config": { "available_tools": [ { "name": "bash" } ] }, "max_attempts": 1 } } } ] }