codewraith / data /eval_results_8b_v5.json
slenk's picture
Upload folder using huggingface_hub
eeef81e verified
{
"metrics": {
"model": "CodeWraith-8b-v5 (Llama-3.1-8B-Instruct)",
"num_examples": 37,
"num_valid": 36,
"avg_structural_score": 0.9877113373947525,
"perfect_scores": 29,
"good_scores": 36,
"avg_inference_time": 25.293824969111263,
"total_time": 935.8715238571167,
"avg_function_coverage": 0.965521978021978,
"avg_class_coverage": 1.0,
"avg_argument_coverage": 0.9894483794265929,
"avg_return_type_coverage": 1.0
},
"per_example": [
{
"source_file": "clean/4600f935ed1d.py",
"scores": {
"function_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3039,
"reference_length": 2880,
"inference_time": 18.591609954833984
},
{
"source_file": "clean/71e2ec05a4ce.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 4092,
"reference_length": 3616,
"inference_time": 21.81563091278076
},
{
"source_file": "messy/e444a542540d.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 7575,
"reference_length": 4374,
"inference_time": 47.71123433113098
},
{
"source_file": "clean/cdb08f4d273a.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3446,
"reference_length": 3614,
"inference_time": 21.064913511276245
},
{
"source_file": "clean/ca94c5502737.py",
"scores": {
"function_coverage": 0.6153846153846154,
"argument_coverage": 0.9523809523809523,
"has_structure": 1.0,
"parseable": true,
"overall": 0.8559218559218559
},
"prediction_length": 8648,
"reference_length": 6735,
"inference_time": 54.13129162788391
},
{
"source_file": "messy/1c73481ac309.py",
"scores": {
"function_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3096,
"reference_length": 1718,
"inference_time": 15.332524538040161
},
{
"source_file": "clean/12581ac99721.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 4418,
"reference_length": 4130,
"inference_time": 38.43903708457947
},
{
"source_file": "clean/a2576bff34ae.py",
"scores": {
"function_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2490,
"reference_length": 2440,
"inference_time": 14.69094467163086
},
{
"source_file": "messy/666f4c354670.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"return_type_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 4816,
"reference_length": 4536,
"inference_time": 29.665302991867065
},
{
"source_file": "clean/bf74c86796e4.py",
"scores": {
"overall": 0.5,
"parseable": false
},
"prediction_length": 3227,
"reference_length": 4026,
"inference_time": 20.119393587112427
},
{
"source_file": "clean/07c7264a775f.py",
"scores": {
"function_coverage": 0.875,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.9583333333333334
},
"prediction_length": 6493,
"reference_length": 5133,
"inference_time": 36.57983469963074
},
{
"source_file": "clean/4e132b54cd4a.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3174,
"reference_length": 4065,
"inference_time": 20.479403257369995
},
{
"source_file": "clean/7645f390c515.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3679,
"reference_length": 2559,
"inference_time": 18.84472107887268
},
{
"source_file": "messy/9c892371385d.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 4995,
"reference_length": 5041,
"inference_time": 35.20069479942322
},
{
"source_file": "messy/98c5f29240b9.py",
"scores": {
"function_coverage": 0.875,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.96875
},
"prediction_length": 5949,
"reference_length": 6841,
"inference_time": 34.81227779388428
},
{
"source_file": "clean/88b8d70f4fe4.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3785,
"reference_length": 3958,
"inference_time": 21.514402627944946
},
{
"source_file": "messy/73c72bfbe73b.py",
"scores": {
"function_coverage": 0.8076923076923077,
"class_coverage": 1.0,
"argument_coverage": 0.9411764705882353,
"has_structure": 1.0,
"parseable": true,
"overall": 0.9372171945701357
},
"prediction_length": 4787,
"reference_length": 5624,
"inference_time": 28.55956482887268
},
{
"source_file": "clean/23b30f008578.py",
"scores": {
"function_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3345,
"reference_length": 3367,
"inference_time": 20.1626718044281
},
{
"source_file": "clean/d4a9d78d0281.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 7764,
"reference_length": 8236,
"inference_time": 43.4171040058136
},
{
"source_file": "clean/a3060596bc92.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2724,
"reference_length": 3016,
"inference_time": 15.7329843044281
},
{
"source_file": "messy/6f869b875813.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2930,
"reference_length": 3217,
"inference_time": 17.04488444328308
},
{
"source_file": "messy/e8a7155bfbec.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 4835,
"reference_length": 4014,
"inference_time": 28.13184118270874
},
{
"source_file": "clean/5ffc696dc345.py",
"scores": {
"function_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 4035,
"reference_length": 4605,
"inference_time": 23.935144186019897
},
{
"source_file": "clean/319db52d59c7.py",
"scores": {
"function_coverage": 0.9285714285714286,
"class_coverage": 1.0,
"argument_coverage": 0.8518518518518519,
"has_structure": 1.0,
"parseable": true,
"overall": 0.9451058201058201
},
"prediction_length": 4992,
"reference_length": 7164,
"inference_time": 29.12249255180359
},
{
"source_file": "clean/d3728f31c0db.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3722,
"reference_length": 3077,
"inference_time": 23.72391390800476
},
{
"source_file": "messy/1a534773f649.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3613,
"reference_length": 2682,
"inference_time": 21.959770441055298
},
{
"source_file": "messy/273a3f8a3b29.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2903,
"reference_length": 3326,
"inference_time": 15.395075559616089
},
{
"source_file": "clean/46e001660b82.py",
"scores": {
"function_coverage": 0.8571428571428571,
"argument_coverage": 0.9696969696969697,
"has_structure": 1.0,
"parseable": true,
"overall": 0.9422799422799423
},
"prediction_length": 5109,
"reference_length": 4027,
"inference_time": 34.29022479057312
},
{
"source_file": "messy/04e23b0b5d08.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 4639,
"reference_length": 4653,
"inference_time": 25.926616668701172
},
{
"source_file": "messy/afc593ff8c4c.py",
"scores": {
"function_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 5742,
"reference_length": 4162,
"inference_time": 36.24571490287781
},
{
"source_file": "clean/60ccb349f496.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3714,
"reference_length": 2891,
"inference_time": 20.234167098999023
},
{
"source_file": "clean/d199807b678d.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2547,
"reference_length": 3783,
"inference_time": 15.620551347732544
},
{
"source_file": "clean/d2e8b7aa1e82.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3310,
"reference_length": 3348,
"inference_time": 18.21688151359558
},
{
"source_file": "messy/20afb8e5092a.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2232,
"reference_length": 3277,
"inference_time": 10.995202779769897
},
{
"source_file": "clean/d3c7051a19d9.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3051,
"reference_length": 3097,
"inference_time": 19.41274380683899
},
{
"source_file": "messy/6d62ae4e17f6.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2944,
"reference_length": 3356,
"inference_time": 17.790547370910645
},
{
"source_file": "clean/83b5f3fff612.py",
"scores": {
"function_coverage": 0.8,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.95
},
"prediction_length": 3536,
"reference_length": 4406,
"inference_time": 20.960208892822266
}
]
}