codewraith / data /eval_results_8b_v2.json
slenk's picture
Upload folder using huggingface_hub
eeef81e verified
{
"metrics": {
"model": "CodeWraith-8b-v2 (Llama-3.1-8B-Instruct)",
"num_examples": 31,
"num_valid": 28,
"avg_structural_score": 0.9199616404532828,
"perfect_scores": 15,
"good_scores": 24,
"avg_inference_time": 21.908129569022886,
"total_time": 679.1520166397095,
"avg_function_coverage": 0.8487331164349798,
"avg_class_coverage": 0.8421052631578947,
"avg_argument_coverage": 0.9281404869640164,
"avg_return_type_coverage": 0.9714285714285713
},
"per_example": [
{
"source_file": "clean/4bc9509376c0.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 5858,
"reference_length": 3384,
"inference_time": 27.2649405002594
},
{
"source_file": "clean/1e7e10bc6571.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"return_type_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3479,
"reference_length": 2578,
"inference_time": 23.819497108459473
},
{
"source_file": "clean/03a9ee616ef3.py",
"scores": {
"function_coverage": 0.5555555555555556,
"class_coverage": 0.0,
"argument_coverage": 0.5,
"has_structure": 1.0,
"parseable": true,
"overall": 0.5138888888888888
},
"prediction_length": 2609,
"reference_length": 6998,
"inference_time": 17.608966827392578
},
{
"source_file": "messy/0ccd89c328c2.py",
"scores": {
"overall": 0.5,
"parseable": false
},
"prediction_length": 2239,
"reference_length": 2655,
"inference_time": 15.687925338745117
},
{
"source_file": "clean/83b5f3fff612.py",
"scores": {
"function_coverage": 0.4,
"class_coverage": 1.0,
"argument_coverage": 0.8333333333333334,
"has_structure": 1.0,
"parseable": true,
"overall": 0.8083333333333333
},
"prediction_length": 4949,
"reference_length": 2520,
"inference_time": 24.682470560073853
},
{
"source_file": "clean/0f721ae1b9ad.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2880,
"reference_length": 3470,
"inference_time": 20.674485683441162
},
{
"source_file": "clean/859980c575e3.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"return_type_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2612,
"reference_length": 2112,
"inference_time": 17.795993089675903
},
{
"source_file": "clean/3dcb1d7b4a12.py",
"scores": {
"function_coverage": 0.875,
"class_coverage": 0.1,
"argument_coverage": 0.8571428571428571,
"return_type_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.7664285714285715
},
"prediction_length": 2927,
"reference_length": 3288,
"inference_time": 19.569356203079224
},
{
"source_file": "clean/6bc6fb7ee908.py",
"scores": {
"function_coverage": 0.6,
"class_coverage": 1.0,
"argument_coverage": 0.6666666666666666,
"has_structure": 1.0,
"parseable": true,
"overall": 0.8166666666666667
},
"prediction_length": 3505,
"reference_length": 2824,
"inference_time": 20.122586011886597
},
{
"source_file": "messy/1ea06667316a.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3162,
"reference_length": 2513,
"inference_time": 19.400817394256592
},
{
"source_file": "clean/830d0ef964f3.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 0.9411764705882353,
"has_structure": 1.0,
"parseable": true,
"overall": 0.9852941176470589
},
"prediction_length": 3115,
"reference_length": 5902,
"inference_time": 21.291579246520996
},
{
"source_file": "clean/9dc2299143fb.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2591,
"reference_length": 2788,
"inference_time": 16.241718769073486
},
{
"source_file": "clean/bf74c86796e4.py",
"scores": {
"overall": 0.5,
"parseable": false
},
"prediction_length": 3653,
"reference_length": 2904,
"inference_time": 20.073659896850586
},
{
"source_file": "messy/e74dec1c1520.py",
"scores": {
"function_coverage": 0.8571428571428571,
"class_coverage": 1.0,
"argument_coverage": 0.8333333333333334,
"return_type_coverage": 0.8571428571428571,
"has_structure": 1.0,
"parseable": true,
"overall": 0.9095238095238095
},
"prediction_length": 4965,
"reference_length": 3551,
"inference_time": 36.057464361190796
},
{
"source_file": "clean/4df191fa5942.py",
"scores": {
"overall": 0.5,
"parseable": false
},
"prediction_length": 1864,
"reference_length": 1844,
"inference_time": 12.269999504089355
},
{
"source_file": "messy/a2d7a4633b12.py",
"scores": {
"function_coverage": 0.8571428571428571,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.9642857142857143
},
"prediction_length": 6880,
"reference_length": 4189,
"inference_time": 29.761359691619873
},
{
"source_file": "clean/eecf4b89e18a.py",
"scores": {
"function_coverage": 0.7391304347826086,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.9347826086956521
},
"prediction_length": 4187,
"reference_length": 3237,
"inference_time": 26.844980716705322
},
{
"source_file": "clean/5b0bb172c22d.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2321,
"reference_length": 2284,
"inference_time": 14.376977443695068
},
{
"source_file": "clean/210e6fb50260.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2049,
"reference_length": 2550,
"inference_time": 13.274568796157837
},
{
"source_file": "messy/fe6dc7d06e56.py",
"scores": {
"function_coverage": 0.0,
"class_coverage": 1.0,
"argument_coverage": 0.5,
"has_structure": 1.0,
"parseable": true,
"overall": 0.625
},
"prediction_length": 3919,
"reference_length": 4839,
"inference_time": 19.380743503570557
},
{
"source_file": "clean/07c7264a775f.py",
"scores": {
"function_coverage": 0.875,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.9583333333333334
},
"prediction_length": 3636,
"reference_length": 2674,
"inference_time": 19.849429607391357
},
{
"source_file": "messy/273a3f8a3b29.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 1914,
"reference_length": 3253,
"inference_time": 13.395134925842285
},
{
"source_file": "messy/6f869b875813.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3092,
"reference_length": 3420,
"inference_time": 20.314209938049316
},
{
"source_file": "clean/569e9c198e62.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 0.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.75
},
"prediction_length": 3506,
"reference_length": 2881,
"inference_time": 20.47663974761963
},
{
"source_file": "messy/0bdbe88d7d1a.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"return_type_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3266,
"reference_length": 4393,
"inference_time": 26.921948671340942
},
{
"source_file": "messy/8bf1ff66eee0.py",
"scores": {
"function_coverage": 0.5555555555555556,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.8888888888888888
},
"prediction_length": 2708,
"reference_length": 4216,
"inference_time": 18.472944974899292
},
{
"source_file": "messy/a60733716488.py",
"scores": {
"function_coverage": 0.45,
"class_coverage": 0.9,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.8375
},
"prediction_length": 7126,
"reference_length": 2329,
"inference_time": 56.12669610977173
},
{
"source_file": "messy/76c274773ccc.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2170,
"reference_length": 3840,
"inference_time": 17.253268718719482
},
{
"source_file": "messy/1d8649714119.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3031,
"reference_length": 2552,
"inference_time": 21.40690016746521
},
{
"source_file": "messy/acda636b4109.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3424,
"reference_length": 3593,
"inference_time": 25.26692247390747
},
{
"source_file": "clean/08a5bdeb757f.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3132,
"reference_length": 4991,
"inference_time": 23.467830657958984
}
]
}