codewraith / data /eval_results_3b.json
slenk's picture
Upload folder using huggingface_hub
eeef81e verified
{
"metrics": {
"model": "CodeWraith-3b (Llama-3.2-3B-Instruct)",
"num_examples": 28,
"num_valid": 27,
"avg_structural_score": 0.9148271576169591,
"perfect_scores": 19,
"good_scores": 24,
"avg_inference_time": 25.568465062550136,
"total_time": 715.9170217514038,
"avg_function_coverage": 0.8610007639419405,
"avg_class_coverage": 0.9555555555555556,
"avg_argument_coverage": 0.9283557570312287,
"avg_return_type_coverage": 0.6666666666666666
},
"per_example": [
{
"source_file": "messy/bd581f14cf48.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3290,
"reference_length": 3607,
"inference_time": 20.868906259536743
},
{
"source_file": "clean/3dcb1d7b4a12.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"return_type_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 7951,
"reference_length": 5881,
"inference_time": 44.38811230659485
},
{
"source_file": "clean/06e16975d4ac.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3563,
"reference_length": 2468,
"inference_time": 17.876902103424072
},
{
"source_file": "messy/94b2f20c5850.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2879,
"reference_length": 4012,
"inference_time": 17.086252689361572
},
{
"source_file": "messy/b374790e8e34.py",
"scores": {
"function_coverage": 0.75,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.9375
},
"prediction_length": 4747,
"reference_length": 4683,
"inference_time": 24.646522045135498
},
{
"source_file": "clean/4600f935ed1d.py",
"scores": {
"function_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3706,
"reference_length": 2101,
"inference_time": 17.650084018707275
},
{
"source_file": "clean/d199807b678d.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3473,
"reference_length": 3896,
"inference_time": 20.227165460586548
},
{
"source_file": "clean/46e001660b82.py",
"scores": {
"function_coverage": 0.14285714285714285,
"argument_coverage": 0.36363636363636365,
"has_structure": 0.0,
"parseable": true,
"overall": 0.16883116883116886
},
"prediction_length": 7753,
"reference_length": 566,
"inference_time": 46.384787797927856
},
{
"source_file": "messy/273a3f8a3b29.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2771,
"reference_length": 4024,
"inference_time": 14.286058187484741
},
{
"source_file": "clean/c2a7bf304f8a.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 4124,
"reference_length": 5151,
"inference_time": 25.00935935974121
},
{
"source_file": "clean/1be285af847d.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2977,
"reference_length": 3016,
"inference_time": 13.655282497406006
},
{
"source_file": "messy/e74dec1c1520.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"return_type_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 6736,
"reference_length": 6355,
"inference_time": 36.76592135429382
},
{
"source_file": "clean/615ca63ee16d.py",
"scores": {
"function_coverage": 0.7272727272727273,
"class_coverage": 1.0,
"argument_coverage": 0.7777777777777778,
"has_structure": 1.0,
"parseable": true,
"overall": 0.8762626262626263
},
"prediction_length": 8408,
"reference_length": 3434,
"inference_time": 41.89173746109009
},
{
"source_file": "messy/a7af7e6fe3a5.py",
"scores": {
"function_coverage": 0.06666666666666667,
"argument_coverage": 0.33962264150943394,
"return_type_coverage": 0.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.35157232704402513
},
"prediction_length": 7925,
"reference_length": 4043,
"inference_time": 48.157960653305054
},
{
"source_file": "messy/b1bc55044f1a.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 4944,
"reference_length": 4325,
"inference_time": 23.324952125549316
},
{
"source_file": "clean/4df191fa5942.py",
"scores": {
"overall": 0.5,
"parseable": false
},
"prediction_length": 2531,
"reference_length": 3110,
"inference_time": 11.76628828048706
},
{
"source_file": "messy/9d4e5f709420.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3622,
"reference_length": 4905,
"inference_time": 20.453138828277588
},
{
"source_file": "clean/88b8d70f4fe4.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3921,
"reference_length": 2026,
"inference_time": 20.940979480743408
},
{
"source_file": "messy/4543867f9f05.py",
"scores": {
"function_coverage": 0.6666666666666666,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.9166666666666666
},
"prediction_length": 3203,
"reference_length": 4209,
"inference_time": 15.503760814666748
},
{
"source_file": "clean/5b0bb172c22d.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3115,
"reference_length": 3073,
"inference_time": 18.117590188980103
},
{
"source_file": "clean/23b30f008578.py",
"scores": {
"function_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2693,
"reference_length": 4927,
"inference_time": 13.62615442276001
},
{
"source_file": "clean/9bb4c0bf7227.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3462,
"reference_length": 4779,
"inference_time": 16.623833179473877
},
{
"source_file": "clean/fe35435938f5.py",
"scores": {
"function_coverage": 0.5714285714285714,
"argument_coverage": 0.9583333333333334,
"has_structure": 1.0,
"parseable": true,
"overall": 0.8432539682539683
},
"prediction_length": 5900,
"reference_length": 2022,
"inference_time": 33.54556345939636
},
{
"source_file": "clean/d2e8b7aa1e82.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 5208,
"reference_length": 5141,
"inference_time": 26.93407130241394
},
{
"source_file": "clean/b6939d391a23.py",
"scores": {
"function_coverage": 0.9411764705882353,
"class_coverage": 1.0,
"argument_coverage": 0.96,
"has_structure": 1.0,
"parseable": true,
"overall": 0.9752941176470589
},
"prediction_length": 7874,
"reference_length": 6450,
"inference_time": 40.9023551940918
},
{
"source_file": "clean/c953b65d83d5.py",
"scores": {
"function_coverage": 0.38095238095238093,
"class_coverage": 0.3333333333333333,
"argument_coverage": 0.8095238095238095,
"has_structure": 1.0,
"parseable": true,
"overall": 0.6309523809523809
},
"prediction_length": 9292,
"reference_length": 8957,
"inference_time": 45.998045206069946
},
{
"source_file": "clean/8a13f8afeb71.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2917,
"reference_length": 2271,
"inference_time": 16.089794397354126
},
{
"source_file": "messy/2cf5b37f3314.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 4376,
"reference_length": 5243,
"inference_time": 23.19544267654419
}
]
}