codewraith / data /eval_results_3b_v2.json
slenk's picture
Upload folder using huggingface_hub
eeef81e verified
{
"metrics": {
"model": "CodeWraith-3b-v2 (Llama-3.2-3B-Instruct)",
"num_examples": 31,
"num_valid": 28,
"avg_structural_score": 0.9349173785126699,
"perfect_scores": 15,
"good_scores": 25,
"avg_inference_time": 20.007101074341804,
"total_time": 620.220133304596,
"avg_function_coverage": 0.8438498225377107,
"avg_class_coverage": 0.9736842105263158,
"avg_argument_coverage": 0.9082270670505964,
"avg_return_type_coverage": 0.9714285714285713
},
"per_example": [
{
"source_file": "clean/4bc9509376c0.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2844,
"reference_length": 3384,
"inference_time": 18.0885272026062
},
{
"source_file": "clean/1e7e10bc6571.py",
"scores": {
"function_coverage": 0.8,
"argument_coverage": 1.0,
"return_type_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.95
},
"prediction_length": 4926,
"reference_length": 2578,
"inference_time": 29.220056772232056
},
{
"source_file": "clean/03a9ee616ef3.py",
"scores": {
"function_coverage": 0.4444444444444444,
"class_coverage": 1.0,
"argument_coverage": 0.16666666666666666,
"has_structure": 1.0,
"parseable": true,
"overall": 0.6527777777777778
},
"prediction_length": 3503,
"reference_length": 6998,
"inference_time": 20.247114658355713
},
{
"source_file": "messy/0ccd89c328c2.py",
"scores": {
"overall": 0.5,
"parseable": false
},
"prediction_length": 2863,
"reference_length": 2655,
"inference_time": 14.904531478881836
},
{
"source_file": "clean/83b5f3fff612.py",
"scores": {
"function_coverage": 0.4,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.85
},
"prediction_length": 3495,
"reference_length": 2520,
"inference_time": 23.264450073242188
},
{
"source_file": "clean/0f721ae1b9ad.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3473,
"reference_length": 3470,
"inference_time": 23.27858591079712
},
{
"source_file": "clean/859980c575e3.py",
"scores": {
"function_coverage": 0.5,
"class_coverage": 1.0,
"return_type_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.875
},
"prediction_length": 3255,
"reference_length": 2112,
"inference_time": 13.077555179595947
},
{
"source_file": "clean/3dcb1d7b4a12.py",
"scores": {
"function_coverage": 0.875,
"class_coverage": 0.5,
"argument_coverage": 1.0,
"return_type_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.875
},
"prediction_length": 5091,
"reference_length": 3288,
"inference_time": 29.730497360229492
},
{
"source_file": "clean/6bc6fb7ee908.py",
"scores": {
"function_coverage": 0.5,
"class_coverage": 1.0,
"argument_coverage": 0.7333333333333333,
"has_structure": 1.0,
"parseable": true,
"overall": 0.8083333333333333
},
"prediction_length": 4773,
"reference_length": 2824,
"inference_time": 28.070369243621826
},
{
"source_file": "messy/1ea06667316a.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2857,
"reference_length": 2513,
"inference_time": 16.786190271377563
},
{
"source_file": "clean/830d0ef964f3.py",
"scores": {
"function_coverage": 0.5555555555555556,
"class_coverage": 1.0,
"argument_coverage": 0.9411764705882353,
"has_structure": 1.0,
"parseable": true,
"overall": 0.8741830065359477
},
"prediction_length": 4630,
"reference_length": 5902,
"inference_time": 24.956990003585815
},
{
"source_file": "clean/9dc2299143fb.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3830,
"reference_length": 2788,
"inference_time": 21.43935513496399
},
{
"source_file": "clean/bf74c86796e4.py",
"scores": {
"overall": 0.5,
"parseable": false
},
"prediction_length": 3348,
"reference_length": 2904,
"inference_time": 15.760165691375732
},
{
"source_file": "messy/e74dec1c1520.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"return_type_coverage": 0.8571428571428571,
"has_structure": 1.0,
"parseable": true,
"overall": 0.9714285714285713
},
"prediction_length": 5344,
"reference_length": 3551,
"inference_time": 29.408547401428223
},
{
"source_file": "clean/4df191fa5942.py",
"scores": {
"overall": 0.5,
"parseable": false
},
"prediction_length": 1601,
"reference_length": 1844,
"inference_time": 10.143949031829834
},
{
"source_file": "messy/a2d7a4633b12.py",
"scores": {
"function_coverage": 0.8571428571428571,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.9642857142857143
},
"prediction_length": 3715,
"reference_length": 4189,
"inference_time": 20.434860467910767
},
{
"source_file": "clean/eecf4b89e18a.py",
"scores": {
"function_coverage": 0.6956521739130435,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.9239130434782609
},
"prediction_length": 3652,
"reference_length": 3237,
"inference_time": 23.721028327941895
},
{
"source_file": "clean/5b0bb172c22d.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 1728,
"reference_length": 2284,
"inference_time": 10.692550897598267
},
{
"source_file": "clean/210e6fb50260.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2573,
"reference_length": 2550,
"inference_time": 16.10159707069397
},
{
"source_file": "messy/fe6dc7d06e56.py",
"scores": {
"function_coverage": 0.5,
"class_coverage": 1.0,
"argument_coverage": 0.5,
"has_structure": 1.0,
"parseable": true,
"overall": 0.75
},
"prediction_length": 3201,
"reference_length": 4839,
"inference_time": 17.795408964157104
},
{
"source_file": "clean/07c7264a775f.py",
"scores": {
"function_coverage": 0.875,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.9583333333333334
},
"prediction_length": 2866,
"reference_length": 2674,
"inference_time": 14.786346435546875
},
{
"source_file": "messy/273a3f8a3b29.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2540,
"reference_length": 3253,
"inference_time": 13.740688800811768
},
{
"source_file": "messy/6f869b875813.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3033,
"reference_length": 3420,
"inference_time": 16.89442777633667
},
{
"source_file": "clean/569e9c198e62.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3779,
"reference_length": 2881,
"inference_time": 20.661334991455078
},
{
"source_file": "messy/0bdbe88d7d1a.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"return_type_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3082,
"reference_length": 4393,
"inference_time": 18.825385332107544
},
{
"source_file": "messy/8bf1ff66eee0.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3235,
"reference_length": 4216,
"inference_time": 18.384609699249268
},
{
"source_file": "messy/a60733716488.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 4713,
"reference_length": 2329,
"inference_time": 29.785675764083862
},
{
"source_file": "messy/76c274773ccc.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2810,
"reference_length": 3840,
"inference_time": 17.719330310821533
},
{
"source_file": "messy/1d8649714119.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 4883,
"reference_length": 2552,
"inference_time": 25.8785502910614
},
{
"source_file": "messy/acda636b4109.py",
"scores": {
"function_coverage": 0.625,
"class_coverage": 1.0,
"argument_coverage": 0.2727272727272727,
"has_structure": 1.0,
"parseable": true,
"overall": 0.7244318181818181
},
"prediction_length": 2875,
"reference_length": 3593,
"inference_time": 16.34171986579895
},
{
"source_file": "clean/08a5bdeb757f.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 4286,
"reference_length": 4991,
"inference_time": 20.07973289489746
}
]
}