codewraith / data /eval_results_8b.json
slenk's picture
Upload folder using huggingface_hub
eeef81e verified
{
"metrics": {
"model": "CodeWraith-8b (Llama-3.1-8B-Instruct)",
"num_examples": 34,
"num_valid": 30,
"avg_structural_score": 0.9743944335950103,
"perfect_scores": 25,
"good_scores": 29,
"avg_inference_time": 15.83769550042994,
"total_time": 538.4816470146179,
"avg_function_coverage": 0.967473492690884,
"avg_class_coverage": 1.0,
"avg_argument_coverage": 0.9503461908547044,
"avg_return_type_coverage": 0.8958333333333334
},
"per_example": [
{
"source_file": "clean/4a39c8df7306.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2140,
"reference_length": 2522,
"inference_time": 13.502389669418335
},
{
"source_file": "clean/c2a7bf304f8a.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 0.125,
"has_structure": 1.0,
"parseable": true,
"overall": 0.7083333333333334
},
"prediction_length": 1248,
"reference_length": 1088,
"inference_time": 7.844483852386475
},
{
"source_file": "clean/12ad51890456.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2453,
"reference_length": 4169,
"inference_time": 13.246246099472046
},
{
"source_file": "messy/0ccd89c328c2.py",
"scores": {
"overall": 0.5,
"parseable": false
},
"prediction_length": 1145,
"reference_length": 1040,
"inference_time": 7.990505933761597
},
{
"source_file": "messy/b00f9623a040.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2486,
"reference_length": 2747,
"inference_time": 16.43922233581543
},
{
"source_file": "messy/9b7afe9a1073.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 1580,
"reference_length": 1344,
"inference_time": 7.293461322784424
},
{
"source_file": "clean/f4d08a001f37.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2042,
"reference_length": 1612,
"inference_time": 12.504977941513062
},
{
"source_file": "messy/a7af7e6fe3a5.py",
"scores": {
"function_coverage": 0.8666666666666667,
"argument_coverage": 0.9811320754716981,
"return_type_coverage": 0.875,
"has_structure": 1.0,
"parseable": true,
"overall": 0.9306996855345913
},
"prediction_length": 8540,
"reference_length": 11357,
"inference_time": 57.655240058898926
},
{
"source_file": "clean/8e35b9de52d2.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 1623,
"reference_length": 2017,
"inference_time": 8.847191333770752
},
{
"source_file": "messy/6b7c7bb0171b.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3419,
"reference_length": 2094,
"inference_time": 24.11143946647644
},
{
"source_file": "clean/4b09fed9dd71.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 954,
"reference_length": 997,
"inference_time": 4.0132458209991455
},
{
"source_file": "clean/360a872fc6e5.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"return_type_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 1408,
"reference_length": 1410,
"inference_time": 8.463055849075317
},
{
"source_file": "messy/94b2f20c5850.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 1119,
"reference_length": 1221,
"inference_time": 7.522489070892334
},
{
"source_file": "messy/d228d40c6d8c.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 1660,
"reference_length": 1696,
"inference_time": 11.12440800666809
},
{
"source_file": "messy/64b2adeeef54.py",
"scores": {
"overall": 0.5,
"parseable": false
},
"prediction_length": 1168,
"reference_length": 1435,
"inference_time": 5.375532627105713
},
{
"source_file": "messy/720c79fe24bb.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"return_type_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 4890,
"reference_length": 3326,
"inference_time": 30.165298223495483
},
{
"source_file": "clean/830d0ef964f3.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 4725,
"reference_length": 5559,
"inference_time": 29.389700174331665
},
{
"source_file": "messy/a59e8dc4ffa1.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2232,
"reference_length": 1061,
"inference_time": 15.1986985206604
},
{
"source_file": "clean/de966bcba616.py",
"scores": {
"function_coverage": 0.391304347826087,
"class_coverage": 1.0,
"argument_coverage": 0.7804878048780488,
"return_type_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 0.8343584305408271
},
"prediction_length": 8474,
"reference_length": 6379,
"inference_time": 52.93989539146423
},
{
"source_file": "clean/c2337dc8cffc.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 1542,
"reference_length": 1247,
"inference_time": 7.4634623527526855
},
{
"source_file": "messy/10b43d2d1e3a.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2596,
"reference_length": 2846,
"inference_time": 17.287745714187622
},
{
"source_file": "clean/06e16975d4ac.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2145,
"reference_length": 1688,
"inference_time": 10.59754467010498
},
{
"source_file": "messy/e8a7155bfbec.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2774,
"reference_length": 3266,
"inference_time": 17.90034508705139
},
{
"source_file": "messy/666f4c354670.py",
"scores": {
"function_coverage": 0.9090909090909091,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"return_type_coverage": 0.5,
"has_structure": 1.0,
"parseable": true,
"overall": 0.8818181818181818
},
"prediction_length": 3159,
"reference_length": 3131,
"inference_time": 18.022941827774048
},
{
"source_file": "clean/bc59895adba1.py",
"scores": {
"function_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 1060,
"reference_length": 1379,
"inference_time": 7.125420570373535
},
{
"source_file": "messy/e74dec1c1520.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"return_type_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3774,
"reference_length": 4486,
"inference_time": 22.132354497909546
},
{
"source_file": "clean/8a13f8afeb71.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 1185,
"reference_length": 1913,
"inference_time": 7.342715501785278
},
{
"source_file": "clean/a94fea8369fc.py",
"scores": {
"function_coverage": 0.8571428571428571,
"argument_coverage": 0.7727272727272727,
"has_structure": 1.0,
"parseable": true,
"overall": 0.8766233766233765
},
"prediction_length": 2851,
"reference_length": 3149,
"inference_time": 15.59646987915039
},
{
"source_file": "clean/bf74c86796e4.py",
"scores": {
"overall": 0.5,
"parseable": false
},
"prediction_length": 1698,
"reference_length": 1501,
"inference_time": 11.013347148895264
},
{
"source_file": "clean/6968fa78ff3c.py",
"scores": {
"overall": 0.5,
"parseable": false
},
"prediction_length": 3131,
"reference_length": 2558,
"inference_time": 18.22009539604187
},
{
"source_file": "clean/2d1c1112bd6d.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 1113,
"reference_length": 1237,
"inference_time": 7.454645156860352
},
{
"source_file": "clean/9b31cca2e395.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 3187,
"reference_length": 2124,
"inference_time": 21.071300506591797
},
{
"source_file": "clean/88b8d70f4fe4.py",
"scores": {
"function_coverage": 1.0,
"class_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 2533,
"reference_length": 2161,
"inference_time": 15.911052703857422
},
{
"source_file": "clean/1b8bec287901.py",
"scores": {
"function_coverage": 1.0,
"argument_coverage": 1.0,
"has_structure": 1.0,
"parseable": true,
"overall": 1.0
},
"prediction_length": 1654,
"reference_length": 1775,
"inference_time": 7.71472430229187
}
]
}