Update README.md
Browse files
README.md
CHANGED
|
@@ -24,7 +24,7 @@ model-index:
|
|
| 24 |
metrics:
|
| 25 |
- name: pass@1
|
| 26 |
type: pass@1
|
| 27 |
-
value:
|
| 28 |
verified: false
|
| 29 |
---
|
| 30 |
|
|
@@ -43,11 +43,13 @@ Total 177,333 samples 316 MB
|
|
| 43 |
- TokenBender/python_eval_instruct_51k: “python” in output. 39,596 samples
|
| 44 |
|
| 45 |
|
|
|
|
|
|
|
| 46 |
## HumanEval
|
| 47 |
|
| 48 |
| Metric | Value |
|
| 49 |
| --- | --- |
|
| 50 |
-
| humaneval-python |
|
| 51 |
|
| 52 |
[Big Code Models Leaderboard](https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard)
|
| 53 |
|
|
@@ -67,14 +69,14 @@ CodeLlama-13B: 35.07
|
|
| 67 |
|
| 68 |
| Metric | Value |
|
| 69 |
| --- | --- |
|
| 70 |
-
| python |
|
| 71 |
-
| java |
|
| 72 |
-
| javascript | 46.
|
| 73 |
-
| cpp | 37.
|
| 74 |
-
| rust |
|
| 75 |
-
| go |
|
| 76 |
-
| sh |
|
| 77 |
-
| julia |
|
| 78 |
| typescript | 47.80 |
|
| 79 |
|
| 80 |
## LMEval
|
|
|
|
| 24 |
metrics:
|
| 25 |
- name: pass@1
|
| 26 |
type: pass@1
|
| 27 |
+
value: 52.439
|
| 28 |
verified: false
|
| 29 |
---
|
| 30 |
|
|
|
|
| 43 |
- TokenBender/python_eval_instruct_51k: “python” in output. 39,596 samples
|
| 44 |
|
| 45 |
|
| 46 |
+
50 samples/T=0.2/MaxTokens=512/Top_P=0.95
|
| 47 |
+
|
| 48 |
## HumanEval
|
| 49 |
|
| 50 |
| Metric | Value |
|
| 51 |
| --- | --- |
|
| 52 |
+
| humaneval-python | 52.44 |
|
| 53 |
|
| 54 |
[Big Code Models Leaderboard](https://huggingface.co/spaces/bigcode/bigcode-models-leaderboard)
|
| 55 |
|
|
|
|
| 69 |
|
| 70 |
| Metric | Value |
|
| 71 |
| --- | --- |
|
| 72 |
+
| python | 55.96 |
|
| 73 |
+
| java | 37.84 |
|
| 74 |
+
| javascript | 46.93 |
|
| 75 |
+
| cpp | 37.48 |
|
| 76 |
+
| rust | 29.01 |
|
| 77 |
+
| go | 28.99 |
|
| 78 |
+
| sh | 12.11 |
|
| 79 |
+
| julia | 31.47 |
|
| 80 |
| typescript | 47.80 |
|
| 81 |
|
| 82 |
## LMEval
|