update
Browse files- evaluation/intro.txt +2 -1
evaluation/intro.txt
CHANGED
|
@@ -19,6 +19,8 @@ In most papers, 200 candidate program completions are sampled, and pass@1, pass@
|
|
| 19 |
We can load the HumanEval dataset and the pass@k metric from the hub:
|
| 20 |
|
| 21 |
```python
|
|
|
|
|
|
|
| 22 |
human_eval = load_dataset("openai_humaneval")
|
| 23 |
code_eval_metric = load_metric("code_eval")
|
| 24 |
```
|
|
@@ -26,7 +28,6 @@ code_eval_metric = load_metric("code_eval")
|
|
| 26 |
We can easily compute the pass@k for a problem that asks for the implementation of a function that sums two integers:
|
| 27 |
|
| 28 |
```python
|
| 29 |
-
from datasets import load_metric
|
| 30 |
test_cases = ["assert add(2,3)==5"]
|
| 31 |
candidates = [["def add(a,b): return a*b", "def add(a, b): return a+b"]]
|
| 32 |
pass_at_k, results = code_eval_metric.compute(references=test_cases, predictions=candidates, k=[1, 2])
|
|
|
|
| 19 |
We can load the HumanEval dataset and the pass@k metric from the hub:
|
| 20 |
|
| 21 |
```python
|
| 22 |
+
from datasets import load_dataset, load_metric
|
| 23 |
+
|
| 24 |
human_eval = load_dataset("openai_humaneval")
|
| 25 |
code_eval_metric = load_metric("code_eval")
|
| 26 |
```
|
|
|
|
| 28 |
We can easily compute the pass@k for a problem that asks for the implementation of a function that sums two integers:
|
| 29 |
|
| 30 |
```python
|
|
|
|
| 31 |
test_cases = ["assert add(2,3)==5"]
|
| 32 |
candidates = [["def add(a,b): return a*b", "def add(a, b): return a+b"]]
|
| 33 |
pass_at_k, results = code_eval_metric.compute(references=test_cases, predictions=candidates, k=[1, 2])
|