|
|
--- |
|
|
language: |
|
|
- en |
|
|
license: bigscience-openrail-m |
|
|
library_name: transformers |
|
|
tags: |
|
|
- code |
|
|
datasets: |
|
|
- tyson0420/stackexchange-4dpo-filby-clang-keywords |
|
|
metrics: |
|
|
- code_eval |
|
|
model-index: |
|
|
- name: stack_codellama-7b-inst |
|
|
results: |
|
|
- task: |
|
|
type: text-generation |
|
|
name: Text Generation |
|
|
dataset: |
|
|
name: AI2 Reasoning Challenge (25-Shot) |
|
|
type: ai2_arc |
|
|
config: ARC-Challenge |
|
|
split: test |
|
|
args: |
|
|
num_few_shot: 25 |
|
|
metrics: |
|
|
- type: acc_norm |
|
|
value: 43.52 |
|
|
name: normalized accuracy |
|
|
source: |
|
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=tyson0420/stack_codellama-7b-inst |
|
|
name: Open LLM Leaderboard |
|
|
- task: |
|
|
type: text-generation |
|
|
name: Text Generation |
|
|
dataset: |
|
|
name: HellaSwag (10-Shot) |
|
|
type: hellaswag |
|
|
split: validation |
|
|
args: |
|
|
num_few_shot: 10 |
|
|
metrics: |
|
|
- type: acc_norm |
|
|
value: 66.17 |
|
|
name: normalized accuracy |
|
|
source: |
|
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=tyson0420/stack_codellama-7b-inst |
|
|
name: Open LLM Leaderboard |
|
|
- task: |
|
|
type: text-generation |
|
|
name: Text Generation |
|
|
dataset: |
|
|
name: MMLU (5-Shot) |
|
|
type: cais/mmlu |
|
|
config: all |
|
|
split: test |
|
|
args: |
|
|
num_few_shot: 5 |
|
|
metrics: |
|
|
- type: acc |
|
|
value: 39.59 |
|
|
name: accuracy |
|
|
source: |
|
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=tyson0420/stack_codellama-7b-inst |
|
|
name: Open LLM Leaderboard |
|
|
- task: |
|
|
type: text-generation |
|
|
name: Text Generation |
|
|
dataset: |
|
|
name: TruthfulQA (0-shot) |
|
|
type: truthful_qa |
|
|
config: multiple_choice |
|
|
split: validation |
|
|
args: |
|
|
num_few_shot: 0 |
|
|
metrics: |
|
|
- type: mc2 |
|
|
value: 39.03 |
|
|
source: |
|
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=tyson0420/stack_codellama-7b-inst |
|
|
name: Open LLM Leaderboard |
|
|
- task: |
|
|
type: text-generation |
|
|
name: Text Generation |
|
|
dataset: |
|
|
name: Winogrande (5-shot) |
|
|
type: winogrande |
|
|
config: winogrande_xl |
|
|
split: validation |
|
|
args: |
|
|
num_few_shot: 5 |
|
|
metrics: |
|
|
- type: acc |
|
|
value: 65.67 |
|
|
name: accuracy |
|
|
source: |
|
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=tyson0420/stack_codellama-7b-inst |
|
|
name: Open LLM Leaderboard |
|
|
- task: |
|
|
type: text-generation |
|
|
name: Text Generation |
|
|
dataset: |
|
|
name: GSM8k (5-shot) |
|
|
type: gsm8k |
|
|
config: main |
|
|
split: test |
|
|
args: |
|
|
num_few_shot: 5 |
|
|
metrics: |
|
|
- type: acc |
|
|
value: 15.85 |
|
|
name: accuracy |
|
|
source: |
|
|
url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=tyson0420/stack_codellama-7b-inst |
|
|
name: Open LLM Leaderboard |
|
|
--- |
|
|
|
|
|
# Model Card for stack_codellama-7b-inst
|
|
|
|
|
<!-- Provide a quick summary of what the model is/does. --> |
|
|
|
|
|
|
|
|
|
|
|
## Model Details |
|
|
|
|
|
Evaluation results on the HumanEval benchmark (pass@1 / pass@10), together with the exact generation configuration used:

```json
{
  "humaneval": {
    "pass@1": 0.32499999999999996,
    "pass@10": 0.4329268292682927
  },
  "config": {
    "prefix": "",
    "do_sample": true,
    "temperature": 0.2,
    "top_k": 0,
    "top_p": 0.95,
    "n_samples": 10,
    "eos": "<|endoftext|>",
    "seed": 0,
    "model": "tyson0420/stack_codellama-7b-inst",
    "modeltype": "causal",
    "peft_model": null,
    "revision": null,
    "use_auth_token": false,
    "trust_remote_code": false,
    "tasks": "humaneval",
    "instruction_tokens": null,
    "batch_size": 10,
    "max_length_generation": 512,
    "precision": "fp32",
    "load_in_8bit": false,
    "load_in_4bit": false,
    "left_padding": false,
    "limit": null,
    "limit_start": 0,
    "save_every_k_tasks": -1,
    "postprocess": true,
    "allow_code_execution": true,
    "generation_only": false,
    "load_generations_path": null,
    "load_data_path": null,
    "metric_output_path": "evaluation_results.json",
    "save_generations": false,
    "load_generations_intermediate_paths": null,
    "save_generations_path": "generations.json",
    "save_references": false,
    "save_references_path": "references.json",
    "prompt": "prompt",
    "max_memory_per_gpu": null,
    "check_references": false
  }
}
```
|
|
|
|
|
### Model Description |
|
|
|
|
|
<!-- Provide a longer summary of what this model is. --> |
|
|
|
|
|
This is the model card of a 🤗 transformers model that has been pushed on the Hub. This model card has been automatically generated. |
|
|
|
|
|
- **Developed by:** tyson0420
|
|
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) |
|
|
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_tyson0420__stack_codellama-7b-inst) |
|
|
|
|
|
| Metric |Value| |
|
|
|---------------------------------|----:| |
|
|
|Avg. |44.97| |
|
|
|AI2 Reasoning Challenge (25-Shot)|43.52| |
|
|
|HellaSwag (10-Shot) |66.17| |
|
|
|MMLU (5-Shot) |39.59| |
|
|
|TruthfulQA (0-shot) |39.03| |
|
|
|Winogrande (5-shot) |65.67| |
|
|
|GSM8k (5-shot) |15.85| |
|
|
|
|
|
|