Update README.md
Browse files
README.md
CHANGED
|
@@ -15,9 +15,19 @@ model-index:
|
|
| 15 |
type: mmlu
|
| 16 |
metrics:
|
| 17 |
- name: MMMLU(DE_DE) (0-Shot)
|
| 18 |
-
type:
|
| 19 |
value: 25.57
|
| 20 |
verified: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
- task:
|
| 22 |
type: text-generation
|
| 23 |
dataset:
|
|
@@ -25,29 +35,39 @@ model-index:
|
|
| 25 |
type: arc
|
| 26 |
metrics:
|
| 27 |
- name: ARC Challenge (DE) (0-Shot)
|
| 28 |
-
type:
|
| 29 |
value: 24.29
|
| 30 |
verified: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
- task:
|
| 32 |
type: text-generation
|
| 33 |
dataset:
|
| 34 |
name: deutsche-telekom/Ger-RAG-eval
|
| 35 |
type: Ger-RAG-eval
|
| 36 |
metrics:
|
| 37 |
-
- name:
|
| 38 |
-
type:
|
| 39 |
value: 25.2
|
| 40 |
verified: false
|
| 41 |
-
- name:
|
| 42 |
-
type:
|
| 43 |
value: 27.1
|
| 44 |
verified: false
|
| 45 |
-
- name:
|
| 46 |
-
type:
|
| 47 |
value: 50.9
|
| 48 |
verified: false
|
| 49 |
-
- name:
|
| 50 |
-
type:
|
| 51 |
value: 50.0
|
| 52 |
verified: false
|
| 53 |
language:
|
|
@@ -155,7 +175,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
| 155 |
model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map=device, torch_dtype=torch.bfloat16)
|
| 156 |
messages = [
|
| 157 |
{"role": "system", "content": "Du bist ein hilfreicher Assistent."},
|
| 158 |
-
{"role": "user", "content": "
|
| 159 |
]
|
| 160 |
inputs = tokenizer.apply_chat_template(messages, tokenize=True, return_tensors="pt", add_generation_prompt=True).to(device)
|
| 161 |
outputs = model.generate(inputs, max_new_tokens=256, do_sample=True, temperature=0.4, top_p=0.9, repetition_penalty=1.1, top_k=512)
|
|
|
|
| 15 |
type: mmlu
|
| 16 |
metrics:
|
| 17 |
- name: MMMLU(DE_DE) (0-Shot)
|
| 18 |
+
type: accuracy
|
| 19 |
value: 25.57
|
| 20 |
verified: false
|
| 21 |
+
- task:
|
| 22 |
+
type: text-generation
|
| 23 |
+
dataset:
|
| 24 |
+
name: openai/MMMLU
|
| 25 |
+
type: mmlu
|
| 26 |
+
metrics:
|
| 27 |
+
- name: MMMLU(DE_DE) (5-Shot)
|
| 28 |
+
type: accuracy
|
| 29 |
+
value: 24.88
|
| 30 |
+
verified: false
|
| 31 |
- task:
|
| 32 |
type: text-generation
|
| 33 |
dataset:
|
|
|
|
| 35 |
type: arc
|
| 36 |
metrics:
|
| 37 |
- name: ARC Challenge (DE) (0-Shot)
|
| 38 |
+
type: accuracy
|
| 39 |
value: 24.29
|
| 40 |
verified: false
|
| 41 |
+
- task:
|
| 42 |
+
type: text-generation
|
| 43 |
+
dataset:
|
| 44 |
+
name: alexandrainst/m_arc
|
| 45 |
+
type: arc
|
| 46 |
+
metrics:
|
| 47 |
+
- name: ARC Challenge (DE) (5-Shot)
|
| 48 |
+
type: accuracy
|
| 49 |
+
value: 24.38
|
| 50 |
+
verified: false
|
| 51 |
- task:
|
| 52 |
type: text-generation
|
| 53 |
dataset:
|
| 54 |
name: deutsche-telekom/Ger-RAG-eval
|
| 55 |
type: Ger-RAG-eval
|
| 56 |
metrics:
|
| 57 |
+
- name: Task 1
|
| 58 |
+
type: accuracy
|
| 59 |
value: 25.2
|
| 60 |
verified: false
|
| 61 |
+
- name: Task 2
|
| 62 |
+
type: accuracy
|
| 63 |
value: 27.1
|
| 64 |
verified: false
|
| 65 |
+
- name: Task 3
|
| 66 |
+
type: accuracy
|
| 67 |
value: 50.9
|
| 68 |
verified: false
|
| 69 |
+
- name: Task 4
|
| 70 |
+
type: accuracy
|
| 71 |
value: 50.0
|
| 72 |
verified: false
|
| 73 |
language:
|
|
|
|
| 175 |
model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map=device, torch_dtype=torch.bfloat16)
|
| 176 |
messages = [
|
| 177 |
{"role": "system", "content": "Du bist ein hilfreicher Assistent."},
|
| 178 |
+
{"role": "user", "content": "Was ist der Sinn des Lebens?"}
|
| 179 |
]
|
| 180 |
inputs = tokenizer.apply_chat_template(messages, tokenize=True, return_tensors="pt", add_generation_prompt=True).to(device)
|
| 181 |
outputs = model.generate(inputs, max_new_tokens=256, do_sample=True, temperature=0.4, top_p=0.9, repetition_penalty=1.1, top_k=512)
|