LemiSt committed on
Commit
aad36d1
·
verified ·
1 Parent(s): d032d3e

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +31 -11
README.md CHANGED
@@ -15,9 +15,19 @@ model-index:
15
  type: mmlu
16
  metrics:
17
  - name: MMMLU(DE_DE) (0-Shot)
18
- type: MMMLU(DE_DE) (0-Shot)
19
  value: 25.57
20
  verified: false
 
 
 
 
 
 
 
 
 
 
21
  - task:
22
  type: text-generation
23
  dataset:
@@ -25,29 +35,39 @@ model-index:
25
  type: arc
26
  metrics:
27
  - name: ARC Challenge (DE) (0-Shot)
28
- type: ARC Challenge (DE) (0-Shot)
29
  value: 24.29
30
  verified: false
 
 
 
 
 
 
 
 
 
 
31
  - task:
32
  type: text-generation
33
  dataset:
34
  name: deutsche-telekom/Ger-RAG-eval
35
  type: Ger-RAG-eval
36
  metrics:
37
- - name: Ger-RAG-eval Choose Context By Question
38
- type: Ger-RAG-eval Task 1
39
  value: 25.2
40
  verified: false
41
- - name: Ger-RAG-eval Choose Question By Context
42
- type: Ger-RAG-eval Task 2
43
  value: 27.1
44
  verified: false
45
- - name: Ger-RAG-eval Context Question Match
46
- type: Ger-RAG-eval Task 3
47
  value: 50.9
48
  verified: false
49
- - name: Ger-RAG-eval Question Answer Match
50
- type: Ger-RAG-eval Task 4
51
  value: 50.0
52
  verified: false
53
  language:
@@ -155,7 +175,7 @@ device = "cuda" if torch.cuda.is_available() else "cpu"
155
  model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map=device, torch_dtype=torch.bfloat16)
156
  messages = [
157
  {"role": "system", "content": "Du bist ein hilfreicher Assistent."},
158
- {"role": "user", "content": "Wie viele Hände hat ein normaler Mensch?"}
159
  ]
160
  inputs = tokenizer.apply_chat_template(messages, tokenize=True, return_tensors="pt", add_generation_prompt=True).to(device)
161
  outputs = model.generate(inputs, max_new_tokens=256, do_sample=True, temperature=0.4, top_p=0.9, repetition_penalty=1.1, top_k=512)
 
15
  type: mmlu
16
  metrics:
17
  - name: MMMLU(DE_DE) (0-Shot)
18
+ type: accuracy
19
  value: 25.57
20
  verified: false
21
+ - task:
22
+ type: text-generation
23
+ dataset:
24
+ name: openai/MMMLU
25
+ type: mmlu
26
+ metrics:
27
+ - name: MMMLU(DE_DE) (5-Shot)
28
+ type: accuracy
29
+ value: 24.88
30
+ verified: false
31
  - task:
32
  type: text-generation
33
  dataset:
 
35
  type: arc
36
  metrics:
37
  - name: ARC Challenge (DE) (0-Shot)
38
+ type: accuracy
39
  value: 24.29
40
  verified: false
41
+ - task:
42
+ type: text-generation
43
+ dataset:
44
+ name: alexandrainst/m_arc
45
+ type: arc
46
+ metrics:
47
+ - name: ARC Challenge (DE) (5-Shot)
48
+ type: accuracy
49
+ value: 24.38
50
+ verified: false
51
  - task:
52
  type: text-generation
53
  dataset:
54
  name: deutsche-telekom/Ger-RAG-eval
55
  type: Ger-RAG-eval
56
  metrics:
57
+ - name: Task 1
58
+ type: accuracy
59
  value: 25.2
60
  verified: false
61
+ - name: Task 2
62
+ type: accuracy
63
  value: 27.1
64
  verified: false
65
+ - name: Task 3
66
+ type: accuracy
67
  value: 50.9
68
  verified: false
69
+ - name: Task 4
70
+ type: accuracy
71
  value: 50.0
72
  verified: false
73
  language:
 
175
  model = AutoModelForCausalLM.from_pretrained(checkpoint, device_map=device, torch_dtype=torch.bfloat16)
176
  messages = [
177
  {"role": "system", "content": "Du bist ein hilfreicher Assistent."},
178
+ {"role": "user", "content": "Was ist der Sinn des Lebens?"}
179
  ]
180
  inputs = tokenizer.apply_chat_template(messages, tokenize=True, return_tensors="pt", add_generation_prompt=True).to(device)
181
  outputs = model.generate(inputs, max_new_tokens=256, do_sample=True, temperature=0.4, top_p=0.9, repetition_penalty=1.1, top_k=512)