Phase-Technologies committed on
Commit
58162aa
·
verified ·
1 Parent(s): aa390ba

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +45 -12
README.md CHANGED
@@ -33,27 +33,63 @@ model_index:
33
  results:
34
  - task:
35
  type: text-generation
36
- name: Mathematical Reasoning
37
  dataset:
38
  name: GSM8K
39
  type: gsm8k
40
  split: test
41
  metrics:
42
  - type: accuracy
43
- value: 82.4 # Data based on internal Qwen2.5-Math benchmarks
44
- name: Final Synthesized Accuracy
45
  - task:
46
  type: text-generation
47
- name: Physics Conceptual Logic
48
  dataset:
49
- name: ScienceQA
50
- type: science_qa
 
51
  split: test
52
  metrics:
53
  - type: accuracy
54
- value: 79.1
55
- name: Conceptual Rigor
56
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
  # Technical Architecture Settings
58
  model_type: qwen2
59
  quantization: 4-bit (bitsandbytes)
@@ -75,9 +111,6 @@ hardware_specification:
75
  vram: 16GB
76
  optimization: Unsloth-Fast-Inference
77
 
78
- # Social & Reference
79
- extra_gated_heading: "Phase-Technologies Proprietary Reasoning Framework"
80
- extra_gated_description: "Accessing this model grants permission to utilize the STSS synthesis protocols for analytical verification."
81
  ---
82
 
83
 
 
33
  results:
34
  - task:
35
  type: text-generation
36
+ name: Grade School Mathematics
37
  dataset:
38
  name: GSM8K
39
  type: gsm8k
40
  split: test
41
  metrics:
42
  - type: accuracy
43
+ value: 70.0
44
+ name: Exact Match (Zero-Shot)
45
  - task:
46
  type: text-generation
47
+ name: Competition Mathematics
48
  dataset:
49
+ name: MATH-Hard
50
+ type: lighteval/MATH-Hard
51
+ config: default
52
  split: test
53
  metrics:
54
  - type: accuracy
55
+ value: 60.0
56
+ name: Exact Match (Boxed)
57
+ - task:
58
+ type: text-generation
59
+ name: Professional Knowledge
60
+ dataset:
61
+ name: MMLU-Pro
62
+ type: TIGER-Lab/MMLU-Pro
63
+ config: default
64
+ split: test
65
+ metrics:
66
+ - type: accuracy
67
+ value: 45.0
68
+ name: Multiple Choice Accuracy
69
+ - task:
70
+ type: text-generation
71
+ name: Invitational Math
72
+ dataset:
73
+ name: AIME 2026
74
+ type: MathArena/aime_2026
75
+ split: train
76
+ metrics:
77
+ - type: accuracy
78
+ value: 10.0
79
+ name: Accuracy
80
+ - task:
81
+ type: text-generation
82
+ name: Advanced Graduate Reasoning
83
+ dataset:
84
+ name: Humanity's Last Exam
85
+ type: cais/hle
86
+ config: default
87
+ split: test
88
+ metrics:
89
+ - type: accuracy
90
+ value: 0.0
91
+ name: Exact String Match
92
+
93
  # Technical Architecture Settings
94
  model_type: qwen2
95
  quantization: 4-bit (bitsandbytes)
 
111
  vram: 16GB
112
  optimization: Unsloth-Fast-Inference
113
 
 
 
 
114
  ---
115
 
116