Charlie81 committed on
Commit
6ad2738
·
1 Parent(s): af7366a
scripts/downloadweights.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Download the OLMoE weights and print where they were cached locally."""
import os

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers.utils.hub import cached_file

model = AutoModelForCausalLM.from_pretrained(
    "allenai/OLMoE-7B",      # Exact name from Hugging Face
    trust_remote_code=True,  # Required if they use custom modeling_olmoe.py
    use_safetensors=True,    # Ensures .safetensors file is used
)

tokenizer = AutoTokenizer.from_pretrained("allenai/OLMoE-7B")
print(model.config)
print(model.__class__)

# Resolve the local cache path of the repo's config.json; its directory is the
# snapshot folder holding the downloaded weights.
# FIX: cached_file() does not take trust_remote_code (it only resolves files,
# it never executes repo code), so that kwarg was removed.
config_path = cached_file("allenai/OLMoE-7B", "config.json")
print(config_path)
model_path = os.path.dirname(config_path)
print(model_path)
scripts/eval.py ADDED
@@ -0,0 +1,301 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env python3
"""
eval.py Evaluation script for modified OLMoE model using lm-evaluation-harness

Loads a locally modified OLMoE checkpoint, overrides its routing settings
from the command line, runs lm-evaluation-harness tasks on it, and writes
the results (plus run metadata) to a JSON file.
"""
import argparse
import json
import os
from typing import Dict, List, Optional
import torch
from transformers import AutoConfig, AutoTokenizer
from lm_eval import evaluator
# NOTE: lm_eval's get_model helper is deliberately not imported here; the
# model is wrapped via lm_eval.models.huggingface.HFLM inside run_evaluation.
import logging

# Module-level logger; the level may be tightened later from --verbosity.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
18
+
19
+
20
def parse_args():
    """Build the command-line interface and return the parsed namespace."""
    p = argparse.ArgumentParser(description="Evaluate myolmoe model")

    # --- model location / loader ---
    p.add_argument("--model_path", type=str,
                   default="/home/ianwu/.cache/huggingface/hub/models--allenai--OLMoE-7B/snapshots/6d84c48581ece794365f2b8e9cfb043c68ade9c5",
                   help="Path to the pretrained model")
    p.add_argument("--model_type", type=str, default="hf-auto",
                   help="Model type for lm-eval")

    # --- expert-routing overrides ---
    p.add_argument("--routing_type", type=str, default="non_deterministic",
                   choices=["dense", "sparse", "non_deterministic"],
                   help="Type of routing to use")
    p.add_argument("--router_temperature", type=float, default=1.0,
                   help="Temperature for non-deterministic routing")
    p.add_argument("--num_experts_per_tok", type=int, default=8,
                   help="Number of experts per token")

    # --- evaluation harness settings ---
    p.add_argument("--tasks", type=str, nargs="+", default=['mmlu'],
                   help="Tasks to evaluate on")
    p.add_argument("--num_fewshot", type=int, default=0,
                   help="Number of few-shot examples")
    p.add_argument("--batch_size", type=int, default=64,
                   help="Batch size for evaluation")
    p.add_argument("--max_batch_size", type=int, default=None,
                   help="Maximum batch size")
    p.add_argument("--device", type=str, default="cuda",
                   help="Device to use for evaluation")
    p.add_argument("--dtype", type=str, default="float16",
                   choices=["float16", "bfloat16", "float32"],
                   help="Data type for model weights")

    # --- output location ---
    p.add_argument("--output_dir", type=str, default="./eval_results",
                   help="Directory to save evaluation results")
    p.add_argument("--output_filename", type=str, default=None,
                   help="Filename for results (auto-generated if not provided)")

    # --- miscellaneous ---
    p.add_argument("--limit", type=int, default=None,
                   help="Limit number of examples per task")
    p.add_argument("--write_out", action="store_true",
                   help="Write out individual predictions")
    p.add_argument("--trust_remote_code", action="store_true",
                   help="Trust remote code when loading model")
    p.add_argument("--verbosity", type=str, default="INFO",
                   choices=["DEBUG", "INFO", "WARNING", "ERROR"],
                   help="Logging verbosity level")

    return p.parse_args()
75
+
76
+
77
def setup_model_config(model_path: str, routing_config: Dict) -> None:
    """Merge *routing_config* into the model's on-disk config.json.

    The file under *model_path* is rewritten in place; a warning is logged
    (and nothing is written) when no config.json exists there.
    """
    config_path = os.path.join(model_path, "config.json")

    # Guard clause: nothing to do without a config file on disk.
    if not os.path.exists(config_path):
        logger.warning(f"Config file not found at {config_path}")
        return

    with open(config_path, 'r') as f:
        on_disk = json.load(f)

    # Overlay the routing settings and persist the merged config.
    on_disk.update(routing_config)
    with open(config_path, 'w') as f:
        json.dump(on_disk, f, indent=2)

    logger.info(f"Updated model config with routing settings: {routing_config}")
97
+
98
+
99
def validate_model_setup(model_path: str) -> bool:
    """Return True when the model's config and tokenizer load cleanly.

    Loads both artifacts with trust_remote_code=True and logs a short
    summary of key config fields; any failure is logged and reported
    as False instead of raising.
    """
    try:
        cfg = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
        tok = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

        logger.info(f"Model validation successful:")
        logger.info(f" - Model type: {cfg.model_type}")
        logger.info(f" - Routing type: {getattr(cfg, 'routing_type', 'not specified')}")
        logger.info(f" - Vocab size: {cfg.vocab_size}")
        logger.info(f" - Hidden size: {cfg.hidden_size}")
        logger.info(f" - Num layers: {cfg.num_hidden_layers}")
        logger.info(f" - Num experts: {getattr(cfg, 'num_experts', 'not specified')}")
        return True
    except Exception as e:
        logger.error(f"Model validation failed: {e}")
        return False
119
+
120
def run_evaluation(args) -> Dict:
    """Load the modified OLMoE model and run lm-evaluation-harness on it.

    Builds the model from args.model_path with the routing parameters from
    the CLI patched into its config, wraps it in lm_eval's HFLM adapter,
    and returns the raw results dict from evaluator.simple_evaluate().
    """
    import sys, os
    # Make the local modified-model package importable.
    sys.path.insert(0, os.path.join(os.path.dirname(__file__), "myolmoe_model"))
    # FIX: MyOLMoEForCausalLM was imported twice and AutoModelForCausalLM was
    # imported but never used; a single import of each needed name remains.
    from modeling_myolmoe import MyOLMoEForCausalLM  # your modified class
    from lm_eval.models.huggingface import HFLM

    # 1. Load config and override routing parameters from the CLI.
    config = AutoConfig.from_pretrained(
        args.model_path,
        trust_remote_code=True
    )
    config.routing_type = args.routing_type
    config.router_temperature = args.router_temperature
    config.num_experts_per_tok = args.num_experts_per_tok

    # 2. Load model with the updated config in the requested precision.
    torch_dtype = {
        "float16": torch.float16,
        "bfloat16": torch.bfloat16,
        "float32": torch.float32
    }[args.dtype]

    hf_model = MyOLMoEForCausalLM.from_pretrained(
        args.model_path,
        config=config,
        torch_dtype=torch_dtype,
        device_map="auto"
    ).eval()

    # 3. Wrap the Hugging Face model so lm-eval can drive it.
    eval_model = HFLM(
        pretrained=hf_model,  # Pass the initialized model
        device=args.device,
        batch_size=args.batch_size,
        max_batch_size=args.max_batch_size,
        dtype=args.dtype
    )

    # 4. Run evaluation with the wrapped model.
    results = evaluator.simple_evaluate(
        model=eval_model,
        tasks=args.tasks,
        num_fewshot=args.num_fewshot,
        limit=args.limit,
        write_out=args.write_out,
        verbosity=args.verbosity,
    )

    return results
175
+
176
import numpy as np
import torch


def make_serializable(obj):
    """Recursively convert *obj* into JSON-serializable Python types.

    Handles dict/list/tuple containers, NumPy scalars/arrays/dtypes, and
    PyTorch tensors/dtypes; anything else is returned unchanged.
    """
    if isinstance(obj, dict):
        return {k: make_serializable(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [make_serializable(v) for v in obj]
    elif isinstance(obj, tuple):
        return tuple(make_serializable(v) for v in obj)
    # NumPy arrays -> nested lists (FIX: previously fell through unchanged,
    # which made the subsequent json.dump fail on array-valued results)
    elif isinstance(obj, np.ndarray):
        return obj.tolist()
    # NumPy scalars (incl. np.bool_, which is neither np.integer nor np.floating)
    elif isinstance(obj, (np.integer, np.floating, np.bool_)):
        return obj.item()
    # NumPy dtypes
    elif isinstance(obj, np.dtype):
        return str(obj)
    # PyTorch tensor -> list
    elif isinstance(obj, torch.Tensor):
        return obj.tolist()
    # PyTorch dtype (e.g. torch.float16)
    elif isinstance(obj, torch.dtype):
        return str(obj)
    # Anything else leave alone
    else:
        return obj
201
+
202
def save_results(results: Dict, args) -> str:
    """Write evaluation results plus run metadata to a JSON file.

    The filename is derived from the model name, routing type and task
    list unless args.output_filename is provided. All values are passed
    through make_serializable() first so json.dump cannot choke on
    NumPy/torch types. Returns the path of the written file.
    """
    os.makedirs(args.output_dir, exist_ok=True)

    # Derive a filename when the caller did not supply one.
    if args.output_filename is None:
        model_name = os.path.basename(args.model_path.rstrip('/'))
        tasks_str = "_".join(args.tasks[:3])
        if len(args.tasks) > 3:
            tasks_str += f"_and_{len(args.tasks)-3}_more"
        filename = f"{model_name}_{args.routing_type}_{tasks_str}_results.json"
    else:
        filename = args.output_filename
        if not filename.endswith('.json'):
            filename += '.json'
    output_path = os.path.join(args.output_dir, filename)

    # Record the run configuration alongside the raw harness output.
    payload = {
        "metadata": {
            "model_path": args.model_path,
            "routing_type": args.routing_type,
            "router_temperature": args.router_temperature,
            "num_experts_per_tok": args.num_experts_per_tok,
            "tasks": args.tasks,
            "num_fewshot": args.num_fewshot,
            "batch_size": args.batch_size,
            "device": args.device,
            "dtype": args.dtype,
        },
        "results": results,
    }

    # Convert to JSON-safe types, then write to disk.
    with open(output_path, 'w') as f:
        json.dump(make_serializable(payload), f, indent=2)

    logger.info(f"Results saved to {output_path}")
    return output_path
244
+
245
+
246
+
247
+
248
def print_summary(results: Dict, routing_type: str) -> None:
    """Print a human-readable per-task summary of evaluation results.

    Numeric metrics are shown with four decimals plus their matching
    "<metric>_stderr" value; stderr entries never get their own line.
    """
    rule = '=' * 60
    print(f"\n{rule}")
    print(f"EVALUATION SUMMARY - Routing: {routing_type.upper()}")
    print(f"{rule}")

    for task, metrics in results.get("results", {}).items():
        if not isinstance(metrics, dict):
            continue
        print(f"\n{task.upper()}:")
        for metric, value in metrics.items():
            if not isinstance(value, (int, float)):
                print(f" {metric}: {value}")
            elif not metric.endswith('_stderr'):
                # Pair each metric with its stderr (0 when absent).
                stderr = metrics.get(f"{metric}_stderr", 0)
                print(f" {metric}: {value:.4f} (±{stderr:.4f})")

    print(f"\n{rule}")
271
+
272
+
273
def main():
    """Entry point: parse args, evaluate, persist results, print summary."""
    args = parse_args()

    # Apply the requested verbosity to both the root and module loggers.
    level = getattr(logging, args.verbosity.upper(), None)
    if isinstance(level, int):
        logging.getLogger().setLevel(level)
        logger.setLevel(level)

    try:
        results = run_evaluation(args)
        save_results(results, args)
        print_summary(results, args.routing_type)
        logger.info("Evaluation completed successfully!")
    except Exception as e:
        # Log then re-raise so the process exits non-zero on failure.
        logger.error(f"Evaluation failed: {e}")
        raise
298
+
299
+
300
# Script entry point.
if __name__ == "__main__":
    main()
scripts/run_eval.py DELETED
File without changes