Charlie81 committed
Commit 3adfc62 · 1 Parent(s): a83c539

HFLM forward to loglikelihoods

Files changed (1)
  1. scripts/evalexperts.py +27 -20
scripts/evalexperts.py CHANGED
@@ -169,12 +169,12 @@ def load_custom_model(args) -> Tuple[AutoModelForCausalLM, AutoTokenizer, HFLM]:
 
     # Create HFLM wrapper for evaluation
     hf_model = HFLM(
-        pretrained=model,
-        tokenizer=tokenizer,
+        pretrained=args.model_path,  # Pass model path as string
         device=args.device,
         batch_size=args.batch_size,
         max_batch_size=args.max_batch_size,
-        dtype=args.dtype
+        dtype=args.dtype,
+        trust_remote_code=args.trust_remote_code
     )
 
     logger.info("Custom model, tokenizer, and HFLM wrapper loaded successfully")
@@ -194,16 +194,20 @@ def track_expert_usage(model, input_ids: torch.Tensor) -> List[Dict[int, int]]:
     expert_usage = [{} for _ in range(model.config.num_hidden_layers)]
 
     def hook_fn(module, input, output, layer_idx):
-        # Assuming the module outputs selected expert indices
         if hasattr(module, 'selected_experts'):  # Hypothetical attribute
             selected_experts = module.selected_experts  # Shape: (batch_size, seq_len, top_k)
             for expert_idx in selected_experts.flatten().tolist():
                 expert_usage[layer_idx][expert_idx] = expert_usage[layer_idx].get(expert_idx, 0) + 1
+        elif hasattr(module, 'routing_weights'):  # Alternative: use routing weights
+            weights = module.routing_weights  # Shape: (batch_size, seq_len, num_experts)
+            top_k_indices = torch.topk(weights, k=model.config.top_k, dim=-1).indices
+            for expert_idx in top_k_indices.flatten().tolist():
+                expert_usage[layer_idx][expert_idx] = expert_usage[layer_idx].get(expert_idx, 0) + 1
 
     # Register hooks for each MoE layer
     hooks = []
     for i, layer in enumerate(model.transformer.layers):  # Adjust based on actual model structure
-        if hasattr(layer, 'moe'):  # Check if layer has MoE component
+        if hasattr(layer, 'moe'):
             hook = layer.moe.register_forward_hook(lambda m, inp, out: hook_fn(m, inp, out, i))
             hooks.append(hook)
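Note: the registration lambda closes over the loop variable `i` late, so by the time any hook fires, every closure sees the loop's final value and all counts get attributed to the last MoE layer. Binding the index as a default argument at definition time fixes this; a corrected sketch of the same loop:

    for i, layer in enumerate(model.transformer.layers):
        if hasattr(layer, 'moe'):
            # Default argument captures the current value of `i` each iteration.
            hook = layer.moe.register_forward_hook(
                lambda m, inp, out, layer_idx=i: hook_fn(m, inp, out, layer_idx)
            )
            hooks.append(hook)
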
 
@@ -237,19 +241,22 @@ def run_evaluation_with_tracking(model, hf_model, tokenizer, args) -> Tuple[Dict
     # Initialize expert usage tracking for each task
     task_expert_usage = {task: [] for task in args.tasks}
 
-    # Custom evaluation loop to track expert usage
-    def custom_forward(model, batch):
-        input_ids = batch["input_ids"].to(model.device)
-        # Track expert usage for this batch
-        batch_expert_usage = track_expert_usage(model, input_ids)
-        # Accumulate usage for the task
-        task_name = batch.get("task_name", args.tasks[0])  # Fallback to first task
-        task_expert_usage[task_name].append(batch_expert_usage)
-        return model(input_ids)
-
-    # Override HFLM's forward method to include expert tracking
-    original_forward = hf_model.forward
-    hf_model.forward = lambda batch: custom_forward(model, batch)
+    # Custom batch processing to track expert usage
+    def custom_loglikelihood(self, requests):
+        from lm_eval.api.instance import Instance
+        res = []
+        for request in requests:
+            input_ids = tokenizer(request.arguments[0], return_tensors="pt").input_ids.to(model.device)
+            # Track expert usage
+            batch_expert_usage = track_expert_usage(model, input_ids)
+            task_expert_usage[request.task_name].append(batch_expert_usage)
+            # Original loglikelihood computation
+            res.append(self._loglikelihood([request]))
+        return [item for sublist in res for item in sublist]
+
+    # Override HFLM's loglikelihood method
+    original_loglikelihood = hf_model.loglikelihood
+    hf_model.loglikelihood = custom_loglikelihood.__get__(hf_model, HFLM)
 
     # Run evaluation
     results = evaluator.simple_evaluate(
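Note: `self._loglikelihood` is not a public HFLM method and may not exist in all harness versions, and scoring one request at a time forfeits batching. An alternative sketch (same names as the diff; assumes lm-eval-harness's `Instance.arguments` holds the context string at index 0 and that `Instance.task_name` is set) records usage per request and then delegates the whole batch to the saved, unpatched method:

    original_loglikelihood = hf_model.loglikelihood

    def tracked_loglikelihood(requests):
        # Record expert usage for each request's context before scoring.
        for request in requests:
            enc = tokenizer(request.arguments[0], return_tensors="pt")
            usage = track_expert_usage(model, enc.input_ids.to(model.device))
            task_expert_usage[request.task_name].append(usage)
        # Delegate the actual scoring to the unpatched method, batched as usual.
        return original_loglikelihood(requests)

    hf_model.loglikelihood = tracked_loglikelihood
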
@@ -261,8 +268,8 @@ def run_evaluation_with_tracking(model, hf_model, tokenizer, args) -> Tuple[Dict
         max_batch_size=args.max_batch_size,
     )
 
-    # Restore original forward method
-    hf_model.forward = original_forward
+    # Restore original method
+    hf_model.loglikelihood = original_loglikelihood
 
     # Aggregate expert usage per task
    aggregated_usage = {}
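Note: restoring the override only after a successful run means an exception inside `simple_evaluate` would leave the wrapper permanently patched. Wrapping the call in `try`/`finally` avoids that; a sketch using the diff's own names:

    hf_model.loglikelihood = custom_loglikelihood.__get__(hf_model, HFLM)
    try:
        results = evaluator.simple_evaluate(
            model=hf_model,
            tasks=args.tasks,
            batch_size=args.batch_size,
            max_batch_size=args.max_batch_size,
        )
    finally:
        # Always undo the monkey-patch, even on error.
        hf_model.loglikelihood = original_loglikelihood
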
 