HFLM forward to loglikelihoods

scripts/evalexperts.py (+27 -20)
@@ -169,12 +169,12 @@ def load_custom_model(args) -> Tuple[AutoModelForCausalLM, AutoTokenizer, HFLM]:
 
     # Create HFLM wrapper for evaluation
     hf_model = HFLM(
-        pretrained=model,
-        tokenizer=tokenizer,
+        pretrained=args.model_path,  # Pass model path as string
         device=args.device,
         batch_size=args.batch_size,
         max_batch_size=args.max_batch_size,
-        dtype=args.dtype
+        dtype=args.dtype,
+        trust_remote_code=args.trust_remote_code
     )
 
     logger.info("Custom model, tokenizer, and HFLM wrapper loaded successfully")
@@ -194,16 +194,20 @@ def track_expert_usage(model, input_ids: torch.Tensor) -> List[Dict[int, int]]:
     expert_usage = [{} for _ in range(model.config.num_hidden_layers)]
 
     def hook_fn(module, input, output, layer_idx):
-        # Assuming the module outputs selected expert indices
         if hasattr(module, 'selected_experts'):  # Hypothetical attribute
             selected_experts = module.selected_experts  # Shape: (batch_size, seq_len, top_k)
             for expert_idx in selected_experts.flatten().tolist():
                 expert_usage[layer_idx][expert_idx] = expert_usage[layer_idx].get(expert_idx, 0) + 1
+        elif hasattr(module, 'routing_weights'):  # Alternative: use routing weights
+            weights = module.routing_weights  # Shape: (batch_size, seq_len, num_experts)
+            top_k_indices = torch.topk(weights, k=model.config.top_k, dim=-1).indices
+            for expert_idx in top_k_indices.flatten().tolist():
+                expert_usage[layer_idx][expert_idx] = expert_usage[layer_idx].get(expert_idx, 0) + 1
 
     # Register hooks for each MoE layer
     hooks = []
     for i, layer in enumerate(model.transformer.layers):  # Adjust based on actual model structure
-        if hasattr(layer, 'moe'):
+        if hasattr(layer, 'moe'):
             hook = layer.moe.register_forward_hook(lambda m, inp, out: hook_fn(m, inp, out, i))
             hooks.append(hook)
 
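The hasattr probes exist because MoE implementations expose routing decisions under different names, and both attribute names here are explicitly hypothetical. Below is a self-contained sketch of the same forward-hook counting technique, with the toy router and all of its names invented for illustration:

import torch
import torch.nn as nn

class ToyMoE(nn.Module):
    # Toy router that records its top-k choices, mimicking the
    # hypothetical selected_experts attribute probed above.
    def __init__(self, hidden=16, num_experts=4, top_k=2):
        super().__init__()
        self.gate = nn.Linear(hidden, num_experts)
        self.top_k = top_k

    def forward(self, x):
        logits = self.gate(x)  # (batch, seq, num_experts)
        self.selected_experts = torch.topk(logits, k=self.top_k, dim=-1).indices
        return x

moe = ToyMoE()
usage = {}

def hook_fn(module, inputs, output):
    for idx in module.selected_experts.flatten().tolist():
        usage[idx] = usage.get(idx, 0) + 1

handle = moe.register_forward_hook(hook_fn)
moe(torch.randn(2, 5, 16))  # one forward pass populates the counts
handle.remove()             # detach the hook once tracking is done
print(usage)                # counts sum to batch * seq * top_k = 20

One caution about the registration loop in the hunk above: the lambda closes over the loop variable i, so by the time the hooks fire, every one of them sees the final layer index. Binding the index per hook, e.g. lambda m, inp, out, i=i: hook_fn(m, inp, out, i), attributes counts to the correct layer.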
@@ -237,19 +241,22 @@ def run_evaluation_with_tracking(model, hf_model, tokenizer, args) -> Tuple[Dict
     # Initialize expert usage tracking for each task
     task_expert_usage = {task: [] for task in args.tasks}
 
-    # Custom
-    def
+    # Custom batch processing to track expert usage
+    def custom_loglikelihood(self, requests):
+        from lm_eval.api.instance import Instance
+        res = []
+        for request in requests:
+            input_ids = tokenizer(request.arguments[0], return_tensors="pt").input_ids.to(model.device)
+            # Track expert usage
+            batch_expert_usage = track_expert_usage(model, input_ids)
+            task_expert_usage[request.task_name].append(batch_expert_usage)
+            # Original loglikelihood computation
+            res.append(self._loglikelihood([request]))
+        return [item for sublist in res for item in sublist]
+
+    # Override HFLM's loglikelihood method
+    original_loglikelihood = hf_model.loglikelihood
+    hf_model.loglikelihood = custom_loglikelihood.__get__(hf_model, HFLM)
 
     # Run evaluation
     results = evaluator.simple_evaluate(
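Rebinding a plain function as a bound method via __get__ is the standard descriptor-protocol trick; a minimal self-contained sketch with invented names:

class Scorer:
    def score(self, x):
        return x * 2

def traced_score(self, x):
    print("score() called")       # side effect, like the expert tracking above
    return Scorer.score(self, x)  # delegate to the original implementation

s = Scorer()
original = s.score                         # save the bound original for later restore
s.score = traced_score.__get__(s, Scorer)  # shadow it with an instance-level bound method
assert s.score(3) == 6                     # traced version runs, printing first
s.score = original                         # restore; the instance behaves as before

Note that inside custom_loglikelihood the per-request call goes through self._loglikelihood, which does not appear to be a public HFLM method; delegating to the saved original_loglikelihood instead would avoid depending on a private helper.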
@@ -261,8 +268,8 @@ def run_evaluation_with_tracking(model, hf_model, tokenizer, args) -> Tuple[Dict
         max_batch_size=args.max_batch_size,
     )
 
-    # Restore original
-    hf_model.
+    # Restore original method
+    hf_model.loglikelihood = original_loglikelihood
 
     # Aggregate expert usage per task
     aggregated_usage = {}
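One caveat with the manual restore: if simple_evaluate raises, the patched method stays in place for any later use of hf_model. A sketch of the same sequence wrapped in try/finally, reusing the names from the diff (the model= and tasks= keywords are assumed here; only max_batch_size is visible in the hunk):

original_loglikelihood = hf_model.loglikelihood
hf_model.loglikelihood = custom_loglikelihood.__get__(hf_model, HFLM)
try:
    results = evaluator.simple_evaluate(
        model=hf_model,
        tasks=args.tasks,
        max_batch_size=args.max_batch_size,
    )
finally:
    hf_model.loglikelihood = original_loglikelihood  # runs even if evaluation fails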
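The shapes involved in the aggregation are fixed by the code above: task_expert_usage[task] is a list of per-batch results, each itself a list of per-layer {expert_index: count} dicts. A hypothetical aggregator consistent with those shapes:

from collections import Counter

def aggregate_usage(task_expert_usage):
    # Sum expert counts across batches, keeping one Counter per layer.
    aggregated = {}
    for task, batches in task_expert_usage.items():
        if not batches:
            aggregated[task] = []
            continue
        totals = [Counter() for _ in batches[0]]
        for batch in batches:
            for layer_idx, layer_counts in enumerate(batch):
                totals[layer_idx].update(layer_counts)
        aggregated[task] = [dict(c) for c in totals]
    return aggregated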