Changes
- scripts/downloadweights.py  +20 -0
- scripts/eval.py  +301 -0
- scripts/run_eval.py  +0 -0
scripts/downloadweights.py
ADDED
@@ -0,0 +1,20 @@
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "allenai/OLMoE-7B",      # Exact name from Hugging Face
    trust_remote_code=True,  # Required if they use custom modeling_olmoe.py
    use_safetensors=True     # Ensures .safetensors file is used
)

tokenizer = AutoTokenizer.from_pretrained("allenai/OLMoE-7B")
print(model.config)
print(model.__class__)

from transformers.utils.hub import cached_file

# Example: get the path to the config file or model weights index
config_path = cached_file("allenai/OLMoE-7B", "config.json", trust_remote_code=True)
print(config_path)
import os
model_path = os.path.dirname(config_path)
print(model_path)
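The last three lines recover the local snapshot directory from the cached config path. For reference, an equivalent way to obtain that directory is huggingface_hub's snapshot_download; a minimal sketch (not part of the commit), assuming the same repo id as above:

from huggingface_hub import snapshot_download

# Downloads the repo into the local HF cache (or reuses it) and returns the
# snapshot directory, i.e. the same path derived from config.json above.
local_dir = snapshot_download(repo_id="allenai/OLMoE-7B")
print(local_dir)
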
scripts/eval.py
ADDED
@@ -0,0 +1,301 @@
#!/usr/bin/env python3
"""
eval.py Evaluation script for modified OLMoE model using lm-evaluation-harness
"""
import argparse
import json
import os
from typing import Dict, List, Optional
import torch
from transformers import AutoConfig, AutoTokenizer
from lm_eval import evaluator
# Remove the problematic import - we don't need get_model
import logging

# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def parse_args():
    """Parse command line arguments."""
    parser = argparse.ArgumentParser(description="Evaluate myolmoe model")

    # Model arguments
    parser.add_argument("--model_path", type=str, default="/home/ianwu/.cache/huggingface/hub/models--allenai--OLMoE-7B/snapshots/6d84c48581ece794365f2b8e9cfb043c68ade9c5",
                        help="Path to the pretrained model")
    parser.add_argument("--model_type", type=str, default="hf-auto",
                        help="Model type for lm-eval")

    # Routing configuration
    parser.add_argument("--routing_type", type=str, default="non_deterministic",
                        choices=["dense", "sparse", "non_deterministic"],
                        help="Type of routing to use")
    parser.add_argument("--router_temperature", type=float, default=1.0,
                        help="Temperature for non-deterministic routing")
    parser.add_argument("--num_experts_per_tok", type=int, default=8,
                        help="Number of experts per token")

    # Evaluation arguments
    parser.add_argument("--tasks", type=str, nargs="+",
                        default=['mmlu'],
                        # , 'gsm8k'
                        # default=["hellaswag", "arc_easy", "arc_challenge", "winogrande"],
                        help="Tasks to evaluate on")
    parser.add_argument("--num_fewshot", type=int, default=0,
                        help="Number of few-shot examples")
    parser.add_argument("--batch_size", type=int, default=64,
                        help="Batch size for evaluation")
    parser.add_argument("--max_batch_size", type=int, default=None,
                        help="Maximum batch size")
    parser.add_argument("--device", type=str, default="cuda",
                        help="Device to use for evaluation")
    parser.add_argument("--dtype", type=str, default="float16",
                        choices=["float16", "bfloat16", "float32"],
                        help="Data type for model weights")

    # Output arguments
    parser.add_argument("--output_dir", type=str, default="./eval_results",
                        help="Directory to save evaluation results")
    parser.add_argument("--output_filename", type=str, default=None,
                        help="Filename for results (auto-generated if not provided)")

    # Additional arguments
    parser.add_argument("--limit", type=int, default=None,
                        help="Limit number of examples per task")
    parser.add_argument("--write_out", action="store_true",
                        help="Write out individual predictions")
    parser.add_argument("--trust_remote_code", action="store_true",
                        help="Trust remote code when loading model")
    parser.add_argument("--verbosity", type=str, default="INFO",
                        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
                        help="Logging verbosity level")

    return parser.parse_args()

def setup_model_config(model_path: str, routing_config: Dict) -> None:
    """
    Update model configuration with routing settings.
    """
    config_path = os.path.join(model_path, "config.json")

    if os.path.exists(config_path):
        with open(config_path, 'r') as f:
            config = json.load(f)

        # Update routing configuration
        config.update(routing_config)

        # Save updated config
        with open(config_path, 'w') as f:
            json.dump(config, f, indent=2)

        logger.info(f"Updated model config with routing settings: {routing_config}")
    else:
        logger.warning(f"Config file not found at {config_path}")


def validate_model_setup(model_path: str) -> bool:
    """
    Validate that the model can be loaded with the current configuration.
    """
    try:
        config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
        tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)

        logger.info(f"Model validation successful:")
        logger.info(f"  - Model type: {config.model_type}")
        logger.info(f"  - Routing type: {getattr(config, 'routing_type', 'not specified')}")
        logger.info(f"  - Vocab size: {config.vocab_size}")
        logger.info(f"  - Hidden size: {config.hidden_size}")
        logger.info(f"  - Num layers: {config.num_hidden_layers}")
        logger.info(f"  - Num experts: {getattr(config, 'num_experts', 'not specified')}")

        return True
    except Exception as e:
        logger.error(f"Model validation failed: {e}")
        return False

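# Usage sketch: validate_model_setup() above is not invoked by main() below; a
# minimal pre-flight check could look like
#
#     args = parse_args()
#     if not validate_model_setup(args.model_path):
#         raise SystemExit("Model validation failed; check --model_path")
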
def run_evaluation(args) -> Dict:
    """Run evaluation with properly wrapped model."""
    from transformers import AutoModelForCausalLM
    import sys, os
    sys.path.insert(0, os.path.join(os.path.dirname(__file__), "myolmoe_model"))
    from modeling_myolmoe import MyOLMoEForCausalLM  # your modified class
    from lm_eval.models.huggingface import HFLM  # Add this import

    # 1. Load config and override routing parameters
    config = AutoConfig.from_pretrained(
        args.model_path,
        trust_remote_code=True
    )
    config.routing_type = args.routing_type
    config.router_temperature = args.router_temperature
    config.num_experts_per_tok = args.num_experts_per_tok

    # 2. Load model with updated config
    torch_dtype = {
        "float16": torch.float16,
        "bfloat16": torch.bfloat16,
        "float32": torch.float32
    }[args.dtype]

    from modeling_myolmoe import MyOLMoEForCausalLM

    hf_model = MyOLMoEForCausalLM.from_pretrained(
        args.model_path,
        config=config,
        torch_dtype=torch_dtype,
        device_map="auto"
    ).eval()

    # 3. Wrap the Hugging Face model in HFLM
    eval_model = HFLM(
        pretrained=hf_model,  # Pass the initialized model
        device=args.device,
        batch_size=args.batch_size,
        max_batch_size=args.max_batch_size,
        dtype=args.dtype
    )

    # 4. Run evaluation with the wrapped model
    results = evaluator.simple_evaluate(
        model=eval_model,  # Pass the wrapped model
        tasks=args.tasks,
        num_fewshot=args.num_fewshot,
        limit=args.limit,
        write_out=args.write_out,
        verbosity=args.verbosity,
    )

    return results

import numpy as np
import torch

def make_serializable(obj):
    if isinstance(obj, dict):
        return {k: make_serializable(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [make_serializable(v) for v in obj]
    elif isinstance(obj, tuple):
        return tuple(make_serializable(v) for v in obj)
    # NumPy scalars
    elif isinstance(obj, (np.integer, np.floating)):
        return obj.item()
    # NumPy dtypes
    elif isinstance(obj, np.dtype):
        return str(obj)
    # PyTorch tensor → list
    elif isinstance(obj, torch.Tensor):
        return obj.tolist()
    # PyTorch dtype (e.g. torch.float16)
    elif isinstance(obj, torch.dtype):
        return str(obj)
    # Anything else leave alone
    else:
        return obj

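# Usage sketch: make_serializable() recursively converts NumPy/PyTorch scalars,
# dtypes, and tensors into JSON-safe Python types, so the full results dict can
# be passed straight to json.dump, e.g.
#
#     make_serializable({"acc": np.float64(0.5), "dtype": torch.float16,
#                        "preds": torch.tensor([1, 2])})
#     # -> {"acc": 0.5, "dtype": "torch.float16", "preds": [1, 2]}
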
def save_results(results: Dict, args) -> str:
    """Save evaluation results to file, after converting to JSON-safe types."""
    os.makedirs(args.output_dir, exist_ok=True)

    # build filename exactly as before…
    if args.output_filename is None:
        model_name = os.path.basename(args.model_path.rstrip('/'))
        tasks_str = "_".join(args.tasks[:3])
        if len(args.tasks) > 3:
            tasks_str += f"_and_{len(args.tasks)-3}_more"
        filename = f"{model_name}_{args.routing_type}_{tasks_str}_results.json"
    else:
        filename = args.output_filename
        if not filename.endswith('.json'):
            filename += '.json'
    output_path = os.path.join(args.output_dir, filename)

    metadata = {
        "model_path": args.model_path,
        "routing_type": args.routing_type,
        "router_temperature": args.router_temperature,
        "num_experts_per_tok": args.num_experts_per_tok,
        "tasks": args.tasks,
        "num_fewshot": args.num_fewshot,
        "batch_size": args.batch_size,
        "device": args.device,
        "dtype": args.dtype,
    }
    results_with_metadata = {
        "metadata": metadata,
        "results": results
    }

    # convert everything
    serializable = make_serializable(results_with_metadata)

    # write to disk
    with open(output_path, 'w') as f:
        json.dump(serializable, f, indent=2)

    logger.info(f"Results saved to {output_path}")
    return output_path


def print_summary(results: Dict, routing_type: str) -> None:
    """
    Print a summary of evaluation results.
    """
    print(f"\n{'='*60}")
    print(f"EVALUATION SUMMARY - Routing: {routing_type.upper()}")
    print(f"{'='*60}")

    if "results" in results:
        for task, metrics in results["results"].items():
            if isinstance(metrics, dict):
                print(f"\n{task.upper()}:")
                for metric, value in metrics.items():
                    if isinstance(value, (int, float)):
                        if metric.endswith('_stderr'):
                            continue  # Skip stderr for summary
                        stderr_key = f"{metric}_stderr"
                        stderr = metrics.get(stderr_key, 0)
                        print(f"  {metric}: {value:.4f} (±{stderr:.4f})")
                    else:
                        print(f"  {metric}: {value}")

    print(f"\n{'='*60}")


def main():
    """Main evaluation function."""
    args = parse_args()

    # Set logging level
    numeric_level = getattr(logging, args.verbosity.upper(), None)
    if isinstance(numeric_level, int):
        logging.getLogger().setLevel(numeric_level)
        logger.setLevel(numeric_level)

    try:
        # Run evaluation
        results = run_evaluation(args)

        # Save results
        output_path = save_results(results, args)

        # Print summary
        print_summary(results, args.routing_type)

        logger.info("Evaluation completed successfully!")

    except Exception as e:
        logger.error(f"Evaluation failed: {e}")
        raise


if __name__ == "__main__":
    main()
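
For reference, a minimal way to drive the new script from the repository root (a sketch, assuming the default model path and the modeling_myolmoe module are available on the machine; the flags mirror the argparse options defined above):

import subprocess

# Small sanity run: sparse routing, 100 MMLU examples, modest batch size.
subprocess.run(
    [
        "python", "scripts/eval.py",
        "--routing_type", "sparse",
        "--tasks", "mmlu",
        "--limit", "100",
        "--batch_size", "16",
    ],
    check=True,
)
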
scripts/run_eval.py
DELETED
File without changes