Updated README with better usage instructions and helper scripts

Browse files

Files changed (1) hide show

usage_example.py +159 -0

usage_example.py ADDED Viewed

	@@ -0,0 +1,159 @@

+#!/usr/bin/env python3
+"""
+LOL-EVE Model Usage Example
+This script demonstrates how to download and use the LOL-EVE model
+from Hugging Face Hub.
+Usage:
+    python usage_example.py
+"""
+import torch
+import json
+import os
+from huggingface_hub import hf_hub_download
def download_model_files(repo_id="Marks-lab/LOL-EVE"):
    """Download all necessary model files from Hugging Face Hub.

    Args:
        repo_id: Hub repository to download from. Defaults to the LOL-EVE
            repository, so calling the function with no arguments behaves
            exactly as before.

    Returns:
        A dict mapping a short logical name ('model', 'config', 'tokenizer',
        'tokenizer_config', 'special_tokens') to the value returned by
        ``hf_hub_download`` for that file (used by the callers as a local
        file path).

    Any exception raised by ``hf_hub_download`` propagates to the caller.
    """
    print("Downloading LOL-EVE model files...")
    # Logical name -> file name inside the repository.
    files = {
        'model': 'pytorch_model.bin',
        'config': 'config.json',
        'tokenizer': 'tokenizer.json',
        'tokenizer_config': 'tokenizer_config.json',
        'special_tokens': 'special_tokens_map.json',
    }
    downloaded_files = {}
    for name, filename in files.items():
        print(f"  Downloading {filename}...")
        file_path = hf_hub_download(repo_id=repo_id, filename=filename)
        downloaded_files[name] = file_path
        print(f"    ✅ Downloaded to: {file_path}")
    return downloaded_files
def inspect_model_config(config_path):
    """Print a human-readable summary of the model configuration.

    Args:
        config_path: Path to a JSON configuration file.

    Every field is looked up with a fallback, so a missing key is reported
    as ``unknown`` instead of raising. Returns ``None``; the summary is
    written to standard output.
    """
    print("\nModel Configuration:")
    print("-" * 30)
    # JSON is UTF-8 by specification; do not depend on the locale encoding.
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)

    # 'architectures' may be absent, null or an empty list; all of those
    # must fall back to 'unknown' rather than fail on the [0] lookup.
    architectures = config.get('architectures') or ['unknown']
    if isinstance(architectures, str):
        # A bare string would otherwise be indexed to its first character.
        architecture = architectures
    else:
        architecture = architectures[0]

    print(f"Model Type: {config.get('model_type', 'unknown')}")
    print(f"Architecture: {architecture}")
    print(f"Layers: {config.get('num_layers', 'unknown')}")
    print(f"Embedding Dimension: {config.get('num_embd', 'unknown')}")
    print(f"Attention Heads: {config.get('num_heads', 'unknown')}")
    print(f"Max Position Embeddings: {config.get('max_positional_embedding_size', 'unknown')}")
    print(f"Position Embedding Type: {config.get('position_embedding_type', 'unknown')}")
    print(f"Use Control Codes: {config.get('use_control_codes', 'unknown')}")
def inspect_model_weights(model_path):
    """Print summary statistics for a saved model state dict.

    Args:
        model_path: Path to a file that ``torch.load`` can read and that
            contains a mapping of names to values.

    Prints the total element count, the number of keys containing the
    substring ``'layers'`` (reported under the label "Number of layers",
    so it counts entries, not distinct layers), and the shapes of the first
    ten entries. Returns ``None``.
    """
    print("\nModel Weights:")
    print("-" * 30)
    # weights_only=True limits unpickling to tensors and plain containers,
    # so a downloaded checkpoint cannot execute arbitrary code on load.
    model_state = torch.load(model_path, map_location='cpu', weights_only=True)

    # Skip entries without numel() (e.g. scalars stored next to tensors),
    # matching the hasattr() guard used for shapes below.
    total_params = sum(
        value.numel() for value in model_state.values() if hasattr(value, 'numel')
    )
    print(f"Number of parameters: {total_params:,}")
    print(f"Number of layers: {sum(1 for key in model_state if 'layers' in key)}")

    # Show some key parameters
    print("\nKey parameters:")
    preview_limit = 10
    for key in list(model_state)[:preview_limit]:
        value = model_state[key]
        shape = value.shape if hasattr(value, 'shape') else 'N/A'
        print(f"  {key}: {shape}")
    if len(model_state) > preview_limit:
        print(f"  ... and {len(model_state) - preview_limit} more parameters")
def inspect_tokenizer(tokenizer_config_path, special_tokens_path):
    """Print a summary of the tokenizer configuration and special tokens.

    Args:
        tokenizer_config_path: Path to a JSON file; its ``tokenizer_class``
            and ``vocab_size`` entries are printed (``unknown`` if absent).
        special_tokens_path: Path to a JSON file holding a mapping; its keys
            and each key/value pair are printed.

    Returns ``None``; all output goes to standard output.
    """
    print("\nTokenizer Configuration:")
    print("-" * 30)
    # Load tokenizer config. Read explicitly as UTF-8: JSON is UTF-8 by
    # specification and the locale default may mis-decode non-ASCII tokens.
    with open(tokenizer_config_path, 'r', encoding='utf-8') as f:
        tokenizer_config = json.load(f)
    print(f"Tokenizer Class: {tokenizer_config.get('tokenizer_class', 'unknown')}")
    print(f"Vocab Size: {tokenizer_config.get('vocab_size', 'unknown')}")

    # Load special tokens
    with open(special_tokens_path, 'r', encoding='utf-8') as f:
        special_tokens = json.load(f)
    print(f"Special Tokens: {list(special_tokens.keys())}")

    # Show token mappings. The values are printed as stored in the file;
    # nothing here establishes that they are numeric ids.
    print("\nToken Mappings:")
    for token_name, token_value in special_tokens.items():
        print(f"  {token_name}: {token_value}")
def demonstrate_basic_usage(model_path, config_path):
    """Load the model files and print next-step guidance for using them.

    Args:
        model_path: Path to a weights file readable by ``torch.load``.
        config_path: Path to the JSON model configuration.

    The configuration and weights are loaded only to confirm that both
    files are readable; the weights are not kept. Architecture details are
    looked up with a fallback so a missing key prints ``unknown`` instead
    of raising ``KeyError``. Returns ``None``.
    """
    print("\nBasic Usage Example:")
    print("-" * 30)
    # Load configuration (JSON is UTF-8; do not rely on the locale default).
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)
    # Load model weights purely as a readability check. weights_only=True
    # keeps torch.load from executing arbitrary pickled code from the
    # downloaded file. The result is intentionally not bound to a name.
    torch.load(model_path, map_location='cpu', weights_only=True)
    print("✅ Model files loaded successfully!")
    print("\nTo use this model in your research:")
    print("1. Implement the LOLEVEForCausalLM model class")
    print("2. Load the model weights into your model instance")
    print("3. Use the tokenizer for input preprocessing")
    print("4. Run inference on your genomic sequences")
    print("\nModel architecture details:")
    print(f"- {config.get('num_layers', 'unknown')} transformer layers")
    print(f"- {config.get('num_embd', 'unknown')} embedding dimensions")
    print(f"- {config.get('num_heads', 'unknown')} attention heads")
    print(f"- Max sequence length: {config.get('max_positional_embedding_size', 'unknown')}")
def main():
    """Run the full walkthrough: download, inspect, and summarise.

    Returns:
        0 when every step finished, 1 when any step raised an exception
        (the error is printed rather than propagated).
    """
    divider = "=" * 50
    print("🧬 LOL-EVE Model Usage Example")
    print(divider)

    status = 0
    try:
        # Fetch everything first; the inspection steps below read the
        # local paths this returns.
        paths = download_model_files()

        inspect_model_config(paths['config'])
        inspect_model_weights(paths['model'])
        inspect_tokenizer(paths['tokenizer_config'], paths['special_tokens'])
        demonstrate_basic_usage(paths['model'], paths['config'])

        print("\n" + divider)
        print("✅ Example completed successfully!")
        print("The model files are ready for use in your research.")
    except Exception as err:
        # Top-level boundary of the script: report and convert to an
        # exit status instead of showing a traceback.
        print(f"\n❌ Error: {err}")
        print("Please check your internet connection and try again.")
        status = 1
    return status
if __name__ == "__main__":
    # Use SystemExit directly: the bare exit() helper is injected by the
    # `site` module for interactive sessions and is not guaranteed to exist
    # (e.g. under `python -S` or in frozen applications).
    raise SystemExit(main())