cshearer committed on
Commit
f7b65a5
·
verified ·
1 Parent(s): db70f7b

Updated README with better usage instructions and helper scripts

Browse files
Files changed (1) hide show
  1. usage_example.py +159 -0
usage_example.py ADDED
@@ -0,0 +1,159 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ LOL-EVE Model Usage Example
4
+
5
+ This script demonstrates how to download and use the LOL-EVE model
6
+ from Hugging Face Hub.
7
+
8
+ Usage:
9
+ python usage_example.py
10
+ """
11
+
12
+ import torch
13
+ import json
14
+ import os
15
+ from huggingface_hub import hf_hub_download
16
+
17
def download_model_files():
    """Fetch the LOL-EVE artifacts from the Hugging Face Hub.

    Every file is requested from the ``Marks-lab/LOL-EVE`` repository via
    ``hf_hub_download`` and progress is printed for each one.

    Returns:
        dict: Short artifact name (``'model'``, ``'config'``,
        ``'tokenizer'``, ``'tokenizer_config'``, ``'special_tokens'``)
        mapped to the value returned by ``hf_hub_download`` for that file.
    """
    print("Downloading LOL-EVE model files...")

    hub_repo = "Marks-lab/LOL-EVE"

    # (short name, file name inside the repository), in download order.
    wanted = (
        ('model', 'pytorch_model.bin'),
        ('config', 'config.json'),
        ('tokenizer', 'tokenizer.json'),
        ('tokenizer_config', 'tokenizer_config.json'),
        ('special_tokens', 'special_tokens_map.json'),
    )

    local_paths = {}
    for label, remote_name in wanted:
        print(f" Downloading {remote_name}...")
        local_path = hf_hub_download(repo_id=hub_repo, filename=remote_name)
        local_paths[label] = local_path
        print(f" ✅ Downloaded to: {local_path}")

    return local_paths
40
+
41
def inspect_model_config(config_path):
    """Print a summary of the model configuration stored in a JSON file.

    Keys that are missing from the file are reported as ``unknown`` instead
    of raising.

    Args:
        config_path: Path to the JSON configuration file to read.

    Returns:
        None. The summary is written to standard output.
    """
    print("\nModel Configuration:")
    print("-" * 30)

    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)

    # ``architectures`` may be absent, null, or an empty list; fall back to
    # the placeholder in all three cases rather than failing on ``[0]``.
    architectures = config.get('architectures') or ['unknown']

    print(f"Model Type: {config.get('model_type', 'unknown')}")
    print(f"Architecture: {architectures[0]}")
    print(f"Layers: {config.get('num_layers', 'unknown')}")
    print(f"Embedding Dimension: {config.get('num_embd', 'unknown')}")
    print(f"Attention Heads: {config.get('num_heads', 'unknown')}")
    print(f"Max Position Embeddings: {config.get('max_positional_embedding_size', 'unknown')}")
    print(f"Position Embedding Type: {config.get('position_embedding_type', 'unknown')}")
    print(f"Use Control Codes: {config.get('use_control_codes', 'unknown')}")
57
+
58
def inspect_model_weights(model_path):
    """Print summary statistics for a checkpoint saved with ``torch.save``.

    Reports the total element count across tensor entries, the number of
    distinct ``...layers.<index>`` groups found in the keys, and the shapes
    of the first ten entries.

    Args:
        model_path: Path to the checkpoint file to load.

    Returns:
        None. The summary is written to standard output.
    """
    import re

    print("\nModel Weights:")
    print("-" * 30)

    # weights_only=True restricts unpickling to tensors and plain containers,
    # so a downloaded checkpoint cannot execute arbitrary code while loading.
    model_state = torch.load(model_path, map_location='cpu', weights_only=True)

    # Entries without ``numel`` (non-tensor metadata) are skipped, matching
    # the ``hasattr`` guard applied to ``shape`` further down.
    total_params = sum(
        value.numel() for value in model_state.values() if hasattr(value, 'numel')
    )

    # Count distinct "<prefix>layers.<index>" groups. Counting every key that
    # merely contains "layers" would report the number of tensors instead.
    layer_pattern = re.compile(r"^(.*?layers\.\d+)(?:\.|$)")
    layer_ids = set()
    for key in model_state:
        match = layer_pattern.match(key)
        if match:
            layer_ids.add(match.group(1))

    print(f"Number of parameters: {total_params:,}")
    print(f"Number of layers: {len(layer_ids)}")

    # Show some key parameters
    print("\nKey parameters:")
    for key in list(model_state)[:10]:  # Show first 10 keys
        shape = model_state[key].shape if hasattr(model_state[key], 'shape') else 'N/A'
        print(f" {key}: {shape}")

    if len(model_state) > 10:
        print(f" ... and {len(model_state) - 10} more parameters")
77
+
78
def inspect_tokenizer(tokenizer_config_path, special_tokens_path):
    """Print a summary of the tokenizer configuration and special tokens.

    Args:
        tokenizer_config_path: Path to the tokenizer configuration JSON file.
        special_tokens_path: Path to the special-tokens map JSON file.

    Returns:
        None. The summary is written to standard output.
    """
    print("\nTokenizer Configuration:")
    print("-" * 30)

    # JSON is UTF-8 by specification; reading with the platform default
    # encoding can corrupt or reject non-ASCII token strings.
    with open(tokenizer_config_path, 'r', encoding='utf-8') as f:
        tokenizer_config = json.load(f)

    print(f"Tokenizer Class: {tokenizer_config.get('tokenizer_class', 'unknown')}")
    print(f"Vocab Size: {tokenizer_config.get('vocab_size', 'unknown')}")

    # Load special tokens
    with open(special_tokens_path, 'r', encoding='utf-8') as f:
        special_tokens = json.load(f)

    print(f"Special Tokens: {list(special_tokens)}")

    # Show each entry of the special-tokens map exactly as stored in the file.
    print("\nToken Mappings:")
    for name, value in special_tokens.items():
        print(f" {name}: {value}")
100
+
101
def demonstrate_basic_usage(model_path, config_path):
    """Load the configuration and weights, then print next-step guidance.

    Both files are read to confirm they are loadable before the success
    message is printed. Configuration keys that are missing are reported as
    ``unknown``, consistent with ``.get(..., 'unknown')`` lookups.

    Args:
        model_path: Path to the checkpoint file to load with ``torch.load``.
        config_path: Path to the JSON configuration file.

    Returns:
        None. All output is written to standard output.
    """
    print("\nBasic Usage Example:")
    print("-" * 30)

    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)

    # The weights are loaded only to verify the checkpoint is readable; the
    # result is intentionally discarded. weights_only=True restricts
    # unpickling so a downloaded file cannot run arbitrary code on load.
    torch.load(model_path, map_location='cpu', weights_only=True)

    print("✅ Model files loaded successfully!")
    print("\nTo use this model in your research:")
    print("1. Implement the LOLEVEForCausalLM model class")
    print("2. Load the model weights into your model instance")
    print("3. Use the tokenizer for input preprocessing")
    print("4. Run inference on your genomic sequences")

    print("\nModel architecture details:")
    print(f"- {config.get('num_layers', 'unknown')} transformer layers")
    print(f"- {config.get('num_embd', 'unknown')} embedding dimensions")
    print(f"- {config.get('num_heads', 'unknown')} attention heads")
    print(f"- Max sequence length: {config.get('max_positional_embedding_size', 'unknown')}")
125
+
126
def main():
    """Run the whole example and report a process exit status.

    Downloads the model files, prints the configuration, weight and
    tokenizer summaries, then prints the basic-usage guidance.

    Returns:
        int: ``0`` when every step completed, ``1`` if any step raised.
    """
    rule = "=" * 50

    print("🧬 LOL-EVE Model Usage Example")
    print(rule)

    try:
        # Step 1: fetch the files.
        paths = download_model_files()

        # Step 2: configuration summary.
        inspect_model_config(paths['config'])

        # Step 3: weights summary.
        inspect_model_weights(paths['model'])

        # Step 4: tokenizer summary.
        inspect_tokenizer(paths['tokenizer_config'], paths['special_tokens'])

        # Step 5: usage guidance.
        demonstrate_basic_usage(paths['model'], paths['config'])

        print("\n" + rule)
        print("✅ Example completed successfully!")
        print("The model files are ready for use in your research.")
    except Exception as err:
        # Top-level boundary: report the failure and signal it via the status.
        print(f"\n❌ Error: {err}")
        print("Please check your internet connection and try again.")
        return 1
    else:
        return 0
157
+
158
if __name__ == "__main__":
    # Propagate main()'s status as the process exit code. SystemExit is raised
    # directly because the bare ``exit`` helper is installed by the ``site``
    # module for interactive use and is not guaranteed to exist in a script.
    raise SystemExit(main())