Upload folder using huggingface_hub
- README.md +9 -0
- config.json +17 -2
- configuration_openelm.py +10 -0
- generation_config.json +6 -0
- modeling_openelm.py +5 -3
- test_proj/.gitignore +10 -0
- test_proj/.python-version +1 -0
- test_proj/README.md +0 -0
- test_proj/main.py +23 -0
- test_proj/pyproject.toml +14 -0
- test_proj/uv.lock +0 -0
README.md
CHANGED
@@ -0,0 +1,9 @@
+---
+license: apple-amlr
+license_name: apple-sample-code-license
+license_link: LICENSE
+---
+
+# OpenELM
+
+
config.json
CHANGED
@@ -8,6 +8,7 @@
     "AutoModelForCausalLM": "modeling_openelm.OpenELMForCausalLM"
   },
   "bos_token_id": 1,
+  "dtype": "float16",
   "eos_token_id": 2,
   "ffn_dim_divisor": 256,
   "ffn_multipliers": [
@@ -78,11 +79,25 @@
     0.5,
     1.0
   ],
+  "quantization_config": {
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "bfloat16",
+    "bnb_4bit_quant_storage": "uint8",
+    "bnb_4bit_quant_type": "nf4",
+    "bnb_4bit_use_double_quant": true,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
+    "quant_method": "bitsandbytes"
+  },
   "rope_freq_constant": 10000,
   "rope_max_length": 4096,
   "share_input_output_layers": true,
-  "torch_dtype": "float16",
-  "transformers_version": "4.39.3",
+  "transformers_version": "4.57.3",
   "use_cache": true,
   "vocab_size": 32000
 }
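The new `quantization_config` block records that this checkpoint was serialized with bitsandbytes 4-bit NF4 weights, using double quantization and bfloat16 compute. As a minimal sketch, the same settings map onto a `BitsAndBytesConfig` at quantization time; the commit does not name the full-precision source checkpoint, so the repo id below is an assumption:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Mirrors the quantization_config embedded in config.json above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

model = AutoModelForCausalLM.from_pretrained(
    "apple/OpenELM-270M",  # assumption: the unquantized base is not named in this commit
    quantization_config=bnb_config,
    trust_remote_code=True,
)
```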
configuration_openelm.py
CHANGED
@@ -316,3 +316,13 @@ class OpenELMConfig(PretrainedConfig):
         # check num_query_heads divisible by num_kv_heads for every layer
         for layer_idx in range(len(query_dims)):
             assert self.num_query_heads[layer_idx] % self.num_kv_heads[layer_idx] == 0
+
+    @property
+    def num_hidden_layers(self) -> int:
+        """Alias for num_transformer_layers for compatibility with transformers library."""
+        return self.num_transformer_layers
+
+    @num_hidden_layers.setter
+    def num_hidden_layers(self, value: int) -> None:
+        """Setter for num_hidden_layers that updates num_transformer_layers."""
+        self.num_transformer_layers = value
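The added property aliases OpenELM's `num_transformer_layers` to the `num_hidden_layers` name that the rest of the transformers stack expects. A quick illustration of the round trip:

```python
from transformers import AutoConfig

config = AutoConfig.from_pretrained(
    "Sharjeelbaig/apple-open_elm_4bit", trust_remote_code=True
)

# Reads resolve to the underlying OpenELM attribute,
assert config.num_hidden_layers == config.num_transformer_layers

# and writes through the alias update it as well.
config.num_hidden_layers = 16
assert config.num_transformer_layers == 16
```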
generation_config.json
ADDED
@@ -0,0 +1,6 @@
+{
+  "_from_model_config": true,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "transformers_version": "4.57.3"
+}
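This file carries the default BOS/EOS token ids in the standalone generation config that current transformers releases expect; `generate()` picks it up automatically, and it can also be inspected directly:

```python
from transformers import GenerationConfig

gen_config = GenerationConfig.from_pretrained("Sharjeelbaig/apple-open_elm_4bit")
print(gen_config.bos_token_id, gen_config.eos_token_id)  # 1 2
```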
modeling_openelm.py
CHANGED
@@ -661,7 +661,9 @@ class OpenELMModel(OpenELMPreTrainedModel):
 
         past_seen_tokens = 0
         if use_cache:  # kept for BC (cache positions)
-            if not isinstance(past_key_values, StaticCache):
+            if past_key_values is None:
+                past_key_values = DynamicCache()
+            elif not isinstance(past_key_values, (StaticCache, DynamicCache)):
                 past_key_values = DynamicCache.from_legacy_cache(past_key_values)
             past_seen_tokens = past_key_values.get_seq_length()
 
@@ -920,8 +922,8 @@ class OpenELMForCausalLM(OpenELMPreTrainedModel):
         if past_key_values is not None:
             if isinstance(past_key_values, Cache):
                 cache_length = past_key_values.get_seq_length()
-                past_length = past_key_values.seen_tokens
-                max_cache_length = past_key_values.get_max_length()
+                past_length = cache_length
+                max_cache_length = getattr(past_key_values, 'get_max_length', lambda: None)()
             else:
                 cache_length = past_length = past_key_values[0][0].shape[2]
                 max_cache_length = None
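Both hunks adapt the remote code to the current cache API: `past_key_values` may now arrive as `None` or as a `Cache` object rather than the legacy tuple of tuples, and `Cache.get_max_length()` was removed in recent transformers releases (superseded by `get_max_cache_shape()`), which the `getattr` guard works around. A minimal standalone sketch of the same normalization pattern, assuming transformers >= 4.57:

```python
from transformers import DynamicCache, StaticCache

def normalize_cache(past_key_values):
    # Mirrors the updated OpenELMModel logic: accept None, Cache objects,
    # or legacy tuples, and always hand back a Cache instance.
    if past_key_values is None:
        return DynamicCache()
    if isinstance(past_key_values, (StaticCache, DynamicCache)):
        return past_key_values
    return DynamicCache.from_legacy_cache(past_key_values)

cache = normalize_cache(None)
print(cache.get_seq_length())  # 0 for a fresh cache

# DynamicCache no longer exposes get_max_length(), so probe for it
# instead of calling it unconditionally; None means "no fixed bound".
max_cache_length = getattr(cache, "get_max_length", lambda: None)()
print(max_cache_length)  # None
```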
test_proj/.gitignore
ADDED
@@ -0,0 +1,10 @@
+# Python-generated files
+__pycache__/
+*.py[oc]
+build/
+dist/
+wheels/
+*.egg-info
+
+# Virtual environments
+.venv
test_proj/.python-version
ADDED
@@ -0,0 +1 @@
+3.12
test_proj/README.md
ADDED
File without changes
test_proj/main.py
ADDED
@@ -0,0 +1,23 @@
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
+model = AutoModelForCausalLM.from_pretrained(
+    "Sharjeelbaig/apple-open_elm_4bit",
+    trust_remote_code=True,
+)
+
+tokenizer = AutoTokenizer.from_pretrained(
+    "NousResearch/Llama-2-7b-chat-hf",
+    use_fast=False
+)
+
+prompt = "Hello, how are you?"
+inputs = tokenizer(prompt, return_tensors="pt")
+
+outputs = model.generate(
+    **inputs,
+    max_new_tokens=50,
+    do_sample=True,
+    temperature=0.7,
+)
+
+print(tokenizer.decode(outputs[0], skip_special_tokens=True))
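OpenELM ships without a tokenizer of its own; Apple's model cards pair it with the Llama 2 tokenizer, which is presumably why this script loads the slow SentencePiece tokenizer from `NousResearch/Llama-2-7b-chat-hf` (an ungated mirror of the gated meta-llama repo). That choice also explains the `sentencepiece` and `protobuf` entries in pyproject.toml below.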
test_proj/pyproject.toml
ADDED
@@ -0,0 +1,14 @@
+[project]
+name = "test-proj"
+version = "0.1.0"
+description = "Add your description here"
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "accelerate>=1.12.0",
+    "bitsandbytes>=0.49.0",
+    "protobuf>=6.33.2",
+    "sentencepiece>=0.2.1",
+    "torch>=2.9.1",
+    "transformers>=4.57.3",
+]
test_proj/uv.lock
ADDED
The diff for this file is too large to render.