Add tokenizer files and update documentation
Browse files
- Added tokenizer.json, tokenizer_config.json, special_tokens_map.json, spiece.model
- Updated README with comprehensive tokenizer usage examples
- Updated config.json to include tokenizer file information
- Now provides complete package for text-to-text generation
- Track spiece.model with Git LFS
- .gitattributes +1 -0
- README.md +51 -11
- config.json +6 -0
- special_tokens_map.json +107 -0
- spiece.model +3 -0
- test_download/downloaded_models/.DS_Store +0 -0
- test_download/downloaded_models/.gitattributes +1 -0
- test_download/downloaded_models/README.md +162 -0
- test_download/downloaded_models/config.json +38 -0
- test_download/downloaded_models/flan_t5_base_decoder.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- test_download/downloaded_models/flan_t5_base_decoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- test_download/downloaded_models/flan_t5_base_decoder.mlpackage/Manifest.json +3 -0
- test_download/downloaded_models/flan_t5_base_encoder.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
- test_download/downloaded_models/flan_t5_base_encoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
- test_download/downloaded_models/flan_t5_base_encoder.mlpackage/Manifest.json +3 -0
- tokenizer.json +0 -0
- tokenizer_config.json +113 -0
.gitattributes
CHANGED
|
@@ -1 +1,2 @@
|
|
| 1 |
*.mlpackage/** filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 1 |
*.mlpackage/** filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
README.md
CHANGED
|
@@ -15,9 +15,16 @@ This repository contains CoreML versions of Google's FLAN-T5 Base model, optimiz
|
|
| 15 |
|
| 16 |
## Files
|
| 17 |
|
|
|
|
| 18 |
- `flan_t5_base_encoder.mlpackage` - T5 Encoder component
|
| 19 |
- `flan_t5_base_decoder.mlpackage` - T5 Decoder component
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
## Model Architecture
|
| 22 |
|
| 23 |
FLAN-T5 is an encoder-decoder transformer model that has been converted into two separate CoreML models:
|
|
@@ -83,38 +90,71 @@ let decoderOutput = try decoderModel.prediction(from: decoderInput)
|
|
| 83 |
let logits = decoderOutput.featureValue(for: "logits")?.multiArrayValue
|
| 84 |
```
|
| 85 |
|
| 86 |
-
### Python Usage (for testing)
|
| 87 |
|
| 88 |
```python
|
| 89 |
import coremltools as ct
|
| 90 |
import numpy as np
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
-
# Load models
|
| 93 |
encoder = ct.models.MLModel("flan_t5_base_encoder.mlpackage")
|
| 94 |
decoder = ct.models.MLModel("flan_t5_base_decoder.mlpackage")
|
| 95 |
|
| 96 |
-
# Example inference
|
| 97 |
-
input_ids = np.random.randint(0, 32128, (1, 512)).astype(np.int32)
|
| 98 |
-
attention_mask = np.ones((1, 512), dtype=np.int32)
|
|
|
|
| 99 |
|
| 100 |
# Run encoder
|
| 101 |
encoder_output = encoder.predict({
|
| 102 |
-
"input_ids": input_ids,
|
| 103 |
-
"attention_mask": attention_mask
|
| 104 |
})
|
| 105 |
hidden_states = encoder_output["hidden_states"]
|
| 106 |
|
| 107 |
-
# Run decoder
|
| 108 |
-
decoder_input_ids = np.random.randint(0, 32128, (1, 512)).astype(np.int32)
|
| 109 |
-
decoder_attention_mask = np.ones((1, 512), dtype=np.int32)
|
|
|
|
|
|
|
| 110 |
|
|
|
|
| 111 |
decoder_output = decoder.predict({
|
| 112 |
"decoder_input_ids": decoder_input_ids,
|
| 113 |
"encoder_hidden_states": hidden_states,
|
| 114 |
"decoder_attention_mask": decoder_attention_mask,
|
| 115 |
-
"encoder_attention_mask": attention_mask
|
| 116 |
})
|
| 117 |
logits = decoder_output["logits"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
```
|
| 119 |
|
| 120 |
## Model Capabilities
|
|
|
|
| 15 |
|
| 16 |
## Files
|
| 17 |
|
| 18 |
+
### Model Files
|
| 19 |
- `flan_t5_base_encoder.mlpackage` - T5 Encoder component
|
| 20 |
- `flan_t5_base_decoder.mlpackage` - T5 Decoder component
|
| 21 |
|
| 22 |
+
### Tokenizer Files
|
| 23 |
+
- `tokenizer.json` - Fast tokenizer configuration
|
| 24 |
+
- `tokenizer_config.json` - Tokenizer metadata and settings
|
| 25 |
+
- `special_tokens_map.json` - Special token mappings
|
| 26 |
+
- `spiece.model` - SentencePiece model for tokenization
|
| 27 |
+
|
| 28 |
## Model Architecture
|
| 29 |
|
| 30 |
FLAN-T5 is an encoder-decoder transformer model that has been converted into two separate CoreML models:
|
|
|
|
| 90 |
let logits = decoderOutput.featureValue(for: "logits")?.multiArrayValue
|
| 91 |
```
|
| 92 |
|
| 93 |
+
### Python Usage with Tokenizer
|
| 94 |
|
| 95 |
```python
|
| 96 |
import coremltools as ct
|
| 97 |
import numpy as np
|
| 98 |
+
from transformers import T5Tokenizer
|
| 99 |
+
|
| 100 |
+
# Load tokenizer
|
| 101 |
+
tokenizer = T5Tokenizer.from_pretrained("./") # Uses local tokenizer files
|
| 102 |
|
| 103 |
+
# Load CoreML models
|
| 104 |
encoder = ct.models.MLModel("flan_t5_base_encoder.mlpackage")
|
| 105 |
decoder = ct.models.MLModel("flan_t5_base_decoder.mlpackage")
|
| 106 |
|
| 107 |
+
# Example text-to-text generation
|
| 108 |
+
input_text = "translate English to French: Hello, how are you?"
|
| 109 |
+
inputs = tokenizer(input_text, return_tensors="np", padding="max_length",
|
| 110 |
+
truncation=True, max_length=512)
|
| 111 |
|
| 112 |
# Run encoder
|
| 113 |
encoder_output = encoder.predict({
|
| 114 |
+
"input_ids": inputs["input_ids"].astype(np.int32),
|
| 115 |
+
"attention_mask": inputs["attention_mask"].astype(np.int32)
|
| 116 |
})
|
| 117 |
hidden_states = encoder_output["hidden_states"]
|
| 118 |
|
| 119 |
+
# For decoder, start with decoder start token
|
| 120 |
+
decoder_input_ids = np.array([[tokenizer.pad_token_id]], dtype=np.int32)
|
| 121 |
+
decoder_input_ids = np.pad(decoder_input_ids, ((0, 0), (0, 511)),
|
| 122 |
+
constant_values=tokenizer.pad_token_id).astype(np.int32)
|
| 123 |
+
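decoder_attention_mask = (np.arange(decoder_input_ids.shape[1])[None, :] < 1).astype(np.int32)  # attend only to the start position; comparing against pad_token_id would mask it too, since T5's start token is the pad token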
|
| 124 |
|
| 125 |
+
# Run decoder
|
| 126 |
decoder_output = decoder.predict({
|
| 127 |
"decoder_input_ids": decoder_input_ids,
|
| 128 |
"encoder_hidden_states": hidden_states,
|
| 129 |
"decoder_attention_mask": decoder_attention_mask,
|
| 130 |
+
"encoder_attention_mask": inputs["attention_mask"].astype(np.int32)
|
| 131 |
})
|
| 132 |
logits = decoder_output["logits"]
|
| 133 |
+
|
| 134 |
+
# Get predicted token (greedy decoding)
|
| 135 |
+
predicted_token_id = np.argmax(logits[0, 0, :])
|
| 136 |
+
print(f"Predicted token: {tokenizer.decode([predicted_token_id])}")
|
| 137 |
+
```
|
| 138 |
+
|
| 139 |
+
### Basic Tokenizer Usage
|
| 140 |
+
|
| 141 |
+
```python
|
| 142 |
+
from transformers import T5Tokenizer
|
| 143 |
+
|
| 144 |
+
# Load tokenizer from downloaded files
|
| 145 |
+
tokenizer = T5Tokenizer.from_pretrained("./")
|
| 146 |
+
|
| 147 |
+
# Tokenize text
|
| 148 |
+
text = "summarize: The quick brown fox jumps over the lazy dog."
|
| 149 |
+
tokens = tokenizer(text, return_tensors="np", padding="max_length",
|
| 150 |
+
truncation=True, max_length=512)
|
| 151 |
+
|
| 152 |
+
print(f"Input IDs shape: {tokens['input_ids'].shape}")
|
| 153 |
+
print(f"Attention mask shape: {tokens['attention_mask'].shape}")
|
| 154 |
+
|
| 155 |
+
# Decode tokens back to text
|
| 156 |
+
decoded = tokenizer.decode(tokens['input_ids'][0], skip_special_tokens=True)
|
| 157 |
+
print(f"Decoded: {decoded}")
|
| 158 |
```
|
| 159 |
|
| 160 |
## Model Capabilities
|
config.json
CHANGED
|
@@ -34,5 +34,11 @@
|
|
| 34 |
"model_files": {
|
| 35 |
"encoder": "flan_t5_base_encoder.mlpackage",
|
| 36 |
"decoder": "flan_t5_base_decoder.mlpackage"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
}
|
| 38 |
}
|
|
|
|
| 34 |
"model_files": {
|
| 35 |
"encoder": "flan_t5_base_encoder.mlpackage",
|
| 36 |
"decoder": "flan_t5_base_decoder.mlpackage"
|
| 37 |
+
},
|
| 38 |
+
"tokenizer_files": {
|
| 39 |
+
"tokenizer": "tokenizer.json",
|
| 40 |
+
"tokenizer_config": "tokenizer_config.json",
|
| 41 |
+
"special_tokens_map": "special_tokens_map.json",
|
| 42 |
+
"spiece_model": "spiece.model"
|
| 43 |
}
|
| 44 |
}
|
special_tokens_map.json
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<extra_id_0>",
|
| 4 |
+
"<extra_id_1>",
|
| 5 |
+
"<extra_id_2>",
|
| 6 |
+
"<extra_id_3>",
|
| 7 |
+
"<extra_id_4>",
|
| 8 |
+
"<extra_id_5>",
|
| 9 |
+
"<extra_id_6>",
|
| 10 |
+
"<extra_id_7>",
|
| 11 |
+
"<extra_id_8>",
|
| 12 |
+
"<extra_id_9>",
|
| 13 |
+
"<extra_id_10>",
|
| 14 |
+
"<extra_id_11>",
|
| 15 |
+
"<extra_id_12>",
|
| 16 |
+
"<extra_id_13>",
|
| 17 |
+
"<extra_id_14>",
|
| 18 |
+
"<extra_id_15>",
|
| 19 |
+
"<extra_id_16>",
|
| 20 |
+
"<extra_id_17>",
|
| 21 |
+
"<extra_id_18>",
|
| 22 |
+
"<extra_id_19>",
|
| 23 |
+
"<extra_id_20>",
|
| 24 |
+
"<extra_id_21>",
|
| 25 |
+
"<extra_id_22>",
|
| 26 |
+
"<extra_id_23>",
|
| 27 |
+
"<extra_id_24>",
|
| 28 |
+
"<extra_id_25>",
|
| 29 |
+
"<extra_id_26>",
|
| 30 |
+
"<extra_id_27>",
|
| 31 |
+
"<extra_id_28>",
|
| 32 |
+
"<extra_id_29>",
|
| 33 |
+
"<extra_id_30>",
|
| 34 |
+
"<extra_id_31>",
|
| 35 |
+
"<extra_id_32>",
|
| 36 |
+
"<extra_id_33>",
|
| 37 |
+
"<extra_id_34>",
|
| 38 |
+
"<extra_id_35>",
|
| 39 |
+
"<extra_id_36>",
|
| 40 |
+
"<extra_id_37>",
|
| 41 |
+
"<extra_id_38>",
|
| 42 |
+
"<extra_id_39>",
|
| 43 |
+
"<extra_id_40>",
|
| 44 |
+
"<extra_id_41>",
|
| 45 |
+
"<extra_id_42>",
|
| 46 |
+
"<extra_id_43>",
|
| 47 |
+
"<extra_id_44>",
|
| 48 |
+
"<extra_id_45>",
|
| 49 |
+
"<extra_id_46>",
|
| 50 |
+
"<extra_id_47>",
|
| 51 |
+
"<extra_id_48>",
|
| 52 |
+
"<extra_id_49>",
|
| 53 |
+
"<extra_id_50>",
|
| 54 |
+
"<extra_id_51>",
|
| 55 |
+
"<extra_id_52>",
|
| 56 |
+
"<extra_id_53>",
|
| 57 |
+
"<extra_id_54>",
|
| 58 |
+
"<extra_id_55>",
|
| 59 |
+
"<extra_id_56>",
|
| 60 |
+
"<extra_id_57>",
|
| 61 |
+
"<extra_id_58>",
|
| 62 |
+
"<extra_id_59>",
|
| 63 |
+
"<extra_id_60>",
|
| 64 |
+
"<extra_id_61>",
|
| 65 |
+
"<extra_id_62>",
|
| 66 |
+
"<extra_id_63>",
|
| 67 |
+
"<extra_id_64>",
|
| 68 |
+
"<extra_id_65>",
|
| 69 |
+
"<extra_id_66>",
|
| 70 |
+
"<extra_id_67>",
|
| 71 |
+
"<extra_id_68>",
|
| 72 |
+
"<extra_id_69>",
|
| 73 |
+
"<extra_id_70>",
|
| 74 |
+
"<extra_id_71>",
|
| 75 |
+
"<extra_id_72>",
|
| 76 |
+
"<extra_id_73>",
|
| 77 |
+
"<extra_id_74>",
|
| 78 |
+
"<extra_id_75>",
|
| 79 |
+
"<extra_id_76>",
|
| 80 |
+
"<extra_id_77>",
|
| 81 |
+
"<extra_id_78>",
|
| 82 |
+
"<extra_id_79>",
|
| 83 |
+
"<extra_id_80>",
|
| 84 |
+
"<extra_id_81>",
|
| 85 |
+
"<extra_id_82>",
|
| 86 |
+
"<extra_id_83>",
|
| 87 |
+
"<extra_id_84>",
|
| 88 |
+
"<extra_id_85>",
|
| 89 |
+
"<extra_id_86>",
|
| 90 |
+
"<extra_id_87>",
|
| 91 |
+
"<extra_id_88>",
|
| 92 |
+
"<extra_id_89>",
|
| 93 |
+
"<extra_id_90>",
|
| 94 |
+
"<extra_id_91>",
|
| 95 |
+
"<extra_id_92>",
|
| 96 |
+
"<extra_id_93>",
|
| 97 |
+
"<extra_id_94>",
|
| 98 |
+
"<extra_id_95>",
|
| 99 |
+
"<extra_id_96>",
|
| 100 |
+
"<extra_id_97>",
|
| 101 |
+
"<extra_id_98>",
|
| 102 |
+
"<extra_id_99>"
|
| 103 |
+
],
|
| 104 |
+
"eos_token": "</s>",
|
| 105 |
+
"pad_token": "<pad>",
|
| 106 |
+
"unk_token": "<unk>"
|
| 107 |
+
}
|
spiece.model
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d60acb128cf7b7f2536e8f38a5b18a05535c9e14c7a355904270e15b0945ea86
|
| 3 |
+
size 791656
|
test_download/downloaded_models/.DS_Store
ADDED
|
Binary file (6.15 kB). View file
|
|
|
test_download/downloaded_models/.gitattributes
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
*.mlpackage/** filter=lfs diff=lfs merge=lfs -text
|
test_download/downloaded_models/README.md
ADDED
|
@@ -0,0 +1,162 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# FLAN-T5 Base CoreML Models
|
| 2 |
+
|
| 3 |
+
This repository contains CoreML versions of Google's FLAN-T5 Base model, optimized for efficient inference on Apple devices (macOS/iOS).
|
| 4 |
+
|
| 5 |
+
## Model Details
|
| 6 |
+
|
| 7 |
+
- **Base Model**: [google/flan-t5-base](https://huggingface.co/google/flan-t5-base)
|
| 8 |
+
- **Architecture**: T5 (Text-to-Text Transfer Transformer)
|
| 9 |
+
- **Model Size**:
|
| 10 |
+
- Encoder: 215MB
|
| 11 |
+
- Decoder: 324MB
|
| 12 |
+
- **Framework**: CoreML (.mlpackage format)
|
| 13 |
+
- **Precision**: FP16 for optimal performance
|
| 14 |
+
- **Deployment Target**: iOS 15+ / macOS 12+
|
| 15 |
+
|
| 16 |
+
## Files
|
| 17 |
+
|
| 18 |
+
- `flan_t5_base_encoder.mlpackage` - T5 Encoder component
|
| 19 |
+
- `flan_t5_base_decoder.mlpackage` - T5 Decoder component
|
| 20 |
+
|
| 21 |
+
## Model Architecture
|
| 22 |
+
|
| 23 |
+
FLAN-T5 is an encoder-decoder transformer model that has been converted into two separate CoreML models:
|
| 24 |
+
|
| 25 |
+
### Encoder
|
| 26 |
+
- **Input**: `input_ids` (shape: [1, 512], dtype: int32), `attention_mask` (shape: [1, 512], dtype: int32)
|
| 27 |
+
- **Output**: `hidden_states` (shape: [1, 512, 768], dtype: float32)
|
| 28 |
+
|
| 29 |
+
### Decoder
|
| 30 |
+
- **Inputs**:
|
| 31 |
+
- `decoder_input_ids` (shape: [1, 512], dtype: int32)
|
| 32 |
+
- `encoder_hidden_states` (shape: [1, 512, 768], dtype: float32)
|
| 33 |
+
- `decoder_attention_mask` (shape: [1, 512], dtype: int32)
|
| 34 |
+
- `encoder_attention_mask` (shape: [1, 512], dtype: int32)
|
| 35 |
+
- **Output**: `logits` (shape: [1, 512, 32128], dtype: float32)
|
| 36 |
+
|
| 37 |
+
## Usage
|
| 38 |
+
|
| 39 |
+
### Download Models
|
| 40 |
+
|
| 41 |
+
```bash
|
| 42 |
+
# Download both models
|
| 43 |
+
huggingface-cli download mazhewitt/flan-t5-base-coreml --local-dir ./models
|
| 44 |
+
|
| 45 |
+
# Or download individual models
|
| 46 |
+
huggingface-cli download mazhewitt/flan-t5-base-coreml flan_t5_base_encoder.mlpackage --local-dir ./models
|
| 47 |
+
huggingface-cli download mazhewitt/flan-t5-base-coreml flan_t5_base_decoder.mlpackage --local-dir ./models
|
| 48 |
+
```
|
| 49 |
+
|
| 50 |
+
### Swift/iOS Usage
|
| 51 |
+
|
| 52 |
+
```swift
|
| 53 |
+
import CoreML
|
| 54 |
+
|
| 55 |
+
// Load models
|
| 56 |
+
guard let encoderURL = Bundle.main.url(forResource: "flan_t5_base_encoder", withExtension: "mlpackage"),
|
| 57 |
+
let decoderURL = Bundle.main.url(forResource: "flan_t5_base_decoder", withExtension: "mlpackage") else {
|
| 58 |
+
fatalError("Models not found")
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
let encoderModel = try MLModel(contentsOf: encoderURL)
|
| 62 |
+
let decoderModel = try MLModel(contentsOf: decoderURL)
|
| 63 |
+
|
| 64 |
+
// Create prediction inputs
|
| 65 |
+
let encoderInput = try MLDictionaryFeatureProvider(dictionary: [
|
| 66 |
+
"input_ids": MLMultiArray(/* your input tokens */),
|
| 67 |
+
"attention_mask": MLMultiArray(/* your attention mask */)
|
| 68 |
+
])
|
| 69 |
+
|
| 70 |
+
// Run encoder
|
| 71 |
+
let encoderOutput = try encoderModel.prediction(from: encoderInput)
|
| 72 |
+
let hiddenStates = encoderOutput.featureValue(for: "hidden_states")?.multiArrayValue
|
| 73 |
+
|
| 74 |
+
// Run decoder with encoder outputs
|
| 75 |
+
let decoderInput = try MLDictionaryFeatureProvider(dictionary: [
|
| 76 |
+
"decoder_input_ids": MLMultiArray(/* decoder input tokens */),
|
| 77 |
+
"encoder_hidden_states": hiddenStates!,
|
| 78 |
+
"decoder_attention_mask": MLMultiArray(/* decoder attention mask */),
|
| 79 |
+
"encoder_attention_mask": MLMultiArray(/* encoder attention mask */)
|
| 80 |
+
])
|
| 81 |
+
|
| 82 |
+
let decoderOutput = try decoderModel.prediction(from: decoderInput)
|
| 83 |
+
let logits = decoderOutput.featureValue(for: "logits")?.multiArrayValue
|
| 84 |
+
```
|
| 85 |
+
|
| 86 |
+
### Python Usage (for testing)
|
| 87 |
+
|
| 88 |
+
```python
|
| 89 |
+
import coremltools as ct
|
| 90 |
+
import numpy as np
|
| 91 |
+
|
| 92 |
+
# Load models
|
| 93 |
+
encoder = ct.models.MLModel("flan_t5_base_encoder.mlpackage")
|
| 94 |
+
decoder = ct.models.MLModel("flan_t5_base_decoder.mlpackage")
|
| 95 |
+
|
| 96 |
+
# Example inference
|
| 97 |
+
input_ids = np.random.randint(0, 32128, (1, 512)).astype(np.int32)
|
| 98 |
+
attention_mask = np.ones((1, 512), dtype=np.int32)
|
| 99 |
+
|
| 100 |
+
# Run encoder
|
| 101 |
+
encoder_output = encoder.predict({
|
| 102 |
+
"input_ids": input_ids,
|
| 103 |
+
"attention_mask": attention_mask
|
| 104 |
+
})
|
| 105 |
+
hidden_states = encoder_output["hidden_states"]
|
| 106 |
+
|
| 107 |
+
# Run decoder
|
| 108 |
+
decoder_input_ids = np.random.randint(0, 32128, (1, 512)).astype(np.int32)
|
| 109 |
+
decoder_attention_mask = np.ones((1, 512), dtype=np.int32)
|
| 110 |
+
|
| 111 |
+
decoder_output = decoder.predict({
|
| 112 |
+
"decoder_input_ids": decoder_input_ids,
|
| 113 |
+
"encoder_hidden_states": hidden_states,
|
| 114 |
+
"decoder_attention_mask": decoder_attention_mask,
|
| 115 |
+
"encoder_attention_mask": attention_mask
|
| 116 |
+
})
|
| 117 |
+
logits = decoder_output["logits"]
|
| 118 |
+
```
|
| 119 |
+
|
| 120 |
+
## Model Capabilities
|
| 121 |
+
|
| 122 |
+
FLAN-T5 has been instruction-tuned and can perform various text-to-text tasks:
|
| 123 |
+
|
| 124 |
+
- **Text Summarization**: "summarize: [text]"
|
| 125 |
+
- **Translation**: "translate English to French: [text]"
|
| 126 |
+
- **Question Answering**: "answer the question: [question] context: [context]"
|
| 127 |
+
- **General Instructions**: Direct natural language instructions
|
| 128 |
+
|
| 129 |
+
## Performance Considerations
|
| 130 |
+
|
| 131 |
+
- **Memory**: Encoder (~215MB) + Decoder (~324MB) = ~539MB total
|
| 132 |
+
- **Precision**: FP16 for balance of accuracy and performance
|
| 133 |
+
- **Sequence Length**: Maximum 512 tokens
|
| 134 |
+
- **Device Compatibility**: Apple Neural Engine, GPU, or CPU depending on availability
|
| 135 |
+
|
| 136 |
+
## Conversion Details
|
| 137 |
+
|
| 138 |
+
- **Source Framework**: PyTorch/Transformers
|
| 139 |
+
- **Conversion Tool**: CoreML Tools 8.3.0
|
| 140 |
+
- **Date**: July 2025
|
| 141 |
+
- **Torch Version**: 2.7.1 (with compatibility warnings handled)
|
| 142 |
+
|
| 143 |
+
## License
|
| 144 |
+
|
| 145 |
+
This model follows the same license as the original FLAN-T5 model. Please refer to the [original model card](https://huggingface.co/google/flan-t5-base) for licensing details.
|
| 146 |
+
|
| 147 |
+
## Citation
|
| 148 |
+
|
| 149 |
+
If you use these models, please cite the original FLAN-T5 paper:
|
| 150 |
+
|
| 151 |
+
```bibtex
|
| 152 |
+
@article{chung2022scaling,
|
| 153 |
+
title={Scaling instruction-finetuned language models},
|
| 154 |
+
author={Chung, Hyung Won and Hou, Le and Longpre, Shayne and Zoph, Barret and Tay, Yi and Fedus, William and Li, Eric and Wang, Xuezhi and Mostafazadeh, Nasrin and Shen, Jianmo and others},
|
| 155 |
+
journal={arXiv preprint arXiv:2210.11416},
|
| 156 |
+
year={2022}
|
| 157 |
+
}
|
| 158 |
+
```
|
| 159 |
+
|
| 160 |
+
## Issues and Support
|
| 161 |
+
|
| 162 |
+
For issues specific to these CoreML conversions, please open an issue in this repository. For general FLAN-T5 questions, refer to the original model repository.
|
test_download/downloaded_models/config.json
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_type": "t5",
|
| 3 |
+
"framework": "coreml",
|
| 4 |
+
"base_model": "google/flan-t5-base",
|
| 5 |
+
"conversion_info": {
|
| 6 |
+
"converted_by": "coremltools",
|
| 7 |
+
"version": "8.3.0",
|
| 8 |
+
"date": "2025-07-20",
|
| 9 |
+
"precision": "float16",
|
| 10 |
+
"deployment_target": "iOS15"
|
| 11 |
+
},
|
| 12 |
+
"architecture": {
|
| 13 |
+
"encoder": {
|
| 14 |
+
"input_shape": {
|
| 15 |
+
"input_ids": [1, 512],
|
| 16 |
+
"attention_mask": [1, 512]
|
| 17 |
+
},
|
| 18 |
+
"output_shape": {
|
| 19 |
+
"hidden_states": [1, 512, 768]
|
| 20 |
+
}
|
| 21 |
+
},
|
| 22 |
+
"decoder": {
|
| 23 |
+
"input_shape": {
|
| 24 |
+
"decoder_input_ids": [1, 512],
|
| 25 |
+
"encoder_hidden_states": [1, 512, 768],
|
| 26 |
+
"decoder_attention_mask": [1, 512],
|
| 27 |
+
"encoder_attention_mask": [1, 512]
|
| 28 |
+
},
|
| 29 |
+
"output_shape": {
|
| 30 |
+
"logits": [1, 512, 32128]
|
| 31 |
+
}
|
| 32 |
+
}
|
| 33 |
+
},
|
| 34 |
+
"model_files": {
|
| 35 |
+
"encoder": "flan_t5_base_encoder.mlpackage",
|
| 36 |
+
"decoder": "flan_t5_base_decoder.mlpackage"
|
| 37 |
+
}
|
| 38 |
+
}
|
test_download/downloaded_models/flan_t5_base_decoder.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:91e68f516139c4694bf48b4ad7d9478899ef060ba23ae5230fb7ea0ccc7925f6
|
| 3 |
+
size 1011351
|
test_download/downloaded_models/flan_t5_base_decoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:89887d8432bddf1353aba61261a58f4187bca6687d064422cf85f4e056b208e0
|
| 3 |
+
size 338959296
|
test_download/downloaded_models/flan_t5_base_decoder.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:171ccb76696a53e4566d20ac21d5a4f2e61ec8c1e158ff72a170523d22a79064
|
| 3 |
+
size 617
|
test_download/downloaded_models/flan_t5_base_encoder.mlpackage/Data/com.apple.CoreML/model.mlmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4c30d7f442e7a938d05dd29e63e938ea2b72fe84748bcb82440e8ad658996a9b
|
| 3 |
+
size 144373
|
test_download/downloaded_models/flan_t5_base_encoder.mlpackage/Data/com.apple.CoreML/weights/weight.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7c2977bd6f2cb06e53067c0012b9944f44ef425f4a55e5a133fae9ca74c32876
|
| 3 |
+
size 225560704
|
test_download/downloaded_models/flan_t5_base_encoder.mlpackage/Manifest.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2221222f34d57f6aa55cdacc2f6cc1bfdd5e190fb0fcc0227c77ce06ccc40658
|
| 3 |
+
size 617
|
tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,113 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"additional_special_tokens": [
|
| 3 |
+
"<extra_id_0>",
|
| 4 |
+
"<extra_id_1>",
|
| 5 |
+
"<extra_id_2>",
|
| 6 |
+
"<extra_id_3>",
|
| 7 |
+
"<extra_id_4>",
|
| 8 |
+
"<extra_id_5>",
|
| 9 |
+
"<extra_id_6>",
|
| 10 |
+
"<extra_id_7>",
|
| 11 |
+
"<extra_id_8>",
|
| 12 |
+
"<extra_id_9>",
|
| 13 |
+
"<extra_id_10>",
|
| 14 |
+
"<extra_id_11>",
|
| 15 |
+
"<extra_id_12>",
|
| 16 |
+
"<extra_id_13>",
|
| 17 |
+
"<extra_id_14>",
|
| 18 |
+
"<extra_id_15>",
|
| 19 |
+
"<extra_id_16>",
|
| 20 |
+
"<extra_id_17>",
|
| 21 |
+
"<extra_id_18>",
|
| 22 |
+
"<extra_id_19>",
|
| 23 |
+
"<extra_id_20>",
|
| 24 |
+
"<extra_id_21>",
|
| 25 |
+
"<extra_id_22>",
|
| 26 |
+
"<extra_id_23>",
|
| 27 |
+
"<extra_id_24>",
|
| 28 |
+
"<extra_id_25>",
|
| 29 |
+
"<extra_id_26>",
|
| 30 |
+
"<extra_id_27>",
|
| 31 |
+
"<extra_id_28>",
|
| 32 |
+
"<extra_id_29>",
|
| 33 |
+
"<extra_id_30>",
|
| 34 |
+
"<extra_id_31>",
|
| 35 |
+
"<extra_id_32>",
|
| 36 |
+
"<extra_id_33>",
|
| 37 |
+
"<extra_id_34>",
|
| 38 |
+
"<extra_id_35>",
|
| 39 |
+
"<extra_id_36>",
|
| 40 |
+
"<extra_id_37>",
|
| 41 |
+
"<extra_id_38>",
|
| 42 |
+
"<extra_id_39>",
|
| 43 |
+
"<extra_id_40>",
|
| 44 |
+
"<extra_id_41>",
|
| 45 |
+
"<extra_id_42>",
|
| 46 |
+
"<extra_id_43>",
|
| 47 |
+
"<extra_id_44>",
|
| 48 |
+
"<extra_id_45>",
|
| 49 |
+
"<extra_id_46>",
|
| 50 |
+
"<extra_id_47>",
|
| 51 |
+
"<extra_id_48>",
|
| 52 |
+
"<extra_id_49>",
|
| 53 |
+
"<extra_id_50>",
|
| 54 |
+
"<extra_id_51>",
|
| 55 |
+
"<extra_id_52>",
|
| 56 |
+
"<extra_id_53>",
|
| 57 |
+
"<extra_id_54>",
|
| 58 |
+
"<extra_id_55>",
|
| 59 |
+
"<extra_id_56>",
|
| 60 |
+
"<extra_id_57>",
|
| 61 |
+
"<extra_id_58>",
|
| 62 |
+
"<extra_id_59>",
|
| 63 |
+
"<extra_id_60>",
|
| 64 |
+
"<extra_id_61>",
|
| 65 |
+
"<extra_id_62>",
|
| 66 |
+
"<extra_id_63>",
|
| 67 |
+
"<extra_id_64>",
|
| 68 |
+
"<extra_id_65>",
|
| 69 |
+
"<extra_id_66>",
|
| 70 |
+
"<extra_id_67>",
|
| 71 |
+
"<extra_id_68>",
|
| 72 |
+
"<extra_id_69>",
|
| 73 |
+
"<extra_id_70>",
|
| 74 |
+
"<extra_id_71>",
|
| 75 |
+
"<extra_id_72>",
|
| 76 |
+
"<extra_id_73>",
|
| 77 |
+
"<extra_id_74>",
|
| 78 |
+
"<extra_id_75>",
|
| 79 |
+
"<extra_id_76>",
|
| 80 |
+
"<extra_id_77>",
|
| 81 |
+
"<extra_id_78>",
|
| 82 |
+
"<extra_id_79>",
|
| 83 |
+
"<extra_id_80>",
|
| 84 |
+
"<extra_id_81>",
|
| 85 |
+
"<extra_id_82>",
|
| 86 |
+
"<extra_id_83>",
|
| 87 |
+
"<extra_id_84>",
|
| 88 |
+
"<extra_id_85>",
|
| 89 |
+
"<extra_id_86>",
|
| 90 |
+
"<extra_id_87>",
|
| 91 |
+
"<extra_id_88>",
|
| 92 |
+
"<extra_id_89>",
|
| 93 |
+
"<extra_id_90>",
|
| 94 |
+
"<extra_id_91>",
|
| 95 |
+
"<extra_id_92>",
|
| 96 |
+
"<extra_id_93>",
|
| 97 |
+
"<extra_id_94>",
|
| 98 |
+
"<extra_id_95>",
|
| 99 |
+
"<extra_id_96>",
|
| 100 |
+
"<extra_id_97>",
|
| 101 |
+
"<extra_id_98>",
|
| 102 |
+
"<extra_id_99>"
|
| 103 |
+
],
|
| 104 |
+
"eos_token": "</s>",
|
| 105 |
+
"extra_ids": 100,
|
| 106 |
+
"model_max_length": 512,
|
| 107 |
+
"name_or_path": "google/t5-v1_1-base",
|
| 108 |
+
"pad_token": "<pad>",
|
| 109 |
+
"sp_model_kwargs": {},
|
| 110 |
+
"special_tokens_map_file": "/home/younes_huggingface_co/.cache/huggingface/hub/models--google--t5-v1_1-base/snapshots/650d7745bf1e502d6949b22cc19155cd656d3d4e/special_tokens_map.json",
|
| 111 |
+
"tokenizer_class": "T5Tokenizer",
|
| 112 |
+
"unk_token": "<unk>"
|
| 113 |
+
}
|