Jinendra committed on
Commit
9f92f08
·
verified ·
1 Parent(s): 85dd72d

Upload 4 files

Browse files
Files changed (4) hide show
  1. config.json +69 -0
  2. convert_to_safetensor.py +103 -0
  3. generation_config.json +8 -0
  4. merges.txt +0 -0
config.json ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Salesforce/codet5-base",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "classifier_dropout": 0.0,
8
+ "d_ff": 3072,
9
+ "d_kv": 64,
10
+ "d_model": 768,
11
+ "decoder_start_token_id": 0,
12
+ "dense_act_fn": "relu",
13
+ "dropout_rate": 0.1,
14
+ "eos_token_id": 2,
15
+ "feed_forward_proj": "relu",
16
+ "gradient_checkpointing": false,
17
+ "id2label": {
18
+ "0": "LABEL_0"
19
+ },
20
+ "initializer_factor": 1.0,
21
+ "is_encoder_decoder": true,
22
+ "is_gated_act": false,
23
+ "label2id": {
24
+ "LABEL_0": 0
25
+ },
26
+ "layer_norm_epsilon": 1e-06,
27
+ "model_type": "t5",
28
+ "n_positions": 512,
29
+ "num_decoder_layers": 12,
30
+ "num_heads": 12,
31
+ "num_layers": 12,
32
+ "output_past": true,
33
+ "pad_token_id": 0,
34
+ "relative_attention_max_distance": 128,
35
+ "relative_attention_num_buckets": 32,
36
+ "task_specific_params": {
37
+ "summarization": {
38
+ "early_stopping": true,
39
+ "length_penalty": 2.0,
40
+ "max_length": 200,
41
+ "min_length": 30,
42
+ "no_repeat_ngram_size": 3,
43
+ "num_beams": 4,
44
+ "prefix": "summarize: "
45
+ },
46
+ "translation_en_to_de": {
47
+ "early_stopping": true,
48
+ "max_length": 300,
49
+ "num_beams": 4,
50
+ "prefix": "translate English to German: "
51
+ },
52
+ "translation_en_to_fr": {
53
+ "early_stopping": true,
54
+ "max_length": 300,
55
+ "num_beams": 4,
56
+ "prefix": "translate English to French: "
57
+ },
58
+ "translation_en_to_ro": {
59
+ "early_stopping": true,
60
+ "max_length": 300,
61
+ "num_beams": 4,
62
+ "prefix": "translate English to Romanian: "
63
+ }
64
+ },
65
+ "torch_dtype": "float32",
66
+ "transformers_version": "4.33.3",
67
+ "use_cache": true,
68
+ "vocab_size": 32100
69
+ }
convert_to_safetensor.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import shutil
4
+ import torch
5
+ from collections import defaultdict
6
+ from safetensors.torch import load_file, save_file
7
+ from tqdm import tqdm
8
+
9
def shared_pointers(tensors):
    """Find groups of entries that report the same ``data_ptr()``.

    Args:
        tensors: Mapping of name -> object exposing a ``data_ptr()`` method
            (the caller in this file passes a loaded checkpoint dict).

    Returns:
        A list of name lists, one per address reported by more than one
        entry. Groups and the names inside them keep first-seen order.
    """
    names_by_address = {}
    for name, tensor in tensors.items():
        names_by_address.setdefault(tensor.data_ptr(), []).append(name)
    # Only addresses that are referenced by two or more names are "shared".
    return [group for group in names_by_address.values() if len(group) >= 2]
14
+
15
def check_file_size(sf_filename, pt_filename):
    """Fail if the safetensors file is more than 1% larger than the source file.

    Only growth is checked: a safetensors file that is smaller than the
    PyTorch file always passes.

    Args:
        sf_filename: Path of the written safetensors file.
        pt_filename: Path of the original PyTorch file.

    Raises:
        RuntimeError: If the safetensors file exceeds the PyTorch file size
            by more than 1%.
        OSError: If either file cannot be stat'ed.
    """
    sf_size = os.path.getsize(sf_filename)
    pt_size = os.path.getsize(pt_filename)

    if pt_size == 0:
        # A relative difference is undefined against an empty reference file;
        # treat any non-empty output as exceeding the limit instead of
        # crashing with ZeroDivisionError.
        oversized = sf_size > 0
    else:
        oversized = (sf_size - pt_size) / pt_size > 0.01

    if oversized:
        raise RuntimeError(f"File size difference exceeds 1% between {sf_filename} and {pt_filename}")
20
+
21
def convert_file(pt_filename, sf_filename, copy_add_data=True):
    """Convert one PyTorch checkpoint file into a safetensors file.

    Steps: load the checkpoint on CPU, unwrap an optional ``"state_dict"``
    entry, drop every alias of a shared tensor except the first name, cast
    floating-point tensors to half precision, save, size-check, optionally
    copy the remaining files of the source folder, then reload the written
    file and compare it tensor by tensor.

    Args:
        pt_filename: Path of the PyTorch checkpoint to read.
        sf_filename: Path of the safetensors file to write.
        copy_add_data: When true, also copy the non-``.bin``/non-``.py``
            files of the source folder into the destination folder.

    Raises:
        RuntimeError: If the size check fails or a reloaded tensor differs
            from the tensor that was saved.
    """
    # dirname() returns "" for a bare filename; "" is rejected by
    # os.makedirs/os.listdir, so fall back to the current directory.
    source_folder = os.path.dirname(pt_filename) or "."
    dest_folder = os.path.dirname(sf_filename) or "."

    # NOTE(security): torch.load unpickles the file and can execute arbitrary
    # code; only run this on checkpoints from a trusted source.
    loaded = torch.load(pt_filename, map_location="cpu")
    loaded = loaded.get("state_dict", loaded)

    # Keep only the first name of every group of tensors sharing storage.
    for shared_weights in shared_pointers(loaded):
        for name in shared_weights[1:]:
            loaded.pop(name)

    converted = {}
    for key, tensor in loaded.items():
        tensor = tensor.contiguous()
        # Casting integer/bool tensors to float16 would silently corrupt
        # them, so only floating-point tensors are down-cast.
        converted[key] = tensor.half() if tensor.is_floating_point() else tensor
    loaded = converted

    os.makedirs(dest_folder, exist_ok=True)
    save_file(loaded, sf_filename, metadata={"format": "pt"})
    check_file_size(sf_filename, pt_filename)
    if copy_add_data:
        copy_additional_files(source_folder, dest_folder)

    # Round-trip verification of what was actually written to disk.
    reloaded = load_file(sf_filename)
    for key, tensor in loaded.items():
        if not torch.equal(tensor, reloaded[key]):
            raise RuntimeError(f"Mismatch in tensors for key {key}.")
44
+
45
def rename(pt_filename):
    """Map a PyTorch checkpoint file name to its safetensors counterpart.

    Every occurrence of ``"pytorch_model"`` becomes ``"model"`` and every
    occurrence of ``".bin"`` becomes ``".safetensors"``; names containing
    neither substring are returned unchanged.
    """
    with_model_stem = pt_filename.replace("pytorch_model", "model")
    return with_model_stem.replace(".bin", ".safetensors")
47
+
48
def copy_additional_files(source_folder, dest_folder):
    """Copy the auxiliary files of ``source_folder`` into ``dest_folder``.

    Regular files are copied unless their name ends with ``.bin`` or
    ``.py``; sub-directories are ignored.

    Args:
        source_folder: Folder to read from.
        dest_folder: Folder to copy into. It is created when missing.

    Raises:
        OSError: If ``source_folder`` cannot be listed or a copy fails.
    """
    entries = os.listdir(source_folder)

    # Without an existing directory shutil.copy would write every file to a
    # single *file* named dest_folder, each copy overwriting the previous one.
    os.makedirs(dest_folder, exist_ok=True)

    # Copying a file onto itself raises shutil.SameFileError; nothing to do
    # when both folders are the same location.
    if os.path.samefile(source_folder, dest_folder):
        return

    for entry in entries:
        if entry.endswith(('.bin', '.py')):
            continue
        entry_path = os.path.join(source_folder, entry)
        if os.path.isfile(entry_path):
            shutil.copy(entry_path, dest_folder)
53
+
54
def find_index_file(source_folder):
    """Return the name of a ``*.bin.index.json`` file in ``source_folder``.

    The first matching entry in ``os.listdir`` order is returned (file name
    only, not a full path); ``None`` is returned when nothing matches.
    """
    matches = (
        entry
        for entry in os.listdir(source_folder)
        if entry.endswith('.bin.index.json')
    )
    return next(matches, None)
59
+
60
def convert_files(source_folder, dest_folder, delete_old):
    """Convert every shard listed in a sharded checkpoint's index file.

    Reads the ``*.bin.index.json`` file found in ``source_folder``, converts
    each distinct file named in its ``"weight_map"``, copies the remaining
    auxiliary files, and writes ``model.safetensors.index.json`` to
    ``dest_folder`` with the shard names rewritten via ``rename``.

    Args:
        source_folder: Folder containing the index file and the shards.
        dest_folder: Folder receiving the converted shards and the new index.
        delete_old: When true, remove each source shard right after it has
            been converted.

    Raises:
        RuntimeError: If no index file exists in ``source_folder``.
    """
    index_name = find_index_file(source_folder)
    if not index_name:
        raise RuntimeError("Index file not found. Please ensure the correct folder is specified.")

    with open(os.path.join(source_folder, index_name)) as index_fh:
        index_data = json.load(index_fh)

    # Several weights usually live in the same shard; convert each shard once.
    shard_names = set(index_data["weight_map"].values())
    for shard_name in tqdm(shard_names):
        shard_path = os.path.join(source_folder, shard_name)
        target_path = os.path.join(dest_folder, rename(shard_name))
        convert_file(shard_path, target_path, copy_add_data=False)
        if delete_old:
            os.remove(shard_path)

    # Auxiliary files are copied once here instead of once per shard.
    copy_additional_files(source_folder, dest_folder)

    with open(os.path.join(dest_folder, "model.safetensors.index.json"), "w") as out_fh:
        renamed_map = {
            weight: rename(shard)
            for weight, shard in index_data["weight_map"].items()
        }
        updated_index = dict(index_data)
        updated_index["weight_map"] = renamed_map
        json.dump(updated_index, out_fh, indent=4)
82
+
83
def main():
    """Prompt for folders and options, then run the conversion.

    Asks for the source folder (blank means the directory of this script),
    the destination folder (blank means ``<source>/<source name>_safetensors``)
    and whether the old PyTorch files should be deleted. A folder containing
    ``pytorch_model.bin`` is converted as a single file; any other folder is
    handled as a sharded checkpoint.
    """
    script_dir = os.path.dirname(os.path.realpath(__file__))

    source_folder = input("Source folder for PyTorch files (leave blank for script's directory): ").strip()
    if not source_folder:
        source_folder = script_dir

    dest_folder = input("Destination folder for SafeTensors files (leave blank for default): ").strip()
    if dest_folder == "":
        # Default: a sibling-named sub-folder inside the source folder.
        source_name = os.path.basename(os.path.normpath(source_folder))
        dest_folder = os.path.join(source_folder, source_name + "_safetensors")

    answer = input("Delete old PyTorch files? (Y/N): ")
    delete_old = answer.strip().upper() == 'Y'

    if "pytorch_model.bin" not in os.listdir(source_folder):
        # No single-file checkpoint present: treat the folder as sharded.
        convert_files(source_folder, dest_folder, delete_old)
        return

    checkpoint_path = os.path.join(source_folder, "pytorch_model.bin")
    convert_file(checkpoint_path, os.path.join(dest_folder, "model.safetensors"), copy_add_data=True)
    if delete_old:
        os.remove(checkpoint_path)


if __name__ == "__main__":
    main()
generation_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "decoder_start_token_id": 0,
5
+ "eos_token_id": 2,
6
+ "pad_token_id": 0,
7
+ "transformers_version": "4.33.3"
8
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff