root committed on
Commit ·
34eb6e3
1
Parent(s): 7cf213a
update modeling with print
Browse files- modeling_srv1_tp.py +14 -7
modeling_srv1_tp.py
CHANGED
|
@@ -839,17 +839,22 @@ class SRV1ForCausalLMParallel(SRV1ForCausalLM):
|
|
| 839 |
trust_remote_code = kwargs.get("trust_remote_code", False)
|
| 840 |
quantize = kwargs.get("quantize", None)
|
| 841 |
dtype = kwargs.get("dtype", None)
|
| 842 |
-
|
|
|
|
|
|
|
| 843 |
self.process_group, rank, world_size = initialize_torch_distributed()
|
| 844 |
-
|
| 845 |
if torch.cuda.is_available():
|
| 846 |
device = torch.device(f"cuda:{rank}")
|
| 847 |
dtype = torch.float16 if dtype is None else dtype
|
| 848 |
-
|
| 849 |
else:
|
| 850 |
raise NotImplementedError("Flash is only available on GPU")
|
| 851 |
|
| 852 |
-
|
|
|
|
|
|
|
|
|
|
| 853 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
| 854 |
model_id,
|
| 855 |
revision=revision,
|
|
@@ -857,14 +862,15 @@ class SRV1ForCausalLMParallel(SRV1ForCausalLM):
|
|
| 857 |
truncation_side="left",
|
| 858 |
trust_remote_code=trust_remote_code,
|
| 859 |
)
|
| 860 |
-
|
| 861 |
-
# config = SRV1Config.from_pretrained(model_id, revision=revision, trust_remote_code=trust_remote_code)
|
| 862 |
config.quantize = quantize
|
| 863 |
torch.distributed.barrier(group=self.process_group)
|
| 864 |
import glob
|
| 865 |
filenames = glob.glob(f"{model_id}/*.safetensors")
|
| 866 |
-
|
|
|
|
| 867 |
weights = Weights(filenames=filenames, device=device, dtype=dtype, process_group=self.process_group)
|
|
|
|
| 868 |
print(f"RANK[{rank}]: Loaded Weights success. device:{device}")
|
| 869 |
|
| 870 |
torch.distributed.barrier(group=self.process_group)
|
|
@@ -887,4 +893,5 @@ class SRV1ForCausalLMParallel(SRV1ForCausalLM):
|
|
| 887 |
)
|
| 888 |
kwargs.update({"pretrained_model_name_or_path": pretrained_model_name_or_path})
|
| 889 |
model = cls(config, *model_args, **kwargs)
|
|
|
|
| 890 |
return model
|
|
|
|
| 839 |
trust_remote_code = kwargs.get("trust_remote_code", False)
|
| 840 |
quantize = kwargs.get("quantize", None)
|
| 841 |
dtype = kwargs.get("dtype", None)
|
| 842 |
+
if dtype is None:
|
| 843 |
+
dtype = config.torch_dtype
|
| 844 |
+
|
| 845 |
self.process_group, rank, world_size = initialize_torch_distributed()
|
| 846 |
+
|
| 847 |
if torch.cuda.is_available():
|
| 848 |
device = torch.device(f"cuda:{rank}")
|
| 849 |
dtype = torch.float16 if dtype is None else dtype
|
| 850 |
+
|
| 851 |
else:
|
| 852 |
raise NotImplementedError("Flash is only available on GPU")
|
| 853 |
|
| 854 |
+
if rank == 0:
|
| 855 |
+
print(config)
|
| 856 |
+
print(f"Final dtype {dtype}")
|
| 857 |
+
print(f"Will read model dir {model_id}")
|
| 858 |
self.tokenizer = AutoTokenizer.from_pretrained(
|
| 859 |
model_id,
|
| 860 |
revision=revision,
|
|
|
|
| 862 |
truncation_side="left",
|
| 863 |
trust_remote_code=trust_remote_code,
|
| 864 |
)
|
| 865 |
+
|
|
|
|
| 866 |
config.quantize = quantize
|
| 867 |
torch.distributed.barrier(group=self.process_group)
|
| 868 |
import glob
|
| 869 |
filenames = glob.glob(f"{model_id}/*.safetensors")
|
| 870 |
+
if rank == 0:
|
| 871 |
+
print(f"Will read filename {filenames}")
|
| 872 |
weights = Weights(filenames=filenames, device=device, dtype=dtype, process_group=self.process_group)
|
| 873 |
+
|
| 874 |
print(f"RANK[{rank}]: Loaded Weights success. device:{device}")
|
| 875 |
|
| 876 |
torch.distributed.barrier(group=self.process_group)
|
|
|
|
| 893 |
)
|
| 894 |
kwargs.update({"pretrained_model_name_or_path": pretrained_model_name_or_path})
|
| 895 |
model = cls(config, *model_args, **kwargs)
|
| 896 |
+
|
| 897 |
return model
|