Prompt48 committed on
Commit
451496d
·
verified ·
1 Parent(s): 4c4113d

Upload edit\Qwen3-TTS-test\.venv\Lib\site-packages\accelerate\inference.py with huggingface_hub

Browse files
edit//Qwen3-TTS-test//.venv//Lib//site-packages//accelerate//inference.py ADDED
@@ -0,0 +1,186 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2024 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ import math
15
+ from types import MethodType
16
+ from typing import Any, Optional, Union
17
+
18
+ from .state import PartialState
19
+ from .utils import (
20
+ calculate_maximum_sizes,
21
+ convert_bytes,
22
+ copy_tensor_to_devices,
23
+ ignorant_find_batch_size,
24
+ infer_auto_device_map,
25
+ is_pippy_available,
26
+ pad_input_tensors,
27
+ send_to_device,
28
+ )
29
+
30
+
31
def generate_device_map(
    model, num_processes: int = 1, no_split_module_classes=None, max_memory: Optional[dict] = None
):
    """
    Calculates the device map for `model` with an offset for PiPPy

    Args:
        model:
            The model to build a device map for. It is handed to `calculate_maximum_sizes` and
            `infer_auto_device_map`.
        num_processes (`int`, defaults to 1):
            How many device slots (keys `0..num_processes - 1`) the automatically derived memory budget is spread
            over.
        no_split_module_classes:
            Forwarded unchanged to `infer_auto_device_map`.
        max_memory (`dict`, *optional*):
            Memory budget forwarded to `infer_auto_device_map`. When it is `None` and `num_processes` is not 1, an
            even per-slot budget is derived from the sizes reported by `calculate_maximum_sizes`.

    Returns:
        The result of `infer_auto_device_map` called with `clean_result=False`.
    """
    if num_processes != 1 and max_memory is None:
        model_size, shared = calculate_maximum_sizes(model)

        # Split into `n` chunks for each GPU
        per_process = convert_bytes((model_size + shared[0]) / num_processes)
        value, ending = per_process.split(" ")

        # Add a chunk to deal with potential extra shared memory instances
        budget = f"{math.ceil(float(value)) * 1.1} {ending}"
        max_memory = {i: budget for i in range(num_processes)}

    # A caller-supplied `max_memory` is honored on every path, including the single-process one where it used to be
    # dropped. With `max_memory=None` this is the same call as before.
    return infer_auto_device_map(
        model,
        max_memory=max_memory,
        no_split_module_classes=no_split_module_classes,
        clean_result=False,
    )
58
+
59
+
60
def find_pippy_batch_size(args, kwargs):
    """
    Returns the first non-`None` batch size that `ignorant_find_batch_size` reports for the given inputs.

    Positional `args` are checked in order first; the values of `kwargs` are only checked when no positional
    argument produced a batch size. Either container may be `None`. Returns `None` when nothing yields a batch size.
    """
    if args is not None:
        for candidate in args:
            size = ignorant_find_batch_size(candidate)
            if size is not None:
                return size
    if kwargs is None:
        return None
    for candidate in kwargs.values():
        size = ignorant_find_batch_size(candidate)
        if size is not None:
            return size
    return None
73
+
74
+
75
def build_pipeline(model, split_points, args, kwargs, num_chunks):
    """
    Traces `model` into a pipeline split at `split_points` and returns a `ScheduleGPipe` for this process' stage.

    Every name in `split_points` is marked as `SplitPoint.BEGINNING`. `args` and `kwargs` are the example inputs
    handed to `torch.distributed.pipelining.pipeline` as `mb_args` / `mb_kwargs`. The stage is built for
    `PartialState().local_process_index` on `PartialState().device`, and `num_chunks` is passed straight to
    `ScheduleGPipe`.
    """
    # Note: We import here to reduce import time from general modules, and isolate outside dependencies
    from torch.distributed.pipelining import ScheduleGPipe, SplitPoint, pipeline

    process_state = PartialState()

    # We need to annotate the split points in the model for PiPPy
    split_spec = dict.fromkeys(split_points, SplitPoint.BEGINNING)
    traced = pipeline(model, mb_args=args, mb_kwargs=kwargs, split_spec=split_spec)
    local_stage = traced.build_stage(process_state.local_process_index, device=process_state.device)
    return ScheduleGPipe(local_stage, num_chunks)
99
+
100
+
101
def pippy_forward(forward, num_chunks, gather_output, *args, **kwargs):
    """
    Runs one pipeline step through `forward`, choosing the call form from this process' role in `PartialState`.

    - Single process: `forward` is called with the inputs and its result is returned.
    - Local main process: the inputs are padded via `pad_input_tensors` when the detected batch size differs from
      `num_chunks`, then fed to `forward`; the return value of that call is not kept.
    - Last process: `forward()` is called without inputs and its result is kept.
    - Any other process: `forward()` is called without inputs and its result is discarded.

    When `gather_output` is truthy the kept result (possibly `None`) is passed through `copy_tensor_to_devices`
    before being returned.

    Raises:
        ValueError: on the local main process when no batch size can be found in `args` or `kwargs`.
    """
    process_state = PartialState()
    result = None

    if process_state.num_processes == 1:
        result = forward(*args, **kwargs)
    elif process_state.is_local_main_process:
        batch_size = find_pippy_batch_size(args, kwargs)
        if batch_size is None:
            raise ValueError("Could not find batch size from args or kwargs")
        if batch_size != num_chunks:
            args = pad_input_tensors(args, batch_size, num_chunks)
            kwargs = pad_input_tensors(kwargs, batch_size, num_chunks)
        forward(*args, **kwargs)
    elif process_state.is_last_process:
        result = forward()
    else:
        forward()

    if not gather_output:
        return result
    # Each node will get a copy of the full output which is only on the last GPU
    return copy_tensor_to_devices(result)
124
+
125
+
126
def prepare_pippy(
    model,
    split_points: Optional[Union[str, list[str]]] = "auto",
    no_split_module_classes: Optional[list[str]] = None,
    example_args: Optional[tuple[Any]] = (),
    example_kwargs: Optional[dict[str, Any]] = None,
    num_chunks: Optional[int] = None,
    gather_output: Optional[bool] = False,
):
    """
    Wraps `model` for pipeline parallel inference.

    Args:
        model (`torch.nn.Module`):
            A model we want to split for pipeline-parallel inference
        split_points (`str` or `List[str]`, defaults to 'auto'):
            How to generate the split points and chunk the model across each GPU. 'auto' will find the best balanced
            split given any model. Should be a list of layer names in the model to split by otherwise.
        no_split_module_classes (`List[str]`):
            A list of class names for layers we don't want to be split.
        example_args (tuple of model inputs):
            The expected inputs for the model that uses order-based inputs for a *single process*. Recommended to use
            this method if possible.
        example_kwargs (dict of model inputs):
            The expected inputs for the model that uses dictionary-based inputs for a *single process*. This is a
            *highly* limiting structure that requires the same keys be present at *all* inference calls. Not
            recommended unless the prior condition is true for all cases.
        num_chunks (`int`, defaults to `PartialState().num_processes`):
            The number of different stages the Pipeline will have. By default it will assign one chunk per GPU, but
            this can be tuned and played with. In general one should have num_chunks >= num_gpus.
        gather_output (`bool`, defaults to `False`):
            If `True`, the output from the last GPU (which holds the true outputs) is sent across to all GPUs.

    Returns:
        The same `model` object, with `forward` replaced by the pipelined wrapper and the attributes
        `hf_split_points`, `pippy_stage`, `_original_forward` and `_original_call` set on it.

    Raises:
        ImportError: If `is_pippy_available()` reports that pipelining is unavailable.
        ValueError: If `split_points="auto"` and the generated device map assigns no module to one of the device
            indices `1..num_chunks - 1`.
    """
    if not is_pippy_available():
        raise ImportError("Using `torch.distributed.pipelining` requires PyTorch 2.4.0 or later.")
    state = PartialState()
    example_args = send_to_device(example_args, "cpu")
    example_kwargs = send_to_device(example_kwargs, "cpu")
    if num_chunks is None:
        num_chunks = state.num_processes
    if split_points == "auto":
        device_map = generate_device_map(model, num_chunks, no_split_module_classes=no_split_module_classes)
        split_points = []
        for device_index in range(1, num_chunks):
            # The first module placed on each device (after device 0) marks where a new stage begins.
            first_module = next((name for name, device in device_map.items() if device == device_index), None)
            if first_module is None:
                # Without a default, `next` would leak a bare `StopIteration` here, which gives the user no hint.
                raise ValueError(
                    f"Could not automatically find a split point for device {device_index}: the generated device "
                    f"map does not place any module on it. Try a smaller `num_chunks` (got {num_chunks}) or pass "
                    "`split_points` explicitly."
                )
            split_points.append(first_module)
    model.hf_split_points = split_points
    stage = build_pipeline(model, split_points, example_args, example_kwargs, num_chunks)
    # Keep handles on the original entry points before `forward` is replaced below.
    model._original_forward = model.forward
    model._original_call = model.__call__
    model.pippy_stage = stage

    def forward(*args, **kwargs):
        return pippy_forward(stage.step, num_chunks, gather_output, *args, **kwargs)

    # To act like a decorator so that it can be popped when doing `extract_model_from_parallel`
    # Note: creates an infinite recursion loop with `generate`
    model_forward = MethodType(forward, model)
    forward.__wrapped__ = model_forward
    model.forward = forward
    return model