Prompt48 committed on
Commit
6ccdd43
·
verified ·
1 Parent(s): 451496d

Upload edit\Qwen3-TTS-test\.venv\Lib\site-packages\accelerate\launchers.py with huggingface_hub

Browse files
edit//Qwen3-TTS-test//.venv//Lib//site-packages//accelerate//launchers.py ADDED
@@ -0,0 +1,307 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 The HuggingFace Team. All rights reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ import os
16
+ import sys
17
+ import tempfile
18
+
19
+ import torch
20
+
21
+ from .state import AcceleratorState, PartialState
22
+ from .utils import (
23
+ PrecisionType,
24
+ PrepareForLaunch,
25
+ are_libraries_initialized,
26
+ check_cuda_p2p_ib_support,
27
+ get_current_device_type,
28
+ get_gpu_info,
29
+ is_mps_available,
30
+ is_torch_version,
31
+ patch_environment,
32
+ )
33
+ from .utils.constants import ELASTIC_LOG_LINE_PREFIX_TEMPLATE_PYTORCH_VERSION
34
+
35
+
36
def test_launch():
    """
    Smoke check used by the launcher's debug pre-flight: simply instantiate a `PartialState`.

    The instance itself is discarded; only the fact that construction succeeds matters.
    """
    PartialState()
39
+
40
+
41
def notebook_launcher(
    function,
    args=(),
    num_processes=None,
    mixed_precision="no",
    use_port="29500",
    master_addr="127.0.0.1",
    node_rank=0,
    num_nodes=1,
    rdzv_backend="static",
    rdzv_endpoint="",
    rdzv_conf=None,
    rdzv_id="none",
    max_restarts=0,
    monitor_interval=0.1,
    log_line_prefix_template=None,
):
    """
    Launches a training function, using several processes or multiple nodes if it's possible in the current environment
    (TPU with multiple cores for instance).

    <Tip warning={true}>

    To use this function absolutely zero calls to a device must be made in the notebook session before calling. If any
    have been made, you will need to restart the notebook and make sure no cells use any device capability.

    Setting `ACCELERATE_DEBUG_MODE="1"` (or `"true"`) in your environment will run a test before truly launching to
    ensure that none of those calls have been made.

    </Tip>

    Args:
        function (`Callable`):
            The training function to execute. If it accepts arguments, the first argument should be the index of the
            process run.
        args (`Tuple`):
            Tuple of arguments to pass to the function (it will receive `*args`).
        num_processes (`int`, *optional*):
            The number of processes to use for training. Will default to 8 in Colab/Kaggle if a TPU is available, to
            the number of devices available otherwise.
        mixed_precision (`str`, *optional*, defaults to `"no"`):
            If `fp16` or `bf16`, will use mixed precision training on multi-device.
        use_port (`str`, *optional*, defaults to `"29500"`):
            The port to use to communicate between processes when launching a multi-device training.
        master_addr (`str`, *optional*, defaults to `"127.0.0.1"`):
            The address to use for communication between processes.
        node_rank (`int`, *optional*, defaults to 0):
            The rank of the current node.
        num_nodes (`int`, *optional*, defaults to 1):
            The number of nodes to use for training.
        rdzv_backend (`str`, *optional*, defaults to `"static"`):
            The rendezvous method to use, such as 'static' (the default) or 'c10d'
        rdzv_endpoint (`str`, *optional*, defaults to `""`):
            The endpoint of the rdzv sync. storage.
        rdzv_conf (`Dict`, *optional*, defaults to `None`):
            Additional rendezvous configuration. The mapping passed in is copied, never modified.
        rdzv_id (`str`, *optional*, defaults to `"none"`):
            The unique run id of the job.
        max_restarts (`int`, *optional*, defaults to 0):
            The maximum amount of restarts that elastic agent will conduct on workers before failure.
        monitor_interval (`float`, *optional*, defaults to 0.1):
            The interval in seconds that is used by the elastic_agent as a period of monitoring workers.
        log_line_prefix_template (`str`, *optional*, defaults to `None`):
            The prefix template for elastic launch logging. Available from PyTorch 2.2.0.

    Raises:
        ValueError: If `mixed_precision` is not a known precision mode, if an `Accelerator` state already exists
            before a TPU or multi-device launch, if `num_processes` is missing for a non-Colab launch, or if
            `node_rank >= num_nodes`.
        RuntimeError: If a library known to initialize the device on import is already loaded, or if the debug
            pre-flight or the actual multi-process launch raises a `ProcessRaisedException`.

    Example:

    ```python
    # Assume this is defined in a Jupyter Notebook on an instance with two devices
    from accelerate import notebook_launcher


    def train(*args):
        # Your training function here
        ...


    notebook_launcher(train, args=(arg1, arg2), num_processes=2, mixed_precision="fp16")
    ```
    """
    # Are we in a google colab or a Kaggle Kernel?
    in_colab = False
    in_kaggle = False
    if any(key.startswith("KAGGLE") for key in os.environ):
        in_kaggle = True
    elif "IPython" in sys.modules:
        in_colab = "google.colab" in str(sys.modules["IPython"].get_ipython())

    try:
        mixed_precision = PrecisionType(mixed_precision.lower())
    except ValueError as err:
        # `args` is the tuple of user arguments, not a namespace: report the offending parameter itself.
        raise ValueError(
            f"Unknown mixed_precision mode: {mixed_precision.lower()}. Choose between {PrecisionType.list()}."
        ) from err

    if (in_colab or in_kaggle) and (
        (os.environ.get("TPU_NAME", None) is not None) or (os.environ.get("PJRT_DEVICE", "") == "TPU")
    ):
        # TPU launch
        import torch_xla.distributed.xla_multiprocessing as xmp

        if len(AcceleratorState._shared_state) > 0:
            raise ValueError(
                "To train on TPU in Colab or Kaggle Kernel, the `Accelerator` should only be initialized inside "
                "your training function. Restart your notebook and make sure no cells initializes an "
                "`Accelerator`."
            )

        launcher = PrepareForLaunch(function, distributed_type="XLA")
        print("Launching a training on TPU cores.")
        xmp.spawn(launcher, args=args, start_method="fork")
    elif in_colab and get_gpu_info()[1] < 2:
        # No need for a distributed launch otherwise as it's either CPU or one GPU.
        if torch.cuda.is_available():
            print("Launching training on one GPU.")
        else:
            print("Launching training on one CPU.")
        function(*args)
    else:
        if num_processes is None:
            raise ValueError(
                "You have to specify the number of devices you would like to use, add `num_processes=...` to your call."
            )
        if node_rank >= num_nodes:
            raise ValueError("The node_rank must be less than the number of nodes.")
        if num_processes > 1:
            # Multi-device launch
            from torch.distributed.launcher.api import LaunchConfig, elastic_launch
            from torch.multiprocessing import start_processes
            from torch.multiprocessing.spawn import ProcessRaisedException

            if len(AcceleratorState._shared_state) > 0:
                raise ValueError(
                    "To launch a multi-device training from your notebook, the `Accelerator` should only be initialized "
                    "inside your training function. Restart your notebook and make sure no cells initializes an "
                    "`Accelerator`."
                )
            # Check for specific libraries known to initialize device that users constantly use
            problematic_imports = are_libraries_initialized("bitsandbytes")
            if len(problematic_imports) > 0:
                err = (
                    "Could not start distributed process. Libraries known to initialize device upon import have been "
                    "imported already. Please keep these imports inside your training function to try and help with this:"
                )
                for lib_name in problematic_imports:
                    err += f"\n\t* `{lib_name}`"
                raise RuntimeError(err)

            patched_env = dict(
                nproc=num_processes,
                node_rank=node_rank,
                world_size=num_nodes * num_processes,
                master_addr=master_addr,
                master_port=use_port,
                mixed_precision=mixed_precision,
            )

            # Check for CUDA P2P and IB issues
            if not check_cuda_p2p_ib_support():
                patched_env["nccl_p2p_disable"] = "1"
                patched_env["nccl_ib_disable"] = "1"

            # torch.distributed will expect a few environment variables to be here. We set the ones common to each
            # process here (the other ones will be set by the launcher).
            with patch_environment(**patched_env):
                # Determine device type without initializing any device (which would break fork)
                device_type, distributed_type = get_current_device_type()
                reinit_marker = f"Cannot re-initialize {device_type.upper()} in forked subprocess"

                # First dummy launch. Accept the usual truthy spellings so that the documented
                # `ACCELERATE_DEBUG_MODE="1"` works as well as the historical `"true"`.
                debug_flag = os.environ.get("ACCELERATE_DEBUG_MODE", "false").lower()
                if debug_flag in ("1", "true", "yes", "y", "t", "on"):
                    launcher = PrepareForLaunch(test_launch, distributed_type=distributed_type)
                    try:
                        start_processes(launcher, args=(), nprocs=num_processes, start_method="fork")
                    except ProcessRaisedException as e:
                        err = "An issue was found when verifying a stable environment for the notebook launcher."
                        if reinit_marker in e.args[0]:
                            raise RuntimeError(
                                f"{err} "
                                "This likely stems from an outside import causing issues once the `notebook_launcher()` is called. "
                                "Please review your imports and test them when running the `notebook_launcher()` to identify "
                                f"which one is problematic and causing {device_type.upper()} to be initialized."
                            ) from e
                        else:
                            raise RuntimeError(f"{err} The following error was raised: {e}") from e

                # Now the actual launch
                # NOTE(review): `launcher` is constructed here but the raw `function` is what gets handed to
                # `elastic_launch` below -- kept as-is; confirm this is intended.
                launcher = PrepareForLaunch(function, distributed_type=distributed_type)
                print(f"Launching training on {num_processes} {device_type.upper()}s.")
                try:
                    # Work on a private copy so the caller's mapping is never mutated.
                    rdzv_conf = {} if rdzv_conf is None else dict(rdzv_conf)
                    if rdzv_backend == "static":
                        rdzv_conf["rank"] = node_rank
                        if not rdzv_endpoint:
                            rdzv_endpoint = f"{master_addr}:{use_port}"
                    launch_config_kwargs = dict(
                        min_nodes=num_nodes,
                        max_nodes=num_nodes,
                        nproc_per_node=num_processes,
                        run_id=rdzv_id,
                        rdzv_endpoint=rdzv_endpoint,
                        rdzv_backend=rdzv_backend,
                        rdzv_configs=rdzv_conf,
                        max_restarts=max_restarts,
                        monitor_interval=monitor_interval,
                        start_method="fork",
                    )
                    # Only forward the prefix template on PyTorch versions whose `LaunchConfig` knows about it.
                    if is_torch_version(">=", ELASTIC_LOG_LINE_PREFIX_TEMPLATE_PYTORCH_VERSION):
                        launch_config_kwargs["log_line_prefix_template"] = log_line_prefix_template
                    elastic_launch(config=LaunchConfig(**launch_config_kwargs), entrypoint=function)(*args)
                except ProcessRaisedException as e:
                    if reinit_marker in e.args[0]:
                        raise RuntimeError(
                            f"{device_type.upper()} has been initialized before the `notebook_launcher` could create a forked subprocess. "
                            "This likely stems from an outside import causing issues once the `notebook_launcher()` is called. "
                            "Please review your imports and test them when running the `notebook_launcher()` to identify "
                            f"which one is problematic and causing {device_type.upper()} to be initialized."
                        ) from e
                    else:
                        raise RuntimeError(f"An issue was found when launching the training: {e}") from e

        else:
            # No need for a distributed launch otherwise as it's either CPU, GPU, XPU or MPS.
            if is_mps_available():
                os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
                print("Launching training on MPS.")
            elif torch.cuda.is_available():
                print("Launching training on one GPU.")
            elif hasattr(torch, "xpu") and torch.xpu.is_available():
                # `torch.xpu` may be missing on older torch builds, hence the guard.
                print("Launching training on one XPU.")
            else:
                print("Launching training on CPU.")
            function(*args)
272
+
273
+
274
def debug_launcher(function, args=(), num_processes=2):
    """
    Run a training function in several forked processes on CPU, as a debugging aid.

    <Tip warning={true}>

    Meant for internal testing and debugging only, not for real trainings: the environment is patched with
    `accelerate_use_cpu="yes"` for the duration of the run.

    </Tip>

    Args:
        function (`Callable`):
            The training function to execute.
        args (`Tuple`):
            Tuple of arguments to pass to the function (it will receive `*args`).
        num_processes (`int`, *optional*, defaults to 2):
            The number of processes to use for training.
    """
    from torch.multiprocessing import start_processes

    # A temporary file is created for the run and its path is exposed through `accelerate_debug_rdv_file`.
    # The remaining entries are the settings shared by every worker that torch.distributed expects to find;
    # anything per-process is left to the launcher.
    with tempfile.NamedTemporaryFile() as rdv_file, patch_environment(
        world_size=num_processes,
        master_addr="127.0.0.1",
        master_port="29500",
        accelerate_mixed_precision="no",
        accelerate_debug_rdv_file=rdv_file.name,
        accelerate_use_cpu="yes",
    ):
        start_processes(
            PrepareForLaunch(function, debug=True),
            args=args,
            nprocs=num_processes,
            start_method="fork",
        )