Spaces:
Sleeping
Sleeping
Oleg Shulyakov
committed on
Commit
·
2ae55e9
1
Parent(s):
239afdd
Process timeouts
Browse files
app.py
CHANGED
|
@@ -159,9 +159,9 @@ class HuggingFaceModelProcessor:
|
|
| 159 |
"-o", quant_config.imatrix_file,
|
| 160 |
]
|
| 161 |
|
| 162 |
-
process = subprocess.Popen(imatrix_command, shell=False)
|
| 163 |
try:
|
| 164 |
-
process.wait(timeout=
|
| 165 |
except subprocess.TimeoutExpired:
|
| 166 |
print("Imatrix computation timed out. Sending SIGINT to allow graceful termination...")
|
| 167 |
process.send_signal(signal.SIGINT)
|
|
@@ -170,9 +170,10 @@ class HuggingFaceModelProcessor:
|
|
| 170 |
except subprocess.TimeoutExpired:
|
| 171 |
print("Imatrix proc still didn't term. Forcefully terminating process...")
|
| 172 |
process.kill()
|
|
|
|
| 173 |
|
| 174 |
if process.returncode != 0:
|
| 175 |
-
|
| 176 |
|
| 177 |
print(f"Importance matrix generation completed: {os.path.abspath(quant_config.imatrix_file)}")
|
| 178 |
|
|
@@ -195,16 +196,21 @@ class HuggingFaceModelProcessor:
|
|
| 195 |
split_cmd.extend([quant_config.quantized_gguf, model_path_prefix])
|
| 196 |
|
| 197 |
print(f"Split command: {split_cmd}")
|
| 198 |
-
process = subprocess.Popen(split_cmd, shell=False)
|
| 199 |
try:
|
| 200 |
process.wait(timeout=300)
|
| 201 |
except subprocess.TimeoutExpired:
|
| 202 |
-
print("Splitting timed out.
|
| 203 |
-
process.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
raise GGUFConverterError("Error splitting the model: Operation timed out.")
|
| 205 |
|
| 206 |
if process.returncode != 0:
|
| 207 |
-
raise GGUFConverterError(f"Error splitting the model")
|
| 208 |
|
| 209 |
print("Model split successfully!")
|
| 210 |
|
|
@@ -282,16 +288,21 @@ class HuggingFaceModelProcessor:
|
|
| 282 |
"python3", "/app/convert_hf_to_gguf.py", local_dir,
|
| 283 |
"--outtype", "f16", "--outfile", processing_config.quant_config.fp16_model
|
| 284 |
]
|
| 285 |
-
process = subprocess.Popen(convert_command, shell=False)
|
| 286 |
try:
|
| 287 |
process.wait(timeout=600)
|
| 288 |
except subprocess.TimeoutExpired:
|
| 289 |
-
print("Conversion timed out.
|
| 290 |
-
process.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
raise GGUFConverterError("Error converting to fp16: Operation timed out.")
|
| 292 |
|
| 293 |
if process.returncode != 0:
|
| 294 |
-
raise GGUFConverterError(f"Error converting to fp16")
|
| 295 |
|
| 296 |
print("Model converted to fp16 successfully!")
|
| 297 |
print(f"Converted model path: {os.path.abspath(processing_config.quant_config.fp16_model)}")
|
|
@@ -328,16 +339,21 @@ class HuggingFaceModelProcessor:
|
|
| 328 |
print(f"Quantizing model with {quantize_cmd}")
|
| 329 |
|
| 330 |
# Use Popen for quantization
|
| 331 |
-
process = subprocess.Popen(quantize_cmd, shell=False)
|
| 332 |
try:
|
| 333 |
process.wait(timeout=3600)
|
| 334 |
except subprocess.TimeoutExpired:
|
| 335 |
-
print("Quantization timed out.
|
| 336 |
-
process.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 337 |
raise GGUFConverterError("Error quantizing: Operation timed out.")
|
| 338 |
|
| 339 |
if process.returncode != 0:
|
| 340 |
-
raise GGUFConverterError(f"Error quantizing")
|
| 341 |
|
| 342 |
print(f"Quantized successfully with {quant_config.imatrix_method if quant_config.use_imatrix else quant_config.method} option!")
|
| 343 |
print(f"Quantized model path: {os.path.abspath(quant_config.quantized_gguf)}")
|
|
|
|
| 159 |
"-o", quant_config.imatrix_file,
|
| 160 |
]
|
| 161 |
|
| 162 |
+
process = subprocess.Popen(imatrix_command, shell=False, stderr=subprocess.STDOUT)
|
| 163 |
try:
|
| 164 |
+
process.wait(timeout=600)
|
| 165 |
except subprocess.TimeoutExpired:
|
| 166 |
print("Imatrix computation timed out. Sending SIGINT to allow graceful termination...")
|
| 167 |
process.send_signal(signal.SIGINT)
|
|
|
|
| 170 |
except subprocess.TimeoutExpired:
|
| 171 |
print("Imatrix proc still didn't term. Forcefully terminating process...")
|
| 172 |
process.kill()
|
| 173 |
+
raise GGUFConverterError("Error generating imatrix: Operation timed out.")
|
| 174 |
|
| 175 |
if process.returncode != 0:
|
| 176 |
+
raise GGUFConverterError(f"Error generating imatrix: code={process.returncode}.")
|
| 177 |
|
| 178 |
print(f"Importance matrix generation completed: {os.path.abspath(quant_config.imatrix_file)}")
|
| 179 |
|
|
|
|
| 196 |
split_cmd.extend([quant_config.quantized_gguf, model_path_prefix])
|
| 197 |
|
| 198 |
print(f"Split command: {split_cmd}")
|
| 199 |
+
process = subprocess.Popen(split_cmd, shell=False, stderr=subprocess.STDOUT)
|
| 200 |
try:
|
| 201 |
process.wait(timeout=300)
|
| 202 |
except subprocess.TimeoutExpired:
|
| 203 |
+
print("Splitting timed out. Sending SIGINT to allow graceful termination...")
|
| 204 |
+
process.send_signal(signal.SIGINT)
|
| 205 |
+
try:
|
| 206 |
+
process.wait(timeout=5)
|
| 207 |
+
except subprocess.TimeoutExpired:
|
| 208 |
+
print("Splitting timed out. Killing process...")
|
| 209 |
+
process.kill()
|
| 210 |
raise GGUFConverterError("Error splitting the model: Operation timed out.")
|
| 211 |
|
| 212 |
if process.returncode != 0:
|
| 213 |
+
raise GGUFConverterError(f"Error splitting the model: code={process.returncode}")
|
| 214 |
|
| 215 |
print("Model split successfully!")
|
| 216 |
|
|
|
|
| 288 |
"python3", "/app/convert_hf_to_gguf.py", local_dir,
|
| 289 |
"--outtype", "f16", "--outfile", processing_config.quant_config.fp16_model
|
| 290 |
]
|
| 291 |
+
process = subprocess.Popen(convert_command, shell=False, stderr=subprocess.STDOUT)
|
| 292 |
try:
|
| 293 |
process.wait(timeout=600)
|
| 294 |
except subprocess.TimeoutExpired:
|
| 295 |
+
print("Conversion timed out. Sending SIGINT to allow graceful termination...")
|
| 296 |
+
process.send_signal(signal.SIGINT)
|
| 297 |
+
try:
|
| 298 |
+
process.wait(timeout=5)
|
| 299 |
+
except subprocess.TimeoutExpired:
|
| 300 |
+
print("Conversion timed out. Killing process...")
|
| 301 |
+
process.kill()
|
| 302 |
raise GGUFConverterError("Error converting to fp16: Operation timed out.")
|
| 303 |
|
| 304 |
if process.returncode != 0:
|
| 305 |
+
raise GGUFConverterError(f"Error converting to fp16: code={process.returncode}")
|
| 306 |
|
| 307 |
print("Model converted to fp16 successfully!")
|
| 308 |
print(f"Converted model path: {os.path.abspath(processing_config.quant_config.fp16_model)}")
|
|
|
|
| 339 |
print(f"Quantizing model with {quantize_cmd}")
|
| 340 |
|
| 341 |
# Use Popen for quantization
|
| 342 |
+
process = subprocess.Popen(quantize_cmd, shell=False, stderr=subprocess.STDOUT)
|
| 343 |
try:
|
| 344 |
process.wait(timeout=3600)
|
| 345 |
except subprocess.TimeoutExpired:
|
| 346 |
+
print("Quantization timed out. Sending SIGINT to allow graceful termination...")
|
| 347 |
+
process.send_signal(signal.SIGINT)
|
| 348 |
+
try:
|
| 349 |
+
process.wait(timeout=5)
|
| 350 |
+
except subprocess.TimeoutExpired:
|
| 351 |
+
print("Quantization timed out. Killing process...")
|
| 352 |
+
process.kill()
|
| 353 |
raise GGUFConverterError("Error quantizing: Operation timed out.")
|
| 354 |
|
| 355 |
if process.returncode != 0:
|
| 356 |
+
raise GGUFConverterError(f"Error quantizing: code={process.returncode}")
|
| 357 |
|
| 358 |
print(f"Quantized successfully with {quant_config.imatrix_method if quant_config.use_imatrix else quant_config.method} option!")
|
| 359 |
print(f"Quantized model path: {os.path.abspath(quant_config.quantized_gguf)}")
|