subprocess.Run

Oleg Shulyakov committed
Commit · 239afdd
1 Parent(s): dd433e4
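Summary: switches the pipeline's external commands from blocking subprocess.run calls to subprocess.Popen with explicit timeouts. Imatrix generation (300 s), model splitting (300 s), HF-to-GGUF FP16 conversion (600 s), and quantization (3600 s) are now killed on timeout, and each step raises GGUFConverterError on a non-zero return code.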
app.py
CHANGED
@@ -31,6 +31,7 @@ class QuantizationConfig:
     quantized_gguf: str = field(default="", init=False)
     imatrix_file: str = field(default="", init=False)
 
+
 @dataclass
 class SplitConfig:
     """Configuration for model splitting."""
@@ -46,6 +47,7 @@ class OutputConfig:
     repo_name: str = ""
     filename: str = ""
 
+
 @dataclass
 class ModelProcessingConfig:
     """Configuration for the entire model processing pipeline."""
@@ -60,6 +62,7 @@ class ModelProcessingConfig:
     new_repo_url: str = field(default="", init=False)
     new_repo_id: str = field(default="", init=False)
 
+
 class GGUFConverterError(Exception):
     """Custom exception for GGUF conversion errors."""
     pass
@@ -143,9 +146,10 @@ class HuggingFaceModelProcessor:
         train_data_path = self.CALIBRATION_FILE
         if not os.path.isfile(train_data_path):
             raise GGUFConverterError(f"Training data file not found: {train_data_path}")
-        print(f"Training data file path: {train_data_path}")
 
+        print(f"Training data file path: {train_data_path}")
         print("Running imatrix command...")
+
         imatrix_command = [
             "llama-imatrix",
             "-m", quant_config.fp16_model,
@@ -157,16 +161,19 @@
 
         process = subprocess.Popen(imatrix_command, shell=False)
         try:
-            process.wait(timeout=
+            process.wait(timeout=300)
         except subprocess.TimeoutExpired:
             print("Imatrix computation timed out. Sending SIGINT to allow graceful termination...")
             process.send_signal(signal.SIGINT)
             try:
                 process.wait(timeout=5)
             except subprocess.TimeoutExpired:
-                print("Imatrix proc still didn't term.
+                print("Imatrix proc still didn't term. Forcefully terminating process...")
                 process.kill()
 
+        if process.returncode != 0:
+            raise GGUFConverterError(f"Error generating imatrix")
+
         print(f"Importance matrix generation completed: {os.path.abspath(quant_config.imatrix_file)}")
 
     def _split_and_upload_model(self, processing_config: ModelProcessingConfig) -> None:
@@ -188,14 +195,16 @@
         split_cmd.extend([quant_config.quantized_gguf, model_path_prefix])
 
         print(f"Split command: {split_cmd}")
-
-
-
-
+        process = subprocess.Popen(split_cmd, shell=False)
+        try:
+            process.wait(timeout=300)
+        except subprocess.TimeoutExpired:
+            print("Splitting timed out. Killing process...")
+            process.kill()
+            raise GGUFConverterError("Error splitting the model: Operation timed out.")
 
-        if
-
-            raise GGUFConverterError(f"Error splitting the model: {stderr_str}")
+        if process.returncode != 0:
+            raise GGUFConverterError(f"Error splitting the model")
 
         print("Model split successfully!")
 
@@ -215,6 +224,7 @@
             raise GGUFConverterError("No sharded files found.")
 
         print(f"Sharded model files: {sharded_model_files}")
+
         for file in sharded_model_files:
             file_path = os.path.join(processing_config.outdir, file)
             try:
@@ -268,19 +278,20 @@
 
         # Convert HF to GGUF
         print(f"Converting to GGUF FP16: {os.path.abspath(processing_config.quant_config.fp16_model)}")
-
-
-
-
-
-
-
-
+        convert_command = [
+            "python3", "/app/convert_hf_to_gguf.py", local_dir,
+            "--outtype", "f16", "--outfile", processing_config.quant_config.fp16_model
+        ]
+        process = subprocess.Popen(convert_command, shell=False)
+        try:
+            process.wait(timeout=600)
+        except subprocess.TimeoutExpired:
+            print("Conversion timed out. Killing process...")
+            process.kill()
+            raise GGUFConverterError("Error converting to fp16: Operation timed out.")
 
-
-
-            stderr_str = result.stderr.decode("utf-8")
-            raise GGUFConverterError(f"Error converting to fp16: {stderr_str}")
+        if process.returncode != 0:
+            raise GGUFConverterError(f"Error converting to fp16")
 
         print("Model converted to fp16 successfully!")
         print(f"Converted model path: {os.path.abspath(processing_config.quant_config.fp16_model)}")
@@ -315,11 +326,18 @@
         quantize_cmd.append(quant_config.method)
 
         print(f"Quantizing model with {quantize_cmd}")
-        result = subprocess.run(quantize_cmd, shell=False, capture_output=True)
 
-
-
-
+        # Use Popen for quantization
+        process = subprocess.Popen(quantize_cmd, shell=False)
+        try:
+            process.wait(timeout=3600)
+        except subprocess.TimeoutExpired:
+            print("Quantization timed out. Killing process...")
+            process.kill()
+            raise GGUFConverterError("Error quantizing: Operation timed out.")
+
+        if process.returncode != 0:
+            raise GGUFConverterError(f"Error quantizing")
 
         print(f"Quantized successfully with {quant_config.imatrix_method if quant_config.use_imatrix else quant_config.method} option!")
         print(f"Quantized model path: {os.path.abspath(quant_config.quantized_gguf)}")
@@ -338,7 +356,7 @@
 
         return new_repo_url
 
-    def _generate_readme(self, processing_config
+    def _generate_readme(self, processing_config: ModelProcessingConfig) -> str:
         """Generate README.md for the quantized model."""
         creator = self._get_model_creator(processing_config.model_id)
         username = whoami(processing_config.token)["name"]