Spaces:
Sleeping
Sleeping
Oleg Shulyakov
committed on
Commit
·
1147115
1
Parent(s):
e7b8f47
Update _split_and_upload_model
Browse files
app.py
CHANGED
|
@@ -169,11 +169,13 @@ class HuggingFaceModelProcessor:
|
|
| 169 |
|
| 170 |
print(f"Importance matrix generation completed: {os.path.abspath(quant_config.imatrix_file)}")
|
| 171 |
|
| 172 |
-
def _split_and_upload_model(self,
|
| 173 |
-
split_config: SplitConfig) -> None:
|
| 174 |
"""Split large model files and upload shards."""
|
| 175 |
-
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
| 177 |
|
| 178 |
split_cmd = ["llama-gguf-split", "--split"]
|
| 179 |
|
|
@@ -182,8 +184,8 @@ class HuggingFaceModelProcessor:
|
|
| 182 |
else:
|
| 183 |
split_cmd.extend(["--split-max-tensors", str(split_config.max_tensors)])
|
| 184 |
|
| 185 |
-
model_path_prefix = '.'.join(
|
| 186 |
-
split_cmd.extend([
|
| 187 |
|
| 188 |
print(f"Split command: {split_cmd}")
|
| 189 |
result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
|
|
@@ -198,14 +200,14 @@ class HuggingFaceModelProcessor:
|
|
| 198 |
print("Model split successfully!")
|
| 199 |
|
| 200 |
# Remove original model file
|
| 201 |
-
if os.path.exists(
|
| 202 |
-
os.remove(
|
| 203 |
|
| 204 |
model_file_prefix = model_path_prefix.split('/')[-1]
|
| 205 |
print(f"Model file name prefix: {model_file_prefix}")
|
| 206 |
|
| 207 |
sharded_model_files = [
|
| 208 |
-
f for f in os.listdir(outdir)
|
| 209 |
if f.startswith(model_file_prefix) and f.endswith(".gguf")
|
| 210 |
]
|
| 211 |
|
|
@@ -214,10 +216,10 @@ class HuggingFaceModelProcessor:
|
|
| 214 |
|
| 215 |
print(f"Sharded model files: {sharded_model_files}")
|
| 216 |
for file in sharded_model_files:
|
| 217 |
-
file_path = os.path.join(outdir, file)
|
| 218 |
try:
|
| 219 |
print(f"Uploading file: {file_path}")
|
| 220 |
-
self._upload_file(
|
| 221 |
except Exception as e:
|
| 222 |
raise GGUFConverterError(f"Error uploading file {file_path}: {e}")
|
| 223 |
|
|
@@ -402,7 +404,7 @@ llama-server --hf-repo "{processing_config.new_repo_id}" --hf-file "{processing_
|
|
| 402 |
# Upload model
|
| 403 |
if split_config.enabled:
|
| 404 |
print(f"Splitting quantized model: {os.path.abspath(quant_config.quantized_gguf)}")
|
| 405 |
-
self._split_and_upload_model(
|
| 406 |
else:
|
| 407 |
try:
|
| 408 |
print(f"Uploading quantized model: {os.path.abspath(quant_config.quantized_gguf)}")
|
|
|
|
| 169 |
|
| 170 |
print(f"Importance matrix generation completed: {os.path.abspath(quant_config.imatrix_file)}")
|
| 171 |
|
| 172 |
+
def _split_and_upload_model(self, processing_config: ModelProcessingConfig) -> None:
|
|
|
|
| 173 |
"""Split large model files and upload shards."""
|
| 174 |
+
quant_config = processing_config.quant_config
|
| 175 |
+
split_config = processing_config.split_config
|
| 176 |
+
|
| 177 |
+
print(f"Model path: {quant_config.quantized_gguf}")
|
| 178 |
+
print(f"Output dir: {processing_config.outdir}")
|
| 179 |
|
| 180 |
split_cmd = ["llama-gguf-split", "--split"]
|
| 181 |
|
|
|
|
| 184 |
else:
|
| 185 |
split_cmd.extend(["--split-max-tensors", str(split_config.max_tensors)])
|
| 186 |
|
| 187 |
+
model_path_prefix = '.'.join(quant_config.quantized_gguf.split('.')[:-1])
|
| 188 |
+
split_cmd.extend([quant_config.quantized_gguf, model_path_prefix])
|
| 189 |
|
| 190 |
print(f"Split command: {split_cmd}")
|
| 191 |
result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
|
|
|
|
| 200 |
print("Model split successfully!")
|
| 201 |
|
| 202 |
# Remove original model file
|
| 203 |
+
if os.path.exists(quant_config.quantized_gguf):
|
| 204 |
+
os.remove(quant_config.quantized_gguf)
|
| 205 |
|
| 206 |
model_file_prefix = model_path_prefix.split('/')[-1]
|
| 207 |
print(f"Model file name prefix: {model_file_prefix}")
|
| 208 |
|
| 209 |
sharded_model_files = [
|
| 210 |
+
f for f in os.listdir(processing_config.outdir)
|
| 211 |
if f.startswith(model_file_prefix) and f.endswith(".gguf")
|
| 212 |
]
|
| 213 |
|
|
|
|
| 216 |
|
| 217 |
print(f"Sharded model files: {sharded_model_files}")
|
| 218 |
for file in sharded_model_files:
|
| 219 |
+
file_path = os.path.join(processing_config.outdir, file)
|
| 220 |
try:
|
| 221 |
print(f"Uploading file: {file_path}")
|
| 222 |
+
self._upload_file(processing_config, file_path, file)
|
| 223 |
except Exception as e:
|
| 224 |
raise GGUFConverterError(f"Error uploading file {file_path}: {e}")
|
| 225 |
|
|
|
|
| 404 |
# Upload model
|
| 405 |
if split_config.enabled:
|
| 406 |
print(f"Splitting quantized model: {os.path.abspath(quant_config.quantized_gguf)}")
|
| 407 |
+
self._split_and_upload_model(processing_config)
|
| 408 |
else:
|
| 409 |
try:
|
| 410 |
print(f"Uploading quantized model: {os.path.abspath(quant_config.quantized_gguf)}")
|