Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -10,8 +10,8 @@ from huggingface_hub import HfApi
|
|
| 10 |
from transformers import AutoConfig, AutoModel, AutoTokenizer
|
| 11 |
from optimum.onnxruntime import ORTQuantizer, ORTModelForCausalLM
|
| 12 |
from optimum.onnxruntime.configuration import AutoQuantizationConfig
|
| 13 |
-
|
| 14 |
-
from optimum.exporters.
|
| 15 |
import torch.nn.utils.prune as prune
|
| 16 |
|
| 17 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
@@ -67,7 +67,7 @@ def stage_3_4_onnx_quantize(model_path: str, calibration_data_path: str):
|
|
| 67 |
model_name = os.path.basename(model_path)
|
| 68 |
onnx_path = os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-onnx")
|
| 69 |
|
| 70 |
-
|
| 71 |
log_stream += f"Successfully exported base model to ONNX at: {onnx_path}\n"
|
| 72 |
|
| 73 |
quantizer = ORTQuantizer.from_pretrained(onnx_path)
|
|
@@ -105,7 +105,7 @@ def stage_3_4_gguf_quantize(model_id: str, quantization_strategy: str):
|
|
| 105 |
gguf_path = os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-gguf")
|
| 106 |
os.makedirs(gguf_path, exist_ok=True)
|
| 107 |
|
| 108 |
-
|
| 109 |
|
| 110 |
log_stream += f"Successfully exported and quantized model to GGUF at: {gguf_path}\n"
|
| 111 |
return gguf_path, log_stream
|
|
|
|
| 10 |
from transformers import AutoConfig, AutoModel, AutoTokenizer
|
| 11 |
from optimum.onnxruntime import ORTQuantizer, ORTModelForCausalLM
|
| 12 |
from optimum.onnxruntime.configuration import AutoQuantizationConfig
|
| 13 |
+
# Use the unified optimum.exporters.onnx.main_export entrypoint
|
| 14 |
+
from optimum.exporters.onnx import main_export
|
| 15 |
import torch.nn.utils.prune as prune
|
| 16 |
|
| 17 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
|
| 67 |
model_name = os.path.basename(model_path)
|
| 68 |
onnx_path = os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-onnx")
|
| 69 |
|
| 70 |
+
main_export(model_path, output=onnx_path, task="auto", trust_remote_code=True)
|
| 71 |
log_stream += f"Successfully exported base model to ONNX at: {onnx_path}\n"
|
| 72 |
|
| 73 |
quantizer = ORTQuantizer.from_pretrained(onnx_path)
|
|
|
|
| 105 |
gguf_path = os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-gguf")
|
| 106 |
os.makedirs(gguf_path, exist_ok=True)
|
| 107 |
|
| 108 |
+
main_export(model_id, output=os.path.join(gguf_path, "model.gguf"), export_format="gguf", quantization_strategy=quantization_strategy, trust_remote_code=True)  # NOTE(review): optimum's main_export exports to ONNX and accepts no export_format/quantization_strategy kwargs — GGUF conversion needs llama.cpp's convert_hf_to_gguf; verify
|
| 109 |
|
| 110 |
log_stream += f"Successfully exported and quantized model to GGUF at: {gguf_path}\n"
|
| 111 |
return gguf_path, log_stream
|