Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -5,13 +5,12 @@ import logging
|
|
| 5 |
import time
|
| 6 |
import tempfile
|
| 7 |
import shutil
|
|
|
|
| 8 |
from datetime import datetime
|
| 9 |
from huggingface_hub import HfApi
|
| 10 |
from transformers import AutoConfig, AutoModel, AutoTokenizer
|
| 11 |
-
from optimum.onnxruntime import ORTQuantizer
|
| 12 |
from optimum.onnxruntime.configuration import AutoQuantizationConfig
|
| 13 |
-
# Use the unified optimum.main_export entrypoint
|
| 14 |
-
from optimum.exporters.main import main_export
|
| 15 |
import torch.nn.utils.prune as prune
|
| 16 |
|
| 17 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
@@ -62,14 +61,28 @@ def stage_2_prune_model(model, prune_percentage: float):
|
|
| 62 |
|
| 63 |
def stage_3_4_onnx_quantize(model_path: str, calibration_data_path: str):
|
| 64 |
log_stream = "[STAGE 3 & 4] Converting to ONNX and Quantizing...\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
try:
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
log_stream += f"Successfully exported base model to ONNX at: {onnx_path}\n"
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
quantizer = ORTQuantizer.from_pretrained(onnx_path)
|
| 74 |
|
| 75 |
if calibration_data_path:
|
|
@@ -93,28 +106,37 @@ def stage_3_4_onnx_quantize(model_path: str, calibration_data_path: str):
|
|
| 93 |
log_stream += f"Successfully quantized model to: {quantized_path}\n"
|
| 94 |
return quantized_path, log_stream
|
| 95 |
except Exception as e:
|
| 96 |
-
error_msg = f"Failed during ONNX conversion/quantization. Error: {e}"
|
| 97 |
logging.error(error_msg, exc_info=True)
|
| 98 |
raise RuntimeError(error_msg)
|
| 99 |
|
| 100 |
def stage_3_4_gguf_quantize(model_id: str, quantization_strategy: str):
|
| 101 |
log_stream = f"[STAGE 3 & 4] Converting to GGUF with '{quantization_strategy}' quantization...\n"
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
main_export(model_id, output=os.path.join(gguf_path, "model.gguf"), export_format="gguf", quantization_strategy=quantization_strategy, trust_remote_code=True)
|
| 109 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
log_stream += f"Successfully exported and quantized model to GGUF at: {gguf_path}\n"
|
| 111 |
return gguf_path, log_stream
|
| 112 |
-
except Exception as e:
|
| 113 |
-
error_msg = f"Failed during GGUF conversion. Error: {e}"
|
| 114 |
-
logging.error(error_msg, exc_info=True)
|
| 115 |
raise RuntimeError(error_msg)
|
| 116 |
|
| 117 |
-
|
| 118 |
def stage_5_package_and_upload(model_id: str, optimized_model_path: str, pipeline_log: str, options: dict):
|
| 119 |
log_stream = "[STAGE 5] Packaging and Uploading...\n"
|
| 120 |
if not HF_TOKEN:
|
|
|
|
| 5 |
import time
|
| 6 |
import tempfile
|
| 7 |
import shutil
|
| 8 |
+
import subprocess
|
| 9 |
from datetime import datetime
|
| 10 |
from huggingface_hub import HfApi
|
| 11 |
from transformers import AutoConfig, AutoModel, AutoTokenizer
|
| 12 |
+
from optimum.onnxruntime import ORTQuantizer
|
| 13 |
from optimum.onnxruntime.configuration import AutoQuantizationConfig
|
|
|
|
|
|
|
| 14 |
import torch.nn.utils.prune as prune
|
| 15 |
|
| 16 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
|
| 61 |
|
| 62 |
def stage_3_4_onnx_quantize(model_path: str, calibration_data_path: str):
|
| 63 |
log_stream = "[STAGE 3 & 4] Converting to ONNX and Quantizing...\n"
|
| 64 |
+
run_id = datetime.now().strftime("%Y%m%d-%H%M%S")
|
| 65 |
+
model_name = os.path.basename(model_path)
|
| 66 |
+
onnx_path = os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-onnx")
|
| 67 |
+
|
| 68 |
try:
|
| 69 |
+
log_stream += "Executing `optimum-cli export onnx` via subprocess...\n"
|
| 70 |
+
export_command = [
|
| 71 |
+
"optimum-cli", "export", "onnx",
|
| 72 |
+
"--model", model_path,
|
| 73 |
+
"--trust-remote-code",
|
| 74 |
+
onnx_path
|
| 75 |
+
]
|
| 76 |
+
process = subprocess.run(export_command, check=True, capture_output=True, text=True)
|
| 77 |
+
log_stream += process.stdout
|
| 78 |
+
if process.stderr: log_stream += f"[STDERR]\n{process.stderr}\n"
|
| 79 |
log_stream += f"Successfully exported base model to ONNX at: {onnx_path}\n"
|
| 80 |
+
except subprocess.CalledProcessError as e:
|
| 81 |
+
error_msg = f"Failed during `optimum-cli export onnx`. Error:\n{e.stderr}"
|
| 82 |
+
logging.error(error_msg)
|
| 83 |
+
raise RuntimeError(error_msg)
|
| 84 |
+
|
| 85 |
+
try:
|
| 86 |
quantizer = ORTQuantizer.from_pretrained(onnx_path)
|
| 87 |
|
| 88 |
if calibration_data_path:
|
|
|
|
| 106 |
log_stream += f"Successfully quantized model to: {quantized_path}\n"
|
| 107 |
return quantized_path, log_stream
|
| 108 |
except Exception as e:
|
| 109 |
+
error_msg = f"Failed during ONNX quantization step. Error: {e}"
|
| 110 |
logging.error(error_msg, exc_info=True)
|
| 111 |
raise RuntimeError(error_msg)
|
| 112 |
|
| 113 |
def stage_3_4_gguf_quantize(model_id: str, quantization_strategy: str):
    """Convert *model_id* to GGUF and quantize it (pipeline stages 3 & 4).

    Shells out to ``optimum-cli export gguf`` and captures its output into
    the running log. Returns a ``(gguf_path, log_stream)`` tuple; raises
    ``RuntimeError`` when the exporter subprocess exits non-zero.
    """
    log_stream = f"[STAGE 3 & 4] Converting to GGUF with '{quantization_strategy}' quantization...\n"

    # Unique, timestamped output directory under OUTPUT_DIR so repeated
    # runs of the same model never collide.
    stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    safe_name = model_id.replace('/', '_')
    gguf_path = os.path.join(OUTPUT_DIR, f"{safe_name}-{stamp}-gguf")
    os.makedirs(gguf_path, exist_ok=True)
    output_file = os.path.join(gguf_path, "model.gguf")

    try:
        log_stream += "Executing `optimum-cli export gguf` via subprocess...\n"
        cmd = ["optimum-cli", "export", "gguf"]
        cmd += ["--model", model_id]
        cmd += ["--quantization_strategy", quantization_strategy]
        cmd += ["--trust-remote-code", output_file]
        completed = subprocess.run(cmd, check=True, capture_output=True, text=True)
        log_stream += completed.stdout
        if completed.stderr:
            log_stream += f"[STDERR]\n{completed.stderr}\n"
        log_stream += f"Successfully exported and quantized model to GGUF at: {gguf_path}\n"
        return gguf_path, log_stream
    except subprocess.CalledProcessError as e:
        # Surface the CLI's stderr so the caller can see why the export failed.
        error_msg = f"Failed during `optimum-cli export gguf`. Error:\n{e.stderr}"
        logging.error(error_msg)
        raise RuntimeError(error_msg)
|
| 139 |
|
|
|
|
| 140 |
def stage_5_package_and_upload(model_id: str, optimized_model_path: str, pipeline_log: str, options: dict):
|
| 141 |
log_stream = "[STAGE 5] Packaging and Uploading...\n"
|
| 142 |
if not HF_TOKEN:
|