broadfield-dev committed on
Commit
7e81d62
·
verified ·
1 Parent(s): e002429

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -10,8 +10,8 @@ from huggingface_hub import HfApi
10
  from transformers import AutoConfig, AutoModel, AutoTokenizer
11
  from optimum.onnxruntime import ORTQuantizer, ORTModelForCausalLM
12
  from optimum.onnxruntime.configuration import AutoQuantizationConfig
13
- from optimum.exporters.onnx import main_export as onnx_export
14
- from optimum.exporters.gguf import main_export as gguf_export
15
  import torch.nn.utils.prune as prune
16
 
17
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -67,7 +67,7 @@ def stage_3_4_onnx_quantize(model_path: str, calibration_data_path: str):
67
  model_name = os.path.basename(model_path)
68
  onnx_path = os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-onnx")
69
 
70
- onnx_export(model_path, output=onnx_path, task="auto", trust_remote_code=True)
71
  log_stream += f"Successfully exported base model to ONNX at: {onnx_path}\n"
72
 
73
  quantizer = ORTQuantizer.from_pretrained(onnx_path)
@@ -105,7 +105,7 @@ def stage_3_4_gguf_quantize(model_id: str, quantization_strategy: str):
105
  gguf_path = os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-gguf")
106
  os.makedirs(gguf_path, exist_ok=True)
107
 
108
- gguf_export(model_id, output=os.path.join(gguf_path, "model.gguf"), quantization_strategy=quantization_strategy, trust_remote_code=True)
109
 
110
  log_stream += f"Successfully exported and quantized model to GGUF at: {gguf_path}\n"
111
  return gguf_path, log_stream
 
10
  from transformers import AutoConfig, AutoModel, AutoTokenizer
11
  from optimum.onnxruntime import ORTQuantizer, ORTModelForCausalLM
12
  from optimum.onnxruntime.configuration import AutoQuantizationConfig
13
+ # Use the unified optimum.main_export entrypoint
14
+ from optimum.exporters.main import main_export
15
  import torch.nn.utils.prune as prune
16
 
17
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
67
  model_name = os.path.basename(model_path)
68
  onnx_path = os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-onnx")
69
 
70
+ main_export(model_path, output=onnx_path, task="auto", trust_remote_code=True)
71
  log_stream += f"Successfully exported base model to ONNX at: {onnx_path}\n"
72
 
73
  quantizer = ORTQuantizer.from_pretrained(onnx_path)
 
105
  gguf_path = os.path.join(OUTPUT_DIR, f"{model_name}-{run_id}-gguf")
106
  os.makedirs(gguf_path, exist_ok=True)
107
 
108
+ main_export(model_id, output=os.path.join(gguf_path, "model.gguf"), export_format="gguf", quantization_strategy=quantization_strategy, trust_remote_code=True)
109
 
110
  log_stream += f"Successfully exported and quantized model to GGUF at: {gguf_path}\n"
111
  return gguf_path, log_stream