Spaces:
Running
Running
File size: 1,099 Bytes
89e8242 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 | import os
from optimum.onnxruntime import ORTModelForSequenceClassification
from transformers import AutoTokenizer
def optimize_neural_model(
    model_id: str = "Hello-SimpleAI/chatgpt-detector-roberta",
    save_dir: str = "pt_models/onnx_neural",
) -> None:
    """Export a sequence-classification checkpoint to ONNX and save it to disk.

    The model is loaded through ``ORTModelForSequenceClassification`` with
    ``export=True`` and written, together with its tokenizer, into
    ``save_dir``. Progress is reported on stdout.

    Args:
        model_id: Identifier passed to ``from_pretrained`` for both the model
            and the tokenizer.
        save_dir: Directory that receives the exported model and tokenizer
            files. It is created if it does not exist yet.
    """
    print(f"--- Optimizing {model_id} for CPU ---")

    # exist_ok=True avoids the check-then-create race of testing the path
    # first and also makes re-running the script a no-op for this step.
    os.makedirs(save_dir, exist_ok=True)

    print("[1/3] Exporting to ONNX...")
    # export=True asks optimum to convert the checkpoint to ONNX on load.
    # NOTE(review): no explicit optimization pass (e.g. ORTOptimizer) is run
    # here; any operator fusion presumably comes from ONNX Runtime's own
    # graph optimizations when the session is created -- confirm against the
    # optimum documentation before relying on the "optimized" wording below.
    model = ORTModelForSequenceClassification.from_pretrained(
        model_id,
        export=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    print("[2/3] Saving Optimized Model...")
    # Keep the tokenizer next to the model so save_dir is self-contained.
    model.save_pretrained(save_dir)
    tokenizer.save_pretrained(save_dir)

    print(f"[3/3] DONE! Optimized model saved to {save_dir}")
    print("\nNote: Quantization to INT8 would require a calibration dataset.")
    print("For now, this FP32 optimized ONNX will already give a significant speedup on CPU.")
if __name__ == "__main__":
    # Script entry point: run the export with its default settings when this
    # file is executed directly; importing the module does nothing.
    optimize_neural_model()
|