Spaces:
Running
Running
Update optimum_neuron_export.py
Browse files- optimum_neuron_export.py +29 -1
optimum_neuron_export.py
CHANGED
|
@@ -116,6 +116,33 @@ def get_default_inputs(task_or_pipeline: str) -> Dict[str, int]:
|
|
| 116 |
# Default to text-based shapes
|
| 117 |
return {"batch_size": 1, "sequence_length": 128}
|
| 118 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
def previous_pr(api: "HfApi", model_id: str, pr_title: str) -> Optional["Discussion"]:
|
| 120 |
try:
|
| 121 |
discussions = api.get_repo_discussions(repo_id=model_id)
|
|
@@ -140,6 +167,7 @@ def export_and_git_add(model_id: str, task_or_pipeline: str, model_type: str, fo
|
|
| 140 |
raise Exception(f"❌ Unsupported task/pipeline: {task_or_pipeline}. Supported: {supported}")
|
| 141 |
|
| 142 |
inputs = get_default_inputs(task_or_pipeline)
|
|
|
|
| 143 |
yield f"🔧 Using default inputs: {inputs}"
|
| 144 |
|
| 145 |
try:
|
|
@@ -149,7 +177,7 @@ def export_and_git_add(model_id: str, task_or_pipeline: str, model_type: str, fo
|
|
| 149 |
tensor_parallel_size=1,
|
| 150 |
token=HF_TOKEN,
|
| 151 |
cpu_backend=True,
|
| 152 |
-
compiler_args=
|
| 153 |
**inputs,
|
| 154 |
)
|
| 155 |
model.save_pretrained(folder)
|
|
|
|
| 116 |
# Default to text-based shapes
|
| 117 |
return {"batch_size": 1, "sequence_length": 128}
|
| 118 |
|
| 119 |
+
def prepare_compiler_flags(
    auto_cast: str | None = None,
    auto_cast_type: str = "bf16",
    optlevel: str = "2",
    instance_type: str = "trn1",
):
    """Assemble a space-separated neuronx-cc compiler argument string.

    Args:
        auto_cast: Casting mode for the Neuron compiler. ``None`` disables
            auto-casting (``--auto-cast none``). The spelling ``"matmul"``
            is normalized to the compiler's expected ``"matmult"``.
        auto_cast_type: Precision used when auto-casting (only emitted when
            ``auto_cast`` is set).
        optlevel: Compiler optimization level passed via ``--optlevel``.
        instance_type: Target instance family; ``"trn1"`` and ``"trn2"``
            each add a matching ``--target`` flag, anything else adds none.

    Returns:
        All flags joined into a single string suitable for ``compiler_args=``.
    """
    if auto_cast is None:
        flags = ["--auto-cast", "none"]
    else:
        # Log the caller-supplied value before normalization.
        logger.info(f"Using Neuron: --auto-cast {auto_cast}")
        # neuronx-cc spells this option value "matmult" — TODO confirm against
        # the compiler CLI reference; the original code performs this mapping.
        normalized = "matmult" if auto_cast == "matmul" else auto_cast
        flags = ["--auto-cast", normalized]

        logger.info(f"Using Neuron: --auto-cast-type {auto_cast_type}")
        flags += ["--auto-cast-type", auto_cast_type]

    flags += ["--optlevel", optlevel]
    logger.info(f"Using Neuron: --optlevel {optlevel}")

    # Both supported trainium generations map straight onto --target.
    if instance_type in ("trn1", "trn2"):
        flags += ["--target", instance_type]

    return " ".join(flags)
|
| 145 |
+
|
| 146 |
def previous_pr(api: "HfApi", model_id: str, pr_title: str) -> Optional["Discussion"]:
|
| 147 |
try:
|
| 148 |
discussions = api.get_repo_discussions(repo_id=model_id)
|
|
|
|
| 167 |
raise Exception(f"❌ Unsupported task/pipeline: {task_or_pipeline}. Supported: {supported}")
|
| 168 |
|
| 169 |
inputs = get_default_inputs(task_or_pipeline)
|
| 170 |
+
compiler_args = prepare_compiler_flags(auto_cast, auto_cast_type, optlevel, instance_type)
|
| 171 |
yield f"🔧 Using default inputs: {inputs}"
|
| 172 |
|
| 173 |
try:
|
|
|
|
| 177 |
tensor_parallel_size=1,
|
| 178 |
token=HF_TOKEN,
|
| 179 |
cpu_backend=True,
|
| 180 |
+
compiler_args=compiler_args,
|
| 181 |
**inputs,
|
| 182 |
)
|
| 183 |
model.save_pretrained(folder)
|