Ubuntu
committed on
Commit
·
5ee43e9
1
Parent(s):
06d3040
tests
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- torch_compile/flux/test_clip_text_encoder.py.py +80 -0
- torch_compile/flux/test_flux_transformer.py +73 -0
- torch_compile/flux/test_t5_text_encoder.py +72 -0
- torch_compile/flux/test_vae_decoder.py +84 -0
- torch_compile/run_albert.py +63 -0
- torch_compile/run_ast.py +77 -0
- torch_compile/run_beit.py +81 -0
- torch_compile/run_bert.py +62 -0
- torch_compile/run_camembert.py +71 -0
- torch_compile/run_clip.py +76 -0
- torch_compile/run_convbert.py +72 -0
- torch_compile/run_convnext.py +72 -0
- torch_compile/run_convnextv2.py +72 -0
- torch_compile/run_cvt.py +72 -0
- torch_compile/run_deberta.py +72 -0
- torch_compile/run_deberta_v3.py +72 -0
- torch_compile/run_deit.py +71 -0
- torch_compile/run_distillbert.py +67 -0
- torch_compile/run_donutswin.py +76 -0
- torch_compile/run_dpt.py +66 -0
- torch_compile/run_electra.py +67 -0
- torch_compile/run_esm.py +67 -0
- torch_compile/run_flaubert.py +97 -0
- torch_compile/run_hubert.py +85 -0
- torch_compile/run_levit.py +70 -0
- torch_compile/run_mobilebert.py +67 -0
- torch_compile/run_mobilenetv2.py +71 -0
- torch_compile/run_mobilevit.py +70 -0
- torch_compile/run_modernbert.py +66 -0
- torch_compile/run_mpnet.py +62 -0
- torch_compile/run_phi.py +77 -0
- torch_compile/run_phi3.py +86 -0
- torch_compile/run_roberta.py +67 -0
- torch_compile/run_roformer.py +67 -0
- torch_compile/run_sam2.py +56 -0
- torch_compile/run_swin.py +76 -0
- torch_compile/run_t5_decoder.py +66 -0
- torch_compile/run_t5_encoder.py +59 -0
- torch_compile/run_unispeech.py +62 -0
- torch_compile/run_unispeech_sat.py +67 -0
- torch_compile/run_vit.py +64 -0
- torch_compile/run_wav2vec2.py +73 -0
- torch_compile/run_whisper.py +91 -0
- torch_compile/run_xlm.py +0 -0
- torch_compile/run_xlm_roberta.py +0 -0
- torch_compile/run_yolos.py +83 -0
- torch_compile/torch_neuronx_dump/0123150520_241/offloaded_ops.txt +1 -0
- torch_compile/torch_neuronx_dump/0123150520_241/used_ops.txt +8 -0
- torch_compile/torch_neuronx_dump/0123154351_1091/offloaded_ops.txt +1 -0
- torch_compile/torch_neuronx_dump/0123154351_1091/used_ops.txt +8 -0
torch_compile/flux/test_clip_text_encoder.py.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/usr/bin/env python3
"""
CLIP (Flux variant) zero-shot image-classification on Neuron.
Flux pipeline uses: openai/clip-vit-large-patch14
"""
import argparse
import logging
import time

import torch
from transformers import CLIPProcessor, CLIPModel
from datasets import load_dataset
import torch_neuronx  # noqa: F401 guarantees Neuron backend

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def main():
    """Run CLIP zero-shot classification eagerly once, then compiled on Neuron.

    Logs warmup (compile) time, steady-state latency, per-label probabilities
    and the predicted label.
    """
    parser = argparse.ArgumentParser(
        description="CLIP (Flux checkpoint) zero-shot image classification with torch.compile on Neuron"
    )
    parser.add_argument(
        "--model",
        type=str,
        default="openai/clip-vit-large-patch14",  # Flux CLIP checkpoint
        help="CLIP model name on Hugging Face Hub",
    )
    # NOTE(review): --batch-size is parsed but never used below; kept for CLI
    # compatibility with the sibling run_* scripts.
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    args = parser.parse_args()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Load dataset and pick an image
    dataset = load_dataset("huggingface/cats-image")
    image = dataset["test"]["image"][0]

    # Load processor and model (Flux CLIP checkpoint)
    processor = CLIPProcessor.from_pretrained(args.model)
    model = CLIPModel.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    # Zero-shot labels
    texts = ["a photo of a cat", "a photo of a dog", "a photo of a bird"]
    inputs = processor(text=texts, images=image, return_tensors="pt", padding=True)

    # Pre-run once to freeze shapes before compilation
    with torch.no_grad():
        outputs = model(**inputs)

    # Compile forward pass (allow graph breaks for big model)
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=False)

    # Warmup (includes the Neuron compilation itself)
    warmup_start = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - warmup_start

    # Actual run
    run_start = time.time()
    with torch.no_grad():
        outputs = model(**inputs)
    run_time = time.time() - run_start

    # Compute probabilities
    logits_per_image = outputs.logits_per_image  # [B, num_texts]
    probs = logits_per_image.softmax(dim=-1)
    # Fix: argmax over probs[0] (the labels axis of the first image) instead of
    # the flattened [B, num_texts] tensor — identical for B == 1, correct for B > 1.
    best_idx = int(probs[0].argmax(dim=-1).item())
    best_label = texts[best_idx]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Probabilities: %s", probs.tolist())
    logger.info("Predicted label: %s", best_label)


if __name__ == "__main__":
    main()
torch_compile/flux/test_flux_transformer.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# torchrun --nproc_per_node=8 test_flux_transformer.py
"""Tensor-parallel Flux transformer single-forward latency test on Neuron."""
import os, time, argparse, logging, torch, torch.distributed as dist
from torch.distributed.device_mesh import DeviceMesh
# Fix: DTensor and Replicate are used below but were never imported, so the
# original script raised NameError in apply_tp_flux() / the shard-move loop.
from torch.distributed.tensor import DTensor, Replicate
from torch.distributed.tensor.parallel import (
    ColwiseParallel, RowwiseParallel, PrepareModuleInput, parallelize_module
)
from diffusers import FluxTransformer2DModel
import torch_neuronx
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def apply_tp_flux(transformer: torch.nn.Module, tp_mesh: DeviceMesh):
    """Apply a tensor-parallel sharding plan to a FluxTransformer2DModel in place.

    Attention qkv / FFN gate are column-sharded, their output projections are
    row-sharded back to Replicate; norms and embedders stay replicated (None).
    Returns the same (mutated) module.
    """
    # embed & final-norm replicated
    plan = {"x_embedder": None, "norm_out": None}
    parallelize_module(transformer, tp_mesh, plan)

    # inside each transformer block
    # NOTE(review): module paths ("attn.qkv", "ffn.net.0", ...) must match the
    # installed diffusers module tree — confirm against the checkpoint.
    for block in transformer.transformer_blocks:
        blk = {
            "norm1": None,
            "norm1_k": None,
            "attn.qkv": ColwiseParallel(),
            "attn.proj": RowwiseParallel(output_layouts=Replicate()),
            "attn.norm_q": None,
            "attn.norm_k": None,
            "ffn.net.0": ColwiseParallel(),  # gate
            "ffn.net.2": RowwiseParallel(output_layouts=Replicate()),
        }
        parallelize_module(block, tp_mesh, blk)
    return transformer

def main():
    """Init the process group, shard the model, compile, and log latency per rank."""
    dist.init_process_group(backend="neuron")
    rank = dist.get_rank()
    device = torch.device(f"neuron:{rank}")
    tp_mesh = DeviceMesh("neuron", list(range(dist.get_world_size())))

    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="black-forest-labs/FLUX.1-dev/transformer")
    parser.add_argument("--seq-len", type=int, default=4096)
    parser.add_argument("--dim", type=int, default=3072)
    args = parser.parse_args()

    # create on CPU, real tensors
    with torch.device("cpu"):
        transformer = FluxTransformer2DModel.from_pretrained(
            args.model, torch_dtype=torch.bfloat16, attn_implementation="eager"
        ).eval()

    transformer = apply_tp_flux(transformer, tp_mesh)
    # move local shards to Neuron (DTensor params move only their local shard)
    for p in transformer.parameters():
        if isinstance(p, DTensor):
            p._local_tensor = p._local_tensor.to(device, dtype=torch.bfloat16)
        else:
            p.data = p.data.to(device, dtype=torch.bfloat16)

    transformer = torch.compile(transformer, backend="neuron", fullgraph=False)

    batch = 1
    hidden = torch.randn(batch, args.seq_len, args.dim, dtype=torch.bfloat16, device=device)
    encoder_hidden = torch.randn(batch, args.seq_len, 4096, dtype=torch.bfloat16, device=device)
    timestep = torch.tensor([500], dtype=torch.int64, device=device)

    # NOTE(review): current diffusers FluxTransformer2DModel.forward takes
    # hidden_states=/encoder_hidden_states= — verify these kwarg names against
    # the installed diffusers version before relying on this call.
    with torch.no_grad():
        _ = transformer(hidden=hidden, encoder_hidden=encoder_hidden, timestep=timestep)
        t0 = time.time()
        out = transformer(hidden=hidden, encoder_hidden=encoder_hidden, timestep=timestep)
    logger.info("Rank %d Flux-TFM latency: %.3f ms shape: %s",
                rank, (time.time()-t0)*1000, out.sample.shape)

if __name__ == "__main__":
    main()
torch_compile/flux/test_t5_text_encoder.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# torchrun --nproc_per_node=4 test_t5_text_encoder.py
"""Tensor-parallel T5 text-encoder latency test on Neuron."""
import os, time, argparse, logging, torch, torch.distributed as dist
from torch.distributed.device_mesh import DeviceMesh
# Fix: DTensor and Replicate are used below but were never imported (NameError).
from torch.distributed.tensor import DTensor, Replicate
from torch.distributed.tensor.parallel import (
    ColwiseParallel, RowwiseParallel, PrepareModuleInput, parallelize_module
)
from transformers import T5EncoderModel, AutoTokenizer
# Fix: removed unused `from torchtitan.models.t5 import T5Encoder` — the name
# was never referenced and the import crashed when torchtitan is not installed.
import torch_neuronx
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

def apply_tp_t5(encoder: torch.nn.Module, tp_mesh: DeviceMesh):
    """Apply a tensor-parallel sharding plan to a T5EncoderModel in place.

    q/k/v are column-sharded, the attention output projection and second FFN
    dense are row-sharded back to Replicate; embeddings stay replicated.
    Returns the same (mutated) module.
    """
    # encoder.embed_tokens already replicated
    plan = {
        "embed_tokens": None,   # replicate
        "encoder.block": None,  # we will loop inside
    }
    parallelize_module(encoder, tp_mesh, plan)

    # shard every dense layer inside each encoder block
    for layer in encoder.encoder.block:
        layer_plan = {
            "layer.0.SelfAttention.q": ColwiseParallel(),
            "layer.0.SelfAttention.k": ColwiseParallel(),
            "layer.0.SelfAttention.v": ColwiseParallel(),
            "layer.0.SelfAttention.o": RowwiseParallel(output_layouts=Replicate()),
            "layer.0.dense": ColwiseParallel(),
            "layer.1.dense": RowwiseParallel(output_layouts=Replicate()),
        }
        parallelize_module(layer, tp_mesh, layer_plan)
    return encoder

def main():
    """Init the process group, shard the encoder, compile, and log latency per rank."""
    dist.init_process_group(backend="neuron")
    rank = dist.get_rank()
    device = torch.device(f"neuron:{rank}")
    tp_mesh = DeviceMesh("neuron", list(range(dist.get_world_size())))

    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default="google/t5-v1_1-xxl")
    parser.add_argument("--seq-len", type=int, default=512)
    args = parser.parse_args()

    tokenizer = AutoTokenizer.from_pretrained(args.model)
    # create model on CPU, real tensors
    with torch.device("cpu"):
        encoder = T5EncoderModel.from_pretrained(args.model, attn_implementation="eager").eval()

    encoder = apply_tp_t5(encoder, tp_mesh)
    # move local shards to Neuron (DTensor params move only their local shard)
    for p in encoder.parameters():
        if isinstance(p, DTensor):
            p._local_tensor = p._local_tensor.to(device)
        else:
            p.data = p.data.to(device)

    encoder = torch.compile(encoder, backend="neuron", fullgraph=False)

    text = ["a photo of a cat"]
    txt_in = tokenizer(text, max_length=args.seq_len, padding="max_length", return_tensors="pt")
    input_ids = txt_in.input_ids.to(device)

    with torch.no_grad():
        _ = encoder(input_ids)  # compile
        t0 = time.time()
        out = encoder(input_ids).last_hidden_state
    logger.info("Rank %d T5-XXL enc latency: %.3f ms shape: %s",
                rank, (time.time()-t0)*1000, out.shape)  # [1, seq_len, 4096]

if __name__ == "__main__":
    main()
torch_compile/flux/test_vae_decoder.py
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/usr/bin/env python3
"""
Flux VAE decoder (16-ch latent → RGB image) on Neuron.
Checkpoint: black-forest-labs/FLUX.1-dev/vae
"""
import argparse
import logging
import time
from pathlib import Path

import torch
from diffusers import AutoencoderKL
import torch_neuronx  # noqa: F401 guarantees Neuron backend
from PIL import Image

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def main():
    """Decode a random latent with the Flux VAE, eagerly then Neuron-compiled.

    Logs warmup/run times and writes the decoded RGB image to --output.
    """
    parser = argparse.ArgumentParser(
        description="Flux VAE decoder (latent → image) with torch.compile on Neuron"
    )
    parser.add_argument(
        "--model",
        type=str,
        # default="black-forest-labs/FLUX.1-dev/vae",
        default="/workspace/flux_weight/",  # local snapshot; Hub id commented above
        help="Flux VAE checkpoint on Hugging Face Hub",
    )
    parser.add_argument("--latent-ch", type=int, default=16, help="Latent channels (Flux=16)")
    parser.add_argument("--scale", type=int, default=32, help="Latent spatial size (256 px / 8)")
    parser.add_argument("--output", type=str, default="flux_vae_out.png", help="Output image path")
    args = parser.parse_args()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Load Flux VAE decoder
    vae = AutoencoderKL.from_pretrained(args.model, subfolder="vae", torch_dtype=torch.float32).eval()

    # Create dummy latent (float32, N(0,1)) - shape: [B, 16, H/8, W/8]
    # (comment previously said bfloat16, which contradicted the code)
    latent = torch.randn(1, args.latent_ch, args.scale, args.scale, dtype=torch.float32)

    # Pre-run once to freeze shapes before compilation
    with torch.no_grad():
        _ = vae.decode(latent).sample

    # Compile decode function as a single graph (fullgraph=True: no graph
    # breaks allowed — the earlier comment claiming the opposite was stale)
    decode_fn = torch.compile(vae.decode, backend="neuron", fullgraph=True)

    # Warmup (includes the Neuron compilation itself)
    warmup_start = time.time()
    with torch.no_grad():
        _ = decode_fn(latent)
    warmup_time = time.time() - warmup_start

    # Actual run
    run_start = time.time()
    with torch.no_grad():
        image = decode_fn(latent).sample
    run_time = time.time() - run_start

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("VAE output shape: %s", image.shape)  # [1, 3, H, W]

    # Convert to PIL and save; decoder output is assumed in [-1, 1]
    image = (image / 2 + 0.5).clamp(0, 1)  # scale to [0,1]
    image = image.cpu().float()
    Image.fromarray((image[0].permute(1, 2, 0).numpy() * 255).astype("uint8")).save(args.output)
    logger.info("Saved decoded image to %s", Path(args.output).resolve())


if __name__ == "__main__":
    main()

"""
The compilation process took more than 2 hours.
/usr/local/lib/python3.10/site-packages/torch_mlir/dialects/stablehlo/__init__.py:24: UserWarning: Could not import StableHLO C++ extension: libStablehloUnifiedPythonCAPI.so.22.0git: cannot open shared object file: No such file or directory
  warnings.warn(f"Could not import StableHLO C++ extension: {e}")
INFO:__main__:Warmup: 4010.52 s, Run: 22.5420 s
INFO:__main__:VAE output shape: torch.Size([1, 3, 256, 256])
INFO:__main__:Saved decoded image to /workspace/torch_neuron_samples/torch-neuron-samples/scripts/torch_compile/flux/flux_vae_out.png
"""
torch_compile/run_albert.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Benchmark ALBERT sequence classification eagerly vs torch.compile on Neuron."""
import argparse
import logging
import time

import torch
from transformers import AutoTokenizer, AlbertForSequenceClassification

import torch_neuronx  # ensure Neuron backend is available

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def main():
    """Run one eager pass, compile the forward, then log warmup/run times and the label."""
    parser = argparse.ArgumentParser(description="Run ALBERT on Neuron")
    parser.add_argument(
        "--model", type=str, default="albert-base-v2", help="ALBERT model name"
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    args = parser.parse_args()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Load ALBERT model and tokenizer
    model = AlbertForSequenceClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    )
    model.eval()

    tokenizer = AutoTokenizer.from_pretrained(args.model)
    inputs = tokenizer(
        "Hamilton is considered to be the best musical of human history.",
        return_tensors="pt"
    )

    # Pre-run once to fix shapes before compilation
    with torch.no_grad():
        _ = model(**inputs).logits

    # Compile forward pass
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # Warmup (includes the Neuron compilation itself)
    warmup_start = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - warmup_start

    # Actual run
    run_start = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - run_start
    # Fix: argmax over the class dimension instead of the flattened tensor,
    # so the id is correct for batch sizes > 1 as well.
    predicted_class_id = logits.argmax(dim=-1)[0].item()
    predicted_class_label = model.config.id2label[predicted_class_id]

    # Lazy %-style logging, consistent with the sibling run_* scripts
    logger.info("Warmup: %.2fs, Run: %.4fs", warmup_time, run_time)
    logger.info("Output label: %s", predicted_class_label)


if __name__ == "__main__":
    main()
torch_compile/run_ast.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Benchmark AST audio classification eagerly vs torch.compile on Neuron."""
import argparse
import logging
import time

import torch
from transformers import AutoFeatureExtractor, ASTForAudioClassification
from datasets import load_dataset

import torch_neuronx  # ensure Neuron backend is available

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _build_parser() -> argparse.ArgumentParser:
    """CLI options for the AST benchmark."""
    parser = argparse.ArgumentParser(description="Run AST (Audio Spectrogram Transformer) on Neuron")
    parser.add_argument(
        "--model",
        type=str,
        default="MIT/ast-finetuned-audioset-10-10-0.4593",
        help="AST model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    return parser


def main():
    """Time one eager and one Neuron-compiled forward pass and log the prediction."""
    args = _build_parser().parse_args()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # One validation clip from the LibriSpeech demo split, sorted for determinism.
    dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
    dataset = dataset.sort("id")
    sampling_rate = dataset.features["audio"].sampling_rate

    feature_extractor = AutoFeatureExtractor.from_pretrained(args.model)
    inputs = feature_extractor(
        dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt"
    )

    model = ASTForAudioClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    )
    model.eval()

    # Eager pass first so input shapes are fixed before compiling.
    with torch.no_grad():
        logits = model(**inputs).logits

    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # Warmup pass (this one pays the Neuron compilation cost).
    tic = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - tic

    # Timed steady-state pass.
    tic = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - tic

    # Map the winning class id back to its AudioSet label.
    predicted_class_ids = torch.argmax(logits, dim=-1).item()
    predicted_label = model.config.id2label[predicted_class_ids]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()

"""
Works
"""
torch_compile/run_beit.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Benchmark BEiT image classification eagerly vs torch.compile on Neuron."""
import argparse
import logging
import time

import torch
from transformers import AutoImageProcessor, BeitForImageClassification
from datasets import load_dataset

import torch_neuronx  # ensure Neuron backend is available

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _build_parser() -> argparse.ArgumentParser:
    """CLI options for the BEiT benchmark."""
    parser = argparse.ArgumentParser(description="Run BEiT on Neuron")
    parser.add_argument(
        "--model",
        type=str,
        default="microsoft/beit-base-patch16-224-pt22k",
        help="BEiT model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    return parser


def main():
    """Time one eager and one Neuron-compiled forward pass and log the prediction."""
    args = _build_parser().parse_args()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Single sample image from the cats-image dataset.
    dataset = load_dataset("huggingface/cats-image")
    image = dataset["test"]["image"][0]

    # Processor + model pair for the chosen checkpoint.
    image_processor = AutoImageProcessor.from_pretrained(args.model)
    model = BeitForImageClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    )
    model.eval()

    inputs = image_processor(image, return_tensors="pt")

    # Eager pass first so input shapes are fixed before compiling.
    with torch.no_grad():
        logits = model(**inputs).logits

    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # Warmup pass (this one pays the Neuron compilation cost).
    tic = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - tic

    # Timed steady-state pass.
    tic = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - tic

    # Map the winning class id back to its ImageNet label string.
    predicted_label = logits.argmax(-1).item()
    label_str = model.config.id2label[predicted_label]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", label_str)


if __name__ == "__main__":
    main()
"""
root@d90ba90f3d81:/workspace/torch_neuron_samples/torch-neuron-samples/scripts/tests# torch-mlir-opt -pass-pipeline='builtin.module(torch-backend-to-stablehlo-backend-pipeline)' /tmp/UnnammedModule.mlir
/usr/local/lib/python3.10/site-packages/transformers/models/beit/modeling_beit.py:593:0: error: failed to legalize operation 'torch.aten.fill.Tensor'
/usr/local/lib/python3.10/site-packages/transformers/pytorch_utils.py:361:0: note: called from
/usr/local/lib/python3.10/site-packages/transformers/models/beit/modeling_beit.py:625:0: note: called from
/usr/local/lib/python3.10/site-packages/transformers/models/beit/modeling_beit.py:688:0: note: called from
/usr/local/lib/python3.10/site-packages/transformers/models/beit/modeling_beit.py:824:0: note: called from
/usr/local/lib/python3.10/site-packages/transformers/models/beit/modeling_beit.py:1007:0: note: called from
/usr/local/lib/python3.10/site-packages/transformers/models/beit/modeling_beit.py:593:0: note: see current operation: %733 = "torch.aten.fill.Tensor"(%732, %524) : (!torch.vtensor<[197],si64>, !torch.vtensor<[],si64>) -> !torch.vtensor<[197],si64>
"""
torch_compile/run_bert.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Benchmark BERT sequence classification eagerly vs torch.compile on Neuron."""
import argparse
import logging
import time

import torch
from transformers import AutoTokenizer, BertForSequenceClassification

import torch_neuronx

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def main():
    """Run one eager pass, compile the forward, then log warmup/run times and the label."""
    parser = argparse.ArgumentParser(description="Run Bert on Neuron")
    parser.add_argument(
        "--model", type=str, default="google-bert/bert-base-uncased", help="Bert model name"
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    args = parser.parse_args()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    model = BertForSequenceClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    )
    model.eval()

    tokenizer = AutoTokenizer.from_pretrained(args.model)
    inputs = tokenizer("Hamilton is considered to be the best musical of human history.", return_tensors="pt")

    # Run once to establish shapes before compile
    with torch.no_grad():
        logits = model(**inputs).logits

    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # Warmup (includes the Neuron compilation itself)
    warmup_start = time.time()
    with torch.no_grad():
        # Fix: the warmup result was previously bound to `logits`, which is the
        # full model output, not logits — discard it explicitly instead.
        _ = model(**inputs)
    warmup_time = time.time() - warmup_start

    # Run
    run_start = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - run_start
    # Fix: argmax over the class dimension instead of the flattened tensor,
    # so the id is correct for batch sizes > 1 as well.
    predicted_class_id = logits.argmax(dim=-1)[0].item()
    predicted_class_label = model.config.id2label[predicted_class_id]

    # Lazy %-style logging, consistent with the sibling run_* scripts
    logger.info("Warmup: %.2fs, Run: %.4fs", warmup_time, run_time)
    logger.info("Output label: %s", predicted_class_label)


if __name__ == "__main__":
    main()

"""
Works
"""
torch_compile/run_camembert.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Benchmark a CamemBERT sequence classifier compiled with torch.compile on Neuron."""
import argparse
import logging
import time

import torch
from transformers import AutoTokenizer, CamembertForSequenceClassification

import torch_neuronx  # ensure Neuron backend is available

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _cli():
    """Define and parse command-line options (--batch-size is accepted but unused)."""
    parser = argparse.ArgumentParser(description="Run CamemBERT on Neuron")
    parser.add_argument(
        "--model",
        type=str,
        default="camembert-base",
        help="CamemBERT model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    return parser.parse_args()


def main():
    """Load the model, compile its forward pass for Neuron, and time one inference."""
    args = _cli()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Tokenizer plus an eager-attention fp32 model frozen in eval mode.
    tokenizer = AutoTokenizer.from_pretrained(args.model)
    model = CamembertForSequenceClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    )
    model.eval()

    text = "CamemBERT est un modèle de langue français."
    inputs = tokenizer(text, return_tensors="pt")

    # One eager pass first so input shapes are fixed before compilation.
    with torch.no_grad():
        logits = model(**inputs).logits

    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # First compiled call pays the compilation cost ("warmup").
    t_warm = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - t_warm

    # Steady-state timed run.
    t_run = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - t_run

    # Map the top logit to its configured label name.
    predicted_class_id = logits.argmax().item()
    predicted_label = model.config.id2label[predicted_class_id]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()

"""
Works
"""
|
torch_compile/run_clip.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Benchmark CLIP zero-shot image classification compiled with torch.compile on Neuron."""
import argparse
import logging
import time

import torch
from transformers import CLIPProcessor, CLIPModel
from datasets import load_dataset
import torch_neuronx  # ensures Neuron backend is available

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _cli():
    """Define and parse command-line options (--batch-size is accepted but unused)."""
    parser = argparse.ArgumentParser(
        description="CLIP zero-shot image classification with torch.compile on Neuron"
    )
    parser.add_argument(
        "--model",
        type=str,
        default="openai/clip-vit-base-patch32",
        help="CLIP model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    return parser.parse_args()


def main():
    """Run one zero-shot classification on a sample image and log warmup/run timings."""
    args = _cli()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Sample image from the cats-image dataset.
    image = load_dataset("huggingface/cats-image")["test"]["image"][0]

    processor = CLIPProcessor.from_pretrained(args.model)
    model = CLIPModel.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    )
    model.eval()

    # Candidate captions for zero-shot classification.
    texts = ["a photo of a cat", "a photo of a dog", "a photo of a bird"]
    inputs = processor(text=texts, images=image, return_tensors="pt", padding=True)

    # One eager pass first so tensor shapes are fixed before compilation.
    with torch.no_grad():
        outputs = model(**inputs)

    # fullgraph=False tolerates graph breaks (avoids the instruction limit).
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=False)

    # First compiled call pays the compilation cost ("warmup").
    t_warm = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - t_warm

    # Steady-state timed run.
    t_run = time.time()
    with torch.no_grad():
        outputs = model(**inputs)
    run_time = time.time() - t_run

    # Per-caption probabilities for the single image.
    logits_per_image = outputs.logits_per_image  # [batch_size, num_texts]
    probs = logits_per_image.softmax(dim=-1)
    best_idx = int(probs.argmax())
    best_label = texts[best_idx]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Probabilities: %s", probs.tolist())
    logger.info("Predicted label: %s", best_label)


if __name__ == "__main__":
    main()
|
torch_compile/run_convbert.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Benchmark a ConvBERT sequence classifier compiled with torch.compile on Neuron."""
import argparse
import logging
import time

import torch
from transformers import AutoTokenizer, ConvBertForSequenceClassification

import torch_neuronx  # ensure Neuron backend is available

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _cli():
    """Define and parse command-line options (--batch-size is accepted but unused)."""
    parser = argparse.ArgumentParser(description="Run ConvBERT on Neuron")
    parser.add_argument(
        "--model",
        type=str,
        default="YituTech/conv-bert-base",
        help="ConvBERT model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    return parser.parse_args()


def main():
    """Load the model, compile its forward pass for Neuron, and time one inference."""
    args = _cli()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Tokenizer plus an eager-attention fp32 model frozen in eval mode.
    tokenizer = AutoTokenizer.from_pretrained(args.model)
    model = ConvBertForSequenceClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    )
    model.eval()

    text = "ConvBERT combines self-attention and lightweight convolutions."
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # One eager pass first so input shapes are fixed before compilation.
    with torch.no_grad():
        logits = model(**inputs).logits

    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # First compiled call pays the compilation cost ("warmup").
    t_warm = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - t_warm

    # Steady-state timed run.
    t_run = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - t_run

    # Map the top logit to its configured label name.
    predicted_class_id = logits.argmax().item()
    predicted_label = model.config.id2label[predicted_class_id]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()

"""
<unknown>:0: error: failed to legalize operation 'torch.constant.int'
<unknown>:0: note: see current operation: %0 = "torch.constant.int"() <{value = 9 : i64}> : () -> !torch.int
"""
|
torch_compile/run_convnext.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Benchmark ConvNeXt image classification compiled with torch.compile on Neuron."""
import argparse
import logging
import time

import torch
from transformers import AutoImageProcessor, ConvNextForImageClassification
from datasets import load_dataset
import torch_neuronx  # ensures Neuron backend is available

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _cli():
    """Define and parse command-line options (--batch-size is accepted but unused)."""
    parser = argparse.ArgumentParser(
        description="ConvNeXt image-classification with torch.compile on Neuron"
    )
    parser.add_argument(
        "--model",
        type=str,
        default="facebook/convnext-tiny-224",
        help="ConvNeXT model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    return parser.parse_args()


def main():
    """Classify one sample image and log warmup/run timings."""
    args = _cli()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Sample image from the cats-image dataset.
    image = load_dataset("huggingface/cats-image")["test"]["image"][0]

    processor = AutoImageProcessor.from_pretrained(args.model)
    model = ConvNextForImageClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    )
    model.eval()

    inputs = processor(images=image, return_tensors="pt")

    # One eager pass first so tensor shapes are fixed before compilation.
    with torch.no_grad():
        outputs = model(**inputs)

    # fullgraph=False tolerates graph breaks (avoids the instruction limit).
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=False)

    # First compiled call pays the compilation cost ("warmup").
    t_warm = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - t_warm

    # Steady-state timed run.
    t_run = time.time()
    with torch.no_grad():
        outputs = model(**inputs)
    run_time = time.time() - t_run

    # Map the top logit to its ImageNet label.
    predicted_class_idx = outputs.logits.argmax(-1).item()
    predicted_label = model.config.id2label[predicted_class_idx]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()
|
torch_compile/run_convnextv2.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Benchmark ConvNeXt-V2 image classification compiled with torch.compile on Neuron."""
import argparse
import logging
import time

import torch
from transformers import AutoImageProcessor, ConvNextV2ForImageClassification
from datasets import load_dataset
import torch_neuronx  # ensures Neuron backend is available

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _cli():
    """Define and parse command-line options (--batch-size is accepted but unused)."""
    parser = argparse.ArgumentParser(
        description="ConvNeXt-V2 image-classification with torch.compile on Neuron"
    )
    parser.add_argument(
        "--model",
        type=str,
        default="facebook/convnextv2-tiny-1k-224",
        help="ConvNeXt-V2 model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    return parser.parse_args()


def main():
    """Classify one sample image and log warmup/run timings."""
    args = _cli()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Sample image from the cats-image dataset.
    image = load_dataset("huggingface/cats-image")["test"]["image"][0]

    processor = AutoImageProcessor.from_pretrained(args.model)
    model = ConvNextV2ForImageClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    )
    model.eval()

    inputs = processor(images=image, return_tensors="pt")

    # One eager pass first so tensor shapes are fixed before compilation.
    with torch.no_grad():
        outputs = model(**inputs)

    # fullgraph=False tolerates graph breaks (avoids the instruction limit).
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=False)

    # First compiled call pays the compilation cost ("warmup").
    t_warm = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - t_warm

    # Steady-state timed run.
    t_run = time.time()
    with torch.no_grad():
        outputs = model(**inputs)
    run_time = time.time() - t_run

    # Map the top logit to its ImageNet label.
    predicted_class_idx = outputs.logits.argmax(-1).item()
    predicted_label = model.config.id2label[predicted_class_idx]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()
|
torch_compile/run_cvt.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Benchmark CvT image classification compiled with torch.compile on Neuron."""
import argparse
import logging
import time

import torch
from transformers import AutoImageProcessor, CvtForImageClassification
from datasets import load_dataset
import torch_neuronx  # ensures Neuron backend is available

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _cli():
    """Define and parse command-line options (--batch-size is accepted but unused)."""
    parser = argparse.ArgumentParser(
        description="CvT image-classification with torch.compile on Neuron"
    )
    parser.add_argument(
        "--model",
        type=str,
        default="microsoft/cvt-13",
        help="CvT model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    return parser.parse_args()


def main():
    """Classify one sample image and log warmup/run timings."""
    args = _cli()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Sample image from the cats-image dataset.
    image = load_dataset("huggingface/cats-image")["test"]["image"][0]

    processor = AutoImageProcessor.from_pretrained(args.model)
    model = CvtForImageClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    )
    model.eval()

    inputs = processor(images=image, return_tensors="pt")

    # One eager pass first so tensor shapes are fixed before compilation.
    with torch.no_grad():
        outputs = model(**inputs)

    # fullgraph=False tolerates graph breaks (avoids the instruction limit).
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=False)

    # First compiled call pays the compilation cost ("warmup").
    t_warm = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - t_warm

    # Steady-state timed run.
    t_run = time.time()
    with torch.no_grad():
        outputs = model(**inputs)
    run_time = time.time() - t_run

    # Map the top logit to its ImageNet label.
    predicted_class_idx = outputs.logits.argmax(-1).item()
    predicted_label = model.config.id2label[predicted_class_idx]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()
|
torch_compile/run_deberta.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Benchmark a DeBERTa sequence classifier compiled with torch.compile on Neuron."""
import argparse
import logging
import time

import torch
from transformers import AutoTokenizer, DebertaForSequenceClassification
import torch_neuronx  # ensures Neuron backend is available

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _cli():
    """Define and parse command-line options (--batch-size is accepted but unused)."""
    parser = argparse.ArgumentParser(
        description="DeBERTa sequence-classification with torch.compile on Neuron"
    )
    parser.add_argument(
        "--model",
        type=str,
        default="microsoft/deberta-base",
        help="DeBERTa model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    return parser.parse_args()


def main():
    """Load the model, compile its forward pass for Neuron, and time one inference."""
    args = _cli()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Tokenizer plus an eager-attention fp32 model frozen in eval mode.
    tokenizer = AutoTokenizer.from_pretrained(args.model)
    model = DebertaForSequenceClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    )
    model.eval()

    text = "DeBERTa improves BERT and RoBERTa using disentangled attention."
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # One eager pass first so input shapes are fixed before compilation.
    with torch.no_grad():
        logits = model(**inputs).logits

    # fullgraph=False tolerates graph breaks (avoids the instruction limit).
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=False)

    # First compiled call pays the compilation cost ("warmup").
    t_warm = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - t_warm

    # Steady-state timed run.
    t_run = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - t_run

    # Map the top logit to its configured label name.
    predicted_class_id = logits.argmax().item()
    predicted_label = model.config.id2label[predicted_class_id]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()

"""
torch._dynamo.exc.TorchRuntimeError: Dynamo failed to run FX node with fake tensors: call_function <built-in function linear>(*(FakeTensor(..., device='neuron:0', size=(1, 18, 768)), Parameter(FakeTensor(..., size=(2304, 768), requires_grad=True)), None), **{}): got RuntimeError('Unhandled FakeTensor Device Propagation for aten.mm.default, found two different devices neuron:0, cpu')
"""
|
torch_compile/run_deberta_v3.py
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Benchmark a DeBERTa-v3 sequence classifier compiled with torch.compile on Neuron."""
import argparse
import logging
import time

import torch
from transformers import AutoTokenizer, DebertaV2ForSequenceClassification
import torch_neuronx  # ensures Neuron backend is available

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _cli():
    """Define and parse command-line options (--batch-size is accepted but unused)."""
    parser = argparse.ArgumentParser(
        description="DeBERTa-v3 sequence-classification with torch.compile on Neuron"
    )
    parser.add_argument(
        "--model",
        type=str,
        default="microsoft/deberta-v3-base",
        help="DeBERTa-v3 model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    return parser.parse_args()


def main():
    """Load the model, compile its forward pass for Neuron, and time one inference."""
    args = _cli()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Tokenizer plus an eager-attention fp32 model frozen in eval mode.
    tokenizer = AutoTokenizer.from_pretrained(args.model)
    model = DebertaV2ForSequenceClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    )
    model.eval()

    text = "DeBERTa-v3 achieves stronger performance with improved pre-training."
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # One eager pass first so input shapes are fixed before compilation.
    with torch.no_grad():
        logits = model(**inputs).logits

    # fullgraph=False tolerates graph breaks (avoids the instruction limit).
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=False)

    # First compiled call pays the compilation cost ("warmup").
    t_warm = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - t_warm

    # Steady-state timed run.
    t_run = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - t_run

    # Map the top logit to its configured label name.
    predicted_class_id = logits.argmax().item()
    predicted_label = model.config.id2label[predicted_class_id]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()

"""
Works
"""
|
torch_compile/run_deit.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""DeiT (Vision Transformer) image classification benchmarked on Neuron."""
import argparse
import logging
import time

import torch
from transformers import AutoImageProcessor, DeiTForImageClassification
from datasets import load_dataset
import torch_neuronx  # ensures Neuron backend

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _cli():
    """Define and parse command-line options (--batch-size is accepted but unused)."""
    parser = argparse.ArgumentParser(description="Run DeiT on Neuron")
    parser.add_argument(
        "--model",
        type=str,
        default="facebook/deit-base-distilled-patch16-224",
        help="DeiT model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    return parser.parse_args()


def main():
    """Classify one sample image and log warmup/run timings."""
    args = _cli()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Sample image from the cats-image dataset.
    image = load_dataset("huggingface/cats-image")["test"]["image"][0]

    processor = AutoImageProcessor.from_pretrained(args.model)
    model = DeiTForImageClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    inputs = processor(images=image, return_tensors="pt")

    # One eager pass first so tensor shapes are fixed before compilation.
    with torch.no_grad():
        _ = model(**inputs).logits

    # fullgraph=False tolerates graph breaks.
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=False)

    # First compiled call pays the compilation cost ("warmup").
    t_warm = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - t_warm

    # Steady-state timed run.
    t_run = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - t_run

    # Map the top logit to its ImageNet label.
    predicted_class_idx = logits.argmax(-1).item()
    predicted_label = model.config.id2label[predicted_class_idx]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()
|
torch_compile/run_distillbert.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""DistilBERT text classification benchmarked on Neuron."""
import argparse
import logging
import time

import torch
from transformers import AutoTokenizer, DistilBertForSequenceClassification
import torch_neuronx  # ensures Neuron backend

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _cli():
    """Define and parse command-line options (--batch-size is accepted but unused)."""
    parser = argparse.ArgumentParser(description="Run DistilBERT on Neuron")
    parser.add_argument(
        "--model",
        type=str,
        default="distilbert-base-uncased-finetuned-sst-2-english",
        help="DistilBERT model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    return parser.parse_args()


def main():
    """Load the model, compile its forward pass for Neuron, and time one inference."""
    args = _cli()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Tokenizer plus an eager-attention fp32 model frozen in eval mode.
    tokenizer = AutoTokenizer.from_pretrained(args.model)
    model = DistilBertForSequenceClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    text = "DistilBERT is a compact, fast variant of BERT."
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)

    # One eager pass first so input shapes are fixed before compilation.
    with torch.no_grad():
        _ = model(**inputs).logits

    # fullgraph=False tolerates graph breaks.
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=False)

    # First compiled call pays the compilation cost ("warmup").
    t_warm = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - t_warm

    # Steady-state timed run.
    t_run = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - t_run

    # Map the top logit to its configured label name.
    predicted_class_id = logits.argmax().item()
    predicted_label = model.config.id2label[predicted_class_id]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()
|
torch_compile/run_donutswin.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
"""DonutSwin vision encoder (no decoder, pure vision) benchmarked on Neuron."""
import argparse
import logging
import time

import torch
from transformers import DonutImageProcessor, DonutSwinModel
from datasets import load_dataset
import torch_neuronx  # ensures Neuron backend

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def _cli():
    """Define and parse command-line options."""
    parser = argparse.ArgumentParser(description="Run DonutSwin encoder on Neuron")
    parser.add_argument(
        "--model",
        type=str,
        default="naver-clova-ix/donut-base",
        help="DonutSwin model name on Hugging Face Hub",
    )
    return parser.parse_args()


def main():
    """Encode one sample image and log warmup/run timings plus the output shape."""
    args = _cli()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Sample image from the cats-image dataset.
    image = load_dataset("huggingface/cats-image")["test"]["image"][0]

    # Vision encoder only — the Donut decoder is not loaded.
    processor = DonutImageProcessor.from_pretrained(args.model)
    model = DonutSwinModel.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    inputs = processor(images=image, return_tensors="pt")

    # One eager pass first so tensor shapes are fixed before compilation.
    with torch.no_grad():
        _ = model(**inputs).last_hidden_state

    # fullgraph=False tolerates graph breaks.
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=False)

    # First compiled call pays the compilation cost ("warmup").
    t_warm = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - t_warm

    # Steady-state timed run.
    t_run = time.time()
    with torch.no_grad():
        hidden = model(**inputs).last_hidden_state
    run_time = time.time() - t_run

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Output hidden shape: %s", hidden.shape)  # [B, seq_len, hidden_size]


if __name__ == "__main__":
    main()
|
| 67 |
+
|
| 68 |
+
"""
|
| 69 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/donut/modeling_donut_swin.py:586:0: error: failed to legalize operation 'torch.aten.fill.Tensor'
|
| 70 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/donut/modeling_donut_swin.py:637:0: note: called from
|
| 71 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/donut/modeling_donut_swin.py:712:0: note: called from
|
| 72 |
+
/usr/local/lib/python3.10/site-packages/transformers/modeling_layers.py:94:0: note: called from
|
| 73 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/donut/modeling_donut_swin.py:783:0: note: called from
|
| 74 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/donut/modeling_donut_swin.py:922:0: note: called from
|
| 75 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/donut/modeling_donut_swin.py:586:0: note: see current operation: %1327 = "torch.aten.fill.Tensor"(%1326, %1091) : (!torch.vtensor<[1,630,470,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[1,630,470,1],f32>
|
| 76 |
+
"""
|
torch_compile/run_dpt.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
# DPT (Dense Prediction Transformer) monocular depth estimation on Neuron
import argparse
import logging
import time

import torch
from transformers import DPTImageProcessor, DPTForDepthEstimation
from datasets import load_dataset
import torch_neuronx  # ensures Neuron backend

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def main():
    """Benchmark DPT depth estimation under torch.compile(backend="neuron")."""
    parser = argparse.ArgumentParser(description="Run DPT depth estimation on Neuron")
    parser.add_argument(
        "--model",
        type=str,
        default="Intel/dpt-large",
        help="DPT model name on Hugging Face Hub",
    )
    args = parser.parse_args()

    torch.manual_seed(42)
    torch.set_default_dtype(torch.float32)

    # single sample image from the public cats dataset
    image = load_dataset("huggingface/cats-image")["test"]["image"][0]

    # fp32 weights and eager attention for the Neuron compiler
    processor = DPTImageProcessor.from_pretrained(args.model)
    model = DPTForDepthEstimation.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    inputs = processor(images=image, return_tensors="pt")

    # one eager pass so input shapes are fixed before compilation
    with torch.no_grad():
        _ = model(**inputs).predicted_depth

    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=False)

    # first compiled call — includes Neuron compilation cost
    t0 = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - t0

    # steady-state timing
    t1 = time.time()
    with torch.no_grad():
        depth = model(**inputs).predicted_depth
    run_time = time.time() - t1

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Output depth shape: %s", depth.shape)  # [B, 1, H, W]


if __name__ == "__main__":
    main()
|
torch_compile/run_electra.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
# ELECTRA (discriminator) text-classification on Neuron
import argparse
import logging
import time

import torch
from transformers import AutoTokenizer, ElectraForSequenceClassification
import torch_neuronx  # ensures Neuron backend

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def main():
    """Compile ELECTRA sequence classification for Neuron and time one inference.

    Fixes vs. the previous revision:
    - ``--batch-size`` was parsed but never used; the sample text is now
      replicated to the requested batch size.
    - Top-1 decoding takes argmax over the class axis of the first sample
      instead of the flattened logits tensor, which was only correct for
      batch size 1.
    """
    parser = argparse.ArgumentParser(description="Run ELECTRA on Neuron")
    parser.add_argument(
        "--model",
        type=str,
        default="google/electra-base-discriminator",
        help="ELECTRA model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    args = parser.parse_args()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # load tokenizer & model (fp32 + eager attention for the Neuron compiler)
    tokenizer = AutoTokenizer.from_pretrained(args.model)
    model = ElectraForSequenceClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    # tokenize sample, replicated to honour --batch-size
    text = "ELECTRA pre-trains a discriminator to detect replaced tokens."
    inputs = tokenizer(
        [text] * args.batch_size, return_tensors="pt", padding=True, truncation=True
    )

    # pre-run to lock shapes before compilation
    with torch.no_grad():
        _ = model(**inputs).logits

    # compile
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # warmup (includes Neuron compilation time)
    warmup_start = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - warmup_start

    # benchmark run
    run_start = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - run_start

    # top-1 label of the first sample (argmax over the class axis)
    predicted_class_id = logits[0].argmax(-1).item()
    predicted_label = model.config.id2label[predicted_class_id]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()
|
torch_compile/run_esm.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
# ESM (Evolutionary Scale Modeling) protein-sequence classification on Neuron
import argparse
import logging
import time

import torch
from transformers import EsmTokenizer, EsmForSequenceClassification
import torch_neuronx  # ensures Neuron backend

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def main():
    """Compile ESM sequence classification for Neuron and time one inference.

    Fixes vs. the previous revision:
    - ``--batch-size`` was parsed but never used; the sample sequence is now
      replicated to the requested batch size.
    - Top-1 decoding takes argmax over the class axis of the first sample
      instead of the flattened logits tensor, which was only correct for
      batch size 1.
    """
    parser = argparse.ArgumentParser(description="Run ESM on Neuron")
    parser.add_argument(
        "--model",
        type=str,
        default="facebook/esm2_t33_650M_UR50D",
        help="ESM model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    args = parser.parse_args()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # load tokenizer & model (fp32 + eager attention for the Neuron compiler)
    tokenizer = EsmTokenizer.from_pretrained(args.model)
    model = EsmForSequenceClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    # tokenize protein sequence, replicated to honour --batch-size
    sequence = "MKTVRQERLKSIVRILERSKEPVSGAQLAEELSVSRQVIVQDIAYLRSLGYNIVATPRGYVLAGG"
    inputs = tokenizer(
        [sequence] * args.batch_size, return_tensors="pt", padding=True, truncation=True
    )

    # pre-run to lock shapes before compilation
    with torch.no_grad():
        _ = model(**inputs).logits

    # compile
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # warmup (includes Neuron compilation time)
    warmup_start = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - warmup_start

    # benchmark run
    run_start = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - run_start

    # top-1 label of the first sample (argmax over the class axis)
    predicted_class_id = logits[0].argmax(-1).item()
    predicted_label = model.config.id2label[predicted_class_id]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()
|
torch_compile/run_flaubert.py
ADDED
|
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
# FlauBERT text-classification on Neuron
import argparse
import logging
import time

import torch
import torch._dynamo
from transformers import FlaubertTokenizer, FlaubertForSequenceClassification
import torch_neuronx  # ensures Neuron backend

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def main():
    """Compile FlauBERT sequence classification for Neuron and time one inference.

    Fixes vs. the previous revision:
    - FlauBERT's transformer executes ``assert lengths.max().item() <= slen``
      (see the captured traceback below), which Dynamo refuses to trace with
      the default ``capture_scalar_outputs=False``; scalar capture is now
      enabled, per the hint in the error message.
    - ``--batch-size`` was parsed but never used; the sample text is now
      replicated to the requested batch size, and top-1 decoding takes argmax
      over the class axis of the first sample instead of the flattened tensor.
    """
    parser = argparse.ArgumentParser(description="Run FlauBERT on Neuron")
    parser.add_argument(
        "--model",
        type=str,
        default="flaubert/flaubert_base_cased",
        help="FlauBERT model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    args = parser.parse_args()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # allow Tensor.item() inside the traced graph (required by FlauBERT's
    # internal length assertion — see docstring / traceback below)
    torch._dynamo.config.capture_scalar_outputs = True

    # load tokenizer & model (fp32 + eager attention for the Neuron compiler)
    tokenizer = FlaubertTokenizer.from_pretrained(args.model)
    model = FlaubertForSequenceClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    # tokenize sample, replicated to honour --batch-size
    text = "FlauBERT est un modèle de langue français performant."
    inputs = tokenizer(
        [text] * args.batch_size, return_tensors="pt", padding=True, truncation=True
    )

    # pre-run to lock shapes before compilation
    with torch.no_grad():
        _ = model(**inputs).logits

    # compile
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # warmup (includes Neuron compilation time)
    warmup_start = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - warmup_start

    # benchmark run
    run_start = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - run_start

    # top-1 label of the first sample (argmax over the class axis)
    predicted_class_id = logits[0].argmax(-1).item()
    predicted_label = model.config.id2label[predicted_class_id]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()


"""
Failure observed before the capture_scalar_outputs fix above:

Traceback (most recent call last):
  File "/workspace/torch_neuron_sample/torch-neuron-samples/scripts/torch_compile/run_flaubert.py", line 67, in <module>
    main()
  File "/workspace/torch_neuron_sample/torch-neuron-samples/scripts/torch_compile/run_flaubert.py", line 49, in main
    _ = model(**inputs)
  File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1775, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1786, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 841, in compile_wrapper
    raise e.with_traceback(None) from e.__cause__  # User compiler error
torch._dynamo.exc.Unsupported: Unsupported Tensor.item() call with capture_scalar_outputs=False
  Explanation: Dynamo does not support tracing `Tensor.item()` with config.capture_scalar_outputs=False.
  Hint: Set `torch._dynamo.config.capture_scalar_outputs = True` or `export TORCHDYNAMO_CAPTURE_SCALAR_OUTPUTS=1` to include these operations in the captured graph.

  Developer debug context: call_method TensorVariable() item () {}

 For more details about this graph break, please visit: https://meta-pytorch.github.io/compile-graph-break-site/gb/gb0124.html

from user code:
  File "/usr/local/lib/python3.10/site-packages/transformers/models/flaubert/modeling_flaubert.py", line 1156, in forward
    transformer_outputs = self.transformer(
  File "/usr/local/lib/python3.10/site-packages/transformers/models/flaubert/modeling_flaubert.py", line 873, in forward
    assert lengths.max().item() <= slen

Set TORCHDYNAMO_VERBOSE=1 for the internal stack trace (please do this especially if you're reporting a bug to PyTorch). For even more developer context, set TORCH_LOGS="+dynamo"
"""
|
torch_compile/run_hubert.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
# HuBERT-CTC speech-recognition on Neuron
import argparse
import logging
import time

import torch
from transformers import AutoProcessor, HubertForCTC
from datasets import load_dataset
import torch_neuronx  # ensures Neuron backend
from torch.nn.utils import parametrize, remove_weight_norm

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def main():
    """Compile HuBERT-CTC for Neuron and time one transcription.

    Fix vs. the previous revision: the weight-norm stripping loop only checked
    for legacy ``weight_g``/``weight_v`` attributes. Recent torch registers
    weight norm as a parametrization instead, so the check matched nothing,
    the parametrized forward was traced, and compilation failed inside
    ``torch/nn/utils/parametrizations.py`` (traceback captured below). The
    loop now also removes parametrizations on ``weight``.
    NOTE(review): assumes every "weight" parametrization in this model is
    weight norm — confirm if other parametrizations are ever registered.
    """
    parser = argparse.ArgumentParser(description="Run HuBERT-CTC on Neuron")
    parser.add_argument(
        "--model",
        type=str,
        default="hf-internal-testing/tiny-random-HubertModel",
        help="HuBERT-CTC model name on Hugging Face Hub",
    )
    args = parser.parse_args()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # load small speech snippet
    dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
    sample = dataset[0]["audio"]["array"]  # 16 kHz numpy array

    # processor + HuBERT-CTC model
    processor = AutoProcessor.from_pretrained(args.model)
    model = HubertForCTC.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    # strip weight norm so conv weights are plain tensors at trace time;
    # handle both new-style parametrizations and legacy weight_g/weight_v
    for m in model.modules():
        if parametrize.is_parametrized(m, "weight"):
            parametrize.remove_parametrizations(m, "weight")
        elif hasattr(m, "weight_g") and hasattr(m, "weight_v"):
            remove_weight_norm(m)

    # preprocess
    inputs = processor(sample, sampling_rate=16_000, return_tensors="pt", padding=True)

    # pre-run to lock shapes before compilation
    with torch.no_grad():
        _ = model(**inputs).logits

    # compile
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # warmup (includes Neuron compilation time)
    warmup_start = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - warmup_start

    # benchmark run
    run_start = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - run_start

    # greedy decode
    predicted_ids = logits.argmax(dim=-1)
    transcription = processor.decode(predicted_ids[0])

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Transcription: %s", transcription)


if __name__ == "__main__":
    main()

"""
Failure observed before the parametrization-aware weight-norm removal above:

/usr/local/lib/python3.10/site-packages/torch/nn/utils/parametrizations.py:325:0: error: number of output elements (2048) doesn't match expected number of elements (16)
/usr/local/lib/python3.10/site-packages/torch/nn/utils/parametrize.py:303:0: note: called from
/usr/local/lib/python3.10/site-packages/torch/nn/utils/parametrize.py:407:0: note: called from
/usr/local/lib/python3.10/site-packages/transformers/models/hubert/modeling_hubert.py:92:0: note: called from
/usr/local/lib/python3.10/site-packages/transformers/models/hubert/modeling_hubert.py:448:0: note: called from
/usr/local/lib/python3.10/site-packages/transformers/models/hubert/modeling_hubert.py:986:0: note: called from
/usr/local/lib/python3.10/site-packages/transformers/models/hubert/modeling_hubert.py:1114:0: note: called from
"""
|
torch_compile/run_levit.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
# LeViT vision-classification on Neuron
import argparse
import logging
import time

import torch
from transformers import AutoImageProcessor, LevitForImageClassification
from datasets import load_dataset
import torch_neuronx  # ensures Neuron backend

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def main():
    """Benchmark LeViT image classification under torch.compile(backend="neuron")."""
    parser = argparse.ArgumentParser(description="Run LeViT on Neuron")
    parser.add_argument(
        "--model",
        type=str,
        default="facebook/levit-128S",
        help="LeViT model name on Hugging Face Hub",
    )
    args = parser.parse_args()

    torch.manual_seed(42)
    torch.set_default_dtype(torch.float32)

    # single sample image from the public cats dataset
    image = load_dataset("huggingface/cats-image")["test"]["image"][0]

    # fp32 weights and eager attention for the Neuron compiler
    processor = AutoImageProcessor.from_pretrained(args.model)
    model = LevitForImageClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    inputs = processor(images=image, return_tensors="pt")

    # one eager pass so input shapes are fixed before compilation
    with torch.no_grad():
        _ = model(**inputs).logits

    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # first compiled call — includes Neuron compilation cost
    t0 = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - t0

    # steady-state timing
    t1 = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - t1

    # decode top-1 ImageNet class
    label_id = logits.argmax(-1).item()
    predicted_label = model.config.id2label[label_id]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()
|
torch_compile/run_mobilebert.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
# MobileBERT text-classification on Neuron
import argparse
import logging
import time

import torch
from transformers import AutoTokenizer, MobileBertForSequenceClassification
import torch_neuronx  # ensures Neuron backend

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def main():
    """Compile MobileBERT sequence classification for Neuron and time one inference.

    Fixes vs. the previous revision:
    - ``--batch-size`` was parsed but never used; the sample text is now
      replicated to the requested batch size.
    - Top-1 decoding takes argmax over the class axis of the first sample
      instead of the flattened logits tensor, which was only correct for
      batch size 1.
    """
    parser = argparse.ArgumentParser(description="Run MobileBERT on Neuron")
    parser.add_argument(
        "--model",
        type=str,
        default="google/mobilebert-uncased",
        help="MobileBERT model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    args = parser.parse_args()

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # load tokenizer & model (fp32 + eager attention for the Neuron compiler)
    tokenizer = AutoTokenizer.from_pretrained(args.model)
    model = MobileBertForSequenceClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    # tokenize sample, replicated to honour --batch-size
    text = "MobileBERT is a compact BERT for on-device NLP."
    inputs = tokenizer(
        [text] * args.batch_size, return_tensors="pt", padding=True, truncation=True
    )

    # pre-run to lock shapes before compilation
    with torch.no_grad():
        _ = model(**inputs).logits

    # compile
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # warmup (includes Neuron compilation time)
    warmup_start = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - warmup_start

    # benchmark run
    run_start = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - run_start

    # top-1 label of the first sample (argmax over the class axis)
    predicted_class_id = logits[0].argmax(-1).item()
    predicted_label = model.config.id2label[predicted_class_id]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()
|
torch_compile/run_mobilenetv2.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
# MobileNetV2 image-classification on Neuron
import argparse
import logging
import time

import torch
from torchvision import transforms
from transformers import AutoImageProcessor, MobileNetV2ForImageClassification
from datasets import load_dataset
import torch_neuronx  # ensures Neuron backend

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def main():
    """Benchmark MobileNetV2 image classification under torch.compile(backend="neuron")."""
    parser = argparse.ArgumentParser(description="Run MobileNetV2 on Neuron")
    parser.add_argument(
        "--model",
        type=str,
        default="google/mobilenet_v2_1.0_224",
        help="MobileNetV2 model name on Hugging Face Hub",
    )
    args = parser.parse_args()

    torch.manual_seed(42)
    torch.set_default_dtype(torch.float32)

    # single sample image from the public cats dataset
    image = load_dataset("huggingface/cats-image")["test"]["image"][0]

    # fp32 weights and eager attention for the Neuron compiler
    processor = AutoImageProcessor.from_pretrained(args.model)
    model = MobileNetV2ForImageClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    inputs = processor(images=image, return_tensors="pt")

    # one eager pass so input shapes are fixed before compilation
    with torch.no_grad():
        _ = model(**inputs).logits

    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # first compiled call — includes Neuron compilation cost
    t0 = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - t0

    # steady-state timing
    t1 = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - t1

    # decode top-1 ImageNet class
    label_id = logits.argmax(-1).item()
    predicted_label = model.config.id2label[label_id]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()
|
torch_compile/run_mobilevit.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# MobileViT image-classification on Neuron
|
| 3 |
+
import argparse
|
| 4 |
+
import logging
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
from transformers import AutoImageProcessor, MobileViTForImageClassification
|
| 9 |
+
from datasets import load_dataset
|
| 10 |
+
import torch_neuronx # ensures Neuron backend
|
| 11 |
+
|
| 12 |
+
logging.basicConfig(level=logging.INFO)
|
| 13 |
+
logger = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def main():
    """Benchmark MobileViT image classification through the Neuron torch.compile backend."""
    arg_parser = argparse.ArgumentParser(description="Run MobileViT on Neuron")
    arg_parser.add_argument(
        "--model",
        type=str,
        default="apple/mobilevit-small",
        help="MobileViT model name on Hugging Face Hub",
    )
    args = arg_parser.parse_args()

    # Deterministic fp32 execution.
    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Fetch one sample image from the cats-image dataset.
    sample_image = load_dataset("huggingface/cats-image")["test"]["image"][0]

    # Processor handles resize/normalization; model runs eager in fp32.
    image_processor = AutoImageProcessor.from_pretrained(args.model)
    net = MobileViTForImageClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    batch = image_processor(images=sample_image, return_tensors="pt")

    # Eager pre-run locks the tensor shapes before compilation.
    with torch.no_grad():
        _ = net(**batch).logits

    # Compile the forward pass as a single Neuron graph.
    net.forward = torch.compile(net.forward, backend="neuron", fullgraph=True)

    # First compiled call pays the compilation cost.
    t_warm = time.time()
    with torch.no_grad():
        _ = net(**batch)
    warmup_time = time.time() - t_warm

    # Steady-state latency measurement.
    t_run = time.time()
    with torch.no_grad():
        logits = net(**batch).logits
    run_time = time.time() - t_run

    # Map the top-1 logit to its ImageNet label.
    predicted_class_idx = logits.argmax(-1).item()
    predicted_label = net.config.id2label[predicted_class_idx]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()
|
torch_compile/run_modernbert.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# ModernBERT-base text-classification on Neuron
|
| 3 |
+
import argparse
|
| 4 |
+
import logging
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
from transformers import AutoTokenizer, ModernBertForSequenceClassification
|
| 9 |
+
import torch_neuronx # ensures Neuron backend
|
| 10 |
+
|
| 11 |
+
logging.basicConfig(level=logging.INFO)
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def main():
    """Benchmark ModernBERT sequence classification via the Neuron torch.compile backend."""
    cli = argparse.ArgumentParser(description="Run ModernBERT on Neuron")
    cli.add_argument(
        "--model",
        type=str,
        default="answerdotai/ModernBERT-base",
        help="ModernBERT model name on Hugging Face Hub",
    )
    args = cli.parse_args()

    # Deterministic fp32 run.
    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Tokenizer + eager fp32 model.
    tok = AutoTokenizer.from_pretrained(args.model)
    clf = ModernBertForSequenceClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    # Encode one sample sentence.
    encoded = tok("Hello, my dog is cute", return_tensors="pt", padding=True, truncation=True)

    # Eager pre-run freezes input shapes before compilation.
    with torch.no_grad():
        _ = clf(**encoded).logits

    # Single-graph Neuron compile of the encoder forward.
    clf.forward = torch.compile(clf.forward, backend="neuron", fullgraph=True)

    # First compiled call (includes compilation cost).
    t_warm = time.time()
    with torch.no_grad():
        _ = clf(**encoded)
    warmup_time = time.time() - t_warm

    # Steady-state latency.
    t_run = time.time()
    with torch.no_grad():
        logits = clf(**encoded).logits
    run_time = time.time() - t_run

    # Report the top-scoring label.
    predicted_class_id = logits.argmax().item()
    predicted_label = clf.config.id2label[predicted_class_id]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()
|
torch_compile/run_mpnet.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# MPNet sentence-embedding on Neuron
|
| 3 |
+
import argparse
|
| 4 |
+
import logging
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
from transformers import AutoTokenizer, MPNetModel
|
| 9 |
+
import torch_neuronx # ensures Neuron backend
|
| 10 |
+
|
| 11 |
+
logging.basicConfig(level=logging.INFO)
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def main():
    """Benchmark MPNet sentence embedding through the Neuron torch.compile backend."""
    cli = argparse.ArgumentParser(description="Run MPNet encoder on Neuron")
    cli.add_argument(
        "--model",
        type=str,
        default="microsoft/mpnet-base",
        help="MPNet model name on Hugging Face Hub",
    )
    args = cli.parse_args()

    # Deterministic fp32 run.
    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Tokenizer + eager fp32 encoder.
    tok = AutoTokenizer.from_pretrained(args.model)
    encoder = MPNetModel.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    # Encode one sample sentence.
    encoded = tok(
        "MPNet is a variant of BERT with permutation language modeling.",
        return_tensors="pt",
        padding=True,
        truncation=True,
    )

    # Eager pre-run locks input shapes before compilation.
    with torch.no_grad():
        _ = encoder(**encoded).pooler_output

    # Single-graph Neuron compile.
    encoder.forward = torch.compile(encoder.forward, backend="neuron", fullgraph=True)

    # First compiled call pays the compilation cost.
    t_warm = time.time()
    with torch.no_grad():
        _ = encoder(**encoded)
    warmup_time = time.time() - t_warm

    # Steady-state latency.
    t_run = time.time()
    with torch.no_grad():
        embeddings = encoder(**encoded).pooler_output
    run_time = time.time() - t_run

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Output embedding shape: %s", embeddings.shape)  # expected [1, hidden]


if __name__ == "__main__":
    main()
|
torch_compile/run_phi.py
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# Phi (Phi-2 default) forward-trace + manual greedy on Neuron – fixed pad token
|
| 3 |
+
import argparse
|
| 4 |
+
import logging
|
| 5 |
+
import time
|
| 6 |
+
import torch
|
| 7 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 8 |
+
import torch_neuronx # guarantees Neuron backend
|
| 9 |
+
|
| 10 |
+
logging.basicConfig(level=logging.INFO)
|
| 11 |
+
logger = logging.getLogger(__name__)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@torch.no_grad()
def greedy_generate(model_forward, tokenizer, input_ids, max_new_tokens):
    """Greedy decoding driver for a shape-frozen, compiled forward pass.

    Repeatedly invokes ``model_forward`` on a fixed-length token window:
    each step appends the argmax token and drops the oldest one, so the
    sequence length (and hence the compiled graph's shapes) never changes.

    Note: ``tokenizer`` is accepted for interface symmetry but unused here.
    """
    batch, window = input_ids.shape
    position_ids = (
        torch.arange(window, dtype=torch.long, device=input_ids.device)
        .unsqueeze(0)
        .expand(batch, -1)
    )

    step = 0
    while step < max_new_tokens:
        step_logits = model_forward(input_ids, position_ids)[0]  # output tuple -> logits
        chosen = step_logits[:, -1, :].argmax(dim=-1, keepdim=True)
        # Slide the window so shapes stay constant for the compiled graph.
        input_ids = torch.cat([input_ids, chosen], dim=1)[:, -window:]
        step += 1
    return input_ids
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def main():
    """Compile the Phi forward pass for Neuron and drive manual greedy decoding."""
    cli = argparse.ArgumentParser(description="Phi forward-compile + manual greedy on Neuron")
    cli.add_argument("--model", default="microsoft/phi-2")
    cli.add_argument("--seq-len", type=int, default=128, help="Fixed context length")
    cli.add_argument("--new-tokens", type=int, default=20, help="Tokens to generate")
    args = cli.parse_args()

    torch.manual_seed(42)
    torch.set_default_dtype(torch.float32)

    tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=True)
    if tokenizer.pad_token is None:
        # Phi ships without a pad token; reuse EOS so padding to max_length works.
        tokenizer.pad_token = tokenizer.eos_token

    model = AutoModelForCausalLM.from_pretrained(
        args.model,
        torch_dtype=torch.float32,
        attn_implementation="eager",
        use_cache=False,  # KV cache off -> static shapes for the compiler
    ).eval()

    # Pad the prompt to a fixed context length so every forward has one shape.
    encoded = tokenizer(
        "The future of AI is",
        max_length=args.seq_len,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    input_ids = encoded.input_ids
    batch, window = input_ids.shape

    # One eager pass locks shapes, then compile forward as a single Neuron graph.
    with torch.no_grad():
        position_ids = torch.arange(window, dtype=torch.long).unsqueeze(0).expand(batch, -1)
        _ = model(input_ids, position_ids)
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # Warmup: first compiled call carries the compilation cost.
    t0 = time.time()
    with torch.no_grad():
        _ = model(input_ids, position_ids)
    logger.info("Warmup (forward): %.3f s", time.time() - t0)

    # Token-by-token greedy decoding through the compiled forward.
    t0 = time.time()
    final_ids = greedy_generate(model.forward, tokenizer, input_ids, args.new_tokens)
    logger.info("Generate (manual loop): %.3f s", time.time() - t0)

    logger.info("Output: %s", tokenizer.decode(final_ids[0], skip_special_tokens=True))


if __name__ == "__main__":
    main()
|
torch_compile/run_phi3.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# Phi-3-mini – compile model.forward only, manual greedy loop on Neuron
|
| 3 |
+
import argparse
|
| 4 |
+
import logging
|
| 5 |
+
import time
|
| 6 |
+
import torch
|
| 7 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 8 |
+
import torch_neuronx # guarantees Neuron backend
|
| 9 |
+
|
| 10 |
+
logging.basicConfig(level=logging.INFO)
|
| 11 |
+
logger = logging.getLogger(__name__)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
@torch.no_grad()
def greedy_generate(model_forward, tokenizer, input_ids, max_new_tokens):
    """Fixed-window greedy decoding against a compiled forward.

    Each iteration feeds the current window to ``model_forward``, picks the
    argmax of the final position, appends it, and drops the oldest token so
    the window length never changes (keeps compiled shapes static).
    ``position_ids`` stay identical across steps for the same reason.
    ``tokenizer`` is unused; kept for call-site symmetry.
    """
    batch, window = input_ids.shape
    dev = input_ids.device
    position_ids = torch.arange(window, dtype=torch.long, device=dev).unsqueeze(0).expand(batch, -1)

    for _step in range(max_new_tokens):
        logits = model_forward(input_ids, position_ids)[0]
        next_tok = logits[:, -1, :].argmax(dim=-1, keepdim=True)
        # Rolling window: append newest, discard oldest.
        input_ids = torch.cat([input_ids, next_tok], dim=1)[:, -window:]
    return input_ids
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def main():
    """Compile the Phi-3-mini forward pass for Neuron and run manual greedy decoding."""
    cli = argparse.ArgumentParser(description="Phi-3-mini forward-compile + manual greedy on Neuron")
    cli.add_argument("--model", default="microsoft/Phi-3-mini-4k-instruct")
    cli.add_argument("--seq-len", type=int, default=128, help="Fixed context length")
    cli.add_argument("--new-tokens", type=int, default=20, help="Tokens to generate")
    args = cli.parse_args()

    torch.manual_seed(42)
    torch.set_default_dtype(torch.float32)

    tokenizer = AutoTokenizer.from_pretrained(args.model, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        args.model,
        torch_dtype=torch.float32,
        attn_implementation="eager",
        use_cache=False,  # KV cache off -> static shapes for the compiler
    ).eval()

    # Fixed-shape prompt: pad/truncate to seq_len so every forward has one shape.
    encoded = tokenizer(
        "The future of AI is",
        max_length=args.seq_len,
        padding="max_length",
        truncation=True,
        return_tensors="pt",
    )
    input_ids = encoded.input_ids
    batch, window = input_ids.shape

    # One eager pass locks shapes, then compile forward as a single Neuron graph.
    with torch.no_grad():
        position_ids = torch.arange(window, dtype=torch.long).unsqueeze(0).expand(batch, -1)
        _ = model(input_ids, position_ids)
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # Warmup: first compiled call carries the compilation cost.
    t0 = time.time()
    with torch.no_grad():
        _ = model(input_ids, position_ids)
    logger.info("Warmup (forward): %.3f s", time.time() - t0)

    # Token-by-token greedy decoding through the compiled forward.
    t0 = time.time()
    final_ids = greedy_generate(model.forward, tokenizer, input_ids, args.new_tokens)
    logger.info("Generate (manual loop): %.3f s", time.time() - t0)

    logger.info("Output: %s", tokenizer.decode(final_ids[0], skip_special_tokens=True))


if __name__ == "__main__":
    main()
|
| 75 |
+
|
| 76 |
+
"""
|
| 77 |
+
/usr/local/lib/python3.10/site-packages/torch_mlir/dialects/stablehlo/__init__.py:24: UserWarning: Could not import StableHLO C++ extension: libStablehloUnifiedPythonCAPI.so.22.0git: cannot open shared object file: No such file or directory
|
| 78 |
+
warnings.warn(f"Could not import StableHLO C++ extension: {e}")
|
| 79 |
+
`torch_dtype` is deprecated! Use `dtype` instead!
|
| 80 |
+
Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████████████████████| 2/2 [00:01<00:00, 1.90it/s]
|
| 81 |
+
INFO:__main__:Warmup (forward): 19.975 s
|
| 82 |
+
INFO:__main__:Generate (manual loop): 271.678 s
|
| 83 |
+
INFO:__main__:Output: The future of AI is
|
| 84 |
+
: 1iewer
|
| 85 |
+
I'melissa'
|
| 86 |
+
"""
|
torch_compile/run_roberta.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# RoBERTa text-classification on Neuron – full graph compile
|
| 3 |
+
import argparse
|
| 4 |
+
import logging
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
from transformers import AutoTokenizer, RobertaForSequenceClassification
|
| 9 |
+
import torch_neuronx # guarantees Neuron backend
|
| 10 |
+
|
| 11 |
+
logging.basicConfig(level=logging.INFO)
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def main():
    """Benchmark RoBERTa sequence classification on Neuron with a full-graph compile.

    Fixes the previously dead ``--batch-size`` flag: the sample sentence is now
    replicated to the requested batch size. The default of 1 preserves the
    original single-sample behavior exactly.
    """
    parser = argparse.ArgumentParser(description="RoBERTa on Neuron (full graph)")
    parser.add_argument(
        "--model",
        type=str,
        default="roberta-base",
        help="RoBERTa model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    args = parser.parse_args()

    # Deterministic fp32 run.
    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # load tokenizer & model (eager attention, fp32)
    tokenizer = AutoTokenizer.from_pretrained(args.model)
    model = RobertaForSequenceClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    # Replicate the sample sentence so --batch-size actually shapes the batch.
    text = "RoBERTa is a robustly optimized BERT pretraining approach."
    inputs = tokenizer([text] * args.batch_size, return_tensors="pt", padding=True, truncation=True)

    # pre-run to lock shapes before compilation
    with torch.no_grad():
        _ = model(**inputs).logits

    # compile the forward pass as a single Neuron graph
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # warmup: first compiled call includes compilation cost
    warmup_start = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - warmup_start

    # benchmark run (steady-state latency)
    run_start = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - run_start

    # top-1 label of the first sample (all batch entries are identical)
    predicted_class_id = logits[0].argmax().item()
    predicted_label = model.config.id2label[predicted_class_id]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()
|
torch_compile/run_roformer.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# RoFormer (Rotary-position Transformer) text-classification on Neuron – full graph
|
| 3 |
+
import argparse
|
| 4 |
+
import logging
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
from transformers import AutoTokenizer, RoFormerForSequenceClassification
|
| 9 |
+
import torch_neuronx # guarantees Neuron backend
|
| 10 |
+
|
| 11 |
+
logging.basicConfig(level=logging.INFO)
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def main():
    """Benchmark RoFormer sequence classification on Neuron with a full-graph compile.

    Fixes the previously dead ``--batch-size`` flag: the sample sentence is now
    replicated to the requested batch size. The default of 1 preserves the
    original single-sample behavior exactly.
    """
    parser = argparse.ArgumentParser(description="RoFormer on Neuron (full graph)")
    parser.add_argument(
        "--model",
        type=str,
        default="junnyu/roformer_chinese_base",
        help="RoFormer model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    args = parser.parse_args()

    # Deterministic fp32 run.
    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # load tokenizer & model (eager attention, fp32)
    tokenizer = AutoTokenizer.from_pretrained(args.model)
    model = RoFormerForSequenceClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    # Replicate the sample sentence so --batch-size actually shapes the batch.
    text = "RoFormer uses rotary position embeddings."
    inputs = tokenizer([text] * args.batch_size, return_tensors="pt", padding=True, truncation=True)

    # pre-run to lock shapes before compilation
    with torch.no_grad():
        _ = model(**inputs).logits

    # compile the forward pass as a single Neuron graph
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # warmup: first compiled call includes compilation cost
    warmup_start = time.time()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.time() - warmup_start

    # benchmark run (steady-state latency)
    run_start = time.time()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.time() - run_start

    # top-1 label of the first sample (all batch entries are identical)
    predicted_class_id = logits[0].argmax().item()
    predicted_label = model.config.id2label[predicted_class_id]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()
|
torch_compile/run_sam2.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# SAM encoder on Neuron – constant-shape, no lambda
|
| 3 |
+
import argparse
|
| 4 |
+
import logging
|
| 5 |
+
import time
|
| 6 |
+
import torch
|
| 7 |
+
from transformers import SamProcessor, SamModel
|
| 8 |
+
from PIL import Image
|
| 9 |
+
import torch_neuronx # guarantees Neuron backend
|
| 10 |
+
|
| 11 |
+
logging.basicConfig(level=logging.INFO)
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def main():
    """Compile the SAM image encoder for Neuron and time a fixed-shape embedding pass."""
    cli = argparse.ArgumentParser(description="SAM encoder on Neuron (full graph)")
    cli.add_argument("--model", default="facebook/sam-vit-base")
    args = cli.parse_args()

    torch.manual_seed(42)
    torch.set_default_dtype(torch.float32)

    # Processor + eager model.
    processor = SamProcessor.from_pretrained(args.model)
    model = SamModel.from_pretrained(args.model, attn_implementation="eager").eval()

    # Synthetic 224x224 RGB image; with no prompt points only the encoder runs.
    synthetic = Image.new("RGB", (224, 224), color="red")
    batch = processor(images=synthetic, return_tensors="pt")

    # Eager pass locks the input shapes before compilation.
    with torch.no_grad():
        _ = model.get_image_embeddings(**batch)

    # Compile the encoder entry point as one Neuron graph.
    model.get_image_embeddings = torch.compile(
        model.get_image_embeddings, backend="neuron", fullgraph=True
    )

    # Warmup: first compiled call carries the compilation cost.
    t0 = time.time()
    with torch.no_grad():
        _ = model.get_image_embeddings(**batch)
    logger.info("Warmup: %.3f s", time.time() - t0)

    # Steady-state latency.
    t0 = time.time()
    with torch.no_grad():
        embeddings = model.get_image_embeddings(**batch)
    logger.info("Run: %.3f s", time.time() - t0)
    logger.info("Embedding shape: %s", embeddings.shape)  # expected [1, 256, 64, 64]


if __name__ == "__main__":
    main()
|
torch_compile/run_swin.py
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# Swin Transformer image-classification on Neuron – full graph
|
| 3 |
+
import argparse
|
| 4 |
+
import logging
|
| 5 |
+
import time
|
| 6 |
+
|
| 7 |
+
import torch
|
| 8 |
+
from transformers import AutoImageProcessor, SwinForImageClassification
|
| 9 |
+
from datasets import load_dataset
|
| 10 |
+
import torch_neuronx # guarantees Neuron backend
|
| 11 |
+
|
| 12 |
+
logging.basicConfig(level=logging.INFO)
|
| 13 |
+
logger = logging.getLogger(__name__)
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def main():
    """Benchmark Swin Transformer image classification via the Neuron torch.compile backend."""
    cli = argparse.ArgumentParser(description="Swin on Neuron (full graph)")
    cli.add_argument("--model", default="microsoft/swin-tiny-patch4-window7-224")
    args = cli.parse_args()

    torch.manual_seed(42)
    torch.set_default_dtype(torch.float32)

    # One sample image from the cats-image dataset.
    sample_image = load_dataset("huggingface/cats-image")["test"]["image"][0]

    # Processor + eager fp32 classifier.
    image_processor = AutoImageProcessor.from_pretrained(args.model)
    net = SwinForImageClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    batch = image_processor(images=sample_image, return_tensors="pt")

    # Eager pre-run locks tensor shapes before compilation.
    with torch.no_grad():
        _ = net(**batch).logits

    # Compile the forward pass as one Neuron graph.
    net.forward = torch.compile(net.forward, backend="neuron", fullgraph=True)

    # Warmup: first compiled call carries the compilation cost.
    t0 = time.time()
    with torch.no_grad():
        _ = net(**batch)
    logger.info("Warmup: %.3f s", time.time() - t0)

    # Steady-state latency.
    t0 = time.time()
    with torch.no_grad():
        logits = net(**batch).logits
    run_time = time.time() - t0

    # Map the top-1 logit to its ImageNet label.
    predicted_class_idx = logits.argmax(-1).item()
    predicted_label = net.config.id2label[predicted_class_idx]

    logger.info("Run: %.3f s", run_time)
    logger.info("Predicted label: %s", predicted_label)


if __name__ == "__main__":
    main()
|
| 66 |
+
|
| 67 |
+
"""
|
| 68 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/swin/modeling_swin.py:611:0: error: failed to legalize operation 'torch.aten.fill.Tensor'
|
| 69 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/swin/modeling_swin.py:662:0: note: called from
|
| 70 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/swin/modeling_swin.py:736:0: note: called from
|
| 71 |
+
/usr/local/lib/python3.10/site-packages/transformers/modeling_layers.py:94:0: note: called from
|
| 72 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/swin/modeling_swin.py:806:0: note: called from
|
| 73 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/swin/modeling_swin.py:945:0: note: called from
|
| 74 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/swin/modeling_swin.py:1139:0: note: called from
|
| 75 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/swin/modeling_swin.py:611:0: note: see current operation: %1014 = "torch.aten.fill.Tensor"(%1013, %778) : (!torch.vtensor<[1,49,49,1],f32>, !torch.vtensor<[],f32>) -> !torch.vtensor<[1,49,49,1],f32>
|
| 76 |
+
"""
|
torch_compile/run_t5_decoder.py
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# T5 decoder (no cache) on Neuron – constant shapes, full graph, no Apex
|
| 3 |
+
import os
|
| 4 |
+
os.environ["USE_FUSED_LAYER_NORM"] = "0" # MUST be before any transformers import
|
| 5 |
+
|
| 6 |
+
import argparse
|
| 7 |
+
import logging
|
| 8 |
+
import time
|
| 9 |
+
import torch
|
| 10 |
+
from transformers import T5Tokenizer, T5Model
|
| 11 |
+
import torch_neuronx # guarantees Neuron backend
|
| 12 |
+
|
| 13 |
+
logging.basicConfig(level=logging.INFO)
|
| 14 |
+
logger = logging.getLogger(__name__)
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def main():
    """Compile the T5 decoder (cache disabled) for Neuron and time a fixed-shape pass.

    The encoder runs eagerly once to produce hidden states; only the decoder
    forward is compiled. The decoder wrapper is a named nested function rather
    than a lambda bound to a name (PEP 8 / E731), which also gives the compiled
    callable a proper identity in tracebacks.
    """
    parser = argparse.ArgumentParser(description="T5 decoder on Neuron (full graph, no cache)")
    parser.add_argument("--model", default="t5-small")
    parser.add_argument("--seq-len", type=int, default=128, help="Fixed seq length")
    args = parser.parse_args()

    torch.manual_seed(42)
    torch.set_default_dtype(torch.float32)

    tokenizer = T5Tokenizer.from_pretrained(args.model)
    # use_cache=False -> static shapes, no DynamicCache / config deepcopy.
    model = T5Model.from_pretrained(
        args.model,
        torch_dtype=torch.float32,
        attn_implementation="eager",
        use_cache=False,
    ).eval()

    # Constant-shape encoder input; encoder runs eagerly, output detached.
    text = "hello"
    enc_tok = tokenizer(text, max_length=args.seq_len, padding="max_length", truncation=True, return_tensors="pt")
    with torch.no_grad():
        enc_out = model.encoder(input_ids=enc_tok.input_ids).last_hidden_state.detach()

    # Decoder input: a padded "<pad>" sequence of the same fixed length.
    dec_tok = tokenizer("<pad>", max_length=args.seq_len, padding="max_length", return_tensors="pt")

    # Eager pre-run locks the decoder shapes before compilation.
    with torch.no_grad():
        _ = model.decoder(input_ids=dec_tok.input_ids, encoder_hidden_states=enc_out).last_hidden_state

    def decode_fn(inp, enc):
        """Run the decoder on token ids ``inp`` against encoder states ``enc``."""
        return model.decoder(input_ids=inp, encoder_hidden_states=enc).last_hidden_state

    # Compile the decoder wrapper as one Neuron graph.
    decode_fn = torch.compile(decode_fn, backend="neuron", fullgraph=True)

    # Warmup: first compiled call carries the compilation cost.
    start = time.time()
    with torch.no_grad():
        _ = decode_fn(dec_tok.input_ids, enc_out)
    logger.info("Warmup: %.3f s", time.time() - start)

    # Steady-state latency.
    start = time.time()
    with torch.no_grad():
        hidden = decode_fn(dec_tok.input_ids, enc_out)
    logger.info("Run: %.3f s", time.time() - start)
    logger.info("Hidden shape: %s", hidden.shape)  # [B, seq_len, d_model]


if __name__ == "__main__":
    main()
|
torch_compile/run_t5_encoder.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# T5 encoder on Neuron – no Apex, full graph, constant shapes
|
| 3 |
+
import os
|
| 4 |
+
os.environ["USE_FUSED_LAYER_NORM"] = "0" # <── disable Apex
|
| 5 |
+
|
| 6 |
+
import argparse
|
| 7 |
+
import logging
|
| 8 |
+
import time
|
| 9 |
+
import torch
|
| 10 |
+
from transformers import T5Tokenizer, T5Model # use T5Model (no LM head)
|
| 11 |
+
from datasets import load_dataset
|
| 12 |
+
import torch_neuronx # guarantees Neuron backend
|
| 13 |
+
|
| 14 |
+
logging.basicConfig(level=logging.INFO)
|
| 15 |
+
logger = logging.getLogger(__name__)
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def main():
|
| 19 |
+
parser = argparse.ArgumentParser(description="T5 encoder on Neuron (full graph)")
|
| 20 |
+
parser.add_argument("--model", default="t5-small")
|
| 21 |
+
parser.add_argument("--seq-len", type=int, default=128, help="Fixed seq length")
|
| 22 |
+
args = parser.parse_args()
|
| 23 |
+
|
| 24 |
+
torch.manual_seed(42)
|
| 25 |
+
torch.set_default_dtype(torch.float32)
|
| 26 |
+
|
| 27 |
+
tokenizer = T5Tokenizer.from_pretrained(args.model)
|
| 28 |
+
model = T5Model.from_pretrained(
|
| 29 |
+
args.model, torch_dtype=torch.float32, attn_implementation="eager"
|
| 30 |
+
).eval()
|
| 31 |
+
|
| 32 |
+
# fixed-shape input
|
| 33 |
+
text = "translate English to French: The cat is on the mat."
|
| 34 |
+
inputs = tokenizer(text, max_length=args.seq_len, padding="max_length", truncation=True, return_tensors="pt")
|
| 35 |
+
|
| 36 |
+
# pre-run to lock shapes
|
| 37 |
+
with torch.no_grad():
|
| 38 |
+
_ = model.encoder(**inputs).last_hidden_state
|
| 39 |
+
|
| 40 |
+
# compile encoder forward only (full graph)
|
| 41 |
+
encode_fn = lambda **kw: model.encoder(**kw).last_hidden_state
|
| 42 |
+
encode_fn = torch.compile(encode_fn, backend="neuron", fullgraph=True)
|
| 43 |
+
|
| 44 |
+
# warmup
|
| 45 |
+
start = time.time()
|
| 46 |
+
with torch.no_grad():
|
| 47 |
+
_ = encode_fn(**inputs)
|
| 48 |
+
logger.info("Warmup: %.3f s", time.time() - start)
|
| 49 |
+
|
| 50 |
+
# benchmark
|
| 51 |
+
start = time.time()
|
| 52 |
+
with torch.no_grad():
|
| 53 |
+
hidden = encode_fn(**inputs)
|
| 54 |
+
logger.info("Run: %.3f s", time.time() - start)
|
| 55 |
+
logger.info("Hidden shape: %s", hidden.shape) # [B, seq_len, d_model]
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
if __name__ == "__main__":
|
| 59 |
+
main()
|
torch_compile/run_unispeech.py
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# UniSpeech (non-SAT) CTC speech-recognition on Neuron – constant shapes, full graph
|
| 3 |
+
import argparse
|
| 4 |
+
import logging
|
| 5 |
+
import time
|
| 6 |
+
import torch
|
| 7 |
+
from transformers import AutoProcessor, UniSpeechForCTC
|
| 8 |
+
from datasets import load_dataset
|
| 9 |
+
import torch_neuronx # guarantees Neuron backend
|
| 10 |
+
|
| 11 |
+
logging.basicConfig(level=logging.INFO)
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def main():
|
| 16 |
+
parser = argparse.ArgumentParser(description="UniSpeech CTC on Neuron (full graph)")
|
| 17 |
+
parser.add_argument("--model", default="microsoft/unispeech-large-1500h-cv")
|
| 18 |
+
args = parser.parse_args()
|
| 19 |
+
|
| 20 |
+
torch.manual_seed(42)
|
| 21 |
+
torch.set_default_dtype(torch.float32)
|
| 22 |
+
|
| 23 |
+
# load small speech snippet
|
| 24 |
+
dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
|
| 25 |
+
sample = dataset[0]["audio"]["array"] # 16 kHz numpy array
|
| 26 |
+
sampling_rate = dataset.features["audio"].sampling_rate
|
| 27 |
+
|
| 28 |
+
# processor + CTC model (non-SAT)
|
| 29 |
+
processor = AutoProcessor.from_pretrained(args.model)
|
| 30 |
+
model = UniSpeechForCTC.from_pretrained(
|
| 31 |
+
args.model, torch_dtype=torch.float32, attn_implementation="eager"
|
| 32 |
+
).eval()
|
| 33 |
+
|
| 34 |
+
# preprocess – fixed-length audio (4 s)
|
| 35 |
+
inputs = processor(sample, sampling_rate=sampling_rate, max_length=4 * 16_000, padding="max_length", return_tensors="pt")
|
| 36 |
+
|
| 37 |
+
# pre-run to lock shapes
|
| 38 |
+
with torch.no_grad():
|
| 39 |
+
_ = model(**inputs).logits
|
| 40 |
+
|
| 41 |
+
# compile forward (full graph)
|
| 42 |
+
model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)
|
| 43 |
+
|
| 44 |
+
# warmup
|
| 45 |
+
start = time.time()
|
| 46 |
+
with torch.no_grad():
|
| 47 |
+
_ = model(**inputs)
|
| 48 |
+
logger.info("Warmup: %.3f s", time.time() - start)
|
| 49 |
+
|
| 50 |
+
# benchmark + decode
|
| 51 |
+
start = time.time()
|
| 52 |
+
with torch.no_grad():
|
| 53 |
+
logits = model(**inputs).logits
|
| 54 |
+
logger.info("Run: %.3f s", time.time() - start)
|
| 55 |
+
|
| 56 |
+
predicted_ids = torch.argmax(logits, dim=-1)
|
| 57 |
+
transcription = processor.batch_decode(predicted_ids)[0]
|
| 58 |
+
logger.info("Transcription: %s", transcription)
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
if __name__ == "__main__":
|
| 62 |
+
main()
|
torch_compile/run_unispeech_sat.py
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# UniSpeech-SAT encoder on Neuron – full graph, constant shapes
|
| 3 |
+
import argparse
|
| 4 |
+
import logging
|
| 5 |
+
import time
|
| 6 |
+
import torch
|
| 7 |
+
from transformers import Wav2Vec2Processor, UniSpeechSatModel
|
| 8 |
+
from datasets import load_dataset
|
| 9 |
+
import torch_neuronx # guarantees Neuron backend
|
| 10 |
+
|
| 11 |
+
logging.basicConfig(level=logging.INFO)
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def main():
|
| 16 |
+
parser = argparse.ArgumentParser(description="UniSpeech-SAT encoder on Neuron (full graph)")
|
| 17 |
+
parser.add_argument("--model", default="microsoft/unispeech-sat-base-100h-libri-ft")
|
| 18 |
+
args = parser.parse_args()
|
| 19 |
+
|
| 20 |
+
torch.manual_seed(42)
|
| 21 |
+
torch.set_default_dtype(torch.float32)
|
| 22 |
+
|
| 23 |
+
# load small speech snippet
|
| 24 |
+
dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
|
| 25 |
+
sample = dataset[0]["audio"]["array"] # 16 kHz numpy array
|
| 26 |
+
|
| 27 |
+
# processor + UniSpeech-SAT encoder (no LM head)
|
| 28 |
+
processor = Wav2Vec2Processor.from_pretrained(args.model)
|
| 29 |
+
model = UniSpeechSatModel.from_pretrained(
|
| 30 |
+
args.model, torch_dtype=torch.float32, attn_implementation="eager"
|
| 31 |
+
).eval()
|
| 32 |
+
|
| 33 |
+
# preprocess – fixed-length audio (pad to 4 s)
|
| 34 |
+
inputs = processor(sample, sampling_rate=16_000, max_length=4 * 16_000, padding="max_length", return_tensors="pt")
|
| 35 |
+
|
| 36 |
+
# pre-run to lock shapes
|
| 37 |
+
with torch.no_grad():
|
| 38 |
+
_ = model(**inputs).last_hidden_state
|
| 39 |
+
|
| 40 |
+
# compile encoder forward (full graph)
|
| 41 |
+
model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)
|
| 42 |
+
|
| 43 |
+
# warmup
|
| 44 |
+
start = time.time()
|
| 45 |
+
with torch.no_grad():
|
| 46 |
+
_ = model(**inputs)
|
| 47 |
+
logger.info("Warmup: %.3f s", time.time() - start)
|
| 48 |
+
|
| 49 |
+
# benchmark run
|
| 50 |
+
start = time.time()
|
| 51 |
+
with torch.no_grad():
|
| 52 |
+
hidden = model(**inputs).last_hidden_state
|
| 53 |
+
logger.info("Run: %.3f s", time.time() - start)
|
| 54 |
+
logger.info("Output hidden shape: %s", hidden.shape) # [B, T, hidden]
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
if __name__ == "__main__":
|
| 58 |
+
main()
|
| 59 |
+
|
| 60 |
+
"""
|
| 61 |
+
/usr/local/lib/python3.10/site-packages/torch/nn/utils/parametrizations.py:325:0: error: number of output elements (4718592) doesn't match expected number of elements (128)
|
| 62 |
+
/usr/local/lib/python3.10/site-packages/torch/nn/utils/parametrize.py:303:0: note: called from
|
| 63 |
+
/usr/local/lib/python3.10/site-packages/torch/nn/utils/parametrize.py:407:0: note: called from
|
| 64 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/unispeech_sat/modeling_unispeech_sat.py:140:0: note: called from
|
| 65 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/unispeech_sat/modeling_unispeech_sat.py:485:0: note: called from
|
| 66 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/unispeech_sat/modeling_unispeech_sat.py:1078:0: note: called from
|
| 67 |
+
"""
|
torch_compile/run_vit.py
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# Vision Transformer (ViT) image-classification on Neuron – full graph, constant shapes
|
| 3 |
+
import argparse
|
| 4 |
+
import logging
|
| 5 |
+
import time
|
| 6 |
+
import torch
|
| 7 |
+
from transformers import AutoImageProcessor, ViTForImageClassification
|
| 8 |
+
from datasets import load_dataset
|
| 9 |
+
import torch_neuronx # guarantees Neuron backend
|
| 10 |
+
|
| 11 |
+
logging.basicConfig(level=logging.INFO)
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def main():
|
| 16 |
+
parser = argparse.ArgumentParser(description="ViT on Neuron (full graph)")
|
| 17 |
+
parser.add_argument("--model", default="google/vit-base-patch16-224")
|
| 18 |
+
args = parser.parse_args()
|
| 19 |
+
|
| 20 |
+
torch.manual_seed(42)
|
| 21 |
+
torch.set_default_dtype(torch.float32)
|
| 22 |
+
|
| 23 |
+
# load dataset image
|
| 24 |
+
dataset = load_dataset("huggingface/cats-image")
|
| 25 |
+
image = dataset["test"]["image"][0]
|
| 26 |
+
|
| 27 |
+
# load processor & model
|
| 28 |
+
processor = AutoImageProcessor.from_pretrained(args.model)
|
| 29 |
+
model = ViTForImageClassification.from_pretrained(
|
| 30 |
+
args.model, torch_dtype=torch.float32, attn_implementation="eager"
|
| 31 |
+
).eval()
|
| 32 |
+
|
| 33 |
+
# preprocess
|
| 34 |
+
inputs = processor(images=image, return_tensors="pt")
|
| 35 |
+
|
| 36 |
+
# pre-run to lock shapes
|
| 37 |
+
with torch.no_grad():
|
| 38 |
+
_ = model(**inputs).logits
|
| 39 |
+
|
| 40 |
+
# compile full graph
|
| 41 |
+
model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)
|
| 42 |
+
|
| 43 |
+
# warmup
|
| 44 |
+
warmup_start = time.time()
|
| 45 |
+
with torch.no_grad():
|
| 46 |
+
_ = model(**inputs)
|
| 47 |
+
logger.info("Warmup: %.3f s", time.time() - warmup_start)
|
| 48 |
+
|
| 49 |
+
# benchmark run
|
| 50 |
+
run_start = time.time()
|
| 51 |
+
with torch.no_grad():
|
| 52 |
+
logits = model(**inputs).logits
|
| 53 |
+
run_time = time.time() - run_start
|
| 54 |
+
|
| 55 |
+
# top-1 ImageNet class
|
| 56 |
+
predicted_class_idx = logits.argmax(-1).item()
|
| 57 |
+
predicted_label = model.config.id2label[predicted_class_idx]
|
| 58 |
+
|
| 59 |
+
logger.info("Run: %.3f s", run_time)
|
| 60 |
+
logger.info("Predicted label: %s", predicted_label)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
if __name__ == "__main__":
|
| 64 |
+
main()
|
torch_compile/run_wav2vec2.py
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import logging
|
| 3 |
+
import time
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
from datasets import load_dataset
|
| 7 |
+
from transformers import Wav2Vec2Processor, Wav2Vec2ForCTC
|
| 8 |
+
|
| 9 |
+
import torch_neuronx
|
| 10 |
+
|
| 11 |
+
logging.basicConfig(level=logging.INFO)
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def main():
|
| 16 |
+
parser = argparse.ArgumentParser(description="Run Wav2Vec2 on Neuron")
|
| 17 |
+
parser.add_argument(
|
| 18 |
+
"--model", type=str, default="facebook/wav2vec2-base-960h", help="Wav2Vec2 model name"
|
| 19 |
+
)
|
| 20 |
+
parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
|
| 21 |
+
args = parser.parse_args()
|
| 22 |
+
|
| 23 |
+
torch.set_default_dtype(torch.float32)
|
| 24 |
+
torch.manual_seed(42)
|
| 25 |
+
|
| 26 |
+
processor = Wav2Vec2Processor.from_pretrained(args.model)
|
| 27 |
+
model = Wav2Vec2ForCTC.from_pretrained(
|
| 28 |
+
args.model, torch_dtype=torch.float32, attn_implementation="eager"
|
| 29 |
+
)
|
| 30 |
+
model.eval()
|
| 31 |
+
|
| 32 |
+
dataset = load_dataset("hf-internal-testing/librispeech_asr_demo", "clean", split="validation")
|
| 33 |
+
dataset = dataset.sort("id")
|
| 34 |
+
sampling_rate = dataset.features["audio"].sampling_rate
|
| 35 |
+
inputs = processor(dataset[0]["audio"]["array"], sampling_rate=sampling_rate, return_tensors="pt")
|
| 36 |
+
# Run once to establish shapes before compile
|
| 37 |
+
with torch.no_grad():
|
| 38 |
+
logits = model(**inputs).logits
|
| 39 |
+
|
| 40 |
+
model.forward = torch.compile(model.forward, backend="neuron", fullgraph=False)
|
| 41 |
+
|
| 42 |
+
# Warmup
|
| 43 |
+
warmup_start = time.time()
|
| 44 |
+
with torch.no_grad():
|
| 45 |
+
logits = model(**inputs).logits
|
| 46 |
+
warmup_time = time.time() - warmup_start
|
| 47 |
+
|
| 48 |
+
# Run
|
| 49 |
+
run_start = time.time()
|
| 50 |
+
with torch.no_grad():
|
| 51 |
+
logits = model(**inputs).logits
|
| 52 |
+
run_time = time.time() - run_start
|
| 53 |
+
probabilities = torch.sigmoid(logits[0])
|
| 54 |
+
labels = (probabilities > 0.5).long()
|
| 55 |
+
|
| 56 |
+
logger.info(f"Warmup: {warmup_time:.2f}s, Run: {run_time:.4f}s")
|
| 57 |
+
logger.info(f"Output label: {labels[0].tolist()}")
|
| 58 |
+
|
| 59 |
+
|
| 60 |
+
if __name__ == "__main__":
|
| 61 |
+
main()
|
| 62 |
+
|
| 63 |
+
"""
|
| 64 |
+
/usr/local/lib/python3.10/site-packages/torch/nn/utils/parametrizations.py:325:0: error: number of output elements (4718592) doesn't match expected number of elements (128)
|
| 65 |
+
/usr/local/lib/python3.10/site-packages/torch/nn/utils/parametrize.py:303:0: note: called from
|
| 66 |
+
/usr/local/lib/python3.10/site-packages/torch/nn/utils/parametrize.py:407:0: note: called from
|
| 67 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:372:0: note: called from
|
| 68 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:713:0: note: called from
|
| 69 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:1462:0: note: called from
|
| 70 |
+
/usr/local/lib/python3.10/site-packages/transformers/models/wav2vec2/modeling_wav2vec2.py:1862:0: note: called from
|
| 71 |
+
|
| 72 |
+
# dynamic shape of intermediate tensors leading to static shape error while runing the traced artifact.
|
| 73 |
+
"""
|
torch_compile/run_whisper.py
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import logging
|
| 3 |
+
import time
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
from transformers import AutoTokenizer, WhisperForConditionalGeneration
|
| 7 |
+
|
| 8 |
+
import torch_neuronx
|
| 9 |
+
|
| 10 |
+
logging.basicConfig(level=logging.INFO)
|
| 11 |
+
logger = logging.getLogger(__name__)
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def main():
|
| 15 |
+
parser = argparse.ArgumentParser(description="Run Whisper on Neuron")
|
| 16 |
+
parser.add_argument(
|
| 17 |
+
"--model", type=str, default="openai/whisper-tiny", help="Whisper model name"
|
| 18 |
+
)
|
| 19 |
+
parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
|
| 20 |
+
args = parser.parse_args()
|
| 21 |
+
|
| 22 |
+
torch.set_default_dtype(torch.float32)
|
| 23 |
+
torch.manual_seed(42)
|
| 24 |
+
|
| 25 |
+
model = WhisperForConditionalGeneration.from_pretrained(
|
| 26 |
+
args.model, torch_dtype=torch.float32, attn_implementation="eager"
|
| 27 |
+
)
|
| 28 |
+
model.eval()
|
| 29 |
+
|
| 30 |
+
tokenizer = AutoTokenizer.from_pretrained(args.model)
|
| 31 |
+
|
| 32 |
+
num_mel_bins = model.config.num_mel_bins
|
| 33 |
+
input_features = torch.randn(args.batch_size, num_mel_bins, 3000, dtype=torch.float32)
|
| 34 |
+
gen_kwargs = {
|
| 35 |
+
"max_new_tokens": 64,
|
| 36 |
+
"do_sample": False,
|
| 37 |
+
"cache_implementation": "static",
|
| 38 |
+
"eos_token_id": -1,
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
# Run once to establish shapes before compile
|
| 42 |
+
with torch.no_grad():
|
| 43 |
+
_ = model.generate(input_features=input_features, **gen_kwargs)
|
| 44 |
+
|
| 45 |
+
model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)
|
| 46 |
+
|
| 47 |
+
# Warmup
|
| 48 |
+
warmup_start = time.time()
|
| 49 |
+
with torch.no_grad():
|
| 50 |
+
output = model.generate(input_features=input_features, **gen_kwargs)
|
| 51 |
+
warmup_time = time.time() - warmup_start
|
| 52 |
+
|
| 53 |
+
# Run
|
| 54 |
+
run_start = time.time()
|
| 55 |
+
with torch.no_grad():
|
| 56 |
+
output = model.generate(input_features=input_features, **gen_kwargs)
|
| 57 |
+
run_time = time.time() - run_start
|
| 58 |
+
|
| 59 |
+
logger.info(f"Warmup: {warmup_time:.2f}s, Run: {run_time:.4f}s")
|
| 60 |
+
logger.info(f"Output: {tokenizer.batch_decode(output, skip_special_tokens=True)}")
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
if __name__ == "__main__":
|
| 64 |
+
main()
|
| 65 |
+
|
| 66 |
+
"""
|
| 67 |
+
Traceback (most recent call last):
|
| 68 |
+
File "/workspace/torch-neuron-sample/scripts/tests/run_whisper.py", line 64, in <module>
|
| 69 |
+
main()
|
| 70 |
+
File "/workspace/torch-neuron-sample/scripts/tests/run_whisper.py", line 50, in main
|
| 71 |
+
output = model.generate(input_features=input_features, **gen_kwargs)
|
| 72 |
+
File "/usr/local/lib/python3.10/site-packages/transformers/models/whisper/generation_whisper.py", line 704, in generate
|
| 73 |
+
init_tokens = self._retrieve_init_tokens(
|
| 74 |
+
File "/usr/local/lib/python3.10/site-packages/transformers/models/whisper/generation_whisper.py", line 1572, in _retrieve_init_tokens
|
| 75 |
+
lang_ids = self.detect_language(
|
| 76 |
+
File "/usr/local/lib/python3.10/site-packages/transformers/models/whisper/generation_whisper.py", line 1683, in detect_language
|
| 77 |
+
lang_ids = logits.argmax(-1)
|
| 78 |
+
File "/torch-neuronx/torch_neuronx/python_ops/auto_registration.py", line 306, in wrapper
|
| 79 |
+
result = operation(*args, **kwargs)
|
| 80 |
+
File "/torch-neuronx/torch_neuronx/python_ops/base.py", line 712, in __call__
|
| 81 |
+
result = impl.execute(*args, **kwargs)
|
| 82 |
+
File "/torch-neuronx/torch_neuronx/python_ops/base.py", line 109, in execute
|
| 83 |
+
result = self._execute_impl(*args2, **kwargs2)
|
| 84 |
+
File "/torch-neuronx/torch_neuronx/python_ops/to_copy.py", line 102, in _execute_impl
|
| 85 |
+
cpu_dst = copy_neuron_to_cpu(
|
| 86 |
+
File "/torch-neuronx/torch_neuronx/python_ops/cast_policy.py", line 102, in copy_neuron_to_cpu
|
| 87 |
+
_C._nrt_copy_neuron_to_cpu_tensor(neuron_src, cpu_tmp, non_blocking=non_blocking)
|
| 88 |
+
RuntimeError: Compilation error occurred on Neuron for operation=aten::_index_put_impl_;
|
| 89 |
+
error message="COMPILATION FAILED: Error: 2026-01-16T11:49:13Z 2026-01-16 11:49:13.062190: E hilo/hlo_passes/NeuronHloVerifier.cc:647] [ERROR] [NCC_EVRF024] Output tensor size of 10,759,912,900 bytes with shape of f32[51865,51865] exceeds 4GB limit for individual tensor size. TIP: Consider applying model parallelism or tensor parallelism per https://awsdocs-neuron.readthedocs-hosted.com/en/latest/libraries/nxd-training/app_notes/nxd-training-tp-appnote.html."
|
| 90 |
+
python stack trace=
|
| 91 |
+
"""
|
torch_compile/run_xlm.py
ADDED
|
File without changes
|
torch_compile/run_xlm_roberta.py
ADDED
|
File without changes
|
torch_compile/run_yolos.py
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import argparse
|
| 2 |
+
import logging
|
| 3 |
+
import time
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
import torch
|
| 7 |
+
from transformers import AutoImageProcessor, YolosForObjectDetection
|
| 8 |
+
from datasets import load_dataset
|
| 9 |
+
import torch_neuronx # ensure Neuron backend is available
|
| 10 |
+
|
| 11 |
+
logging.basicConfig(level=logging.INFO)
|
| 12 |
+
logger = logging.getLogger(__name__)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def main():
|
| 16 |
+
# Allow CPU fallback
|
| 17 |
+
# ERROR:torch_neuronx.neuron_dynamo_backend.backend:Execution failed: Compilation error occurred on Neuron for operation=torch_compile;
|
| 18 |
+
# error message="COMPILATION FAILED: Error: 2026-01-20T12:06:37Z tensor_op_name: _gather.577 | hlo_id: 577 | [ERROR] [NCC_EXTP003] Instructions generated by compiler 290400 exceeds the typical limit of 150000. Input computation graph is too big due to large operators - Consider using smaller batches or sequence length, or applying tensor parellelism. For further troubleshooting visit https://awsdocs-neuron.readthedocs-hosted.com/en/latest/libraries/nxd-training/app_notes/nxd-training-tp-appnote.html"
|
| 19 |
+
# python stack trace=
|
| 20 |
+
os.environ["TORCH_NEURONX_FALLBACK_ONLY_FOR_UNIMPLEMENTED_OPS"] = "0"
|
| 21 |
+
|
| 22 |
+
parser = argparse.ArgumentParser(description="Run YOLOS object detection on Neuron")
|
| 23 |
+
parser.add_argument(
|
| 24 |
+
"--model",
|
| 25 |
+
type=str,
|
| 26 |
+
default="hustvl/yolos-base",
|
| 27 |
+
help="YOLOS model name on Hugging Face Hub",
|
| 28 |
+
)
|
| 29 |
+
parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
|
| 30 |
+
args = parser.parse_args()
|
| 31 |
+
|
| 32 |
+
torch.set_default_dtype(torch.float32)
|
| 33 |
+
torch.manual_seed(42)
|
| 34 |
+
|
| 35 |
+
# Load dataset and pick an image
|
| 36 |
+
dataset = load_dataset("huggingface/cats-image")
|
| 37 |
+
image = dataset["test"]["image"][0]
|
| 38 |
+
|
| 39 |
+
# Load processor and model
|
| 40 |
+
image_processor = AutoImageProcessor.from_pretrained(args.model)
|
| 41 |
+
model = YolosForObjectDetection.from_pretrained(
|
| 42 |
+
args.model, torch_dtype=torch.float32, attn_implementation="eager"
|
| 43 |
+
)
|
| 44 |
+
model.eval()
|
| 45 |
+
|
| 46 |
+
# Preprocess image
|
| 47 |
+
inputs = image_processor(images=image, return_tensors="pt")
|
| 48 |
+
|
| 49 |
+
# Pre-run once to fix shapes before compilation
|
| 50 |
+
with torch.no_grad():
|
| 51 |
+
outputs = model(**inputs)
|
| 52 |
+
|
| 53 |
+
# Compile forward pass
|
| 54 |
+
model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)
|
| 55 |
+
|
| 56 |
+
# Warmup
|
| 57 |
+
warmup_start = time.time()
|
| 58 |
+
with torch.no_grad():
|
| 59 |
+
_ = model(**inputs)
|
| 60 |
+
warmup_time = time.time() - warmup_start
|
| 61 |
+
|
| 62 |
+
# Actual run
|
| 63 |
+
run_start = time.time()
|
| 64 |
+
with torch.no_grad():
|
| 65 |
+
outputs = model(**inputs)
|
| 66 |
+
run_time = time.time() - run_start
|
| 67 |
+
|
| 68 |
+
# Post-process: keep only top detection
|
| 69 |
+
logits = outputs.logits # [B, num_queries, num_classes + 1]
|
| 70 |
+
probs = logits.softmax(dim=-1)[0, :, :-1] # drop "no-object"
|
| 71 |
+
scores, labels = probs.max(dim=-1) # CPU fallback allowed
|
| 72 |
+
best_idx = scores.argmax().item()
|
| 73 |
+
|
| 74 |
+
logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
|
| 75 |
+
logger.info("Top detection: class=%d, score=%.3f", labels[best_idx].item(), scores[best_idx].item())
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
if __name__ == "__main__":
|
| 79 |
+
main()
|
| 80 |
+
|
| 81 |
+
"""
|
| 82 |
+
Need to fall back to CPU.
|
| 83 |
+
"""
|
torch_compile/torch_neuronx_dump/0123150520_241/offloaded_ops.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Operator ' aten::argmax.out ' fell back to CPU
|
torch_compile/torch_neuronx_dump/0123150520_241/used_ops.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Operator 'torch_compile' executed on Neuron
|
| 2 |
+
Operator 'neuron::memory::alloc' executed on Neuron
|
| 3 |
+
Operator 'neuron::copy::cpu_to_neuron' executed on Neuron
|
| 4 |
+
Operator '_to_copy' executed on Neuron
|
| 5 |
+
Operator 'model_default' executed on Neuron
|
| 6 |
+
Operator 'neuron::memory::dealloc' executed on Neuron
|
| 7 |
+
Operator 'neuron::copy::neuron_to_cpu' executed on Neuron
|
| 8 |
+
Operator 'copy_' executed on Neuron
|
torch_compile/torch_neuronx_dump/0123154351_1091/offloaded_ops.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Operator ' aten::argmax.out ' fell back to CPU
|
torch_compile/torch_neuronx_dump/0123154351_1091/used_ops.txt
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Operator 'torch_compile' executed on Neuron
|
| 2 |
+
Operator 'neuron::memory::alloc' executed on Neuron
|
| 3 |
+
Operator 'neuron::copy::cpu_to_neuron' executed on Neuron
|
| 4 |
+
Operator '_to_copy' executed on Neuron
|
| 5 |
+
Operator 'model_default' executed on Neuron
|
| 6 |
+
Operator 'neuron::memory::dealloc' executed on Neuron
|
| 7 |
+
Operator 'neuron::copy::neuron_to_cpu' executed on Neuron
|
| 8 |
+
Operator 'copy_' executed on Neuron
|