| |
| |
| import argparse |
| import logging |
| import time |
|
|
| import torch |
| from transformers import AutoTokenizer, RoFormerForSequenceClassification |
| import torch_neuronx |
|
|
# Module-level logger used for the timing and prediction messages below;
# root logging is configured at INFO level when the module is loaded.
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)
|
|
|
|
def main(argv=None):
    """Run RoFormer sequence classification under ``torch.compile`` on Neuron.

    Parses the command-line options, loads the tokenizer and model named by
    ``--model``, runs one eager forward pass, compiles ``model.forward`` with
    the ``"neuron"`` backend (``fullgraph=True``), then times a warmup call
    and a second call, and logs both timings and the predicted label(s).

    Args:
        argv: Optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the previous behavior.

    Raises:
        SystemExit: Raised by argparse on invalid arguments, including a
            ``--batch-size`` smaller than 1.
    """
    parser = argparse.ArgumentParser(description="RoFormer on Neuron (full graph)")
    parser.add_argument(
        "--model",
        type=str,
        default="junnyu/roformer_chinese_base",
        help="RoFormer model name on Hugging Face Hub",
    )
    parser.add_argument("--batch-size", type=int, default=1, help="Batch size")
    args = parser.parse_args(argv)
    if args.batch_size < 1:
        # An empty or negative batch cannot be tokenized; fail early with a
        # normal argparse usage error instead of a tokenizer traceback.
        parser.error("--batch-size must be a positive integer")

    torch.set_default_dtype(torch.float32)
    torch.manual_seed(42)

    # Load the tokenizer and the classification model in float32, eval mode.
    tokenizer = AutoTokenizer.from_pretrained(args.model)
    model = RoFormerForSequenceClassification.from_pretrained(
        args.model, torch_dtype=torch.float32, attn_implementation="eager"
    ).eval()

    # Build a batch of the requested size by repeating the sample sentence
    # (previously --batch-size was parsed but silently ignored).
    text = "RoFormer uses rotary position embeddings."
    inputs = tokenizer(
        [text] * args.batch_size,
        return_tensors="pt",
        padding=True,
        truncation=True,
    )

    # Eager forward pass before compilation; the result is discarded.
    # NOTE(review): presumably a sanity check that the model runs uncompiled
    # -- confirm intent.
    with torch.no_grad():
        _ = model(**inputs).logits

    # fullgraph=True asks torch.compile to error out on graph breaks rather
    # than fall back to eager for parts of the model.
    # NOTE(review): the "neuron" backend is presumably registered by importing
    # torch_neuronx -- confirm.
    model.forward = torch.compile(model.forward, backend="neuron", fullgraph=True)

    # torch.compile compiles lazily, so the first call pays the compile cost.
    # perf_counter() is monotonic, which makes it the right clock for intervals.
    warmup_start = time.perf_counter()
    with torch.no_grad():
        _ = model(**inputs)
    warmup_time = time.perf_counter() - warmup_start

    # Second call with the same inputs: measures the already-compiled path.
    run_start = time.perf_counter()
    with torch.no_grad():
        logits = model(**inputs).logits
    run_time = time.perf_counter() - run_start

    # Take the argmax per sample; a flattened argmax() would return an index
    # into the whole (batch, num_labels) tensor and is wrong for batch > 1.
    predicted_class_ids = logits.argmax(dim=-1).tolist()
    predicted_labels = [
        str(model.config.id2label[class_id]) for class_id in predicted_class_ids
    ]

    logger.info("Warmup: %.2f s, Run: %.4f s", warmup_time, run_time)
    logger.info("Predicted label: %s", ", ".join(predicted_labels))
|
|
|
|
# Script entry point: run the demo only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()