ykae
/

monarch-bert-base-mnli

Text Classification

monarch-matrices

hardware-efficient

Eval Results (legacy)

text-embeddings-inference

Model card · Files and versions

ykae committed on Jan 5

Commit

587c5cc

·

verified ·

1 Parent(s): 01ab50d

Update README.md

Files changed (1) hide show

README.md +40 -8

README.md CHANGED Viewed

@@ -79,24 +79,56 @@ To see the real speedup, **compilation is mandatory** (otherwise PyTorch Python
 ```python
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
-# 1. Load Model with Custom Architecture
 model_id = "ykae/monarch-bert-base-mnli"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForSequenceClassification.from_pretrained(
     model_id,
-    trust_remote_code=True  # Required for Monarch Architecture
-).to("cuda")
-# 2. Enable Tensor Cores & Compile (CRITICAL for Speed)
 torch.set_float32_matmul_precision('high')
 model = torch.compile(model, mode="max-autotune")
-# 3. Inference
-inputs = tokenizer("Monarch matrices are efficiently sparse.", return_tensors="pt").to("cuda")
 with torch.no_grad():
-    outputs = model(**inputs)
-    print(outputs.logits)
 ```
 ## 🧠 The "Memory Paradox" (Read this!)

 ```python
 import torch
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
+from datasets import load_dataset
+from torch.utils.data import DataLoader
+from tqdm import tqdm
+# 1. Setup & Load Model
+device = "cuda" if torch.cuda.is_available() else "cpu"
 model_id = "ykae/monarch-bert-base-mnli"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
 model = AutoModelForSequenceClassification.from_pretrained(
     model_id,
+    trust_remote_code=True
+).to(device)
+# 2. Performance Optimization (Mandatory for Monarch Speed)
 torch.set_float32_matmul_precision('high')
 model = torch.compile(model, mode="max-autotune")
+model.eval()
+# 3. Load MNLI Validation Set
+print("📊 Loading MNLI Validation set...")
+dataset = load_dataset("glue", "mnli", split="validation_matched")
+def tokenize_fn(ex):
+    return tokenizer(ex['premise'], ex['hypothesis'],
+                     padding="max_length", truncation=True, max_length=128)
+tokenized_ds = dataset.map(tokenize_fn, batched=True)
+tokenized_ds.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label'])
+loader = DataLoader(tokenized_ds, batch_size=32)
+# 4. Scientific Evaluation
+correct = 0
+total = 0
+print(f"🚀 Starting evaluation on {len(tokenized_ds)} samples...")
 with torch.no_grad():
+    for batch in tqdm(loader):
+        ids = batch['input_ids'].to(device)
+        mask = batch['attention_mask'].to(device)
+        labels = batch['label'].to(device)
+        outputs = model(ids, attention_mask=mask)
+        preds = torch.argmax(outputs.logits, dim=1)
+        correct += (preds == labels).sum().item()
+        total += labels.size(0)
+print(f"\n✅ Evaluation Finished!")
+print(f"📈 Accuracy: {100 * correct / total:.2f}%")
 ```
 ## 🧠 The "Memory Paradox" (Read this!)