aimgo
/

CaputEmendatoris

Token Classification

caputemendatoris

Model card Files Files and versions

aimgo committed on Feb 28

Commit

9b62ecf

·

verified ·

1 Parent(s): d54fd7c

Update README.md

Files changed (1) hide show

README.md +22 -8

README.md CHANGED Viewed

@@ -39,27 +39,41 @@ import torch
 from transformers import AutoModel, AutoTokenizer
 device = "cuda" if torch.cuda.is_available() else "cpu"
-model = AutoModel.from_pretrained("aimgo/CaputEmendatoris", trust_remote_code=True, torch_dtype=torch.bfloat16).to(device)
-tokenizer = AutoTokenizer.from_pretrained("aimgo/Emendator")
 model.eval()
 text = "quandoquidcrn natura anirni rnortalis habctur."
 enc = tokenizer(text, return_tensors="pt").to(device)
-# detect errors at each byte
 with torch.no_grad():
-    probs = model.detect(enc["input_ids"], enc["attention_mask"])
-# byte probability -> character
-byte_probs = probs[0][:-1].cpu().tolist()
 char_probs = []
 byte_idx = 0
 for c in text:
     n = len(c.encode("utf-8"))
-    char_probs.append(max(byte_probs[byte_idx:byte_idx + n]) if byte_idx + n <= len(byte_probs) else 0.0)
     byte_idx += n
-output = char_probs
 ```
 If you use this in your work, please cite:

 from transformers import AutoModel, AutoTokenizer
 device = "cuda" if torch.cuda.is_available() else "cpu"
+model_repo = "aimgo/caputemendatoris"
+tokenizer_repo = "aimgo/Emendator"
+tokenizer = AutoTokenizer.from_pretrained(tokenizer_repo)
+model = AutoModel.from_pretrained(
+    model_repo,
+    trust_remote_code=True, # <=== NECESSARY, THIS HEAD HAS A CUSTOM MODELING FILE
+    torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
+).to(device)
 model.eval()
 text = "quandoquidcrn natura anirni rnortalis habctur."
 enc = tokenizer(text, return_tensors="pt").to(device)
+# detector
 with torch.no_grad():
+    probs = model.detect(enc["input_ids"],enc.get("attention_mask", None))
+byte_probs = probs[0][:-1].detach().cpu().tolist()
 char_probs = []
 byte_idx = 0
 for c in text:
     n = len(c.encode("utf-8"))
+    if byte_idx + n <= len(byte_probs):
+        char_probs.append(max(byte_probs[byte_idx:byte_idx+n]))
+    else:
+        char_probs.append(0.0)
     byte_idx += n
+print(char_probs)
 ```
 If you use this in your work, please cite: