Upload folder using huggingface_hub
README.md CHANGED
@@ -53,10 +53,18 @@ def fast_detect_unknown(text: str) -> bool:
 ### Option A: Pipeline
 
 ```python
+import torch
 from transformers import pipeline
 
 model_id = "chiennv/langid-mmbert-small-8gpu"
-
+device = 0 if torch.cuda.is_available() else -1
+clf = pipeline(
+    "text-classification",
+    model=model_id,
+    tokenizer=model_id,
+    top_k=1,
+    device=device,  # GPU id (0,1,...) or -1 for CPU
+)
 
 text = "Bonjour tout le monde"
 if fast_detect_unknown(text):
@@ -74,7 +82,11 @@ from transformers import AutoModelForSequenceClassification, AutoTokenizer
 
 model_id = "chiennv/langid-mmbert-small-8gpu"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
-
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Use FP16 on GPU for faster inference and lower memory.
+dtype = torch.float16 if device.type == "cuda" else torch.float32
+model = AutoModelForSequenceClassification.from_pretrained(model_id, torch_dtype=dtype).to(device)
 model.eval()
 
 text = "Bonjour tout le monde"
@@ -82,6 +94,7 @@ if fast_detect_unknown(text):
     print({"label": "UNKNOWN", "score": 1.0})
 else:
     inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=128)
+    inputs = {k: v.to(device) for k, v in inputs.items()}
     with torch.no_grad():
         logits = model(**inputs).logits
     probs = torch.softmax(logits, dim=-1).squeeze(0)
@@ -96,3 +109,9 @@ else:
 ```bash
 python infer.py
 ```
+
+## GPU Notes
+
+- Check CUDA availability:
+  - `python -c "import torch; print(torch.cuda.is_available(), torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'no-gpu')"`
+- The AutoModel example above automatically uses GPU + FP16 when CUDA is available.
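For reference, a minimal end-to-end sketch of the updated Option A path after this change. The body of `fast_detect_unknown` sits outside the diff, so the heuristic below is a placeholder assumption; the unwrapping loop is there because the output nesting for `top_k=1` varies across transformers versions.

```python
import torch
from transformers import pipeline

model_id = "chiennv/langid-mmbert-small-8gpu"

def fast_detect_unknown(text: str) -> bool:
    # Placeholder: the real heuristic is defined earlier in the README,
    # outside this diff. Here we flag text with no alphabetic characters.
    return not any(c.isalpha() for c in text)

device = 0 if torch.cuda.is_available() else -1
clf = pipeline(
    "text-classification",
    model=model_id,
    tokenizer=model_id,
    top_k=1,
    device=device,
)

text = "Bonjour tout le monde"
if fast_detect_unknown(text):
    print({"label": "UNKNOWN", "score": 1.0})
else:
    result = clf(text)
    # With top_k=1 the pipeline may return [{...}] or [[{...}]] depending
    # on the transformers version; unwrap until we reach the dict.
    while isinstance(result, list):
        result = result[0]
    print({"label": result["label"], "score": result["score"]})
```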
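The commit moves the AutoModel path onto GPU with FP16, which also makes batching worthwhile. Below is a sketch under the same assumptions (FP16 on CUDA, `max_length=128`); batching is not part of the commit, and the `texts` list is illustrative.

```python
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

model_id = "chiennv/langid-mmbert-small-8gpu"
tokenizer = AutoTokenizer.from_pretrained(model_id)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.float16 if device.type == "cuda" else torch.float32
model = AutoModelForSequenceClassification.from_pretrained(model_id, torch_dtype=dtype).to(device)
model.eval()

texts = ["Bonjour tout le monde", "Hello world", "Hola mundo"]  # illustrative
inputs = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=128)
inputs = {k: v.to(device) for k, v in inputs.items()}
with torch.no_grad():
    logits = model(**inputs).logits
probs = torch.softmax(logits, dim=-1)
scores, ids = probs.max(dim=-1)
for text, i, s in zip(texts, ids.tolist(), scores.tolist()):
    print({"text": text, "label": model.config.id2label[i], "score": s})
```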
|