Instructions to use kwoncho/ko-sroberta-korean-sentence-type-classifier with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use kwoncho/ko-sroberta-korean-sentence-type-classifier with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="kwoncho/ko-sroberta-korean-sentence-type-classifier")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("kwoncho/ko-sroberta-korean-sentence-type-classifier", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| from __future__ import annotations | |
| import json | |
| from pathlib import Path | |
| import torch | |
| import torch.nn as nn | |
| from transformers import AutoModel, AutoTokenizer | |
def mean_pooling(last_hidden_state: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Average the hidden states along dim 1, counting only unmasked positions.

    Args:
        last_hidden_state: Encoder output; dim 1 is the axis that is averaged
            (presumably shaped (batch, seq_len, hidden) -- confirm with caller).
        attention_mask: Mask with one fewer trailing dimension than
            ``last_hidden_state``; non-zero entries mark positions to keep.

    Returns:
        The mask-weighted mean over dim 1, computed with a float32 mask.
    """
    # Broadcast the mask across the feature axis so it can weight every value.
    weights = attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float()
    weighted_total = (last_hidden_state * weights).sum(dim=1)
    # Clamp the count so a fully masked row divides by 1e-9 rather than zero.
    kept_count = weights.sum(dim=1).clamp(min=1e-9)
    return weighted_total / kept_count
class EmbeddingClassifier(nn.Module):
    """Classifier made of a pretrained encoder, mean pooling, dropout and a linear head."""

    def __init__(self, model_name: str, num_labels: int):
        """Create the encoder and the classification head.

        Args:
            model_name: Identifier handed to ``AutoModel.from_pretrained``.
            num_labels: Number of output logits produced by the linear head.
        """
        super().__init__()
        # Submodules are registered in the order encoder -> dropout -> classifier.
        self.encoder = AutoModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(p=0.1)
        # The head's input width follows the encoder's configured hidden size.
        self.classifier = nn.Linear(self.encoder.config.hidden_size, num_labels)

    def forward(self, **inputs) -> torch.Tensor:
        """Encode the inputs, mean-pool the token states and return the logits.

        Args:
            **inputs: Keyword arguments forwarded unchanged to the encoder;
                must contain ``attention_mask``, which is reused for pooling.

        Returns:
            The output of the linear head applied to the pooled representation.
        """
        encoder_output = self.encoder(**inputs)
        sentence_vector = mean_pooling(
            encoder_output.last_hidden_state,
            inputs["attention_mask"],
        )
        regularized = self.dropout(sentence_vector)
        return self.classifier(regularized)
def load_bundle(bundle_dir: str | Path):
    """Load the tokenizer, fine-tuned classifier and label map from a bundle directory.

    The directory is read for three things: ``hf_export_config.json`` (keys
    ``base_model_name``, ``num_labels`` and ``id2label``), the checkpoint
    ``best.pt`` (key ``model_state_dict``) and a ``tokenizer`` sub-directory.

    Args:
        bundle_dir: Path to the exported bundle directory.

    Returns:
        A ``(tokenizer, model, id2label)`` tuple. The model is on CPU in eval
        mode and ``id2label`` maps integer class ids to label names.
    """
    root = Path(bundle_dir)
    export_cfg = json.loads(
        (root / "hf_export_config.json").read_text(encoding="utf-8")
    )

    classifier = EmbeddingClassifier(
        model_name=export_cfg["base_model_name"],
        num_labels=export_cfg["num_labels"],
    )

    # NOTE(security): weights_only=False allows torch.load to unpickle arbitrary
    # objects, which can execute code. Only load bundles from a trusted source.
    saved = torch.load(root / "best.pt", map_location="cpu", weights_only=False)
    classifier.load_state_dict(saved["model_state_dict"])
    classifier.eval()

    tokenizer = AutoTokenizer.from_pretrained(root / "tokenizer")

    # JSON object keys are always strings; convert them back to integer ids.
    label_by_id = {}
    for raw_id, label in export_cfg["id2label"].items():
        label_by_id[int(raw_id)] = label
    return tokenizer, classifier, label_by_id
def main() -> None:
    """Load the bundle stored next to this file and classify one sample sentence.

    Prints a dict with the input text and the predicted label name.
    """
    tokenizer, model, id2label = load_bundle(Path(__file__).resolve().parent)
    # Korean sample sentence: "The likelihood of an interest-rate cut next year
    # is expected to have increased." Restored from a mis-encoded (mojibake)
    # literal; replace with any sentence you want to classify.
    text = "내년 금리 인하 가능성이 높아진 것으로 전망된다."
    encoded = tokenizer(
        [text],
        padding=True,
        truncation=True,
        max_length=256,
        return_tensors="pt",
    )
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(**encoded)
    # One sentence in the batch, so argmax over dim 1 yields a single class id.
    pred = int(torch.argmax(logits, dim=1).item())
    print({"text": text, "pred_label": id2label[pred]})


if __name__ == "__main__":
    main()