Spaces:
Running
Running
| # Project Frozone | |
| # (C) 2025 | |
| import sys | |
| import argparse | |
| import torch | |
| from transformers import ( | |
| AutoConfig, | |
| AutoModelForSequenceClassification, | |
| AutoTokenizer | |
| ) | |
# Keep printed tensors compact: four digits of precision and never
# scientific notation.
torch.set_printoptions(
    sci_mode=False,
    precision=4,
)
def load_model(model_name: str):
    """Load everything needed to classify text with ``model_name``.

    Fetches the config, a fast tokenizer and the sequence-classification
    model via the ``Auto*`` classes, then moves the model to CUDA when it is
    available (CPU otherwise) and puts it in eval mode.

    Args:
        model_name: Name passed to each ``from_pretrained`` call.

    Returns:
        A ``(config, tokenizer, model, device)`` tuple, where ``device`` is
        the string ``"cuda"`` or ``"cpu"``.
    """
    print(f"Loading model {model_name}...")

    config = AutoConfig.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
    classifier = AutoModelForSequenceClassification.from_pretrained(model_name)

    if torch.cuda.is_available():
        target_device = "cuda"
    else:
        target_device = "cpu"

    # Inference only: place the weights on the chosen device, then disable
    # training-time behavior.
    classifier.to(target_device)
    classifier.eval()

    return config, tokenizer, classifier, target_device
def _pick_activation(problem_type, num_labels):
    """Decide which function turns raw logits into probabilities.

    Multi-label classification: there are multiple, non-exclusive
    categories, and the text gets a separate, independent score for each.
    Example: a classifier that measures beauty, toxicity and interestingness,
    each on its own 0-to-1 scale. *Sigmoid* converts those logits to
    probabilities.

    Single-label classification: the categories are mutually exclusive, and
    the text gets a relative score for each, indicating how likely that
    label is to be the correct answer. Example: a classifier that decides
    whether a text is about politics, sports, or entertainment. *Softmax*
    converts those logits to probabilities.

    Any other ``problem_type`` (including ``None``) prints a warning and
    falls back to a guess based on ``num_labels``.

    Args:
        problem_type: The config's ``problem_type`` value; may be ``None``.
        num_labels: The config's ``num_labels`` value.

    Returns:
        ``"sigmoid"`` or ``"softmax"``.
    """
    if problem_type == "multi_label_classification":
        return "sigmoid"
    if problem_type == "single_label_classification":
        return "softmax"

    print("Gah -- problem type not set! Lazy modeler...")
    if num_labels == 1:
        print("Assuming multi-label...")
        return "sigmoid"
    print("Assuming single-label...")
    return "softmax"


def _logits_to_probs(logits, activation):
    """Convert the logits for one input text into a flat list of floats.

    Args:
        logits: Logits tensor produced by the model for a single text.
        activation: ``"sigmoid"`` or ``"softmax"`` (see ``_pick_activation``).

    Returns:
        A list with one probability per logit -- always a list, even when
        there is only one label.
    """
    # Flatten with reshape(-1) rather than squeeze(): squeeze() turns the
    # logits of a one-label model into a 0-d tensor, whose tolist() is a
    # bare float that cannot be indexed per label.
    flat = logits.reshape(-1)
    if activation == "sigmoid":
        scores = torch.sigmoid(flat)
    else:
        scores = torch.softmax(flat, dim=-1)
    return scores.detach().cpu().tolist()


def _format_scores(id2label, probs):
    """Build one right-aligned ``label: probability`` line per score.

    Args:
        id2label: Mapping from label index to label name.
        probs: Probabilities, ordered by label index.

    Returns:
        A list of display strings, one per entry in ``probs``. Indices
        missing from ``id2label`` are shown as ``LABEL_<index>``.
    """
    lines = []
    for idx, prob in enumerate(probs):
        label = id2label.get(idx, f"LABEL_{idx}")
        lines.append(f"{label:>14}: {prob:.4f}")
    return lines


def main():
    """Interactively score typed text with a HF sequence-classification model.

    Reads the model name from the command line (optional positional
    argument), loads it with ``load_model``, then repeatedly prompts for text
    and prints a probability per label until the user types ``done`` or
    stdin reaches end-of-file.
    """
    parser = argparse.ArgumentParser("HF interactive playground")
    parser.add_argument(
        "model",
        nargs="?",
        help="full HF model name",
        default="minh21/XLNet-Reddit-Toxic-Comment-Classification",
    )
    args = parser.parse_args()

    with torch.inference_mode():
        cfg, tok, model, device = load_model(args.model)

        # If no PAD token, reuse EOS (or UNK) as PAD
        if tok.pad_token is None:
            tok.pad_token = tok.eos_token or tok.unk_token
            model.config.pad_token_id = tok.pad_token_id

        # The config does not change between prompts, so decide once (and
        # warn once) how logits become probabilities.
        activation = _pick_activation(cfg.problem_type, cfg.num_labels)

        while True:
            try:
                text = input("Enter text (or 'done'): ")
            except EOFError:
                # Closed/piped stdin: finish quietly instead of a traceback.
                print()
                break
            if text == "done":
                break

            encoded = tok(
                text,
                padding=True,
                truncation=True,
                max_length=256,
                return_tensors="pt",
            )
            encoded = {k: v.to(device) for k, v in encoded.items()}
            out = model(**encoded)

            probs = _logits_to_probs(out.logits, activation)
            for line in _format_scores(cfg.id2label, probs):
                print(line)


if __name__ == "__main__":
    main()