Instructions to use FlowRank/mailSort with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use FlowRank/mailSort with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("text-classification", model="FlowRank/mailSort")

# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("FlowRank/mailSort", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| from __future__ import annotations | |
| import argparse | |
| from collections import Counter, defaultdict | |
| from dataclasses import dataclass | |
| from datasets import load_dataset | |
| from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline | |
@dataclass
class Config:
    """Resolved command-line options for one evaluation run.

    BUG FIX: the class previously lacked the ``@dataclass`` decorator even
    though ``dataclass`` is imported and ``_parse_args`` constructs
    ``Config(dataset_id=..., ...)`` with keyword arguments — without the
    decorator that call raises ``TypeError: Config() takes no arguments``.
    """

    dataset_id: str  # Hugging Face dataset repo id to evaluate against
    model_id_or_path: str  # local path or Hub repo id of the model
    subfolder: str | None  # optional subfolder inside the model repo
    split: str  # dataset split to evaluate (e.g. "test")
    max_samples: int | None  # cap on evaluated examples; None means all
| def _build_text(subject: str, body: str) -> str: | |
| subject = "" if subject is None else str(subject) | |
| body = "" if body is None else str(body) | |
| if subject and body: | |
| return f"Subject: {subject}\n\nBody: {body}" | |
| return subject or body | |
def _parse_args() -> Config:
    """Parse the command line and return a populated :class:`Config`."""
    parser = argparse.ArgumentParser(
        description="Evaluate a model (local or Hub) against a HF dataset split."
    )
    parser.add_argument("--dataset-id", default="FlowRank/labeled_emails")
    parser.add_argument(
        "--model",
        default="outputs",
        help="Local path OR Hugging Face repo id (e.g. FlowRank/mailSort).",
    )
    parser.add_argument("--subfolder", default=None, help="Optional subfolder (e.g. model).")
    parser.add_argument("--split", default="test", help="Which split to evaluate (e.g. test).")
    parser.add_argument(
        "--max-samples", type=int, default=None, help="Limit evaluation to N samples."
    )
    args = parser.parse_args()
    return Config(
        dataset_id=args.dataset_id,
        model_id_or_path=args.model,
        subfolder=args.subfolder,
        split=args.split,
        max_samples=args.max_samples,
    )
def main() -> int:
    """Evaluate a text-classification model over a Hugging Face dataset split.

    Loads the dataset and model named by the CLI config, classifies every
    example (optionally capped by ``--max-samples``), and prints overall
    accuracy, per-label accuracy, and the most common confusions per label.

    Returns:
        0 on success (the exit code for ``SystemExit``).

    Raises:
        SystemExit: if the requested split does not exist in the dataset.
    """
    cfg = _parse_args()

    ds = load_dataset(cfg.dataset_id)
    if cfg.split not in ds:
        raise SystemExit(f"Split '{cfg.split}' not found. Available: {list(ds.keys())}")
    rows = ds[cfg.split]
    if cfg.max_samples is not None:
        rows = rows.select(range(min(cfg.max_samples, len(rows))))

    # Only forward `subfolder` when set, so plain repo ids / local paths work.
    kwargs = {}
    if cfg.subfolder:
        kwargs["subfolder"] = cfg.subfolder
    tokenizer = AutoTokenizer.from_pretrained(cfg.model_id_or_path, **kwargs)
    model = AutoModelForSequenceClassification.from_pretrained(cfg.model_id_or_path, **kwargs)
    clf = pipeline(
        "text-classification",
        model=model,
        tokenizer=tokenizer,
        truncation=True,
    )

    correct = 0
    total = 0
    per_label = Counter()  # true label -> number of examples seen
    per_label_ok = Counter()  # true label -> number predicted correctly
    confusion = defaultdict(Counter)  # true -> pred -> count
    for ex in rows:
        text = _build_text(ex.get("subject"), ex.get("body"))
        true_label = str(ex["label"])
        # top_k=1 returns a single-element list of {"label", "score"} dicts.
        pred = clf(text, top_k=1)[0]["label"]
        total += 1
        per_label[true_label] += 1
        confusion[true_label][pred] += 1
        if pred == true_label:
            correct += 1
            per_label_ok[true_label] += 1

    acc = correct / total if total else 0.0
    print(f"dataset={cfg.dataset_id} split={cfg.split} samples={total}")
    print(f"accuracy={acc:.4f} ({correct}/{total})")

    print("\nper-label accuracy:")
    for label in sorted(per_label.keys()):
        denom = per_label[label]
        num = per_label_ok[label]
        print(f"- {label}: {num}/{denom} = {num/denom:.4f}")

    # Print the most frequent predictions per true label (lightweight view
    # of the confusion matrix).
    # BUG FIX: the header previously said "top-2" while most_common(3)
    # prints up to three entries — the message now matches the code.
    print("\ncommon confusions (top-3 per true label):")
    for true_label in sorted(confusion.keys()):
        most = confusion[true_label].most_common(3)
        # Skip labels whose only observed prediction is the correct one.
        if len(most) == 1 and most[0][0] == true_label:
            continue
        top = ", ".join(f"{pred}:{cnt}" for pred, cnt in most)
        print(f"- {true_label}: {top}")
    return 0
| if __name__ == "__main__": | |
| raise SystemExit(main()) | |