The model was fine-tuned in MLX (bfloat16) using a library called mlx-raclate. The base model, Qwen3-Embedding-0.6B, was adapted for sequence classification by adding a classification head (`score`, a linear layer).
The mlx-raclate architecture for sequence classification matches transformers' Qwen3ForSequenceClassification, which enables native support in transformers.
The model expects a specific input format; refer to the dataset page. For simplicity, the examples below just select a random entry from the test set.
Usage
With transformers
"""Classify a wine review with the fine-tuned model via plain transformers."""
from datasets import load_dataset
import polars as pl
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Pull the test split and pick one random review to classify.
ds = load_dataset("PITTI/wine-reviews", split="test")
df = pl.from_arrow(ds.data.table)
random_sample = df.sample(1).to_dicts()[0]
print(f"Random sample from test data: {random_sample}")

# Load model and tokenizer (Qwen3ForSequenceClassification under the hood).
tokenizer = AutoTokenizer.from_pretrained("PITTI/Qwen3-Embedding-0.6B-Wine-Variety-Classifier")
model = AutoModelForSequenceClassification.from_pretrained("PITTI/Qwen3-Embedding-0.6B-Wine-Variety-Classifier")
id2label = model.config.id2label

# Format input
text = random_sample["text"]
inputs = tokenizer(text, return_tensors="pt")

# Perform inference — no gradients needed for a forward pass.
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits

predicted_class_id = logits.argmax().item()
print(f"Predicted class ID: {predicted_class_id}")
print(f"Predicted label: {id2label[predicted_class_id]}")
print(f"expected label: {random_sample['label']}")
With MLX-Raclate
"""Classify a wine review with the fine-tuned model via mlx-raclate."""
from datasets import load_dataset
import polars as pl
import mlx.core as mx
from mlx_raclate.utils.utils import load

# Pull the test split and pick one random review to classify.
ds = load_dataset("PITTI/wine-reviews", split="test")
df = pl.from_arrow(ds.data.table)
random_sample = df.sample(1).to_dicts()[0]
print(f"Random sample from test data: {random_sample}")

# Load model and tokenizer
model, tokenizer = load(
    "PITTI/Qwen3-Embedding-0.6B-Wine-Variety-Classifier",
    pipeline='text-classification',  # in practice the pipeline should be identified automatically
)
id2label = model.config.id2label

text = random_sample["text"]
inputs = tokenizer._tokenizer(text, return_tensors="mlx")
outputs = model(**inputs)

# The pipeline output exposes post-softmax scores, not raw logits;
# argmax is unaffected since softmax is monotonic.
probabilities = outputs['probabilities']
predicted_class_id = probabilities.argmax().item()
print(f"Predicted class ID: {predicted_class_id}")
# NOTE(review): id2label is keyed by *string* here (JSON-loaded config),
# unlike the transformers example which indexes with an int — confirm.
print(f"Predicted label: {id2label[str(predicted_class_id)]}")
print(f"expected label: {random_sample['label']}")
- Downloads last month
- 1