File size: 937 Bytes
from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification, pipeline
import torch
# --- Model setup ------------------------------------------------------------
# Classification weights come from the local `model_path` directory; the
# tokenizer is deliberately loaded by its stock "distilbert-base-uncased" name
# rather than from `model_path`.
model_path = "models/distilbert"
tokenizer = DistilBertTokenizerFast.from_pretrained(
    "distilbert-base-uncased"
)
model = DistilBertForSequenceClassification.from_pretrained(model_path)

# --- Inference pipeline -----------------------------------------------------
# Bundle the model and tokenizer into a text-classification pipeline. Device
# index 0 is requested when CUDA is available; -1 is passed otherwise.
classifier = pipeline(
    task="text-classification",
    model=model,
    tokenizer=tokenizer,
    device=(0 if torch.cuda.is_available() else -1),
)

# --- Smoke test -------------------------------------------------------------
# Classify a single sentence and print the raw pipeline output.
sample_text = "I feel hopeless and have trouble sleeping."
result = classifier(sample_text)
print("Prediction:", result)

# --- Labels present in the training data ------------------------------------
# Print the distinct values of the "label" column of the training CSV in
# sorted order. NOTE(review): presumably these line up with the model's output
# labels -- confirm against the training script.
import pandas as pd

train_df = pd.read_csv("data/train.csv")
unique_labels = list(train_df["label"].unique())
unique_labels.sort()
print(f"Available labels: {unique_labels}")
|