from transformers import AutoModelForSequenceClassification, AutoTokenizer
import numpy as np
from scipy.special import softmax
import urllib.request
import csv
from huggingface_hub import snapshot_download

# Define model
task = 'sentiment'
MODEL = f"cardiffnlp/twitter-roberta-base-{task}"

# Download model
snapshot_download(repo_id=MODEL)

# Load tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(MODEL, local_files_only=True)
model = AutoModelForSequenceClassification.from_pretrained(MODEL, local_files_only=True)

# Preprocessing function
def preprocess(text):
    return " ".join(['@user' if t.startswith('@') else 'http' if t.startswith('http') else t
                     for t in text.split()])

# Load labels
labels = []
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
with urllib.request.urlopen(mapping_link) as f:
    labels = [row[1] for row in csv.reader(f.read().decode('utf-8').split("\n"), delimiter='\t')
              if len(row) > 1]

# Sentiment analysis
text = "Good night 😊"
encoded_input = tokenizer(preprocess(text), return_tensors='pt')
output = model(**encoded_input)
scores = softmax(output.logits.detach().numpy()[0])

# Print results
ranking = np.argsort(scores)[::-1]
for i in range(scores.shape[0]):
    print(f"{i+1}) {labels[ranking[i]]} {np.round(float(scores[ranking[i]]), 4)}")
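
# --- Optional batch scoring helper ---
# A minimal sketch (this helper is an assumption, not part of the original
# snippet) showing how the same tokenizer and model can score several texts
# at once; the order of `labels` matches the model's output indices.
import torch

def predict_batch(texts):
    # Tokenize all texts together; padding aligns them to a common length.
    enc = tokenizer([preprocess(t) for t in texts],
                    return_tensors='pt', padding=True, truncation=True)
    with torch.no_grad():
        logits = model(**enc).logits
    probs = torch.softmax(logits, dim=-1).numpy()
    return [dict(zip(labels, map(float, p))) for p in probs]

# Example usage (hypothetical inputs)
print(predict_batch(["Good night 😊", "This is awful."]))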