Spaces:

TheAang
/

Sentiment_analysis3

Runtime error

File size: 1,357 Bytes

e6b4f52
8c30969
 
 
e6b4f52
 
8c30969
e6b4f52
 
8c30969
 
e6b4f52
 
 
 
 
 
 
 
 
 
8c30969
e6b4f52
 
8c30969
 
e6b4f52
8c30969
e6b4f52
8c30969
e6b4f52
8c30969
e6b4f52
8c30969
e6b4f52
 
8c30969
e6b4f52

from transformers import AutoModelForSequenceClassification, AutoTokenizer
import numpy as np
from scipy.special import softmax
import urllib.request
import csv
from huggingface_hub import snapshot_download

# Define model
task = 'sentiment'
MODEL = f"cardiffnlp/twitter-roberta-base-{task}"

# Download the model repository. snapshot_download returns the path of the
# local directory that holds the downloaded files, so keep it instead of
# discarding it.
model_dir = snapshot_download(repo_id=MODEL)

# Load tokenizer and model directly from the downloaded directory. Loading by
# path (rather than by repo id with local_files_only=True) does not depend on
# transformers locating the same cache entry that huggingface_hub just wrote.
tokenizer = AutoTokenizer.from_pretrained(model_dir, local_files_only=True)
model = AutoModelForSequenceClassification.from_pretrained(model_dir, local_files_only=True)

# Preprocessing function
def preprocess(text: str) -> str:
    """Mask user mentions and links in *text* before tokenization.

    The text is split on whitespace; each token that starts with ``@`` and
    has at least one more character becomes ``@user``, and each token that
    starts with ``http`` becomes ``http``. All other tokens are kept as-is.
    A bare ``@`` is not a mention and is left untouched, following the
    reference preprocessing published with the cardiffnlp model.

    Tokens are re-joined with single spaces, so runs of whitespace in the
    input are collapsed.
    """
    masked = []
    for token in text.split():
        if token.startswith('@') and len(token) > 1:
            masked.append('@user')
        elif token.startswith('http'):
            masked.append('http')
        else:
            masked.append(token)
    return " ".join(masked)

# Load labels: fetch the task's mapping file and keep the second
# tab-separated field of every row that has at least two fields.
labels = []
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
with urllib.request.urlopen(mapping_link) as response:
    mapping_lines = response.read().decode('utf-8').split("\n")
for fields in csv.reader(mapping_lines, delimiter='\t'):
    # Rows with fewer than two fields (e.g. a trailing empty line) are skipped.
    if len(fields) > 1:
        labels.append(fields[1])

# Sentiment analysis: tokenize the preprocessed text, run the model, and
# turn the logits of the single input into probabilities.
text = "Good night 😊"
cleaned_text = preprocess(text)
encoded_input = tokenizer(cleaned_text, return_tensors='pt')
output = model(**encoded_input)
logits = output.logits.detach().numpy()
scores = softmax(logits[0])

# Print results, highest score first
ranking = np.argsort(scores)[::-1]
for position, label_index in enumerate(ranking[:scores.shape[0]], start=1):
    label = labels[label_index]
    score = np.round(float(scores[label_index]), 4)
    print(f"{position}) {label} {score}")