TheAang committed on
Commit
8c30969
·
verified ·
1 Parent(s): 635e815

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +63 -0
app.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForSequenceClassification
2
+ from transformers import TFAutoModelForSequenceClassification
3
+ from transformers import AutoTokenizer
4
+ import numpy as np
5
+ from scipy.special import softmax
6
+ import csv
7
+ import urllib.request
8
+
9
+ # Preprocess text (username and link placeholders)
10
def _mask_token(token: str) -> str:
    """Return the placeholder for a mention or link, else *token* unchanged."""
    # A bare "@" is not treated as a mention, so it is kept as-is.
    if token.startswith('@') and len(token) > 1:
        return '@user'
    if token.startswith('http'):
        return 'http'
    return token


def preprocess(text: str) -> str:
    """Replace user mentions and links in *text* with fixed placeholders.

    The text is split on single spaces. Every token that starts with ``@``
    and is longer than one character becomes ``@user``; every token that
    starts with ``http`` becomes ``http``. Tokens are re-joined with single
    spaces, so runs of consecutive spaces are preserved.
    """
    return " ".join(_mask_token(token) for token in text.split(" "))
19
+
20
+ # Tasks:
21
+ # emoji, emotion, hate, irony, offensive, sentiment
22
+ # stance/abortion, stance/atheism, stance/climate, stance/feminist, stance/hillary
23
+
24
# Task to load; the model name and the label-mapping URL are both derived
# from it.
task = 'sentiment'
MODEL = f"cardiffnlp/twitter-roberta-base-{task}"

tokenizer = AutoTokenizer.from_pretrained(MODEL)

# Download the label mapping: a tab-separated file whose second column is
# read as the label name. Rows with fewer than two columns (e.g. a trailing
# blank line) are skipped.
mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
# Bound the request so a stalled connection cannot hang start-up forever.
with urllib.request.urlopen(mapping_link, timeout=30) as f:
    mapping_lines = f.read().decode('utf-8').splitlines()
csvreader = csv.reader(mapping_lines, delimiter='\t')
labels = [row[1] for row in csvreader if len(row) > 1]
36
+
37
# PT
model = AutoModelForSequenceClassification.from_pretrained(MODEL)
# Saving to a path equal to the hub id creates a local directory with that
# name, and later `from_pretrained(MODEL)` calls resolve to it. Save the
# tokenizer alongside the model so that directory is complete; otherwise the
# tokenizer load at the top of the script fails on the next run.
model.save_pretrained(MODEL)
tokenizer.save_pretrained(MODEL)

text = "Good night 😊"
text = preprocess(text)
encoded_input = tokenizer(text, return_tensors='pt')
output = model(**encoded_input)
# First element of the model output, first item of the batch
# (presumably the per-label logits for the single input text).
scores = output[0][0].detach().numpy()
# Turn the raw scores into probabilities that sum to 1.
scores = softmax(scores)
47
+
48
+ # # TF
49
+ # model = TFAutoModelForSequenceClassification.from_pretrained(MODEL)
50
+ # model.save_pretrained(MODEL)
51
+
52
+ # text = "Good night 😊"
53
+ # encoded_input = tokenizer(text, return_tensors='tf')
54
+ # output = model(encoded_input)
55
+ # scores = output[0][0].numpy()
56
+ # scores = softmax(scores)
57
+
58
# Print every label from highest to lowest score, one per line, numbered
# from 1, with the score rounded to four decimals.
ranking = np.argsort(scores)
ranking = ranking[::-1]
for rank, idx in enumerate(ranking, start=1):
    label = labels[idx]
    score = scores[idx]
    print(f"{rank}) {label} {np.round(float(score), 4)}")