subhankarmannayfy committed on
Commit
b744f77
·
verified ·
1 Parent(s): 17d3919

update_roberta

Browse files
Files changed (1) hide show
  1. roberta_predict.py +102 -106
roberta_predict.py CHANGED
@@ -1,107 +1,103 @@
1
- import os
2
- import torch
3
- import numpy as np
4
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
-
6
- from huggingface_hub import login
7
-
8
- HF_TOKEN = os.getenv("HF_TOKEN")
9
-
10
- if HF_TOKEN:
11
- login(token=HF_TOKEN)
12
-
13
-
14
- MAX_LEN = 64
15
- labels = ["Negative", "Neutral", "Positive"]
16
-
17
-
18
- MODEL_REPOS = {
19
- "roberta": "subhankarmannayfy/brand-roberta",
20
- "distilroberta": "subhankarmannayfy/brand-distilroberta",
21
- "bert": "subhankarmannayfy/brand-bert",
22
- "albert": "subhankarmannayfy/brand-albert"
23
- }
24
-
25
-
26
- BASE_TOKENIZERS = {
27
- "roberta": "roberta-base",
28
- "distilroberta": "distilroberta-base",
29
- "bert": "bert-base-uncased",
30
- "albert": "albert-base-v2"
31
- }
32
-
33
- MODEL_CACHE = {}
34
-
35
-
36
-
37
-
38
- def load_model(model_name):
39
- if model_name in MODEL_CACHE:
40
- return MODEL_CACHE[model_name]
41
-
42
- print(f"🔄 Loading {model_name} from HuggingFace...")
43
-
44
- tokenizer = AutoTokenizer.from_pretrained(BASE_TOKENIZERS[model_name])
45
-
46
- model = AutoModelForSequenceClassification.from_pretrained(
47
- MODEL_REPOS[model_name],
48
- token=HF_TOKEN
49
- )
50
-
51
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
52
- model.to(device)
53
- model.eval()
54
-
55
- MODEL_CACHE[model_name] = (tokenizer, model, device)
56
- return tokenizer, model, device
57
-
58
-
59
- def predict(text, model_name="roberta"):
60
- tokenizer, model, device = load_model(model_name)
61
-
62
- inputs = tokenizer(
63
- text,
64
- return_tensors="pt",
65
- truncation=True,
66
- padding=True,
67
- max_length=MAX_LEN
68
- ).to(device)
69
-
70
- with torch.no_grad():
71
- outputs = model(**inputs)
72
- probs = torch.softmax(outputs.logits, dim=1).cpu().numpy()[0]
73
-
74
- pred = np.argmax(probs)
75
- return labels[pred], probs.tolist()
76
-
77
-
78
- def compare_all_models(text):
79
- results = []
80
-
81
- for model_name in MODEL_REPOS.keys():
82
- tokenizer, model, device = load_model(model_name)
83
-
84
- inputs = tokenizer(
85
- text,
86
- return_tensors="pt",
87
- truncation=True,
88
- padding=True,
89
- max_length=MAX_LEN
90
- ).to(device)
91
-
92
- with torch.no_grad():
93
- outputs = model(**inputs)
94
- probs = torch.softmax(outputs.logits, dim=1).cpu().numpy()[0]
95
-
96
- pred = np.argmax(probs)
97
-
98
- results.append({
99
- "model": model_name,
100
- "prediction": labels[pred],
101
- "confidence": float(max(probs)),
102
- "negative": float(probs[0]),
103
- "neutral": float(probs[1]),
104
- "positive": float(probs[2]),
105
- })
106
-
107
  return results
 
1
+ import os
2
+ import torch
3
+ import numpy as np
4
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
5
+
6
+ from huggingface_hub import login
7
+
8
+
9
+
10
+
11
+ MAX_LEN = 64
12
+ labels = ["Negative", "Neutral", "Positive"]
13
+
14
+
15
+ MODEL_REPOS = {
16
+ "roberta": "subhankarmannayfy/brand-roberta",
17
+ "distilroberta": "subhankarmannayfy/brand-distilroberta",
18
+ "bert": "subhankarmannayfy/brand-bert",
19
+ "albert": "subhankarmannayfy/brand-albert"
20
+ }
21
+
22
+
23
+ BASE_TOKENIZERS = {
24
+ "roberta": "roberta-base",
25
+ "distilroberta": "distilroberta-base",
26
+ "bert": "bert-base-uncased",
27
+ "albert": "albert-base-v2"
28
+ }
29
+
30
+ MODEL_CACHE = {}
31
+
32
+
33
+
34
+
35
+ def load_model(model_name):
36
+ if model_name in MODEL_CACHE:
37
+ return MODEL_CACHE[model_name]
38
+
39
+ print(f"🔄 Loading {model_name} from HuggingFace...")
40
+
41
+ tokenizer = AutoTokenizer.from_pretrained(BASE_TOKENIZERS[model_name])
42
+
43
+ model = AutoModelForSequenceClassification.from_pretrained(
44
+ MODEL_REPOS[model_name]
45
+ )
46
+
47
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
48
+ model.to(device)
49
+ model.eval()
50
+
51
+ MODEL_CACHE[model_name] = (tokenizer, model, device)
52
+ return tokenizer, model, device
53
+
54
+
55
+ def predict(text, model_name="roberta"):
56
+ tokenizer, model, device = load_model(model_name)
57
+
58
+ inputs = tokenizer(
59
+ text,
60
+ return_tensors="pt",
61
+ truncation=True,
62
+ padding=True,
63
+ max_length=MAX_LEN
64
+ ).to(device)
65
+
66
+ with torch.no_grad():
67
+ outputs = model(**inputs)
68
+ probs = torch.softmax(outputs.logits, dim=1).cpu().numpy()[0]
69
+
70
+ pred = np.argmax(probs)
71
+ return labels[pred], probs.tolist()
72
+
73
+
74
+ def compare_all_models(text):
75
+ results = []
76
+
77
+ for model_name in MODEL_REPOS.keys():
78
+ tokenizer, model, device = load_model(model_name)
79
+
80
+ inputs = tokenizer(
81
+ text,
82
+ return_tensors="pt",
83
+ truncation=True,
84
+ padding=True,
85
+ max_length=MAX_LEN
86
+ ).to(device)
87
+
88
+ with torch.no_grad():
89
+ outputs = model(**inputs)
90
+ probs = torch.softmax(outputs.logits, dim=1).cpu().numpy()[0]
91
+
92
+ pred = np.argmax(probs)
93
+
94
+ results.append({
95
+ "model": model_name,
96
+ "prediction": labels[pred],
97
+ "confidence": float(max(probs)),
98
+ "negative": float(probs[0]),
99
+ "neutral": float(probs[1]),
100
+ "positive": float(probs[2]),
101
+ })
102
+
 
 
 
 
103
  return results