Spaces:

ilushado
/

article_roberta_classifier

Runtime error

App Files Community

ilushado committed on Apr 23, 2023

Commit

9e69dbf

1 Parent(s): ecf8794

Update app.py

Browse files

Files changed (1) hide show

app.py +68 -29

app.py CHANGED Viewed

@@ -7,9 +7,8 @@ import transformers
 import json
 from torch.utils.data import Dataset, DataLoader
 from transformers import RobertaModel, RobertaTokenizer
-from transformers import AutoModel, DistilBertTokenizer
 import transformers
-from transformers import pipeline
 idx_to_tag = {0: 'cs',
  1: 'stat',
@@ -17,8 +16,10 @@ idx_to_tag = {0: 'cs',
  3: 'math',
  4: 'q-bio',
  5: 'eess',
- 6: 'economics, finances'
-             }
 tag_to_idx = {'cs': 0,
@@ -27,12 +28,36 @@ tag_to_idx = {'cs': 0,
  'math': 3,
  'q-bio': 4,
  'eess': 5,
- 'economics, finances': 6
-}
-tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
-model = pipeline('./model')
 st.markdown("### Угадыватель")
@@ -45,32 +70,46 @@ ans = None
 if st.button('Предположить'):
-    inputs = tokenizer(title + ' : ' + abstract, return_tensors='pt')
-    inputs['input_ids'] = inputs['input_ids']
-    inputs['attention_mask'] = inputs['attention_mask']
-    with torch.no_grad():
-        logits = model(**inputs).logits
-    idx = torch.nn.functional.softmax(logits[0], dim=0).argmax().item()
     st.markdown(f'{idx_to_tag[idx]}')
 if st.button("Посмотреть топ"):
-    if not logits:
-        inputs = tokenizer(title + ' : ' + abstract, return_tensors='pt')
-        inputs['input_ids'] = inputs['input_ids']
-        inputs['attention_mask'] = inputs['attention_mask']
-        with torch.no_grad():
-            logits = model(**inputs).logits
-        idx = torch.nn.functional.softmax(logits[0], dim=0).argmax().item()
-    elems = [el.item() for el in logits[0].argsort(descending=True)]
-    print(len(elems))
-    probs = logits[0].softmax(dim=0)
     str_ans = ''
     current_prob = 0
     current_elems = []

 import json
 from torch.utils.data import Dataset, DataLoader
 from transformers import RobertaModel, RobertaTokenizer
 import transformers
 idx_to_tag = {0: 'cs',
  1: 'stat',
  3: 'math',
  4: 'q-bio',
  5: 'eess',
+ 6: 'economics, finances',
+ 7: 'gr-qc',
+ 8: 'hep-ex',
+ 9: 'hep-lat'}
 tag_to_idx = {'cs': 0,
  'math': 3,
  'q-bio': 4,
  'eess': 5,
+ 'economics, finances': 6,
+ 'gr-qc': 7,
+ 'hep-ex': 8,
+ 'hep-lat': 9}
+class RobertaClass(torch.nn.Module):
+    def __init__(self):
+        super(RobertaClass, self).__init__()
+        self.l1 = RobertaModel.from_pretrained("roberta-base")
+        self.pre_classifier = torch.nn.Linear(768, 768)
+        self.dropout = torch.nn.Dropout(0.3)
+        self.classifier = torch.nn.Linear(768, 5)
+    def forward(self, input_ids, attention_mask, token_type_ids):
+        output_1 = self.l1(input_ids=input_ids, attention_mask=attention_mask, token_type_ids=token_type_ids)
+        hidden_state = output_1[0]
+        pooler = hidden_state[:, 0]
+        pooler = self.pre_classifier(pooler)
+        pooler = torch.nn.ReLU()(pooler)
+        pooler = self.dropout(pooler)
+        output = self.classifier(pooler)
+        return output
+tokenizer = RobertaTokenizer.from_pretrained('roberta-base', truncation=True, do_lower_case=True,
+                                                 vocab_file='model/vocab.json',
+                                                 merges_file='model/merges.txt')
+model = torch.load('model/pytorch_roberta_sentiment.bin', map_location=torch.device('cpu'))
 st.markdown("### Угадыватель")
 if st.button('Предположить'):
+    text = title + " : " + abstract
+    inputs = tokenizer.encode_plus(
+                text,
+                None,
+                add_special_tokens=True,
+                max_length=256,
+                pad_to_max_length=True,
+                return_token_type_ids=True
+            )
+    ids = torch.Tensor(inputs['input_ids']).long()
+    mask = torch.Tensor(inputs['attention_mask']).long()
+    token_type_ids = torch.Tensor(inputs['token_type_ids']).long()
+    ans = model(ids.unsqueeze(0), mask.unsqueeze(0), token_type_ids.unsqueeze(0))
+    idx = torch.nn.functional.softmax(ans[0], dim=0).argmax().item()
     st.markdown(f'{idx_to_tag[idx]}')
 if st.button("Посмотреть топ"):
+    if not ans:
+        print(1)
+        text = title + " : " + abstract
+        inputs = tokenizer.encode_plus(
+                    text,
+                    None,
+                    add_special_tokens=True,
+                    max_length=256,
+                    pad_to_max_length=True,
+                    return_token_type_ids=True
+                )
+        ids = torch.Tensor(inputs['input_ids']).long()
+        mask = torch.Tensor(inputs['attention_mask']).long()
+        token_type_ids = torch.Tensor(inputs['token_type_ids']).long()
+        ans = model(ids.unsqueeze(0), mask.unsqueeze(0), token_type_ids.unsqueeze(0))
+    elems = [el.item() for el in ans[0].argsort(descending=True)]
+    probs = ans[0].softmax(dim=0)
     str_ans = ''
     current_prob = 0
     current_elems = []