Spaces:

SatAT
/

transformer_service

Sleeping

App Files Community

SatAT committed on Apr 15, 2023

Commit

8b24a35

1 Parent(s): 7d1dc00

Update app.py

Browse files

Files changed (1) hide show

app.py +91 -88

app.py CHANGED Viewed

@@ -10,13 +10,6 @@ from torch.utils.data import TensorDataset, DataLoader, RandomSampler, Sequentia
 st.markdown("### Paper category classification")
 st.markdown("<img width=200px src='https://rozetked.me/images/uploads/dwoilp3BVjlE.jpg'>", unsafe_allow_html=True)
 # ^-- можно показывать пользователю текст, картинки, ограниченное подмножество html - всё как в jupyter
-title = st.text_area("INPUT TITLE HERE")
-abstract = st.text_area("INPUT ABSTRACT HERE")
-# ^-- показать текстовое поле. В поле text лежит строка, которая находится там в данный момент
-if len(title) == 0 and len(abstract):
-    st.markdown(f"Could you input paper title/abstrac :)")
 @st.cache
 def load_model_and_tokenizer():
     tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
@@ -27,85 +20,95 @@ def load_model_and_tokenizer():
     return model, tokenizer
 model, tokenizer = load_model_and_tokenizer()
-MAX_LEN = 64
-# Преобразуем название статьи в токены
-tokens = tokenizer(title, padding=True, truncation=True, return_tensors="pt")
-# Получаем предсказание модели для названия статьи и абстракта (если есть)
-input_ids = tokens['input_ids']
-attention_mask = tokens['attention_mask']
-logits = model(input_ids, attention_mask)[0]
-tags_names = ['Accelerator Physics',
- 'adap-org',
- "adap-org",
- 'Algebra-Geometry',
- 'Astro-physics',
- "Astro-physics",
- 'Chao-dynamics',
- 'Chemistry-physics',
- 'cmp-lg',
- "cmp-lg",
- 'comp-gas',
- 'cond-mat',
- "cond-mat",
- 'Computer Science',
- 'dg-ga',
- 'Economics',
- 'eess',
- 'funct-an',
- 'gr-qc',
- "gr-qc",
- 'hep-ex',
- "hep-ex",
- 'hep-lat',
- "hep-lat",
- 'hep-ph',
- "hep-ph",
- 'hep-th',
- "hep-th",
- 'Math',
- 'math-ph',
- 'mtrl-th',
- 'nlin',
- 'nucl-ex',
- 'nucl-th',
- "nucl-th",
- 'patt-sol',
- 'Physics',
- 'q-alg',
- 'Quantitie-biology',
- 'q-fin',
- 'quant-ph',
- "quant-ph",
- 'solv-int',
- 'Statistics']
-if abstract:
-    abstract_tokens = tokenizer(abstract, padding=True, truncation=True, return_tensors="pt")
-    abstract_input_ids = abstract_tokens['input_ids']
-    abstract_attention_mask = abstract_tokens['attention_mask']
-    abstract_logits = model(abstract_input_ids, abstract_attention_mask)[0]
-    logits += abstract_logits
-# Получаем вероятности и сортируем их в порядке убывания
-probs = torch.softmax(logits, dim=-1).squeeze()
-sorted_probs, sorted_indices = torch.sort(probs, descending=True)
-# Считаем сумму вероятностей
-sum_probs = 0.0
-top_classes = []
-for i in range(len(sorted_probs)):
-    sum_probs += sorted_probs[i]
-    if sum_probs > 0.95 or sorted_probs[i] < 0.001:
-        break
-    top_classes.append((tags_names[sorted_indices[i].item()], sorted_probs[i].item()))
-# Выводим список тем с их вероятностями
-# from transformers import pipeline
-# pipe = pipeline("ner", "Davlan/distilbert-base-multilingual-cased-ner-hrl")
-raw_predictions = top_classes#le.inverse_transform(prediction)#pipe(text)
-# тут уже знакомый вам код с huggingface.transformers -- его можно заменить на что угодно от fairseq до catboost
-st.markdown(f"Possible categories with their probabilities for this paper : {raw_predictions}")
-# выводим результаты модели в текстовое поле, на потеху пользователю

 st.markdown("### Paper category classification")
 st.markdown("<img width=200px src='https://rozetked.me/images/uploads/dwoilp3BVjlE.jpg'>", unsafe_allow_html=True)
 # ^-- можно показывать пользователю текст, картинки, ограниченное подмножество html - всё как в jupyter
 @st.cache
 def load_model_and_tokenizer():
     tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
     return model, tokenizer
 model, tokenizer = load_model_and_tokenizer()
+title = st.text_area("INPUT TITLE HERE")
+abstract = st.text_area("INPUT ABSTRACT HERE")
+# ^-- show a text area; the returned value is the string currently entered in that field
+if len(title) == 0 and len(abstract) == 0:
+    st.markdown(f"Could you input paper title/abstract :)")
+elif len(title) == 0 and len(abstract) > 0:
+    st.markdown(f"Could you input paper title :)")
+else:
+    MAX_LEN = 64  # NOTE(review): not referenced anywhere in the visible code
+    # Convert the paper title into tokens
+    tokens = tokenizer(title, padding=True, truncation=True, return_tensors="pt")
+    # Get the model prediction for the paper title and the abstract (if present)
+    input_ids = tokens['input_ids']
+    attention_mask = tokens['attention_mask']
+    logits = model(input_ids, attention_mask)[0]
+    tags_names = ['Accelerator Physics',
+     'adap-org',
+     "adap-org",
+     'Algebra-Geometry',
+     'Astro-physics',
+     "Astro-physics",
+     'Chao-dynamics',
+     'Chemistry-physics',
+     'cmp-lg',
+     "cmp-lg",
+     'comp-gas',
+     'cond-mat',
+     "cond-mat",
+     'Computer Science',
+     'dg-ga',
+     'Economics',
+     'eess',
+     'funct-an',
+     'gr-qc',
+     "gr-qc",
+     'hep-ex',
+     "hep-ex",
+     'hep-lat',
+     "hep-lat",
+     'hep-ph',
+     "hep-ph",
+     'hep-th',
+     "hep-th",
+     'Math',
+     'math-ph',
+     'mtrl-th',
+     'nlin',
+     'nucl-ex',
+     'nucl-th',
+     "nucl-th",
+     'patt-sol',
+     'Physics',
+     'q-alg',
+     'Quantitie-biology',
+     'q-fin',
+     'quant-ph',
+     "quant-ph",
+     'solv-int',
+     'Statistics']
+    if abstract:
+        abstract_tokens = tokenizer(abstract, padding=True, truncation=True, return_tensors="pt")
+        abstract_input_ids = abstract_tokens['input_ids']
+        abstract_attention_mask = abstract_tokens['attention_mask']
+        abstract_logits = model(abstract_input_ids, abstract_attention_mask)[0]
+        logits += abstract_logits
+    # Compute the probabilities and sort them in descending order
+    probs = torch.softmax(logits, dim=-1).squeeze()
+    sorted_probs, sorted_indices = torch.sort(probs, descending=True)
+    # Accumulate the running probability sum; NOTE(review): the break below fires before the append, so the class that pushes the sum past 0.95 is not listed
+    sum_probs = 0.0
+    top_classes = []
+    for i in range(len(sorted_probs)):
+        sum_probs += sorted_probs[i]
+        if sum_probs > 0.95 or sorted_probs[i] < 0.001:
+            break
+        top_classes.append((tags_names[sorted_indices[i].item()], sorted_probs[i].item()))
+    # Output the list of topics with their probabilities
+    # from transformers import pipeline
+    # pipe = pipeline("ner", "Davlan/distilbert-base-multilingual-cased-ner-hrl")
+    raw_predictions = top_classes#le.inverse_transform(prediction)#pipe(text)
+    # this is the already-familiar huggingface.transformers code -- it can be replaced with anything from fairseq to catboost
+    st.markdown(f"Possible categories with their probabilities for this paper : {raw_predictions}")
+    # show the model results to the user (rendered via st.markdown)