Spaces:
Runtime error
Runtime error
Add dependencies
Browse files- app.py +10 -1
- constraints.py +68 -0
app.py
CHANGED
|
@@ -5,13 +5,22 @@ import pandas as pd
|
|
| 5 |
import torch
|
| 6 |
import tqdm
|
| 7 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 8 |
-
from baseline_BERT import id2label
|
| 9 |
import gradio as gr
|
|
|
|
| 10 |
|
| 11 |
model_ckpt = "Kithogue/2-lvl-events-multilingual"
|
| 12 |
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
|
| 13 |
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
def get_inference(sample):
|
| 16 |
model_hf = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
|
| 17 |
encoding = tokenizer(sample, return_tensors="pt")
|
|
|
|
| 5 |
import torch
|
| 6 |
import tqdm
|
| 7 |
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
|
|
|
| 8 |
import gradio as gr
|
| 9 |
+
import constraints
|
| 10 |
|
| 11 |
model_ckpt = "Kithogue/2-lvl-events-multilingual"
|
| 12 |
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
|
| 13 |
|
| 14 |
|
| 15 |
+
def get_labels2id(is_two_layer):
    """sorted to preserve the order"""
    # Consistency fix: this function duplicated the body of
    # constraints.get_labels2id line-for-line. Delegate to the canonical
    # implementation so the two modules cannot drift apart.
    return constraints.get_labels2id(is_two_layer)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
# Inverse mapping: integer class id -> label name (two-layer taxonomy).
id2label = {idx: label for label, idx in get_labels2id(True).items()}
|
| 22 |
+
|
| 23 |
+
|
| 24 |
def get_inference(sample):
|
| 25 |
model_hf = AutoModelForSequenceClassification.from_pretrained(model_ckpt)
|
| 26 |
encoding = tokenizer(sample, return_tensors="pt")
|
constraints.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""A module to produce a 3-dimension tensor of size [I, J, K], where I, J, K are number of classes on each level.
|
| 2 |
+
If there is a path from i to j to k, then the number is 1, otherwise 0.
|
| 3 |
+
Based on the lowest level of the taxonomy."""
|
| 4 |
+
|
| 5 |
+
import jsonlines
|
| 6 |
+
import numpy as np
|
| 7 |
+
import torch
|
| 8 |
+
|
| 9 |
+
# Select the computation device used by get_all_paths().
# Bug fix: the original `else` branch was the bare expression 'cpu' (a no-op),
# so `device` was never assigned on CPU-only machines and the later
# `.to(device)` raised NameError at import time.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def get_layered_labels(is_two_layer: bool = False,
                       path_to_train: str = '/home/kisa/events/event-detection-hierarchical/data/RAMS/flat/train.jsonl'):
    """Read the RAMS training set and split each unique event type into its taxonomy layers.

    Args:
        is_two_layer: if True, keep only the two coarsest layers of each label
            (via get_two_layers); otherwise split on every '.' separator.
        path_to_train: path to the flat train.jsonl file. Generalized from a
            hard-coded absolute path into a parameter; the default preserves
            the original behavior.

    Returns:
        A list of label paths, each a list of layer-name strings.
    """
    with jsonlines.open(path_to_train, 'r') as f:
        # each record holds its event type under ['events']['type']
        all_labels = [ff['events']['type'] for ff in f]
    # De-duplicate; order is not relied on downstream (labels get sorted later).
    all_labels = list(set(all_labels))
    if is_two_layer:
        return [get_two_layers(target) for target in all_labels]
    else:
        return [target.split('.') for target in all_labels]
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def get_two_layers(target):
    """Return the two coarsest taxonomy levels of a dot-separated label path."""
    layers = target.split('.')
    return layers[:2]
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def get_all_labels(is_two_layer):
    """Return the sorted unique layer names across all label paths, excluding 'n/a'."""
    unique_names = {name
                    for path in get_layered_labels(is_two_layer)
                    for name in path}
    # 'n/a' is a placeholder, not a real class — drop it if present.
    unique_names.discard('n/a')
    return sorted(unique_names)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def get_labels2id(is_two_layer):
    """sorted to preserve the order"""
    mapping = {}
    for index, label in enumerate(get_all_labels(is_two_layer)):
        mapping[label] = index
    return mapping
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def get_all_paths(is_two_layer):
    """Build the valid-path tensor: entry [i, j, k] is 1 iff i -> j -> k is a taxonomy path.

    'n/a' entries are filtered out: texts with that label are assumed to have
    only the two coarser layers as a valid label path, which is already
    included in the valid-path constraint.

    Returns a float tensor of shape [C, C, C] on `device`, where C is the
    number of classes.
    """
    labels2id = get_labels2id(is_two_layer)
    unique_id_paths = []
    for label_path in get_layered_labels(is_two_layer):
        id_path = [labels2id[name] for name in label_path if name != 'n/a']
        if id_path not in unique_id_paths:
            unique_id_paths.append(id_path)
    num_classes = len(labels2id)
    constraint = np.zeros((num_classes, num_classes, num_classes))
    for id_path in unique_id_paths:
        # Only complete three-level paths contribute a constraint entry.
        if len(id_path) > 2:
            constraint[id_path[0], id_path[1], id_path[2]] = 1
    return torch.Tensor(constraint).to(device)
|