Spaces:
Running
Running
Commit
·
569bb4f
1
Parent(s):
b6980f9
add streaming
Browse files
app.py
CHANGED
|
@@ -21,23 +21,25 @@ def load_models():
|
|
| 21 |
_ = model.to("cuda:0") # comment if no GPU available
|
| 22 |
_ = model.eval()
|
| 23 |
print("+++++ loaded model", time() - st_time)
|
| 24 |
-
dataset = load_dataset('Babelscape/rebel-dataset', split="validation")
|
| 25 |
-
return (tokenizer, model, dataset)
|
| 26 |
|
| 27 |
def extract_triplets(text):
|
| 28 |
triplets = []
|
| 29 |
-
relation = ''
|
|
|
|
|
|
|
| 30 |
for token in text.split():
|
| 31 |
if token == "<triplet>":
|
| 32 |
current = 't'
|
| 33 |
if relation != '':
|
| 34 |
-
triplets.append((
|
| 35 |
relation = ''
|
| 36 |
subject = ''
|
| 37 |
elif token == "<subj>":
|
| 38 |
current = 's'
|
| 39 |
if relation != '':
|
| 40 |
-
triplets.append((
|
| 41 |
object_ = ''
|
| 42 |
elif token == "<obj>":
|
| 43 |
current = 'o'
|
|
@@ -49,10 +51,10 @@ def extract_triplets(text):
|
|
| 49 |
object_ += ' ' + token
|
| 50 |
elif current == 'o':
|
| 51 |
relation += ' ' + token
|
| 52 |
-
|
|
|
|
| 53 |
return triplets
|
| 54 |
|
| 55 |
-
|
| 56 |
tokenizer, model, dataset = load_models()
|
| 57 |
|
| 58 |
agree = st.checkbox('Free input', False)
|
|
|
|
| 21 |
_ = model.to("cuda:0") # comment if no GPU available
|
| 22 |
_ = model.eval()
|
| 23 |
print("+++++ loaded model", time() - st_time)
|
| 24 |
+
dataset = load_dataset('Babelscape/rebel-dataset', split="validation", streaming=True)
|
| 25 |
+
return (tokenizer, model, dataset.take(1000))
|
| 26 |
|
| 27 |
def extract_triplets(text):
|
| 28 |
triplets = []
|
| 29 |
+
relation, subject, relation, object_ = '', '', '', ''
|
| 30 |
+
text = text.strip()
|
| 31 |
+
current = 'x'
|
| 32 |
for token in text.split():
|
| 33 |
if token == "<triplet>":
|
| 34 |
current = 't'
|
| 35 |
if relation != '':
|
| 36 |
+
triplets.append({'head': subject.strip(), 'type': relation.strip(),'tail': object_.strip()})
|
| 37 |
relation = ''
|
| 38 |
subject = ''
|
| 39 |
elif token == "<subj>":
|
| 40 |
current = 's'
|
| 41 |
if relation != '':
|
| 42 |
+
triplets.append({'head': subject.strip(), 'type': relation.strip(),'tail': object_.strip()})
|
| 43 |
object_ = ''
|
| 44 |
elif token == "<obj>":
|
| 45 |
current = 'o'
|
|
|
|
| 51 |
object_ += ' ' + token
|
| 52 |
elif current == 'o':
|
| 53 |
relation += ' ' + token
|
| 54 |
+
if subject != '' and relation != '' and object_ != '':
|
| 55 |
+
triplets.append({'head': subject.strip(), 'type': relation.strip(),'tail': object_.strip()})
|
| 56 |
return triplets
|
| 57 |
|
|
|
|
| 58 |
tokenizer, model, dataset = load_models()
|
| 59 |
|
| 60 |
agree = st.checkbox('Free input', False)
|