|
|
import gradio as gr |
|
|
import torch |
|
|
from transformers import AutoTokenizer, AutoModelForTokenClassification |
|
|
import numpy as np |
|
|
|
|
|
# Hugging Face Hub id of the fine-tuned Swedish dependency-relation tagger.
MODEL = "elliot-evno/kb-bert-swedish-dep"


# Loaded once at import time; downloading/caching happens here, so first
# startup may take a while. Both objects are shared by every request.
tokenizer = AutoTokenizer.from_pretrained(MODEL)


model = AutoModelForTokenClassification.from_pretrained(MODEL)




# Universal Dependencies relation labels, indexed by the model's class ids.
# NOTE(review): order must match the model's training label map — confirm
# against the checkpoint's config if predictions look shifted.
DEP_LABELS = ["_", "acl", "acl:cleft", "acl:relcl", "advcl", "advmod", "amod", "appos", "aux", "aux:pass", "case", "cc", "ccomp", "compound:prt", "conj", "cop", "csubj", "csubj:pass", "det", "discourse", "dislocated", "expl", "fixed", "flat:name", "iobj", "mark", "nmod", "nmod:poss", "nsubj", "nsubj:outer", "nsubj:pass", "nummod", "obj", "obl", "obl:agent", "orphan", "parataxis", "punct", "root", "vocative", "xcomp"]
|
|
|
|
|
def predict_dependencies(text):
    """Predict Universal Dependencies relation labels for Swedish text.

    The text is whitespace-split into tokens, run through the module-level
    BERT token classifier, and each token is assigned the label predicted
    for its first sub-word piece.

    Args:
        text: Raw Swedish text; tokenization is a plain ``str.split``.

    Returns:
        One ``"token → label"`` line per input token, joined by newlines,
        or a prompt message when the input is blank.
    """
    if not text.strip():
        return "Please enter some Swedish text!"

    tokens = text.split()
    inputs = tokenizer(tokens, is_split_into_words=True, return_tensors="pt",
                       truncation=True, padding=True)

    with torch.no_grad():
        outputs = model(**inputs)
    # argmax over logits equals argmax over softmax(logits) — skip the softmax.
    predicted_label_ids = outputs.logits.argmax(-1)

    # Single pass over the sub-word pieces: remember the prediction of the
    # FIRST piece of each word. (Replaces the original O(tokens * pieces)
    # rescan of word_ids for every token.)
    first_piece_pred = {}
    for j, word_id in enumerate(inputs.word_ids()):
        if word_id is not None and word_id not in first_piece_pred:
            first_piece_pred[word_id] = predicted_label_ids[0][j].item()

    predicted_labels = []
    for i in range(len(tokens)):
        label_id = first_piece_pred.get(i)
        if label_id is not None and label_id < len(DEP_LABELS):
            predicted_labels.append(DEP_LABELS[label_id])
        else:
            # Token lost to truncation, or the model emitted an id outside
            # the known label set.
            predicted_labels.append("UNK")

    return "\n".join(f"{token} → {label}"
                     for token, label in zip(tokens, predicted_labels))
|
|
|
|
|
|
|
|
# Sample Swedish sentences shown as clickable examples in the Gradio UI.
examples = [


    "Jag heter Elliot.",


    "När barnen kom hem från skolan åt de pizza med sina föräldrar.",


    "Den svenska flickan som jag träffade igår läser en bok.",


    "Stockholm är Sveriges huvudstad och en vacker stad."


]
|
|
|
|
|
|
|
|
# Gradio UI wiring: one text box in, one text box out, around the parser.
_input_box = gr.Textbox(
    label="Swedish Text",
    placeholder="Enter Swedish text here...",
    lines=3,
)

_output_box = gr.Textbox(
    label="Dependency Relations",
    lines=10,
)

demo = gr.Interface(
    fn=predict_dependencies,
    inputs=_input_box,
    outputs=_output_box,
    title="🌲 Swedish Dependency Parser",
    description="Enter Swedish text to get dependency relations using a fine-tuned BERT model. Shows grammatical relationships between words using Universal Dependencies format.",
    examples=examples,
    theme=gr.themes.Soft(),
)
|
|
|
|
|
if __name__ == "__main__":
    # Start the Gradio server only when run as a script, not on import.

    demo.launch()