unijoh commited on
Commit
ac86473
·
verified ·
1 Parent(s): 293c12b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +118 -118
app.py CHANGED
@@ -1,118 +1,118 @@
1
- import gradio as gr
2
- import torch
3
- import numpy as np
4
- import pandas as pd
5
- from transformers import AutoTokenizer, AutoModelForTokenClassification
6
-
7
- MODEL_ID = "YOUR_USERNAME/YOUR_MODEL_REPO"
8
- TAGS_FILEPATH = "Sosialurin-GOLD_tags.csv"
9
-
10
- tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
11
- model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)
12
-
13
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
14
- model.to(device)
15
- model.eval()
16
-
17
- def load_tag_mappings(tags_filepath):
18
- tags_df = pd.read_csv(tags_filepath)
19
- features_to_tag = {
20
- tuple(row[1:].values.astype(int)): row["Original Tag"]
21
- for _, row in tags_df.iterrows()
22
- }
23
- vec_len = len(tags_df.columns) - 1
24
- return features_to_tag, vec_len
25
-
26
- features_to_tag, VEC_LEN = load_tag_mappings(TAGS_FILEPATH)
27
-
28
- # Use the SAME intervals as your demo.py (keep these consistent!)
29
- intervals = (
30
- (15, 28),
31
- (29, 32),
32
- (33, 35),
33
- (36, 40),
34
- (41, 42),
35
- (43, 44),
36
- (45, 49),
37
- (50, 52),
38
- (53, 58),
39
- (59, 61),
40
- (62, 64),
41
- (65, 68),
42
- (69, 70),
43
- )
44
-
45
- def vector_to_tag(vec):
46
- return features_to_tag.get(tuple(vec.int().tolist()), "Unknown Tag")
47
-
48
- def tag_sentence(sentence: str):
49
- sentence = sentence.strip()
50
- if not sentence:
51
- return ""
52
-
53
- tokens = sentence.split()
54
-
55
- enc = tokenizer(
56
- tokens,
57
- is_split_into_words=True,
58
- add_special_tokens=True,
59
- max_length=128,
60
- padding="max_length",
61
- truncation=True,
62
- return_attention_mask=True,
63
- return_tensors="pt"
64
- )
65
-
66
- input_ids = enc["input_ids"].to(device)
67
- attention_mask = enc["attention_mask"].to(device)
68
- word_ids = enc.word_ids(batch_index=0)
69
-
70
- # begin token mask
71
- begin = []
72
- last = None
73
- for wid in word_ids:
74
- if wid is None:
75
- begin.append(0)
76
- elif wid != last:
77
- begin.append(1)
78
- else:
79
- begin.append(0)
80
- last = wid
81
-
82
- with torch.no_grad():
83
- out = model(input_ids=input_ids, attention_mask=attention_mask)
84
- logits = out.logits[0] # [seq_len, num_labels]
85
-
86
- lines = []
87
- for i in range(logits.shape[0]):
88
- if attention_mask[0, i].item() != 1 or begin[i] != 1:
89
- continue
90
-
91
- pred = logits[i]
92
- vec = torch.zeros(VEC_LEN, device=logits.device)
93
-
94
- # Word type in [0..14]
95
- wt = torch.argmax(pred[0:15]).item()
96
- vec[wt] = 1
97
-
98
- # Interval decoding
99
- for a, b in intervals:
100
- seg = pred[a:b+1]
101
- k = torch.argmax(seg).item()
102
- vec[a + k] = 1
103
-
104
- wid = word_ids[i]
105
- word = tokens[wid] if wid is not None and wid < len(tokens) else "<UNK>"
106
- lines.append(f"{word}\t{vector_to_tag(vec)}")
107
-
108
- return "\n".join(lines)
109
-
110
- demo = gr.Interface(
111
- fn=tag_sentence,
112
- inputs=gr.Textbox(lines=2, label="Sentence"),
113
- outputs=gr.Textbox(lines=12, label="Token\\tTag"),
114
- title="Faroese POS Tagger (Demo)"
115
- )
116
-
117
- if __name__ == "__main__":
118
- demo.launch()
 
1
+ import gradio as gr
2
+ import torch
3
+ import numpy as np
4
+ import pandas as pd
5
+ from transformers import AutoTokenizer, AutoModelForTokenClassification
6
+
7
# Hugging Face model repo for the Faroese POS tagger, and the CSV file that
# maps predicted binary feature vectors back to human-readable tags.
MODEL_ID = "Setur/BRAGD"
TAGS_FILEPATH = "Sosialurin-GOLD_tags.csv"

# Load the tokenizer and token-classification model once at import time so
# every Gradio request reuses the same weights.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)

# Run on GPU when available. Inference only, so switch to eval mode
# (disables dropout / other train-time behavior).
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
model.eval()
16
+
17
def load_tag_mappings(tags_filepath):
    """Load the feature-vector -> tag lookup table from a CSV file.

    The CSV must contain an "Original Tag" column (the human-readable tag)
    plus one column per binary feature.

    Args:
        tags_filepath: path to the CSV file.

    Returns:
        (features_to_tag, vec_len) where features_to_tag maps a tuple of
        int feature values to the tag string, and vec_len is the number of
        feature columns (the binary vector length).
    """
    tags_df = pd.read_csv(tags_filepath)
    # Drop the tag column by LABEL rather than slicing `row[1:]`: the
    # positional integer slice assumed "Original Tag" was the first column
    # and relied on deprecated/ambiguous integer indexing of a Series.
    # `.tolist()` yields plain Python ints, so keys match
    # tuple(vec.int().tolist()) lookups exactly.
    features_to_tag = {
        tuple(row.drop("Original Tag").astype(int).tolist()): row["Original Tag"]
        for _, row in tags_df.iterrows()
    }
    vec_len = len(tags_df.columns) - 1
    return features_to_tag, vec_len
25
+
26
# Build the lookup table once at startup; VEC_LEN is the number of binary
# feature columns in the CSV (the length of each decoded vector).
features_to_tag, VEC_LEN = load_tag_mappings(TAGS_FILEPATH)

# Use the SAME intervals as your demo.py (keep these consistent!)
# Each (start, end) pair is an INCLUSIVE index range into the model's logits
# forming one mutually-exclusive feature group; indices 0..14 (the word
# type) are handled separately in tag_sentence.
intervals = (
    (15, 28),
    (29, 32),
    (33, 35),
    (36, 40),
    (41, 42),
    (43, 44),
    (45, 49),
    (50, 52),
    (53, 58),
    (59, 61),
    (62, 64),
    (65, 68),
    (69, 70),
)
44
+
45
def vector_to_tag(vec):
    """Translate a binary feature vector (torch tensor) into its tag string.

    Returns "Unknown Tag" when the vector has no entry in the lookup table.
    """
    key = tuple(vec.int().tolist())
    return features_to_tag.get(key, "Unknown Tag")
47
+
48
def tag_sentence(sentence: str) -> str:
    """Tag a whitespace-tokenized sentence with Faroese POS tags.

    Runs the token-classification model, decodes the per-token logits into
    a binary feature vector, and maps each vector to a tag string via
    features_to_tag.

    Args:
        sentence: raw input sentence; split on whitespace into tokens.

    Returns:
        One "word<TAB>tag" line per input word, joined by newlines.
        Empty string for blank input.
    """
    sentence = sentence.strip()
    if not sentence:
        return ""

    tokens = sentence.split()

    # Fixed-length encoding; sentences longer than 128 subword tokens are
    # truncated, so trailing words of very long inputs get no tag.
    enc = tokenizer(
        tokens,
        is_split_into_words=True,
        add_special_tokens=True,
        max_length=128,
        padding="max_length",
        truncation=True,
        return_attention_mask=True,
        return_tensors="pt"
    )

    input_ids = enc["input_ids"].to(device)
    attention_mask = enc["attention_mask"].to(device)
    # word_ids[i] is the input-word index for subword position i, or None
    # for special/padding tokens.
    word_ids = enc.word_ids(batch_index=0)

    # begin token mask
    # begin[i] == 1 only for the FIRST subword of each word; predictions are
    # read from that position and continuation subwords are skipped.
    begin = []
    last = None
    for wid in word_ids:
        if wid is None:
            begin.append(0)
        elif wid != last:
            begin.append(1)
        else:
            begin.append(0)
        last = wid

    with torch.no_grad():
        out = model(input_ids=input_ids, attention_mask=attention_mask)
        logits = out.logits[0]  # [seq_len, num_labels]

    lines = []
    for i in range(logits.shape[0]):
        # Only decode real (attended) positions that start a word.
        if attention_mask[0, i].item() != 1 or begin[i] != 1:
            continue

        pred = logits[i]
        # NOTE(review): decoding assumes num_labels >= 71 and VEC_LEN >= 71
        # so every interval index is valid — confirm against the model config.
        vec = torch.zeros(VEC_LEN, device=logits.device)

        # Word type in [0..14]
        wt = torch.argmax(pred[0:15]).item()
        vec[wt] = 1

        # Interval decoding
        # Within each inclusive (a, b) group, set exactly one feature: the
        # argmax of that slice of the logits.
        for a, b in intervals:
            seg = pred[a:b+1]
            k = torch.argmax(seg).item()
            vec[a + k] = 1

        wid = word_ids[i]
        # Defensive fallback; with a valid begin mask wid should always be a
        # valid word index.
        word = tokens[wid] if wid is not None and wid < len(tokens) else "<UNK>"
        lines.append(f"{word}\t{vector_to_tag(vec)}")

    return "\n".join(lines)
109
+
110
# Gradio UI: one sentence in, one "word<TAB>tag" line per token out.
demo = gr.Interface(
    fn=tag_sentence,
    inputs=gr.Textbox(lines=2, label="Sentence"),
    outputs=gr.Textbox(lines=12, label="Token\\tTag"),
    title="Faroese POS Tagger (Demo)"
)

if __name__ == "__main__":
    demo.launch()