dejanseo
/

LinkBERT

Token Classification

Model card Files Files and versions

dejanseo committed on Feb 10, 2024

Commit

43b042c

·

verified ·

1 Parent(s): a9a3546

Upload 2 files

Files changed (2) hide show

inference.py +59 -0
linkbert.pth +3 -0

inference.py ADDED Viewed

	@@ -0,0 +1,59 @@

+import streamlit as st
+import torch
+from transformers import BertForTokenClassification, BertTokenizerFast  # Import BertTokenizerFast
+def load_model(model_name='linkbert.pth'):
+    model_path = model_name
+    model = BertForTokenClassification.from_pretrained('bert-base-uncased', num_labels=2)
+    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
+    model.eval()  # Set the model to inference mode
+    return model
+def predict_and_annotate(model, tokenizer, text):
+    # Tokenize the input text with special tokens
+    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True, return_offsets_mapping=True)
+    input_ids, attention_mask, offset_mapping = inputs["input_ids"], inputs["attention_mask"], inputs["offset_mapping"]
+    with torch.no_grad():
+        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
+        predictions = torch.argmax(outputs.logits, dim=-1)
+    tokens = tokenizer.convert_ids_to_tokens(input_ids.squeeze().tolist())
+    predictions = predictions.squeeze().tolist()
+    offset_mapping = offset_mapping.squeeze().tolist()
+    annotated_text = ""
+    previous_end = 0
+    for offset, prediction in zip(offset_mapping, predictions):
+        start, end = offset
+        if start == end:  # Skip special tokens
+            continue
+        if prediction == 1:  # Anchor text
+            if start > previous_end:
+                annotated_text += text[previous_end:start]
+            annotated_text += f"<u>{text[start:end]}</u>"
+        else:
+            if start > previous_end:
+                annotated_text += text[previous_end:start]
+            annotated_text += text[start:end]
+        previous_end = end
+    annotated_text += text[previous_end:]  # Append remaining text
+    return annotated_text
+# Streamlit app setup
+st.title("BERT Token Classification for Anchor Text Prediction")
+# Load the model and tokenizer
+model = load_model('linkbert.pth')
+tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')  # Use BertTokenizerFast
+# User input text area
+user_input = st.text_area("Paste the text you want to analyze:", "Type or paste text here.")
+if st.button("Predict Anchor Texts"):
+    if user_input:
+        annotated_text = predict_and_annotate(model, tokenizer, user_input)
+        st.markdown(annotated_text, unsafe_allow_html=True)
+    else:
+        st.write("Please paste some text into the text area.")

linkbert.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:81dc286402b449bf1e0348dbd7f8bb0b64a284f452bd4e0b2bb41ddbac492a24
+size 435654416