Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,98 +1,48 @@
|
|
| 1 |
import os
|
| 2 |
-
import torch
|
| 3 |
import gradio as gr
|
| 4 |
-
from transformers import
|
| 5 |
-
import torch.nn.functional as F
|
| 6 |
|
| 7 |
-
#
|
| 8 |
-
|
| 9 |
-
model = BertForMaskedLM.from_pretrained("robzchhangte/mMizBERT", use_auth_token=token)
|
| 10 |
-
tokenizer = AutoTokenizer.from_pretrained("robzchhangte/mMizBERT", use_auth_token=token)
|
| 11 |
-
|
| 12 |
-
# Function to clean subword tokens (remove '##' and filter out special tokens)
|
| 13 |
-
def clean_token(token):
|
| 14 |
-
return token.replace('##', '')
|
| 15 |
-
|
| 16 |
-
# Function to predict the masked word with cleaned tokens and scores
|
| 17 |
-
def predict_masked_word(text):
|
| 18 |
-
# Tokenize input text
|
| 19 |
-
inputs = tokenizer(text, return_tensors="pt")
|
| 20 |
-
|
| 21 |
-
# Find the index of the [MASK] token
|
| 22 |
-
mask_token_index = torch.where(inputs["input_ids"] == tokenizer.mask_token_id)[1]
|
| 23 |
-
|
| 24 |
-
# Get predictions from the model
|
| 25 |
-
with torch.no_grad():
|
| 26 |
-
outputs = model(**inputs)
|
| 27 |
-
logits = outputs.logits
|
| 28 |
-
|
| 29 |
-
# Extract logits for the [MASK] token
|
| 30 |
-
mask_token_logits = logits[0, mask_token_index, :]
|
| 31 |
-
|
| 32 |
-
# Convert logits to probabilities using softmax
|
| 33 |
-
probs = F.softmax(mask_token_logits, dim=-1)
|
| 34 |
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
-
|
| 44 |
-
|
| 45 |
|
| 46 |
-
|
| 47 |
-
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
description
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
padding: 0px;
|
| 69 |
-
cursor: pointer !important;
|
| 70 |
-
background: rgb(17, 20, 45) !important;
|
| 71 |
-
border: none !important;
|
| 72 |
-
font-family: Poppins !important;
|
| 73 |
-
font-size: 14px !important;
|
| 74 |
-
font-weight: 500 !important;
|
| 75 |
-
color: rgb(255, 255, 255) !important;
|
| 76 |
-
border-radius: 12px !important;
|
| 77 |
-
transition: box-shadow 200ms ease, background 200ms ease !important;
|
| 78 |
-
}
|
| 79 |
-
.gr-button-primary:hover {
|
| 80 |
-
background: rgb(66, 133, 244) !important;
|
| 81 |
-
box-shadow: rgb(0 0 0 / 23%) 0px 1px 7px 0px !important;
|
| 82 |
-
}
|
| 83 |
-
"""
|
| 84 |
-
|
| 85 |
-
# Gradio interface
|
| 86 |
-
interface = gr.Interface(
|
| 87 |
-
fn=predict_masked_word,
|
| 88 |
-
inputs=gr.Textbox(lines=2, placeholder="Enter a sentence with [MASK] token"),
|
| 89 |
-
outputs=gr.Textbox(), # Display output as plain text
|
| 90 |
-
title="Masked Language Model Prediction for Mizo",
|
| 91 |
-
examples=examples,
|
| 92 |
-
css=css,
|
| 93 |
-
theme="light",
|
| 94 |
-
description=description
|
| 95 |
-
)
|
| 96 |
-
|
| 97 |
-
# Launch the app
|
| 98 |
-
interface.launch(share=False)
|
|
|
|
| 1 |
import os
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
+
# NOTE: only `pipeline` is imported. `HfApi` is provided by `huggingface_hub`,
# not `transformers`, and nothing in this file uses it.
from transformers import pipeline
|
|
|
|
| 4 |
|
| 5 |
+
# Set your Hugging Face token here
|
| 6 |
+
# HUGGINGFACE_TOKEN = "your_huggingface_token_here"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
+
# Authenticate with Hugging Face
|
| 9 |
+
# HfApi().set_access_token(HUGGINGFACE_TOKEN)
|
| 10 |
+
# Access token is read from the "hf_token" environment variable
# (None when the variable is not set).
token = os.environ.get("hf_token")

# Fill-mask pipeline; model weights and tokenizer are both loaded from the
# same hub repository, authenticating with the token read above.
model = pipeline(
    "fill-mask",
    model="robzchhangte/mMizBERT",
    tokenizer="robzchhangte/mMizBERT",
    use_auth_token=token,
)
|
| 16 |
+
|
| 17 |
+
def fill_the_mask(text):
    """Predict replacements for the ``[MASK]`` placeholder(s) in *text*.

    Args:
        text: Sentence entered by the user. It must contain the literal
            ``[MASK]`` placeholder at least once.

    Returns:
        A dict mapping each candidate sequence returned by the pipeline to its
        score rounded to three decimals, ordered from highest to lowest score.
        When *text* is empty, ``None``, or has no ``[MASK]``, a plain prompt
        string asking the user to try again is returned instead.
    """
    # Guard clause: nothing to predict without a mask (also covers None/"").
    if not text or "[MASK]" not in text:
        return "You did not enter \"[MASK]\" in the text. Please write your text again!"

    model_out = model(text)

    # With several [MASK] tokens the pipeline returns one candidate list per
    # mask instead of a flat list of dicts; flatten so sorting by "score"
    # does not fail on a nested list.
    if model_out and isinstance(model_out[0], list):
        model_out = [cand for per_mask in model_out for cand in per_mask]

    ranked = sorted(model_out, key=lambda cand: cand["score"], reverse=True)

    # Later (lower-scored) duplicates of a sequence overwrite earlier ones,
    # exactly as a plain assignment loop would.
    return {cand["sequence"]: round(cand["score"], 3) for cand in ranked}
|
| 35 |
+
|
| 36 |
+
# Create a Gradio user interface
|
| 37 |
+
# Single text box in, label component out; the label shows either the
# {sequence: score} dict or the prompt string returned by fill_the_mask.
my_interface = gr.Interface(
    fn=fill_the_mask,
    inputs="text",
    outputs="label",
    title="Masked Language Model APP\n(by Umair Akram)",
    # The description names the model that is actually loaded
    # (robzchhangte/mMizBERT) rather than DistilBERT.
    description=(
        "This App uses the robzchhangte/mMizBERT BERT Masked Language Model "
        "to predict the missed word in a sentence.\n"
        "Enter your text and put \"[MASK]\" at the word which you want to "
        "predict, as shown in the following example: Can we [MASK] to Paris?"
    ),
)
|
| 44 |
+
|
| 45 |
+
# Run the app only when this file is executed as a script
|
| 46 |
+
if __name__ == "__main__":
    # Start serving the Gradio interface with its default launch settings.
    my_interface.launch()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|