Spaces:
Runtime error
Runtime error
import functools
import json
import os
import random

import pandas as pd
import streamlit as st
# NOTE(review): unused in the visible code and imported from a private pandas
# module - confirm whether this import can be dropped.
from pandas.io.formats.format import return_docstring
from transformers import AutoTokenizer, AutoModelForMaskedLM
from transformers import pipeline
# Load the demo configuration once at import time. app() reads cfg["models"],
# whose keys are the names offered in the model selector and whose values are
# passed to load_model() as the model name.
# The encoding is pinned to UTF-8 so parsing does not depend on the platform's
# locale-default encoding.
with open("config.json", encoding="utf-8") as f:
    cfg = json.load(f)
@functools.lru_cache(maxsize=8)
def _get_fill_mask_pipeline(model_name):
    """Return ``(pipeline, mask_token)`` for *model_name*, building it only once.

    Instantiating the model and tokenizer is by far the most expensive step,
    so the result is memoised per model name. The cache is bounded so that an
    unexpected stream of distinct model names cannot grow memory without limit.
    """
    model = AutoModelForMaskedLM.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    nlp = pipeline("fill-mask", model=model, tokenizer=tokenizer)
    return nlp, tokenizer.mask_token


def load_model(masked_text, model_name):
    """Fill the mask in *masked_text* using the model called *model_name*.

    Args:
        masked_text: Sentence containing the generic ``<mask>`` placeholder.
            The placeholder is rewritten to the mask token of the selected
            tokenizer before inference.
        model_name: Identifier handed to ``from_pretrained``.

    Returns:
        A ``(filled_sentence, filled_token)`` tuple taken from the
        highest-ranked prediction.
    """
    nlp, mask_token = _get_fill_mask_pipeline(model_name)
    predictions = nlp(masked_text.replace("<mask>", mask_token))

    best = predictions[0]
    # When the text holds several mask tokens the pipeline returns one list of
    # candidates per mask instead of a flat list; fall back to the top
    # candidate for the first mask rather than failing with a TypeError.
    if isinstance(best, list):
        best = best[0]
    return best["sequence"], best["token_str"]
def app():
    """Render the Hindi masked-language-modelling demo page.

    The page lets the user choose one or more models from ``cfg["models"]``,
    pick or type a sentence containing ``<mask>``, and shows each model's top
    prediction in a table once "Fill the Mask!" is pressed.
    """
    st.markdown(
        "<h1 style='text-align: center; color: green;'>RoBERTa Hindi</h1>",
        unsafe_allow_html=True,
    )
    st.markdown(
        "This demo uses multiple hindi transformer models for Masked Language Modelling (MLM)."
    )

    models_list = list(cfg["models"].keys())
    models = st.multiselect("Choose models", models_list, models_list)

    target_text_path = "./mlm_custom/mlm_targeted_text.csv"
    texts = pd.read_csv(target_text_path)["text"]

    st.sidebar.title("Hindi MLM")

    random_text_key = "mlm_random_text"
    pick_random = st.checkbox("Pick any random text")
    if pick_random:
        # Streamlit reruns this function on every interaction, including the
        # button press below. Keep the drawn sentence in session state so the
        # sentence (and the user's edits to it) is not replaced by a fresh
        # random draw at the moment the user asks for it to be filled.
        if random_text_key not in st.session_state:
            st.session_state[random_text_key] = texts.iloc[
                random.randint(0, texts.shape[0] - 1)
            ]
        default_text = st.session_state[random_text_key]
    else:
        # Forget the previous draw so ticking the box again picks a new one.
        if random_text_key in st.session_state:
            del st.session_state[random_text_key]
        default_text = st.sidebar.selectbox("Select any of the following text", texts)
    masked_text = st.text_area("Please type a masked sentence to fill", default_text)

    if st.button("Fill the Mask!"):
        if not models:
            st.warning("Please choose at least one model.")
            return

        rows = []
        with st.spinner("Filling the Mask..."):
            for selected_model in models:
                filled_sentence, filled_token = load_model(
                    masked_text, cfg["models"][selected_model]
                )
                rows.append((selected_model, filled_token, filled_sentence))

        results_df = pd.DataFrame(
            rows, columns=["Model Name", "Filled Token", "Filled Text"]
        )
        st.table(results_df)