# Shrikrishna's picture
# Update app.py
# 05284a6 verified
import streamlit as st
import numpy as np
import pickle
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.sequence import pad_sequences
import nltk
from nltk.tokenize import word_tokenize
import ast
import pandas as pd
# Load assets
@st.cache_resource
def load_model_and_data():
    """Load the tagger model and its two lookup tables from disk.

    Reads ``bigru_pos_tagger.h5`` with Keras ``load_model`` and unpickles
    ``word2idx_bigru.pkl`` and ``idx2tag_bigru.pkl`` from the current
    working directory. The function is wrapped in ``st.cache_resource`` so
    Streamlit can reuse the loaded objects instead of reloading them.

    Returns:
        A ``(model, word2idx, idx2tag)`` tuple, in that order.
    """

    def _read_pickle(path):
        # Binary mode is required by pickle; the context manager closes
        # the file even if unpickling fails.
        with open(path, "rb") as handle:
            return pickle.load(handle)

    tagger = load_model("bigru_pos_tagger.h5")
    vocab = _read_pickle("word2idx_bigru.pkl")
    tag_lookup = _read_pickle("idx2tag_bigru.pkl")
    return tagger, vocab, tag_lookup
def pos_tag_sentence(sentence):
    """Predict a POS tag for every whitespace-separated token of *sentence*.

    Tokens are mapped to ids through the module-level ``word2idx`` (tokens
    missing from it get id 1 -- presumably the unknown-word index; confirm
    against the training vocabulary), padded to ``MAX_LEN`` and passed to
    the module-level ``model``. The highest-scoring index at each position
    is translated back to a tag with ``idx2tag``.

    Input longer than ``MAX_LEN`` tokens is tagged in consecutive windows
    of ``MAX_LEN`` tokens. ``pad_sequences`` truncates from the *front* by
    default, so sending the whole sentence as one row would pair the first
    tokens with tags predicted for the last ones and drop the rest.

    Args:
        sentence: Text to tag; split on whitespace.

    Returns:
        A list of ``(token, tag)`` tuples, one per token, in input order.
        An empty or whitespace-only sentence yields ``[]``.
    """
    tokens = sentence.split()
    if not tokens:
        # Nothing to tag; skip running the model on an all-padding row.
        return []

    # Windows of at most MAX_LEN tokens, so no row is ever truncated.
    # NOTE: each window is tagged independently, so context does not carry
    # across a window boundary.
    windows = [tokens[start:start + MAX_LEN]
               for start in range(0, len(tokens), MAX_LEN)]
    window_ids = [[word2idx.get(token, 1) for token in window]
                  for window in windows]
    padded = pad_sequences(window_ids, maxlen=MAX_LEN, padding="post")

    # One batch for all windows; each row of the result is iterated as one
    # score vector per position, exactly as the single-row case.
    predictions = model.predict(padded)

    predicted_tags = []
    for window, window_scores in zip(windows, predictions):
        # Ignore the positions that are only padding.
        real_scores = window_scores[:len(window)]
        predicted_tags.extend(
            idx2tag[np.argmax(scores)] for scores in real_scores
        )
    return list(zip(tokens, predicted_tags))
def makeTagedSentences(data):
    """Join (word, tag) pairs into one string of ``word<backslash>tag`` items.

    Args:
        data: Either an iterable of two-item ``(word, tag)`` pairs, or a
            string holding the Python literal of such a list (for example
            the ``str()`` of one). Only strings are parsed with
            ``ast.literal_eval``; anything else is used as given, so pairs
            whose ``repr`` is not a Python literal are still formatted
            instead of being silently discarded.

    Returns:
        The pairs rendered as ``word\\tag`` and separated by single spaces.
        Returns ``""`` when a string cannot be parsed as a literal or when
        an item does not unpack into exactly two values.
    """
    try:
        # Round-tripping a real list through str() + literal_eval is
        # unnecessary and loses any element that does not repr as a literal.
        pairs = ast.literal_eval(data) if isinstance(data, str) else data
        return " ".join([f"{word}\\{tag}" for word, tag in pairs])
    except (ValueError, SyntaxError):
        # ValueError: malformed literal or wrong number of items in a pair.
        # SyntaxError: the string is not valid Python syntax.
        return ""
# Module-level assets read by pos_tag_sentence.
model, word2idx, idx2tag = load_model_and_data()
# Length every input row is padded to before prediction
# (presumably the sequence length the model was trained with -- confirm).
MAX_LEN = 134

# Streamlit UI
st.title("Automatic Konkani POS Tagger")
user_input = st.text_area("Enter a Konkani sentence:", "")

if st.button("Tag Sentence"):
    stripped_text = user_input.strip()
    if not stripped_text:
        st.warning("Please enter some Konkani Text.")
    else:
        words = stripped_text.split()
        # Input may already be in word\tag form; if any token carries a
        # backslash, keep only the part before the first one in each token.
        has_tag_marker = any("\\" in word for word in words)
        if has_tag_marker:
            cleaned_input = " ".join(word.split("\\")[0] for word in words)
        else:
            cleaned_input = stripped_text

        tagged = pos_tag_sentence(cleaned_input)
        df = pd.DataFrame(tagged, columns=["Word", "Predicted Tag"])
        final_tagged_sentence = makeTagedSentences(tagged)

        # Show the inline word\tag rendering first, then the table.
        st.subheader("POS Tagged Konkani Sentence:")
        st.text(final_tagged_sentence)
        st.subheader("POS Tags Table: ")
        st.table(df)
# Footer
#st.markdown(
# """
# <hr style="margin-top: 2rem; margin-bottom: 1rem">
# <div style="text-align: center; font-size: 0.9rem; color: gray;">
# &copy; 2025 | Shrikrishna Parab | Goa University
# </div>
# """,
# unsafe_allow_html=True
#)