# FiLex-Demo / app.py
# Author: Angelo25
# Commit: "Update app.py" (9068fc0, verified)
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
# Hub checkpoint shared by the model and its tokenizer. Keeping the ID in one
# place prevents the two from_pretrained() calls from drifting apart.
MODEL_ID = "Angelo25/Filipino-Lexical-Normalization"

model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# The app only runs inference, so put the model in evaluation mode.
model.eval()
def normalize(input_text):
    """Normalize informal Filipino text with the seq2seq model.

    Args:
        input_text: Raw text from the UI. ``None`` or whitespace-only text is
            treated as empty.

    Returns:
        The decoded model output with special tokens removed, or ``""`` when
        there is nothing to normalize.
    """
    # Nothing to normalize: skip tokenization and beam search entirely rather
    # than let the model generate text from an empty prompt.
    if input_text is None or not input_text.strip():
        return ""

    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    # Let the output grow past the input length by a fixed 50-token margin.
    output_budget = inputs["input_ids"].shape[1] + 50
    output = model.generate(
        **inputs,
        max_new_tokens=output_budget,
        num_beams=3,
        early_stopping=True,
        use_cache=True,
    )
    # generate() returns a batch; only one sequence was submitted.
    return tokenizer.decode(output[0], skip_special_tokens=True)
# Example prompts offered in the demo UI. Each row is a one-element list
# holding the value for the single text input.
sample_inputs = [
    [text]
    for text in (
        "lodi q tlaga yn",
        "Jusko kawawa nmn ung bta",
        "d nmn yata maba2ril c philip",
        "Ang lalaki na nio mag work na kau",
        "Girl pa galit..xa na nga may utang..haha",
    )
]
# Web UI: one text box in, one text box out, wired to normalize().
demo = gr.Interface(
    fn=normalize,
    inputs=gr.Textbox(label="Input Text", placeholder="Enter informal Filipino text..."),
    outputs=gr.Textbox(label="Normalized Text"),
    theme=gr.Theme.from_hub("SebastianBravo/simci_css"),
    title="FiLex: Filipino Lexical Normalization",
    description="Normalizes informal/noisy Filipino text using a fine-tuned ByT5-base model.",
    examples=sample_inputs,
)

# Launch as a separate statement so that `demo` stays bound to the Interface
# object instead of to launch()'s return value.
demo.launch()