# Spaces: Running
| import torch | |
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForSeq2SeqLM | |
# Hugging Face Hub checkpoint shared by the model and its tokenizer. Keeping
# the id in one place guarantees both are loaded from the same repository.
MODEL_ID = "Angelo25/Filipino-Lexical-Normalization"

# Load the seq2seq weights and the matching tokenizer once, at import time,
# so every request handled by `normalize` reuses the same objects.
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Put the model in evaluation mode; it is only used for generation here.
model.eval()
def normalize(input_text):
    """Return the model-generated normalization of *input_text*.

    Args:
        input_text: Text to normalize. ``None``, an empty string, or a
            whitespace-only string is accepted and short-circuits.

    Returns:
        The decoded first generated sequence with special tokens removed,
        or ``""`` when there is nothing to normalize.
    """
    # Nothing to normalize: skip tokenization and beam search entirely
    # instead of asking the model to generate from an empty prompt.
    if not input_text or not input_text.strip():
        return ""

    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

    # Let the output run up to 50 tokens longer than the tokenized input.
    max_new_tokens = inputs["input_ids"].shape[1] + 50

    output = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        num_beams=3,
        early_stopping=True,
        use_cache=True,
    )

    # Only one text was tokenized, so only the first sequence is decoded.
    return tokenizer.decode(output[0], skip_special_tokens=True)
# Example prompts for the demo. Each example is wrapped in its own
# single-element list, giving a list of one-item rows.
sample_inputs = [
    [text]
    for text in (
        "lodi q tlaga yn",
        "Jusko kawawa nmn ung bta",
        "d nmn yata maba2ril c philip",
        "Ang lalaki na nio mag work na kau",
        "Girl pa galit..xa na nga may utang..haha",
    )
]
# Build the UI and keep the Interface object itself bound to `demo`.
# Chaining `.launch()` onto the constructor would bind `demo` to launch()'s
# return value instead, hiding the app object from anything that looks it up
# by name.
demo = gr.Interface(
    fn=normalize,
    inputs=gr.Textbox(label="Input Text", placeholder="Enter informal Filipino text..."),
    outputs=gr.Textbox(label="Normalized Text"),
    theme=gr.Theme.from_hub("SebastianBravo/simci_css"),
    title="FiLex: Filipino Lexical Normalization",
    description="Normalizes informal/noisy Filipino text using a fine-tuned ByT5-base model.",
    examples=sample_inputs,
)

# Start serving the app. Left unconditional so the script launches exactly
# as it did before.
demo.launch()