File size: 1,323 Bytes
79c8e14
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9068fc0
90f5f27
 
79c8e14
 
 
 
 
 
 
 
9068fc0
79c8e14
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Hugging Face Hub checkpoint shared by the model and its tokenizer.
_CHECKPOINT = "Angelo25/Filipino-Lexical-Normalization"

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
# `eval()` returns the module itself, so the model can be loaded and switched
# to evaluation mode in one expression.
model = AutoModelForSeq2SeqLM.from_pretrained(_CHECKPOINT).eval()

def normalize(input_text):
    """Normalize a piece of text with the module-level seq2seq model.

    Args:
        input_text: The raw text to normalize. ``None``, an empty string, or
            whitespace-only text is treated as "nothing to normalize".

    Returns:
        The first generated sequence decoded to a string with special tokens
        removed, or ``""`` when the input is blank.
    """
    # Blank input carries nothing to normalize; return early instead of
    # running beam search and decoding whatever comes out of an empty prompt.
    if input_text is None or not input_text.strip():
        return ""

    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        # Generation budget scales with the tokenized input length, plus a
        # fixed headroom of 50 tokens.
        max_new_tokens=inputs["input_ids"].shape[1] + 50,
        num_beams=3,
        early_stopping=True,
        use_cache=True,
    )

    return tokenizer.decode(output[0], skip_special_tokens=True)

# Example sentences for the demo; each is wrapped in its own single-element
# list below, giving one row per example.
_SAMPLE_SENTENCES = (
    "lodi q tlaga yn",
    "Jusko kawawa nmn ung bta",
    "d nmn yata maba2ril c philip",
    "Ang lalaki na nio mag work na kau",
    "Girl pa galit..xa na nga may utang..haha",
)

sample_inputs = [[sentence] for sentence in _SAMPLE_SENTENCES]

# Build the Gradio UI: a single input textbox and a single output textbox
# wired to `normalize`, with the sample sentences offered as examples.
# `demo` is bound to the Interface itself rather than to the return value of
# `launch()`, so the app object stays reachable under that name.
demo = gr.Interface(
    fn=normalize,
    inputs=gr.Textbox(label="Input Text", placeholder="Enter informal Filipino text..."),
    outputs=gr.Textbox(label="Normalized Text"),
    theme=gr.Theme.from_hub("SebastianBravo/simci_css"),
    title="FiLex: Filipino Lexical Normalization",
    description="Normalizes informal/noisy Filipino text using a fine-tuned ByT5-base model.",
    examples=sample_inputs,
)

# Start the web server only when this file is executed as a script, so that
# importing the module does not launch the app as a side effect.
if __name__ == "__main__":
    demo.launch()