File size: 3,309 Bytes
fd7c103
 
 
 
3d8b7aa
fd7c103
 
 
 
 
9399b88
 
fd7c103
 
 
 
9399b88
3d8b7aa
9399b88
 
 
 
 
 
 
 
 
4d02817
9399b88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4d02817
9399b88
 
 
 
 
4d02817
9399b88
 
 
 
 
 
 
 
 
 
 
 
4d02817
9399b88
 
fd7c103
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Identifier handed to both `from_pretrained` calls below, so the tokenizer
# and the classifier are always loaded from the same checkpoint.
model_name = "GMCTech/LexCAT"
# Loaded once at import time; `predict_sentiment` reuses these module-level
# instances on every request instead of reloading them per call.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)

def predict_sentiment(text):
    """Classify the sentiment of ``text`` using the module-level model.

    Args:
        text: Raw text from the input box. ``None`` and empty or
            whitespace-only strings are treated as missing input.

    Returns:
        A 2-tuple of strings ``(sentiment, probabilities)``: the predicted
        label (``"NEGATIVE"``, ``"NEUTRAL"`` or ``"POSITIVE"``) and a
        three-line breakdown of the softmax probabilities. For missing
        input the tuple is ``("Please enter text.", "—")``.
    """
    # Every path must return exactly two values: the UI wires this function
    # to two output components, and a mismatched count is a runtime error.
    if text is None or not text.strip():
        return "Please enter text.", "—"

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        outputs = model(**inputs)
    predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
    predicted_class = torch.argmax(predictions, dim=1).item()

    # NOTE(review): assumes the checkpoint emits three logits ordered
    # negative / neutral / positive; confirm against the model config.
    sentiment_map = {0: "NEGATIVE", 1: "NEUTRAL", 2: "POSITIVE"}

    # A single text is passed in, so row 0 holds its class probabilities.
    negative, neutral, positive = predictions[0].tolist()[:3]

    sentiment_output = sentiment_map[predicted_class]
    probabilities_output = (
        f"Negative: {negative:.3f}\n"
        f"Neutral: {neutral:.3f}\n"
        f"Positive: {positive:.3f}"
    )

    return sentiment_output, probabilities_output

# Define the Gradio interface: one input box on the left and two read-only
# output boxes (predicted label, raw probabilities) on the right.
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# 🔍 LexCAT: Taglish Sentiment Analysis")
    gr.Markdown("""
    LexCAT is a lexicon-enhanced transformer model for sentiment analysis of Tagalog–English code-switched text (Taglish). \n\n

    • Developed by Glenn Marcus D. Cinco for his BS/MS thesis at Mapúa University. \n
    • Trained on the FiReCS dataset. \n
    • Enhanced with LexiLiksik to detect intra-sentential shifts (e.g., “Maganda pero expensive” → Negative).
    """)

    with gr.Row():
        # Left column: user input and action buttons.
        with gr.Column(scale=1):
            input_box = gr.Textbox(
                placeholder="Type a Taglish sentence, e.g., 'Maganda pero expensive tlga'",
                label="Input Tagalog–English (Taglish) Text",
                lines=10,
                max_lines=20
            )
            submit_btn = gr.Button("Submit", variant="primary")
            clear_btn = gr.Button("Clear")

        # Right column: prediction results (not editable by the user).
        with gr.Column(scale=1):
            sentiment_box = gr.Textbox(
                label="Predicted Sentiment",
                lines=3,
                max_lines=5,
                interactive=False
            )
            probabilities_box = gr.Textbox(
                label="Raw Probabilities",
                lines=6,
                max_lines=10,
                interactive=False
            )

    # Set up event listeners
    submit_btn.click(
        fn=predict_sentiment,
        inputs=input_box,
        outputs=[sentiment_box, probabilities_box]
    )

    # Clear resets the input and both outputs: one empty string per
    # component listed in `outputs`.
    clear_btn.click(
        fn=lambda: ("", "", ""),
        inputs=None,
        outputs=[input_box, sentiment_box, probabilities_box]
    )

    # Add examples below. `fn` together with `run_on_click` makes selecting
    # an example run the prediction and fill the listed outputs, instead of
    # only copying the sentence into the input box.
    gr.Examples(
        examples=[
            ["sobrang lambot ng burger pero expensive tlga"],
            ["Ang ganda ng service, one star!"],
            ["Super duper late delivery umabot ng 2 weeks metro manila area lang naman"],
            ["Salamat sa nyo nagana nmn po sya kaya super thank you ako"],
            ["Ganda legit, kumpleto... problema lang nainit ng sobra..."]
        ],
        inputs=input_box,
        outputs=[sentiment_box, probabilities_box],
        fn=predict_sentiment,
        run_on_click=True,
        label="Example Sentences"
    )

# Launch the app only when this file is executed as a script, so importing
# the module does not start the server.
if __name__ == "__main__":
    demo.launch()