File size: 5,264 Bytes
9dbe682
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import gradio as gr
from transformers import AutoTokenizer, DebertaV2Config, DebertaV2Model, PreTrainedModel
import torch
import torch.nn as nn
import torch.nn.functional as F
import re

# ==============================================================================
# SECTION 1: TEXT CLEANING
# ==============================================================================
def clean_teks(text):
    """Normalize raw news text before tokenization.

    Lowercases, strips URLs, drops every character that is not
    alphanumeric or whitespace, and collapses runs of whitespace
    into single spaces.
    """
    lowered = text.lower()
    # Remove anything that looks like a URL (http/https/www prefixes).
    no_urls = re.sub(r"http\S+|www\S+|https\S+", "", lowered)
    # Keep only letters, digits, and whitespace.
    alnum_only = re.sub(r"[^a-zA-Z0-9\s]", "", no_urls)
    # Collapse whitespace and trim the ends.
    return re.sub(r"\s+", " ", alnum_only).strip()

# ==============================================================================
# SECTION 2: MODEL DEFINITION
# ==============================================================================
class DebertaV3ForMultiTask(PreTrainedModel):
    """DeBERTa-v2 encoder with two parallel classification heads.

    One head predicts sentiment, the other the news category; both
    read the [CLS] (first token) representation of the encoder output.
    Head widths come from custom config attributes
    ``num_sentiment_labels`` and ``num_type_labels``.
    """

    config_class = DebertaV2Config

    def __init__(self, config):
        super().__init__(config)
        # Head sizes are carried on the (extended) config object.
        self.num_sentiment_labels = config.num_sentiment_labels
        self.num_type_labels = config.num_type_labels
        self.deberta = DebertaV2Model(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        self.sentiment_classifier = nn.Linear(config.hidden_size, self.num_sentiment_labels)
        self.type_classifier = nn.Linear(config.hidden_size, self.num_type_labels)
        # Standard HF weight init; must run after all submodules exist.
        self.init_weights()

    def forward(self, input_ids=None, attention_mask=None, **kwargs):
        """Return raw logits for both tasks as a dict.

        Keys: ``"sentiment"`` and ``"type"``; values are
        (batch, num_labels) logit tensors.
        """
        encoder_out = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
        # Use the first token's hidden state as the pooled sequence summary.
        cls_repr = self.dropout(encoder_out.last_hidden_state[:, 0])
        return {
            "sentiment": self.sentiment_classifier(cls_repr),
            "type": self.type_classifier(cls_repr),
        }

# ==============================================================================
# SECTION 3: LOAD MODEL & TOKENIZER
# ==============================================================================
# Directory containing the fine-tuned checkpoint (config, weights, tokenizer files).
MODEL_PATH = "./finetuned_model_deberta_multitask"
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = DebertaV3ForMultiTask.from_pretrained(MODEL_PATH)
# Inference only: disable dropout and other train-mode behavior.
model.eval()

# Label orderings must match the index order used during training —
# the classifier heads' output positions map directly onto these lists.
SENTIMENT_LABELS = ['negative', 'neutral', 'positive']
CATEGORY_LABELS = ['Business', 'Entertainment', 'General', 'Health', 'Science', 'Sports', 'Technology']

# ==============================================================================
# SECTION 4: PREDICTION FUNCTION
# ==============================================================================
def predict(text):
    """Classify one piece of news text on both tasks.

    Returns a 4-tuple consumed by the Gradio UI:
    (sentiment probability dict, category probability dict,
     "label (pct)" string for the top sentiment,
     "label (pct)" string for the top category).
    Empty/whitespace-only input yields empty dicts and placeholder text.
    """
    if not text or text.isspace():
        return {}, {}, "No input provided", "No input provided"

    encoded = tokenizer(
        clean_teks(text),
        return_tensors="pt",
        truncation=True,
        max_length=256,
        padding=True,
    )

    # No gradients needed for inference.
    with torch.no_grad():
        logits = model(**encoded)
        sent_probs = F.softmax(logits["sentiment"], dim=1)[0]
        cat_probs = F.softmax(logits["type"], dim=1)[0]

    # Per-label confidence maps, rounded for display.
    sent_scores = {name: round(p.item(), 4) for name, p in zip(SENTIMENT_LABELS, sent_probs)}
    cat_scores = {name: round(p.item(), 4) for name, p in zip(CATEGORY_LABELS, cat_probs)}

    top_sent = SENTIMENT_LABELS[torch.argmax(sent_probs)]
    top_cat = CATEGORY_LABELS[torch.argmax(cat_probs)]

    return (
        sent_scores,
        cat_scores,
        f"{top_sent} ({sent_scores[top_sent]:.2%})",
        f"{top_cat} ({cat_scores[top_cat]:.2%})",
    )

# ==============================================================================
# SECTION 5: GRADIO UI
# ==============================================================================
# Build the Gradio Blocks UI: a text box feeding `predict`, whose four
# outputs fill two columns (sentiment vs. category), each showing the
# top prediction as text plus the full probability distribution.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 📰 News Sentiment and Category Classification")

    text_input = gr.Textbox(placeholder="Enter news text here...", label="Input Text", lines=5)
    submit_button = gr.Button("Analyze", variant="primary")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🔎 Predicted Sentiment")
            sentiment_label = gr.Text(label="Predicted Sentiment")
            sentiment_output = gr.Label(label="Sentiment Probabilities", num_top_classes=3)
        with gr.Column():
            gr.Markdown("### 🗂️ Predicted News Category")
            category_label = gr.Text(label="Predicted Category")
            category_output = gr.Label(label="Category Probabilities", num_top_classes=len(CATEGORY_LABELS))

    # Output order must match predict()'s 4-tuple:
    # (sentiment probs, category probs, sentiment summary, category summary).
    submit_button.click(fn=predict, inputs=text_input, outputs=[sentiment_output, category_output, sentiment_label, category_label])

    # Clickable sample inputs shown below the textbox.
    gr.Examples(
        [
            ["Stanley Kubrick's estate has led the tributes to Shelley Duvall."],
            ["Lignetics Inc. recently acquired the fiber energy products wood pellets business unit from Revelyst."],
            ["An overcrowded California men’s prison was running on emergency generator power for a third day Tuesday."]
        ],
        inputs=text_input
    )

# Launch the app only when run as a script; hosting platforms (e.g. HF
# Spaces) import `demo` directly, so share=True is unnecessary here.
if __name__ == "__main__":
    demo.launch() # No need for share=True when deploying