dhiyaadli123 committed on
Commit
9dbe682
·
1 Parent(s): 65154f5

feat: Add final working application code and model

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ finetuned_model_deberta_multitask/model.safetensors filter=lfs diff=lfs merge=lfs -text
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoTokenizer, DebertaV2Config, DebertaV2Model, PreTrainedModel
3
+ import torch
4
+ import torch.nn as nn
5
+ import torch.nn.functional as F
6
+ import re
7
+
8
# ==============================================================================
# SECTION 1: TEXT CLEANING
# ==============================================================================
def clean_teks(text):
    """Normalize raw input text for the tokenizer.

    Lowercases, strips URLs, drops every non-alphanumeric character
    (punctuation and non-ASCII letters), and collapses runs of whitespace
    into single spaces.
    """
    lowered = text.lower()
    no_urls = re.sub(r"http\S+|www\S+|https\S+", "", lowered)
    alnum_only = re.sub(r"[^a-zA-Z0-9\s]", "", no_urls)
    return re.sub(r"\s+", " ", alnum_only).strip()
17
+
18
# ==============================================================================
# SECTION 2: MODEL DEFINITION
# ==============================================================================
class DebertaV3ForMultiTask(PreTrainedModel):
    """DeBERTa-v2 encoder with two parallel classification heads.

    One head predicts sentiment, the other predicts the news category.
    Head counts come from ``config.num_sentiment_labels`` and
    ``config.num_type_labels`` (custom fields stored in config.json).
    Submodule names (``deberta``, ``dropout``, ``sentiment_classifier``,
    ``type_classifier``) are state-dict keys and must not be renamed,
    or ``from_pretrained`` will fail to load the checkpoint weights.
    """

    config_class = DebertaV2Config

    def __init__(self, config):
        super().__init__(config)
        self.num_sentiment_labels = config.num_sentiment_labels
        self.num_type_labels = config.num_type_labels
        # Shared encoder backbone followed by a regularizing dropout layer.
        self.deberta = DebertaV2Model(config)
        self.dropout = nn.Dropout(config.hidden_dropout_prob)
        # Task-specific linear heads over the [CLS] representation.
        self.sentiment_classifier = nn.Linear(config.hidden_size, self.num_sentiment_labels)
        self.type_classifier = nn.Linear(config.hidden_size, self.num_type_labels)
        self.init_weights()

    def forward(self, input_ids=None, attention_mask=None, **kwargs):
        """Run both heads; returns raw logits as {"sentiment": ..., "type": ...}."""
        encoder_out = self.deberta(input_ids=input_ids, attention_mask=attention_mask)
        # First token ([CLS]) embedding serves as the pooled sequence summary.
        cls_embedding = encoder_out.last_hidden_state[:, 0]
        dropped = self.dropout(cls_embedding)
        return {
            "sentiment": self.sentiment_classifier(dropped),
            "type": self.type_classifier(dropped),
        }
44
+
45
# ==============================================================================
# SECTION 3: LOAD MODEL & TOKENIZER
# ==============================================================================
# Local checkpoint directory committed alongside this app (holds config.json,
# model.safetensors, and the SentencePiece tokenizer files).
MODEL_PATH = "./finetuned_model_deberta_multitask"
tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
model = DebertaV3ForMultiTask.from_pretrained(MODEL_PATH)
model.eval()  # inference mode: disables dropout for deterministic predictions

# Index order of these lists maps logit positions to human-readable labels
# (3 sentiment classes, 7 categories — matching num_sentiment_labels /
# num_type_labels in config.json).
# NOTE(review): assumed to match the label encoding used at training time —
# confirm against the training notebook.
SENTIMENT_LABELS = ['negative', 'neutral', 'positive']
CATEGORY_LABELS = ['Business', 'Entertainment', 'General', 'Health', 'Science', 'Sports', 'Technology']
55
+
56
# ==============================================================================
# SECTION 4: PREDICTION FUNCTION
# ==============================================================================
def predict(text):
    """Classify a news text for sentiment and category.

    Returns a 4-tuple consumed by the Gradio outputs:
      (sentiment label->probability dict, category label->probability dict,
       "best_sentiment (pct)" string, "best_category (pct)" string).
    Empty/whitespace input short-circuits with placeholder values.
    """
    if not text or text.isspace():
        return {}, {}, "No input provided", "No input provided"

    encoded = tokenizer(
        clean_teks(text),
        return_tensors="pt",
        truncation=True,
        max_length=256,
        padding=True,
    )

    # Inference only — no gradient bookkeeping needed.
    with torch.no_grad():
        logits = model(**encoded)

    # Batch size is 1, so take row 0 of each softmax distribution.
    sentiment_scores = F.softmax(logits["sentiment"], dim=1)[0]
    category_scores = F.softmax(logits["type"], dim=1)[0]

    sentiment_conf = {
        name: round(score.item(), 4)
        for name, score in zip(SENTIMENT_LABELS, sentiment_scores)
    }
    category_conf = {
        name: round(score.item(), 4)
        for name, score in zip(CATEGORY_LABELS, category_scores)
    }

    top_sentiment = SENTIMENT_LABELS[torch.argmax(sentiment_scores)]
    top_category = CATEGORY_LABELS[torch.argmax(category_scores)]

    return (
        sentiment_conf,
        category_conf,
        f"{top_sentiment} ({sentiment_conf[top_sentiment]:.2%})",
        f"{top_category} ({category_conf[top_category]:.2%})",
    )
81
+
82
# ==============================================================================
# SECTION 5: GRADIO UI
# ==============================================================================
# Declarative UI: a text box feeding predict(), with results rendered in two
# side-by-side columns (sentiment left, category right).
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 📰 News Sentiment and Category Classification")

    text_input = gr.Textbox(placeholder="Enter news text here...", label="Input Text", lines=5)
    submit_button = gr.Button("Analyze", variant="primary")

    with gr.Row():
        with gr.Column():
            gr.Markdown("### 🔎 Predicted Sentiment")
            sentiment_label = gr.Text(label="Predicted Sentiment")
            sentiment_output = gr.Label(label="Sentiment Probabilities", num_top_classes=3)
        with gr.Column():
            gr.Markdown("### 🗂️ Predicted News Category")
            category_label = gr.Text(label="Predicted Category")
            category_output = gr.Label(label="Category Probabilities", num_top_classes=len(CATEGORY_LABELS))

    # Output order must mirror predict()'s return order:
    # (sentiment probs, category probs, sentiment summary, category summary).
    submit_button.click(fn=predict, inputs=text_input, outputs=[sentiment_output, category_output, sentiment_label, category_label])

    # Clickable sample inputs for quick demo runs.
    gr.Examples(
        [
            ["Stanley Kubrick's estate has led the tributes to Shelley Duvall."],
            ["Lignetics Inc. recently acquired the fiber energy products wood pellets business unit from Revelyst."],
            ["An overcrowded California men’s prison was running on emergency generator power for a third day Tuesday."]
        ],
        inputs=text_input
    )

if __name__ == "__main__":
    demo.launch()  # No need for share=True when deploying
finetuned_model_deberta_multitask/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
finetuned_model_deberta_multitask/config.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DebertaV3ForMultiTask"
4
+ ],
5
+ "attention_probs_dropout_prob": 0.1,
6
+ "hidden_act": "gelu",
7
+ "hidden_dropout_prob": 0.1,
8
+ "hidden_size": 768,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 3072,
11
+ "layer_norm_eps": 1e-07,
12
+ "legacy": true,
13
+ "max_position_embeddings": 512,
14
+ "max_relative_positions": -1,
15
+ "model_type": "deberta-v2",
16
+ "norm_rel_ebd": "layer_norm",
17
+ "num_attention_heads": 12,
18
+ "num_hidden_layers": 6,
19
+ "num_sentiment_labels": 3,
20
+ "num_type_labels": 7,
21
+ "pad_token_id": 0,
22
+ "pooler_dropout": 0,
23
+ "pooler_hidden_act": "gelu",
24
+ "pooler_hidden_size": 768,
25
+ "pos_att_type": [
26
+ "p2c",
27
+ "c2p"
28
+ ],
29
+ "position_biased_input": false,
30
+ "position_buckets": 256,
31
+ "relative_attention": true,
32
+ "share_att_key": true,
33
+ "torch_dtype": "float32",
34
+ "transformers_version": "4.52.4",
35
+ "type_vocab_size": 0,
36
+ "vocab_size": 128100
37
+ }
finetuned_model_deberta_multitask/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d9c2e5f98351f19c3decad9b937f026187c03c40ad85e44c944b5a0f6f2470f
3
+ size 565260832
finetuned_model_deberta_multitask/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "[UNK]",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
finetuned_model_deberta_multitask/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
finetuned_model_deberta_multitask/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
finetuned_model_deberta_multitask/tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[PAD]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[UNK]",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "128000": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "bos_token": "[CLS]",
45
+ "clean_up_tokenization_spaces": false,
46
+ "cls_token": "[CLS]",
47
+ "do_lower_case": false,
48
+ "eos_token": "[SEP]",
49
+ "extra_special_tokens": {},
50
+ "mask_token": "[MASK]",
51
+ "model_max_length": 1000000000000000019884624838656,
52
+ "pad_token": "[PAD]",
53
+ "sep_token": "[SEP]",
54
+ "sp_model_kwargs": {},
55
+ "split_by_punct": false,
56
+ "tokenizer_class": "DebertaV2Tokenizer",
57
+ "unk_token": "[UNK]",
58
+ "vocab_type": "spm"
59
+ }
finetuned_model_deberta_multitask/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e4e8a1e0f273b34d9ebf5c37af4061a59c09f998def78a4b1ecf00f008dd637
3
+ size 5304
finetuned_model_deberta_multitask/tugas_besar_nlp ADDED
@@ -0,0 +1 @@
 
 
1
+ /content/drive/.shortcut-targets-by-id/1KOIMCqyf3HZb7FwGsTp3OfBTawGcdWMR/tugas_besar_nlp
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ transformers
2
+ torch
3
+ sentencepiece
4
+ gradio