hoom4n committed on
Commit
c5bcbe7
·
verified ·
1 Parent(s): 2e31647

Upload 11 files

Browse files
app.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
os.environ["USE_TF"] = "0"  # must be set before importing transformers: disables the TensorFlow backend
import torch
import gradio as gr
from transformers import DebertaV2Tokenizer
from src.model import SentiNetTransformer
from src.config import HPARAMS
from src.ui import build_demo

# CONFIGURATION
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
hp = HPARAMS()
backbone_config_path = "model/config.json"
# Fixed typo: was "checkpint_path".
checkpoint_path = "model/SentiNet_Transformer_params.pt"
tokenizer_path = "model/"

# LOAD MODEL & TOKENIZER
# The backbone is built from config alone; trained weights come from the checkpoint.
model = SentiNetTransformer(model_path=backbone_config_path, fc_dropout=hp.transformer_fc_dropout).to(device)
# weights_only=True restricts torch.load to plain tensors (no arbitrary pickled objects).
state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)
model.load_state_dict(state_dict)
tokenizer = DebertaV2Tokenizer.from_pretrained(tokenizer_path)
22
+
23
# INFERENCE FUNCTION
@torch.no_grad()
def sentiment_classifier(model, tokenizer, text, thresh=0.5, max_length_trun=256, device=None):
    """Classify *text* as positive/negative sentiment.

    Args:
        model: module mapping a tokenizer encoding dict to a (N, 1) logit tensor.
        tokenizer: HuggingFace-style tokenizer callable.
        text: input string to classify.
        thresh: probability cutoff separating positive from negative.
        max_length_trun: truncation length passed to the tokenizer.
        device: torch device; auto-detected (CUDA if available) when None.

    Returns:
        (label, score) where label is an emoji-tagged string and score is the
        sigmoid probability rounded to 3 decimals.
    """
    if device is None:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    encoded = tokenizer(
        text,
        return_tensors="pt",
        add_special_tokens=True,
        max_length=max_length_trun,
        truncation=True,
        padding=True,
    )
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    model.eval()  # disable dropout for deterministic inference
    logits = model(encoded)
    prob = float(torch.sigmoid(logits).cpu().numpy()[0][0])

    label = "😀 Positive" if prob >= thresh else "😞 Negative"
    return label, round(prob, 3)
42
+
43
# GRADIO DEMO
def generation_fn(text):
    # Thin adapter for the Gradio click handler: binds the module-level
    # model/tokenizer and returns (label, confidence) for the two outputs.
    return sentiment_classifier(model, tokenizer, text, max_length_trun=256, device=None)

# Build the Blocks UI; markdown/CSS assets are read from the assets/ directory.
demo = build_demo(
    generation_fn,
    english_title = "# SentiNet: Transformer‑Based Sentiment Classifier",
    persian_title = "# سنتی‌نت: تحلیل احساسات با ترنسفورمر",
    assets_dir = "assets",
    app_title = "SentiNet"
)

if __name__ == "__main__":
    demo.launch()
assets/css/custom.css ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/* SentiNet Gradio theme overrides.
   Light styles target .gradio-container; dark-mode variants are scoped
   under the .dark class Gradio toggles on the root container. */

/* Background */
.gradio-container {
  background: linear-gradient(135deg, #fdfbfb, #ebedee) !important;
  font-family: 'Inter', 'Segoe UI', sans-serif !important;
}
.dark .gradio-container {
  background: linear-gradient(135deg, #1e1a5e, #2a0a3a) !important;
}

/* Buttons */
button {
  border-radius: 14px !important;
  padding: 10px 18px !important;
  font-weight: 600 !important;
  background: linear-gradient(90deg, #6a11cb, #2575fc) !important;
  color: white !important;
  box-shadow: 0 4px 10px rgba(0,0,0,0.15) !important;
  transition: transform 0.15s ease-in-out;
}
button:hover {
  transform: translateY(-2px);
  box-shadow: 0 6px 14px rgba(0,0,0,0.25) !important;
}

/* Title (elem_id="title" in src/ui.py) */
#title {
  font-size: 2.8em !important;
  font-weight: 700 !important;
  color: #1e3a8a;
  text-align: center;
  margin-top: 28px;
  margin-bottom: 12px;
  text-shadow: 1px 2px 6px rgba(0,0,0,0.1);
}
.dark #title {
  color: #e0f7fa !important;
  text-shadow: 1px 2px 6px rgba(0,0,0,0.4);
}

/* Summary / Description (elem_id="summary") */
#summary {
  color: #374151;
  background: rgba(255,255,255,0.7);
  padding: 18px;
  border-radius: 16px;
  box-shadow: 0 4px 12px rgba(0,0,0,0.08);
  margin-bottom: 16px;
  text-align: justify !important;
}
.dark #summary {
  color: #d1d5db !important;
  background: rgba(30, 30, 46, 0.6) !important;
}

/* Help / Info Box */
#help_text {
  color: #1f2937;
  background: rgba(240, 249, 255, 0.9);
  padding: 16px;
  border-left: 5px solid #3b82f6;
  border-radius: 14px;
  box-shadow: 0 4px 10px rgba(0,0,0,0.05);
  margin-top: 12px;
  text-align: justify !important;
}
.dark #help_text {
  color: #d1d5db !important;
  background: rgba(30, 30, 46, 0.7) !important;
  border-left: 5px solid #60a5fa !important;
}

/* RTL Support — the "persian" class is toggled from src/ui.py */
.persian {
  direction: rtl;
  text-align: right;
}
#summary.persian, #help_text.persian {
  text-align: justify !important;
}
assets/markdown/english_summary.md ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ **SentiNet🤖** is an experimental project exploring different approaches to sentiment classification, with a focus on handling nuanced language phenomena such as sarcasm, shifting tones, and negation.
2
+ By fine‑tuning the Microsoft DeBERTa‑v3 encoder and comparing it against classic machine learning baselines and recurrent models, SentiNet demonstrates how modern Transformers capture contextual meaning beyond word‑level cues.
3
+ The system highlights the strengths and weaknesses of each approach while providing an interactive demo that outputs clear sentiment labels (😀 Positive / 😞 Negative) alongside confidence scores, making evaluation both rigorous and accessible.
4
+ Project GitHub: [https://github.com/HooMAN/SentiNet](https://github.com/HooMAN/SentiNet)
assets/markdown/persian_summary.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ **سنتی‌نت🤖** یک پروژه‌ی آزمایشی برای بررسی رویکردهای مختلف در دسته‌بندی احساسات است؛ تمرکز اصلی آن بر چالش‌های زبانی مانند طعنه، تغییر لحن در طول جمله و نفی می‌باشد.
2
+ این مدل با استفاده از ترنسفورمر DeBERTa‑v3 مایکروسافت و آموزش روی داده‌های نقد فیلم، توانسته فراتر از روش‌های کلاسیک عمل کند و معنای متنی را در سطح جمله درک کند. سنتی‌نت علاوه بر مقایسه‌ی رویکردهای سنتی و بازگشتی با ترنسفورمر، یک دموی تعاملی ارائه می‌دهد که نتیجه را همراه با برچسب احساسی (😀 مثبت / 😞 منفی) و میزان اطمینان نمایش می‌دهد تا ارزیابی برای کاربر ساده و شفاف باشد.
3
+ پروژه در گیت‌هاب: [https://github.com/HooMAN/SentiNet](https://github.com/HooMAN/SentiNet)
model/config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "deberta-v2",
3
+ "attention_probs_dropout_prob": 0.1,
4
+ "hidden_act": "gelu",
5
+ "hidden_dropout_prob": 0.1,
6
+ "hidden_size": 768,
7
+ "initializer_range": 0.02,
8
+ "intermediate_size": 3072,
9
+ "max_position_embeddings": 512,
10
+ "relative_attention": true,
11
+ "position_buckets": 256,
12
+ "norm_rel_ebd": "layer_norm",
13
+ "share_att_key": true,
14
+ "pos_att_type": "p2c|c2p",
15
+ "layer_norm_eps": 1e-7,
16
+ "max_relative_positions": -1,
17
+ "position_biased_input": false,
18
+ "num_attention_heads": 12,
19
+ "num_hidden_layers": 12,
20
+ "type_vocab_size": 0,
21
+ "vocab_size": 128100
22
+ }
model/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
model/tokenizer_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "do_lower_case": false,
3
+ "vocab_type": "spm"
4
+ }
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ gradio
4
+ numpy
src/config.py ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass, field
2
+
3
+ # ---- Hyperparameter configuration ----
4
+
5
@dataclass
class HPARAMS:
    """Hyperparameter bundle for all SentiNet variants (BiGRU + Transformer).

    Dict-valued fields use ``field(default_factory=...)`` so each instance
    owns an independent copy of its mutable defaults.
    """

    # --- shared ---
    seed: int = 42
    url: str = "https://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz"

    # LR-scheduler settings (mode/factor/patience — presumably for
    # ReduceLROnPlateau; confirm against the trainer code).
    scheduler_hparams: dict = field(default_factory=lambda: {
        "factor": 0.5,
        "patience": 2,
        "mode": "min",
    })

    # --- BiGRU baseline ---
    max_seq_len_gru: int = 256
    batch_size_gru: int = 128
    vocab_size: int = 10000
    # Environment-specific local path to pretrained GloVe vectors.
    glove_txt_path: str = (
        "/mnt/e/ML_Files/PreTrained_Models/GloVe_Embeddings/glove.2024.wikigiga"
        ".200d/wiki_giga_2024_200_MFT20_vectors_seed_2024_alpha_0.75_eta_0.05_combined.txt"
    )

    model_hparams_gru: dict = field(default_factory=lambda: {
        "embedding_dim": 128,
        "hidden_size": 128,
        "dropout": 0.12,
        "num_gru_layers": 2,
        "use_dense": False,
        "dense_dropout_prob": 0.1,
    })

    optimizer_hparams_gru: dict = field(default_factory=lambda: {
        "lr": 1e-3,
        "weight_decay": 5e-4,
    })

    trainer_hparams_gru: dict = field(default_factory=lambda: {
        "n_epochs": 20,
        "use_early_stopping": True,
        "early_stopping_patience": 3,
        "scheduler_monitor": "val_loss",
        "restore_best_model": True,
    })

    # --- DeBERTa-v3 Transformer ---
    max_seq_len_transformer: int = 288
    # Environment-specific local path to the pretrained backbone.
    transformer_path: str = "/mnt/d/ML-Files/PreTrained-Models/HuggingFace/Transformer-Encoder/microsoft_deberta-v3-base/"
    batch_size_transformer: int = 32
    transformer_fc_dropout: float = 0.1

    optimizer_hparams_transformer: dict = field(default_factory=lambda: {
        "lr": 3e-5,
        "weight_decay": 5e-4,
    })

    trainer_hparams_transformer: dict = field(default_factory=lambda: {
        "n_epochs": 5,
        "use_early_stopping": True,
        "early_stopping_patience": 2,
        "scheduler_monitor": "val_loss",
        "restore_best_model": False,
    })
65
# Module-level default instance (importable as ``from src.config import hp``).
hp = HPARAMS()
src/model.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ from transformers import AutoModel, DebertaV2Tokenizer, AutoConfig
4
+
5
class SentiNetTransformer(nn.Module):
    """Binary sentiment head on top of a pretrained Transformer backbone.

    The backbone is instantiated from its config file only (no pretrained
    weights downloaded here); trained parameters are expected to be loaded
    afterwards via ``load_state_dict``. ``forward`` emits one raw logit per
    example — apply a sigmoid outside to obtain a probability.
    """

    def __init__(self, model_path: str, fc_dropout: float = 0.1):
        super().__init__()
        backbone_cfg = AutoConfig.from_pretrained(model_path)
        self.transformer = AutoModel.from_config(backbone_cfg)
        width = self.transformer.config.hidden_size

        # NOTE: keep this exact module layout — checkpoint state-dict keys
        # ("fc.0.*", "output.*") depend on it.
        self.fc = nn.Sequential(
            nn.Linear(width, width),
            nn.ReLU(),
            nn.Dropout(fc_dropout),
        )
        self.output = nn.Linear(width, 1)

    def forward(self, encodings: dict):
        """Map a tokenizer encoding dict to a (N, 1) logit tensor."""
        hidden = self.transformer(**encodings).last_hidden_state  # (N, L, H)
        pooled = hidden[:, 0, :]  # embedding at the [CLS] position: (N, H)
        return self.output(self.fc(pooled))  # (N, 1)
src/ui.py ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+
4
def read_file(path: str, default_content: str = "") -> str:
    """Return the contents of *path*, creating it with *default_content*
    first when it does not exist.

    Parent directories are created as needed. Fix: the original called
    ``os.makedirs(os.path.dirname(path))`` unconditionally, which raises
    FileNotFoundError for a bare filename (dirname is "").
    """
    parent = os.path.dirname(path)
    if parent:  # os.makedirs("") raises FileNotFoundError
        os.makedirs(parent, exist_ok=True)
    if not os.path.exists(path):
        with open(path, "w", encoding="utf-8") as f:
            f.write(default_content)
    with open(path, "r", encoding="utf-8") as f:
        return f.read()
14
+
15
+
16
def build_demo(
    generation_fn,
    english_title: str,
    persian_title: str,
    assets_dir: str = "assets",
    app_title: str = "SentiNet Demo"
):
    """Assemble the Gradio Blocks UI for SentiNet.

    Args:
        generation_fn: callable(text) -> (label, confidence), wired to the
            classify button.
        english_title: markdown string shown as the default (English) title.
        persian_title: markdown title shown after switching to Persian.
        assets_dir: root directory holding markdown/ summaries and css/ theme.
        app_title: browser tab title for the Blocks app.

    Returns:
        The constructed (un-launched) gr.Blocks instance.
    """
    # read_file creates missing asset files, so the demo still builds on a
    # fresh checkout without the markdown/css assets present.
    md_dir = os.path.join(assets_dir, "markdown")
    css_dir = os.path.join(assets_dir, "css")
    english_md = os.path.join(md_dir, "english_summary.md")
    persian_md = os.path.join(md_dir, "persian_summary.md")
    english_summary = read_file(english_md)
    persian_summary = read_file(persian_md)

    css_file = os.path.join(css_dir, "custom.css")
    css = read_file(css_file, "/* Custom CSS overrides */\n")

    with gr.Blocks(css=css, title=app_title) as demo:
        # elem_id values ("title", "summary") match selectors in custom.css.
        title_md = gr.Markdown(english_title, elem_id="title")

        # Language toggle buttons.
        with gr.Row():
            english_btn = gr.Button("English")
            persian_btn = gr.Button("فارسی (Persian)")

        summary_md = gr.Markdown(english_summary, elem_id="summary")

        # generation panel
        with gr.Row(variant="panel"):
            with gr.Column(scale=1, variant="panel"):
                text_inp = gr.Textbox(
                    label="Enter a sentence",
                    placeholder="Type your movie review here..."
                )
                generate_btn = gr.Button("🔍 Classify Sentiment", variant="primary")

            with gr.Column(scale=1, variant="panel"):
                sentiment_label = gr.Label(label="Prediction")
                confidence_out = gr.Number(label="Confidence Score")

        # events
        generate_btn.click(
            generation_fn,
            inputs=[text_inp],
            outputs=[sentiment_label, confidence_out]
        )

        # Language switchers: swap title/summary text and toggle the
        # "persian" CSS class, which enables RTL layout in custom.css.
        def set_english():
            return (
                gr.update(value=english_title, elem_classes=[]),
                gr.update(value=english_summary, elem_classes=[]),
            )

        def set_persian():
            return (
                gr.update(value=persian_title, elem_classes=["persian"]),
                gr.update(value=persian_summary, elem_classes=["persian"]),
            )

        english_btn.click(set_english, outputs=[title_md, summary_md])
        persian_btn.click(set_persian, outputs=[title_md, summary_md])

    return demo