Dusit-P committed
Commit 48e0979 · verified · 1 Parent(s): 257b029

Upload 13 files
LICENSE ADDED
@@ -0,0 +1 @@
+ Apache-2.0
README.md ADDED
@@ -0,0 +1,6 @@
+ # Thai Sentiment (WangchanBERTa + LSTM Heads)
+
+ ## Install
+ ```bash
+ pip install -r requirements.txt
+ ```
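The README stops at installation; here is a minimal quick-start sketch, assuming the repo is cloned and run from its root. It uses the `load_model`/`predict` helpers from `infer.py` below; only the `WCB` and `WCB_CNN_BiLSTM` folders map onto classes defined in `common/models.py`.

```python
# Quick-start sketch: load one exported head and classify a Thai sentence.
from infer import load_model, predict

model, tok, cfg = load_model("WCB_CNN_BiLSTM")  # or "WCB"
pred, prob = predict(["อาหารอร่อยมาก บริการดี"], model, tok, cfg)
print(pred, prob)  # class indices plus softmax probabilities
```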
WCB/config.json ADDED
@@ -0,0 +1,18 @@
+ {
+   "model_type": "custom_wcb_sentiment",
+   "base_model": "airesearch/wangchanberta-base-att-spm-uncased",
+   "architecture": "WCB",
+   "num_labels": 2,
+   "id2label": {
+     "0": "NEG",
+     "1": "POS"
+   },
+   "label2id": {
+     "NEG": 0,
+     "POS": 1
+   },
+   "max_length": 128,
+   "pooling_after_lstm": "masked_mean",
+   "export_source_checkpoint": "best_m1_wcb_5models_wcb_comparison.pth",
+   "export_experiment": "5models_wcb_comparison"
+ }
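All four exported heads share this config schema; a short sketch of reading the label maps, keeping in mind that JSON object keys are strings, so an integer class index must be stringified before the `id2label` lookup:

```python
import json

with open("WCB/config.json", encoding="utf-8") as f:
    cfg = json.load(f)

pred_id = 1
print(cfg["id2label"][str(pred_id)])          # "POS" (keys are strings in JSON)
print(cfg["label2id"][cfg["id2label"]["0"]])  # round-trips back to 0
```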
WCB/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:21f68a987efcb981173b90aa55d2827622265a108842e97dd27975f9ca99bfd5
+ size 421007280
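The `model.safetensors` entries are Git LFS pointers: per the LFS v1 spec, `oid` is the SHA-256 of the real blob and `size` is its length in bytes. A small sketch for checking a downloaded copy against its pointer:

```python
# Verify a downloaded LFS file against its pointer (oid = SHA-256, size = bytes).
import hashlib
import os

def verify_lfs_file(path: str, oid_hex: str, size: int) -> bool:
    if os.path.getsize(path) != size:
        return False
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == oid_hex

print(verify_lfs_file(
    "WCB/model.safetensors",
    "21f68a987efcb981173b90aa55d2827622265a108842e97dd27975f9ca99bfd5",
    421007280,
))
```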
WCB_4Layer_BiLSTM/config.json ADDED
@@ -0,0 +1,18 @@
+ {
+   "model_type": "custom_wcb_sentiment",
+   "base_model": "airesearch/wangchanberta-base-att-spm-uncased",
+   "architecture": "WCB_4Layer_BiLSTM",
+   "num_labels": 2,
+   "id2label": {
+     "0": "NEG",
+     "1": "POS"
+   },
+   "label2id": {
+     "NEG": 0,
+     "POS": 1
+   },
+   "max_length": 128,
+   "pooling_after_lstm": "masked_mean",
+   "export_source_checkpoint": "best_m4_wcb_4layer_bilstm_5models_wcb_comparison.pth",
+   "export_experiment": "5models_wcb_comparison"
+ }
WCB_4Layer_BiLSTM/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:0450384ee6ee1dea3352ef6805f92efcace76c6b80fe2b08d1ce7b1d4e340254
+ size 424682216
WCB_BiLSTM/config.json ADDED
@@ -0,0 +1,18 @@
+ {
+   "model_type": "custom_wcb_sentiment",
+   "base_model": "airesearch/wangchanberta-base-att-spm-uncased",
+   "architecture": "WCB_BiLSTM",
+   "num_labels": 2,
+   "id2label": {
+     "0": "NEG",
+     "1": "POS"
+   },
+   "label2id": {
+     "NEG": 0,
+     "POS": 1
+   },
+   "max_length": 128,
+   "pooling_after_lstm": "masked_mean",
+   "export_source_checkpoint": "best_m2_wcb_bilstm_5models_wcb_comparison.pth",
+   "export_experiment": "5models_wcb_comparison"
+ }
WCB_BiLSTM/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bc6742020cc8966603c74b03bd8d091c9d19c451b307f0e2103e05200d07c090
+ size 424682128
WCB_CNN_BiLSTM/config.json ADDED
@@ -0,0 +1,18 @@
+ {
+   "model_type": "custom_wcb_sentiment",
+   "base_model": "airesearch/wangchanberta-base-att-spm-uncased",
+   "architecture": "WCB_CNN_BiLSTM",
+   "num_labels": 2,
+   "id2label": {
+     "0": "NEG",
+     "1": "POS"
+   },
+   "label2id": {
+     "NEG": 0,
+     "POS": 1
+   },
+   "max_length": 128,
+   "pooling_after_lstm": "masked_mean",
+   "export_source_checkpoint": "best_m3_wcb_cnn_bilstm_5models_wcb_comparison.pth",
+   "export_experiment": "5models_wcb_comparison"
+ }
WCB_CNN_BiLSTM/model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bc09bb197646b9ea0f534396ebac19e5eb6e5162616bcf6a62ed6941409316e2
+ size 423569368
common/models.py ADDED
@@ -0,0 +1,76 @@
+ # common/models.py
+ import torch.nn as nn
+ import torch.nn.functional as F
+ from transformers import AutoModel
+
+ # Base settings; these must match the values used during training.
+ BASE_MODEL_NAME = "airesearch/wangchanberta-base-att-spm-uncased"
+ POOLING_AFTER_LSTM = "masked_mean"
+
+ class BaseHead(nn.Module):
+     """BiLSTM head with a pooling step and a linear classifier."""
+     def __init__(self, hidden_in, hidden_lstm=128, num_classes=2, dropout=0.3, pooling='masked_mean'):
+         super().__init__()
+         assert pooling in ['cls', 'masked_mean', 'masked_max']
+         self.lstm = nn.LSTM(hidden_in, hidden_lstm, bidirectional=True, batch_first=True)
+         self.dropout = nn.Dropout(dropout)
+         self.fc = nn.Linear(hidden_lstm * 2, num_classes)
+         self.pooling = pooling
+
+     def pool(self, x, mask):
+         if self.pooling == 'cls':
+             return x[:, 0, :]  # first-token representation
+         # Cast so the integer attention mask multiplies cleanly with float activations.
+         mask = mask.unsqueeze(-1).to(x.dtype)
+         if self.pooling == 'masked_mean':
+             # Average over real tokens only; padding contributes nothing.
+             s = (x * mask).sum(1)
+             d = mask.sum(1).clamp(min=1e-6)
+             return s / d
+         # masked_max: push padding positions toward -inf before taking the max.
+         x = x.masked_fill(mask == 0, -1e9)
+         return x.max(1).values
+
+     def forward_after_bert(self, seq, mask):
+         x, _ = self.lstm(seq)
+         x = self.pool(x, mask)
+         return self.fc(self.dropout(x))
+
+ class Model1Baseline(nn.Module):
+     """WangchanBERTa encoder + BiLSTM head."""
+     def __init__(self, name=BASE_MODEL_NAME, hidden=128, dropout=0.3, classes=2, pooling=POOLING_AFTER_LSTM):
+         super().__init__()
+         self.bert = AutoModel.from_pretrained(name)
+         self.head = BaseHead(self.bert.config.hidden_size, hidden, classes, dropout, pooling)
+
+     def forward(self, ids, mask):
+         out = self.bert(input_ids=ids, attention_mask=mask)
+         return self.head.forward_after_bert(out.last_hidden_state, mask)
+
+ class Model2CNNBiLSTM(nn.Module):
+     """WangchanBERTa encoder + two Conv1d layers + BiLSTM head."""
+     def __init__(self, name=BASE_MODEL_NAME, hidden=128, dropout=0.3, classes=2, pooling=POOLING_AFTER_LSTM):
+         super().__init__()
+         self.bert = AutoModel.from_pretrained(name)
+         H = self.bert.config.hidden_size
+         self.c1 = nn.Conv1d(H, 128, 3, padding=1)
+         self.c2 = nn.Conv1d(128, 128, 5, padding=2)
+         self.head = BaseHead(128, hidden, classes, dropout, pooling)
+
+     def forward(self, ids, mask):
+         out = self.bert(input_ids=ids, attention_mask=mask).last_hidden_state
+         # Conv1d expects (batch, channels, seq), hence the transposes.
+         x = F.relu(self.c1(out.transpose(1, 2)))
+         x = F.relu(self.c2(x)).transpose(1, 2)
+         return self.head.forward_after_bert(x, mask)
+
+ def create_model_by_name(model_name):
+     # Accept both the training-script names and the "architecture" strings
+     # written into the exported config.json files. Only these two heads are
+     # defined in this file; the WCB -> Model1 mapping is inferred from the
+     # "best_m1_wcb_*" checkpoint name.
+     if model_name in ("Model1_Baseline", "WCB"):
+         return Model1Baseline()
+     if model_name in ("Model2_CNN_BiLSTM", "WCB_CNN_BiLSTM"):
+         return Model2CNNBiLSTM()
+     raise ValueError(f"Unknown model name: {model_name}")
infer.py ADDED
@@ -0,0 +1,45 @@
+ # infer.py
+ import os, sys, json, torch
+ import torch.nn.functional as F
+ from transformers import AutoTokenizer
+ from safetensors.torch import load_file
+
+ # Reuse the shared architecture definitions.
+ sys.path.append(os.path.join(os.path.dirname(__file__), "common"))
+ from models import create_model_by_name
+
+ def load_model(model_dir: str):
+     cfg_path = os.path.join(model_dir, "config.json")
+     w_path = os.path.join(model_dir, "model.safetensors")
+     if not (os.path.exists(cfg_path) and os.path.exists(w_path)):
+         raise FileNotFoundError("config.json or model.safetensors is missing")
+
+     with open(cfg_path, "r", encoding="utf-8") as f:
+         cfg = json.load(f)
+
+     tok = AutoTokenizer.from_pretrained(cfg["base_model"])
+     model = create_model_by_name(cfg["architecture"])  # key as written in config.json
+     state = load_file(w_path)
+     model.load_state_dict(state)
+     model.eval()
+     return model, tok, cfg
+
+ def predict(texts, model, tok, cfg):
+     enc = tok(texts, padding=True, truncation=True, max_length=cfg["max_length"], return_tensors="pt")
+     with torch.no_grad():
+         logits = model(enc["input_ids"], enc["attention_mask"])
+     prob = F.softmax(logits, dim=1).cpu().numpy()
+     pred = prob.argmax(1)
+     return pred, prob
+
+ if __name__ == "__main__":
+     # Pick a model folder; "WCB" and "WCB_CNN_BiLSTM" map onto the classes in common/models.py.
+     MODEL_DIR = sys.argv[1] if len(sys.argv) > 1 else "WCB_CNN_BiLSTM"
+
+     model, tok, cfg = load_model(MODEL_DIR)
+     # "The food is delicious, great service" / "Not impressed at all, so slow"
+     xs = ["อาหารอร่อยมาก บริการดี", "ไม่ประทับใจเลย ช้ามาก"]
+     y, p = predict(xs, model, tok, cfg)
+     labels = [cfg["id2label"][str(i)] for i in range(cfg["num_labels"])]
+     for t, yy, pp in zip(xs, y, p):
+         print(f"{t} => {labels[yy]} | prob={pp}")
requirements.txt ADDED
@@ -0,0 +1,4 @@
+ torch
+ transformers
+ safetensors
+ sentencepiece  # needed by the SPM-based WangchanBERTa tokenizer