Upload net.py
src/net.py (ADDED, +203 lines)
"""
Neural network models for Khmer space injection.
"""

import torch
import torch.nn as nn

class CRF(nn.Module):
    """Linear-chain CRF over a fixed tag set; forward() returns the mean NLL."""

    def __init__(self, num_tags):
        super().__init__()
        self.num_tags = num_tags
        self.transitions = nn.Parameter(torch.randn(num_tags, num_tags))
        self.start_transitions = nn.Parameter(torch.randn(num_tags))
        self.end_transitions = nn.Parameter(torch.randn(num_tags))

    def forward(self, emissions, tags, mask):
        # Negative log-likelihood: log Z(x) - score(x, y), averaged over the batch.
        log_num = self._score_sentence(emissions, tags, mask)
        log_den = self._log_partition(emissions, mask)
        return torch.mean(log_den - log_num)

    def _score_sentence(self, emissions, tags, mask):
        # emissions: (B, T, num_tags), tags: (B, T), mask: (B, T) with 1 = real token.
        B, T, _ = emissions.shape
        batch_idx = torch.arange(B, device=emissions.device)

        # Score of the gold path: start transition plus the first emission, then
        # (transition + emission) at every subsequent real position.
        score = self.start_transitions[tags[:, 0]] + emissions[batch_idx, 0, tags[:, 0]]

        for t in range(1, T):
            emit = emissions[batch_idx, t, tags[:, t]]
            trans = self.transitions[tags[:, t - 1], tags[:, t]]
            score += (emit + trans) * mask[:, t]

        last_idx = mask.long().sum(1) - 1
        last_tags = tags.gather(1, last_idx.unsqueeze(1)).squeeze(1)
        score += self.end_transitions[last_tags]
        return score

    def _log_partition(self, emissions, mask):
        # Forward algorithm in log space; alpha: (B, num_tags).
        alpha = self.start_transitions + emissions[:, 0]

        for t in range(1, emissions.size(1)):
            emit = emissions[:, t].unsqueeze(1)    # (B, 1, num_tags): next tag j
            trans = self.transitions.unsqueeze(0)  # (1, num_tags, num_tags): i -> j
            alpha_t = torch.logsumexp(alpha.unsqueeze(2) + trans + emit, dim=1)
            # Carry the previous alpha through padded positions instead of zeroing it.
            keep = mask[:, t].unsqueeze(1).bool()
            alpha = torch.where(keep, alpha_t, alpha)

        return torch.logsumexp(alpha + self.end_transitions, dim=1)

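# Illustrative sketch (not part of the original upload): exercising the CRF head
# on toy data. The batch size, length, and all-ones mask here are assumptions.
def _demo_crf():
    B, T, num_tags = 2, 5, 2
    crf = CRF(num_tags)
    emissions = torch.randn(B, T, num_tags)
    tags = torch.randint(0, num_tags, (B, T))
    mask = torch.ones(B, T)
    nll = crf(emissions, tags, mask)  # scalar mean negative log-likelihood
    print(f"CRF mean NLL: {nll.item():.4f}")
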
class RNN(nn.Module):
    """Single vanilla (Elman) cell: h_t = tanh(W_xh x_t + W_hh h_{t-1})."""

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.Wxh = nn.Linear(input_dim, hidden_dim)
        self.Whh = nn.Linear(hidden_dim, hidden_dim, bias=False)

    def forward(self, x_t, h_prev):
        return torch.tanh(self.Wxh(x_t) + self.Whh(h_prev))

class GRU(nn.Module):
    """Single GRU cell with update gate z, reset gate r, and a candidate state."""

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.z = nn.Linear(input_dim + hidden_dim, hidden_dim)
        self.r = nn.Linear(input_dim + hidden_dim, hidden_dim)
        self.h = nn.Linear(input_dim + hidden_dim, hidden_dim)

    def forward(self, x_t, h_prev):
        concat = torch.cat([x_t, h_prev], dim=-1)
        z_t = torch.sigmoid(self.z(concat))  # update gate
        r_t = torch.sigmoid(self.r(concat))  # reset gate

        concat_reset = torch.cat([x_t, r_t * h_prev], dim=-1)
        h_tilde = torch.tanh(self.h(concat_reset))  # candidate hidden state

        return (1 - z_t) * h_prev + z_t * h_tilde

class LSTM(nn.Module):
    """Single LSTM cell with input (i), forget (f), output (o), and cell (g) gates."""

    def __init__(self, input_dim, hidden_dim):
        super().__init__()
        self.i = nn.Linear(input_dim + hidden_dim, hidden_dim)
        self.f = nn.Linear(input_dim + hidden_dim, hidden_dim)
        self.o = nn.Linear(input_dim + hidden_dim, hidden_dim)
        self.g = nn.Linear(input_dim + hidden_dim, hidden_dim)

    def forward(self, x_t, state):
        h_prev, c_prev = state
        concat = torch.cat([x_t, h_prev], dim=-1)

        i_t = torch.sigmoid(self.i(concat))  # input gate
        f_t = torch.sigmoid(self.f(concat))  # forget gate
        o_t = torch.sigmoid(self.o(concat))  # output gate
        g_t = torch.tanh(self.g(concat))     # candidate cell update

        c_t = f_t * c_prev + i_t * g_t
        h_t = o_t * torch.tanh(c_t)
        return h_t, c_t

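# Illustrative sketch (not in the original file): stepping the hand-rolled LSTM
# cell once and checking shapes. The dimensions below are arbitrary assumptions.
def _demo_lstm_step():
    B, D, H = 4, 16, 32
    cell = LSTM(D, H)
    x_t = torch.randn(B, D)
    h0, c0 = torch.zeros(B, H), torch.zeros(B, H)
    h1, c1 = cell(x_t, (h0, c0))
    assert h1.shape == (B, H) and c1.shape == (B, H)
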
class BiRecurrentLayer(nn.Module):
    """Runs one recurrent cell over time, optionally in both directions.

    Note: sequences are assumed right-padded; the backward pass simply starts
    at t = T - 1, so any padded positions are folded into the backward states.
    """

    def __init__(self, cell_cls, input_dim, hidden_dim, bidirectional=True):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.bidirectional = bidirectional

        self.fw = cell_cls(input_dim, hidden_dim)
        if bidirectional:
            self.bw = cell_cls(input_dim, hidden_dim)

    def forward(self, x):
        B, T, _ = x.shape
        device = x.device
        H = self.hidden_dim

        # ---------- Forward direction ----------
        h_fw = []
        h = torch.zeros(B, H, device=device)
        c = torch.zeros_like(h) if isinstance(self.fw, LSTM) else None

        for t in range(T):
            if c is not None:
                h, c = self.fw(x[:, t], (h, c))
            else:
                h = self.fw(x[:, t], h)
            h_fw.append(h)

        h_fw = torch.stack(h_fw, dim=1)

        if not self.bidirectional:
            return h_fw

        # ---------- Backward direction ----------
        h_bw = []
        h = torch.zeros(B, H, device=device)
        c = torch.zeros_like(h) if isinstance(self.bw, LSTM) else None

        for t in reversed(range(T)):
            if c is not None:
                h, c = self.bw(x[:, t], (h, c))
            else:
                h = self.bw(x[:, t], h)
            h_bw.append(h)

        h_bw.reverse()
        h_bw = torch.stack(h_bw, dim=1)

        return torch.cat([h_fw, h_bw], dim=-1)

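# Illustrative sketch (not in the original file): a bidirectional GRU layer over
# a random batch; the output width doubles when bidirectional=True.
def _demo_bilayer():
    B, T, D, H = 2, 7, 16, 32
    layer = BiRecurrentLayer(GRU, input_dim=D, hidden_dim=H, bidirectional=True)
    out = layer(torch.randn(B, T, D))
    assert out.shape == (B, T, 2 * H)
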
class KhmerRNN(nn.Module):
    """Stacked (bi)recurrent character tagger with an optional CRF head for
    Khmer space injection (two output tags per position)."""

    def __init__(
        self,
        vocab_size,
        embedding_dim=128,
        hidden_dim=256,
        num_layers=2,
        dropout=0.3,
        bidirectional=True,
        rnn_type="lstm",
        residual=True,
        use_crf=True,
    ):
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=0)
        self.dropout = nn.Dropout(dropout)
        self.residual = residual
        self.use_crf = use_crf

        cell_map = {
            "rnn": RNN,
            "gru": GRU,
            "lstm": LSTM,
        }
        cell_cls = cell_map[rnn_type.lower()]

        self.layers = nn.ModuleList()
        input_dim = embedding_dim

        for _ in range(num_layers):
            layer = BiRecurrentLayer(
                cell_cls=cell_cls,
                input_dim=input_dim,
                hidden_dim=hidden_dim,
                bidirectional=bidirectional,
            )
            self.layers.append(layer)
            input_dim = hidden_dim * (2 if bidirectional else 1)

        self.fc = nn.Linear(input_dim, 2)

        if use_crf:
            self.crf = CRF(num_tags=2)

    def forward(self, x, tags=None, mask=None):
        # x: (B, T) token ids. With tags (and a CRF) the loss is returned;
        # otherwise the per-position emission scores are returned.
        if mask is None:
            mask = (x != 0).float()  # infer padding from padding_idx=0

        out = self.embedding(x)

        for layer in self.layers:
            residual = out
            out = layer(out)

            # Residual connections only apply once layer widths line up.
            if self.residual and out.shape == residual.shape:
                out = out + residual

            out = self.dropout(out)

        emissions = self.fc(out)

        if self.use_crf and tags is not None:
            return self.crf(emissions, tags, mask)

        return emissions

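# Illustrative end-to-end sketch (not part of the original upload). The vocab
# size, sequence length, and random inputs below are stand-ins for real Khmer
# data; decoding here is a simple argmax since no Viterbi decoder is defined.
if __name__ == "__main__":
    model = KhmerRNN(vocab_size=100)
    x = torch.randint(1, 100, (2, 12))   # (B, T) character ids, 0 = padding
    tags = torch.randint(0, 2, (2, 12))  # gold space / no-space tags

    loss = model(x, tags=tags)           # CRF mean negative log-likelihood
    loss.backward()
    print(f"training loss: {loss.item():.4f}")

    with torch.no_grad():
        emissions = model(x)             # (B, T, 2) emission scores
        pred = emissions.argmax(-1)      # greedy per-position decode
    print(pred.shape)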