Spaces:

Kleinpuki2
/

ai1

Sleeping

App Files Files Community

Kleinpuki2 committed on May 4

Commit

a69eabc

verified ·

1 Parent(s): f2f562e

Upload model.py

Browse files

Files changed (1) hide show

model.py +209 -0

model.py ADDED Viewed

	@@ -0,0 +1,209 @@

+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+import json
+import re
+class BPETokenizer:
+    """Byte-pair-encoding tokenizer backed by a ``tiktoken`` encoding.
+    Attributes:
+        model_type: Name of the tiktoken encoding passed to the constructor.
+        enc: The encoding object returned by ``tiktoken.get_encoding``.
+        vocab_size: Number of tokens in the encoding (``enc.n_vocab``).
+    """
+    def __init__(self, model_type="gpt2"):
+        # Imported here, so tiktoken is only required once a BPETokenizer is created.
+        import tiktoken
+        self.model_type = model_type
+        self.enc = tiktoken.get_encoding(model_type)
+        self.vocab_size = self.enc.n_vocab
+    def encode(self, text: str):
+        """Return the token ids for ``text``; ``<|endoftext|>`` is allowed as a special token."""
+        return self.enc.encode(text, allowed_special={'<|endoftext|>'})
+    def decode(self, ids):
+        """Return the text for a sequence of token ids."""
+        return self.enc.decode(ids)
+    def save(self, path: str):
+        """Write a small JSON descriptor naming the encoding in use to ``path``."""
+        with open(path, "w", encoding="utf-8") as f:
+            # Record the encoding actually in use rather than a hard-coded "gpt2".
+            json.dump({"type": "bpe", "model": self.model_type}, f)
+    def load(self, path: str):
+        """Do nothing: the encoding is fixed at construction time.
+        The method exists so both tokenizer classes expose the same interface.
+        """
+        pass
+class WordTokenizer:
+    """Lower-casing word-level tokenizer with a frequency-ranked vocabulary.
+    Text is split into runs of word characters, single punctuation characters
+    and newlines. Id 0 is ``<PAD>`` and id 1 is ``<UNK>``.
+    Attributes:
+        word2idx: Mapping from token string to id.
+        idx2word: Mapping from id to token string.
+        vocab_size: Number of entries in ``word2idx``.
+    """
+    # Compiled once and shared by build() and encode().
+    _TOKEN_RE = re.compile(r"\w+|[^\w\s]|\n")
+    # Tokens that attach to the preceding token when decoding.
+    _NO_SPACE_BEFORE = frozenset(".,!?;:)]}\"'")
+    # Tokens after which the next token is written without a separating space.
+    _NO_SPACE_AFTER = frozenset("([{\n")
+    def __init__(self):
+        self.word2idx = {"<PAD>": 0, "<UNK>": 1}
+        self.idx2word = {0: "<PAD>", 1: "<UNK>"}
+        self.vocab_size = 2
+    def build(self, text: str, max_vocab: int = 10000):
+        """Add the most frequent tokens of ``text`` to the vocabulary.
+        Tokens are added in descending frequency until the vocabulary holds
+        ``max_vocab`` entries (including ``<PAD>`` and ``<UNK>``). Tokens that
+        are already known keep their id, so calling build() again extends the
+        vocabulary instead of reassigning or duplicating ids.
+        """
+        from collections import Counter
+        counts = Counter(self._TOKEN_RE.findall(text.lower()))
+        for word, _ in counts.most_common():
+            if len(self.word2idx) >= max_vocab:
+                break
+            if word in self.word2idx:
+                continue
+            idx = len(self.word2idx)
+            self.word2idx[word] = idx
+            self.idx2word[idx] = word
+        self.vocab_size = len(self.word2idx)
+    def encode(self, text: str):
+        """Return the ids for ``text``; unknown tokens map to the ``<UNK>`` id (1)."""
+        unk = self.word2idx.get("<UNK>", 1)
+        return [self.word2idx.get(t, unk) for t in self._TOKEN_RE.findall(text.lower())]
+    def decode(self, ids):
+        """Join the tokens for ``ids`` into text; unknown ids render as ``<UNK>``.
+        Tokens are separated by one space, except that closing punctuation and
+        newlines attach to the previous token, and nothing is inserted after a
+        newline or an opening bracket.
+        """
+        parts = []
+        space_allowed = False  # False at the start and right after "\n" / "(" / "[" / "{"
+        for i in ids:
+            w = self.idx2word.get(i, "<UNK>")
+            if space_allowed and w != "\n" and w not in self._NO_SPACE_BEFORE:
+                parts.append(" ")
+            parts.append(w)
+            space_allowed = w not in self._NO_SPACE_AFTER
+        return "".join(parts)
+    def save(self, path: str):
+        """Write both vocabulary mappings to ``path`` as UTF-8 JSON."""
+        with open(path, "w", encoding="utf-8") as f:
+            json.dump({
+                "word2idx": self.word2idx,
+                # JSON object keys must be strings; load() converts them back to int.
+                "idx2word": {str(k): v for k, v in self.idx2word.items()}
+            }, f, ensure_ascii=False)
+    def load(self, path: str):
+        """Replace the vocabulary with the mappings stored in a file written by save()."""
+        with open(path, "r", encoding="utf-8") as f:
+            data = json.load(f)
+        self.word2idx = data["word2idx"]
+        self.idx2word = {int(k): v for k, v in data["idx2word"].items()}
+        self.vocab_size = len(self.word2idx)
+class MiniTransformer(nn.Module):
+    """Small causally-masked Transformer language model.
+    Token and learned position embeddings are summed, passed through
+    ``n_layers`` pre-norm ``nn.TransformerEncoderLayer`` blocks under a causal
+    mask, normalised, and projected to vocabulary logits.
+    Args:
+        vocab_size: Number of token ids.
+        emb_dim: Embedding / model width.
+        n_layers: Number of Transformer blocks.
+        n_heads: Attention heads per block.
+        ctx_len: Maximum sequence length (size of the position table).
+        dropout: Dropout probability for the embeddings and the blocks.
+    """
+    def __init__(self, vocab_size, emb_dim=128, n_layers=4, n_heads=4, ctx_len=64, dropout=0.1):
+        super().__init__()
+        self.ctx_len = ctx_len
+        self.n_heads = n_heads
+        self.emb_dim = emb_dim
+        self.n_layers = n_layers
+        self.token_embedding_table = nn.Embedding(vocab_size, emb_dim)
+        self.position_embedding_table = nn.Embedding(ctx_len, emb_dim)
+        self.drop = nn.Dropout(dropout)
+        self.blocks = nn.ModuleList([
+            nn.TransformerEncoderLayer(
+                d_model=emb_dim,
+                nhead=n_heads,
+                dim_feedforward=emb_dim * 4,
+                dropout=dropout,
+                batch_first=True,
+                norm_first=True,
+                activation='gelu'
+            ) for _ in range(n_layers)
+        ])
+        self.ln_f = nn.LayerNorm(emb_dim)
+        self.lm_head = nn.Linear(emb_dim, vocab_size, bias=False)
+        self.apply(self._init_weights)
+    def _init_weights(self, module):
+        """Initialise Linear and Embedding weights from N(0, 0.02) and zero Linear biases."""
+        if isinstance(module, nn.Linear):
+            nn.init.normal_(module.weight, mean=0.0, std=0.02)
+            if module.bias is not None:
+                nn.init.zeros_(module.bias)
+        elif isinstance(module, nn.Embedding):
+            nn.init.normal_(module.weight, mean=0.0, std=0.02)
+    def forward(self, idx, targets=None, use_checkpointing=False):
+        """Compute next-token logits and, when ``targets`` is given, the loss.
+        Args:
+            idx: Integer tensor of token ids with shape (B, T), T <= ctx_len.
+            targets: Optional tensor of target ids with B*T elements.
+            use_checkpointing: Recompute block activations during backward
+                instead of storing them; only takes effect in training mode.
+        Returns:
+            ``(logits, loss)`` where logits has shape (B, T, vocab_size) and
+            loss is the mean cross-entropy, or None without targets.
+        Raises:
+            IndexError: If T exceeds ``ctx_len``.
+        """
+        device = idx.device
+        B, T = idx.shape
+        if T > self.ctx_len:
+            # Fail with a clear message instead of an opaque embedding lookup error.
+            raise IndexError(f"sequence length {T} exceeds ctx_len {self.ctx_len}")
+        tok_emb = self.token_embedding_table(idx)
+        pos_emb = self.position_embedding_table(torch.arange(T, device=device))
+        x = self.drop(tok_emb + pos_emb)
+        # True above the diagonal: position i may not attend to positions > i.
+        mask = torch.triu(torch.ones(T, T, device=device), diagonal=1).bool()
+        checkpointed = use_checkpointing and self.training
+        if checkpointed:
+            from torch.utils.checkpoint import checkpoint
+        for block in self.blocks:
+            if checkpointed:
+                # Bind the block as a default argument: the function is called
+                # again during backward, when the loop variable already refers
+                # to the last block.
+                def custom_forward(x_in, m_in, _block=block):
+                    return _block(x_in, src_mask=m_in, is_causal=True)
+                x = checkpoint(custom_forward, x, mask, use_reentrant=False)
+            else:
+                x = block(x, src_mask=mask, is_causal=True)
+        x = self.ln_f(x)
+        logits = self.lm_head(x)
+        loss = None
+        if targets is not None:
+            loss = F.cross_entropy(logits.reshape(B * T, -1), targets.reshape(B * T))
+        return logits, loss
+    def _prepare_prompt(self, idx):
+        """Return ``idx`` as a tensor on the model's device; a list becomes a batch of one."""
+        device = next(self.parameters()).device
+        if isinstance(idx, list):
+            idx = torch.tensor([idx], dtype=torch.long)
+        return idx.to(device)
+    def _next_token_probs(self, idx, temperature, top_k, top_p, repetition_penalty):
+        """Return the (B, vocab_size) sampling distribution for the token after ``idx``."""
+        logits, _ = self(idx[:, -self.ctx_len:])
+        logits = logits[:, -1, :]
+        greedy = temperature <= 0
+        if not greedy:
+            logits = logits / temperature
+        if repetition_penalty != 1.0:
+            # Penalise every token already present in each row exactly once:
+            # positive logits are divided, non-positive ones multiplied.
+            tokens = idx.long()
+            seen = logits.gather(1, tokens)
+            seen = torch.where(seen > 0, seen / repetition_penalty, seen * repetition_penalty)
+            logits = logits.scatter(1, tokens, seen)
+        if greedy:
+            # temperature <= 0 means argmax decoding; a one-hot distribution
+            # keeps the sampling code path unchanged.
+            return F.one_hot(logits.argmax(dim=-1), logits.size(-1)).to(logits.dtype)
+        if top_k is not None and top_k > 0:
+            kth = torch.topk(logits, min(top_k, logits.size(-1))).values[:, [-1]]
+            logits = logits.masked_fill(logits < kth, float('-inf'))
+        if top_p is not None and 0.0 < top_p < 1.0:
+            sorted_logits, sorted_indices = torch.sort(logits, descending=True)
+            cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
+            sorted_remove = cumulative_probs > top_p
+            # Shift right so the first token that crosses the threshold is kept.
+            sorted_remove[..., 1:] = sorted_remove[..., :-1].clone()
+            sorted_remove[..., 0] = False
+            remove = sorted_remove.scatter(1, sorted_indices, sorted_remove)
+            logits = logits.masked_fill(remove, float('-inf'))
+        return F.softmax(logits, dim=-1)
+    def _sample_next(self, idx, temperature, top_k, top_p, repetition_penalty):
+        """Sample one token per row in eval mode without gradients.
+        The previous training mode is restored before returning. Returns
+        ``(idx_next, probs)`` with shapes (B, 1) and (B, vocab_size).
+        """
+        was_training = self.training
+        self.eval()
+        try:
+            with torch.no_grad():
+                probs = self._next_token_probs(idx, temperature, top_k, top_p, repetition_penalty)
+                idx_next = torch.multinomial(probs, num_samples=1)
+        finally:
+            self.train(was_training)
+        return idx_next, probs
+    def generate(self, idx, max_new_tokens, temperature=0.8, top_k=40, repetition_penalty=1.0, top_p=1.0):
+        """Append ``max_new_tokens`` sampled tokens to ``idx`` and return the result.
+        Args:
+            idx: Prompt as a list of ids (one sequence) or a (B, T) tensor.
+            max_new_tokens: Number of tokens to generate.
+            temperature: Logit divisor; values <= 0 select greedy decoding.
+            top_k: Keep only the k highest logits; 0 or None disables.
+            repetition_penalty: Penalty for tokens already in the sequence; 1.0 disables.
+            top_p: Nucleus threshold; values outside (0, 1) disable it.
+        Returns:
+            Tensor of shape (B, T + max_new_tokens) on the model's device.
+        """
+        idx = self._prepare_prompt(idx)
+        for _ in range(max_new_tokens):
+            idx_next, _ = self._sample_next(idx, temperature, top_k, top_p, repetition_penalty)
+            idx = torch.cat((idx, idx_next), dim=1)
+        return idx
+    def generate_stream(self, idx, max_new_tokens, temperature=0.8, top_k=40, top_p=0.9, repetition_penalty=1.2):
+        """Yield ``(token_id, max_probability)`` for each newly sampled token.
+        Takes the same sampling arguments as generate(). Intended for a single
+        sequence, since each step calls ``.item()`` on the sampled token.
+        ``max_probability`` is the largest value of the distribution the token
+        was drawn from (1.0 under greedy decoding).
+        """
+        idx = self._prepare_prompt(idx)
+        for _ in range(max_new_tokens):
+            # Sampling runs inside _sample_next; yielding outside of it keeps
+            # the consumer's grad mode and the module's training flag untouched
+            # while the generator is suspended.
+            idx_next, probs = self._sample_next(idx, temperature, top_k, top_p, repetition_penalty)
+            yield idx_next.item(), torch.max(probs).item()
+            idx = torch.cat((idx, idx_next), dim=1)
+    def save(self, path: str):
+        """Save the weights and the constructor configuration to ``path``."""
+        torch.save({
+            'model_state': self.state_dict(),
+            'config': {
+                'vocab_size': self.token_embedding_table.num_embeddings,
+                'emb_dim': self.emb_dim,
+                'n_layers': self.n_layers,
+                'n_heads': self.n_heads,
+                'ctx_len': self.ctx_len,
+                'dropout': self.drop.p,
+            }
+        }, path)
+        print(f"Modell gespeichert: {path}")
+    @classmethod
+    def load(cls, path: str, device='cpu'):
+        """Rebuild a model from a checkpoint written by save() and move it to ``device``.
+        A CUDA device falls back to CPU when CUDA is unavailable; any other
+        device is used as given. Checkpoints without a stored dropout value
+        use 0.1.
+        """
+        if torch.device(device).type == 'cuda' and not torch.cuda.is_available():
+            device = 'cpu'
+        # NOTE(security): weights_only=False unpickles arbitrary Python objects;
+        # only load checkpoints from trusted sources.
+        ckpt = torch.load(path, map_location=device, weights_only=False)
+        cfg = ckpt['config']
+        m = cls(cfg['vocab_size'], cfg['emb_dim'], cfg['n_layers'], cfg['n_heads'], cfg['ctx_len'], cfg.get('dropout', 0.1))
+        m.load_state_dict(ckpt['model_state'])
+        m.to(device)
+        print(f"Modell geladen: {path}")
+        return m