Spaces:

InfinitodeLTD
/

KaosGen

Sleeping

App Files Files Community

JohanBeytell committed on Jun 24, 2025

Commit

6f4e03e

verified ·

1 Parent(s): 4f0da9b

Update app.py

Browse files

Files changed (1) hide show

app.py +62 -126

app.py CHANGED Viewed

@@ -1,30 +1,24 @@
 import gradio as gr
 import torch
 import torch.nn as nn
-import math
-import re
-import unicodedata
-import random
-import os
-# --- Load constants and model ---
 SEED = 1337
-random.seed(SEED)
 torch.manual_seed(SEED)
-torch.cuda.manual_seed_all(SEED)
-DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
-MAX_LEN = 128
-SPECIAL = ['<pad>', '<bos>', '<eos>', '<sep>']
-BOS, EOS, PAD, SEP = 1, 2, 0, 3
-# Load vocab
 ckpt = torch.load("kaos.pt", map_location=DEVICE)
-stoi = ckpt["stoi"]
-itos = ckpt["itos"]
 VOCAB_SIZE = len(itos)
 class GPTSmall(nn.Module):
     def __init__(self, vocab_size, d_model=256, n_head=8, n_layer=4, dropout=0.2, max_len=MAX_LEN):
         super().__init__()
@@ -38,129 +32,71 @@ class GPTSmall(nn.Module):
     def forward(self, x):
         B, T = x.shape
-        tok = self.tok_emb(x)
-        tok = tok + self.pos_emb[:, :T]
         mask = torch.triu(torch.ones(T, T, device=x.device, dtype=torch.bool), 1)
         for blk in self.blocks:
             tok = blk(tok, src_key_padding_mask=(x == PAD), src_mask=mask)
-        tok = self.norm(tok)
-        return self.head(tok)
 model = GPTSmall(VOCAB_SIZE).to(DEVICE)
-model.load_state_dict(ckpt["model"])
 model.eval()
-# --- Clean + scoring ---
 def proper_case(text):
-    return re.sub(r"\\b(of|the|and|in|on|a)\\b", lambda m: m.group(0).lower(), text.title())
 def clean_name(text, title_case=True, max_repeats=2):
     text = unicodedata.normalize("NFC", text)
-    text = re.sub(r"(.)\\1{2,}", lambda m: m.group(1) * max_repeats, text, flags=re.IGNORECASE)
-    text = re.sub(r"’S|\\'S", "'s", text)
-    text = re.sub(r"[^0-9A-Za-zÀ-ÖØ-öø-ÿ'’\\-\\s]", "", text)
-    text = re.sub(r"\\s+", " ", text).strip()
     if title_case:
         text = proper_case(text)
-    text = re.sub(r"\\b(The|Of|In|On|A)\\s+\\1\\b", r"\\1", text, flags=re.IGNORECASE)
-    text = re.sub(r"([a-zA-Z])'S\\b", lambda m: m.group(1) + "'s", text)
-    return text
-def has_weird_word_lengths(name, min_len=3, max_len=24):
-    return any(len(word) < min_len or len(word) > max_len for word in name.split())
-def gibberish_score(name):
-    common_tris = {"the", "and", "ing", "ion", "ent", "ati", "for", "her", "ter", "tha", "ere", "nth", "tio", "ver",
-                   "his", "hat", "ers", "rea", "all", "ill", "ari", "est", "oth", "eve", "eld", "sky", "dra", "sha", "mir"}
-    text = name.lower().replace(" ", "")
-    trigrams = [text[i:i+3] for i in range(len(text) - 2)]
-    if not trigrams:
-        return 1.0
-    bad = sum(1 for tri in trigrams if tri not in common_tris)
-    return bad / len(trigrams)
-def pronounceability_score(name):
-    name = name.lower()
-    name = re.sub(r"[^a-z]", "", name)
-    if not name: return 0.0
-    vowels = "aeiouy"
-    v_count = sum(1 for c in name if c in vowels)
-    c_count = sum(1 for c in name if c not in vowels)
-    vc_ratio = v_count / (c_count + 1)
-    cluster_penalty = len(re.findall(r'[^aeiouy]{3,}', name)) * 0.1
-    alternation = re.findall(r'[aeiouy]+|[^aeiouy]+', name)
-    smoothness = len(alternation) / len(name)
-    score = (vc_ratio * 0.6) + (smoothness * 0.6) - cluster_penalty
-    return max(0.0, min(score, 1.0))
-def has_duplicate_articles(name):
-    return bool(re.search(r'\\b(the|of|in|on|a)\\s+\\1\\b', name, flags=re.IGNORECASE))
-def is_problematic(name):
-    return (
-        re.search(r'\\b(the the|of of|in in)\\b', name.lower()) or
-        (name.count(' ') == 0 and len(name) < 5) or
-        len(re.findall(r'[bcdfghjklmnpqrstvwxyz]{5,}', name.lower())) > 0
-    )
-def is_too_weird(name):
-    return (
-        any(len(w) > 14 for w in name.split()) or
-        re.search(r"[bcdfghjklmnpqrstvwxyz]{5,}", name.lower())
-    )
-def _sample_once(prompt, max_new=24, temperature=1.0, top_k=40):
     seq = [BOS] + [stoi.get(c, PAD) for c in prompt] + [SEP]
-    with torch.no_grad():
-        for _ in range(max_new):
-            x = torch.tensor(seq[-MAX_LEN:], dtype=torch.long, device=DEVICE).unsqueeze(0)
             logits = model(x)[:, -1, :] / temperature
-            if top_k:
-                v, i = torch.topk(logits, top_k)
-                idx = i[0, torch.softmax(v, -1).multinomial(1)].item()
-            else:
-                idx = torch.softmax(logits, -1).multinomial(1).item()
-            if idx == EOS or itos[idx] == "</s>":
-                break
-            seq.append(idx)
-    try:
-        start = seq.index(SEP) + 1
-    except ValueError:
-        start = 0
-    decoded = []
-    for idx in seq[start:]:
-        if idx == EOS or itos[idx] == "</s>":
             break
-        if idx != PAD:
-            decoded.append(itos[idx])
-    return ''.join(decoded).strip()
-def generate_name(prompt, min_chars=4, min_words=1, min_score=0.55, max_retries=3, temperature=1.0, temp_decay=0.85, max_gibberish=0.5):
-    last_try = ""
-    for attempt in range(max_retries):
-        temp = temperature * (temp_decay ** attempt)
-        raw = _sample_once(prompt, temperature=temp)
-        name = clean_name(raw)
-        last_try = name
-        score = pronounceability_score(name)
-        gibber = gibberish_score(name)
-        has_dupes = has_duplicate_articles(name)
-        weird_words = has_weird_word_lengths(name)
-        good = (
-            len(name) >= min_chars and len(name.split()) >= min_words and
-            score >= min_score and gibber <= max_gibberish and
-            not has_dupes and not weird_words
-        )
-        if good and not is_too_weird(name) and not is_problematic(name):
-            return name
-    return last_try
-def ui_fn(prompt):
-    names = [generate_name(prompt) for _ in range(3)]
-    return "\\n".join(names)
-demo = gr.Interface(fn=ui_fn, inputs="text", outputs="text", title="Fantasy Name Generator",
-                    description="Enter a character or world prompt to generate fantasy names.")
-if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import torch
 import torch.nn as nn
+import re, unicodedata, random, math
+from pathlib import Path
+# === Constants and Config ===
+DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'  # use the GPU when torch reports one, otherwise the CPU
 SEED = 1337  # fixed seed, applied below to both torch and Python's random module
 torch.manual_seed(SEED)
+random.seed(SEED)
+# === Load Checkpoint ===
 ckpt = torch.load("kaos.pt", map_location=DEVICE)  # NOTE(review): torch.load may unpickle arbitrary objects depending on the torch version; only load a trusted kaos.pt
+stoi, itos = ckpt['stoi'], ckpt['itos']  # vocabulary tables from the checkpoint: stoi is indexed by token string, itos by token id
+SPECIAL = ['<pad>', '<bos>', '<eos>', '<sep>']  # special-token strings, in the order they are unpacked on the next line
+PAD, BOS, EOS, SEP = [stoi[s] for s in SPECIAL]  # ids are looked up in the checkpoint vocab; a missing special token raises KeyError
 VOCAB_SIZE = len(itos)  # passed to GPTSmall as vocab_size
+MAX_LEN = 128  # match training; also the default max_len of GPTSmall and the window sample_once feeds to the model
+# === Model ===
 class GPTSmall(nn.Module):  # NOTE(review): this diff view elides the __init__ lines that create tok_emb, pos_emb, blocks, norm and head
     def __init__(self, vocab_size, d_model=256, n_head=8, n_layer=4, dropout=0.2, max_len=MAX_LEN):
         super().__init__()
     def forward(self, x):  # x is a 2-D tensor of token ids; its shape is unpacked as (B, T) below
         B, T = x.shape
+        tok = self.tok_emb(x) + self.pos_emb[:, :T]  # token embeddings plus the first T entries of pos_emb
         mask = torch.triu(torch.ones(T, T, device=x.device, dtype=torch.bool), 1)  # boolean T x T matrix, True strictly above the diagonal (presumably a causal mask; confirm against the block type)
         for blk in self.blocks:
             tok = blk(tok, src_key_padding_mask=(x == PAD), src_mask=mask)  # positions whose id equals PAD are passed as the key-padding mask
+        return self.head(self.norm(tok))  # norm then head; presumably per-position vocabulary logits (head is defined in the elided lines)
 model = GPTSmall(VOCAB_SIZE).to(DEVICE)  # other hyper-parameters keep the constructor defaults
+model.load_state_dict(ckpt['model'])  # weights come from the 'model' entry of the checkpoint loaded above
 model.eval()  # evaluation mode for inference
+# === Utility ===
 def proper_case(text):
+    return re.sub(r"\b(of|the|and|in|on|a)\b", lambda m: m.group(0).lower(), text.title())
 def clean_name(text, title_case=True, max_repeats=2):
     text = unicodedata.normalize("NFC", text)
+    text = re.sub(r'(.)\1{2,}', lambda m: m.group(1) * max_repeats, text)
+    text = re.sub(r"’S|\'S", "'s", text)
+    text = re.sub(r"[^0-9A-Za-zÀ-ÖØ-öø-ÿ'’\-\s]", "", text)
+    text = re.sub(r"\s+", " ", text).strip()
     if title_case:
         text = proper_case(text)
+    text = re.sub(r'\b(The|Of|In|On|A)\s+\1\b', r'\1', text, flags=re.IGNORECASE)
+    return re.sub(r"([a-zA-Z])'S\b", lambda m: m.group(1) + "'s", text)
def sample_once(prompt, temperature=1.0, top_k=40, max_new=24):
    """Sample one name from the model for ``prompt`` and return it cleaned.

    The model input is ``BOS + prompt characters + SEP``; prompt characters
    missing from ``stoi`` are encoded as ``PAD``. Tokens are then sampled one
    at a time until ``EOS`` is drawn or ``max_new`` tokens have been added.
    Only the sampled tokens are decoded, so the prompt is not echoed back in
    the returned name.

    Args:
        prompt: Text the name is conditioned on.
        temperature: Divisor applied to the logits before sampling; must be
            non-zero.
        top_k: When truthy, sample only among the ``top_k`` highest logits.
            The value is coerced to an int and clamped to the vocabulary
            size. When falsy, sample from the full distribution.
        max_new: Maximum number of tokens to sample.

    Returns:
        The sampled name after ``clean_name``.
    """
    seq = [BOS] + [stoi.get(c, PAD) for c in prompt] + [SEP]
    prompt_len = len(seq)  # everything appended past this point is model output

    with torch.no_grad():
        for _ in range(max_new):
            # Feed at most the last MAX_LEN tokens to the model.
            x = torch.tensor(seq[-MAX_LEN:], dtype=torch.long, device=DEVICE)[None]
            logits = model(x)[:, -1, :] / temperature
            if top_k:
                # torch.topk needs an int k no larger than the last dimension;
                # UI sliders may deliver floats or values above the vocab size.
                k = max(1, min(int(top_k), logits.size(-1)))
                v, i = torch.topk(logits, k)
                idx = i[0, torch.softmax(v, -1).multinomial(1)].item()
            else:
                idx = torch.softmax(logits, -1).multinomial(1).item()
            if idx == EOS:
                break
            seq.append(idx)

    # Decode the generated part only and drop any special token sampled on the way.
    special_ids = {BOS, SEP, EOS, PAD}
    name = ''.join(itos[i] for i in seq[prompt_len:] if i not in special_ids)
    return clean_name(name)
+# === Gradio UI ===
def generate_ui(prompt, temperature, top_k, count):
    """Generate ``count`` names for ``prompt`` and join them with newlines.

    This is the callback wired to the "Generate Names" button.

    Args:
        prompt: Text the names are conditioned on.
        temperature: Sampling temperature forwarded to ``sample_once``.
        top_k: Top-k cutoff forwarded to ``sample_once``; coerced to int.
        count: Number of names to generate; coerced to int.

    Returns:
        The generated names, one per line (an empty string when ``count``
        is 0).
    """
    # Slider components may hand their value over as a float; range() and
    # torch.topk both require ints.
    top_k = int(top_k)
    count = int(count)
    return "\n".join(
        sample_once(prompt, temperature=temperature, top_k=top_k)
        for _ in range(count)
    )
# Markdown shown at the top of the page.
description = """🎭 **Fantasy Name Generator**
Give it a prompt like `a forgotten warrior king` or `mistress of the black swamp` and it'll generate creative fantasy-style names.
This model is trained from scratch and runs entirely on PyTorch."""

# Layout: description, prompt box, one row of sampling controls, then the
# button and the output box. The button passes the four inputs to generate_ui
# in the order of its parameters.
with gr.Blocks() as demo:
    gr.Markdown(description)
    with gr.Row():
        prompt = gr.Textbox(label="Prompt", placeholder="e.g. 'a villain who whispers to shadows'", lines=1)
    with gr.Row():
        temperature = gr.Slider(0.1, 1.5, step=0.1, value=1.0, label="Temperature")
        top_k = gr.Slider(10, 100, step=10, value=40, label="Top-K")
        count = gr.Slider(1, 5, step=1, value=3, label="Names to Generate")
    generate_btn = gr.Button("Generate Names")
    output = gr.Textbox(label="Generated Names", lines=5)
    generate_btn.click(fn=generate_ui, inputs=[prompt, temperature, top_k, count], outputs=output)

# Start the server only when run as a script, so importing this module
# (for tests or tooling) does not launch it.
if __name__ == "__main__":
    demo.launch()