Spaces:

yajii
/

mcid-generator

Sleeping

App Files Files

Yajii2 committed on Jul 1, 2025

Commit

1ffcc33

unverified ·

1 Parent(s): 8f9c2f7

Add application file

Browse files

Files changed (6) hide show

.gitignore +179 -0
app.py +70 -0
model/gpt_char_model.py +27 -0
model/gpt_char_model_v3.pth +3 -0
requirements.txt +1 -0
tokenizer.py +18 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,179 @@

+# Created by https://www.toptal.com/developers/gitignore/api/python
+# Edit at https://www.toptal.com/developers/gitignore?templates=python
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+### Python Patch ###
+# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
+poetry.toml
+# ruff
+.ruff_cache/
+# LSP config files
+pyrightconfig.json
+# End of https://www.toptal.com/developers/gitignore/api/python
+data/*
+!data/.keep

app.py ADDED Viewed

	@@ -0,0 +1,70 @@

+import gradio as gr
+import torch
+from model.gpt_char_model import CharGPT
+from tokenizer import CharTokenizer
def load_model(model_path="model/gpt_char_model.pth", block_size=32):
    """Build a ``CharGPT``, load its weights, and return it ready for inference.

    Args:
        model_path: Path to a checkpoint file containing the model's
            ``state_dict``.
        block_size: Context length passed to ``CharGPT``.

    Returns:
        A ``(model, tokenizer, device)`` tuple. The model is on ``device``
        and in eval mode.
    """
    # Always CPU: the previous CUDA probe was overwritten on the very next
    # line, so it was dead code and has been removed.
    device = torch.device("cpu")
    tokenizer = CharTokenizer()
    model = CharGPT(
        vocab_size=len(tokenizer.chars),
        block_size=block_size,
        n_layer=6,
        n_head=4,
        n_embd=256,
    ).to(device)
    # weights_only=True restricts unpickling to tensors and plain containers,
    # which is all a state_dict needs, and avoids executing arbitrary pickled
    # code from the checkpoint file.
    state_dict = torch.load(model_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()
    return model, tokenizer, device
# Loaded (model, tokenizer, device) bundles keyed by checkpoint path, so the
# checkpoint is read from disk once instead of on every button click.
_MODEL_CACHE = {}


def _get_cached_model(model_path):
    """Return the bundle for *model_path*, loading it on first use only."""
    if model_path not in _MODEL_CACHE:
        _MODEL_CACHE[model_path] = load_model(model_path=model_path)
    return _MODEL_CACHE[model_path]


@torch.no_grad()
def generate_username(seed_text="", min_length=1, max_length=16, temperature=1.0):
    """Sample a username character by character from the trained model.

    Args:
        seed_text: Optional prefix of the username. Characters the tokenizer
            does not know are dropped by ``tokenizer.encode``.
        min_length: Minimum number of characters in the result. The
            end-of-name token cannot be sampled before this length is reached.
        max_length: Maximum number of characters in the result, seed included.
            If the seed is already this long, no characters are generated.
            Takes precedence over ``min_length`` when the two conflict.
        temperature: Softmax temperature; must be positive.

    Returns:
        The generated username, without the start/end marker.

    Raises:
        ValueError: If ``temperature`` is not positive.
    """
    if temperature <= 0:
        raise ValueError("temperature must be positive")

    model, tokenizer, device = _get_cached_model("model/gpt_char_model_v3.pth")

    # "\n" (id 0 in the default vocabulary) doubles as start and end marker.
    newline_id = tokenizer.char2id["\n"]
    prompt_ids = [newline_id] + tokenizer.encode(seed_text)
    input_ids = torch.tensor([prompt_ids], dtype=torch.long, device=device)

    # The name length excludes the leading start marker, hence the "- 1".
    while input_ids.shape[1] - 1 < max_length:
        name_length = input_ids.shape[1] - 1
        window = input_ids[:, -model.block_size:]
        logits = model(window)[:, -1, :] / temperature
        if name_length < min_length:
            # Forbid ending the name early instead of sampling a terminator
            # and discarding it, which wasted an iteration without
            # guaranteeing the minimum length.
            logits[:, newline_id] = float("-inf")
        probs = torch.softmax(logits, dim=-1)
        next_id = torch.multinomial(probs, num_samples=1)
        if next_id.item() == newline_id:
            break
        input_ids = torch.cat((input_ids, next_id), dim=1)

    return tokenizer.decode(input_ids[0, 1:].tolist()).strip()
def gradio_interface(seed_text, min_length, max_length, temperature):
    """Adapt raw Gradio widget values and forward them to ``generate_username``.

    The two length values are coerced to ``int`` and the temperature to
    ``float`` before the call; the seed text is passed through unchanged.
    """
    shortest = int(min_length)
    longest = int(max_length)
    heat = float(temperature)
    return generate_username(seed_text, shortest, longest, heat)
# Gradio UI: a seed textbox, length/temperature sliders, an output textbox,
# and a button that runs gradio_interface with the current widget values.
with gr.Blocks(theme=gr.themes.Ocean()) as demo:
    gr.Markdown("# MCID Generator")
    with gr.Row():
        seed = gr.Textbox(label="Start token", value="")
    with gr.Row():
        with gr.Column():
            min_length = gr.Slider(1, 32, value=1, step=1, label="Minimum length")
            max_length = gr.Slider(1, 32, value=16, step=1, label="Maximum length")
        temperature = gr.Slider(0.5, 2.0, value=1.0, step=0.05, label="Temperature")
    with gr.Row():
        output = gr.Textbox(label="Generated username")
    generate_btn = gr.Button("Generate")
    generate_btn.click(
        gradio_interface,
        inputs=[seed, min_length, max_length, temperature],
        outputs=output,
    )

# Only start the server when executed as a script, so importing this module
# (for tests or tooling) does not launch a web server as a side effect.
# NOTE(review): share=True requests a public tunnel link; it is presumably
# unnecessary when hosted on Hugging Face Spaces -- confirm and drop if so.
if __name__ == "__main__":
    demo.launch(share=True)

model/gpt_char_model.py ADDED Viewed

	@@ -0,0 +1,27 @@

+import torch
+import torch.nn as nn
class CharGPT(nn.Module):
    """Character-level transformer that maps token ids to next-token logits.

    Token and learned position embeddings are summed, passed through a stack
    of ``nn.TransformerEncoderLayer`` blocks, layer-normalised, and projected
    to vocabulary size. No attention mask is passed to the encoder.

    Args:
        vocab_size: Number of distinct token ids.
        n_embd: Embedding / model width.
        n_head: Number of attention heads per layer.
        n_layer: Number of encoder layers.
        block_size: Maximum sequence length (size of the position table).
        batch_first: Forwarded to ``nn.TransformerEncoderLayer``. The default
            ``False`` preserves the original behaviour, in which the encoder
            interprets the ``(B, T, C)`` activations as ``(seq, batch, C)``
            and therefore attends across the batch axis rather than across
            time. Pass ``True`` to attend across the ``T`` axis. Parameter
            names and shapes are the same either way, so checkpoints load
            under both settings, but weights trained with one setting are not
            expected to behave the same under the other.
    """

    def __init__(
        self,
        vocab_size,
        n_embd=128,
        n_head=4,
        n_layer=4,
        block_size=32,
        batch_first=False,
    ):
        super().__init__()
        self.token_embedding = nn.Embedding(vocab_size, n_embd)
        self.pos_embedding = nn.Embedding(block_size, n_embd)
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=n_embd, nhead=n_head, batch_first=batch_first
            ),
            num_layers=n_layer,
        )
        self.ln = nn.LayerNorm(n_embd)
        self.fc = nn.Linear(n_embd, vocab_size)
        self.block_size = block_size

    def forward(self, idx):
        """Return logits of shape ``(B, T, vocab_size)`` for ids of shape ``(B, T)``.

        Raises:
            ValueError: If ``T`` exceeds ``block_size`` (the position table
                has no entry for such positions).
        """
        B, T = idx.shape
        if T > self.block_size:
            raise ValueError(
                f"sequence length {T} exceeds block_size {self.block_size}"
            )
        tok_emb = self.token_embedding(idx)
        pos = torch.arange(T, device=idx.device)
        # (T, C) position embeddings broadcast over the batch dimension.
        pos_emb = self.pos_embedding(pos)
        x = tok_emb + pos_emb
        x = self.transformer(x)
        x = self.ln(x)
        logits = self.fc(x)
        return logits

model/gpt_char_model_v3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:28877d8db87eabd9a8e1a674b9b5c4cb600d0a163acdac00886b351bc96c1232
+size 31757701

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ torch

tokenizer.py ADDED Viewed

	@@ -0,0 +1,18 @@

class CharTokenizer:
    """Character-level tokenizer mapping single characters to integer ids.

    Ids are the positions of the characters in ``chars``. When ``chars`` is
    omitted, a default vocabulary is used: newline, ASCII letters, digits and
    underscore.
    """

    def __init__(self, chars=None):
        if chars is None:
            # Example set of characters commonly used in Minecraft usernames.
            chars = list(
                "\nabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_"
            )
        self.chars = chars
        self.char2id = {}
        self.id2char = {}
        for index, symbol in enumerate(chars):
            self.char2id[symbol] = index
            self.id2char[index] = symbol

    def encode(self, text):
        """Convert a string to a list of ids; unknown characters are skipped."""
        token_ids = []
        for symbol in text:
            if symbol in self.char2id:
                token_ids.append(self.char2id[symbol])
        return token_ids

    def decode(self, ids):
        """Convert a list of ids back to a string; unknown ids are skipped."""
        pieces = []
        for token_id in ids:
            if token_id in self.id2char:
                pieces.append(self.id2char[token_id])
        return "".join(pieces)