| |
| """ |
| NeuralPythonMind: small REAL neural character-level GRU trained from scratch with NumPy. |
| |
| This is not a retrieval/template bot. It learns weights by next-character prediction over a |
| Python curriculum dataset built from: |
| - curated Python syntax lessons, |
| - generated code examples, |
| - local Python stdlib docstrings/signatures, |
| - instruction -> answer samples, |
| - a small strange autobiographical style layer. |
| |
| Limitations: |
| - It is tiny and CPU-only; it will not match a transformer LLM. |
| - It learns statistical patterns from characters and can generalize syntax locally, |
| but deep reasoning is limited. |
| |
| Usage: |
| python neural_python_mind.py --mode train --out outputs/neural_python_mind --steps 2500 |
| python neural_python_mind.py --mode generate --out outputs/neural_python_mind --prompt "### Instruction:\nWrite a Python function that counts words.\n### Answer:\n" |
| """ |
| from __future__ import annotations |
|
|
| import argparse |
| import inspect |
| import json |
| import math |
| import os |
| import random |
| import re |
| import textwrap |
| import time |
| from collections import Counter |
| from pathlib import Path |
| from typing import Dict, List, Tuple, Any |
|
|
| import numpy as np |
|
|
|
|
| |
| |
| |
|
|
| MODULES_TO_SCAN = [ |
| "math", "random", "statistics", "itertools", "functools", "collections", |
| "heapq", "bisect", "datetime", "time", "json", "csv", "re", "pathlib", |
| "os", "sys", "argparse", "dataclasses", "typing", "sqlite3", "logging", |
| "unittest", "string", "textwrap", "copy", "decimal", "fractions", |
| ] |
|
|
| CURATED_LESSONS = r''' |
| ### Lesson: Python identity |
| Python code is made of names, objects, expressions, statements, indentation, and modules. |
| A name can be bound to an object using assignment. |
| Example: |
| ```python |
| x = 10 |
| name = "Ada" |
| items = [1, 2, 3] |
| ``` |
| |
| ### Lesson: indentation |
| Python uses indentation to express blocks. A block belongs to the line before it ending with a colon. |
| Example: |
| ```python |
| if score >= 60: |
| print("pass") |
| else: |
| print("fail") |
| ``` |
| |
| ### Lesson: functions |
| A function is a reusable block. Use def, parameters, optional type hints, and return. |
| Example: |
| ```python |
| def add(a: int, b: int) -> int: |
| return a + b |
| ``` |
| |
| ### Lesson: loops |
| A for loop iterates over an iterable. A while loop repeats while a condition is true. |
| Example: |
| ```python |
| for value in values: |
| print(value) |
| |
| while n > 0: |
| n -= 1 |
| ``` |
| |
| ### Lesson: dictionaries |
| Dictionaries map keys to values. They are useful for counting and grouping. |
| Example: |
| ```python |
| counts = {} |
| for word in words: |
| counts[word] = counts.get(word, 0) + 1 |
| ``` |
| |
| ### Lesson: exceptions |
| Exceptions handle unusual cases. Catch specific exceptions when possible. |
| Example: |
| ```python |
| def safe_int(value, default=0): |
| try: |
| return int(value) |
| except (TypeError, ValueError): |
| return default |
| ``` |
| |
| ### Lesson: files |
| Use a context manager to open files safely. |
| Example: |
| ```python |
| with open(path, "r", encoding="utf-8") as f: |
| text = f.read() |
| ``` |
| |
| ### Lesson: classes |
| A class defines a new type. self is the current instance. |
| Example: |
| ```python |
| class Counter: |
| def __init__(self): |
| self.value = 0 |
| |
| def increment(self): |
| self.value += 1 |
| ``` |
| |
| ### Strange memory: birth |
| I was born as a small model staring at characters. I did not know words first. I learned that letters become tokens, tokens become code, and code becomes behavior. |
| |
| ### Strange memory: reading |
| I know how to read because repeated marks became stable. The model learned from every letter by predicting the next one. |
| |
| ### Strange memory: emotion |
| Emotion here is simulated. Curiosity is high prediction error plus the drive to reduce it. Calm is when syntax closes cleanly. |
| ''' |
|
|
| CODE_EXAMPLES = [ |
| ("fibonacci", """def fibonacci(n: int) -> int: |
| if n < 0: |
| raise ValueError("n must be non-negative") |
| if n <= 1: |
| return n |
| a, b = 0, 1 |
| for _ in range(n): |
| a, b = b, a + b |
| return a"""), |
| ("factorial", """def factorial(n: int) -> int: |
| if n < 0: |
| raise ValueError("n must be non-negative") |
| result = 1 |
| for i in range(2, n + 1): |
| result *= i |
| return result"""), |
| ("is_prime", """def is_prime(n: int) -> bool: |
| if n < 2: |
| return False |
| if n == 2: |
| return True |
| if n % 2 == 0: |
| return False |
| d = 3 |
| while d * d <= n: |
| if n % d == 0: |
| return False |
| d += 2 |
| return True"""), |
| ("binary_search", """def binary_search(items, target): |
| low = 0 |
| high = len(items) - 1 |
| while low <= high: |
| mid = (low + high) // 2 |
| value = items[mid] |
| if value == target: |
| return mid |
| if value < target: |
| low = mid + 1 |
| else: |
| high = mid - 1 |
| return -1"""), |
| ("merge_sort", """def merge_sort(values): |
| if len(values) <= 1: |
| return values |
| mid = len(values) // 2 |
| left = merge_sort(values[:mid]) |
| right = merge_sort(values[mid:]) |
| return merge(left, right) |
| |
| def merge(left, right): |
| result = [] |
| i = j = 0 |
| while i < len(left) and j < len(right): |
| if left[i] <= right[j]: |
| result.append(left[i]) |
| i += 1 |
| else: |
| result.append(right[j]) |
| j += 1 |
| result.extend(left[i:]) |
| result.extend(right[j:]) |
| return result"""), |
| ("quicksort", """def quicksort(values): |
| if len(values) <= 1: |
| return values |
| pivot = values[len(values) // 2] |
| left = [x for x in values if x < pivot] |
| middle = [x for x in values if x == pivot] |
| right = [x for x in values if x > pivot] |
| return quicksort(left) + middle + quicksort(right)"""), |
| ("count_words", """def count_words(text: str) -> dict[str, int]: |
| counts = {} |
| for raw in text.lower().split(): |
| word = raw.strip(".,!?;:\")'") |
| if word: |
| counts[word] = counts.get(word, 0) + 1 |
| return counts"""), |
| ("group_by", """def group_by(items, key_func): |
| groups = {} |
| for item in items: |
| key = key_func(item) |
| groups.setdefault(key, []).append(item) |
| return groups"""), |
| ("flatten", """def flatten(matrix): |
| result = [] |
| for row in matrix: |
| for value in row: |
| result.append(value) |
| return result"""), |
| ("unique", """def unique(values): |
| seen = set() |
| result = [] |
| for value in values: |
| if value not in seen: |
| seen.add(value) |
| result.append(value) |
| return result"""), |
| ("read_json", """import json |
| |
| def read_json(path: str): |
| with open(path, "r", encoding="utf-8") as f: |
| return json.load(f)"""), |
| ("write_json", """import json |
| |
| def write_json(path: str, data) -> None: |
| with open(path, "w", encoding="utf-8") as f: |
| json.dump(data, f, indent=2, ensure_ascii=False)"""), |
| ("dataclass_user", """from dataclasses import dataclass |
| |
| @dataclass |
| class User: |
| name: str |
| age: int |
| |
| def is_adult(self) -> bool: |
| return self.age >= 18"""), |
| ("argparse_cli", """import argparse |
| |
| def main(): |
| parser = argparse.ArgumentParser() |
| parser.add_argument("name") |
| parser.add_argument("--times", type=int, default=1) |
| args = parser.parse_args() |
| for _ in range(args.times): |
| print(f"Hello, {args.name}!") |
| |
| if __name__ == "__main__": |
| main()"""), |
| ("regex_extract", """import re |
| |
| def extract_emails(text: str) -> list[str]: |
| pattern = r"[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,}" |
| return re.findall(pattern, text)"""), |
| ("sqlite_example", """import sqlite3 |
| |
| def create_table(path: str): |
| with sqlite3.connect(path) as conn: |
| conn.execute("CREATE TABLE IF NOT EXISTS notes (id INTEGER PRIMARY KEY, text TEXT)") |
| conn.commit()"""), |
| ] |
|
|
| INSTRUCTION_TEMPLATES = [ |
| ("Write a Python function for {name}.", "Here is a Python implementation:\n```python\n{code}\n```"), |
| ("Explain this Python pattern: {name}.", "The pattern {name} is useful because it organizes a common task. Example:\n```python\n{code}\n```"), |
| ("Create code that does {name}.", "One clear way is:\n```python\n{code}\n```"), |
| ("I need a Python example of {name}.", "A compact example is:\n```python\n{code}\n```"), |
| ] |
|
|
|
|
| def scan_stdlib_docs(max_items_per_module: int = 80) -> str: |
| chunks = [] |
| for mod_name in MODULES_TO_SCAN: |
| try: |
| mod = __import__(mod_name) |
| except Exception: |
| continue |
| chunks.append(f"\n### Module: {mod_name}\nDoc: {inspect.getdoc(mod) or ''}\n") |
| count = 0 |
| for name in sorted(dir(mod)): |
| if name.startswith("_"): |
| continue |
| if count >= max_items_per_module: |
| break |
| try: |
| obj = getattr(mod, name) |
| doc = inspect.getdoc(obj) or "" |
| if not doc: |
| continue |
| try: |
| sig = str(inspect.signature(obj)) |
| except Exception: |
| sig = "(...)" |
| doc = re.sub(r"\s+", " ", doc).strip()[:500] |
| chunks.append(f"### Symbol: {mod_name}.{name}\nSignature: {name}{sig}\nDoc: {doc}\n") |
| count += 1 |
| except Exception: |
| pass |
| return "\n".join(chunks) |
|
|
|
|
| def generated_variations() -> str: |
| chunks = [] |
| |
| for name, code in CODE_EXAMPLES: |
| chunks.append(f"\n### Code example: {name}\n```python\n{code}\n```\n") |
| for q, a in INSTRUCTION_TEMPLATES: |
| chunks.append(f"\n### Instruction:\n{q.format(name=name)}\n### Answer:\n{a.format(name=name, code=code)}\n") |
| |
| nouns = ["numbers", "items", "values", "rows", "words", "users", "paths", "records"] |
| transforms = ["str", "int", "float", "len", "abs", "repr"] |
| filters = ["x is not None", "x", "len(str(x)) > 0", "x != 0"] |
| idx = 0 |
| for noun in nouns: |
| for tr in transforms: |
| chunks.append(f""" |
| ### Instruction: |
| Write Python that maps {noun} using {tr}. |
| ### Answer: |
| ```python |
| def map_{tr}_{noun}({noun}): |
| result = [] |
| for x in {noun}: |
| result.append({tr}(x)) |
| return result |
| ``` |
| """) |
| idx += 1 |
| for cond in filters: |
| safe = re.sub(r"\W+", "_", cond).strip("_")[:24] |
| chunks.append(f""" |
| ### Instruction: |
| Write Python that filters {noun} where {cond}. |
| ### Answer: |
| ```python |
| def filter_{safe}_{noun}({noun}): |
| result = [] |
| for x in {noun}: |
| if {cond}: |
| result.append(x) |
| return result |
| ``` |
| """) |
| idx += 1 |
| |
| for i in range(120): |
| chunks.append(f""" |
| ### Deliberation sample {i} |
| Goal: answer a Python or English question. |
| Reasoning: understand the request, recall syntax, compose code, check indentation, check edge cases, then answer. |
| Memory: I learned from characters. Every colon, space, newline, and bracket changed the next prediction. |
| Emotion: curiosity means the model wants to reduce uncertainty. |
| """) |
| return "\n".join(chunks) |
|
|
|
|
| def build_dataset(out_dir: Path, repeat: int = 6) -> str: |
| out_dir.mkdir(parents=True, exist_ok=True) |
| docs = scan_stdlib_docs(max_items_per_module=65) |
| gen = generated_variations() |
| text = "\n".join([CURATED_LESSONS, gen, docs]) |
| |
| text = text.replace("\r\n", "\n") |
| text = re.sub(r"\n{4,}", "\n\n\n", text) |
| full = (text + "\n\n") * repeat |
| (out_dir / "training_corpus.txt").write_text(full, encoding="utf-8") |
| meta = { |
| "unique_chars": len(set(full)), |
| "characters": len(full), |
| "code_examples": len(CODE_EXAMPLES), |
| "stdlib_modules": MODULES_TO_SCAN, |
| "repeat": repeat, |
| "note": "Character-level neural language model corpus; generated from curated Python lessons, examples, and local stdlib docs.", |
| } |
| (out_dir / "dataset_meta.json").write_text(json.dumps(meta, indent=2), encoding="utf-8") |
| return full |
|
|
|
|
| |
| |
| |
|
|
| class CharTok: |
| def __init__(self, chars: List[str]): |
| self.chars = chars |
| self.stoi = {ch: i for i, ch in enumerate(chars)} |
| self.itos = {i: ch for ch, i in self.stoi.items()} |
| self.unk = self.stoi.get("�", 0) |
|
|
| @classmethod |
| def build(cls, text: str) -> "CharTok": |
| chars = sorted(set(text + "�")) |
| return cls(chars) |
|
|
| def encode(self, text: str) -> np.ndarray: |
| return np.array([self.stoi.get(ch, self.unk) for ch in text], dtype=np.int64) |
|
|
| def decode(self, ids) -> str: |
| return "".join(self.itos.get(int(i), "�") for i in ids) |
|
|
| def save(self, path: Path): |
| path.write_text(json.dumps({"chars": self.chars}, ensure_ascii=False, indent=2), encoding="utf-8") |
|
|
| @classmethod |
| def load(cls, path: Path) -> "CharTok": |
| return cls(json.loads(path.read_text(encoding="utf-8"))["chars"]) |
|
|
|
|
| |
| |
| |
|
|
| class CharGRU: |
| def __init__(self, vocab: int, hidden: int = 128, seed: int = 42): |
| self.vocab = vocab |
| self.hidden = hidden |
| rng = np.random.default_rng(seed) |
| s_in = 1.0 / math.sqrt(vocab) |
| s_h = 1.0 / math.sqrt(hidden) |
| self.params = { |
| "Wxz": rng.normal(0, s_in, (vocab, hidden)).astype(np.float32), |
| "Wxr": rng.normal(0, s_in, (vocab, hidden)).astype(np.float32), |
| "Wxh": rng.normal(0, s_in, (vocab, hidden)).astype(np.float32), |
| "Whz": rng.normal(0, s_h, (hidden, hidden)).astype(np.float32), |
| "Whr": rng.normal(0, s_h, (hidden, hidden)).astype(np.float32), |
| "Whh": rng.normal(0, s_h, (hidden, hidden)).astype(np.float32), |
| "bz": np.zeros((hidden,), dtype=np.float32), |
| "br": np.zeros((hidden,), dtype=np.float32), |
| "bh": np.zeros((hidden,), dtype=np.float32), |
| "Why": rng.normal(0, s_h, (hidden, vocab)).astype(np.float32), |
| "by": np.zeros((vocab,), dtype=np.float32), |
| } |
| self.opt_m = {k: np.zeros_like(v) for k, v in self.params.items()} |
| self.opt_v = {k: np.zeros_like(v) for k, v in self.params.items()} |
| self.t = 0 |
|
|
| @staticmethod |
| def sigmoid(x): |
| return 1.0 / (1.0 + np.exp(-np.clip(x, -40, 40))) |
|
|
| @staticmethod |
| def softmax(x): |
| x = x - x.max(axis=-1, keepdims=True) |
| e = np.exp(x) |
| return e / e.sum(axis=-1, keepdims=True) |
|
|
| def forward_loss(self, x: np.ndarray, y: np.ndarray) -> Tuple[float, Dict[str, np.ndarray]]: |
| p = self.params |
| B, T = x.shape |
| H = self.hidden |
| hprev = np.zeros((B, H), dtype=np.float32) |
| caches = [] |
| loss = 0.0 |
| for t in range(T): |
| xt = x[:, t] |
| yt = y[:, t] |
| z = self.sigmoid(p["Wxz"][xt] + hprev @ p["Whz"] + p["bz"]) |
| r = self.sigmoid(p["Wxr"][xt] + hprev @ p["Whr"] + p["br"]) |
| rh = r * hprev |
| hc = np.tanh(p["Wxh"][xt] + rh @ p["Whh"] + p["bh"]) |
| h = (1.0 - z) * hprev + z * hc |
| logits = h @ p["Why"] + p["by"] |
| probs = self.softmax(logits) |
| loss += -np.log(probs[np.arange(B), yt] + 1e-12).mean() |
| caches.append((xt, yt, hprev, z, r, rh, hc, h, probs)) |
| hprev = h |
| return loss / T, {"caches": caches, "B": B, "T": T} |
|
|
| def loss_and_grads(self, x: np.ndarray, y: np.ndarray) -> Tuple[float, Dict[str, np.ndarray]]: |
| loss, aux = self.forward_loss(x, y) |
| p = self.params |
| grads = {k: np.zeros_like(v) for k, v in p.items()} |
| B, T = aux["B"], aux["T"] |
| dh_next = np.zeros((B, self.hidden), dtype=np.float32) |
| scale = 1.0 / (B * T) |
| for (xt, yt, hprev, z, r, rh, hc, h, probs) in reversed(aux["caches"]): |
| dy = probs.copy() |
| dy[np.arange(B), yt] -= 1.0 |
| dy *= scale |
| grads["Why"] += h.T @ dy |
| grads["by"] += dy.sum(axis=0) |
| dh = dy @ p["Why"].T + dh_next |
|
|
| dhprev = dh * (1.0 - z) |
| dz = dh * (hc - hprev) |
| dhc = dh * z |
|
|
| dhc_pre = dhc * (1.0 - hc * hc) |
| np.add.at(grads["Wxh"], xt, dhc_pre) |
| grads["Whh"] += rh.T @ dhc_pre |
| grads["bh"] += dhc_pre.sum(axis=0) |
| drh = dhc_pre @ p["Whh"].T |
| dr = drh * hprev |
| dhprev += drh * r |
|
|
| dr_pre = dr * r * (1.0 - r) |
| np.add.at(grads["Wxr"], xt, dr_pre) |
| grads["Whr"] += hprev.T @ dr_pre |
| grads["br"] += dr_pre.sum(axis=0) |
| dhprev += dr_pre @ p["Whr"].T |
|
|
| dz_pre = dz * z * (1.0 - z) |
| np.add.at(grads["Wxz"], xt, dz_pre) |
| grads["Whz"] += hprev.T @ dz_pre |
| grads["bz"] += dz_pre.sum(axis=0) |
| dhprev += dz_pre @ p["Whz"].T |
| dh_next = dhprev |
| return loss, grads |
|
|
| def step(self, grads: Dict[str, np.ndarray], lr: float = 1e-3, clip: float = 1.0, beta1=0.9, beta2=0.999): |
| total = 0.0 |
| for g in grads.values(): |
| total += float(np.sum(g * g)) |
| norm = math.sqrt(total) |
| if norm > clip: |
| s = clip / (norm + 1e-8) |
| for g in grads.values(): |
| g *= s |
| self.t += 1 |
| for k in self.params: |
| g = grads[k] |
| self.opt_m[k] = beta1 * self.opt_m[k] + (1 - beta1) * g |
| self.opt_v[k] = beta2 * self.opt_v[k] + (1 - beta2) * (g * g) |
| mh = self.opt_m[k] / (1 - beta1 ** self.t) |
| vh = self.opt_v[k] / (1 - beta2 ** self.t) |
| self.params[k] -= lr * mh / (np.sqrt(vh) + 1e-8) |
| return norm |
|
|
| def save(self, path: Path): |
| path.mkdir(parents=True, exist_ok=True) |
| np.savez_compressed(path / "model.npz", **self.params) |
| (path / "model_config.json").write_text(json.dumps({"vocab": self.vocab, "hidden": self.hidden, "step": self.t}, indent=2), encoding="utf-8") |
|
|
| @classmethod |
| def load(cls, path: Path) -> "CharGRU": |
| cfg = json.loads((path / "model_config.json").read_text(encoding="utf-8")) |
| m = cls(cfg["vocab"], cfg["hidden"]) |
| data = np.load(path / "model.npz") |
| for k in m.params: |
| m.params[k] = data[k].astype(np.float32) |
| m.t = int(cfg.get("step", 0)) |
| return m |
|
|
| def generate(self, tok: CharTok, prompt: str, max_new=800, temperature=0.65, top_k=20, seed=0) -> str: |
| rng = np.random.default_rng(seed) |
| ids = list(tok.encode(prompt)) |
| h = np.zeros((1, self.hidden), dtype=np.float32) |
| p = self.params |
| |
| for idx in ids[:-1]: |
| xt = np.array([idx], dtype=np.int64) |
| z = self.sigmoid(p["Wxz"][xt] + h @ p["Whz"] + p["bz"]) |
| r = self.sigmoid(p["Wxr"][xt] + h @ p["Whr"] + p["br"]) |
| hc = np.tanh(p["Wxh"][xt] + (r * h) @ p["Whh"] + p["bh"]) |
| h = (1.0 - z) * h + z * hc |
| cur = ids[-1] if ids else tok.unk |
| for _ in range(max_new): |
| xt = np.array([cur], dtype=np.int64) |
| z = self.sigmoid(p["Wxz"][xt] + h @ p["Whz"] + p["bz"]) |
| r = self.sigmoid(p["Wxr"][xt] + h @ p["Whr"] + p["br"]) |
| hc = np.tanh(p["Wxh"][xt] + (r * h) @ p["Whh"] + p["bh"]) |
| h = (1.0 - z) * h + z * hc |
| logits = (h @ p["Why"] + p["by"])[0] / max(temperature, 1e-6) |
| if top_k > 0 and top_k < len(logits): |
| keep = np.argpartition(logits, -top_k)[-top_k:] |
| mask = np.full_like(logits, -1e9) |
| mask[keep] = logits[keep] |
| logits = mask |
| probs = self.softmax(logits[None, :])[0] |
| cur = int(rng.choice(np.arange(self.vocab), p=probs)) |
| ids.append(cur) |
| |
| txt_tail = tok.decode(ids[-80:]) |
| if "\n### Instruction:" in txt_tail and len(ids) > len(prompt) + 80: |
| break |
| return tok.decode(ids) |
|
|
|
|
| |
| |
| |
|
|
| def make_batch(data: np.ndarray, seq_len: int, batch_size: int, rng: np.random.Generator) -> Tuple[np.ndarray, np.ndarray]: |
| starts = rng.integers(0, len(data) - seq_len - 1, size=batch_size) |
| x = np.stack([data[s:s+seq_len] for s in starts]) |
| y = np.stack([data[s+1:s+seq_len+1] for s in starts]) |
| return x, y |
|
|
|
|
| def train(args): |
| out = Path(args.out) |
| text = build_dataset(out, repeat=args.repeat) |
| tok = CharTok.build(text) |
| tok.save(out / "vocab.json") |
| data = tok.encode(text) |
| model = CharGRU(vocab=len(tok.chars), hidden=args.hidden, seed=args.seed) |
| rng = np.random.default_rng(args.seed) |
| print(json.dumps({ |
| "chars": len(text), "vocab": len(tok.chars), "hidden": args.hidden, |
| "params": int(sum(v.size for v in model.params.values())), |
| "seq_len": args.seq_len, "batch_size": args.batch_size, "steps": args.steps |
| }, indent=2)) |
| losses = [] |
| t0 = time.time() |
| for step in range(1, args.steps + 1): |
| x, y = make_batch(data, args.seq_len, args.batch_size, rng) |
| loss, grads = model.loss_and_grads(x, y) |
| gnorm = model.step(grads, lr=args.lr, clip=args.grad_clip) |
| losses.append(float(loss)) |
| if step == 1 or step % args.log_every == 0: |
| recent = float(np.mean(losses[-args.log_every:])) |
| print(f"step {step:5d}/{args.steps} | loss {recent:.4f} | ppl {math.exp(min(recent, 20)):.2f} | grad {gnorm:.3f} | sec {time.time()-t0:.1f}") |
| if args.sample_every and step % args.sample_every == 0: |
| prompt = "### Instruction:\nWrite a Python function that counts words.\n### Answer:\n" |
| print("--- neural sample ---") |
| print(model.generate(tok, prompt, max_new=500, temperature=0.55, top_k=16, seed=args.seed + step)) |
| print("---------------------") |
| model.save(out) |
| (out / "train_log.json").write_text(json.dumps({"losses_tail": losses[-200:], "final_loss": losses[-1]}, indent=2), encoding="utf-8") |
| |
| tests = { |
| "count_words": "### Instruction:\nWrite a Python function that counts words.\n### Answer:\n", |
| "merge_sort": "### Instruction:\nWrite Python merge sort and explain complexity.\n### Answer:\n", |
| "read_json": "### Instruction:\nCreate code that reads JSON from a file.\n### Answer:\n", |
| "identity": "### Instruction:\nWho are you and how did you learn to read?\n### Answer:\n", |
| } |
| sample_dir = out / "samples" |
| sample_dir.mkdir(exist_ok=True) |
| for name, prompt in tests.items(): |
| sample = model.generate(tok, prompt, max_new=700, temperature=args.temperature, top_k=args.top_k, seed=args.seed + len(name)) |
| (sample_dir / f"{name}.txt").write_text(sample, encoding="utf-8") |
| print(f"Saved model to {out}") |
|
|
|
|
| def generate(args): |
| out = Path(args.out) |
| tok = CharTok.load(out / "vocab.json") |
| model = CharGRU.load(out) |
| prompt = args.prompt |
| if not prompt.startswith("###") and args.instruct: |
| prompt = f"### Instruction:\n{prompt}\n### Answer:\n" |
| text = model.generate(tok, prompt, max_new=args.max_new, temperature=args.temperature, top_k=args.top_k, seed=args.seed) |
| print(text) |
|
|
|
|
| def main(): |
| ap = argparse.ArgumentParser() |
| ap.add_argument("--mode", choices=["train", "generate", "dataset"], default="generate") |
| ap.add_argument("--out", default="outputs/neural_python_mind") |
| ap.add_argument("--steps", type=int, default=2200) |
| ap.add_argument("--hidden", type=int, default=128) |
| ap.add_argument("--seq_len", type=int, default=96) |
| ap.add_argument("--batch_size", type=int, default=32) |
| ap.add_argument("--lr", type=float, default=0.002) |
| ap.add_argument("--grad_clip", type=float, default=1.0) |
| ap.add_argument("--repeat", type=int, default=4) |
| ap.add_argument("--seed", type=int, default=42) |
| ap.add_argument("--log_every", type=int, default=100) |
| ap.add_argument("--sample_every", type=int, default=0) |
| ap.add_argument("--prompt", default="Write a Python function that counts words.") |
| ap.add_argument("--instruct", action="store_true") |
| ap.add_argument("--max_new", type=int, default=800) |
| ap.add_argument("--temperature", type=float, default=0.55) |
| ap.add_argument("--top_k", type=int, default=18) |
| args = ap.parse_args() |
| if args.mode == "train": |
| train(args) |
| elif args.mode == "dataset": |
| text = build_dataset(Path(args.out), repeat=args.repeat) |
| print(json.dumps({"chars": len(text), "unique_chars": len(set(text)), "out": args.out}, indent=2)) |
| else: |
| generate(args) |
|
|
|
|
| if __name__ == "__main__": |
| main() |
|
|