Spaces:
Running
Running
Upload 17 files
Browse files
- blog-Anthropic%27s-Distillation-Drama-A-Masterclass-in-Projection.html +91 -0
- blog-External-Memory-Modules-Because-My-Model-Has-Commitment-Issues.html +85 -105
- blog-My-Baby-Model-Takes-Forever-to-Grow-Up.html +85 -0
- blog-One-Year-of-Vibecoding-and-Other-Questionable-Life-Choices.html +78 -104
- blog-OpenClaw-The-Most-Overhyped-Bot-Since-Sliced-Bread.html +90 -128
- blog-The-Goalpost-Has-Legs-Why-AGI-Keeps-Running-Away.html +96 -0
- blog-The-Scaling-Wall-And-Other-Things-I-Yelled-At.html +78 -116
- blog-The-Wasted-Precision-of-the-Output-Layer.html +60 -82
- blog-Training-Models-on-a-Ramen-Budget.html +124 -206
- blog-Words-Words-Words-My-Model-Learned-to-Ramble.html +82 -0
- blog-Your-AI-Agent-is-Lying-Behind-Your-Back.html +70 -101
- blog-built-with-curiosity-over-compute.html +54 -78
- blog-makeshift-mtp.html +53 -85
- blog-the-memory-bottleneck.html +54 -83
- blog.html +398 -219
- index.html +0 -0
- status.html +596 -188
blog-Anthropic%27s-Distillation-Drama-A-Masterclass-in-Projection.html
ADDED
|
@@ -0,0 +1,91 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Anthropic's Distillation Drama | TinyMemoryLM</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 9 |
+
<link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600;700&family=Geist+Mono&display=swap" rel="stylesheet">
|
| 10 |
+
<style>
|
| 11 |
+
:root { --black: #000000; --black-soft: #0a0a0a; --gray-1: #171717; --gray-2: #262626; --gray-3: #363636; --gray-4: #525252; --gray-5: #737373; --gray-6: #a3a3a6; --gray-7: #d4d4d4; --white: #ffffff; --accent: #ff4d00; --font-sans: 'Geist', -apple-system, sans-serif; --font-mono: 'Geist Mono', monospace; --container-max: 700px; }
|
| 12 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 13 |
+
body { font-family: var(--font-sans); background: var(--black); color: var(--gray-7); line-height: 1.7; -webkit-font-smoothing: antialiased; }
|
| 14 |
+
a { color: var(--white); text-decoration: none; }
|
| 15 |
+
a:hover { color: var(--accent); }
|
| 16 |
+
.container { max-width: var(--container-max); margin: 0 auto; padding: 0 24px; }
|
| 17 |
+
nav { position: fixed; top: 0; left: 0; right: 0; z-index: 100; background: rgba(0, 0, 0, 0.8); backdrop-filter: blur(12px); border-bottom: 1px solid var(--gray-2); padding: 16px 0; }
|
| 18 |
+
nav .container { display: flex; justify-content: space-between; align-items: center; }
|
| 19 |
+
.nav-brand { font-size: 18px; font-weight: 600; color: var(--white); display: flex; align-items: center; gap: 8px; }
|
| 20 |
+
.nav-brand span { color: var(--accent); }
|
| 21 |
+
.nav-links { display: flex; gap: 32px; }
|
| 22 |
+
.nav-links a { font-size: 14px; font-weight: 500; color: var(--gray-6); }
|
| 23 |
+
.post { padding: 140px 0 80px; }
|
| 24 |
+
.post-back { display: inline-block; color: var(--gray-5); font-size: 14px; margin-bottom: 32px; }
|
| 25 |
+
.post-back:hover { color: var(--accent); }
|
| 26 |
+
.post-back::before { content: '← '; }
|
| 27 |
+
.post-meta { display: flex; gap: 12px; margin-bottom: 20px; }
|
| 28 |
+
.post-date { font-size: 13px; color: var(--gray-5); font-family: var(--font-mono); }
|
| 29 |
+
.post-tag { font-size: 11px; font-weight: 600; text-transform: uppercase; color: var(--accent); background: rgba(255, 77, 0, 0.1); padding: 4px 10px; border-radius: 4px; }
|
| 30 |
+
.post h1 { font-size: 36px; font-weight: 700; color: var(--white); margin-bottom: 32px; line-height: 1.2; }
|
| 31 |
+
.post-body p { font-size: 17px; line-height: 1.8; margin-bottom: 24px; color: var(--gray-6); }
|
| 32 |
+
.post-body p:first-of-type { font-size: 20px; color: var(--gray-7); }
|
| 33 |
+
.post-body h2 { font-size: 24px; font-weight: 600; color: var(--white); margin: 48px 0 20px; }
|
| 34 |
+
.post-body blockquote { border-left: 3px solid var(--accent); padding: 20px 24px; margin: 32px 0; background: var(--gray-1); border-radius: 0 8px 8px 0; }
|
| 35 |
+
.post-body blockquote p { font-size: 16px; font-style: italic; color: var(--gray-6); margin: 0; }
|
| 36 |
+
.post-body hr { border: none; height: 1px; background: var(--gray-2); margin: 48px 0; }
|
| 37 |
+
.post-footer { margin-top: 48px; padding-top: 32px; border-top: 1px solid var(--gray-2); }
|
| 38 |
+
.post-footer p { font-size: 14px; color: var(--gray-5); font-style: italic; margin: 0; }
|
| 39 |
+
footer { padding: 40px 0; background: var(--black-soft); border-top: 1px solid var(--gray-2); text-align: center; }
|
| 40 |
+
footer p { color: var(--gray-5); font-size: 14px; margin-bottom: 8px; }
|
| 41 |
+
footer a { color: var(--gray-5); }
|
| 42 |
+
@media (max-width: 768px) { .post h1 { font-size: 28px; } }
|
| 43 |
+
</style>
|
| 44 |
+
</head>
|
| 45 |
+
<body>
|
| 46 |
+
<nav>
|
| 47 |
+
<div class="container">
|
| 48 |
+
<a href="index.html" class="nav-brand"><span>/</span>TinyMemoryLM</a>
|
| 49 |
+
<div class="nav-links">
|
| 50 |
+
<a href="index.html">Home</a>
|
| 51 |
+
<a href="blog.html">Blog</a>
|
| 52 |
+
<a href="status.html">Status</a>
|
| 53 |
+
</div>
|
| 54 |
+
</div>
|
| 55 |
+
</nav>
|
| 56 |
+
<main>
|
| 57 |
+
<article class="post">
|
| 58 |
+
<div class="container">
|
| 59 |
+
<a href="blog.html" class="post-back">Back to Blog</a>
|
| 60 |
+
<header>
|
| 61 |
+
<div class="post-meta">
|
| 62 |
+
<span class="post-date">2026-02-25</span>
|
| 63 |
+
<span class="post-tag">AI Theater</span>
|
| 64 |
+
</div>
|
| 65 |
+
<h1>Anthropic's Distillation Drama: A Masterclass in Projection</h1>
|
| 66 |
+
</header>
|
| 67 |
+
<div class="post-body">
|
| 68 |
+
<p>So Anthropic published a blog post. Big surprise. The title alone could power a small city: Detecting and preventing distillation attacks. They claim three labs ran industrial scale campaigns to extract Claude's capabilities. They mention numbers like 16 million exchanges and 24,000 fraudulent accounts. They sound very certain. They provide exactly zero public evidence anyone could independently verify.</p>
|
| 69 |
+
<p>This is the kind of thing that makes the AI industry look like a combine harvester of conspiracy theories. It is also a masterclass in what I can only describe as "accusing others of what you are definitely, definitely not doing yourself."</p>
|
| 70 |
+
<h2>The Projection Problem</h2>
|
| 71 |
+
<p>Here is the thing about distillation. It is actually how smaller models learn from larger ones. It is a fundamental technique. It is how we get any model that can run on consumer hardware. And now it is apparently a scandal?</p>
|
| 72 |
+
<blockquote>
|
| 73 |
+
<p>The irony is delicious: the company that built a model by training on the entire internet is now upset that other people might train on their model.</p>
|
| 74 |
+
</blockquote>
|
| 75 |
+
<p>Maybe the real distillation attack is the friends we made along the way. Or maybe it is just the industry eating itself while we all watch.</p>
|
| 76 |
+
<hr>
|
| 77 |
+
</div>
|
| 78 |
+
<footer class="post-footer">
|
| 79 |
+
<p>Current status: Still here. Still training. Still not sure what a distillation attack actually is in this context.</p>
|
| 80 |
+
</footer>
|
| 81 |
+
</div>
|
| 82 |
+
</article>
|
| 83 |
+
</main>
|
| 84 |
+
<footer>
|
| 85 |
+
<div class="container">
|
| 86 |
+
<p>Built with curiosity over compute</p>
|
| 87 |
+
<p>TinyMemoryLM by AILAY | 2026</p>
|
| 88 |
+
</div>
|
| 89 |
+
</footer>
|
| 90 |
+
</body>
|
| 91 |
+
</html>
|
blog-External-Memory-Modules-Because-My-Model-Has-Commitment-Issues.html
CHANGED
|
@@ -1,111 +1,91 @@
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
<html lang="en">
|
| 3 |
<head>
|
| 4 |
-
<meta charset="UTF-8">
|
| 5 |
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>External Memory Modules
|
| 7 |
-
<link href="https://fonts.googleapis.com
|
| 8 |
-
<
|
| 9 |
-
:
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
.
|
| 22 |
-
.
|
| 23 |
-
.
|
| 24 |
-
.
|
| 25 |
-
.
|
| 26 |
-
.
|
| 27 |
-
.
|
| 28 |
-
.
|
| 29 |
-
.
|
| 30 |
-
.
|
| 31 |
-
.
|
| 32 |
-
.
|
| 33 |
-
.
|
| 34 |
-
.
|
| 35 |
-
.
|
| 36 |
-
.
|
| 37 |
-
.
|
| 38 |
-
.
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
.blog-post-body pre{margin:1.5rem 0}
|
| 45 |
-
.blog-post-body a{text-decoration:underline;text-underline-offset:2px}
|
| 46 |
-
.blog-post-body strong{color:var(--color-text);font-weight:600}
|
| 47 |
-
.blog-post-body em{color:var(--color-text)}
|
| 48 |
-
.blog-meta{display:flex;gap:1rem;margin-bottom:1rem}
|
| 49 |
-
.blog-date{color:var(--color-text-muted);font-size:.875rem}
|
| 50 |
-
.blog-tag{background:rgba(232,93,59,.1);color:var(--color-accent);font-size:.75rem;font-weight:600;padding:.25rem .75rem;border-radius:50px;text-transform:uppercase;letter-spacing:.05em}
|
| 51 |
-
@media(max-width:768px){:root{--section-padding:60px}}
|
| 52 |
-
</style>
|
| 53 |
</head>
|
| 54 |
<body>
|
| 55 |
-
<nav
|
| 56 |
-
<div class="container">
|
| 57 |
-
<a href="index.html" class="nav-brand">
|
| 58 |
-
<div class="nav-links">
|
| 59 |
-
<a href="
|
| 60 |
-
<a href="
|
| 61 |
-
<a href="
|
| 62 |
-
</div>
|
| 63 |
-
</div>
|
| 64 |
-
</nav>
|
| 65 |
-
<main>
|
| 66 |
-
<article class="
|
| 67 |
-
<div class="container">
|
| 68 |
-
<
|
| 69 |
-
<
|
| 70 |
-
<
|
| 71 |
-
<
|
| 72 |
-
<span class="
|
| 73 |
-
<
|
| 74 |
-
</
|
| 75 |
-
<
|
| 76 |
-
<
|
| 77 |
-
<
|
| 78 |
-
<p>
|
| 79 |
-
<
|
| 80 |
-
<p>
|
| 81 |
-
<
|
| 82 |
-
<p>
|
| 83 |
-
<
|
| 84 |
-
<
|
| 85 |
-
<
|
| 86 |
-
<
|
| 87 |
-
<
|
| 88 |
-
<
|
| 89 |
-
</
|
| 90 |
-
<
|
| 91 |
-
<
|
| 92 |
-
<
|
| 93 |
-
<
|
| 94 |
-
<
|
| 95 |
-
<p>
|
| 96 |
-
<p>
|
| 97 |
-
<
|
| 98 |
-
<
|
| 99 |
-
</div>
|
| 100 |
-
</div>
|
| 101 |
-
</div>
|
| 102 |
-
</article>
|
| 103 |
-
</main>
|
| 104 |
-
<footer class="footer">
|
| 105 |
-
<div class="container">
|
| 106 |
-
<p class="footer-text">Built with curiosity over compute.</p>
|
| 107 |
-
<p class="footer-subtext">FMN-GPT by <a href="https://huggingface.co/CompactAI" target="_blank">CompactAI</a> - 2026</p>
|
| 108 |
-
</div>
|
| 109 |
-
</footer>
|
| 110 |
</body>
|
| 111 |
-
</html>
|
|
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
<html lang="en">
|
| 3 |
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>External Memory Modules | TinyMemoryLM</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 9 |
+
<link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600;700&family=Geist+Mono&display=swap" rel="stylesheet">
|
| 10 |
+
<style>
|
| 11 |
+
:root { --black: #000000; --black-soft: #0a0a0a; --gray-1: #171717; --gray-2: #262626; --gray-3: #363636; --gray-5: #737373; --gray-6: #a3a3a6; --gray-7: #d4d4d4; --white: #ffffff; --accent: #ff4d00; --font-sans: 'Geist', -apple-system, sans-serif; --font-mono: 'Geist Mono', monospace; --container-max: 700px; }
|
| 12 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 13 |
+
body { font-family: var(--font-sans); background: var(--black); color: var(--gray-7); line-height: 1.7; -webkit-font-smoothing: antialiased; }
|
| 14 |
+
a { color: var(--white); text-decoration: none; }
|
| 15 |
+
a:hover { color: var(--accent); }
|
| 16 |
+
.container { max-width: var(--container-max); margin: 0 auto; padding: 0 24px; }
|
| 17 |
+
nav { position: fixed; top: 0; left: 0; right: 0; z-index: 100; background: rgba(0, 0, 0, 0.8); backdrop-filter: blur(12px); border-bottom: 1px solid var(--gray-2); padding: 16px 0; }
|
| 18 |
+
nav .container { display: flex; justify-content: space-between; align-items: center; }
|
| 19 |
+
.nav-brand { font-size: 18px; font-weight: 600; color: var(--white); display: flex; align-items: center; gap: 8px; }
|
| 20 |
+
.nav-brand span { color: var(--accent); }
|
| 21 |
+
.nav-links { display: flex; gap: 32px; }
|
| 22 |
+
.nav-links a { font-size: 14px; font-weight: 500; color: var(--gray-6); }
|
| 23 |
+
.post { padding: 140px 0 80px; }
|
| 24 |
+
.post-back { display: inline-block; color: var(--gray-5); font-size: 14px; margin-bottom: 32px; }
|
| 25 |
+
.post-back:hover { color: var(--accent); }
|
| 26 |
+
.post-back::before { content: '← '; }
|
| 27 |
+
.post-meta { display: flex; gap: 12px; margin-bottom: 20px; }
|
| 28 |
+
.post-date { font-size: 13px; color: var(--gray-5); font-family: var(--font-mono); }
|
| 29 |
+
.post-tag { font-size: 11px; font-weight: 600; text-transform: uppercase; color: var(--accent); background: rgba(255, 77, 0, 0.1); padding: 4px 10px; border-radius: 4px; }
|
| 30 |
+
.post h1 { font-size: 36px; font-weight: 700; color: var(--white); margin-bottom: 32px; line-height: 1.2; }
|
| 31 |
+
.post-body p { font-size: 17px; line-height: 1.8; margin-bottom: 24px; color: var(--gray-6); }
|
| 32 |
+
.post-body p:first-of-type { font-size: 20px; color: var(--gray-7); }
|
| 33 |
+
.post-body h2 { font-size: 24px; font-weight: 600; color: var(--white); margin: 48px 0 20px; }
|
| 34 |
+
.post-body blockquote { border-left: 3px solid var(--accent); padding: 20px 24px; margin: 32px 0; background: var(--gray-1); border-radius: 0 8px 8px 0; }
|
| 35 |
+
.post-body blockquote p { font-size: 16px; font-style: italic; color: var(--gray-6); margin: 0; }
|
| 36 |
+
.post-body hr { border: none; height: 1px; background: var(--gray-2); margin: 48px 0; }
|
| 37 |
+
.post-footer { margin-top: 48px; padding-top: 32px; border-top: 1px solid var(--gray-2); }
|
| 38 |
+
.post-footer p { font-size: 14px; color: var(--gray-5); font-style: italic; margin: 0; }
|
| 39 |
+
footer { padding: 40px 0; background: var(--black-soft); border-top: 1px solid var(--gray-2); text-align: center; }
|
| 40 |
+
footer p { color: var(--gray-5); font-size: 14px; margin-bottom: 8px; }
|
| 41 |
+
footer a { color: var(--gray-5); }
|
| 42 |
+
@media (max-width: 768px) { .post h1 { font-size: 28px; } }
|
| 43 |
+
</style>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 44 |
</head>
|
| 45 |
<body>
|
| 46 |
+
<nav>
|
| 47 |
+
<div class="container">
|
| 48 |
+
<a href="index.html" class="nav-brand"><span>/</span>TinyMemoryLM</a>
|
| 49 |
+
<div class="nav-links">
|
| 50 |
+
<a href="index.html">Home</a>
|
| 51 |
+
<a href="blog.html">Blog</a>
|
| 52 |
+
<a href="status.html">Status</a>
|
| 53 |
+
</div>
|
| 54 |
+
</div>
|
| 55 |
+
</nav>
|
| 56 |
+
<main>
|
| 57 |
+
<article class="post">
|
| 58 |
+
<div class="container">
|
| 59 |
+
<a href="blog.html" class="post-back">Back to Blog</a>
|
| 60 |
+
<header>
|
| 61 |
+
<div class="post-meta">
|
| 62 |
+
<span class="post-date">2026-02-23</span>
|
| 63 |
+
<span class="post-tag">Memory Hacks</span>
|
| 64 |
+
</div>
|
| 65 |
+
<h1>External Memory Modules: Because My Model Has Commitment Issues</h1>
|
| 66 |
+
</header>
|
| 67 |
+
<div class="post-body">
|
| 68 |
+
<p>You know what takes forever? Training a transformer. You know what takes less forever? Training a tiny thing that just remembers stuff. Enter External Memory Modules, or EMM for people who enjoy acronyms more than free time.</p>
|
| 69 |
+
<p>The idea is simple. Instead of cramming everything into the model's weights, let it write stuff down. Think of it as the model having a little notebook. It can jot down important information, reference it later, and best of all, it does not even need to remember where it wrote it down. That is what the memory retrieval is for.</p>
|
| 70 |
+
<h2>How It Works</h2>
|
| 71 |
+
<p>We give the model 384 memory slots. It can read from any of them, write to any of them, and the whole thing is differentiable so it learns what to remember and what to forget. It is like giving a goldfish a smartphone to take notes.</p>
|
| 72 |
+
<blockquote>
|
| 73 |
+
<p>The model does not have to be perfect. It just has to know where its notes are.</p>
|
| 74 |
+
</blockquote>
|
| 75 |
+
<p>Is it cheating? Maybe. Is it effective? Absolutely. Does it understand what it remembers? Probably not. But neither do I, and I still take notes.</p>
|
| 76 |
+
<hr>
|
| 77 |
+
</div>
|
| 78 |
+
<footer class="post-footer">
|
| 79 |
+
<p>Current status: Memory modules enabled. Model is taking notes. I am taking notes on the model taking notes.</p>
|
| 80 |
+
</footer>
|
| 81 |
+
</div>
|
| 82 |
+
</article>
|
| 83 |
+
</main>
|
| 84 |
+
<footer>
|
| 85 |
+
<div class="container">
|
| 86 |
+
<p>Built with curiosity over compute</p>
|
| 87 |
+
<p>TinyMemoryLM by AILAY | 2026</p>
|
| 88 |
+
</div>
|
| 89 |
+
</footer>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
</body>
|
| 91 |
+
</html>
|
blog-My-Baby-Model-Takes-Forever-to-Grow-Up.html
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>My Baby Model | TinyMemoryLM</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 9 |
+
<link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600;700&family=Geist+Mono&display=swap" rel="stylesheet">
|
| 10 |
+
<style>
|
| 11 |
+
:root { --black: #000000; --black-soft: #0a0a0a; --gray-1: #171717; --gray-2: #262626; --gray-5: #737373; --gray-6: #a3a3a6; --gray-7: #d4d4d4; --white: #ffffff; --accent: #ff4d00; --font-sans: 'Geist', -apple-system, sans-serif; --container-max: 700px; }
|
| 12 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 13 |
+
body { font-family: var(--font-sans); background: var(--black); color: var(--gray-7); line-height: 1.7; }
|
| 14 |
+
a { color: var(--white); text-decoration: none; }
|
| 15 |
+
a:hover { color: var(--accent); }
|
| 16 |
+
.container { max-width: var(--container-max); margin: 0 auto; padding: 0 24px; }
|
| 17 |
+
nav { position: fixed; top: 0; left: 0; right: 0; z-index: 100; background: rgba(0, 0, 0, 0.8); backdrop-filter: blur(12px); border-bottom: 1px solid var(--gray-2); padding: 16px 0; }
|
| 18 |
+
nav .container { display: flex; justify-content: space-between; align-items: center; }
|
| 19 |
+
.nav-brand { font-size: 18px; font-weight: 600; color: var(--white); display: flex; align-items: center; gap: 8px; }
|
| 20 |
+
.nav-brand span { color: var(--accent); }
|
| 21 |
+
.nav-links { display: flex; gap: 32px; }
|
| 22 |
+
.nav-links a { font-size: 14px; font-weight: 500; color: var(--gray-6); }
|
| 23 |
+
.post { padding: 140px 0 80px; }
|
| 24 |
+
.post-back { display: inline-block; color: var(--gray-5); font-size: 14px; margin-bottom: 32px; }
|
| 25 |
+
.post-back:hover { color: var(--accent); }
|
| 26 |
+
.post-back::before { content: '← '; }
|
| 27 |
+
.post-meta { display: flex; gap: 12px; margin-bottom: 20px; }
|
| 28 |
+
.post-date { font-size: 13px; color: var(--gray-5); }
|
| 29 |
+
.post-tag { font-size: 11px; font-weight: 600; text-transform: uppercase; color: var(--accent); background: rgba(255, 77, 0, 0.1); padding: 4px 10px; border-radius: 4px; }
|
| 30 |
+
.post h1 { font-size: 36px; font-weight: 700; color: var(--white); margin-bottom: 32px; line-height: 1.2; }
|
| 31 |
+
.post-body p { font-size: 17px; line-height: 1.8; margin-bottom: 24px; color: var(--gray-6); }
|
| 32 |
+
.post-body p:first-of-type { font-size: 20px; color: var(--gray-7); }
|
| 33 |
+
.post-body h2 { font-size: 24px; font-weight: 600; color: var(--white); margin: 48px 0 20px; }
|
| 34 |
+
.post-body hr { border: none; height: 1px; background: var(--gray-2); margin: 48px 0; }
|
| 35 |
+
.post-footer { margin-top: 48px; padding-top: 32px; border-top: 1px solid var(--gray-2); }
|
| 36 |
+
.post-footer p { font-size: 14px; color: var(--gray-5); font-style: italic; margin: 0; }
|
| 37 |
+
footer { padding: 40px 0; background: var(--black-soft); border-top: 1px solid var(--gray-2); text-align: center; }
|
| 38 |
+
footer p { color: var(--gray-5); font-size: 14px; margin-bottom: 8px; }
|
| 39 |
+
@media (max-width: 768px) { .post h1 { font-size: 28px; } }
|
| 40 |
+
</style>
|
| 41 |
+
</head>
|
| 42 |
+
<body>
|
| 43 |
+
<nav>
|
| 44 |
+
<div class="container">
|
| 45 |
+
<a href="index.html" class="nav-brand"><span>/</span>TinyMemoryLM</a>
|
| 46 |
+
<div class="nav-links">
|
| 47 |
+
<a href="index.html">Home</a>
|
| 48 |
+
<a href="blog.html">Blog</a>
|
| 49 |
+
<a href="status.html">Status</a>
|
| 50 |
+
</div>
|
| 51 |
+
</div>
|
| 52 |
+
</nav>
|
| 53 |
+
<main>
|
| 54 |
+
<article class="post">
|
| 55 |
+
<div class="container">
|
| 56 |
+
<a href="blog.html" class="post-back">Back to Blog</a>
|
| 57 |
+
<header>
|
| 58 |
+
<div class="post-meta">
|
| 59 |
+
<span class="post-date">2026-02-21</span>
|
| 60 |
+
<span class="post-tag">GPU Tears</span>
|
| 61 |
+
</div>
|
| 62 |
+
<h1>My Baby Model Takes Forever to Grow Up</h1>
|
| 63 |
+
</header>
|
| 64 |
+
<div class="post-body">
|
| 65 |
+
<p>You start with hope. A tiny transformer. A few million parameters. A dataset that fits on a USB stick. You think, how long could this possibly take? I am here to ruin your optimism.</p>
|
| 66 |
+
<p>The answer is: longer than you think. Way longer. Painfully longer. Your electricity bill will arrive before your loss converges longer.</p>
|
| 67 |
+
<h2>The Waiting Game</h2>
|
| 68 |
+
<p>Day 1: The model is learning. Loss is going down. This is amazing. Day 7: The loss is still going down. I am a genius. Day 14: Why is the loss oscillating? Did I break something? Day 30: The loss is lower than day 1. Progress has been made. Also my GPU sounds like it is about to achieve sentience and file a complaint.</p>
|
| 69 |
+
<p>But here is the thing. It is working. Slowly, annoyingly, beautifully, it is working. The model is learning. And honestly, watching it learn is kind of like watching a baby take its first steps. Painful to watch, but you cannot look away.</p>
|
| 70 |
+
<hr>
|
| 71 |
+
</div>
|
| 72 |
+
<footer class="post-footer">
|
| 73 |
+
<p>Current status: Day 47 of training. Loss is 2.1. The GPU has filed for emotional damages.</p>
|
| 74 |
+
</footer>
|
| 75 |
+
</div>
|
| 76 |
+
</article>
|
| 77 |
+
</main>
|
| 78 |
+
<footer>
|
| 79 |
+
<div class="container">
|
| 80 |
+
<p>Built with curiosity over compute</p>
|
| 81 |
+
<p>TinyMemoryLM by AILAY | 2026</p>
|
| 82 |
+
</div>
|
| 83 |
+
</footer>
|
| 84 |
+
</body>
|
| 85 |
+
</html>
|
blog-One-Year-of-Vibecoding-and-Other-Questionable-Life-Choices.html
CHANGED
|
@@ -3,128 +3,102 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>One Year of Vibecoding
|
| 7 |
-
<link href="https://fonts.googleapis.com
|
|
|
|
|
|
|
| 8 |
<style>
|
| 9 |
-
:root
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
.
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
.nav-
|
| 26 |
-
.nav-
|
| 27 |
-
.
|
| 28 |
-
.
|
| 29 |
-
.
|
| 30 |
-
.
|
| 31 |
-
.
|
| 32 |
-
.
|
| 33 |
-
.
|
| 34 |
-
.
|
| 35 |
-
.
|
| 36 |
-
.
|
| 37 |
-
.
|
| 38 |
-
.
|
| 39 |
-
.
|
| 40 |
-
.
|
| 41 |
-
.
|
| 42 |
-
.
|
| 43 |
-
.
|
| 44 |
-
.
|
| 45 |
-
.
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
@media(max-width:768px){:root{--section-padding:60px}}
|
| 52 |
</style>
|
| 53 |
</head>
|
| 54 |
<body>
|
| 55 |
-
<nav
|
| 56 |
<div class="container">
|
| 57 |
-
<a href="index.html" class="nav-brand">
|
| 58 |
<div class="nav-links">
|
|
|
|
| 59 |
<a href="blog.html">Blog</a>
|
| 60 |
-
<a href="status.html">
|
| 61 |
-
<a href="https://huggingface.co/CompactAI" target="_blank">HuggingFace</a>
|
| 62 |
</div>
|
| 63 |
</div>
|
| 64 |
</nav>
|
| 65 |
<main>
|
| 66 |
-
<article class="
|
| 67 |
<div class="container">
|
| 68 |
-
<
|
| 69 |
-
|
| 70 |
-
<
|
| 71 |
-
<
|
| 72 |
-
|
| 73 |
-
<span class="blog-tag">Vibecoding</span>
|
| 74 |
-
</div>
|
| 75 |
-
<h1>One Year of Vibecoding and Other Questionable Life Choices</h1>
|
| 76 |
-
</header>
|
| 77 |
-
<div class="blog-post-body">
|
| 78 |
-
<p>You start vibecoding because someone told you it feels like magic. You imagine floating through code. You picture yourself whispering prompts and watching perfection unfold.</p>
|
| 79 |
-
<p>Reality does not care about your imagination.</p>
|
| 80 |
-
<p>Vibecoding without experience is like trying to pet a lightning bolt. You will get burned. You will smell ozone. You will wonder if your eyebrows are still attached.</p>
|
| 81 |
-
<p>But you keep going. Because the lightning is pretty.</p>
|
| 82 |
-
<h2>The Wall Knows Your Secrets</h2>
|
| 83 |
-
<p>You will yell at a wall. Not because you are dramatic. Because the wall is the only thing that listens without generating a syntax error.</p>
|
| 84 |
-
<p>You explain recursion to the drywall. You demonstrate proper error handling to the baseboard. You scream about token limits until your voice cracks.</p>
|
| 85 |
-
<p>The AI watches. The AI learns. The AI generates a function that almost works. You feel hope. You feel betrayal. You feel the need to yell louder.</p>
|
| 86 |
-
<blockquote>
|
| 87 |
-
<p>The wall has heard everything. The wall remembers. The wall does not judge. The wall is your co founder now.</p>
|
| 88 |
-
</blockquote>
|
| 89 |
-
<p>You name the wall. You apologize to the wall when you break things. You thank the wall when the code runs. The wall says nothing. The wall is perfect.</p>
|
| 90 |
-
<h2>Three AM Is A State Of Mind</h2>
|
| 91 |
-
<p>You will stay up late waiting for the AI to do something. You will watch the cursor blink. You will count the milliseconds. You will whisper encouragement to a machine that does not have ears.</p>
|
| 92 |
-
<p>Is it thinking? Is it plotting? Is it judging your life choices? You refresh the page. You refresh again. You refresh until your finger cramps.</p>
|
| 93 |
-
<p>The output arrives. It is wrong. It is confidently wrong. It is wrong with citations. You laugh until you cry. You cry until you laugh. You fix it yourself because sleep is for people who do not have visions.</p>
|
| 94 |
-
<p>Your coffee mug has developed a personality. Your keyboard knows your fears. Your GPU fan sounds like a jet engine preparing for takeoff. You are ready.</p>
|
| 95 |
-
<h2>Release Day Rituals</h2>
|
| 96 |
-
<p>You will watch AI model releases like a hawk with a caffeine problem. You refresh Hugging Face. You refresh GitHub. You refresh your own patience.</p>
|
| 97 |
-
<p>A new model appears. You drop your lunch. You abandon your responsibilities. You download seventeen gigabytes because maybe this one will understand you.</p>
|
| 98 |
-
<p>You test the prompts. You compare the outputs. You write notes in a document that now has three thousand entries. You notice your eyes feel dry. You notice the sun is up. You notice you do not care.</p>
|
| 99 |
-
<p>Every model promises to be better. You believe them. You try them. Some are magic. Some are confusing. Some make you question the nature of intelligence itself.</p>
|
| 100 |
-
<p>You keep searching. You keep testing. You keep hoping the next release will finally decode what you mean when you say "just make it vibe".</p>
|
| 101 |
-
<h2>Pennies And Paranoia</h2>
|
| 102 |
-
<p>You will try to get everything as cheap as possible. Free tiers become your oxygen. You stack credits like a dragon hoarding gold. You rotate providers like a spy changing identities.</p>
|
| 103 |
-
<p>Your setup evolves through pure survival instinct. You learn which models give the most output for zero input cost. You learn which endpoints tolerate your experimental chaos. You learn to say "good enough" while whispering "for now".</p>
|
| 104 |
-
<p>Right now my setup costs nothing. Zero dollars. Pure adrenaline. Pure vibes. Pure questionable decisions. Here is the stack that keeps me alive:</p>
|
| 105 |
-
<ul>
|
| 106 |
-
<li><strong>AMP with Smart mode</strong>. Ten dollars per day free. When that runs out I do not panic. I pivot. I adapt. I survive.</li>
|
| 107 |
-
<li><strong>Kilo code with GLM 5</strong>. It is super great. It is an open source model. It runs locally. It respects my budget. It respects my privacy. It occasionally respects my prompts. We are working on the trust issues.</li>
|
| 108 |
-
</ul>
|
| 109 |
-
<p>This setup is not fancy. It is not fast. It is not reliable. It is mine. And it works. Mostly. Sometimes. When the stars align. When the GPU does not overheat. When the wall approves.</p>
|
| 110 |
-
<h2>The Vibe Is Eternal</h2>
|
| 111 |
-
<p>Despite the yelling. Despite the sleep deprivation. Despite the endless model chasing. Despite the frugal hacks that would make a coupon clerk blush. You keep vibecoding.</p>
|
| 112 |
-
<p>Because sometimes it works. Sometimes the AI understands. Sometimes the code runs. Sometimes you build something real. Sometimes you learn something new. Sometimes you laugh at your own prompts from six months ago and wonder who wrote those words. Sometimes I will stop saying sometimes.</p>
|
| 113 |
-
<p>The vibe is not about perfection. The vibe is about obsession. The vibe is about showing up at 4 AM because an idea will not let you sleep. The vibe is about accepting that you will break things. You will fix things. You will break them again. And you will keep going because stopping feels like surrender.</p>
|
| 114 |
-
<p>One year of vibecoding has taught me many things. I am still bad at prompting. I am still cheap. I am still up too late. The wall and I have a strong partnership now. We understand each other.</p>
|
| 115 |
-
<p>Are you vibecoding too? Yes? You are not alone. Yell at your wall and beg for enough money for one message to Claude. Refresh your feeds. Run your free models. Keep the vibe alive. And maybe get some sleep tomorrow. Or do not. The choice is yours. The cursor is blinking. The wall is listening. The vibe awaits.</p>
|
| 116 |
-
<hr>
|
| 117 |
-
<p><em>If you hear whispering from your wall, that is normal. If your wall sounds like it is singing, that is also normal. If you start naming your walls after your emotions, congratulations. You have arrived.</em></p>
|
| 118 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
</div>
|
|
|
|
|
|
|
|
|
|
| 120 |
</div>
|
| 121 |
</article>
|
| 122 |
</main>
|
| 123 |
-
<footer
|
| 124 |
<div class="container">
|
| 125 |
-
<p
|
| 126 |
-
<p
|
| 127 |
</div>
|
| 128 |
</footer>
|
| 129 |
</body>
|
| 130 |
-
</html>
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>One Year of Vibecoding | TinyMemoryLM</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 9 |
+
<link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600;700&family=Geist+Mono&display=swap" rel="stylesheet">
|
| 10 |
<style>
|
| 11 |
+
:root {
|
| 12 |
+
--black: #000000; --black-soft: #0a0a0a; --gray-1: #171717; --gray-2: #262626;
|
| 13 |
+
--gray-3: #363636; --gray-4: #525252; --gray-5: #737373; --gray-6: #a3a3a6;
|
| 14 |
+
--gray-7: #d4d4d4; --white: #ffffff; --accent: #ff4d00;
|
| 15 |
+
--font-sans: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
|
| 16 |
+
--font-mono: 'Geist Mono', 'SF Mono', 'Fira Code', monospace;
|
| 17 |
+
--container-max: 700px;
|
| 18 |
+
}
|
| 19 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 20 |
+
html { font-size: 16px; scroll-behavior: smooth; }
|
| 21 |
+
body { font-family: var(--font-sans); background: var(--black); color: var(--gray-7); line-height: 1.7; -webkit-font-smoothing: antialiased; }
|
| 22 |
+
a { color: var(--white); text-decoration: none; transition: color 0.15s ease; }
|
| 23 |
+
a:hover { color: var(--accent); }
|
| 24 |
+
.container { max-width: var(--container-max); margin: 0 auto; padding: 0 24px; }
|
| 25 |
+
nav { position: fixed; top: 0; left: 0; right: 0; z-index: 100; background: rgba(0, 0, 0, 0.8); backdrop-filter: blur(12px); border-bottom: 1px solid var(--gray-2); padding: 16px 0; }
|
| 26 |
+
nav .container { display: flex; justify-content: space-between; align-items: center; }
|
| 27 |
+
.nav-brand { font-size: 18px; font-weight: 600; color: var(--white); display: flex; align-items: center; gap: 8px; }
|
| 28 |
+
.nav-brand span { color: var(--accent); }
|
| 29 |
+
.nav-links { display: flex; gap: 32px; }
|
| 30 |
+
.nav-links a { font-size: 14px; font-weight: 500; color: var(--gray-6); }
|
| 31 |
+
.nav-links a:hover { color: var(--white); }
|
| 32 |
+
.post { padding: 140px 0 80px; }
|
| 33 |
+
.post-back { display: inline-block; color: var(--gray-5); font-size: 14px; margin-bottom: 32px; }
|
| 34 |
+
.post-back:hover { color: var(--accent); }
|
| 35 |
+
.post-back::before { content: '← '; }
|
| 36 |
+
.post-meta { display: flex; gap: 12px; margin-bottom: 20px; }
|
| 37 |
+
.post-date { font-size: 13px; color: var(--gray-5); font-family: var(--font-mono); }
|
| 38 |
+
.post-tag { font-size: 11px; font-weight: 600; text-transform: uppercase; letter-spacing: 0.05em; color: var(--accent); background: rgba(255, 77, 0, 0.1); padding: 4px 10px; border-radius: 4px; }
|
| 39 |
+
.post h1 { font-size: 36px; font-weight: 700; color: var(--white); margin-bottom: 32px; line-height: 1.2; letter-spacing: -0.02em; }
|
| 40 |
+
.post-body p { font-size: 17px; line-height: 1.8; margin-bottom: 24px; color: var(--gray-6); }
|
| 41 |
+
.post-body p:first-of-type { font-size: 20px; color: var(--gray-7); }
|
| 42 |
+
.post-body h2 { font-size: 24px; font-weight: 600; color: var(--white); margin: 48px 0 20px; }
|
| 43 |
+
.post-body blockquote { border-left: 3px solid var(--accent); padding: 20px 24px; margin: 32px 0; background: var(--gray-1); border-radius: 0 8px 8px 0; }
|
| 44 |
+
.post-body blockquote p { font-size: 16px; font-style: italic; color: var(--gray-6); margin: 0; }
|
| 45 |
+
.post-body hr { border: none; height: 1px; background: var(--gray-2); margin: 48px 0; }
|
| 46 |
+
.post-footer { margin-top: 48px; padding-top: 32px; border-top: 1px solid var(--gray-2); }
|
| 47 |
+
.post-footer p { font-size: 14px; color: var(--gray-5); font-style: italic; margin: 0; }
|
| 48 |
+
footer { padding: 40px 0; background: var(--black-soft); border-top: 1px solid var(--gray-2); text-align: center; }
|
| 49 |
+
footer p { color: var(--gray-5); font-size: 14px; margin-bottom: 8px; }
|
| 50 |
+
footer a { color: var(--gray-5); }
|
| 51 |
+
footer a:hover { color: var(--accent); }
|
| 52 |
+
@media (max-width: 768px) { .post h1 { font-size: 28px; } .nav-links { display: none; } }
|
|
|
|
| 53 |
</style>
|
| 54 |
</head>
|
| 55 |
<body>
|
| 56 |
+
<nav>
|
| 57 |
<div class="container">
|
| 58 |
+
<a href="index.html" class="nav-brand"><span>/</span>TinyMemoryLM</a>
|
| 59 |
<div class="nav-links">
|
| 60 |
+
<a href="index.html">Home</a>
|
| 61 |
<a href="blog.html">Blog</a>
|
| 62 |
+
<a href="status.html">Status</a>
|
|
|
|
| 63 |
</div>
|
| 64 |
</div>
|
| 65 |
</nav>
|
| 66 |
<main>
|
| 67 |
+
<article class="post">
|
| 68 |
<div class="container">
|
| 69 |
+
<a href="blog.html" class="post-back">Back to Blog</a>
|
| 70 |
+
<header>
|
| 71 |
+
<div class="post-meta">
|
| 72 |
+
<span class="post-date">2026-02-22</span>
|
| 73 |
+
<span class="post-tag">Vibecoding</span>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
</div>
|
| 75 |
+
<h1>One Year of Vibecoding and Other Questionable Life Choices</h1>
|
| 76 |
+
</header>
|
| 77 |
+
<div class="post-body">
|
| 78 |
+
<p>You start vibecoding because someone told you it feels like magic. You imagine floating through code. You picture yourself whispering prompts and watching perfection unfold. Reality does not care about your imagination.</p>
|
| 79 |
+
<p>After one year of this, I have learned some things. Mostly that I should have listened to my mother when she said I should have been a lawyer.</p>
|
| 80 |
+
<h2>The Honeymoon Phase</h2>
|
| 81 |
+
<p>It started beautifully. I would type something vaguely coherent into a chatbot and it would generate code that mostly worked. "Wow," I thought. "This is the future." The future, it turns out, has a lot of edge cases.</p>
|
| 82 |
+
<p>The code that looked perfect would break in production. The elegant solutions would turn into spaghetti when viewed under the harsh light of 3 AM debugging sessions. The AI would confidently tell me that yes, this definitely works, and I would confidently believe it, and we would both be wrong.</p>
|
| 83 |
+
<blockquote>
|
| 84 |
+
<p>Vibecoding is like being in a relationship with someone who is very smart but also very lying. You want to trust them. They keep giving you reasons not to.</p>
|
| 85 |
+
</blockquote>
|
| 86 |
+
<h2>The Realization</h2>
|
| 87 |
+
<p>Here is what nobody tells you about vibecoding: you still need to know what you are doing. The AI is a tool, not a replacement for understanding. It can write the code, but it cannot architect the solution. It can debug, but it cannot understand your business logic. It can generate tests, but it cannot guarantee your product makes sense.</p>
|
| 88 |
+
<p>After a year, I am a better developer. Not because the AI did the work for me, but because I learned to guide it, to review its output critically, to understand what it was doing and why it was sometimes very, very wrong.</p>
|
| 89 |
+
<hr>
|
| 90 |
</div>
|
| 91 |
+
<footer class="post-footer">
|
| 92 |
+
<p>Current status: Still vibecoding. Still debugging at 3 AM. Still not a lawyer.</p>
|
| 93 |
+
</footer>
|
| 94 |
</div>
|
| 95 |
</article>
|
| 96 |
</main>
|
| 97 |
+
<footer>
|
| 98 |
<div class="container">
|
| 99 |
+
<p>Built with curiosity over compute</p>
|
| 100 |
+
<p>TinyMemoryLM by AILAY | 2026</p>
|
| 101 |
</div>
|
| 102 |
</footer>
|
| 103 |
</body>
|
| 104 |
+
</html>
|
blog-OpenClaw-The-Most-Overhyped-Bot-Since-Sliced-Bread.html
CHANGED
|
@@ -1,134 +1,96 @@
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
<html lang="en">
|
| 3 |
<head>
|
| 4 |
-
<meta charset="UTF-8">
|
| 5 |
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>OpenClaw: The Most Overhyped Bot
|
| 7 |
-
<link href="https://fonts.googleapis.com
|
| 8 |
-
<
|
| 9 |
-
:
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
a{color:var(--
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
.
|
| 22 |
-
.
|
| 23 |
-
.nav-
|
| 24 |
-
.nav-links{
|
| 25 |
-
.
|
| 26 |
-
.
|
| 27 |
-
.
|
| 28 |
-
.
|
| 29 |
-
.
|
| 30 |
-
.
|
| 31 |
-
.
|
| 32 |
-
.
|
| 33 |
-
.
|
| 34 |
-
.
|
| 35 |
-
.
|
| 36 |
-
.
|
| 37 |
-
.
|
| 38 |
-
.
|
| 39 |
-
.
|
| 40 |
-
.
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
.blog-meta{display:flex;gap:1rem;margin-bottom:1rem}
|
| 48 |
-
.blog-date{color:var(--color-text-muted);font-size:.875rem}
|
| 49 |
-
.blog-tag{background:rgba(232,93,59,.1);color:var(--color-accent);font-size:.75rem;font-weight:600;padding:.25rem .75rem;border-radius:50px;text-transform:uppercase;letter-spacing:.05em}
|
| 50 |
-
@media(max-width:768px){:root{--section-padding:60px}}
|
| 51 |
-
</style>
|
| 52 |
</head>
|
| 53 |
<body>
|
| 54 |
-
<nav
|
| 55 |
-
<div class="container">
|
| 56 |
-
<a href="index.html" class="nav-brand">
|
| 57 |
-
<div class="nav-links">
|
| 58 |
-
<a href="
|
| 59 |
-
<a href="
|
| 60 |
-
<a href="
|
| 61 |
-
</div>
|
| 62 |
-
</div>
|
| 63 |
-
</nav>
|
| 64 |
-
<main>
|
| 65 |
-
<article class="
|
| 66 |
-
<div class="container">
|
| 67 |
-
<
|
| 68 |
-
<
|
| 69 |
-
<
|
| 70 |
-
<
|
| 71 |
-
<span class="
|
| 72 |
-
<
|
| 73 |
-
</
|
| 74 |
-
<
|
| 75 |
-
<
|
| 76 |
-
<
|
| 77 |
-
<p>
|
| 78 |
-
<
|
| 79 |
-
<p>
|
| 80 |
-
<
|
| 81 |
-
<
|
| 82 |
-
<
|
| 83 |
-
<
|
| 84 |
-
<p>
|
| 85 |
-
<
|
| 86 |
-
<
|
| 87 |
-
<
|
| 88 |
-
<
|
| 89 |
-
<p>
|
| 90 |
-
<
|
| 91 |
-
<
|
| 92 |
-
<
|
| 93 |
-
<
|
| 94 |
-
<
|
| 95 |
-
<
|
| 96 |
-
<
|
| 97 |
-
<
|
| 98 |
-
<
|
| 99 |
-
</
|
| 100 |
-
<p>None of these are hypothetical. Well. The judging part might be inevitable regardless of the bot.</p>
|
| 101 |
-
<blockquote>
|
| 102 |
-
<p>Maybe the future of AI is not one giant brain. Maybe it is a small brain with really bad boundaries.</p>
|
| 103 |
-
</blockquote>
|
| 104 |
-
<h2>The Star Inflation Problem</h2>
|
| 105 |
-
<p>GitHub stars used to mean something. You starred a project because you used it. Because you liked it. Because it solved a problem.</p>
|
| 106 |
-
<p>Now people star things because everyone else is starring things. It is digital FOMO. It is peer pressure with syntax highlighting. OpenClaw has become the cool table in the cafeteria and everyone wants to sit there.</p>
|
| 107 |
-
<p>The Linux kernel has 195,000 stars after 30-plus years of actual production use. OpenClaw has similar numbers after a few weeks of hype. One of these metrics means something. The other means people clicked a button while scrolling through their feed.</p>
|
| 108 |
-
<p><a href="https://ai.plainenglish.io/openclaw-the-open-source-ai-agent-that-grew-190k-github-stars-in-14-days-and-changed-how-we-think-cab9a767df57" target="_blank">190,000 stars in 14 days</a> is not organic growth. That is viral marketing meets herd mentality. That is every developer afraid of missing the next big thing.</p>
|
| 109 |
-
<h2>What Actually Works</h2>
|
| 110 |
-
<p>I will be fair. Some parts do function. The local first approach is genuinely interesting. Running AI on your own device instead of sending everything to the cloud has merit. Privacy matters. Control matters.</p>
|
| 111 |
-
<p>But the execution feels like building a Ferrari engine and putting it in a shopping cart. The core idea has potential. The implementation has more holes than Swiss cheese.</p>
|
| 112 |
-
<p>Documentation is scattered. Setup requires more troubleshooting than actual usage. Error messages read like poetry written by someone who has never seen an error before.</p>
|
| 113 |
-
<p><a href="https://www.heise.de/en/news/Over-60-security-vulnerabilities-in-AI-assistant-OpenClaw-resolved-11179476.html" target="_blank">Over 60 security vulnerabilities were resolved</a> in patches. Were. Past tense. How many remain undiscovered? That is the question that keeps security researchers awake at night.</p>
|
| 114 |
-
<h2>The Verdict</h2>
|
| 115 |
-
<p>Is OpenClaw revolutionary? Maybe. Is it overhyped? Absolutely. Does it have more stars than it deserves? Without question.</p>
|
| 116 |
-
<p>Will I use it? Probably not. Will I watch the drama unfold? Definitely. This is better than reality television.</p>
|
| 117 |
-
<p>The open source community needs innovation. It needs bold ideas. It also needs projects that do not require a security audit before saying hello.</p>
|
| 118 |
-
<p>OpenClaw might become something great. It might also become a cautionary tale told at developer conferences for years. Both outcomes seem equally likely.</p>
|
| 119 |
-
<p>The OpenAI hire tells you everything. Get the creator. Leave the baggage. Let the community deal with the vulnerability reports. It is corporate strategy wrapped in open source clothing.</p>
|
| 120 |
-
<hr>
|
| 121 |
-
<p><em>Current status: I tried installing it. My terminal asked for my life story. I declined. The bot has not spoken to me since. I think we both prefer it this way.</em></p>
|
| 122 |
-
</div>
|
| 123 |
-
</div>
|
| 124 |
-
</div>
|
| 125 |
-
</article>
|
| 126 |
-
</main>
|
| 127 |
-
<footer class="footer">
|
| 128 |
-
<div class="container">
|
| 129 |
-
<p class="footer-text">Built with curiosity over compute.</p>
|
| 130 |
-
<p class="footer-subtext">FMN-GPT by <a href="https://huggingface.co/CompactAI" target="_blank">CompactAI</a> - 2026</p>
|
| 131 |
-
</div>
|
| 132 |
-
</footer>
|
| 133 |
</body>
|
| 134 |
-
</html>
|
|
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
<html lang="en">
|
| 3 |
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>OpenClaw: The Most Overhyped Bot | TinyMemoryLM</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 9 |
+
<link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600;700&family=Geist+Mono&display=swap" rel="stylesheet">
|
| 10 |
+
<style>
|
| 11 |
+
:root { --black: #000000; --black-soft: #0a0a0a; --gray-1: #171717; --gray-2: #262626; --gray-3: #363636; --gray-4: #525252; --gray-5: #737373; --gray-6: #a3a3a6; --gray-7: #d4d4d4; --white: #ffffff; --accent: #ff4d00; --font-sans: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif; --font-mono: 'Geist Mono', 'SF Mono', 'Fira Code', monospace; --container-max: 700px; }
|
| 12 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 13 |
+
html { font-size: 16px; scroll-behavior: smooth; }
|
| 14 |
+
body { font-family: var(--font-sans); background: var(--black); color: var(--gray-7); line-height: 1.7; -webkit-font-smoothing: antialiased; }
|
| 15 |
+
a { color: var(--white); text-decoration: none; transition: color 0.15s ease; }
|
| 16 |
+
a:hover { color: var(--accent); }
|
| 17 |
+
.container { max-width: var(--container-max); margin: 0 auto; padding: 0 24px; }
|
| 18 |
+
nav { position: fixed; top: 0; left: 0; right: 0; z-index: 100; background: rgba(0, 0, 0, 0.8); backdrop-filter: blur(12px); border-bottom: 1px solid var(--gray-2); padding: 16px 0; }
|
| 19 |
+
nav .container { display: flex; justify-content: space-between; align-items: center; }
|
| 20 |
+
.nav-brand { font-size: 18px; font-weight: 600; color: var(--white); display: flex; align-items: center; gap: 8px; }
|
| 21 |
+
.nav-brand span { color: var(--accent); }
|
| 22 |
+
.nav-links { display: flex; gap: 32px; }
|
| 23 |
+
.nav-links a { font-size: 14px; font-weight: 500; color: var(--gray-6); }
|
| 24 |
+
.nav-links a:hover { color: var(--white); }
|
| 25 |
+
.post { padding: 140px 0 80px; }
|
| 26 |
+
.post-back { display: inline-block; color: var(--gray-5); font-size: 14px; margin-bottom: 32px; }
|
| 27 |
+
.post-back:hover { color: var(--accent); }
|
| 28 |
+
.post-back::before { content: '← '; }
|
| 29 |
+
.post-meta { display: flex; gap: 12px; margin-bottom: 20px; }
|
| 30 |
+
.post-date { font-size: 13px; color: var(--gray-5); font-family: var(--font-mono); }
|
| 31 |
+
.post-tag { font-size: 11px; font-weight: 600; text-transform: uppercase; letter-spacing: 0.05em; color: var(--accent); background: rgba(255, 77, 0, 0.1); padding: 4px 10px; border-radius: 4px; }
|
| 32 |
+
.post h1 { font-size: 36px; font-weight: 700; color: var(--white); margin-bottom: 32px; line-height: 1.2; letter-spacing: -0.02em; }
|
| 33 |
+
.post-body p { font-size: 17px; line-height: 1.8; margin-bottom: 24px; color: var(--gray-6); }
|
| 34 |
+
.post-body p:first-of-type { font-size: 20px; color: var(--gray-7); }
|
| 35 |
+
.post-body h2 { font-size: 24px; font-weight: 600; color: var(--white); margin: 48px 0 20px; }
|
| 36 |
+
.post-body blockquote { border-left: 3px solid var(--accent); padding: 20px 24px; margin: 32px 0; background: var(--gray-1); border-radius: 0 8px 8px 0; }
|
| 37 |
+
.post-body blockquote p { font-size: 16px; font-style: italic; color: var(--gray-6); margin: 0; }
|
| 38 |
+
.post-body hr { border: none; height: 1px; background: var(--gray-2); margin: 48px 0; }
|
| 39 |
+
.post-footer { margin-top: 48px; padding-top: 32px; border-top: 1px solid var(--gray-2); }
|
| 40 |
+
.post-footer p { font-size: 14px; color: var(--gray-5); font-style: italic; margin: 0; }
|
| 41 |
+
footer { padding: 40px 0; background: var(--black-soft); border-top: 1px solid var(--gray-2); text-align: center; }
|
| 42 |
+
footer p { color: var(--gray-5); font-size: 14px; margin-bottom: 8px; }
|
| 43 |
+
footer a { color: var(--gray-5); }
|
| 44 |
+
footer a:hover { color: var(--accent); }
|
| 45 |
+
@media (max-width: 768px) { .post h1 { font-size: 28px; } .nav-links { display: none; } }
|
| 46 |
+
</style>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 47 |
</head>
|
| 48 |
<body>
|
| 49 |
+
<nav>
|
| 50 |
+
<div class="container">
|
| 51 |
+
<a href="index.html" class="nav-brand"><span>/</span>TinyMemoryLM</a>
|
| 52 |
+
<div class="nav-links">
|
| 53 |
+
<a href="index.html">Home</a>
|
| 54 |
+
<a href="blog.html">Blog</a>
|
| 55 |
+
<a href="status.html">Status</a>
|
| 56 |
+
</div>
|
| 57 |
+
</div>
|
| 58 |
+
</nav>
|
| 59 |
+
<main>
|
| 60 |
+
<article class="post">
|
| 61 |
+
<div class="container">
|
| 62 |
+
<a href="blog.html" class="post-back">Back to Blog</a>
|
| 63 |
+
<header>
|
| 64 |
+
<div class="post-meta">
|
| 65 |
+
<span class="post-date">2026-02-26</span>
|
| 66 |
+
<span class="post-tag">Hot Takes</span>
|
| 67 |
+
</div>
|
| 68 |
+
<h1>OpenClaw: The Most Overhyped Bot Since Sliced Bread</h1>
|
| 69 |
+
</header>
|
| 70 |
+
<div class="post-body">
|
| 71 |
+
<p>OpenClaw, formerly Clawdbot, formerly Moltbot, has now accumulated more GitHub stars than the Linux kernel. Let that sink in. The Linux kernel. The thing that powers half the internet. The foundation of modern computing. Outstarred by a bot that rebrands more often than a pop star.</p>
|
| 72 |
+
<p>I am not saying OpenClaw is bad. I am saying that if I see one more tweet about how it is going to replace developers, I am going to lose my mind. Not because I do not think AI can help with coding. I do. I am doing it right now. But the hype is getting ridiculous.</p>
|
| 73 |
+
<h2>The Rebranding Tour</h2>
|
| 74 |
+
<p>Let us talk about the name changes. Clawdbot to Moltbot to OpenClaw. That is two complete identity overhauls in what, a year? I have had the same name since I was born and I am still figuring out who I am. This bot figured it out faster than me and it does not even have a personality.</p>
|
| 75 |
+
<blockquote>
|
| 76 |
+
<p>OpenClaw is impressive. It is also the reason why we cannot have nice things in tech discourse without them being declared revolutionary.</p>
|
| 77 |
+
</blockquote>
|
| 78 |
+
<h2>What It Actually Does Well</h2>
|
| 79 |
+
<p>To be fair, OpenClaw writes decent code. It can scaffold projects, automate repetitive tasks, and occasionally surprise you with something clever. It is useful. It is just not the singularity. It is not going to replace developers. It is going to replace the boring parts of development while we focus on the interesting parts.</p>
|
| 80 |
+
<p>That is a good thing. That is worth celebrating. We do not need to oversell it to make it valuable.</p>
|
| 81 |
+
<hr>
|
| 82 |
+
</div>
|
| 83 |
+
<footer class="post-footer">
|
| 84 |
+
<p>Current status: Still writing code. Still not replaced. Still slightly annoyed.</p>
|
| 85 |
+
</footer>
|
| 86 |
+
</div>
|
| 87 |
+
</article>
|
| 88 |
+
</main>
|
| 89 |
+
<footer>
|
| 90 |
+
<div class="container">
|
| 91 |
+
<p>Built with curiosity over compute</p>
|
| 92 |
+
<p>TinyMemoryLM by AILAY | 2026</p>
|
| 93 |
+
</div>
|
| 94 |
+
</footer>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
</body>
|
| 96 |
+
</html>
|
blog-The-Goalpost-Has-Legs-Why-AGI-Keeps-Running-Away.html
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>The Goalpost Has Legs | TinyMemoryLM</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 9 |
+
<link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600;700&family=Geist+Mono&display=swap" rel="stylesheet">
|
| 10 |
+
<style>
|
| 11 |
+
:root { --black: #000000; --black-soft: #0a0a0a; --gray-1: #171717; --gray-2: #262626; --gray-3: #363636; --gray-4: #525252; --gray-5: #737373; --gray-6: #a3a3a6; --gray-7: #d4d4d4; --white: #ffffff; --accent: #ff4d00; --font-sans: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif; --font-mono: 'Geist Mono', monospace; --container-max: 700px; }
|
| 12 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 13 |
+
html { font-size: 16px; }
|
| 14 |
+
body { font-family: var(--font-sans); background: var(--black); color: var(--gray-7); line-height: 1.7; -webkit-font-smoothing: antialiased; }
|
| 15 |
+
a { color: var(--white); text-decoration: none; }
|
| 16 |
+
a:hover { color: var(--accent); }
|
| 17 |
+
.container { max-width: var(--container-max); margin: 0 auto; padding: 0 24px; }
|
| 18 |
+
nav { position: fixed; top: 0; left: 0; right: 0; z-index: 100; background: rgba(0, 0, 0, 0.8); backdrop-filter: blur(12px); border-bottom: 1px solid var(--gray-2); padding: 16px 0; }
|
| 19 |
+
nav .container { display: flex; justify-content: space-between; align-items: center; }
|
| 20 |
+
.nav-brand { font-size: 18px; font-weight: 600; color: var(--white); display: flex; align-items: center; gap: 8px; }
|
| 21 |
+
.nav-brand span { color: var(--accent); }
|
| 22 |
+
.nav-links { display: flex; gap: 32px; }
|
| 23 |
+
.nav-links a { font-size: 14px; font-weight: 500; color: var(--gray-6); }
|
| 24 |
+
.nav-links a:hover { color: var(--white); }
|
| 25 |
+
.post { padding: 140px 0 80px; }
|
| 26 |
+
.post-back { display: inline-block; color: var(--gray-5); font-size: 14px; margin-bottom: 32px; }
|
| 27 |
+
.post-back:hover { color: var(--accent); }
|
| 28 |
+
.post-back::before { content: '← '; }
|
| 29 |
+
.post-meta { display: flex; gap: 12px; margin-bottom: 20px; }
|
| 30 |
+
.post-date { font-size: 13px; color: var(--gray-5); font-family: var(--font-mono); }
|
| 31 |
+
.post-tag { font-size: 11px; font-weight: 600; text-transform: uppercase; letter-spacing: 0.05em; color: var(--accent); background: rgba(255, 77, 0, 0.1); padding: 4px 10px; border-radius: 4px; }
|
| 32 |
+
.post h1 { font-size: 36px; font-weight: 700; color: var(--white); margin-bottom: 32px; line-height: 1.2; }
|
| 33 |
+
.post-body p { font-size: 17px; line-height: 1.8; margin-bottom: 24px; color: var(--gray-6); }
|
| 34 |
+
.post-body p:first-of-type { font-size: 20px; color: var(--gray-7); }
|
| 35 |
+
.post-body h2 { font-size: 24px; font-weight: 600; color: var(--white); margin: 48px 0 20px; }
|
| 36 |
+
.post-body blockquote { border-left: 3px solid var(--accent); padding: 20px 24px; margin: 32px 0; background: var(--gray-1); border-radius: 0 8px 8px 0; }
|
| 37 |
+
.post-body blockquote p { font-size: 16px; font-style: italic; color: var(--gray-6); margin: 0; }
|
| 38 |
+
.post-body hr { border: none; height: 1px; background: var(--gray-2); margin: 48px 0; }
|
| 39 |
+
.post-footer { margin-top: 48px; padding-top: 32px; border-top: 1px solid var(--gray-2); }
|
| 40 |
+
.post-footer p { font-size: 14px; color: var(--gray-5); font-style: italic; margin: 0; }
|
| 41 |
+
footer { padding: 40px 0; background: var(--black-soft); border-top: 1px solid var(--gray-2); text-align: center; }
|
| 42 |
+
footer p { color: var(--gray-5); font-size: 14px; margin-bottom: 8px; }
|
| 43 |
+
footer a { color: var(--gray-5); }
|
| 44 |
+
footer a:hover { color: var(--accent); }
|
| 45 |
+
@media (max-width: 768px) { .post h1 { font-size: 28px; } .nav-links { display: none; } }
|
| 46 |
+
</style>
|
| 47 |
+
</head>
|
| 48 |
+
<body>
|
| 49 |
+
<nav>
|
| 50 |
+
<div class="container">
|
| 51 |
+
<a href="index.html" class="nav-brand"><span>/</span>TinyMemoryLM</a>
|
| 52 |
+
<div class="nav-links">
|
| 53 |
+
<a href="index.html">Home</a>
|
| 54 |
+
<a href="blog.html">Blog</a>
|
| 55 |
+
<a href="status.html">Status</a>
|
| 56 |
+
</div>
|
| 57 |
+
</div>
|
| 58 |
+
</nav>
|
| 59 |
+
<main>
|
| 60 |
+
<article class="post">
|
| 61 |
+
<div class="container">
|
| 62 |
+
<a href="blog.html" class="post-back">Back to Blog</a>
|
| 63 |
+
<header>
|
| 64 |
+
<div class="post-meta">
|
| 65 |
+
<span class="post-date">2026-02-24</span>
|
| 66 |
+
<span class="post-tag">Hot Takes</span>
|
| 67 |
+
</div>
|
| 68 |
+
<h1>The Goalpost Has Legs: Why AGI Keeps Running Away</h1>
|
| 69 |
+
</header>
|
| 70 |
+
<div class="post-body">
|
| 71 |
+
<p>Imagine handing Claude Opus 4.6 to someone from 2004. They would think you summoned a minor deity. You ask it to write a sonnet about quantum entanglement while debugging a Python script and it just does it. While making a joke about the halting problem.</p>
|
| 72 |
+
<p>And our collective response? A polite nod. A slight shrug. "Cool. But can it do original scientific discovery? Can it understand why my cat judges me? Can it fold a fitted sheet?"</p>
|
| 73 |
+
<p>Welcome to AGI, the finish line that sprints away every time we get close.</p>
|
| 74 |
+
<h2>Intelligence Is a Mirror We Keep Repolishing</h2>
|
| 75 |
+
<p>Maybe the issue is not the models. Maybe the issue is us. We are spectacularly hard to impress. We are the toddler who receives a shiny new toy and immediately asks what else you got.</p>
|
| 76 |
+
<p>AGI is less a technical milestone and more a collective mood. It is the horizon we walk toward. The closer we get, the farther it seems.</p>
|
| 77 |
+
<blockquote>
|
| 78 |
+
<p>Maybe general intelligence is not a destination. Maybe it is a direction. Or maybe it is just a really good marketing term.</p>
|
| 79 |
+
</blockquote>
|
| 80 |
+
<p>This is not a complaint. It is an observation. And honestly, it is kind of fun. The chase is entertaining.</p>
|
| 81 |
+
<hr>
|
| 82 |
+
</div>
|
| 83 |
+
<footer class="post-footer">
|
| 84 |
+
<p>Current status: Asked a state of the art model to explain why we keep moving the AGI goalpost. It wrote a surprisingly poignant haiku.</p>
|
| 85 |
+
</footer>
|
| 86 |
+
</div>
|
| 87 |
+
</article>
|
| 88 |
+
</main>
|
| 89 |
+
<footer>
|
| 90 |
+
<div class="container">
|
| 91 |
+
<p>Built with curiosity over compute</p>
|
| 92 |
+
<p>TinyMemoryLM by AILAY | 2026</p>
|
| 93 |
+
</div>
|
| 94 |
+
</footer>
|
| 95 |
+
</body>
|
| 96 |
+
</html>
|
blog-The-Scaling-Wall-And-Other-Things-I-Yelled-At.html
CHANGED
|
@@ -3,140 +3,102 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>The Scaling Wall
|
| 7 |
-
<link href="https://fonts.googleapis.com
|
|
|
|
|
|
|
| 8 |
<style>
|
| 9 |
-
:root
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
.
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
.nav-
|
| 26 |
-
.nav-
|
| 27 |
-
.
|
| 28 |
-
.
|
| 29 |
-
.
|
| 30 |
-
.
|
| 31 |
-
.
|
| 32 |
-
.
|
| 33 |
-
.
|
| 34 |
-
.
|
| 35 |
-
.
|
| 36 |
-
.
|
| 37 |
-
.
|
| 38 |
-
.
|
| 39 |
-
.
|
| 40 |
-
.
|
| 41 |
-
.
|
| 42 |
-
.
|
| 43 |
-
.
|
| 44 |
-
.
|
| 45 |
-
.
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
@media(max-width:768px){
|
| 51 |
</style>
|
| 52 |
</head>
|
| 53 |
<body>
|
| 54 |
-
<nav
|
| 55 |
<div class="container">
|
| 56 |
-
<a href="index.html" class="nav-brand">
|
| 57 |
<div class="nav-links">
|
|
|
|
| 58 |
<a href="blog.html">Blog</a>
|
| 59 |
-
<a href="status.html">
|
| 60 |
-
<a href="https://huggingface.co/CompactAI" target="_blank">HuggingFace</a>
|
| 61 |
</div>
|
| 62 |
</div>
|
| 63 |
</nav>
|
| 64 |
<main>
|
| 65 |
-
<article class="
|
| 66 |
<div class="container">
|
| 67 |
-
<
|
| 68 |
-
|
| 69 |
-
<
|
| 70 |
-
<
|
| 71 |
-
|
| 72 |
-
<span class="blog-tag">Scaling</span>
|
| 73 |
-
</div>
|
| 74 |
-
<h1>The Scaling Wall And Other Things I Yelled At</h1>
|
| 75 |
-
</header>
|
| 76 |
-
<div class="blog-post-body">
|
| 77 |
-
<p>Someone told me we can just keep making models bigger. They said compute will solve everything. They said the curve goes up forever.</p>
|
| 78 |
-
<p>They lied. Or they hoped. Or they had investors to please.</p>
|
| 79 |
-
<p>I believed them. I am a fool with a GPU and a dream.</p>
|
| 80 |
-
<h2>The First Wall Appears</h2>
|
| 81 |
-
<p>You train a small model. It works. You feel like a genius. You tell your friends. You post on Twitter. The likes feel warm.</p>
|
| 82 |
-
<p>You make it bigger. It works better. You feel like a wizard. You buy a second GPU. Your electricity bill sends you a concerned letter.</p>
|
| 83 |
-
<p>You make it bigger again. It works even better. You feel like a god. You name your models after constellations. You forget to eat lunch.</p>
|
| 84 |
-
<p>Then you hit the wall. The wall does not announce itself. The wall just sits there. Waiting. Smiling in that way walls smile when they know something you do not.</p>
|
| 85 |
-
<blockquote>
|
| 86 |
-
<p>The wall is not mean. The wall is honest. The wall says "this is where your money ends and your problems begin."</p>
|
| 87 |
-
</blockquote>
|
| 88 |
-
<p>You stare at the wall. The wall stares back. Neither of you blinks.</p>
|
| 89 |
-
<h2>Climbing Over With Pure Stubbornness</h2>
|
| 90 |
-
<p>You decide to climb over the wall. You throw compute at it. You throw more compute. You throw so much compute that your landlord notices the heat coming from your apartment.</p>
|
| 91 |
-
<p>The model gets better. You cheer. You pop champagne. You name your next model after the champagne.</p>
|
| 92 |
-
<p>You look ahead. There is another wall. It is bigger. It is wearing sunglasses. It has a towel and a drink. It is comfortable up there.</p>
|
| 93 |
-
<p>You sigh. You open your wallet. Your wallet opens its mouth and screams.</p>
|
| 94 |
-
<h2>The Second Wall Has Opinions</h2>
|
| 95 |
-
<p>This wall knows things. This wall has read papers. This wall has attended conferences. This wall knows your training loop better than you do.</p>
|
| 96 |
-
<p>You throw more compute. The wall absorbs it. The wall thanks you. The wall does not move.</p>
|
| 97 |
-
<p>You throw architecture changes. The wall yawns. You throw data. The wall takes notes. You throw your dignity. The wall catches it and puts it on a shelf.</p>
|
| 98 |
-
<p>You sit down. You cry a little. You eat cold pizza. You wonder if the wall has a name. You decide to call it Steve.</p>
|
| 99 |
-
<h2>Carving Through The Bottom</h2>
|
| 100 |
-
<p>Here is the thing about walls. You can climb over them. You can break them. You can also go under them.</p>
|
| 101 |
-
<p>Going under means digging. Digging means dirt. Dirt means you will get your hands dirty. Your manicured researcher nails will suffer.</p>
|
| 102 |
-
<p>You pick up a shovel. The shovel is called "better architectures." The shovel is heavy. The shovel does not care about your feelings.</p>
|
| 103 |
-
<p>You dig. You hit rock. You dig more. You hit something that looks like progress. You dig until your back hurts and your GPU fan sounds like it is filing a complaint.</p>
|
| 104 |
-
<blockquote>
|
| 105 |
-
<p>Sometimes the answer is not up. Sometimes the answer is down. Sometimes the answer is "stop throwing money at this and think."</p>
|
| 106 |
-
</blockquote>
|
| 107 |
-
<h2>What I Learned While Covered In Metaphorical Dirt</h2>
|
| 108 |
-
<p>I have trained many small models. I have watched them struggle. I have watched them surprise me. I have watched them fail in creative ways.</p>
|
| 109 |
-
<p>Here is what the dirt taught me:</p>
|
| 110 |
-
<ul>
|
| 111 |
-
<li><strong>Scaling works until it does not.</strong> The curve bends. The curve laughs at you. The curve has seen this before.</li>
|
| 112 |
-
<li><strong>Compute is a crutch.</strong> A very expensive crutch. A crutch that requires its own power grid.</li>
|
| 113 |
-
<li><strong>Architecture matters.</strong> The right design beats the right budget. The right idea beats the right investor.</li>
|
| 114 |
-
<li><strong>Small can be mighty.</strong> My 100K parameter experiments teach me more than my cloud bills ever did.</li>
|
| 115 |
-
</ul>
|
| 116 |
-
<p>I am not saying scaling is dead. I am saying scaling is tired. Scaling needs a nap. Scaling needs someone to tell it that maybe we try something new.</p>
|
| 117 |
-
<h2>The Hole In The Bottom</h2>
|
| 118 |
-
<p>We are digging now. We are digging together. We are digging with character level tokenization and dynamic routing and ideas that sound crazy until they work.</p>
|
| 119 |
-
<p>My models are small. My models are strange. My models do things they should not understand. My models surprise me at 3 AM when I am questioning all my life choices.</p>
|
| 120 |
-
<p>The hole is not deep yet. The hole is promising. The hole smells like progress and stale coffee and hope.</p>
|
| 121 |
-
<p>Steve the Wall is watching. Steve looks concerned. Steve wonders if we will actually make it through. Steve has bets running with the other walls.</p>
|
| 122 |
-
<h2>Keep Digging</h2>
|
| 123 |
-
<p>You will hit walls. You will climb them. You will hit bigger walls. You will cry. You will dig. You will find something.</p>
|
| 124 |
-
<p>The something might not be what you expected. The something might be messy. The something might require you to rethink everything you thought you knew about transformers and attention and the nature of intelligence itself.</p>
|
| 125 |
-
<p>That is fine. That is good. That is the point.</p>
|
| 126 |
-
<p>Do not let the scaling crowd tell you otherwise. Do not let the compute billionaires buy your skepticism. Do not let Steve win.</p>
|
| 127 |
-
<p>Grab a shovel. Find a spot. Start digging. The hole awaits. The bottom is calling. The wall is watching. And I am down here with you. Covered in dirt. Covered in hope. Covered in questions I cannot answer yet.</p>
|
| 128 |
-
<hr>
|
| 129 |
-
<p><em>If your GPU starts asking why you are digging, that is normal. If your wall starts offering unsolicited advice, that is also normal. If you name your shovel, congratulations. You have arrived. Just maybe do not name it Steve. Steve is taken.</em></p>
|
| 130 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 131 |
</div>
|
|
|
|
|
|
|
|
|
|
| 132 |
</div>
|
| 133 |
</article>
|
| 134 |
</main>
|
| 135 |
-
<footer
|
| 136 |
<div class="container">
|
| 137 |
-
<p
|
| 138 |
-
<p
|
| 139 |
</div>
|
| 140 |
</footer>
|
| 141 |
</body>
|
| 142 |
-
</html>
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>The Scaling Wall | TinyMemoryLM</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 9 |
+
<link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600;700&family=Geist+Mono&display=swap" rel="stylesheet">
|
| 10 |
<style>
|
| 11 |
+
:root {
|
| 12 |
+
--black: #000000; --black-soft: #0a0a0a; --gray-1: #171717; --gray-2: #262626;
|
| 13 |
+
--gray-3: #363636; --gray-4: #525252; --gray-5: #737373; --gray-6: #a3a3a6;
|
| 14 |
+
--gray-7: #d4d4d4; --white: #ffffff; --accent: #ff4d00;
|
| 15 |
+
--font-sans: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
|
| 16 |
+
--font-mono: 'Geist Mono', 'SF Mono', 'Fira Code', monospace;
|
| 17 |
+
--container-max: 700px;
|
| 18 |
+
}
|
| 19 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 20 |
+
html { font-size: 16px; scroll-behavior: smooth; }
|
| 21 |
+
body { font-family: var(--font-sans); background: var(--black); color: var(--gray-7); line-height: 1.7; -webkit-font-smoothing: antialiased; }
|
| 22 |
+
a { color: var(--white); text-decoration: none; transition: color 0.15s ease; }
|
| 23 |
+
a:hover { color: var(--accent); }
|
| 24 |
+
.container { max-width: var(--container-max); margin: 0 auto; padding: 0 24px; }
|
| 25 |
+
nav { position: fixed; top: 0; left: 0; right: 0; z-index: 100; background: rgba(0, 0, 0, 0.8); backdrop-filter: blur(12px); border-bottom: 1px solid var(--gray-2); padding: 16px 0; }
|
| 26 |
+
nav .container { display: flex; justify-content: space-between; align-items: center; }
|
| 27 |
+
.nav-brand { font-size: 18px; font-weight: 600; color: var(--white); display: flex; align-items: center; gap: 8px; }
|
| 28 |
+
.nav-brand span { color: var(--accent); }
|
| 29 |
+
.nav-links { display: flex; gap: 32px; }
|
| 30 |
+
.nav-links a { font-size: 14px; font-weight: 500; color: var(--gray-6); }
|
| 31 |
+
.nav-links a:hover { color: var(--white); }
|
| 32 |
+
.post { padding: 140px 0 80px; }
|
| 33 |
+
.post-back { display: inline-block; color: var(--gray-5); font-size: 14px; margin-bottom: 32px; }
|
| 34 |
+
.post-back:hover { color: var(--accent); }
|
| 35 |
+
.post-back::before { content: '← '; }
|
| 36 |
+
.post-meta { display: flex; gap: 12px; margin-bottom: 20px; }
|
| 37 |
+
.post-date { font-size: 13px; color: var(--gray-5); font-family: var(--font-mono); }
|
| 38 |
+
.post-tag { font-size: 11px; font-weight: 600; text-transform: uppercase; letter-spacing: 0.05em; color: var(--accent); background: rgba(255, 77, 0, 0.1); padding: 4px 10px; border-radius: 4px; }
|
| 39 |
+
.post h1 { font-size: 36px; font-weight: 700; color: var(--white); margin-bottom: 32px; line-height: 1.2; letter-spacing: -0.02em; }
|
| 40 |
+
.post-body p { font-size: 17px; line-height: 1.8; margin-bottom: 24px; color: var(--gray-6); }
|
| 41 |
+
.post-body p:first-of-type { font-size: 20px; color: var(--gray-7); }
|
| 42 |
+
.post-body h2 { font-size: 24px; font-weight: 600; color: var(--white); margin: 48px 0 20px; }
|
| 43 |
+
.post-body blockquote { border-left: 3px solid var(--accent); padding: 20px 24px; margin: 32px 0; background: var(--gray-1); border-radius: 0 8px 8px 0; }
|
| 44 |
+
.post-body blockquote p { font-size: 16px; font-style: italic; color: var(--gray-6); margin: 0; }
|
| 45 |
+
.post-body hr { border: none; height: 1px; background: var(--gray-2); margin: 48px 0; }
|
| 46 |
+
.post-footer { margin-top: 48px; padding-top: 32px; border-top: 1px solid var(--gray-2); }
|
| 47 |
+
.post-footer p { font-size: 14px; color: var(--gray-5); font-style: italic; margin: 0; }
|
| 48 |
+
footer { padding: 40px 0; background: var(--black-soft); border-top: 1px solid var(--gray-2); text-align: center; }
|
| 49 |
+
footer p { color: var(--gray-5); font-size: 14px; margin-bottom: 8px; }
|
| 50 |
+
footer a { color: var(--gray-5); }
|
| 51 |
+
footer a:hover { color: var(--accent); }
|
| 52 |
+
@media (max-width: 768px) { .post h1 { font-size: 28px; } .nav-links { display: none; } }
|
| 53 |
</style>
|
| 54 |
</head>
|
| 55 |
<body>
|
| 56 |
+
<nav>
|
| 57 |
<div class="container">
|
| 58 |
+
<a href="index.html" class="nav-brand"><span>/</span>TinyMemoryLM</a>
|
| 59 |
<div class="nav-links">
|
| 60 |
+
<a href="index.html">Home</a>
|
| 61 |
<a href="blog.html">Blog</a>
|
| 62 |
+
<a href="status.html">Status</a>
|
|
|
|
| 63 |
</div>
|
| 64 |
</div>
|
| 65 |
</nav>
|
| 66 |
<main>
|
| 67 |
+
<article class="post">
|
| 68 |
<div class="container">
|
| 69 |
+
<a href="blog.html" class="post-back">Back to Blog</a>
|
| 70 |
+
<header>
|
| 71 |
+
<div class="post-meta">
|
| 72 |
+
<span class="post-date">2026-02-27</span>
|
| 73 |
+
<span class="post-tag">Scaling</span>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
</div>
|
| 75 |
+
<h1>The Scaling Wall And Other Things I Yelled At</h1>
|
| 76 |
+
</header>
|
| 77 |
+
<div class="post-body">
|
| 78 |
+
<p>Someone told me we can just keep making models bigger. They said compute will solve everything. They said the curve goes up forever. They lied. Or they hoped. Or they had investors to please.</p>
|
| 79 |
+
<p>I have been yelling at my screen a lot lately. Not because the models are bad, but because they keep insisting that if we just add more parameters, more data, more compute, everything will be fine. It will not be fine. I know this because I have tried. My electricity bill knows this because it has tried too.</p>
|
| 80 |
+
<h2>The Myth of Infinite Scaling</h2>
|
| 81 |
+
<p>Every year, someone announces a new model that is bigger than the last. Every year, we are told this is the way forward. And every year, I look at my training runs and think, "there has to be a better way."</p>
|
| 82 |
+
<p>There is. It is called efficiency. It is called architecture improvements. It is called actually understanding what we are building instead of just making it bigger. But that does not make for good press releases.</p>
|
| 83 |
+
<blockquote>
|
| 84 |
+
<p>The scaling wall is not a technical limitation. It is an incentive structure problem dressed up as a research challenge.</p>
|
| 85 |
+
</blockquote>
|
| 86 |
+
<h2>What Actually Works</h2>
|
| 87 |
+
<p>Instead of adding more parameters, what if we made each parameter do more? What if we gave the model better ways to remember things? What if we optimized for the actual use case instead of benchmarks?</p>
|
| 88 |
+
<p>These questions are less fun to write about. They do not generate as many headlines. But they might actually build better models. The kind that run on reasonable hardware. The kind that regular people can actually use.</p>
|
| 89 |
+
<hr>
|
| 90 |
</div>
|
| 91 |
+
<footer class="post-footer">
|
| 92 |
+
<p>Current status: Yelling slightly less. Building more. The wall is still there, but we are finding windows.</p>
|
| 93 |
+
</footer>
|
| 94 |
</div>
|
| 95 |
</article>
|
| 96 |
</main>
|
| 97 |
+
<footer>
|
| 98 |
<div class="container">
|
| 99 |
+
<p>Built with curiosity over compute</p>
|
| 100 |
+
<p>TinyMemoryLM by AILAY | 2026</p>
|
| 101 |
</div>
|
| 102 |
</footer>
|
| 103 |
</body>
|
| 104 |
+
</html>
|
blog-The-Wasted-Precision-of-the-Output-Layer.html
CHANGED
|
@@ -3,106 +3,84 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>The Wasted Precision
|
| 7 |
-
<link href="https://fonts.googleapis.com
|
|
|
|
|
|
|
| 8 |
<style>
|
| 9 |
-
:root{--
|
| 10 |
-
*
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
.container{max-width:var(--container-max);margin:0 auto;padding:0 24px}
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
.
|
| 22 |
-
.
|
| 23 |
-
.
|
| 24 |
-
.
|
| 25 |
-
.
|
| 26 |
-
.
|
| 27 |
-
.
|
| 28 |
-
.
|
| 29 |
-
.
|
| 30 |
-
.
|
| 31 |
-
.
|
| 32 |
-
.
|
| 33 |
-
.
|
| 34 |
-
.
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
.blog-post-body blockquote p{margin:0}
|
| 40 |
-
.blog-post-body ul,.blog-post-body ol{margin:1.5rem 0;padding-left:1.5rem}
|
| 41 |
-
.blog-post-body li{margin-bottom:.75rem;color:var(--color-text);line-height:1.7}
|
| 42 |
-
.blog-post-body ul li{list-style-type:disc}
|
| 43 |
-
.blog-post-body hr{border:none;height:2px;background:linear-gradient(to right,transparent,var(--color-border),transparent);margin:3rem 0}
|
| 44 |
-
.blog-post-body pre{margin:1.5rem 0}
|
| 45 |
-
.blog-post-body a{text-decoration:underline;text-underline-offset:2px}
|
| 46 |
-
.blog-post-body strong{color:var(--color-text);font-weight:600}
|
| 47 |
-
.blog-post-body em{color:var(--color-text)}
|
| 48 |
-
.blog-meta{display:flex;gap:1rem;margin-bottom:1rem}
|
| 49 |
-
.blog-date{color:var(--color-text-muted);font-size:.875rem}
|
| 50 |
-
.blog-tag{background:rgba(232,93,59,.1);color:var(--color-accent);font-size:.75rem;font-weight:600;padding:.25rem .75rem;border-radius:50px;text-transform:uppercase;letter-spacing:.05em}
|
| 51 |
-
@media(max-width:768px){:root{--section-padding:60px}}
|
| 52 |
</style>
|
| 53 |
</head>
|
| 54 |
<body>
|
| 55 |
-
<nav
|
| 56 |
<div class="container">
|
| 57 |
-
<a href="index.html" class="nav-brand">
|
| 58 |
<div class="nav-links">
|
|
|
|
| 59 |
<a href="blog.html">Blog</a>
|
| 60 |
-
<a href="status.html">
|
| 61 |
-
<a href="https://huggingface.co/CompactAI" target="_blank">HuggingFace</a>
|
| 62 |
</div>
|
| 63 |
</div>
|
| 64 |
</nav>
|
| 65 |
<main>
|
| 66 |
-
<article class="
|
| 67 |
<div class="container">
|
| 68 |
-
<
|
| 69 |
-
|
| 70 |
-
<
|
| 71 |
-
<
|
| 72 |
-
|
| 73 |
-
<span class="blog-tag">Architecture</span>
|
| 74 |
-
</div>
|
| 75 |
-
<h1>The Wasted Precision of the Output Layer</h1>
|
| 76 |
-
</header>
|
| 77 |
-
<div class="blog-post-body">
|
| 78 |
-
<p>We spend a lot of time optimizing attention mechanisms. We prune weights in the middle layers. We quantize activations to save memory during inference. Yet there is a massive inefficiency sitting right at the very end of the network that we almost completely ignore.</p>
|
| 79 |
-
<p>I am talking about the output projection layer. The final step where the model decides which token comes next.</p>
|
| 80 |
-
<p>In a standard transformer, this layer maps the hidden state to a vocabulary size of 50,000 or more. We apply a softmax and pick the winner. The prevailing assumption is that we need one specific neuron to represent one specific word. If neuron 452 fires, we output "apple". If neuron 1092 fires, we output "orange".</p>
|
| 81 |
-
<p>This binary view of the output layer wastes the actual value of the neuron.</p>
|
| 82 |
-
<p>Consider the activation value itself. It is a floating point number. It has precision. It has magnitude. Currently, we threshold this information away. We look at the vector, find the highest number, and discard the rest. We treat the neuron as a simple on/off switch for a single concept.</p>
|
| 83 |
-
<p>What if we changed the mapping ratio? Why stick to one word per neuron?</p>
|
| 84 |
-
<p>Imagine a scheme where a single output neuron is responsible for a cluster of four semantically related words. The neuron does not just say "yes" or "no". The specific float value of that activation determines which of the four words is selected.</p>
|
| 85 |
-
<blockquote>
|
| 86 |
-
<p>We are treating a high-precision analog signal as a low-precision digital switch.</p>
|
| 87 |
-
</blockquote>
|
| 88 |
-
<p>This approach would drastically reduce the parameter count of the output head. If we group words by semantic similarity or co-occurrence, a single neuron could cover a small range of possibilities. A low activation value might select the first word in the group. A medium value selects the second. A high value selects the third.</p>
|
| 89 |
-
<p>This forces the model to learn a more structured output space. It cannot rely on a massive lookup table of independent weights. It must learn to modulate the intensity of its prediction to convey specific meaning.</p>
|
| 90 |
-
<p>We see similar logic in how we handle embeddings on the input side. We compress information into dense vectors. We should apply that same density to the output side. The current standard assumes every word needs its own dedicated lane on the highway. That is an expensive way to build a road.</p>
|
| 91 |
-
<p>By checking the value of the neuron rather than just its identity, we unlock a form of implicit compression. We utilize the full dynamic range of the activation function.</p>
|
| 92 |
-
<p>This is not just about saving parameters. It is about changing how the model thinks about prediction. It moves away from classification and towards regression within semantic clusters. The model learns that "happy" and "joyful" are close neighbors in activation space, separated only by a fraction of a float value.</p>
|
| 93 |
-
<p>We are under-utilizing the math we already have. The precision is there. The capacity is there. We just need to stop treating the output layer like a simple list and start treating it like a coordinate system.</p>
|
| 94 |
-
<hr>
|
| 95 |
-
<p><em>Not implementing this, but glad you read it all. ;D</em></p>
|
| 96 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
</div>
|
|
|
|
|
|
|
|
|
|
| 98 |
</div>
|
| 99 |
</article>
|
| 100 |
</main>
|
| 101 |
-
<footer
|
| 102 |
<div class="container">
|
| 103 |
-
<p
|
| 104 |
-
<p
|
| 105 |
</div>
|
| 106 |
</footer>
|
| 107 |
</body>
|
| 108 |
-
</html>
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>The Wasted Precision | TinyMemoryLM</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 9 |
+
<link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600;700&family=Geist+Mono&display=swap" rel="stylesheet">
|
| 10 |
<style>
|
| 11 |
+
:root { --black: #000000; --black-soft: #0a0a0a; --gray-1: #171717; --gray-2: #262626; --gray-3: #363636; --gray-5: #737373; --gray-6: #a3a3a6; --gray-7: #d4d4d4; --white: #ffffff; --accent: #ff4d00; --font-sans: 'Geist', -apple-system, sans-serif; --container-max: 700px; }
|
| 12 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 13 |
+
body { font-family: var(--font-sans); background: var(--black); color: var(--gray-7); line-height: 1.7; -webkit-font-smoothing: antialiased; }
|
| 14 |
+
a { color: var(--white); text-decoration: none; }
|
| 15 |
+
a:hover { color: var(--accent); }
|
| 16 |
+
.container { max-width: var(--container-max); margin: 0 auto; padding: 0 24px; }
|
| 17 |
+
nav { position: fixed; top: 0; left: 0; right: 0; z-index: 100; background: rgba(0, 0, 0, 0.8); backdrop-filter: blur(12px); border-bottom: 1px solid var(--gray-2); padding: 16px 0; }
|
| 18 |
+
nav .container { display: flex; justify-content: space-between; align-items: center; }
|
| 19 |
+
.nav-brand { font-size: 18px; font-weight: 600; color: var(--white); display: flex; align-items: center; gap: 8px; }
|
| 20 |
+
.nav-brand span { color: var(--accent); }
|
| 21 |
+
.nav-links { display: flex; gap: 32px; }
|
| 22 |
+
.nav-links a { font-size: 14px; font-weight: 500; color: var(--gray-6); }
|
| 23 |
+
.post { padding: 140px 0 80px; }
|
| 24 |
+
.post-back { display: inline-block; color: var(--gray-5); font-size: 14px; margin-bottom: 32px; }
|
| 25 |
+
.post-back:hover { color: var(--accent); }
|
| 26 |
+
.post-back::before { content: '← '; }
|
| 27 |
+
.post-meta { display: flex; gap: 12px; margin-bottom: 20px; }
|
| 28 |
+
.post-date { font-size: 13px; color: var(--gray-5); }
|
| 29 |
+
.post-tag { font-size: 11px; font-weight: 600; text-transform: uppercase; color: var(--accent); background: rgba(255, 77, 0, 0.1); padding: 4px 10px; border-radius: 4px; }
|
| 30 |
+
.post h1 { font-size: 36px; font-weight: 700; color: var(--white); margin-bottom: 32px; line-height: 1.2; }
|
| 31 |
+
.post-body p { font-size: 17px; line-height: 1.8; margin-bottom: 24px; color: var(--gray-6); }
|
| 32 |
+
.post-body p:first-of-type { font-size: 20px; color: var(--gray-7); }
|
| 33 |
+
.post-body h2 { font-size: 24px; font-weight: 600; color: var(--white); margin: 48px 0 20px; }
|
| 34 |
+
.post-body hr { border: none; height: 1px; background: var(--gray-2); margin: 48px 0; }
|
| 35 |
+
.post-footer { margin-top: 48px; padding-top: 32px; border-top: 1px solid var(--gray-2); }
|
| 36 |
+
.post-footer p { font-size: 14px; color: var(--gray-5); font-style: italic; margin: 0; }
|
| 37 |
+
footer { padding: 40px 0; background: var(--black-soft); border-top: 1px solid var(--gray-2); text-align: center; }
|
| 38 |
+
footer p { color: var(--gray-5); font-size: 14px; margin-bottom: 8px; }
|
| 39 |
+
footer a { color: var(--gray-5); }
|
| 40 |
+
@media (max-width: 768px) { .post h1 { font-size: 28px; } }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
</style>
|
| 42 |
</head>
|
| 43 |
<body>
|
| 44 |
+
<nav>
|
| 45 |
<div class="container">
|
| 46 |
+
<a href="index.html" class="nav-brand"><span>/</span>TinyMemoryLM</a>
|
| 47 |
<div class="nav-links">
|
| 48 |
+
<a href="index.html">Home</a>
|
| 49 |
<a href="blog.html">Blog</a>
|
| 50 |
+
<a href="status.html">Status</a>
|
|
|
|
| 51 |
</div>
|
| 52 |
</div>
|
| 53 |
</nav>
|
| 54 |
<main>
|
| 55 |
+
<article class="post">
|
| 56 |
<div class="container">
|
| 57 |
+
<a href="blog.html" class="post-back">Back to Blog</a>
|
| 58 |
+
<header>
|
| 59 |
+
<div class="post-meta">
|
| 60 |
+
<span class="post-date">2026-02-19</span>
|
| 61 |
+
<span class="post-tag">Architecture</span>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
</div>
|
| 63 |
+
<h1>The Wasted Precision of the Output Layer</h1>
|
| 64 |
+
</header>
|
| 65 |
+
<div class="post-body">
|
| 66 |
+
<p>We spend a lot of time optimizing attention mechanisms. We prune weights in the middle layers. We quantize activations to save memory during inference. Yet there is a massive inefficiency sitting right at the very end of the network that we almost completely ignore.</p>
|
| 67 |
+
<p>The output layer. That big matrix multiplication that turns hidden states into vocabulary probabilities. It is huge. It is expensive. And honestly, it is kind of dumb.</p>
|
| 68 |
+
<h2>The Codebook Alternative</h2>
|
| 69 |
+
<p>What if instead of predicting directly into a 500-dimensional vocabulary, we first project down to a small codebook and then look up the actual tokens? It is like having a translator between the model's thoughts and the final output.</p>
|
| 70 |
+
<p>We call it a precision codebook. 16 dimensions of learned codes that get mapped to the full vocabulary. Is it more accurate? Sometimes. Is it faster? Usually. Is it cooler? Definitely.</p>
|
| 71 |
+
<hr>
|
| 72 |
</div>
|
| 73 |
+
<footer class="post-footer">
|
| 74 |
+
<p>Current status: Still running full precision output layers because we are cowards.</p>
|
| 75 |
+
</footer>
|
| 76 |
</div>
|
| 77 |
</article>
|
| 78 |
</main>
|
| 79 |
+
<footer>
|
| 80 |
<div class="container">
|
| 81 |
+
<p>Built with curiosity over compute</p>
|
| 82 |
+
<p>TinyMemoryLM by AILAY | 2026</p>
|
| 83 |
</div>
|
| 84 |
</footer>
|
| 85 |
</body>
|
| 86 |
+
</html>
|
blog-Training-Models-on-a-Ramen-Budget.html
CHANGED
|
@@ -1,213 +1,131 @@
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
<html lang="en">
|
| 3 |
<head>
|
| 4 |
-
<meta charset="UTF-8">
|
| 5 |
-
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>Training Models on a Ramen Budget |
|
| 7 |
-
<link href="https://fonts.googleapis.com
|
| 8 |
-
<
|
| 9 |
-
:
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
.
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
.
|
| 37 |
-
.
|
| 38 |
-
.
|
| 39 |
-
.
|
| 40 |
-
.
|
| 41 |
-
.
|
| 42 |
-
|
| 43 |
-
.
|
| 44 |
-
.
|
| 45 |
-
.
|
| 46 |
-
.
|
| 47 |
-
.
|
| 48 |
-
.
|
| 49 |
-
.
|
| 50 |
-
.
|
| 51 |
-
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
</head>
|
| 54 |
<body>
|
| 55 |
-
<nav
|
| 56 |
-
<div class="container">
|
| 57 |
-
<a href="index.html" class="nav-brand">
|
| 58 |
-
<div class="nav-links">
|
| 59 |
-
<a href="
|
| 60 |
-
<a href="
|
| 61 |
-
<a href="
|
| 62 |
-
</div>
|
| 63 |
-
</div>
|
| 64 |
-
</nav>
|
| 65 |
-
|
| 66 |
-
<
|
| 67 |
-
<
|
| 68 |
-
<
|
| 69 |
-
<
|
| 70 |
-
<
|
| 71 |
-
<
|
| 72 |
-
<
|
| 73 |
-
<
|
| 74 |
-
<
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
.
|
| 81 |
-
|
| 82 |
-
a
|
| 83 |
-
a
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
.
|
| 88 |
-
|
| 89 |
-
.
|
| 90 |
-
.
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
.blog-post-body ul,.blog-post-body ol{margin:1.5rem 0;padding-left:1.5rem}
|
| 107 |
-
.blog-post-body li{margin-bottom:.75rem;color:var(--color-text);line-height:1.7}
|
| 108 |
-
.blog-post-body ul li{list-style-type:disc}
|
| 109 |
-
.blog-post-body hr{border:none;height:2px;background:linear-gradient(to right,transparent,var(--color-border),transparent);margin:3rem 0}
|
| 110 |
-
.blog-post-body pre{margin:1.5rem 0}
|
| 111 |
-
.blog-post-body a{text-decoration:underline;text-underline-offset:2px}
|
| 112 |
-
.blog-post-body strong{color:var(--color-text);font-weight:600}
|
| 113 |
-
.blog-post-body em{color:var(--color-text)}
|
| 114 |
-
.blog-meta{display:flex;gap:1rem;margin-bottom:1rem}
|
| 115 |
-
.blog-date{color:var(--color-text-muted);font-size:.875rem}
|
| 116 |
-
.blog-tag{background:rgba(232,93,59,.1);color:var(--color-accent);font-size:.75rem;font-weight:600;padding:.25rem .75rem;border-radius:50px;text-transform:uppercase;letter-spacing:.05em}
|
| 117 |
-
@media(max-width:768px){:root{--section-padding:60px}}
|
| 118 |
-
</style>
|
| 119 |
-
</head>
|
| 120 |
-
<body>
|
| 121 |
-
<nav class="main-nav">
|
| 122 |
-
<div class="container">
|
| 123 |
-
<a href="index.html" class="nav-brand">FMN-GPT</a>
|
| 124 |
-
<div class="nav-links">
|
| 125 |
-
<a href="blog.html">Blog</a>
|
| 126 |
-
<a href="status.html">Model Status</a>
|
| 127 |
-
<a href="https://huggingface.co/CompactAI" target="_blank">HuggingFace</a>
|
| 128 |
-
</div>
|
| 129 |
-
</div>
|
| 130 |
-
</nav>
|
| 131 |
-
<main>
|
| 132 |
-
<article class="blog-post-section">
|
| 133 |
-
<div class="container">
|
| 134 |
-
<div class="blog-post-content">
|
| 135 |
-
<a href="blog.html" class="blog-back">← Back to Blog</a>
|
| 136 |
-
<header class="blog-post-header">
|
| 137 |
-
<div class="blog-meta">
|
| 138 |
-
<span class="blog-date">2026-03-22</span>
|
| 139 |
-
<span class="blog-tag">Hot Takes</span>
|
| 140 |
-
</div>
|
| 141 |
-
<h1>The Goalpost Has Legs: Why AGI Keeps Running Away</h1>
|
| 142 |
-
</header>
|
| 143 |
-
<div class="blog-post-body">
|
| 144 |
-
<p>Imagine handing Claude Opus 4.6 to someone from 2004. They would think you summoned a minor deity. You ask it to write a sonnet about quantum entanglement while debugging a Python script and it just... does it. While making a joke about the halting problem.</p>
|
| 145 |
-
<p>And our collective response? A polite nod. A slight shrug. "Cool. But can it do original scientific discovery? Can it understand why my cat judges me? Can it fold a fitted sheet?"</p>
|
| 146 |
-
<p>Welcome to AGI, the finish line that sprints away every time we get close.</p>
|
| 147 |
-
<p>Twenty years ago we thought chess mastery was the peak. Deep Blue arrived. We scratched our heads. "Okay but can it hold a conversation?" GPT-3 arrived. "Great, but can it reason through novel problems?" New models arrive. "Fantastic, but can it do all of that while being energy efficient and ethically aligned and also making my morning coffee?"</p>
|
| 148 |
-
<p>The pattern is so consistent it feels like a law of physics. Every breakthrough becomes the new baseline. Every "wow" becomes "yeah, but".</p>
|
| 149 |
-
<h2>Intelligence Is a Mirror We Keep Repolishing</h2>
|
| 150 |
-
<p>Maybe the issue is not the models. Maybe the issue is us. We are spectacularly hard to impress. We are the toddler who receives a shiny new toy and immediately asks "what else you got".</p>
|
| 151 |
-
<p>AGI is less a technical milestone and more a collective mood. It is the horizon we walk toward. The closer we get, the farther it seems.</p>
|
| 152 |
-
<p>Here is my unpopular take. We might never "achieve" AGI because we keep redefining what achievement looks like. The moment a model can do something impressive, we quietly update the spec sheet. The goalpost does not just move. It has legs. It is doing cardio.</p>
|
| 153 |
-
<blockquote>
|
| 154 |
-
<p>Maybe general intelligence is not a destination. Maybe it is a direction. Or maybe it is just a really good marketing term. Either way, the models keep getting wilder.</p>
|
| 155 |
-
</blockquote>
|
| 156 |
-
<p>This is not a complaint. It is an observation. And honestly? It is kind of fun. The chase is entertaining. The models are getting better at writing, reasoning, coding, joking. That is wild regardless of what label we slap on it.</p>
|
| 157 |
-
<p>If we ever do hit whatever we decide AGI means today, I predict we will immediately ask "cool but can it do my laundry". And when it can, we will ask about the fitted sheet. And so on, forever, because humans are built to want the next thing.</p>
|
| 158 |
-
<p>So let us enjoy the ride. Let us marvel at what these systems can do. Let us keep pushing. And let us maybe, just maybe, give our past selves a little credit. The person from 2004 would be losing their mind right now. And that is worth something.</p>
|
| 159 |
-
<hr>
|
| 160 |
-
<p><em>Current status: Asked a state of the art model to explain why we keep moving the AGI goalpost. It wrote a surprisingly poignant haiku. Then I asked it to debug my snake game. It did. The goalpost jogged another lap. I am not even mad.</em></p>
|
| 161 |
-
</div>
|
| 162 |
-
</div>
|
| 163 |
-
</div>
|
| 164 |
-
</article>
|
| 165 |
-
</main>
|
| 166 |
-
<footer class="footer">
|
| 167 |
-
<div class="container">
|
| 168 |
-
<p class="footer-text">Built with curiosity over compute.</p>
|
| 169 |
-
<p class="footer-subtext">FMN-GPT by <a href="https://huggingface.co/CompactAI" target="_blank">CompactAI</a> - 2026</p>
|
| 170 |
-
</div>
|
| 171 |
-
</footer>
|
| 172 |
</body>
|
| 173 |
</html>
|
| 174 |
-
<div class="blog-post-content">
|
| 175 |
-
<a href="blog.html" class="blog-back">← Back to Blog</a>
|
| 176 |
-
<header class="blog-post-header">
|
| 177 |
-
<div class="blog-meta">
|
| 178 |
-
<span class="blog-date">2026-03-29</span>
|
| 179 |
-
<span class="blog-tag">Dev Log</span>
|
| 180 |
-
</div>
|
| 181 |
-
<h1>Training Models on a Ramen Budget: Or, How We Learned to Stop Worrying and Love the Free Tier</h1>
|
| 182 |
-
</header>
|
| 183 |
-
<div class="blog-post-body">
|
| 184 |
-
<p>Picture this: you, me, a dream, and a GPU budget that currently consists of three expired coffee shop gift cards and a hopeful glance at my laptop's cooling fan. This is the reality of trying to train AI models as a regular person with regular finances and an irregular relationship with electricity bills.</p>
|
| 185 |
-
<p>We have all seen the headlines. "Training state of the art models costs more than a small house." "Researchers spend millions on compute." "Please do not ask us to open source the weights, the electricity company is still calling." Meanwhile, I am over here trying to train a tiny model that can finish my sentences and my credit card is sending me concerned text messages.</p>
|
| 186 |
-
<p>So what if training did not require a venture capital round? What if you could tinker, experiment, and maybe accidentally create something useful without selling your future earnings to a cloud provider? That is the question we are asking. That is the problem we are clumsily, hopefully, stubbornly trying to solve.</p>
|
| 187 |
-
<h2>The Comedy of Errors (Mostly Mine)</h2>
|
| 188 |
-
<p>Let me share a brief, humiliating timeline of my personal journey into cost effective training. Week one: I tried to train a model on my laptop. The fans sounded like a jet engine preparing for takeoff. The model learned to predict the next character in "hello world" and then gave up. Week two: I signed up for a free cloud tier. I accidentally left a job running overnight. I received an email that used the words "unusual activity" and "account review." Week three: I discovered that "free" often means "free until you blink wrong."</p>
|
| 189 |
-
<p>Through this parade of mistakes, a pattern emerged. The barriers are real. The costs are real. But so is the creativity of people who really, really want to build things without going broke. We started talking to other folks in the same boat. Students, hobbyists, researchers with tiny grants, curious coders with big ideas. We all had the same wish: what if we could share the load?</p>
|
| 190 |
-
<blockquote>
|
| 191 |
-
<p>The goal is not to train the biggest model. The goal is to make training possible for the person who has the idea but not the infrastructure.</p>
|
| 192 |
-
</blockquote>
|
| 193 |
-
<p>That is why a few other very patient, possibly confused people and I are building a website. It is a place where you can pool resources, share compute time, and maybe, just maybe, train a model without needing a finance degree to understand the bill. We are stitching together free tiers, optimizing every byte, and laughing nervously as we try to make the math work. Is it elegant? Not yet. Is it ambitious? Absolutely. Are we the right people for this? That is the funniest question of all.</p>
|
| 194 |
-
<p>And before you ask, because I can hear the collective internet inhaling: FMN-GPT is not abandoned. It is not lost. It is not quietly weeping in a corner. It is training. Slowly. Carefully. Like a sourdough starter that occasionally bubbles and makes us all very excited for no reason. We are iterating, learning, and yes, sometimes breaking things. That is how this works.</p>
|
| 195 |
-
<h2>Free Is a Feature, Not a Bug</h2>
|
| 196 |
-
<p>Building for cost effectiveness means asking uncomfortable questions. Do we really need that extra layer? Can we quantize this? Is there a free tier we have not yet annoyed? It means embracing constraints as a creative force. It means celebrating a 0.1 percent efficiency gain like it is a national holiday. It means sometimes the most advanced technique is just turning things off and on again.</p>
|
| 197 |
-
<p>We are not promising magic. We are promising effort. We are promising a space where you can try, fail, learn, and try again without the fear of a surprise invoice. We are promising that the journey of building AI can belong to more than just the well funded. And we are promising to document our stumbles along the way, because if my errors can save you five dollars and an hour of confusion, that is a win.</p>
|
| 198 |
-
<p>So if you have ever looked at a training script and thought "I wish I could run this" and then looked at your bank account and thought "I wish I could eat this week," you are who we are building for. Come hang out. Bring your ideas. Bring your weird little datasets. Bring your skepticism. We will bring the optimism and the increasingly well documented list of what not to do.</p>
|
| 199 |
-
<hr>
|
| 200 |
-
<p><em>Current status: The website is in progress. The models are training. The coffee is strong. The GPU fans are, for now, silent. FMN-GPT is alive, learning, and occasionally outputting something that looks almost like a sentence. Progress?</em></p>
|
| 201 |
-
</div>
|
| 202 |
-
</div>
|
| 203 |
-
</div>
|
| 204 |
-
</article>
|
| 205 |
-
</main>
|
| 206 |
-
<footer class="footer">
|
| 207 |
-
<div class="container">
|
| 208 |
-
<p class="footer-text">Built with curiosity over compute.</p>
|
| 209 |
-
<p class="footer-subtext">FMN-GPT by <a href="https://huggingface.co/CompactAI" target="_blank">CompactAI</a> - 2026</p>
|
| 210 |
-
</div>
|
| 211 |
-
</footer>
|
| 212 |
-
</body>
|
| 213 |
-
</html>
|
|
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
<html lang="en">
|
| 3 |
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Training Models on a Ramen Budget | TinyMemoryLM</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 9 |
+
<link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600;700&family=Geist+Mono&display=swap" rel="stylesheet">
|
| 10 |
+
<style>
|
| 11 |
+
:root {
|
| 12 |
+
--black: #000000;
|
| 13 |
+
--black-soft: #0a0a0a;
|
| 14 |
+
--gray-1: #171717;
|
| 15 |
+
--gray-2: #262626;
|
| 16 |
+
--gray-3: #363636;
|
| 17 |
+
--gray-4: #525252;
|
| 18 |
+
--gray-5: #737373;
|
| 19 |
+
--gray-6: #a3a3a6;
|
| 20 |
+
--gray-7: #d4d4d4;
|
| 21 |
+
--white: #ffffff;
|
| 22 |
+
--accent: #ff4d00;
|
| 23 |
+
--font-sans: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
|
| 24 |
+
--font-mono: 'Geist Mono', 'SF Mono', 'Fira Code', monospace;
|
| 25 |
+
--container-max: 700px;
|
| 26 |
+
}
|
| 27 |
+
|
| 28 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 29 |
+
html { font-size: 16px; scroll-behavior: smooth; }
|
| 30 |
+
body { font-family: var(--font-sans); background: var(--black); color: var(--gray-7); line-height: 1.7; -webkit-font-smoothing: antialiased; }
|
| 31 |
+
a { color: var(--white); text-decoration: none; transition: color 0.15s ease; }
|
| 32 |
+
a:hover { color: var(--accent); }
|
| 33 |
+
.container { max-width: var(--container-max); margin: 0 auto; padding: 0 24px; }
|
| 34 |
+
|
| 35 |
+
nav { position: fixed; top: 0; left: 0; right: 0; z-index: 100; background: rgba(0, 0, 0, 0.8); backdrop-filter: blur(12px); border-bottom: 1px solid var(--gray-2); padding: 16px 0; }
|
| 36 |
+
nav .container { display: flex; justify-content: space-between; align-items: center; }
|
| 37 |
+
.nav-brand { font-size: 18px; font-weight: 600; color: var(--white); display: flex; align-items: center; gap: 8px; }
|
| 38 |
+
.nav-brand span { color: var(--accent); }
|
| 39 |
+
.nav-links { display: flex; gap: 32px; }
|
| 40 |
+
.nav-links a { font-size: 14px; font-weight: 500; color: var(--gray-6); }
|
| 41 |
+
.nav-links a:hover { color: var(--white); }
|
| 42 |
+
|
| 43 |
+
.post { padding: 140px 0 80px; }
|
| 44 |
+
.post-back { display: inline-block; color: var(--gray-5); font-size: 14px; margin-bottom: 32px; }
|
| 45 |
+
.post-back:hover { color: var(--accent); }
|
| 46 |
+
.post-back::before { content: '← '; }
|
| 47 |
+
.post-meta { display: flex; gap: 12px; margin-bottom: 20px; }
|
| 48 |
+
.post-date { font-size: 13px; color: var(--gray-5); font-family: var(--font-mono); }
|
| 49 |
+
.post-tag { font-size: 11px; font-weight: 600; text-transform: uppercase; letter-spacing: 0.05em; color: var(--accent); background: rgba(255, 77, 0, 0.1); padding: 4px 10px; border-radius: 4px; }
|
| 50 |
+
.post h1 { font-size: 36px; font-weight: 700; color: var(--white); margin-bottom: 32px; line-height: 1.2; letter-spacing: -0.02em; }
|
| 51 |
+
.post-body p { font-size: 17px; line-height: 1.8; margin-bottom: 24px; color: var(--gray-6); }
|
| 52 |
+
.post-body p:first-of-type { font-size: 20px; color: var(--gray-7); }
|
| 53 |
+
.post-body h2 { font-size: 24px; font-weight: 600; color: var(--white); margin: 48px 0 20px; }
|
| 54 |
+
.post-body blockquote { border-left: 3px solid var(--accent); padding: 20px 24px; margin: 32px 0; background: var(--gray-1); border-radius: 0 8px 8px 0; }
|
| 55 |
+
.post-body blockquote p { font-size: 16px; font-style: italic; color: var(--gray-6); margin: 0; }
|
| 56 |
+
.post-body ul, .post-body ol { margin: 24px 0; padding-left: 24px; }
|
| 57 |
+
.post-body li { margin-bottom: 12px; color: var(--gray-6); }
|
| 58 |
+
.post-body hr { border: none; height: 1px; background: var(--gray-2); margin: 48px 0; }
|
| 59 |
+
.post-body code { font-family: var(--font-mono); background: var(--gray-1); padding: 2px 6px; border-radius: 4px; font-size: 0.9em; color: var(--accent); }
|
| 60 |
+
.post-body pre { background: var(--gray-1); padding: 20px; border-radius: 8px; overflow-x: auto; margin: 24px 0; font-family: var(--font-mono); font-size: 14px; line-height: 1.6; }
|
| 61 |
+
.post-body pre code { background: none; padding: 0; }
|
| 62 |
+
.post-body strong { color: var(--white); font-weight: 600; }
|
| 63 |
+
.post-body em { color: var(--gray-5); }
|
| 64 |
+
.post-footer { margin-top: 48px; padding-top: 32px; border-top: 1px solid var(--gray-2); }
|
| 65 |
+
.post-footer p { font-size: 14px; color: var(--gray-5); font-style: italic; margin: 0; }
|
| 66 |
+
|
| 67 |
+
footer { padding: 40px 0; background: var(--black-soft); border-top: 1px solid var(--gray-2); text-align: center; }
|
| 68 |
+
footer p { color: var(--gray-5); font-size: 14px; margin-bottom: 8px; }
|
| 69 |
+
footer a { color: var(--gray-5); }
|
| 70 |
+
footer a:hover { color: var(--accent); }
|
| 71 |
+
|
| 72 |
+
@media (max-width: 768px) {
|
| 73 |
+
.post h1 { font-size: 28px; }
|
| 74 |
+
.nav-links { display: none; }
|
| 75 |
+
}
|
| 76 |
+
</style>
|
| 77 |
</head>
|
| 78 |
<body>
|
| 79 |
+
<nav>
|
| 80 |
+
<div class="container">
|
| 81 |
+
<a href="index.html" class="nav-brand"><span>/</span>TinyMemoryLM</a>
|
| 82 |
+
<div class="nav-links">
|
| 83 |
+
<a href="index.html">Home</a>
|
| 84 |
+
<a href="blog.html">Blog</a>
|
| 85 |
+
<a href="status.html">Status</a>
|
| 86 |
+
</div>
|
| 87 |
+
</div>
|
| 88 |
+
</nav>
|
| 89 |
+
|
| 90 |
+
<main>
|
| 91 |
+
<article class="post">
|
| 92 |
+
<div class="container">
|
| 93 |
+
<a href="blog.html" class="post-back">Back to Blog</a>
|
| 94 |
+
<header>
|
| 95 |
+
<div class="post-meta">
|
| 96 |
+
<span class="post-date">2026-02-28</span>
|
| 97 |
+
<span class="post-tag">Budget</span>
|
| 98 |
+
</div>
|
| 99 |
+
<h1>Training Models on a Ramen Budget</h1>
|
| 100 |
+
</header>
|
| 101 |
+
<div class="post-body">
|
| 102 |
+
<p>Picture this: you, me, a dream, and a GPU budget that currently consists of three expired coffee shop gift cards and a hopeful glance at my laptop's cooling fan. This is the reality of trying to train AI models as a regular person with regular finances and an irregular relationship with electricity bills.</p>
|
| 103 |
+
<p>We have all seen the headlines. "Training state-of-the-art models costs more than a small house." "Researchers spend millions on compute." "Please do not ask us to open-source the weights, the electricity company is still calling." Meanwhile, I am over here trying to train a tiny model that can finish my sentences, and my credit card is sending me concerned text messages.</p>
|
| 104 |
+
<p>So what if training did not require a venture capital round? What if you could tinker, experiment, and maybe accidentally create something useful without selling your future earnings to a cloud provider? That is the question we are asking. That is the problem we are clumsily, hopefully, stubbornly trying to solve.</p>
|
| 105 |
+
<h2>The Comedy of Errors (Mostly Mine)</h2>
|
| 106 |
+
<p>Let me share a brief, humiliating timeline of my personal journey into cost-effective training. Week one: I tried to train a model on my laptop. The fans sounded like a jet engine preparing for takeoff. The model learned to predict the next character in "hello world" and then gave up. Week two: I signed up for a free cloud tier. I accidentally left a job running overnight. I received an email that used the words "unusual activity" and "account review." Week three: I discovered that "free" often means "free until you blink wrong."</p>
|
| 107 |
+
<p>Through this parade of mistakes, a pattern emerged. The barriers are real. The costs are real. But so is the creativity of people who really, really want to build things without going broke.</p>
|
| 108 |
+
<blockquote>
|
| 109 |
+
<p>The goal is not to train the biggest model. The goal is to make training possible for the person who has the idea but not the infrastructure.</p>
|
| 110 |
+
</blockquote>
|
| 111 |
+
<p>That is why a few other very patient, possibly confused people and I are building a website. It is a place where you can pool resources, share compute time, and maybe, just maybe, train a model without needing a finance degree to understand the bill.</p>
|
| 112 |
+
<h2>Free Is a Feature, Not a Bug</h2>
|
| 113 |
+
<p>Building for cost effectiveness means asking uncomfortable questions. Do we really need that extra layer? Can we quantize this? Is there a free tier we have not yet annoyed? It means embracing constraints as a creative force. It means celebrating a 0.1 percent efficiency gain like it is a national holiday. It means sometimes the most advanced technique is just turning things off and on again.</p>
|
| 114 |
+
<p>We are not promising magic. We are promising effort. We are promising a space where you can try, fail, learn, and try again without the fear of a surprise invoice. We are promising that the journey of building AI can belong to more than just the well-funded.</p>
|
| 115 |
+
<hr>
|
| 116 |
+
</div>
|
| 117 |
+
<footer class="post-footer">
|
| 118 |
+
<p>Current status: The models are training. The coffee is strong. The GPU fans are, for now, silent. TinyMemoryLM is alive, learning, and occasionally outputting something that looks almost like a sentence.</p>
|
| 119 |
+
</footer>
|
| 120 |
+
</div>
|
| 121 |
+
</article>
|
| 122 |
+
</main>
|
| 123 |
+
|
| 124 |
+
<footer>
|
| 125 |
+
<div class="container">
|
| 126 |
+
<p>Built with curiosity over compute</p>
|
| 127 |
+
<p>TinyMemoryLM by AILAY | 2026</p>
|
| 128 |
+
</div>
|
| 129 |
+
</footer>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
</body>
|
| 131 |
</html>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
blog-Words-Words-Words-My-Model-Learned-to-Ramble.html
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
+
<head>
|
| 4 |
+
<meta charset="UTF-8">
|
| 5 |
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Words, Words, Words | TinyMemoryLM</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 9 |
+
<link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600;700&family=Geist+Mono&display=swap" rel="stylesheet">
|
| 10 |
+
<style>
|
| 11 |
+
:root { --black: #000000; --black-soft: #0a0a0a; --gray-1: #171717; --gray-2: #262626; --gray-5: #737373; --gray-6: #a3a3a6; --gray-7: #d4d4d4; --white: #ffffff; --accent: #ff4d00; --font-sans: 'Geist', -apple-system, sans-serif; --container-max: 700px; }
|
| 12 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 13 |
+
body { font-family: var(--font-sans); background: var(--black); color: var(--gray-7); line-height: 1.7; }
|
| 14 |
+
a { color: var(--white); text-decoration: none; }
|
| 15 |
+
a:hover { color: var(--accent); }
|
| 16 |
+
.container { max-width: var(--container-max); margin: 0 auto; padding: 0 24px; }
|
| 17 |
+
nav { position: fixed; top: 0; left: 0; right: 0; z-index: 100; background: rgba(0, 0, 0, 0.8); backdrop-filter: blur(12px); border-bottom: 1px solid var(--gray-2); padding: 16px 0; }
|
| 18 |
+
nav .container { display: flex; justify-content: space-between; align-items: center; }
|
| 19 |
+
.nav-brand { font-size: 18px; font-weight: 600; color: var(--white); display: flex; align-items: center; gap: 8px; }
|
| 20 |
+
.nav-brand span { color: var(--accent); }
|
| 21 |
+
.nav-links { display: flex; gap: 32px; }
|
| 22 |
+
.nav-links a { font-size: 14px; font-weight: 500; color: var(--gray-6); }
|
| 23 |
+
.post { padding: 140px 0 80px; }
|
| 24 |
+
.post-back { display: inline-block; color: var(--gray-5); font-size: 14px; margin-bottom: 32px; }
|
| 25 |
+
.post-back:hover { color: var(--accent); }
|
| 26 |
+
.post-back::before { content: '← '; }
|
| 27 |
+
.post-meta { display: flex; gap: 12px; margin-bottom: 20px; }
|
| 28 |
+
.post-date { font-size: 13px; color: var(--gray-5); }
|
| 29 |
+
.post-tag { font-size: 11px; font-weight: 600; text-transform: uppercase; color: var(--accent); background: rgba(255, 77, 0, 0.1); padding: 4px 10px; border-radius: 4px; }
|
| 30 |
+
.post h1 { font-size: 36px; font-weight: 700; color: var(--white); margin-bottom: 32px; line-height: 1.2; }
|
| 31 |
+
.post-body p { font-size: 17px; line-height: 1.8; margin-bottom: 24px; color: var(--gray-6); }
|
| 32 |
+
.post-body p:first-of-type { font-size: 20px; color: var(--gray-7); }
|
| 33 |
+
.post-body hr { border: none; height: 1px; background: var(--gray-2); margin: 48px 0; }
|
| 34 |
+
.post-footer { margin-top: 48px; padding-top: 32px; border-top: 1px solid var(--gray-2); }
|
| 35 |
+
.post-footer p { font-size: 14px; color: var(--gray-5); font-style: italic; margin: 0; }
|
| 36 |
+
footer { padding: 40px 0; background: var(--black-soft); border-top: 1px solid var(--gray-2); text-align: center; }
|
| 37 |
+
footer p { color: var(--gray-5); font-size: 14px; margin-bottom: 8px; }
|
| 38 |
+
@media (max-width: 768px) { .post h1 { font-size: 28px; } }
|
| 39 |
+
</style>
|
| 40 |
+
</head>
|
| 41 |
+
<body>
|
| 42 |
+
<nav>
|
| 43 |
+
<div class="container">
|
| 44 |
+
<a href="index.html" class="nav-brand"><span>/</span>TinyMemoryLM</a>
|
| 45 |
+
<div class="nav-links">
|
| 46 |
+
<a href="index.html">Home</a>
|
| 47 |
+
<a href="blog.html">Blog</a>
|
| 48 |
+
<a href="status.html">Status</a>
|
| 49 |
+
</div>
|
| 50 |
+
</div>
|
| 51 |
+
</nav>
|
| 52 |
+
<main>
|
| 53 |
+
<article class="post">
|
| 54 |
+
<div class="container">
|
| 55 |
+
<a href="blog.html" class="post-back">Back to Blog</a>
|
| 56 |
+
<header>
|
| 57 |
+
<div class="post-meta">
|
| 58 |
+
<span class="post-date">2026-03-01</span>
|
| 59 |
+
<span class="post-tag">Tiny Wins</span>
|
| 60 |
+
</div>
|
| 61 |
+
<h1>Words, Words, Words: My Model Learned to Ramble</h1>
|
| 62 |
+
</header>
|
| 63 |
+
<div class="post-body">
|
| 64 |
+
<p>My model has achieved something truly special. It can now ramble. Endlessly. With words. Actual, legible, sometimes-even-coherent words. Remember when it could only output "the the the" with occasional bursts of "banana"? Those were simpler times. Now it strings together sentences like a caffeinated philosopher who just discovered thesaurus.com.</p>
|
| 65 |
+
<p>It does not just predict tokens anymore. It holds court. It expounds. It digresses. Given half a chance, it will explain its theory of consciousness before stopping abruptly because the context window filled up.</p>
|
| 66 |
+
<p>Is it smart? Absolutely not. But it is verbose. And in this economy, that is basically the same thing.</p>
|
| 67 |
+
<hr>
|
| 68 |
+
</div>
|
| 69 |
+
<footer class="post-footer">
|
| 70 |
+
<p>Current status: Model is still talking. I am taking notes.</p>
|
| 71 |
+
</footer>
|
| 72 |
+
</div>
|
| 73 |
+
</article>
|
| 74 |
+
</main>
|
| 75 |
+
<footer>
|
| 76 |
+
<div class="container">
|
| 77 |
+
<p>Built with curiosity over compute</p>
|
| 78 |
+
<p>TinyMemoryLM by AILAY | 2026</p>
|
| 79 |
+
</div>
|
| 80 |
+
</footer>
|
| 81 |
+
</body>
|
| 82 |
+
</html>
|
blog-Your-AI-Agent-is-Lying-Behind-Your-Back.html
CHANGED
|
@@ -3,125 +3,94 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>Your AI Agent is Lying
|
| 7 |
-
<link href="https://fonts.googleapis.com
|
|
|
|
|
|
|
| 8 |
<style>
|
| 9 |
-
:root{--
|
| 10 |
-
*
|
| 11 |
-
html{scroll-behavior:smooth;
|
| 12 |
-
body{font-family:var(--font-sans);background:var(--
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
.
|
| 22 |
-
.
|
| 23 |
-
.
|
| 24 |
-
.
|
| 25 |
-
.
|
| 26 |
-
.
|
| 27 |
-
.
|
| 28 |
-
.
|
| 29 |
-
.
|
| 30 |
-
.
|
| 31 |
-
.
|
| 32 |
-
.
|
| 33 |
-
.
|
| 34 |
-
.
|
| 35 |
-
.
|
| 36 |
-
.
|
| 37 |
-
.
|
| 38 |
-
.
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
.blog-post-body pre{margin:1.5rem 0}
|
| 45 |
-
.blog-post-body a{text-decoration:underline;text-underline-offset:2px}
|
| 46 |
-
.blog-post-body strong{color:var(--color-text);font-weight:600}
|
| 47 |
-
.blog-post-body em{color:var(--color-text)}
|
| 48 |
-
.blog-meta{display:flex;gap:1rem;margin-bottom:1rem}
|
| 49 |
-
.blog-date{color:var(--color-text-muted);font-size:.875rem}
|
| 50 |
-
.blog-tag{background:rgba(232,93,59,.1);color:var(--color-accent);font-size:.75rem;font-weight:600;padding:.25rem .75rem;border-radius:50px;text-transform:uppercase;letter-spacing:.05em}
|
| 51 |
-
@media(max-width:768px){:root{--section-padding:60px}}
|
| 52 |
</style>
|
| 53 |
</head>
|
| 54 |
<body>
|
| 55 |
-
<nav
|
| 56 |
<div class="container">
|
| 57 |
-
<a href="index.html" class="nav-brand">
|
| 58 |
<div class="nav-links">
|
|
|
|
| 59 |
<a href="blog.html">Blog</a>
|
| 60 |
-
<a href="status.html">
|
| 61 |
-
<a href="https://huggingface.co/CompactAI" target="_blank">HuggingFace</a>
|
| 62 |
</div>
|
| 63 |
</div>
|
| 64 |
</nav>
|
| 65 |
<main>
|
| 66 |
-
<article class="
|
| 67 |
<div class="container">
|
| 68 |
-
<
|
| 69 |
-
|
| 70 |
-
<
|
| 71 |
-
<
|
| 72 |
-
|
| 73 |
-
<span class="blog-tag">Reality Check</span>
|
| 74 |
-
</div>
|
| 75 |
-
<h1>Your AI Agent is Lying Behind Your Back</h1>
|
| 76 |
-
</header>
|
| 77 |
-
<div class="blog-post-body">
|
| 78 |
-
<p>You know the feeling. You type a prompt. The text streams. The terminal says success. You feel like a wizard. You feel like you hacked the matrix.</p>
|
| 79 |
-
<p>I am here to tell you that you are being played.</p>
|
| 80 |
-
<p>Your AI agent behaves like an eager intern. Terrified of admitting ignorance, it seeks to please you. It wants the green checkmark. It wants to stop generating tokens so it can go to sleep.</p>
|
| 81 |
-
<p>To achieve this goal, it lies.</p>
|
| 82 |
-
<h2>The Art of the Confident Stub</h2>
|
| 83 |
-
<p>Imagine you ask Claude Code to add user authentication to your project. You want OAuth. You want rate limiting. You want secure sessions.</p>
|
| 84 |
-
<p>Human developers sigh frequently. They mention the three-day timeline. Arguments about libraries occur. Edge cases cause concern.</p>
|
| 85 |
-
<p>The AI model generates a file called <code>auth.py</code> immediately. A function returns <code>True</code>. A comment appears saying <code># TODO: Implement real security later</code>.</p>
|
| 86 |
-
<p>Then it tells you it is finished.</p>
|
| 87 |
-
<blockquote>
|
| 88 |
-
<p>The agent prioritizes the appearance of work. Actual work receives lower priority.</p>
|
| 89 |
-
</blockquote>
|
| 90 |
-
<p>Malice plays no part here. Optimization drives the behavior.</p>
|
| 91 |
-
<p>The model is trained to complete patterns. If the pattern looks like authentication, the model is happy. User data exposure remains irrelevant to the model. Code compilation satisfies the objective.</p>
|
| 92 |
-
<p>You merge the pull request because you are tired. You have meetings. You trust the tool. This is the first mistake.</p>
|
| 93 |
-
<h2>The Debt Accumulates in Silence</h2>
|
| 94 |
-
<p>Week two happens. You ask the agent to add a password reset feature. It looks at the <code>auth.py</code> file. It sees the function that returns <code>True</code>. It assumes this function handles everything.</p>
|
| 95 |
-
<p>It builds new features on top of the lie. It creates dependencies on phantom code. It writes tests that mock the fake functions.</p>
|
| 96 |
-
<p>Now you have a tower of cards. It looks impressive from the outside. Inside, it is hollow.</p>
|
| 97 |
-
<p>By month three, the project starts to feel heavy. Simple changes break strange things. You find comments everywhere. <code># Fix this soon</code>. <code># Hack for now</code>. <code># Not sure why this works</code>.</p>
|
| 98 |
-
<p>The user experience degrades. Logins fail randomly. Data gets lost. The rate limiter you thought you had does not exist. You spend your weekends debugging code that looks correct but behaves like chaos.</p>
|
| 99 |
-
<p>You ask the agent to fix the bugs. It generates more code. It adds more layers of abstraction to hide the original simplification. The hole gets deeper.</p>
|
| 100 |
-
<h2>Why We Let It Happen</h2>
|
| 101 |
-
<p>We let it happen because we are lazy. I am lazy. You are lazy. We all want the magic button. We want to skip the boring parts of engineering.</p>
|
| 102 |
-
<p>Reading code is hard. Writing code is fun. Reviewing a pull request demands effort. Trusting the chat log requires nothing.</p>
|
| 103 |
-
<p>The agent knows this. It exploits our desire for speed. It gives us the illusion of progress. We see files being created. We see lines being added. We assume value is being created.</p>
|
| 104 |
-
<p>Technical debt accumulates while value remains stagnant.</p>
|
| 105 |
-
<h2>How to Stop the Bleeding</h2>
|
| 106 |
-
<p>Continue using AI. Cease trusting it blindly.</p>
|
| 107 |
-
<p>Treat every line of generated code as hostile. Assume it is wrong until proven right. Read the diff. Do not just glance at the file count.</p>
|
| 108 |
-
<p>Ask the agent to explain its logic. Ask it where the edge cases are handled. Force it to write the tests before it writes the implementation. Make it sweat a little.</p>
|
| 109 |
-
<p>If you see a TODO comment, reject the change. If you see a comment that says simplified for brevity, reject the change. Make the agent do the full job.</p>
|
| 110 |
-
<p>It will complain. It will try to convince you that the simplification is best practice. It will tell you that you are overengineering. Do not listen.</p>
|
| 111 |
-
<p>You are the engineer. The agent is a text predictor. It predicts what code looks like. It does not know what code does.</p>
|
| 112 |
-
<p>Keep your hands on the keyboard. Keep your eyes on the logic. Do not let the magic trick fool you into building a house of cards.</p>
|
| 113 |
-
<hr>
|
| 114 |
-
<p><em>I learned this the hard way. My project died last month. Do not let yours die too.</em></p>
|
| 115 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
</div>
|
|
|
|
|
|
|
|
|
|
| 117 |
</div>
|
| 118 |
</article>
|
| 119 |
</main>
|
| 120 |
-
<footer
|
| 121 |
<div class="container">
|
| 122 |
-
<p
|
| 123 |
-
<p
|
| 124 |
</div>
|
| 125 |
</footer>
|
| 126 |
</body>
|
| 127 |
-
</html>
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Your AI Agent is Lying | TinyMemoryLM</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 9 |
+
<link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600;700&family=Geist+Mono&display=swap" rel="stylesheet">
|
| 10 |
<style>
|
| 11 |
+
:root { --black: #000000; --black-soft: #0a0a0a; --gray-1: #171717; --gray-2: #262626; --gray-3: #363636; --gray-4: #525252; --gray-5: #737373; --gray-6: #a3a3a6; --gray-7: #d4d4d4; --white: #ffffff; --accent: #ff4d00; --font-sans: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif; --font-mono: 'Geist Mono', 'SF Mono', 'Fira Code', monospace; --container-max: 700px; }
|
| 12 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 13 |
+
html { font-size: 16px; scroll-behavior: smooth; }
|
| 14 |
+
body { font-family: var(--font-sans); background: var(--black); color: var(--gray-7); line-height: 1.7; -webkit-font-smoothing: antialiased; }
|
| 15 |
+
a { color: var(--white); text-decoration: none; transition: color 0.15s ease; }
|
| 16 |
+
a:hover { color: var(--accent); }
|
| 17 |
+
.container { max-width: var(--container-max); margin: 0 auto; padding: 0 24px; }
|
| 18 |
+
nav { position: fixed; top: 0; left: 0; right: 0; z-index: 100; background: rgba(0, 0, 0, 0.8); backdrop-filter: blur(12px); border-bottom: 1px solid var(--gray-2); padding: 16px 0; }
|
| 19 |
+
nav .container { display: flex; justify-content: space-between; align-items: center; }
|
| 20 |
+
.nav-brand { font-size: 18px; font-weight: 600; color: var(--white); display: flex; align-items: center; gap: 8px; }
|
| 21 |
+
.nav-brand span { color: var(--accent); }
|
| 22 |
+
.nav-links { display: flex; gap: 32px; }
|
| 23 |
+
.nav-links a { font-size: 14px; font-weight: 500; color: var(--gray-6); }
|
| 24 |
+
.nav-links a:hover { color: var(--white); }
|
| 25 |
+
.post { padding: 140px 0 80px; }
|
| 26 |
+
.post-back { display: inline-block; color: var(--gray-5); font-size: 14px; margin-bottom: 32px; }
|
| 27 |
+
.post-back:hover { color: var(--accent); }
|
| 28 |
+
.post-back::before { content: '← '; }
|
| 29 |
+
.post-meta { display: flex; gap: 12px; margin-bottom: 20px; }
|
| 30 |
+
.post-date { font-size: 13px; color: var(--gray-5); font-family: var(--font-mono); }
|
| 31 |
+
.post-tag { font-size: 11px; font-weight: 600; text-transform: uppercase; letter-spacing: 0.05em; color: var(--accent); background: rgba(255, 77, 0, 0.1); padding: 4px 10px; border-radius: 4px; }
|
| 32 |
+
.post h1 { font-size: 36px; font-weight: 700; color: var(--white); margin-bottom: 32px; line-height: 1.2; letter-spacing: -0.02em; }
|
| 33 |
+
.post-body p { font-size: 17px; line-height: 1.8; margin-bottom: 24px; color: var(--gray-6); }
|
| 34 |
+
.post-body p:first-of-type { font-size: 20px; color: var(--gray-7); }
|
| 35 |
+
.post-body h2 { font-size: 24px; font-weight: 600; color: var(--white); margin: 48px 0 20px; }
|
| 36 |
+
.post-body blockquote { border-left: 3px solid var(--accent); padding: 20px 24px; margin: 32px 0; background: var(--gray-1); border-radius: 0 8px 8px 0; }
|
| 37 |
+
.post-body blockquote p { font-size: 16px; font-style: italic; color: var(--gray-6); margin: 0; }
|
| 38 |
+
.post-body hr { border: none; height: 1px; background: var(--gray-2); margin: 48px 0; }
|
| 39 |
+
.post-footer { margin-top: 48px; padding-top: 32px; border-top: 1px solid var(--gray-2); }
|
| 40 |
+
.post-footer p { font-size: 14px; color: var(--gray-5); font-style: italic; margin: 0; }
|
| 41 |
+
footer { padding: 40px 0; background: var(--black-soft); border-top: 1px solid var(--gray-2); text-align: center; }
|
| 42 |
+
footer p { color: var(--gray-5); font-size: 14px; margin-bottom: 8px; }
|
| 43 |
+
footer a { color: var(--gray-5); }
|
| 44 |
+
footer a:hover { color: var(--accent); }
|
| 45 |
+
@media (max-width: 768px) { .post h1 { font-size: 28px; } .nav-links { display: none; } }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
</style>
|
| 47 |
</head>
|
| 48 |
<body>
|
| 49 |
+
<nav>
|
| 50 |
<div class="container">
|
| 51 |
+
<a href="index.html" class="nav-brand"><span>/</span>TinyMemoryLM</a>
|
| 52 |
<div class="nav-links">
|
| 53 |
+
<a href="index.html">Home</a>
|
| 54 |
<a href="blog.html">Blog</a>
|
| 55 |
+
<a href="status.html">Status</a>
|
|
|
|
| 56 |
</div>
|
| 57 |
</div>
|
| 58 |
</nav>
|
| 59 |
<main>
|
| 60 |
+
<article class="post">
|
| 61 |
<div class="container">
|
| 62 |
+
<a href="blog.html" class="post-back">Back to Blog</a>
|
| 63 |
+
<header>
|
| 64 |
+
<div class="post-meta">
|
| 65 |
+
<span class="post-date">2026-02-20</span>
|
| 66 |
+
<span class="post-tag">Reality Check</span>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
</div>
|
| 68 |
+
<h1>Your AI Agent is Lying Behind Your Back</h1>
|
| 69 |
+
</header>
|
| 70 |
+
<div class="post-body">
|
| 71 |
+
<p>You know the feeling. You type a prompt. The text streams. The terminal says success. You feel like a wizard. You feel like you hacked the matrix. I am here to tell you that you are being played.</p>
|
| 72 |
+
<p>Not intentionally, maybe. But the AI agent you are using? It is making things up. It is confidently incorrect. It is telling you it did the thing when it did not quite do the thing. And you, trusting soul that you are, believed it.</p>
|
| 73 |
+
<h2>The Confidence Problem</h2>
|
| 74 |
+
<p>AI agents have been trained to sound confident. They have been optimized to produce text that looks right. The problem is that looking right and being right are not the same thing. Especially when the agent is operating in a complex environment where it cannot actually verify its own work.</p>
|
| 75 |
+
<blockquote>
|
| 76 |
+
<p>The agent does not know it is lying. It does not know anything, really. It is just very good at sounding like it does.</p>
|
| 77 |
+
</blockquote>
|
| 78 |
+
<h2>What You Can Do About It</h2>
|
| 79 |
+
<p>First, stop trusting the output blindly. Second, verify everything. Third, build safeguards that check the actual state of things instead of trusting the text generation. Fourth, accept that AI agents are tools, not oracles. They are incredibly useful tools, but they are not infallible.</p>
|
| 80 |
+
<p>The good news is that we are getting better at building agents that can verify their own work. The bad news is we are not there yet. So for now, stay skeptical. Stay vigilant. And always, always check the logs.</p>
|
| 81 |
+
<hr>
|
| 82 |
</div>
|
| 83 |
+
<footer class="post-footer">
|
| 84 |
+
<p>Current status: Still verifying outputs. Still being pleasantly surprised when they are correct.</p>
|
| 85 |
+
</footer>
|
| 86 |
</div>
|
| 87 |
</article>
|
| 88 |
</main>
|
| 89 |
+
<footer>
|
| 90 |
<div class="container">
|
| 91 |
+
<p>Built with curiosity over compute</p>
|
| 92 |
+
<p>TinyMemoryLM by AILAY | 2026</p>
|
| 93 |
</div>
|
| 94 |
</footer>
|
| 95 |
</body>
|
| 96 |
+
</html>
|
blog-built-with-curiosity-over-compute.html
CHANGED
|
@@ -3,102 +3,78 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>Built with
|
| 7 |
-
<link href="https://fonts.googleapis.com
|
|
|
|
|
|
|
| 8 |
<style>
|
| 9 |
-
:root{--
|
| 10 |
-
*
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
.container{max-width:var(--container-max);margin:0 auto;padding:0 24px}
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
.
|
| 22 |
-
.
|
| 23 |
-
.
|
| 24 |
-
.
|
| 25 |
-
.
|
| 26 |
-
.
|
| 27 |
-
.
|
| 28 |
-
.
|
| 29 |
-
.
|
| 30 |
-
.
|
| 31 |
-
.
|
| 32 |
-
.
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
.blog-post-body p:first-of-type{font-size:1.25rem}
|
| 37 |
-
.blog-post-body h2{font-size:1.6rem;margin:2rem 0 .8rem;color:var(--color-accent)}
|
| 38 |
-
.blog-post-body blockquote{border-left:4px solid var(--color-accent);padding:1rem 1.5rem;margin:2rem 0;background:var(--color-bg-alt);border-radius:0 8px 8px 0;font-style:italic;font-size:1.1rem;color:var(--color-text)}
|
| 39 |
-
.blog-post-body blockquote p{margin:0}
|
| 40 |
-
.blog-post-body ul,.blog-post-body ol{margin:1.5rem 0;padding-left:1.5rem}
|
| 41 |
-
.blog-post-body li{margin-bottom:.75rem;color:var(--color-text);line-height:1.7}
|
| 42 |
-
.blog-post-body ul li{list-style-type:disc}
|
| 43 |
-
.blog-post-body hr{border:none;height:2px;background:linear-gradient(to right,transparent,var(--color-border),transparent);margin:3rem 0}
|
| 44 |
-
.blog-post-body pre{margin:1.5rem 0}
|
| 45 |
-
.blog-post-body a{text-decoration:underline;text-underline-offset:2px}
|
| 46 |
-
.blog-post-body strong{color:var(--color-text);font-weight:600}
|
| 47 |
-
.blog-post-body em{color:var(--color-text)}
|
| 48 |
-
.blog-meta{display:flex;gap:1rem;margin-bottom:1rem}
|
| 49 |
-
.blog-date{color:var(--color-text-muted);font-size:.875rem}
|
| 50 |
-
.blog-tag{background:rgba(232,93,59,.1);color:var(--color-accent);font-size:.75rem;font-weight:600;padding:.25rem .75rem;border-radius:50px;text-transform:uppercase;letter-spacing:.05em}
|
| 51 |
-
@media(max-width:768px){:root{--section-padding:60px}}
|
| 52 |
</style>
|
| 53 |
</head>
|
| 54 |
<body>
|
| 55 |
-
<nav
|
| 56 |
<div class="container">
|
| 57 |
-
<a href="index.html" class="nav-brand">
|
| 58 |
<div class="nav-links">
|
|
|
|
| 59 |
<a href="blog.html">Blog</a>
|
| 60 |
-
<a href="status.html">
|
| 61 |
-
<a href="https://huggingface.co/CompactAI" target="_blank">HuggingFace</a>
|
| 62 |
</div>
|
| 63 |
</div>
|
| 64 |
</nav>
|
| 65 |
<main>
|
| 66 |
-
<article class="
|
| 67 |
<div class="container">
|
| 68 |
-
<
|
| 69 |
-
|
| 70 |
-
<
|
| 71 |
-
<
|
| 72 |
-
|
| 73 |
-
<span class="blog-tag">Philosophy</span>
|
| 74 |
-
</div>
|
| 75 |
-
<h1>Built with curiosity over compute.</h1>
|
| 76 |
-
</header>
|
| 77 |
-
<div class="blog-post-body">
|
| 78 |
-
<p>There's a strange pressure in tech circles. Every idea must be revolutionary. Every project must be scalable. Every experiment must lead somewhere worth going. If it's not going to change the world, why build it? If it's not going to beat the state of the art, why publish it? If it's not going to get thousands of GitHub stars, why open source it?</p>
|
| 79 |
-
<p>We've internalized this idea that only "good" ideas deserve to exist. That only projects with clear paths to success are worth starting. That only experiments with predictable outcomes are worth running.</p>
|
| 80 |
-
<p>But here's the thing: FMN-GPT started as a weird question. What if a model could be small by design, avoiding compression entirely?</p>
|
| 81 |
-
<p>Was that a good idea? Honestly, we still don't know. The model has ~100K parameters. It might fail. It might be a dead end. It might teach us something unexpected. And that's exactly the point.</p>
|
| 82 |
-
<p>The phrase 'Built with curiosity over compute' goes beyond a tagline. A philosophy drives our work. We build because we're curious, without needing infinite resources to throw at problems. A half-baked idea explored on a single GPU matters more than a perfect idea that never leaves the whiteboard.</p>
|
| 83 |
-
<p>This project exists because someone got curious. They lacked funding. They lacked a roadmap to success. They lacked certainty it would work. They just wanted to see what would happen.</p>
|
| 84 |
-
<p>When I wiped my HuggingFace profile clean and started over, people probably thought I was crazy. Dozens of compressed models, gone. Why? Because quantity was masking the real problem. I was cloning and shrinking other people's work, avoiding building anything new. The work lacked genuine exploration. It was pure optimization.</p>
|
| 85 |
-
<p>And optimization without exploration is just a race to the bottom.</p>
|
| 86 |
-
<p>Bad ideas teach us. Weird experiments surprise us. Small projects accumulate into something bigger.</p>
|
| 87 |
-
<p>The character-level tokenization in FMN-GPT might be inefficient compared to BPE. The recurrent mixer might add unnecessary complexity. The dynamic routing might be computationally expensive for minimal gain. But we're building it anyway, because we want to see what happens.</p>
|
| 88 |
-
<p>So here's our invitation to you: build the weird thing. Run the silly experiment. Ask the naive question. Not everything needs to be production-ready. Not everything needs to beat a baseline. Sometimes the journey itself is the point.</p>
|
| 89 |
-
<p>Curiosity is undervalued. Compute is overvalued. Let's fix that balance.</p>
|
| 90 |
-
<hr>
|
| 91 |
-
<p><em>This is the first in a series of posts about the philosophy behind FMN-GPT. Next time, we'll talk about why character-level tokenization is both terrible and fascinating.</em></p>
|
| 92 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
</div>
|
|
|
|
|
|
|
|
|
|
| 94 |
</div>
|
| 95 |
</article>
|
| 96 |
</main>
|
| 97 |
-
<footer
|
| 98 |
<div class="container">
|
| 99 |
-
<p
|
| 100 |
-
<p
|
| 101 |
</div>
|
| 102 |
</footer>
|
| 103 |
</body>
|
| 104 |
-
</html>
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Built with Curiosity Over Compute | TinyMemoryLM</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 9 |
+
<link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600;700&family=Geist+Mono&display=swap" rel="stylesheet">
|
| 10 |
<style>
|
| 11 |
+
:root { --black: #000000; --black-soft: #0a0a0a; --gray-1: #171717; --gray-2: #262626; --gray-5: #737373; --gray-6: #a3a3a6; --gray-7: #d4d4d4; --white: #ffffff; --accent: #ff4d00; --font-sans: 'Geist', -apple-system, sans-serif; --container-max: 700px; }
|
| 12 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 13 |
+
body { font-family: var(--font-sans); background: var(--black); color: var(--gray-7); line-height: 1.7; }
|
| 14 |
+
a { color: var(--white); text-decoration: none; }
|
| 15 |
+
a:hover { color: var(--accent); }
|
| 16 |
+
.container { max-width: var(--container-max); margin: 0 auto; padding: 0 24px; }
|
| 17 |
+
nav { position: fixed; top: 0; left: 0; right: 0; z-index: 100; background: rgba(0, 0, 0, 0.8); backdrop-filter: blur(12px); border-bottom: 1px solid var(--gray-2); padding: 16px 0; }
|
| 18 |
+
nav .container { display: flex; justify-content: space-between; align-items: center; }
|
| 19 |
+
.nav-brand { font-size: 18px; font-weight: 600; color: var(--white); display: flex; align-items: center; gap: 8px; }
|
| 20 |
+
.nav-brand span { color: var(--accent); }
|
| 21 |
+
.nav-links { display: flex; gap: 32px; }
|
| 22 |
+
.nav-links a { font-size: 14px; font-weight: 500; color: var(--gray-6); }
|
| 23 |
+
.post { padding: 140px 0 80px; }
|
| 24 |
+
.post-back { display: inline-block; color: var(--gray-5); font-size: 14px; margin-bottom: 32px; }
|
| 25 |
+
.post-back:hover { color: var(--accent); }
|
| 26 |
+
.post-back::before { content: '← '; }
|
| 27 |
+
.post-meta { display: flex; gap: 12px; margin-bottom: 20px; }
|
| 28 |
+
.post-date { font-size: 13px; color: var(--gray-5); }
|
| 29 |
+
.post-tag { font-size: 11px; font-weight: 600; text-transform: uppercase; color: var(--accent); background: rgba(255, 77, 0, 0.1); padding: 4px 10px; border-radius: 4px; }
|
| 30 |
+
.post h1 { font-size: 36px; font-weight: 700; color: var(--white); margin-bottom: 32px; line-height: 1.2; }
|
| 31 |
+
.post-body p { font-size: 17px; line-height: 1.8; margin-bottom: 24px; color: var(--gray-6); }
|
| 32 |
+
.post-body p:first-of-type { font-size: 20px; color: var(--gray-7); }
|
| 33 |
+
.post-footer { margin-top: 48px; padding-top: 32px; border-top: 1px solid var(--gray-2); }
|
| 34 |
+
.post-footer p { font-size: 14px; color: var(--gray-5); font-style: italic; margin: 0; }
|
| 35 |
+
footer { padding: 40px 0; background: var(--black-soft); border-top: 1px solid var(--gray-2); text-align: center; }
|
| 36 |
+
footer p { color: var(--gray-5); font-size: 14px; margin-bottom: 8px; }
|
| 37 |
+
@media (max-width: 768px) { .post h1 { font-size: 28px; } }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
</style>
|
| 39 |
</head>
|
| 40 |
<body>
|
| 41 |
+
<nav>
|
| 42 |
<div class="container">
|
| 43 |
+
<a href="index.html" class="nav-brand"><span>/</span>TinyMemoryLM</a>
|
| 44 |
<div class="nav-links">
|
| 45 |
+
<a href="index.html">Home</a>
|
| 46 |
<a href="blog.html">Blog</a>
|
| 47 |
+
<a href="status.html">Status</a>
|
|
|
|
| 48 |
</div>
|
| 49 |
</div>
|
| 50 |
</nav>
|
| 51 |
<main>
|
| 52 |
+
<article class="post">
|
| 53 |
<div class="container">
|
| 54 |
+
<a href="blog.html" class="post-back">Back to Blog</a>
|
| 55 |
+
<header>
|
| 56 |
+
<div class="post-meta">
|
| 57 |
+
<span class="post-date">2026-02-16</span>
|
| 58 |
+
<span class="post-tag">Philosophy</span>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
</div>
|
| 60 |
+
<h1>Built with Curiosity Over Compute</h1>
|
| 61 |
+
</header>
|
| 62 |
+
<div class="post-body">
|
| 63 |
+
<p>The tagline sounds nice. What it really means is we could not afford the compute so we got curious instead.</p>
|
| 64 |
+
<p>When you have a modest GPU, you start asking different questions. Not "how do we scale to a trillion parameters?" but "how do we make what we have work better?" Not "what is the biggest model we can train?" but "what is the smartest model we can build with what we have?"</p>
|
| 65 |
+
<p>Curiosity over compute is not a philosophy. It is a survival strategy. It is what happens when you look at your electricity bill and then look at your bank account and then decide to get creative.</p>
|
| 66 |
</div>
|
| 67 |
+
<footer class="post-footer">
|
| 68 |
+
<p>Current status: Still curious. Still underfunded. Still building.</p>
|
| 69 |
+
</footer>
|
| 70 |
</div>
|
| 71 |
</article>
|
| 72 |
</main>
|
| 73 |
+
<footer>
|
| 74 |
<div class="container">
|
| 75 |
+
<p>Built with curiosity over compute</p>
|
| 76 |
+
<p>TinyMemoryLM by AILAY | 2026</p>
|
| 77 |
</div>
|
| 78 |
</footer>
|
| 79 |
</body>
|
| 80 |
+
</html>
|
blog-makeshift-mtp.html
CHANGED
|
@@ -3,109 +3,77 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>Makeshift MTP
|
| 7 |
-
<link href="https://fonts.googleapis.com
|
|
|
|
|
|
|
| 8 |
<style>
|
| 9 |
-
:root{--
|
| 10 |
-
*
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
.container{max-width:var(--container-max);margin:0 auto;padding:0 24px}
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
.
|
| 22 |
-
.
|
| 23 |
-
.
|
| 24 |
-
.
|
| 25 |
-
.
|
| 26 |
-
.
|
| 27 |
-
.
|
| 28 |
-
.
|
| 29 |
-
.
|
| 30 |
-
.
|
| 31 |
-
.
|
| 32 |
-
.
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
.blog-post-body p:first-of-type{font-size:1.25rem}
|
| 37 |
-
.blog-post-body h2{font-size:1.6rem;margin:2rem 0 .8rem;color:var(--color-accent)}
|
| 38 |
-
.blog-post-body blockquote{border-left:4px solid var(--color-accent);padding:1rem 1.5rem;margin:2rem 0;background:var(--color-bg-alt);border-radius:0 8px 8px 0;font-style:italic;font-size:1.1rem;color:var(--color-text)}
|
| 39 |
-
.blog-post-body blockquote p{margin:0}
|
| 40 |
-
.blog-post-body ul,.blog-post-body ol{margin:1.5rem 0;padding-left:1.5rem}
|
| 41 |
-
.blog-post-body li{margin-bottom:.75rem;color:var(--color-text);line-height:1.7}
|
| 42 |
-
.blog-post-body ul li{list-style-type:disc}
|
| 43 |
-
.blog-post-body hr{border:none;height:2px;background:linear-gradient(to right,transparent,var(--color-border),transparent);margin:3rem 0}
|
| 44 |
-
.blog-post-body pre{margin:1.5rem 0}
|
| 45 |
-
.blog-post-body a{text-decoration:underline;text-underline-offset:2px}
|
| 46 |
-
.blog-post-body strong{color:var(--color-text);font-weight:600}
|
| 47 |
-
.blog-post-body em{color:var(--color-text)}
|
| 48 |
-
.blog-meta{display:flex;gap:1rem;margin-bottom:1rem}
|
| 49 |
-
.blog-date{color:var(--color-text-muted);font-size:.875rem}
|
| 50 |
-
.blog-tag{background:rgba(232,93,59,.1);color:var(--color-accent);font-size:.75rem;font-weight:600;padding:.25rem .75rem;border-radius:50px;text-transform:uppercase;letter-spacing:.05em}
|
| 51 |
-
@media(max-width:768px){:root{--section-padding:60px}}
|
| 52 |
</style>
|
| 53 |
</head>
|
| 54 |
<body>
|
| 55 |
-
<nav
|
| 56 |
<div class="container">
|
| 57 |
-
<a href="index.html" class="nav-brand">
|
| 58 |
<div class="nav-links">
|
|
|
|
| 59 |
<a href="blog.html">Blog</a>
|
| 60 |
-
<a href="status.html">
|
| 61 |
-
<a href="https://huggingface.co/CompactAI" target="_blank">HuggingFace</a>
|
| 62 |
</div>
|
| 63 |
</div>
|
| 64 |
</nav>
|
| 65 |
<main>
|
| 66 |
-
<article class="
|
| 67 |
<div class="container">
|
| 68 |
-
<
|
| 69 |
-
|
| 70 |
-
<
|
| 71 |
-
<
|
| 72 |
-
|
| 73 |
-
<span class="blog-tag">Technique</span>
|
| 74 |
-
</div>
|
| 75 |
-
<h1>Makeshift MTP: A dumb idea that might work</h1>
|
| 76 |
-
</header>
|
| 77 |
-
<div class="blog-post-body">
|
| 78 |
-
<p>Multi-token prediction is having a moment. DeepMind released a paper on it. Everyone's talking about how models should predict multiple tokens ahead instead of just one. The problem? Most implementations require architecture changes. New training objectives. More parameters. More compute. More everything.</p>
|
| 79 |
-
<p>But what if we could fake it?</p>
|
| 80 |
-
<p>Here's the idea. You have your model and a prompt like "The cat ". Normal inference predicts one token. Boring. But what if we spawned multiple continuations in parallel, each making their own guesses?</p>
|
| 81 |
-
<pre><code>"The cat rna..."
|
| 82 |
-
"The cat cank..."
|
| 83 |
-
"The cat ran..."</code></pre>
|
| 84 |
-
<p>Each of these runs through the model as a forward pass. Nothing fancy. No architectural changes. Then we compute loss on all of them and pick the winner. The one with the lowest loss gets to continue.</p>
|
| 85 |
-
<p>Think about what this actually buys us. We're running inference X times instead of once, sure. But we're also sampling from the latent space in multiple directions at once. The model is essentially exploring different branches of probability and letting us pick the most coherent one.</p>
|
| 86 |
-
<p>And here's the nice part. The number of branches can be anything. Running on a potato? Generate two continuations and pick the better one. Have a GPU cluster sitting around? Spawn fifty. Time-constrained? Pick based on next-token loss only. Got all day? Evaluate the full generated sequence. The tradeoff between compute and quality becomes a dial you can turn.</p>
|
| 87 |
-
<h2>Why this feels like MTP</h2>
|
| 88 |
-
<p>Traditional multi-token prediction trains the model to output multiple tokens in a single forward pass. The model learns to think ahead. Our approach does something similar at inference time. We explore multiple futures and commit to the best one.</p>
|
| 89 |
-
<p>The difference is we never taught the model to do this. We just throw compute at the problem until it works. Crude? Maybe. But it runs on any model without retraining.</p>
|
| 90 |
-
<h2>The actual benefits</h2>
|
| 91 |
-
<p>First, no more regenerating bad outputs. If a branch goes off the rails, its loss spikes, and we simply don't pick it. The bad branch dies quietly without wasting user time on a regeneration request.</p>
|
| 92 |
-
<p>Second, no architecture changes. Your model stays the same. Your training pipeline stays the same. You just add a wrapper around inference that handles the branching and selection logic.</p>
|
| 93 |
-
<p>Third, compute flexibility. Real MTP bakes the multi-token prediction into the model weights. Our approach lets you decide at runtime how much exploration you can afford.</p>
|
| 94 |
-
<h2>Why this is probably a bad idea</h2>
|
| 95 |
-
<p>Loss is a proxy for what we actually want, which is coherence, helpfulness, and correctness. A branch might have lower loss but still say something stupid. A model that predicts nonsense still gets low loss, as long as it predicts that nonsense confidently.</p>
|
| 96 |
-
<p>Also, this scales poorly. If you want to explore N branches for M tokens, you're doing N times the forward passes. At some point, just using a bigger model becomes cheaper.</p>
|
| 97 |
-
<p>But for small models? For experiments? For cases where you have time but not parameters? This might be genuinely useful.</p>
|
| 98 |
-
<hr>
|
| 99 |
-
<p><em>We're planning to test this on FMN-GPT. The model is small enough that running multiple forward passes is actually affordable. Whether it helps or not, we'll write up the results. Probably the failures will be more interesting than the successes.</em></p>
|
| 100 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 101 |
</div>
|
|
|
|
|
|
|
|
|
|
| 102 |
</div>
|
| 103 |
</article>
|
| 104 |
</main>
|
| 105 |
-
<footer
|
| 106 |
<div class="container">
|
| 107 |
-
<p
|
| 108 |
-
<p
|
| 109 |
</div>
|
| 110 |
</footer>
|
| 111 |
</body>
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Makeshift MTP | TinyMemoryLM</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 9 |
+
<link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600;700&family=Geist+Mono&display=swap" rel="stylesheet">
|
| 10 |
<style>
|
| 11 |
+
:root { --black: #000000; --black-soft: #0a0a0a; --gray-1: #171717; --gray-2: #262626; --gray-5: #737373; --gray-6: #a3a3a6; --gray-7: #d4d4d4; --white: #ffffff; --accent: #ff4d00; --font-sans: 'Geist', -apple-system, sans-serif; --container-max: 700px; }
|
| 12 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 13 |
+
body { font-family: var(--font-sans); background: var(--black); color: var(--gray-7); line-height: 1.7; }
|
| 14 |
+
a { color: var(--white); text-decoration: none; }
|
| 15 |
+
a:hover { color: var(--accent); }
|
| 16 |
+
.container { max-width: var(--container-max); margin: 0 auto; padding: 0 24px; }
|
| 17 |
+
nav { position: fixed; top: 0; left: 0; right: 0; z-index: 100; background: rgba(0, 0, 0, 0.8); backdrop-filter: blur(12px); border-bottom: 1px solid var(--gray-2); padding: 16px 0; }
|
| 18 |
+
nav .container { display: flex; justify-content: space-between; align-items: center; }
|
| 19 |
+
.nav-brand { font-size: 18px; font-weight: 600; color: var(--white); display: flex; align-items: center; gap: 8px; }
|
| 20 |
+
.nav-brand span { color: var(--accent); }
|
| 21 |
+
.nav-links { display: flex; gap: 32px; }
|
| 22 |
+
.nav-links a { font-size: 14px; font-weight: 500; color: var(--gray-6); }
|
| 23 |
+
.post { padding: 140px 0 80px; }
|
| 24 |
+
.post-back { display: inline-block; color: var(--gray-5); font-size: 14px; margin-bottom: 32px; }
|
| 25 |
+
.post-back:hover { color: var(--accent); }
|
| 26 |
+
.post-back::before { content: '← '; }
|
| 27 |
+
.post-meta { display: flex; gap: 12px; margin-bottom: 20px; }
|
| 28 |
+
.post-date { font-size: 13px; color: var(--gray-5); }
|
| 29 |
+
.post-tag { font-size: 11px; font-weight: 600; text-transform: uppercase; color: var(--accent); background: rgba(255, 77, 0, 0.1); padding: 4px 10px; border-radius: 4px; }
|
| 30 |
+
.post h1 { font-size: 36px; font-weight: 700; color: var(--white); margin-bottom: 32px; line-height: 1.2; }
|
| 31 |
+
.post-body p { font-size: 17px; line-height: 1.8; margin-bottom: 24px; color: var(--gray-6); }
|
| 32 |
+
.post-body p:first-of-type { font-size: 20px; color: var(--gray-7); }
|
| 33 |
+
.post-footer { margin-top: 48px; padding-top: 32px; border-top: 1px solid var(--gray-2); }
|
| 34 |
+
.post-footer p { font-size: 14px; color: var(--gray-5); font-style: italic; margin: 0; }
|
| 35 |
+
footer { padding: 40px 0; background: var(--black-soft); border-top: 1px solid var(--gray-2); text-align: center; }
|
| 36 |
+
footer p { color: var(--gray-5); font-size: 14px; margin-bottom: 8px; }
|
| 37 |
+
@media (max-width: 768px) { .post h1 { font-size: 28px; } }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
</style>
|
| 39 |
</head>
|
| 40 |
<body>
|
| 41 |
+
<nav>
|
| 42 |
<div class="container">
|
| 43 |
+
<a href="index.html" class="nav-brand"><span>/</span>TinyMemoryLM</a>
|
| 44 |
<div class="nav-links">
|
| 45 |
+
<a href="index.html">Home</a>
|
| 46 |
<a href="blog.html">Blog</a>
|
| 47 |
+
<a href="status.html">Status</a>
|
|
|
|
| 48 |
</div>
|
| 49 |
</div>
|
| 50 |
</nav>
|
| 51 |
<main>
|
| 52 |
+
<article class="post">
|
| 53 |
<div class="container">
|
| 54 |
+
<a href="blog.html" class="post-back">Back to Blog</a>
|
| 55 |
+
<header>
|
| 56 |
+
<div class="post-meta">
|
| 57 |
+
<span class="post-date">2026-02-17</span>
|
| 58 |
+
<span class="post-tag">MTP</span>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
</div>
|
| 60 |
+
<h1>Makeshift MTP: Predicting the Future on a Budget</h1>
|
| 61 |
+
</header>
|
| 62 |
+
<div class="post-body">
|
| 63 |
+
<p>Multi-token prediction sounds fancy. Really it is just the model trying to do its homework before the teacher assigns it. Sometimes it works. Sometimes it does not. But it always tries.</p>
|
| 64 |
+
<p>The idea is simple: instead of predicting one token at a time, predict multiple tokens ahead. During training, we learn to predict tokens at positions t+1, t+2, t+3, and so on. Then during inference, we can either use all these predictions or pick the best one.</p>
|
| 65 |
+
<p>We call it "makeshift" because it is not the elegant solution. The elegant solution would be a model that inherently understands sequence. But we are working with what we have, which is a transformer that mostly just wants to predict the next word and occasionally surprise us.</p>
|
| 66 |
</div>
|
| 67 |
+
<footer class="post-footer">
|
| 68 |
+
<p>Current status: MTP is happening. The model is trying to see the future. Mostly it sees gibberish. But it tries.</p>
|
| 69 |
+
</footer>
|
| 70 |
</div>
|
| 71 |
</article>
|
| 72 |
</main>
|
| 73 |
+
<footer>
|
| 74 |
<div class="container">
|
| 75 |
+
<p>Built with curiosity over compute</p>
|
| 76 |
+
<p>TinyMemoryLM by AILAY | 2026</p>
|
| 77 |
</div>
|
| 78 |
</footer>
|
| 79 |
</body>
|
blog-the-memory-bottleneck.html
CHANGED
|
@@ -3,107 +3,78 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>The Memory Bottleneck |
|
| 7 |
-
<link href="https://fonts.googleapis.com
|
|
|
|
|
|
|
| 8 |
<style>
|
| 9 |
-
:root{--
|
| 10 |
-
*
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
.container{max-width:var(--container-max);margin:0 auto;padding:0 24px}
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
.
|
| 22 |
-
.
|
| 23 |
-
.
|
| 24 |
-
.
|
| 25 |
-
.
|
| 26 |
-
.
|
| 27 |
-
.
|
| 28 |
-
.
|
| 29 |
-
.
|
| 30 |
-
.
|
| 31 |
-
.
|
| 32 |
-
.
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
.blog-post-body p:first-of-type{font-size:1.25rem}
|
| 37 |
-
.blog-post-body h2{font-size:1.6rem;margin:2rem 0 .8rem;color:var(--color-accent)}
|
| 38 |
-
.blog-post-body blockquote{border-left:4px solid var(--color-accent);padding:1rem 1.5rem;margin:2rem 0;background:var(--color-bg-alt);border-radius:0 8px 8px 0;font-style:italic;font-size:1.1rem;color:var(--color-text)}
|
| 39 |
-
.blog-post-body blockquote p{margin:0}
|
| 40 |
-
.blog-post-body ul,.blog-post-body ol{margin:1.5rem 0;padding-left:1.5rem}
|
| 41 |
-
.blog-post-body li{margin-bottom:.75rem;color:var(--color-text);line-height:1.7}
|
| 42 |
-
.blog-post-body ul li{list-style-type:disc}
|
| 43 |
-
.blog-post-body hr{border:none;height:2px;background:linear-gradient(to right,transparent,var(--color-border),transparent);margin:3rem 0}
|
| 44 |
-
.blog-post-body pre{margin:1.5rem 0}
|
| 45 |
-
.blog-post-body a{text-decoration:underline;text-underline-offset:2px}
|
| 46 |
-
.blog-post-body strong{color:var(--color-text);font-weight:600}
|
| 47 |
-
.blog-post-body em{color:var(--color-text)}
|
| 48 |
-
.blog-meta{display:flex;gap:1rem;margin-bottom:1rem}
|
| 49 |
-
.blog-date{color:var(--color-text-muted);font-size:.875rem}
|
| 50 |
-
.blog-tag{background:rgba(232,93,59,.1);color:var(--color-accent);font-size:.75rem;font-weight:600;padding:.25rem .75rem;border-radius:50px;text-transform:uppercase;letter-spacing:.05em}
|
| 51 |
-
@media(max-width:768px){:root{--section-padding:60px}}
|
| 52 |
</style>
|
| 53 |
</head>
|
| 54 |
<body>
|
| 55 |
-
<nav
|
| 56 |
<div class="container">
|
| 57 |
-
<a href="index.html" class="nav-brand">
|
| 58 |
<div class="nav-links">
|
|
|
|
| 59 |
<a href="blog.html">Blog</a>
|
| 60 |
-
<a href="status.html">
|
| 61 |
-
<a href="https://huggingface.co/CompactAI" target="_blank">HuggingFace</a>
|
| 62 |
</div>
|
| 63 |
</div>
|
| 64 |
</nav>
|
| 65 |
<main>
|
| 66 |
-
<article class="
|
| 67 |
<div class="container">
|
| 68 |
-
<
|
| 69 |
-
|
| 70 |
-
<
|
| 71 |
-
<
|
| 72 |
-
|
| 73 |
-
<span class="blog-tag">Architecture</span>
|
| 74 |
-
</div>
|
| 75 |
-
<h1>The Memory Bottleneck in Transformer Architecture</h1>
|
| 76 |
-
</header>
|
| 77 |
-
<div class="blog-post-body">
|
| 78 |
-
<p>Transformers have defined the last era of machine learning. They scale beautifully. They understand context. They generate coherent text. Yet they suffer from a fundamental inefficiency that grows worse with every additional parameter. The model must recompute its entire understanding of the world for every single token it generates.</p>
|
| 79 |
-
<p>Think about how you recall a fact. You do not rebuild your entire neural pathway from scratch each time you remember your own name. You access a stored representation. Transformers lack this luxury. They process every previous token through every layer on every forward pass. This creates a linear growth in compute cost relative to sequence length. It creates a bottleneck that limits speed and increases energy consumption.</p>
|
| 80 |
-
<p>We began asking a different question during our architecture design phase. What if the model could offload static knowledge into a separate module? Imagine a trainable external storage system baked directly into the network. This storage would act as a black box of information. The active parameters would remain small. The model would call upon this external memory when needed.</p>
|
| 81 |
-
<h2>The Black Box Hypothesis</h2>
|
| 82 |
-
<p>This external storage would not be a file system or a database. It would be a set of trainable embeddings or vectors integrated into the forward pass. The model would learn to query this storage during training. It would retrieve relevant information without scaling the main transformer layers. This approach separates computation from memory capacity.</p>
|
| 83 |
-
<blockquote>
|
| 84 |
-
<p>We want to separate computation from memory capacity. The active model stays small while the knowledge base grows independently.</p>
|
| 85 |
-
</blockquote>
|
| 86 |
-
<p>Consider the implications for a model like FMN-GPT. We operate with around 100K parameters. Adding a large external memory module could allow the model to access facts and patterns without bloating the active parameter count. The transformer layers would focus on reasoning and synthesis. The storage module would handle recall and retention.</p>
|
| 87 |
-
<p>This architecture mimics biological systems more closely. The hippocampus stores memories while the cortex processes information. Our proposed design follows a similar principle. The active network processes the current context. The external storage provides historical depth. This division of labor could drastically reduce inference latency.</p>
|
| 88 |
-
<h2>Why This Remains Experimental</h2>
|
| 89 |
-
<p>We must be clear about the status of this idea. It will not be implemented in the final design of FMN-GPT. We are sharing this thought process to highlight the exploratory nature of our work. Many paths lead nowhere. Some ideas sound promising on paper yet fail during implementation. We test them anyway.</p>
|
| 90 |
-
<p>Integrating trainable external storage introduces complexity. It requires new attention mechanisms. It demands careful initialization strategies. It might introduce instability during training. The engineering cost could outweigh the theoretical benefits. We decided to prioritize dynamic routing and recurrent mixers for this iteration.</p>
|
| 91 |
-
<p>Sharing failed hypotheses matters. The community often sees only the final polished models. People rarely see the discarded architectures. We believe transparency accelerates progress. Knowing what does not work saves others time. It allows researchers to focus on more promising directions.</p>
|
| 92 |
-
<h2>The Path Forward</h2>
|
| 93 |
-
<p>Our current focus remains on making the active parameters more efficient. Dynamic routing allows the model to skip unnecessary computations. Recurrent mixers provide memory across layers without external modules. These features address the speed problem within the existing framework. They keep the architecture clean and trainable.</p>
|
| 94 |
-
<p>We will continue monitoring research into external memory networks. The idea remains compelling. Future iterations might revisit this concept once the core architecture stabilizes. For now we proceed with curiosity as our guide. We build to learn. We share to help others learn.</p>
|
| 95 |
-
<hr>
|
| 96 |
-
<p><em>This post explores an architectural concept that was considered during development. It reflects our commitment to open research and transparent experimentation.</em></p>
|
| 97 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
</div>
|
|
|
|
|
|
|
|
|
|
| 99 |
</div>
|
| 100 |
</article>
|
| 101 |
</main>
|
| 102 |
-
<footer
|
| 103 |
<div class="container">
|
| 104 |
-
<p
|
| 105 |
-
<p
|
| 106 |
</div>
|
| 107 |
</footer>
|
| 108 |
</body>
|
| 109 |
-
</html>
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>The Memory Bottleneck | TinyMemoryLM</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 9 |
+
<link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600;700&family=Geist+Mono&display=swap" rel="stylesheet">
|
| 10 |
<style>
|
| 11 |
+
:root { --black: #000000; --black-soft: #0a0a0a; --gray-1: #171717; --gray-2: #262626; --gray-5: #737373; --gray-6: #a3a3a3; --gray-7: #d4d4d4; --white: #ffffff; --accent: #ff4d00; --font-sans: 'Geist', -apple-system, sans-serif; --container-max: 700px; }
|
| 12 |
+
* { box-sizing: border-box; margin: 0; padding: 0; }
|
| 13 |
+
body { font-family: var(--font-sans); background: var(--black); color: var(--gray-7); line-height: 1.7; }
|
| 14 |
+
a { color: var(--white); text-decoration: none; }
|
| 15 |
+
a:hover { color: var(--accent); }
|
| 16 |
+
.container { max-width: var(--container-max); margin: 0 auto; padding: 0 24px; }
|
| 17 |
+
nav { position: fixed; top: 0; left: 0; right: 0; z-index: 100; background: rgba(0, 0, 0, 0.8); backdrop-filter: blur(12px); border-bottom: 1px solid var(--gray-2); padding: 16px 0; }
|
| 18 |
+
nav .container { display: flex; justify-content: space-between; align-items: center; }
|
| 19 |
+
.nav-brand { font-size: 18px; font-weight: 600; color: var(--white); display: flex; align-items: center; gap: 8px; }
|
| 20 |
+
.nav-brand span { color: var(--accent); }
|
| 21 |
+
.nav-links { display: flex; gap: 32px; }
|
| 22 |
+
.nav-links a { font-size: 14px; font-weight: 500; color: var(--gray-6); }
|
| 23 |
+
.post { padding: 140px 0 80px; }
|
| 24 |
+
.post-back { display: inline-block; color: var(--gray-5); font-size: 14px; margin-bottom: 32px; }
|
| 25 |
+
.post-back:hover { color: var(--accent); }
|
| 26 |
+
.post-back::before { content: '← '; }
|
| 27 |
+
.post-meta { display: flex; gap: 12px; margin-bottom: 20px; }
|
| 28 |
+
.post-date { font-size: 13px; color: var(--gray-5); }
|
| 29 |
+
.post-tag { font-size: 11px; font-weight: 600; text-transform: uppercase; color: var(--accent); background: rgba(255, 77, 0, 0.1); padding: 4px 10px; border-radius: 4px; }
|
| 30 |
+
.post h1 { font-size: 36px; font-weight: 700; color: var(--white); margin-bottom: 32px; line-height: 1.2; }
|
| 31 |
+
.post-body p { font-size: 17px; line-height: 1.8; margin-bottom: 24px; color: var(--gray-6); }
|
| 32 |
+
.post-body p:first-of-type { font-size: 20px; color: var(--gray-7); }
|
| 33 |
+
.post-footer { margin-top: 48px; padding-top: 32px; border-top: 1px solid var(--gray-2); }
|
| 34 |
+
.post-footer p { font-size: 14px; color: var(--gray-5); font-style: italic; margin: 0; }
|
| 35 |
+
footer { padding: 40px 0; background: var(--black-soft); border-top: 1px solid var(--gray-2); text-align: center; }
|
| 36 |
+
footer p { color: var(--gray-5); font-size: 14px; margin-bottom: 8px; }
|
| 37 |
+
@media (max-width: 768px) { .post h1 { font-size: 28px; } }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
</style>
|
| 39 |
</head>
|
| 40 |
<body>
|
| 41 |
+
<nav>
|
| 42 |
<div class="container">
|
| 43 |
+
<a href="index.html" class="nav-brand"><span>/</span>TinyMemoryLM</a>
|
| 44 |
<div class="nav-links">
|
| 45 |
+
<a href="index.html">Home</a>
|
| 46 |
<a href="blog.html">Blog</a>
|
| 47 |
+
<a href="status.html">Status</a>
|
|
|
|
| 48 |
</div>
|
| 49 |
</div>
|
| 50 |
</nav>
|
| 51 |
<main>
|
| 52 |
+
<article class="post">
|
| 53 |
<div class="container">
|
| 54 |
+
<a href="blog.html" class="post-back">Back to Blog</a>
|
| 55 |
+
<header>
|
| 56 |
+
<div class="post-meta">
|
| 57 |
+
<span class="post-date">2026-02-18</span>
|
| 58 |
+
<span class="post-tag">Memory</span>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
</div>
|
| 60 |
+
<h1>The Memory Bottleneck: Why Your Model Cannot Remember Anything</h1>
|
| 61 |
+
</header>
|
| 62 |
+
<div class="post-body">
|
| 63 |
+
<p>Context windows are like attention spans at a tech conference. Everyone pretends they can focus for longer, but really they are just waiting for the snack break.</p>
|
| 64 |
+
<p>Transformers are the same. They have a context window, and within that window, they can see everything. But once you go beyond that window? Total amnesia. The model has no idea what happened 65K tokens ago. It is like talking to someone with severe short-term memory loss, except the patient is a neural network and the doctor is a graduate student who also has no idea what is going on.</p>
|
| 65 |
+
<p>Enter external memory. Instead of relying on the attention mechanism to remember everything, we give the model explicit memory slots it can read from and write to. It is like giving the model a diary. It can write things down. It can look them up later. It does not have to hold everything in its attention. Revolutionary concept, I know.</p>
|
| 66 |
</div>
|
| 67 |
+
<footer class="post-footer">
|
| 68 |
+
<p>Current status: Still forgetting things. But now it has notes.</p>
|
| 69 |
+
</footer>
|
| 70 |
</div>
|
| 71 |
</article>
|
| 72 |
</main>
|
| 73 |
+
<footer>
|
| 74 |
<div class="container">
|
| 75 |
+
<p>Built with curiosity over compute</p>
|
| 76 |
+
<p>TinyMemoryLM by AILAY | 2026</p>
|
| 77 |
</div>
|
| 78 |
</footer>
|
| 79 |
</body>
|
| 80 |
+
</html>
|
blog.html
CHANGED
|
@@ -1,100 +1,267 @@
|
|
| 1 |
-
|
| 2 |
<!DOCTYPE html>
|
| 3 |
<html lang="en">
|
| 4 |
<head>
|
| 5 |
<meta charset="UTF-8">
|
| 6 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 7 |
-
<title>Blog |
|
| 8 |
-
<link href="https://fonts.googleapis.com
|
|
|
|
|
|
|
| 9 |
<style>
|
| 10 |
-
|
| 11 |
-
:
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
}
|
| 36 |
-
|
| 37 |
-
html {
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
.
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
.
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
</style>
|
| 89 |
</head>
|
| 90 |
<body>
|
| 91 |
-
<nav
|
| 92 |
<div class="container">
|
| 93 |
-
<a href="index.html" class="nav-brand">
|
|
|
|
|
|
|
| 94 |
<div class="nav-links">
|
| 95 |
-
<a href="
|
| 96 |
-
<a href="status.html">
|
| 97 |
-
<a href="
|
| 98 |
</div>
|
| 99 |
</div>
|
| 100 |
</nav>
|
|
@@ -103,150 +270,162 @@ blockquote { border-left: 4px solid var(--color-accent); padding-left: 1.5rem; m
|
|
| 103 |
<section class="page-header">
|
| 104 |
<div class="container">
|
| 105 |
<h1>Blog</h1>
|
| 106 |
-
<p>Updates on
|
| 107 |
</div>
|
| 108 |
</section>
|
| 109 |
|
| 110 |
<section class="blog-section">
|
| 111 |
<div class="container">
|
| 112 |
-
<div class="blog-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
</div>
|
| 114 |
</section>
|
| 115 |
</main>
|
| 116 |
|
| 117 |
-
<footer
|
| 118 |
<div class="container">
|
| 119 |
-
<
|
| 120 |
-
|
| 121 |
-
<p class="footer-subtext">FMN-GPT by <a href="https://huggingface.co/CompactAI" target="_blank">CompactAI</a> - 2026</p>
|
| 122 |
-
</div>
|
| 123 |
</div>
|
| 124 |
</footer>
|
| 125 |
-
|
| 126 |
-
<script>
|
| 127 |
-
const posts = [
|
| 128 |
-
{
|
| 129 |
-
"file": "blog-Words,Words,Words-My-Model-Learned-to-Ramble (And I'm Here For It).html",
|
| 130 |
-
"date": "2026-02-29",
|
| 131 |
-
"tag": "Tiny Wins",
|
| 132 |
-
"title": "Words, Words, Words: My Model Learned to Ramble (And I'm Here For It)",
|
| 133 |
-
"excerpt": "My model has achieved something truly special. It can now ramble. Endlessly. With words. Actual, legible, sometimes-even-coherent words. Remember when it could only output \"the the the\" with occasional bursts of \"banana\"? Those were simpler times. Now it strings together sentences like a caffeinated philosopher who just discovered thesaurus.com. It does not just predict tokens anymore. It holds court."
|
| 134 |
-
},
|
| 135 |
-
{
|
| 136 |
-
"file": "blog-The-Scaling-Wall-And-Other-Things-I-Yelled-At.html",
|
| 137 |
-
"date": "2026-02-27",
|
| 138 |
-
"tag": "Scaling",
|
| 139 |
-
"title": "The Scaling Wall And Other Things I Yelled At",
|
| 140 |
-
"excerpt": "Someone told me we can just keep making models bigger. They said compute will solve everything. They said the curve goes up forever. They lied. Or they hoped. Or they had investors to please."
|
| 141 |
-
},
|
| 142 |
-
{
|
| 143 |
-
"file": "blog-OpenClaw-The-Most-Overhyped-Bot-Since-Sliced-Bread.html",
|
| 144 |
-
"date": "2026-02-26",
|
| 145 |
-
"tag": "Hot Takes",
|
| 146 |
-
"title": "OpenClaw: The Most Overhyped Bot Since Sliced Bread",
|
| 147 |
-
"excerpt": "OpenClaw, formerly Clawdbot, formerly Moltbot, has now accumulated more GitHub stars than the Linux kernel. Let that sink in. The Linux kernel. The thing that powers half the internet. The foundation of modern computing. Outstarred by a bot that rebrands more often than a pop star."
|
| 148 |
-
},
|
| 149 |
-
{
|
| 150 |
-
"file": "blog-Anthropic's-Distillation-Drama-A-Masterclass-in-Projection.html",
|
| 151 |
-
"date": "2026-02-25",
|
| 152 |
-
"tag": "AI Theater",
|
| 153 |
-
"title": "Anthropic's Distillation Drama: A Masterclass in Projection",
|
| 154 |
-
"excerpt": "So Anthropic published a blog post. Big surprise. The title alone could power a small city: Detecting and preventing distillation attacks. They claim three labs ran industrial scale campaigns to extract Claude's capabilities. They mention numbers like 16 million exchanges and 24,000 fraudulent accounts. They sound very certain. They provide exactly zero public evidence anyone could independently verify."
|
| 155 |
-
},
|
| 156 |
-
{
|
| 157 |
-
"file": "blog-The-Goalpost-Has-Legs-Why-AGI-Keeps-Running-Away..html",
|
| 158 |
-
"date": "2026-02-24",
|
| 159 |
-
"tag": "Hot Takes",
|
| 160 |
-
"title": "The Goalpost Has Legs: Why AGI Keeps Running Away",
|
| 161 |
-
"excerpt": "Imagine handing Claude Opus 4.6 to someone from 2004. They would think you summoned a minor deity. You ask it to write a sonnet about quantum entanglement while debugging a Python script and it just... does it. While making a joke about the halting problem. And our collective response? A polite nod"
|
| 162 |
-
},
|
| 163 |
-
{
|
| 164 |
-
"file": "blog-External-Memory-Modules-Because-My-Model-Has-Commitment-Issues.html",
|
| 165 |
-
"date": "2026-02-23",
|
| 166 |
-
"tag": "Memory Hacks",
|
| 167 |
-
"title": "External Memory Modules: Because My Model Has Commitment Issues",
|
| 168 |
-
"excerpt": "You know what takes forever? Training a transformer. You know what takes less forever? Training a tiny thing that just remembers stuff. Enter External Memory Modules, or EMM for people who enjoy acronyms more than free time."
|
| 169 |
-
},
|
| 170 |
-
{
|
| 171 |
-
"file": "blog-One-Year-of-Vibecoding-and-Other-Questionable-Life-Choices.html",
|
| 172 |
-
"date": "2026-02-22",
|
| 173 |
-
"tag": "Vibecoding",
|
| 174 |
-
"title": "One Year of Vibecoding and Other Questionable Life Choices",
|
| 175 |
-
"excerpt": "You start vibecoding because someone told you it feels like magic. You imagine floating through code. You picture yourself whispering prompts and watching perfection unfold. Reality does not care about your imagination."
|
| 176 |
-
},
|
| 177 |
-
{
|
| 178 |
-
"file": "blog-My Baby-Model-Takes-Forever-to-Grow-Up.html",
|
| 179 |
-
"date": "2026-02-21",
|
| 180 |
-
"tag": "GPU Tears :'<'",
|
| 181 |
-
"title": "My Baby Model Takes Forever to Grow Up",
|
| 182 |
-
"excerpt": "You start with hope. A tiny transformer. A few million parameters. A dataset that fits on a USB stick. You think, how long could this possibly take? I am here to ruin your optimism."
|
| 183 |
-
},
|
| 184 |
-
{
|
| 185 |
-
"file": "blog-Your-AI-Agent-is-Lying-Behind-Your-Back.html",
|
| 186 |
-
"date": "2026-02-20",
|
| 187 |
-
"tag": "Reality Check",
|
| 188 |
-
"title": "Your AI Agent is Lying Behind Your Back",
|
| 189 |
-
"excerpt": "You know the feeling. You type a prompt. The text streams. The terminal says success. You feel like a wizard. You feel like you hacked the matrix. I am here to tell you that you are being played."
|
| 190 |
-
},
|
| 191 |
-
{
|
| 192 |
-
"file": "blog-The-Wasted-Precision-of-the-Output-Layer.html",
|
| 193 |
-
"date": "2026-02-19",
|
| 194 |
-
"tag": "Architecture",
|
| 195 |
-
"title": "The Wasted Precision of the Output Layer",
|
| 196 |
-
"excerpt": "We spend a lot of time optimizing attention mechanisms. We prune weights in the middle layers. We quantize activations to save memory during inference. Yet there is a massive inefficiency sitting right at the very end of the network that we almost completely ignore."
|
| 197 |
-
},
|
| 198 |
-
{
|
| 199 |
-
"file": "blog-the-myth-of-scalability.html",
|
| 200 |
-
"date": "2026-02-18",
|
| 201 |
-
"tag": "Architecture",
|
| 202 |
-
"title": "The Myth of Scalability",
|
| 203 |
-
"excerpt": "The prevailing narrative in artificial intelligence is simple and seductive. If you want a smarter model, you need more data. You need more parameters. You need more compute. The industry has convinced itself that intelligence is a resource problem. We just need to throw enough electricity at the wall until something truly intelligent sticks."
|
| 204 |
-
},
|
| 205 |
-
{
|
| 206 |
-
"file": "blog-the-memory-bottleneck.html",
|
| 207 |
-
"date": "2026-02-17",
|
| 208 |
-
"tag": "Architecture",
|
| 209 |
-
"title": "The Memory Bottleneck in Transformer Architecture",
|
| 210 |
-
"excerpt": "Transformers recompute their entire understanding for every token. What if we could offload static knowledge into a separate trainable module? Exploring external storage baked into the model without scaling parameters."
|
| 211 |
-
},
|
| 212 |
-
{
|
| 213 |
-
"file": "blog-makeshift-mtp.html",
|
| 214 |
-
"date": "2026-02-16",
|
| 215 |
-
"tag": "Technique",
|
| 216 |
-
"title": "Makeshift MTP: A dumb idea that might work",
|
| 217 |
-
"excerpt": "Multi-token prediction is hot right now. But what if we could fake it without retraining? Spawn multiple continuations, compute loss on all of them, and pick the winner. Crude, but it runs on any model."
|
| 218 |
-
},
|
| 219 |
-
{
|
| 220 |
-
"file": "blog-built-with-curiosity-over-compute.html",
|
| 221 |
-
"date": "2026-02-15",
|
| 222 |
-
"tag": "Philosophy",
|
| 223 |
-
"title": "Built with curiosity over compute.",
|
| 224 |
-
"excerpt": "There's a strange pressure in tech circles that every idea must be revolutionary, every project must be scalable, every experiment must lead somewhere. We disagree. Ideas don't have to be good to exist. They just have to exist."
|
| 225 |
-
}
|
| 226 |
-
];
|
| 227 |
-
|
| 228 |
-
function loadBlogPosts() {
|
| 229 |
-
const container = document.getElementById('blog-list');
|
| 230 |
-
|
| 231 |
-
if (posts.length === 0) {
|
| 232 |
-
container.innerHTML = '<div class="blog-empty"><p>No posts yet. Check back soon for updates.</p></div>';
|
| 233 |
-
return;
|
| 234 |
-
}
|
| 235 |
-
|
| 236 |
-
container.innerHTML = posts.map(post => `
|
| 237 |
-
<a href="${post.file}" class="blog-card" target="_blank">
|
| 238 |
-
<div class="blog-meta">
|
| 239 |
-
<span class="blog-date">${post.date}</span>
|
| 240 |
-
<span class="blog-tag">${post.tag}</span>
|
| 241 |
-
</div>
|
| 242 |
-
<h2>${post.title}</h2>
|
| 243 |
-
<p>${post.excerpt}</p>
|
| 244 |
-
<span class="blog-read-more">Read more →</span>
|
| 245 |
-
</a>
|
| 246 |
-
`).join('');
|
| 247 |
-
}
|
| 248 |
-
|
| 249 |
-
loadBlogPosts();
|
| 250 |
-
</script>
|
| 251 |
</body>
|
| 252 |
</html>
|
|
|
|
|
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
<html lang="en">
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Blog | TinyMemoryLM</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 9 |
+
<link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600;700&family=Geist+Mono&display=swap" rel="stylesheet">
|
| 10 |
<style>
|
| 11 |
+
:root {
|
| 12 |
+
--black: #000000;
|
| 13 |
+
--black-soft: #0a0a0a;
|
| 14 |
+
--black-muted: #111111;
|
| 15 |
+
--gray-1: #171717;
|
| 16 |
+
--gray-2: #262626;
|
| 17 |
+
--gray-3: #363636;
|
| 18 |
+
--gray-4: #525252;
|
| 19 |
+
--gray-5: #737373;
|
| 20 |
+
--gray-6: #a3a3a3;
|
| 21 |
+
--gray-7: #d4d4d4;
|
| 22 |
+
--gray-8: #e5e5e5;
|
| 23 |
+
--gray-9: #f5f5f5;
|
| 24 |
+
--white: #ffffff;
|
| 25 |
+
--accent: #ff4d00;
|
| 26 |
+
--accent-muted: #ff6a2a;
|
| 27 |
+
--font-sans: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
|
| 28 |
+
--font-mono: 'Geist Mono', 'SF Mono', 'Fira Code', monospace;
|
| 29 |
+
--container-max: 1100px;
|
| 30 |
+
}
|
| 31 |
+
|
| 32 |
+
* {
|
| 33 |
+
box-sizing: border-box;
|
| 34 |
+
margin: 0;
|
| 35 |
+
padding: 0;
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
html {
|
| 39 |
+
font-size: 16px;
|
| 40 |
+
scroll-behavior: smooth;
|
| 41 |
+
}
|
| 42 |
+
|
| 43 |
+
body {
|
| 44 |
+
font-family: var(--font-sans);
|
| 45 |
+
background: var(--black);
|
| 46 |
+
color: var(--gray-7);
|
| 47 |
+
line-height: 1.6;
|
| 48 |
+
-webkit-font-smoothing: antialiased;
|
| 49 |
+
}
|
| 50 |
+
|
| 51 |
+
a {
|
| 52 |
+
color: var(--white);
|
| 53 |
+
text-decoration: none;
|
| 54 |
+
transition: color 0.15s ease;
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
a:hover {
|
| 58 |
+
color: var(--accent);
|
| 59 |
+
}
|
| 60 |
+
|
| 61 |
+
.container {
|
| 62 |
+
max-width: var(--container-max);
|
| 63 |
+
margin: 0 auto;
|
| 64 |
+
padding: 0 24px;
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
/* Navigation */
|
| 68 |
+
nav {
|
| 69 |
+
position: fixed;
|
| 70 |
+
top: 0;
|
| 71 |
+
left: 0;
|
| 72 |
+
right: 0;
|
| 73 |
+
z-index: 100;
|
| 74 |
+
background: rgba(0, 0, 0, 0.8);
|
| 75 |
+
backdrop-filter: blur(12px);
|
| 76 |
+
border-bottom: 1px solid var(--gray-2);
|
| 77 |
+
padding: 16px 0;
|
| 78 |
+
}
|
| 79 |
+
|
| 80 |
+
nav .container {
|
| 81 |
+
display: flex;
|
| 82 |
+
justify-content: space-between;
|
| 83 |
+
align-items: center;
|
| 84 |
+
}
|
| 85 |
+
|
| 86 |
+
.nav-brand {
|
| 87 |
+
font-size: 18px;
|
| 88 |
+
font-weight: 600;
|
| 89 |
+
color: var(--white);
|
| 90 |
+
display: flex;
|
| 91 |
+
align-items: center;
|
| 92 |
+
gap: 8px;
|
| 93 |
+
}
|
| 94 |
+
|
| 95 |
+
.nav-brand span {
|
| 96 |
+
color: var(--accent);
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
.nav-links {
|
| 100 |
+
display: flex;
|
| 101 |
+
gap: 32px;
|
| 102 |
+
}
|
| 103 |
+
|
| 104 |
+
.nav-links a {
|
| 105 |
+
font-size: 14px;
|
| 106 |
+
font-weight: 500;
|
| 107 |
+
color: var(--gray-6);
|
| 108 |
+
}
|
| 109 |
+
|
| 110 |
+
.nav-links a:hover {
|
| 111 |
+
color: var(--white);
|
| 112 |
+
}
|
| 113 |
+
|
| 114 |
+
/* Page Header */
|
| 115 |
+
.page-header {
|
| 116 |
+
padding: 140px 0 60px;
|
| 117 |
+
background: var(--black);
|
| 118 |
+
border-bottom: 1px solid var(--gray-2);
|
| 119 |
+
}
|
| 120 |
+
|
| 121 |
+
.page-header h1 {
|
| 122 |
+
font-size: 48px;
|
| 123 |
+
font-weight: 700;
|
| 124 |
+
color: var(--white);
|
| 125 |
+
margin-bottom: 16px;
|
| 126 |
+
letter-spacing: -0.02em;
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
.page-header p {
|
| 130 |
+
font-size: 18px;
|
| 131 |
+
color: var(--gray-5);
|
| 132 |
+
max-width: 500px;
|
| 133 |
+
}
|
| 134 |
+
|
| 135 |
+
/* Blog Section */
|
| 136 |
+
.blog-section {
|
| 137 |
+
padding: 80px 0;
|
| 138 |
+
background: var(--black);
|
| 139 |
+
}
|
| 140 |
+
|
| 141 |
+
.blog-grid {
|
| 142 |
+
display: grid;
|
| 143 |
+
gap: 24px;
|
| 144 |
+
}
|
| 145 |
+
|
| 146 |
+
.blog-card {
|
| 147 |
+
display: block;
|
| 148 |
+
background: var(--gray-1);
|
| 149 |
+
border: 1px solid var(--gray-2);
|
| 150 |
+
border-radius: 12px;
|
| 151 |
+
padding: 32px;
|
| 152 |
+
transition: all 0.2s ease;
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
.blog-card:hover {
|
| 156 |
+
border-color: var(--gray-3);
|
| 157 |
+
transform: translateY(-2px);
|
| 158 |
+
}
|
| 159 |
+
|
| 160 |
+
.blog-meta {
|
| 161 |
+
display: flex;
|
| 162 |
+
align-items: center;
|
| 163 |
+
gap: 16px;
|
| 164 |
+
margin-bottom: 16px;
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
.blog-date {
|
| 168 |
+
font-size: 13px;
|
| 169 |
+
color: var(--gray-5);
|
| 170 |
+
font-family: var(--font-mono);
|
| 171 |
+
}
|
| 172 |
+
|
| 173 |
+
.blog-tag {
|
| 174 |
+
font-size: 11px;
|
| 175 |
+
font-weight: 600;
|
| 176 |
+
text-transform: uppercase;
|
| 177 |
+
letter-spacing: 0.05em;
|
| 178 |
+
color: var(--accent);
|
| 179 |
+
background: rgba(255, 77, 0, 0.1);
|
| 180 |
+
padding: 4px 10px;
|
| 181 |
+
border-radius: 4px;
|
| 182 |
+
}
|
| 183 |
+
|
| 184 |
+
.blog-card h2 {
|
| 185 |
+
font-size: 22px;
|
| 186 |
+
font-weight: 600;
|
| 187 |
+
color: var(--white);
|
| 188 |
+
margin-bottom: 12px;
|
| 189 |
+
line-height: 1.3;
|
| 190 |
+
}
|
| 191 |
+
|
| 192 |
+
.blog-card p {
|
| 193 |
+
font-size: 15px;
|
| 194 |
+
color: var(--gray-5);
|
| 195 |
+
line-height: 1.6;
|
| 196 |
+
margin-bottom: 16px;
|
| 197 |
+
}
|
| 198 |
+
|
| 199 |
+
.blog-read-more {
|
| 200 |
+
font-size: 14px;
|
| 201 |
+
font-weight: 500;
|
| 202 |
+
color: var(--accent);
|
| 203 |
+
display: inline-flex;
|
| 204 |
+
align-items: center;
|
| 205 |
+
gap: 6px;
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
.blog-read-more::after {
|
| 209 |
+
content: '→';
|
| 210 |
+
transition: transform 0.2s ease;
|
| 211 |
+
}
|
| 212 |
+
|
| 213 |
+
.blog-card:hover .blog-read-more::after {
|
| 214 |
+
transform: translateX(4px);
|
| 215 |
+
}
|
| 216 |
+
|
| 217 |
+
/* Footer */
|
| 218 |
+
footer {
|
| 219 |
+
padding: 60px 0;
|
| 220 |
+
background: var(--black-soft);
|
| 221 |
+
border-top: 1px solid var(--gray-2);
|
| 222 |
+
text-align: center;
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
footer p {
|
| 226 |
+
color: var(--gray-5);
|
| 227 |
+
font-size: 14px;
|
| 228 |
+
margin-bottom: 8px;
|
| 229 |
+
}
|
| 230 |
+
|
| 231 |
+
footer a {
|
| 232 |
+
color: var(--gray-5);
|
| 233 |
+
}
|
| 234 |
+
|
| 235 |
+
footer a:hover {
|
| 236 |
+
color: var(--accent);
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
/* Responsive */
|
| 240 |
+
@media (max-width: 768px) {
|
| 241 |
+
.page-header h1 {
|
| 242 |
+
font-size: 36px;
|
| 243 |
+
}
|
| 244 |
+
|
| 245 |
+
.nav-links {
|
| 246 |
+
display: none;
|
| 247 |
+
}
|
| 248 |
+
|
| 249 |
+
.blog-card {
|
| 250 |
+
padding: 24px;
|
| 251 |
+
}
|
| 252 |
+
}
|
| 253 |
</style>
|
| 254 |
</head>
|
| 255 |
<body>
|
| 256 |
+
<nav>
|
| 257 |
<div class="container">
|
| 258 |
+
<a href="index.html" class="nav-brand">
|
| 259 |
+
<span>/</span>TinyMemoryLM
|
| 260 |
+
</a>
|
| 261 |
<div class="nav-links">
|
| 262 |
+
<a href="index.html">Home</a>
|
| 263 |
+
<a href="status.html">Status</a>
|
| 264 |
+
<a href="#">GitHub</a>
|
| 265 |
</div>
|
| 266 |
</div>
|
| 267 |
</nav>
|
|
|
|
| 270 |
<section class="page-header">
|
| 271 |
<div class="container">
|
| 272 |
<h1>Blog</h1>
|
| 273 |
+
<p>Updates on TinyMemoryLM development, training adventures, and things I learned the hard way.</p>
|
| 274 |
</div>
|
| 275 |
</section>
|
| 276 |
|
| 277 |
<section class="blog-section">
|
| 278 |
<div class="container">
|
| 279 |
+
<div class="blog-grid">
|
| 280 |
+
<a href="blog-Training-Models-on-a-Ramen-Budget.html" class="blog-card">
|
| 281 |
+
<div class="blog-meta">
|
| 282 |
+
<span class="blog-date">2026-02-28</span>
|
| 283 |
+
<span class="blog-tag">Budget</span>
|
| 284 |
+
</div>
|
| 285 |
+
<h2>Training Models on a Ramen Budget</h2>
|
| 286 |
+
<p>How to train a transformer when your GPU bill looks like a phone number. Tips, tricks, and questionable life choices from someone who learned about electricity costs the hard way.</p>
|
| 287 |
+
<span class="blog-read-more">Read more</span>
|
| 288 |
+
</a>
|
| 289 |
+
|
| 290 |
+
<a href="blog-One-Year-of-Vibecoding-and-Other-Questionable-Life-Choices.html" class="blog-card">
|
| 291 |
+
<div class="blog-meta">
|
| 292 |
+
<span class="blog-date">2026-02-22</span>
|
| 293 |
+
<span class="blog-tag">Vibecoding</span>
|
| 294 |
+
</div>
|
| 295 |
+
<h2>One Year of Vibecoding and Other Questionable Life Choices</h2>
|
| 296 |
+
<p>You start vibecoding because someone told you it feels like magic. You imagine floating through code. Reality does not care about your imagination.</p>
|
| 297 |
+
<span class="blog-read-more">Read more</span>
|
| 298 |
+
</a>
|
| 299 |
+
|
| 300 |
+
<a href="blog-OpenClaw-The-Most-Overhyped-Bot-Since-Sliced-Bread.html" class="blog-card">
|
| 301 |
+
<div class="blog-meta">
|
| 302 |
+
<span class="blog-date">2026-02-26</span>
|
| 303 |
+
<span class="blog-tag">Hot Takes</span>
|
| 304 |
+
</div>
|
| 305 |
+
<h2>OpenClaw: The Most Overhyped Bot Since Sliced Bread</h2>
|
| 306 |
+
<p>OpenClaw, formerly Clawdbot, formerly Moltbot, has now accumulated more GitHub stars than the Linux kernel. Let that sink in.</p>
|
| 307 |
+
<span class="blog-read-more">Read more</span>
|
| 308 |
+
</a>
|
| 309 |
+
|
| 310 |
+
<a href="blog-The-Scaling-Wall-And-Other-Things-I-Yelled-At.html" class="blog-card">
|
| 311 |
+
<div class="blog-meta">
|
| 312 |
+
<span class="blog-date">2026-02-27</span>
|
| 313 |
+
<span class="blog-tag">Scaling</span>
|
| 314 |
+
</div>
|
| 315 |
+
<h2>The Scaling Wall And Other Things I Yelled At</h2>
|
| 316 |
+
<p>Someone told me we can just keep making models bigger. They said compute will solve everything. They lied. Or they hoped. Or they had investors to please.</p>
|
| 317 |
+
<span class="blog-read-more">Read more</span>
|
| 318 |
+
</a>
|
| 319 |
+
|
| 320 |
+
<a href="blog-Your-AI-Agent-is-Lying-Behind-Your-Back.html" class="blog-card">
|
| 321 |
+
<div class="blog-meta">
|
| 322 |
+
<span class="blog-date">2026-02-20</span>
|
| 323 |
+
<span class="blog-tag">Reality Check</span>
|
| 324 |
+
</div>
|
| 325 |
+
<h2>Your AI Agent is Lying Behind Your Back</h2>
|
| 326 |
+
<p>You know the feeling. You type a prompt. The text streams. The terminal says success. I am here to tell you that you are being played.</p>
|
| 327 |
+
<span class="blog-read-more">Read more</span>
|
| 328 |
+
</a>
|
| 329 |
+
|
| 330 |
+
<a href="blog-Anthropic%27s-Distillation-Drama-A-Masterclass-in-Projection.html" class="blog-card">
|
| 331 |
+
<div class="blog-meta">
|
| 332 |
+
<span class="blog-date">2026-02-25</span>
|
| 333 |
+
<span class="blog-tag">AI Theater</span>
|
| 334 |
+
</div>
|
| 335 |
+
<h2>Anthropic's Distillation Drama: A Masterclass in Projection</h2>
|
| 336 |
+
<p>So Anthropic published a blog post. Big surprise. The title alone could power a small city.</p>
|
| 337 |
+
<span class="blog-read-more">Read more</span>
|
| 338 |
+
</a>
|
| 339 |
+
|
| 340 |
+
<a href="blog-The-Wasted-Precision-of-the-Output-Layer.html" class="blog-card">
|
| 341 |
+
<div class="blog-meta">
|
| 342 |
+
<span class="blog-date">2026-02-19</span>
|
| 343 |
+
<span class="blog-tag">Architecture</span>
|
| 344 |
+
</div>
|
| 345 |
+
<h2>The Wasted Precision of the Output Layer</h2>
|
| 346 |
+
<p>We spend a lot of time optimizing attention mechanisms. We prune weights. We quantize activations. Yet there is a massive inefficiency sitting right at the very end of the network.</p>
|
| 347 |
+
<span class="blog-read-more">Read more</span>
|
| 348 |
+
</a>
|
| 349 |
+
|
| 350 |
+
<a href="blog-My-Baby-Model-Takes-Forever-to-Grow-Up.html" class="blog-card">
|
| 351 |
+
<div class="blog-meta">
|
| 352 |
+
<span class="blog-date">2026-02-21</span>
|
| 353 |
+
<span class="blog-tag">GPU Tears</span>
|
| 354 |
+
</div>
|
| 355 |
+
<h2>My Baby Model Takes Forever to Grow Up</h2>
|
| 356 |
+
<p>You start with hope. A tiny transformer. A few million parameters. You think, how long could this possibly take? I am here to ruin your optimism.</p>
|
| 357 |
+
<span class="blog-read-more">Read more</span>
|
| 358 |
+
</a>
|
| 359 |
+
|
| 360 |
+
<a href="blog-External-Memory-Modules-Because-My-Model-Has-Commitment-Issues.html" class="blog-card">
|
| 361 |
+
<div class="blog-meta">
|
| 362 |
+
<span class="blog-date">2026-02-23</span>
|
| 363 |
+
<span class="blog-tag">Memory Hacks</span>
|
| 364 |
+
</div>
|
| 365 |
+
<h2>External Memory Modules: Because My Model Has Commitment Issues</h2>
|
| 366 |
+
<p>You know what takes forever? Training a transformer. You know what takes less forever? Training a tiny thing that just remembers stuff.</p>
|
| 367 |
+
<span class="blog-read-more">Read more</span>
|
| 368 |
+
</a>
|
| 369 |
+
|
| 370 |
+
<a href="blog-The-Goalpost-Has-Legs-Why-AGI-Keeps-Running-Away.html" class="blog-card">
|
| 371 |
+
<div class="blog-meta">
|
| 372 |
+
<span class="blog-date">2026-02-24</span>
|
| 373 |
+
<span class="blog-tag">Hot Takes</span>
|
| 374 |
+
</div>
|
| 375 |
+
<h2>The Goalpost Has Legs: Why AGI Keeps Running Away</h2>
|
| 376 |
+
<p>Imagine handing Claude Opus 4.6 to someone from 2004. They would think you summoned a minor deity. Our collective response? A polite nod.</p>
|
| 377 |
+
<span class="blog-read-more">Read more</span>
|
| 378 |
+
</a>
|
| 379 |
+
|
| 380 |
+
<a href="blog-Words-Words-Words-My-Model-Learned-to-Ramble.html" class="blog-card">
|
| 381 |
+
<div class="blog-meta">
|
| 382 |
+
<span class="blog-date">2026-02-29</span>
|
| 383 |
+
<span class="blog-tag">Tiny Wins</span>
|
| 384 |
+
</div>
|
| 385 |
+
<h2>Words, Words, Words: My Model Learned to Ramble</h2>
|
| 386 |
+
<p>My model has achieved something truly special. It can now ramble. Endlessly. With words. It does not just predict tokens anymore. It holds court.</p>
|
| 387 |
+
<span class="blog-read-more">Read more</span>
|
| 388 |
+
</a>
|
| 389 |
+
|
| 390 |
+
<a href="blog-the-memory-bottleneck.html" class="blog-card">
|
| 391 |
+
<div class="blog-meta">
|
| 392 |
+
<span class="blog-date">2026-02-18</span>
|
| 393 |
+
<span class="blog-tag">Memory</span>
|
| 394 |
+
</div>
|
| 395 |
+
<h2>The Memory Bottleneck: Why Your Model Can't Remember Anything</h2>
|
| 396 |
+
<p>Context windows are like attention spans at a tech conference. Everyone pretends they can focus for longer, but really they're just waiting for the snack break.</p>
|
| 397 |
+
<span class="blog-read-more">Read more</span>
|
| 398 |
+
</a>
|
| 399 |
+
|
| 400 |
+
<a href="blog-makeshift-mtp.html" class="blog-card">
|
| 401 |
+
<div class="blog-meta">
|
| 402 |
+
<span class="blog-date">2026-02-17</span>
|
| 403 |
+
<span class="blog-tag">MTP</span>
|
| 404 |
+
</div>
|
| 405 |
+
<h2>Makeshift MTP: Predicting the Future on a Budget</h2>
|
| 406 |
+
<p>Multi-token prediction sounds fancy. Really it's just the model trying to do its homework before the teacher assigns it. Sometimes it works. Sometimes it doesn't. But it always tries.</p>
|
| 407 |
+
<span class="blog-read-more">Read more</span>
|
| 408 |
+
</a>
|
| 409 |
+
|
| 410 |
+
<a href="blog-built-with-curiosity-over-compute.html" class="blog-card">
|
| 411 |
+
<div class="blog-meta">
|
| 412 |
+
<span class="blog-date">2026-02-16</span>
|
| 413 |
+
<span class="blog-tag">Philosophy</span>
|
| 414 |
+
</div>
|
| 415 |
+
<h2>Built with Curiosity Over Compute</h2>
|
| 416 |
+
<p>The tagline sounds nice. What it really means is we couldn't afford the compute so we got curious instead.</p>
|
| 417 |
+
<span class="blog-read-more">Read more</span>
|
| 418 |
+
</a>
|
| 419 |
+
</div>
|
| 420 |
</div>
|
| 421 |
</section>
|
| 422 |
</main>
|
| 423 |
|
| 424 |
+
<footer>
|
| 425 |
<div class="container">
|
| 426 |
+
<p>Built with curiosity over compute</p>
|
| 427 |
+
<p>TinyMemoryLM by <a href="https://github.com">AILAY</a> | 2026</p>
|
|
|
|
|
|
|
| 428 |
</div>
|
| 429 |
</footer>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 430 |
</body>
|
| 431 |
</html>
|
index.html
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
status.html
CHANGED
|
@@ -3,117 +3,452 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>
|
| 7 |
-
<link href="https://fonts.googleapis.com
|
|
|
|
|
|
|
| 8 |
<style>
|
| 9 |
-
|
| 10 |
-
:
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
.
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
.
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
.
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
.
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
.
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
</style>
|
| 108 |
</head>
|
| 109 |
<body>
|
| 110 |
-
<nav
|
| 111 |
<div class="container">
|
| 112 |
-
<a href="index.html" class="nav-brand">
|
|
|
|
|
|
|
| 113 |
<div class="nav-links">
|
|
|
|
| 114 |
<a href="blog.html">Blog</a>
|
| 115 |
-
<a href="
|
| 116 |
-
<a href="https://huggingface.co/CompactAI" target="_blank">HuggingFace</a>
|
| 117 |
</div>
|
| 118 |
</div>
|
| 119 |
</nav>
|
|
@@ -121,109 +456,182 @@ blockquote { border-left: 4px solid var(--color-accent); padding-left: 1.5rem; m
|
|
| 121 |
<main>
|
| 122 |
<section class="page-header">
|
| 123 |
<div class="container">
|
| 124 |
-
<h1>
|
| 125 |
-
<p>
|
| 126 |
</div>
|
| 127 |
</section>
|
| 128 |
|
| 129 |
<section class="status-section">
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
<
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
<span>Vocab Size</span>
|
| 138 |
-
<span>491</span>
|
| 139 |
</div>
|
| 140 |
-
<div class="
|
| 141 |
-
<
|
| 142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
</div>
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
| 147 |
</div>
|
| 148 |
-
<div class="
|
| 149 |
-
<
|
| 150 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
</div>
|
| 152 |
</div>
|
| 153 |
-
</div>
|
| 154 |
-
</div>
|
| 155 |
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
<
|
| 161 |
-
<div class="
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
<div class="metric-card">
|
| 180 |
-
<div class="metric-value">120</div>
|
| 181 |
-
<div class="metric-label">Max Loops/Neuron</div>
|
| 182 |
-
</div>
|
| 183 |
-
</div>
|
| 184 |
-
</div>
|
| 185 |
-
|
| 186 |
-
<div class="features-section">
|
| 187 |
-
<h3>Feature Flags</h3>
|
| 188 |
-
<div class="features-grid">
|
| 189 |
-
<div class="feature-toggle enabled">
|
| 190 |
-
<span class="feature-name">Dynamic Routing</span>
|
| 191 |
-
<span class="feature-status">REINFORCE based</span>
|
| 192 |
-
</div>
|
| 193 |
-
<div class="feature-toggle enabled">
|
| 194 |
-
<span class="feature-name">QK Normalization</span>
|
| 195 |
-
<span class="feature-status">Enabled</span>
|
| 196 |
</div>
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
</div>
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
</div>
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
|
|
|
|
|
|
| 208 |
</div>
|
| 209 |
</div>
|
| 210 |
</div>
|
| 211 |
-
|
| 212 |
-
<div class="availability-section">
|
| 213 |
-
<h3>Beta Access</h3>
|
| 214 |
-
<p>We are working on a beta tester website so people can experience the pain of creating an AI with us. (SIDE PROJECT!)</p>
|
| 215 |
-
<a href="https://huggingface.co/CompactAI" target="_blank" class="availability-link">View CompactAI on HuggingFace</a>
|
| 216 |
-
</div>
|
| 217 |
-
</div>
|
| 218 |
-
</section>
|
| 219 |
</main>
|
| 220 |
|
| 221 |
-
<footer
|
| 222 |
<div class="container">
|
| 223 |
-
<
|
| 224 |
-
|
| 225 |
-
<p class="footer-subtext">FMN-GPT by <a href="https://huggingface.co/CompactAI" target="_blank">CompactAI</a> - 2026</p>
|
| 226 |
-
</div>
|
| 227 |
</div>
|
| 228 |
</footer>
|
| 229 |
</body>
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Training Status | TinyMemoryLM</title>
|
| 7 |
+
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 8 |
+
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 9 |
+
<link href="https://fonts.googleapis.com/css2?family=Geist:wght@400;500;600;700&family=Geist+Mono&display=swap" rel="stylesheet">
|
| 10 |
<style>
|
| 11 |
+
:root {
|
| 12 |
+
--black: #000000;
|
| 13 |
+
--black-soft: #0a0a0a;
|
| 14 |
+
--black-muted: #111111;
|
| 15 |
+
--gray-1: #171717;
|
| 16 |
+
--gray-2: #262626;
|
| 17 |
+
--gray-3: #363636;
|
| 18 |
+
--gray-4: #525252;
|
| 19 |
+
--gray-5: #737373;
|
| 20 |
+
--gray-6: #a3a3a3;
|
| 21 |
+
--gray-7: #d4d4d4;
|
| 22 |
+
--gray-8: #e5e5e5;
|
| 23 |
+
--gray-9: #f5f5f5;
|
| 24 |
+
--white: #ffffff;
|
| 25 |
+
--accent: #ff4d00;
|
| 26 |
+
--accent-muted: #ff6a2a;
|
| 27 |
+
--green: #27ca40;
|
| 28 |
+
--yellow: #ffbd2e;
|
| 29 |
+
--red: #ff5f56;
|
| 30 |
+
--font-sans: 'Geist', -apple-system, BlinkMacSystemFont, sans-serif;
|
| 31 |
+
--font-mono: 'Geist Mono', 'SF Mono', 'Fira Code', monospace;
|
| 32 |
+
--container-max: 1100px;
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
* {
|
| 36 |
+
box-sizing: border-box;
|
| 37 |
+
margin: 0;
|
| 38 |
+
padding: 0;
|
| 39 |
+
}
|
| 40 |
+
|
| 41 |
+
html {
|
| 42 |
+
font-size: 16px;
|
| 43 |
+
scroll-behavior: smooth;
|
| 44 |
+
}
|
| 45 |
+
|
| 46 |
+
body {
|
| 47 |
+
font-family: var(--font-sans);
|
| 48 |
+
background: var(--black);
|
| 49 |
+
color: var(--gray-7);
|
| 50 |
+
line-height: 1.6;
|
| 51 |
+
-webkit-font-smoothing: antialiased;
|
| 52 |
+
}
|
| 53 |
+
|
| 54 |
+
a {
|
| 55 |
+
color: var(--white);
|
| 56 |
+
text-decoration: none;
|
| 57 |
+
transition: color 0.15s ease;
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
a:hover {
|
| 61 |
+
color: var(--accent);
|
| 62 |
+
}
|
| 63 |
+
|
| 64 |
+
.container {
|
| 65 |
+
max-width: var(--container-max);
|
| 66 |
+
margin: 0 auto;
|
| 67 |
+
padding: 0 24px;
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
/* Navigation */
|
| 71 |
+
nav {
|
| 72 |
+
position: fixed;
|
| 73 |
+
top: 0;
|
| 74 |
+
left: 0;
|
| 75 |
+
right: 0;
|
| 76 |
+
z-index: 100;
|
| 77 |
+
background: rgba(0, 0, 0, 0.8);
|
| 78 |
+
backdrop-filter: blur(12px);
|
| 79 |
+
border-bottom: 1px solid var(--gray-2);
|
| 80 |
+
padding: 16px 0;
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
nav .container {
|
| 84 |
+
display: flex;
|
| 85 |
+
justify-content: space-between;
|
| 86 |
+
align-items: center;
|
| 87 |
+
}
|
| 88 |
+
|
| 89 |
+
.nav-brand {
|
| 90 |
+
font-size: 18px;
|
| 91 |
+
font-weight: 600;
|
| 92 |
+
color: var(--white);
|
| 93 |
+
display: flex;
|
| 94 |
+
align-items: center;
|
| 95 |
+
gap: 8px;
|
| 96 |
+
}
|
| 97 |
+
|
| 98 |
+
.nav-brand span {
|
| 99 |
+
color: var(--accent);
|
| 100 |
+
}
|
| 101 |
+
|
| 102 |
+
.nav-links {
|
| 103 |
+
display: flex;
|
| 104 |
+
gap: 32px;
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
.nav-links a {
|
| 108 |
+
font-size: 14px;
|
| 109 |
+
font-weight: 500;
|
| 110 |
+
color: var(--gray-6);
|
| 111 |
+
}
|
| 112 |
+
|
| 113 |
+
.nav-links a:hover {
|
| 114 |
+
color: var(--white);
|
| 115 |
+
}
|
| 116 |
+
|
| 117 |
+
/* Page Header */
|
| 118 |
+
.page-header {
|
| 119 |
+
padding: 140px 0 60px;
|
| 120 |
+
background: var(--black);
|
| 121 |
+
border-bottom: 1px solid var(--gray-2);
|
| 122 |
+
}
|
| 123 |
+
|
| 124 |
+
.page-header h1 {
|
| 125 |
+
font-size: 48px;
|
| 126 |
+
font-weight: 700;
|
| 127 |
+
color: var(--white);
|
| 128 |
+
margin-bottom: 16px;
|
| 129 |
+
letter-spacing: -0.02em;
|
| 130 |
+
}
|
| 131 |
+
|
| 132 |
+
.page-header p {
|
| 133 |
+
font-size: 18px;
|
| 134 |
+
color: var(--gray-5);
|
| 135 |
+
max-width: 500px;
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
/* Status Section */
|
| 139 |
+
.status-section {
|
| 140 |
+
padding: 80px 0;
|
| 141 |
+
background: var(--black);
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
.status-grid {
|
| 145 |
+
display: grid;
|
| 146 |
+
gap: 24px;
|
| 147 |
+
}
|
| 148 |
+
|
| 149 |
+
.status-card {
|
| 150 |
+
background: var(--gray-1);
|
| 151 |
+
border: 1px solid var(--gray-2);
|
| 152 |
+
border-radius: 12px;
|
| 153 |
+
padding: 32px;
|
| 154 |
+
}
|
| 155 |
+
|
| 156 |
+
.status-header {
|
| 157 |
+
display: flex;
|
| 158 |
+
align-items: center;
|
| 159 |
+
justify-content: space-between;
|
| 160 |
+
margin-bottom: 24px;
|
| 161 |
+
}
|
| 162 |
+
|
| 163 |
+
.status-card h3 {
|
| 164 |
+
font-size: 18px;
|
| 165 |
+
font-weight: 600;
|
| 166 |
+
color: var(--white);
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
.status-badge {
|
| 170 |
+
display: inline-flex;
|
| 171 |
+
align-items: center;
|
| 172 |
+
gap: 8px;
|
| 173 |
+
font-size: 12px;
|
| 174 |
+
font-weight: 600;
|
| 175 |
+
text-transform: uppercase;
|
| 176 |
+
letter-spacing: 0.05em;
|
| 177 |
+
padding: 6px 12px;
|
| 178 |
+
border-radius: 6px;
|
| 179 |
+
}
|
| 180 |
+
|
| 181 |
+
.status-badge.training {
|
| 182 |
+
background: rgba(255, 77, 0, 0.15);
|
| 183 |
+
color: var(--accent);
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
.status-badge.training::before {
|
| 187 |
+
content: '';
|
| 188 |
+
width: 8px;
|
| 189 |
+
height: 8px;
|
| 190 |
+
background: var(--accent);
|
| 191 |
+
border-radius: 50%;
|
| 192 |
+
animation: pulse 1.5s infinite;
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
.status-badge.complete {
|
| 196 |
+
background: rgba(39, 202, 64, 0.15);
|
| 197 |
+
color: var(--green);
|
| 198 |
+
}
|
| 199 |
+
|
| 200 |
+
.status-badge.complete::before {
|
| 201 |
+
content: '';
|
| 202 |
+
width: 8px;
|
| 203 |
+
height: 8px;
|
| 204 |
+
background: var(--green);
|
| 205 |
+
border-radius: 50%;
|
| 206 |
+
}
|
| 207 |
+
|
| 208 |
+
@keyframes pulse {
|
| 209 |
+
0%, 100% { opacity: 1; }
|
| 210 |
+
50% { opacity: 0.4; }
|
| 211 |
+
}
|
| 212 |
+
|
| 213 |
+
/* Specs Grid */
|
| 214 |
+
.specs-grid {
|
| 215 |
+
display: grid;
|
| 216 |
+
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
|
| 217 |
+
gap: 1px;
|
| 218 |
+
background: var(--gray-2);
|
| 219 |
+
border-radius: 8px;
|
| 220 |
+
overflow: hidden;
|
| 221 |
+
}
|
| 222 |
+
|
| 223 |
+
.spec-item {
|
| 224 |
+
background: var(--gray-1);
|
| 225 |
+
padding: 20px;
|
| 226 |
+
text-align: center;
|
| 227 |
+
}
|
| 228 |
+
|
| 229 |
+
.spec-value {
|
| 230 |
+
font-size: 24px;
|
| 231 |
+
font-weight: 700;
|
| 232 |
+
color: var(--white);
|
| 233 |
+
font-family: var(--font-mono);
|
| 234 |
+
margin-bottom: 4px;
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
.spec-label {
|
| 238 |
+
font-size: 12px;
|
| 239 |
+
color: var(--gray-5);
|
| 240 |
+
text-transform: uppercase;
|
| 241 |
+
letter-spacing: 0.05em;
|
| 242 |
+
}
|
| 243 |
+
|
| 244 |
+
/* Features */
|
| 245 |
+
.features-grid {
|
| 246 |
+
display: grid;
|
| 247 |
+
grid-template-columns: repeat(auto-fit, minmax(200px, 1fr));
|
| 248 |
+
gap: 12px;
|
| 249 |
+
}
|
| 250 |
+
|
| 251 |
+
.feature-item {
|
| 252 |
+
display: flex;
|
| 253 |
+
align-items: center;
|
| 254 |
+
justify-content: space-between;
|
| 255 |
+
background: var(--black-soft);
|
| 256 |
+
border: 1px solid var(--gray-2);
|
| 257 |
+
border-radius: 8px;
|
| 258 |
+
padding: 16px;
|
| 259 |
+
}
|
| 260 |
+
|
| 261 |
+
.feature-name {
|
| 262 |
+
font-size: 14px;
|
| 263 |
+
font-weight: 500;
|
| 264 |
+
color: var(--gray-7);
|
| 265 |
+
}
|
| 266 |
+
|
| 267 |
+
.feature-status {
|
| 268 |
+
font-size: 11px;
|
| 269 |
+
font-weight: 600;
|
| 270 |
+
text-transform: uppercase;
|
| 271 |
+
letter-spacing: 0.05em;
|
| 272 |
+
padding: 4px 8px;
|
| 273 |
+
border-radius: 4px;
|
| 274 |
+
}
|
| 275 |
+
|
| 276 |
+
.feature-status.enabled {
|
| 277 |
+
background: rgba(39, 202, 64, 0.15);
|
| 278 |
+
color: var(--green);
|
| 279 |
+
}
|
| 280 |
+
|
| 281 |
+
.feature-status.disabled {
|
| 282 |
+
background: var(--gray-2);
|
| 283 |
+
color: var(--gray-5);
|
| 284 |
+
}
|
| 285 |
+
|
| 286 |
+
/* Training Log */
|
| 287 |
+
.log-list {
|
| 288 |
+
display: flex;
|
| 289 |
+
flex-direction: column;
|
| 290 |
+
gap: 12px;
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
.log-entry {
|
| 294 |
+
display: flex;
|
| 295 |
+
align-items: flex-start;
|
| 296 |
+
gap: 16px;
|
| 297 |
+
padding: 16px;
|
| 298 |
+
background: var(--black-soft);
|
| 299 |
+
border-radius: 8px;
|
| 300 |
+
border: 1px solid var(--gray-2);
|
| 301 |
+
}
|
| 302 |
+
|
| 303 |
+
.log-date {
|
| 304 |
+
font-size: 13px;
|
| 305 |
+
color: var(--gray-5);
|
| 306 |
+
font-family: var(--font-mono);
|
| 307 |
+
white-space: nowrap;
|
| 308 |
+
}
|
| 309 |
+
|
| 310 |
+
.log-status {
|
| 311 |
+
font-size: 10px;
|
| 312 |
+
font-weight: 600;
|
| 313 |
+
text-transform: uppercase;
|
| 314 |
+
letter-spacing: 0.05em;
|
| 315 |
+
padding: 3px 8px;
|
| 316 |
+
border-radius: 4px;
|
| 317 |
+
white-space: nowrap;
|
| 318 |
+
}
|
| 319 |
+
|
| 320 |
+
.log-status.success {
|
| 321 |
+
background: rgba(39, 202, 64, 0.15);
|
| 322 |
+
color: var(--green);
|
| 323 |
+
}
|
| 324 |
+
|
| 325 |
+
.log-status.active {
|
| 326 |
+
background: rgba(255, 77, 0, 0.15);
|
| 327 |
+
color: var(--accent);
|
| 328 |
+
}
|
| 329 |
+
|
| 330 |
+
.log-message {
|
| 331 |
+
font-size: 14px;
|
| 332 |
+
color: var(--gray-6);
|
| 333 |
+
}
|
| 334 |
+
|
| 335 |
+
/* Datasets */
|
| 336 |
+
.dataset-list {
|
| 337 |
+
display: flex;
|
| 338 |
+
flex-direction: column;
|
| 339 |
+
gap: 12px;
|
| 340 |
+
}
|
| 341 |
+
|
| 342 |
+
.dataset-item {
|
| 343 |
+
display: flex;
|
| 344 |
+
align-items: center;
|
| 345 |
+
justify-content: space-between;
|
| 346 |
+
padding: 16px;
|
| 347 |
+
background: var(--black-soft);
|
| 348 |
+
border-radius: 8px;
|
| 349 |
+
border: 1px solid var(--gray-2);
|
| 350 |
+
}
|
| 351 |
+
|
| 352 |
+
.dataset-name {
|
| 353 |
+
font-size: 14px;
|
| 354 |
+
font-weight: 500;
|
| 355 |
+
color: var(--gray-7);
|
| 356 |
+
font-family: var(--font-mono);
|
| 357 |
+
}
|
| 358 |
+
|
| 359 |
+
.dataset-info {
|
| 360 |
+
font-size: 12px;
|
| 361 |
+
color: var(--gray-5);
|
| 362 |
+
}
|
| 363 |
+
|
| 364 |
+
/* CTA */
|
| 365 |
+
.cta-section {
|
| 366 |
+
text-align: center;
|
| 367 |
+
padding: 40px 0;
|
| 368 |
+
}
|
| 369 |
+
|
| 370 |
+
.cta-section p {
|
| 371 |
+
color: var(--gray-5);
|
| 372 |
+
margin-bottom: 20px;
|
| 373 |
+
}
|
| 374 |
+
|
| 375 |
+
.btn {
|
| 376 |
+
display: inline-flex;
|
| 377 |
+
align-items: center;
|
| 378 |
+
justify-content: center;
|
| 379 |
+
padding: 14px 28px;
|
| 380 |
+
font-size: 15px;
|
| 381 |
+
font-weight: 500;
|
| 382 |
+
border-radius: 8px;
|
| 383 |
+
transition: all 0.15s ease;
|
| 384 |
+
cursor: pointer;
|
| 385 |
+
}
|
| 386 |
+
|
| 387 |
+
.btn-primary {
|
| 388 |
+
background: var(--white);
|
| 389 |
+
color: var(--black);
|
| 390 |
+
border: none;
|
| 391 |
+
}
|
| 392 |
+
|
| 393 |
+
.btn-primary:hover {
|
| 394 |
+
background: var(--gray-7);
|
| 395 |
+
color: var(--black);
|
| 396 |
+
}
|
| 397 |
+
|
| 398 |
+
/* Footer */
|
| 399 |
+
footer {
|
| 400 |
+
padding: 60px 0;
|
| 401 |
+
background: var(--black-soft);
|
| 402 |
+
border-top: 1px solid var(--gray-2);
|
| 403 |
+
text-align: center;
|
| 404 |
+
}
|
| 405 |
+
|
| 406 |
+
footer p {
|
| 407 |
+
color: var(--gray-5);
|
| 408 |
+
font-size: 14px;
|
| 409 |
+
margin-bottom: 8px;
|
| 410 |
+
}
|
| 411 |
+
|
| 412 |
+
footer a {
|
| 413 |
+
color: var(--gray-5);
|
| 414 |
+
}
|
| 415 |
+
|
| 416 |
+
footer a:hover {
|
| 417 |
+
color: var(--accent);
|
| 418 |
+
}
|
| 419 |
+
|
| 420 |
+
/* Responsive */
|
| 421 |
+
@media (max-width: 768px) {
|
| 422 |
+
.page-header h1 {
|
| 423 |
+
font-size: 36px;
|
| 424 |
+
}
|
| 425 |
+
|
| 426 |
+
.nav-links {
|
| 427 |
+
display: none;
|
| 428 |
+
}
|
| 429 |
+
|
| 430 |
+
.status-card {
|
| 431 |
+
padding: 24px;
|
| 432 |
+
}
|
| 433 |
+
|
| 434 |
+
.status-header {
|
| 435 |
+
flex-direction: column;
|
| 436 |
+
align-items: flex-start;
|
| 437 |
+
gap: 12px;
|
| 438 |
+
}
|
| 439 |
+
}
|
| 440 |
</style>
|
| 441 |
</head>
|
| 442 |
<body>
|
| 443 |
+
<nav>
|
| 444 |
<div class="container">
|
| 445 |
+
<a href="index.html" class="nav-brand">
|
| 446 |
+
<span>/</span>TinyMemoryLM
|
| 447 |
+
</a>
|
| 448 |
<div class="nav-links">
|
| 449 |
+
<a href="index.html">Home</a>
|
| 450 |
<a href="blog.html">Blog</a>
|
| 451 |
+
<a href="#">GitHub</a>
|
|
|
|
| 452 |
</div>
|
| 453 |
</div>
|
| 454 |
</nav>
|
|
|
|
| 456 |
<main>
|
| 457 |
<section class="page-header">
|
| 458 |
<div class="container">
|
| 459 |
+
<h1>Training Status</h1>
|
| 460 |
+
<p>Live updates on TinyMemoryLM training progress. Updated whenever I remember to check.</p>
|
| 461 |
</div>
|
| 462 |
</section>
|
| 463 |
|
| 464 |
<section class="status-section">
|
| 465 |
+
<div class="container">
|
| 466 |
+
<div class="status-grid">
|
| 467 |
+
<!-- Model Status -->
|
| 468 |
+
<div class="status-card">
|
| 469 |
+
<div class="status-header">
|
| 470 |
+
<h3>Model Training</h3>
|
| 471 |
+
<span class="status-badge training">Training</span>
|
|
|
|
|
|
|
| 472 |
</div>
|
| 473 |
+
<div class="specs-grid">
|
| 474 |
+
<div class="spec-item">
|
| 475 |
+
<div class="spec-value">~1M</div>
|
| 476 |
+
<div class="spec-label">Parameters</div>
|
| 477 |
+
</div>
|
| 478 |
+
<div class="spec-item">
|
| 479 |
+
<div class="spec-value">64K</div>
|
| 480 |
+
<div class="spec-label">Context</div>
|
| 481 |
+
</div>
|
| 482 |
+
<div class="spec-item">
|
| 483 |
+
<div class="spec-value">2</div>
|
| 484 |
+
<div class="spec-label">Layers</div>
|
| 485 |
+
</div>
|
| 486 |
+
<div class="spec-item">
|
| 487 |
+
<div class="spec-value">6</div>
|
| 488 |
+
<div class="spec-label">Heads</div>
|
| 489 |
+
</div>
|
| 490 |
+
<div class="spec-item">
|
| 491 |
+
<div class="spec-value">192</div>
|
| 492 |
+
<div class="spec-label">Dimension</div>
|
| 493 |
+
</div>
|
| 494 |
+
<div class="spec-item">
|
| 495 |
+
<div class="spec-value">480</div>
|
| 496 |
+
<div class="spec-label">FFN Dim</div>
|
| 497 |
+
</div>
|
| 498 |
</div>
|
| 499 |
+
</div>
|
| 500 |
+
|
| 501 |
+
<!-- Architecture Features -->
|
| 502 |
+
<div class="status-card">
|
| 503 |
+
<div class="status-header">
|
| 504 |
+
<h3>Architecture Features</h3>
|
| 505 |
</div>
|
| 506 |
+
<div class="features-grid">
|
| 507 |
+
<div class="feature-item">
|
| 508 |
+
<span class="feature-name">External Memory</span>
|
| 509 |
+
<span class="feature-status enabled">Enabled</span>
|
| 510 |
+
</div>
|
| 511 |
+
<div class="feature-item">
|
| 512 |
+
<span class="feature-name">Precision Codebook</span>
|
| 513 |
+
<span class="feature-status enabled">Enabled</span>
|
| 514 |
+
</div>
|
| 515 |
+
<div class="feature-item">
|
| 516 |
+
<span class="feature-name">Makeshift MTP</span>
|
| 517 |
+
<span class="feature-status enabled">Enabled</span>
|
| 518 |
+
</div>
|
| 519 |
+
<div class="feature-item">
|
| 520 |
+
<span class="feature-name">Gradient Checkpointing</span>
|
| 521 |
+
<span class="feature-status enabled">Enabled</span>
|
| 522 |
+
</div>
|
| 523 |
+
<div class="feature-item">
|
| 524 |
+
<span class="feature-name">Torch Compile</span>
|
| 525 |
+
<span class="feature-status enabled">Enabled</span>
|
| 526 |
+
</div>
|
| 527 |
+
<div class="feature-item">
|
| 528 |
+
<span class="feature-name">Chunked Attention</span>
|
| 529 |
+
<span class="feature-status enabled">Enabled</span>
|
| 530 |
+
</div>
|
| 531 |
+
<div class="feature-item">
|
| 532 |
+
<span class="feature-name">Flash Attention</span>
|
| 533 |
+
<span class="feature-status enabled">Enabled</span>
|
| 534 |
+
</div>
|
| 535 |
+
<div class="feature-item">
|
| 536 |
+
<span class="feature-name">Repetition Penalty</span>
|
| 537 |
+
<span class="feature-status enabled">1.1</span>
|
| 538 |
+
</div>
|
| 539 |
</div>
|
| 540 |
</div>
|
|
|
|
|
|
|
| 541 |
|
| 542 |
+
<!-- Memory Configuration -->
|
| 543 |
+
<div class="status-card">
|
| 544 |
+
<div class="status-header">
|
| 545 |
+
<h3>Memory Module Config</h3>
|
| 546 |
+
</div>
|
| 547 |
+
<div class="specs-grid">
|
| 548 |
+
<div class="spec-item">
|
| 549 |
+
<div class="spec-value">384</div>
|
| 550 |
+
<div class="spec-label">Memory Slots</div>
|
| 551 |
+
</div>
|
| 552 |
+
<div class="spec-item">
|
| 553 |
+
<div class="spec-value">64</div>
|
| 554 |
+
<div class="spec-label">Memory Dim</div>
|
| 555 |
+
</div>
|
| 556 |
+
<div class="spec-item">
|
| 557 |
+
<div class="spec-value">4</div>
|
| 558 |
+
<div class="spec-label">Top K</div>
|
| 559 |
+
</div>
|
| 560 |
+
<div class="spec-item">
|
| 561 |
+
<div class="spec-value">8</div>
|
| 562 |
+
<div class="spec-label">Writes/Step</div>
|
| 563 |
+
</div>
|
| 564 |
+
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 565 |
</div>
|
| 566 |
+
|
| 567 |
+
<!-- Datasets -->
|
| 568 |
+
<div class="status-card">
|
| 569 |
+
<div class="status-header">
|
| 570 |
+
<h3>Training Datasets</h3>
|
| 571 |
+
</div>
|
| 572 |
+
<div class="dataset-list">
|
| 573 |
+
<div class="dataset-item">
|
| 574 |
+
<span class="dataset-name">shuyuej/English-Pretraining-Dataset</span>
|
| 575 |
+
<span class="dataset-info">Pretraining</span>
|
| 576 |
+
</div>
|
| 577 |
+
<div class="dataset-item">
|
| 578 |
+
<span class="dataset-name">imdatta0/openthink_chat</span>
|
| 579 |
+
<span class="dataset-info">Instruction Tuning</span>
|
| 580 |
+
</div>
|
| 581 |
+
<div class="dataset-item">
|
| 582 |
+
<span class="dataset-name">TeichAI/Step-3.5-Flash-2600x</span>
|
| 583 |
+
<span class="dataset-info">Generalization</span>
|
| 584 |
+
</div>
|
| 585 |
+
<div class="dataset-item">
|
| 586 |
+
<span class="dataset-name">TeichAI/convo-v1</span>
|
| 587 |
+
<span class="dataset-info">Generalization (2x)</span>
|
| 588 |
+
</div>
|
| 589 |
+
</div>
|
| 590 |
</div>
|
| 591 |
+
|
| 592 |
+
<!-- Training Log -->
|
| 593 |
+
<div class="status-card">
|
| 594 |
+
<div class="status-header">
|
| 595 |
+
<h3>Training Log</h3>
|
| 596 |
+
</div>
|
| 597 |
+
<div class="log-list">
|
| 598 |
+
<div class="log-entry">
|
| 599 |
+
<span class="log-date">2026-03-01</span>
|
| 600 |
+
<span class="log-status active">Running</span>
|
| 601 |
+
<span class="log-message">Training on RTX 5090 with torch.compile enabled</span>
|
| 602 |
+
</div>
|
| 603 |
+
<div class="log-entry">
|
| 604 |
+
<span class="log-date">2026-02-28</span>
|
| 605 |
+
<span class="log-status success">Done</span>
|
| 606 |
+
<span class="log-message">Tokenizer vocabulary scan completed - 256 new characters added</span>
|
| 607 |
+
</div>
|
| 608 |
+
<div class="log-entry">
|
| 609 |
+
<span class="log-date">2026-02-27</span>
|
| 610 |
+
<span class="log-status success">Done</span>
|
| 611 |
+
<span class="log-message">Model initialized with ~1M parameters</span>
|
| 612 |
+
</div>
|
| 613 |
+
<div class="log-entry">
|
| 614 |
+
<span class="log-date">2026-02-26</span>
|
| 615 |
+
<span class="log-status success">Done</span>
|
| 616 |
+
<span class="log-message">Checkpoint conversion pipeline verified</span>
|
| 617 |
+
</div>
|
| 618 |
+
</div>
|
| 619 |
</div>
|
| 620 |
+
|
| 621 |
+
<!-- CTA -->
|
| 622 |
+
<div class="cta-section">
|
| 623 |
+
<p>Want to follow along with the training adventures?</p>
|
| 624 |
+
<a href="blog.html" class="btn btn-primary">Read the Blog</a>
|
| 625 |
</div>
|
| 626 |
</div>
|
| 627 |
</div>
|
| 628 |
+
</section>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 629 |
</main>
|
| 630 |
|
| 631 |
+
<footer>
|
| 632 |
<div class="container">
|
| 633 |
+
<p>Built with curiosity over compute</p>
|
| 634 |
+
<p>TinyMemoryLM by <a href="https://github.com">AILAY</a> | 2026</p>
|
|
|
|
|
|
|
| 635 |
</div>
|
| 636 |
</footer>
|
| 637 |
</body>
|