AIencoder committed on
Commit
d02aea6
·
verified ·
1 Parent(s): 61e6c7a

Initial DeepSite commit

Browse files
Files changed (2) hide show
  1. README.md +9 -6
  2. index.html +917 -19
README.md CHANGED
@@ -1,10 +1,13 @@
1
  ---
2
- title: Deepsite Project G3dbm
3
- emoji:
4
- colorFrom: pink
5
- colorTo: red
6
  sdk: static
7
- pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
1
  ---
2
+ title: DeepSite Project
3
+ colorFrom: blue
4
+ colorTo: pink
 
5
  sdk: static
6
+ emoji: 📚
7
+ tags:
8
+ - deepsite-v4
9
  ---
10
 
11
+ # DeepSite Project
12
+
13
+ This project has been created with [DeepSite](https://deepsite.hf.co) AI Vibe Coding.
index.html CHANGED
@@ -1,19 +1,917 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en" class="scroll-smooth">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>llama.cpp - Complete Guide, Tips & Forks</title>
7
+ <script src="https://cdn.tailwindcss.com"></script>
8
+ <script src="https://unpkg.com/lucide@latest"></script>
9
+ <link rel="preconnect" href="https://fonts.googleapis.com">
10
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
11
+ <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500;600&display=swap" rel="stylesheet">
12
+ <script>
13
+ tailwind.config = {
14
+ theme: {
15
+ extend: {
16
+ fontFamily: {
17
+ sans: ['Inter', 'sans-serif'],
18
+ mono: ['JetBrains Mono', 'monospace'],
19
+ },
20
+ colors: {
21
+ primary: '#10b981',
22
+ secondary: '#3b82f6',
23
+ accent: '#f59e0b',
24
+ dark: '#0f172a',
25
+ darker: '#020617',
26
+ code: '#1e293b',
27
+ }
28
+ }
29
+ }
30
+ }
31
+ </script>
32
+ <style>
33
+ body {
34
+ background-color: #020617;
35
+ color: #e2e8f0;
36
+ }
37
+ .glass {
38
+ background: rgba(30, 41, 59, 0.7);
39
+ backdrop-filter: blur(12px);
40
+ border: 1px solid rgba(255, 255, 255, 0.1);
41
+ }
42
+ .glass-hover:hover {
43
+ background: rgba(51, 65, 85, 0.8);
44
+ border-color: rgba(16, 185, 129, 0.5);
45
+ transform: translateY(-4px);
46
+ box-shadow: 0 20px 40px -15px rgba(16, 185, 129, 0.3);
47
+ }
48
+ .code-block {
49
+ background: #0d1117;
50
+ border: 1px solid #30363d;
51
+ border-radius: 0.5rem;
52
+ overflow-x: auto;
53
+ }
54
+ .copy-btn {
55
+ position: absolute;
56
+ top: 0.5rem;
57
+ right: 0.5rem;
58
+ opacity: 0;
59
+ transition: opacity 0.2s;
60
+ }
61
+ .code-block:hover .copy-btn {
62
+ opacity: 1;
63
+ }
64
+ .pre-wrap {
65
+ white-space: pre-wrap;
66
+ word-wrap: break-word;
67
+ }
68
+ .hero-gradient {
69
+ background: linear-gradient(135deg, rgba(16, 185, 129, 0.1) 0%, rgba(59, 130, 246, 0.1) 100%);
70
+ }
71
+ </style>
72
+ </head>
73
+ <body class="antialiased">
74
+
75
+ <!-- Navigation -->
76
+ <nav class="fixed w-full z-50 glass border-b border-slate-800">
77
+ <div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
78
+ <div class="flex justify-between items-center h-16">
79
+ <div class="flex items-center space-x-2">
80
+ <i data-lucide="brain-circuit" class="w-8 h-8 text-primary"></i>
81
+ <span class="text-xl font-bold text-white">llama.cpp</span>
82
+ </div>
83
+ <div class="hidden md:flex space-x-8">
84
+ <a href="#overview" class="text-slate-300 hover:text-primary transition">Overview</a>
85
+ <a href="#installation" class="text-slate-300 hover:text-primary transition">Installation</a>
86
+ <a href="#usage" class="text-slate-300 hover:text-primary transition">Usage</a>
87
+ <a href="#forks" class="text-slate-300 hover:text-primary transition">Forks</a>
88
+ <a href="#optimization" class="text-slate-300 hover:text-primary transition">Tips</a>
89
+ </div>
90
+ <div class="md:hidden">
91
+ <button id="mobile-menu-btn" class="text-slate-300 hover:text-white">
92
+ <i data-lucide="menu" class="w-6 h-6"></i>
93
+ </button>
94
+ </div>
95
+ </div>
96
+ </div>
97
+ <div id="mobile-menu" class="hidden md:hidden glass border-t border-slate-800">
98
+ <div class="px-2 pt-2 pb-3 space-y-1">
99
+ <a href="#overview" class="block px-3 py-2 text-slate-300 hover:text-primary">Overview</a>
100
+ <a href="#installation" class="block px-3 py-2 text-slate-300 hover:text-primary">Installation</a>
101
+ <a href="#usage" class="block px-3 py-2 text-slate-300 hover:text-primary">Usage</a>
102
+ <a href="#forks" class="block px-3 py-2 text-slate-300 hover:text-primary">Forks</a>
103
+ <a href="#optimization" class="block px-3 py-2 text-slate-300 hover:text-primary">Tips</a>
104
+ </div>
105
+ </div>
106
+ </nav>
107
+
108
+ <!-- Hero Section -->
109
+ <section class="relative pt-32 pb-20 lg:pt-48 lg:pb-32 hero-gradient overflow-hidden">
110
+ <div class="absolute inset-0 bg-[radial-gradient(ellipse_at_center,_var(--tw-gradient-stops))] from-primary/20 via-darker to-darker"></div>
111
+ <div class="relative max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 text-center">
112
+ <h1 class="text-5xl md:text-7xl font-extrabold mb-6 bg-gradient-to-r from-primary to-secondary bg-clip-text text-transparent">
113
+ llama.cpp
114
+ </h1>
115
+ <p class="text-2xl md:text-3xl text-slate-300 mb-8 font-light">
116
+ Your Ultimate Guide to Running LLMs Locally
117
+ </p>
118
+ <p class="text-lg text-slate-400 max-w-3xl mx-auto mb-10">
119
+ Complete documentation, installation guides, optimization tips, and forks comparison for the most efficient C++ implementation of LLaMA and other large language models.
120
+ </p>
121
+ <div class="flex flex-col sm:flex-row gap-4 justify-center">
122
+ <a href="#installation" class="px-8 py-4 bg-primary hover:bg-primary/90 text-white font-semibold rounded-lg transition transform hover:scale-105 flex items-center justify-center gap-2">
123
+ <i data-lucide="download" class="w-5 h-5"></i>
124
+ Quick Start
125
+ </a>
126
+ <a href="https://github.com/ggerganov/llama.cpp" target="_blank" class="px-8 py-4 glass hover:bg-slate-800 text-white font-semibold rounded-lg transition flex items-center justify-center gap-2">
127
+ <i data-lucide="github" class="w-5 h-5"></i>
128
+ View on GitHub
129
+ </a>
130
+ </div>
131
+ </div>
132
+ </section>
133
+
134
+ <!-- Overview Section -->
135
+ <section id="overview" class="py-20 bg-slate-900/50">
136
+ <div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
137
+ <div class="text-center mb-16">
138
+ <h2 class="text-3xl md:text-4xl font-bold mb-4 text-white">What is llama.cpp?</h2>
139
+ <p class="text-slate-400 text-lg max-w-2xl mx-auto">
140
+ A high-performance C++ port of Meta's LLaMA model, enabling efficient inference on consumer hardware.
141
+ </p>
142
+ </div>
143
+
144
+ <div class="grid md:grid-cols-3 gap-8">
145
+ <div class="glass p-6 rounded-xl glass-hover transition duration-300">
146
+ <div class="w-12 h-12 bg-primary/20 rounded-lg flex items-center justify-center mb-4">
147
+ <i data-lucide="cpu" class="w-6 h-6 text-primary"></i>
148
+ </div>
149
+ <h3 class="text-xl font-semibold mb-2 text-white">Zero Dependencies</h3>
150
+ <p class="text-slate-400">Pure C++ implementation with no dependencies. Just compile and run on CPU or GPU.</p>
151
+ </div>
152
+
153
+ <div class="glass p-6 rounded-xl glass-hover transition duration-300">
154
+ <div class="w-12 h-12 bg-secondary/20 rounded-lg flex items-center justify-center mb-4">
155
+ <i data-lucide="zap" class="w-6 h-6 text-secondary"></i>
156
+ </div>
157
+ <h3 class="text-xl font-semibold mb-2 text-white">Optimized Inference</h3>
158
+ <p class="text-slate-400">Highly optimized for 4-bit and 5-bit quantized models. Supports GPU acceleration via CUDA, Metal, and Vulkan.</p>
159
+ </div>
160
+
161
+ <div class="glass p-6 rounded-xl glass-hover transition duration-300">
162
+ <div class="w-12 h-12 bg-accent/20 rounded-lg flex items-center justify-center mb-4">
163
+ <i data-lucide="layers" class="w-6 h-6 text-accent"></i>
164
+ </div>
165
+ <h3 class="text-xl font-semibold mb-2 text-white">Multiple Models</h3>
166
+ <p class="text-slate-400">Supports LLaMA, LLaMA 2, Falcon, Wizard, Vicuna, and many more GGUF models.</p>
167
+ </div>
168
+ </div>
169
+
170
+ <!-- Key Stats -->
171
+ <div class="mt-16 grid grid-cols-2 md:grid-cols-4 gap-8">
172
+ <div class="text-center p-6 glass rounded-xl">
173
+ <div class="text-3xl font-bold text-primary mb-1">60k+</div>
174
+ <div class="text-sm text-slate-400">GitHub Stars</div>
175
+ </div>
176
+ <div class="text-center p-6 glass rounded-xl">
177
+ <div class="text-3xl font-bold text-secondary mb-1">Q4_K_M</div>
178
+ <div class="text-sm text-slate-400">Recommended Quant</div>
179
+ </div>
180
+ <div class="text-center p-6 glass rounded-xl">
181
+ <div class="text-3xl font-bold text-accent mb-1">CUDA</div>
182
+ <div class="text-sm text-slate-400">GPU Support</div>
183
+ </div>
184
+ <div class="text-center p-6 glass rounded-xl">
185
+ <div class="text-3xl font-bold text-purple-400 mb-1">8GB+</div>
186
+ <div class="text-sm text-slate-400">RAM Required</div>
187
+ </div>
188
+ </div>
189
+ </div>
190
+ </section>
191
+
192
+ <!-- Installation Section -->
193
+ <section id="installation" class="py-20">
194
+ <div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
195
+ <div class="text-center mb-12">
196
+ <h2 class="text-3xl md:text-4xl font-bold mb-4 text-white">Installation Guide</h2>
197
+ <p class="text-slate-400">Multiple ways to get llama.cpp running on your machine.</p>
198
+ </div>
199
+
200
+ <div class="space-y-8">
201
+ <!-- Pre-built Binaries -->
202
+ <div class="glass p-6 rounded-xl">
203
+ <h3 class="text-xl font-semibold mb-4 text-white flex items-center gap-2">
204
+ <i data-lucide="package" class="w-5 h-5 text-primary"></i>
205
+ Option 1: Pre-built Releases (Windows, macOS, Linux)
206
+ </h3>
207
+ <div class="relative code-block p-4 mb-4">
208
+ <button class="copy-btn p-2 bg-slate-700 rounded hover:bg-slate-600 text-white" onclick="copyCode(this)">
209
+ <i data-lucide="copy" class="w-4 h-4"></i>
210
+ </button>
211
+ <code class="text-slate-300 font-mono text-sm">
212
+ # Download from:<br>
213
+ https://github.com/ggerganov/llama.cpp/releases<br><br>
214
+ # Look for: llama-b[BUILD]-bin-win-[ARCH]-[BUILD_TYPE].zip<br>
215
+ # llama-b[BUILD]-macOS-[ARCH].zip<br>
216
+ # llama-b[BUILD]-bin-ubuntu-[ARCH].[EXT]
217
+ </code>
218
+ </div>
219
+ </div>
220
+
221
+ <!-- Build from Source -->
222
+ <div class="glass p-6 rounded-xl">
223
+ <h3 class="text-xl font-semibold mb-4 text-white flex items-center gap-2">
224
+ <i data-lucide="terminal" class="w-5 h-5 text-secondary"></i>
225
+ Option 2: Build from Source
226
+ </h3>
227
+
228
+ <div class="space-y-4">
229
+ <div>
230
+ <h4 class="text-sm font-semibold text-slate-300 mb-2">Mac/Linux</h4>
231
+ <div class="relative code-block p-4">
232
+ <button class="copy-btn p-2 bg-slate-700 rounded hover:bg-slate-600 text-white" onclick="copyCode(this)">
233
+ <i data-lucide="copy" class="w-4 h-4"></i>
234
+ </button>
235
+ <pre class="font-mono text-sm text-slate-300 pre-wrap">git clone https://github.com/ggerganov/llama.cpp.git
236
+ cd llama.cpp
237
+ make</pre>
238
+ </div>
239
+ </div>
240
+
241
+ <div>
242
+ <h4 class="text-sm font-semibold text-slate-300 mb-2">Windows (CMake)</h4>
243
+ <div class="relative code-block p-4">
244
+ <button class="copy-btn p-2 bg-slate-700 rounded hover:bg-slate-600 text-white" onclick="copyCode(this)">
245
+ <i data-lucide="copy" class="w-4 h-4"></i>
246
+ </button>
247
+ <pre class="font-mono text-sm text-slate-300 pre-wrap">git clone https://github.com/ggerganov/llama.cpp.git
248
+ cd llama.cpp
249
+ cmake .
250
+ cmake --build . --config Release</pre>
251
+ </div>
252
+ </div>
253
+
254
+ <div>
255
+ <h4 class="text-sm font-semibold text-slate-300 mb-2">With CUDA Support (GPU)</h4>
256
+ <div class="relative code-block p-4">
257
+ <button class="copy-btn p-2 bg-slate-700 rounded hover:bg-slate-600 text-white" onclick="copyCode(this)">
258
+ <i data-lucide="copy" class="w-4 h-4"></i>
259
+ </button>
260
+ <pre class="font-mono text-sm text-slate-300 pre-wrap">make LLAMA_CUDA=1
261
+ # or for Windows
262
+ cmake -DLLAMA_CUDA=ON .
263
+ cmake --build . --config Release</pre>
264
+ </div>
265
+ </div>
266
+ </div>
267
+ </div>
268
+
269
+ <!-- Docker -->
270
+ <div class="glass p-6 rounded-xl">
271
+ <h3 class="text-xl font-semibold mb-4 text-white flex items-center gap-2">
272
+ <i data-lucide="container" class="w-5 h-5 text-accent"></i>
273
+ Option 3: Docker
274
+ </h3>
275
+ <div class="relative code-block p-4">
276
+ <button class="copy-btn p-2 bg-slate-700 rounded hover:bg-slate-600 text-white" onclick="copyCode(this)">
277
+ <i data-lucide="copy" class="w-4 h-4"></i>
278
+ </button>
279
+ <pre class="font-mono text-sm text-slate-300 pre-wrap">docker pull ghcr.io/ggerganov/llama.cpp:latest
280
+ docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:latest --api -m /models/your-model.gguf</pre>
281
+ </div>
282
+ </div>
283
+ </div>
284
+ </div>
285
+ </section>
286
+
287
+ <!-- Usage Section -->
288
+ <section id="usage" class="py-20 bg-slate-900/50">
289
+ <div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
290
+ <div class="text-center mb-12">
291
+ <h2 class="text-3xl md:text-4xl font-bold mb-4 text-white">Usage & Commands</h2>
292
+ <p class="text-slate-400">Master the CLI and server modes.</p>
293
+ </div>
294
+
295
+ <div class="grid lg:grid-cols-2 gap-8">
296
+ <!-- Basic Commands -->
297
+ <div class="glass p-6 rounded-xl">
298
+ <h3 class="text-xl font-semibold mb-4 text-white">Basic Commands</h3>
299
+ <div class="space-y-4">
300
+ <div>
301
+ <h4 class="text-sm font-semibold text-slate-300 mb-2">Simple Inference</h4>
302
+ <div class="relative code-block p-3">
303
+ <button class="copy-btn p-1.5 bg-slate-700 rounded hover:bg-slate-600 text-white" onclick="copyCode(this)">
304
+ <i data-lucide="copy" class="w-3 h-3"></i>
305
+ </button>
306
+ <code class="text-xs font-mono text-slate-300">./main -m models/model.gguf -p "Your prompt here"</code>
307
+ </div>
308
+ </div>
309
+ <div>
310
+ <h4 class="text-sm font-semibold text-slate-300 mb-2">Interactive Chat</h4>
311
+ <div class="relative code-block p-3">
312
+ <button class="copy-btn p-1.5 bg-slate-700 rounded hover:bg-slate-600 text-white" onclick="copyCode(this)">
313
+ <i data-lucide="copy" class="w-3 h-3"></i>
314
+ </button>
315
+ <code class="text-xs font-mono text-slate-300">./main -m models/model.gguf --interactive</code>
316
+ </div>
317
+ </div>
318
+ <div>
319
+ <h4 class="text-sm font-semibold text-slate-300 mb-2">Server Mode (API)</h4>
320
+ <div class="relative code-block p-3">
321
+ <button class="copy-btn p-1.5 bg-slate-700 rounded hover:bg-slate-600 text-white" onclick="copyCode(this)">
322
+ <i data-lucide="copy" class="w-3 h-3"></i>
323
+ </button>
324
+ <code class="text-xs font-mono text-slate-300">./server -m models/model.gguf -c 2048 --port 8080</code>
325
+ </div>
326
+ </div>
327
+ </div>
328
+ </div>
329
+
330
+ <!-- Command Parameters -->
331
+ <div class="glass p-6 rounded-xl">
332
+ <h3 class="text-xl font-semibold mb-4 text-white">Important Flags</h3>
333
+ <div class="space-y-3">
334
+ <div class="flex items-start gap-3">
335
+ <code class="text-primary font-mono text-sm">-c, --ctx_size</code>
336
+ <span class="text-slate-400 text-sm">Context size (e.g., 2048, 4096, 8192)</span>
337
+ </div>
338
+ <div class="flex items-start gap-3">
339
+ <code class="text-primary font-mono text-sm">-n, --n_predict</code>
340
+ <span class="text-slate-400 text-sm">Number of tokens to predict (-1 = infinity)</span>
341
+ </div>
342
+ <div class="flex items-start gap-3">
343
+ <code class="text-primary font-mono text-sm">-t, --threads</code>
344
+ <span class="text-slate-400 text-sm">Number of CPU threads (recommend: physical cores)</span>
345
+ </div>
346
+ <div class="flex items-start gap-3">
347
+ <code class="text-primary font-mono text-sm">--temp</code>
348
+ <span class="text-slate-400 text-sm">Temperature (0.8 is standard)</span>
349
+ </div>
350
+ <div class="flex items-start gap-3">
351
+ <code class="text-primary font-mono text-sm">--gpu_layers/-ngl</code>
352
+ <span class="text-slate-400 text-sm">Number of layers to offload to GPU</span>
353
+ </div>
354
+ </div>
355
+ </div>
356
+
357
+ <!-- API Example -->
358
+ <div class="glass p-6 rounded-xl lg:col-span-2">
359
+ <h3 class="text-xl font-semibold mb-4 text-white">API Example (OpenAI-compatible)</h3>
360
+ <div class="relative code-block p-4">
361
+ <button class="copy-btn p-2 bg-slate-700 rounded hover:bg-slate-600 text-white" onclick="copyCode(this)">
362
+ <i data-lucide="copy" class="w-4 h-4"></i>
363
+ </button>
364
+ <pre class="font-mono text-sm text-slate-300 pre-wrap"># Start server
365
+ ./server -m models/llama-2-7b-chat.Q4_K_M.gguf -c 4096
366
+
367
+ # Send request
368
+ curl -X POST http://localhost:8080/v1/chat/completions \
369
+ -H "Content-Type: application/json" \
370
+ -d '{
371
+ "messages": [
372
+ {"role": "system", "content": "You are a helpful assistant."},
373
+ {"role": "user", "content": "Hello!"}
374
+ ]
375
+ }'</pre>
376
+ </div>
377
+ </div>
378
+ </div>
379
+ </div>
380
+ </section>
381
+
382
+ <!-- Forks Section -->
383
+ <section id="forks" class="py-20">
384
+ <div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
385
+ <div class="text-center mb-12">
386
+ <h2 class="text-3xl md:text-4xl font-bold mb-4 text-white">Popular Forks & Projects</h2>
387
+ <p class="text-slate-400">Specialized versions and wrappers built on llama.cpp</p>
388
+ </div>
389
+
390
+ <div class="grid md:grid-cols-2 lg:grid-cols-3 gap-6">
391
+ <!-- Ollama -->
392
+ <div class="glass p-6 rounded-xl glass-hover transition duration-300 border-l-4 border-primary">
393
+ <div class="flex items-center justify-between mb-3">
394
+ <h3 class="text-xl font-bold text-white">Ollama</h3>
395
+ <span class="px-2 py-1 bg-primary/20 text-primary text-xs rounded font-mono">Top Pick</span>
396
+ </div>
397
+ <p class="text-slate-400 mb-4 text-sm">The easiest way to run LLaMA, Mistral, and other models locally. Provides a CLI and API for running models.</p>
398
+ <ul class="text-xs text-slate-500 space-y-1 mb-4">
399
+ <li>• macOS, Linux, Windows</li>
400
+ <li>• Model library available</li>
401
+ <li>• REST API included</li>
402
+ <li>• One-liner install</li>
403
+ </ul>
404
+ <a href="https://ollama.ai" target="_blank" class="text-primary hover:text-primary/80 text-sm flex items-center gap-1">
405
+ ollama.ai <i data-lucide="external-link" class="w-3 h-3"></i>
406
+ </a>
407
+ </div>
408
+
409
+ <!-- Oobabooga -->
410
+ <div class="glass p-6 rounded-xl glass-hover transition duration-300 border-l-4 border-secondary">
411
+ <div class="flex items-center justify-between mb-3">
412
+ <h3 class="text-xl font-bold text-white">TextGen WebUI</h3>
413
+ <span class="px-2 py-1 bg-secondary/20 text-secondary text-xs rounded font-mono">GUI</span>
414
+ </div>
415
+ <p class="text-slate-400 mb-4 text-sm">A web interface for LLMs. Supports multiple backends including llama.cpp with extensive character/persona features.</p>
416
+ <ul class="text-xs text-slate-500 space-y-1 mb-4">
417
+ <li>• Web-based interface</li>
418
+ <li>• Chat/Completions modes</li>
419
+ <li>• LoRA support</li>
420
+ <li>• Extensions support</li>
421
+ </ul>
422
+ <a href="https://github.com/oobabooga/text-generation-webui" target="_blank" class="text-secondary hover:text-secondary/80 text-sm flex items-center gap-1">
423
+ GitHub <i data-lucide="external-link" class="w-3 h-3"></i>
424
+ </a>
425
+ </div>
426
+
427
+ <!-- Koboldcpp -->
428
+ <div class="glass p-6 rounded-xl glass-hover transition duration-300 border-l-4 border-pink-500">
429
+ <div class="flex items-center justify-between mb-3">
430
+ <h3 class="text-xl font-bold text-white">KoboldCpp</h3>
431
+ <span class="px-2 py-1 bg-pink-500/20 text-pink-500 text-xs rounded font-mono">Gaming</span>
432
+ </div>
433
+ <p class="text-slate-400 mb-4 text-sm">A user-friendly wrapper for llama.cpp optimized for story writing and text adventure gaming.</p>
434
+ <ul class="text-xs text-slate-500 space-y-1 mb-4">
435
+ <li>• Kobold AI compatibility</li>
436
+ <li>• Streamlined UI</li>
437
+ <li>• Adventure mode</li>
438
+ <li>• Easy single-binary setup</li>
439
+ </ul>
440
+ <a href="https://github.com/LostRuins/koboldcpp" target="_blank" class="text-pink-500 hover:text-pink-400 text-sm flex items-center gap-1">
441
+ GitHub <i data-lucide="external-link" class="w-3 h-3"></i>
442
+ </a>
443
+ </div>
444
+
445
+ <!-- llamafile -->
446
+ <div class="glass p-6 rounded-xl glass-hover transition duration-300 border-l-4 border-accent">
447
+ <div class="flex items-center justify-between mb-3">
448
+ <h3 class="text-xl font-bold text-white">llamafile</h3>
449
+ <span class="px-2 py-1 bg-accent/20 text-accent text-xs rounded font-mono">Portable</span>
450
+ </div>
451
+ <p class="text-slate-400 mb-4 text-sm">Mozilla's project. LLMs packaged as single executable files that run on most computers without dependencies.</p>
452
+ <ul class="text-xs text-slate-500 space-y-1 mb-4">
453
+ <li>• Single-file executables</li>
454
+ <li>• No installation needed</li>
455
+ <li>• Cross-platform</li>
456
+ <li>• Embeddable</li>
457
+ </ul>
458
+ <a href="https://github.com/Mozilla-Ocho/llamafile" target="_blank" class="text-accent hover:text-accent/80 text-sm flex items-center gap-1">
459
+ GitHub <i data-lucide="external-link" class="w-3 h-3"></i>
460
+ </a>
461
+ </div>
462
+
463
+ <!-- LocalAI -->
464
+ <div class="glass p-6 rounded-xl glass-hover transition duration-300 border-l-4 border-purple-500">
465
+ <div class="flex items-center justify-between mb-3">
466
+ <h3 class="text-xl font-bold text-white">LocalAI</h3>
467
+ <span class="px-2 py-1 bg-purple-500/20 text-purple-500 text-xs rounded font-mono">API</span>
468
+ </div>
469
+ <p class="text-slate-400 mb-4 text-sm">Drop-in OpenAI API replacement. Self-hosted with llama.cpp backend. Supports text generation, images, and audio.</p>
470
+ <ul class="text-xs text-slate-500 space-y-1 mb-4">
471
+ <li>• OpenAI API compatible</li>
472
+ <li>• Docker ready</li>
473
+ <li>• Model hot-reloading</li>
474
+ <li>• Multiple backends</li>
475
+ </ul>
476
+ <a href="https://localai.io" target="_blank" class="text-purple-500 hover:text-purple-400 text-sm flex items-center gap-1">
477
+ localai.io <i data-lucide="external-link" class="w-3 h-3"></i>
478
+ </a>
479
+ </div>
480
+
481
+ <!-- LM Studio -->
482
+ <div class="glass p-6 rounded-xl glass-hover transition duration-300 border-l-4 border-cyan-500">
483
+ <div class="flex items-center justify-between mb-3">
484
+ <h3 class="text-xl font-bold text-white">LM Studio</h3>
485
+ <span class="px-2 py-1 bg-cyan-500/20 text-cyan-500 text-xs rounded font-mono">App</span>
486
+ </div>
487
+ <p class="text-slate-400 mb-4 text-sm">Desktop application for running local LLMs with a beautiful interface. Easy model downloading and chatting.</p>
488
+ <ul class="text-xs text-slate-500 space-y-1 mb-4">
489
+ <li>• Desktop GUI</li>
490
+ <li>• Chat interface</li>
491
+ <li>• HuggingFace integration</li>
492
+ <li>• macOS/Windows</li>
493
+ </ul>
494
+ <a href="https://lmstudio.ai" target="_blank" class="text-cyan-500 hover:text-cyan-400 text-sm flex items-center gap-1">
495
+ lmstudio.ai <i data-lucide="external-link" class="w-3 h-3"></i>
496
+ </a>
497
+ </div>
498
+ </div>
499
+ </div>
500
+ </section>
501
+
502
+ <!-- Optimization Tips -->
503
+ <section id="optimization" class="py-20 bg-slate-900/50">
504
+ <div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
505
+ <div class="text-center mb-12">
506
+ <h2 class="text-3xl md:text-4xl font-bold mb-4 text-white">Optimization Tips</h2>
507
+ <p class="text-slate-400">Get the best performance from your models.</p>
508
+ </div>
509
+
510
+ <div class="grid md:grid-cols-2 gap-8">
511
+ <!-- Quantization Guide -->
512
+ <div class="glass p-6 rounded-xl">
513
+ <h3 class="text-xl font-semibold mb-4 text-white flex items-center gap-2">
514
+ <i data-lucide="scale" class="w-5 h-5 text-primary"></i>
515
+ Quantization Guide (GGUF)
516
+ </h3>
517
+ <div class="space-y-3">
518
+ <div class="p-3 bg-slate-800/50 rounded-lg border border-slate-700">
519
+ <div class="flex justify-between mb-1">
520
+ <span class="font-semibold text-primary font-mono text-sm">Q4_K_M</span>
521
+ <span class="text-slate-400 text-xs">Best balance</span>
522
+ </div>
523
+ <p class="text-slate-400 text-xs">Recommended for most models. ~4.7GB for 7B model. Quality slightly better than Q4_0.</p>
524
+ </div>
525
+ <div class="p-3 bg-slate-800/50 rounded-lg border border-slate-700">
526
+ <div class="flex justify-between mb-1">
527
+ <span class="font-semibold text-slate-300 font-mono text-sm">Q5_K_M</span>
528
+ <span class="text-slate-400 text-xs">High quality</span>
529
+ </div>
530
+ <p class="text-slate-400 text-xs">For quality-critical tasks. ~5.8GB for 7B model.</p>
531
+ </div>
532
+ <div class="p-3 bg-slate-800/50 rounded-lg border border-slate-700">
533
+ <div class="flex justify-between mb-1">
534
+ <span class="font-semibold text-slate-300 font-mono text-sm">Q8_0</span>
535
+ <span class="text-slate-400 text-xs">Maximum quality</span>
536
+ </div>
537
+ <p class="text-slate-400 text-xs">Almost unnoticeable loss. ~7GB for 7B model.</p>
538
+ </div>
539
+ </div>
540
+ </div>
541
+
542
+ <!-- Performance Tips -->
543
+ <div class="glass p-6 rounded-xl">
544
+ <h3 class="text-xl font-semibold mb-4 text-white flex items-center gap-2">
545
+ <i data-lucide="gauge" class="w-5 h-5 text-secondary"></i>
546
+ Performance Tips
547
+ </h3>
548
+ <ul class="space-y-3 text-slate-300 text-sm">
549
+ <li class="flex items-start gap-2">
550
+ <span class="text-primary">•</span>
551
+ <span><strong>GPU Offloading:</strong> Use <code class="bg-slate-800 px-1 rounded text-primary">-ngl 35</code> on macOS (Metal) or <code class="bg-slate-800 px-1 rounded text-primary">-ngl 33</code> on NVIDIA (CUDA) for 7B models</span>
552
+ </li>
553
+ <li class="flex items-start gap-2">
554
+ <span class="text-primary">•</span>
555
+ <span><strong>Context Size:</strong> Start with 2048, increase based on your needs (uses more VRAM)</span>
556
+ </li>
557
+ <li class="flex items-start gap-2">
558
+ <span class="text-primary">•</span>
559
+ <span><strong>Threads:</strong> Set to your physical CPU cores count with <code class="bg-slate-800 px-1 rounded text-primary">-t [cores]</code></span>
560
+ </li>
561
+ <li class="flex items-start gap-2">
562
+ <span class="text-primary">•</span>
563
+ <span><strong>Memory Mapping:</strong> Enable with <code class="bg-slate-800 px-1 rounded text-primary">--mlock</code> to keep model in RAM</span>
564
+ </li>
565
+ <li class="flex items-start gap-2">
566
+ <span class="text-primary">•</span>
567
+ <span><strong>Batch Size:</strong> Increase <code class="bg-slate-800 px-1 rounded text-primary">-b 512</code> for higher throughput</span>
568
+ </li>
569
+ </ul>
570
+ </div>
571
+
572
+ <!-- Hardware Recommendations -->
573
+ <div class="glass p-6 rounded-xl md:col-span-2">
574
+ <h3 class="text-xl font-semibold mb-4 text-white flex items-center gap-2">
575
+ <i data-lucide="hard-drive" class="w-5 h-5 text-accent"></i>
576
+ Hardware Requirements
577
+ </h3>
578
+ <div class="grid grid-cols-2 md:grid-cols-4 gap-4">
579
+ <div class="text-center p-4 bg-slate-800/50 rounded-lg">
580
+ <div class="text-lg font-bold text-white mb-1">7B Model</div>
581
+ <div class="text-xs text-slate-400">~4-8GB RAM</div>
582
+ <div class="text-xs text-slate-500 mt-1">M1 Mac Minimum</div>
583
+ </div>
584
+ <div class="text-center p-4 bg-slate-800/50 rounded-lg">
585
+ <div class="text-lg font-bold text-white mb-1">13B Model</div>
586
+ <div class="text-xs text-slate-400">~8-12GB RAM</div>
587
+ <div class="text-xs text-slate-500 mt-1">Requires GPU</div>
588
+ </div>
589
+ <div class="text-center p-4 bg-slate-800/50 rounded-lg">
590
+ <div class="text-lg font-bold text-white mb-1">30B Model</div>
591
+ <div class="text-xs text-slate-400">~20GB RAM</div>
592
+ <div class="text-xs text-slate-500 mt-1">32GB System + GPU</div>
593
+ </div>
594
+ <div class="text-center p-4 bg-slate-800/50 rounded-lg">
595
+ <div class="text-lg font-bold text-white mb-1">70B Model</div>
596
+ <div class="text-xs text-slate-400">~40GB+ RAM</div>
597
+ <div class="text-xs text-slate-500 mt-1">High-end GPU</div>
598
+ </div>
599
+ </div>
600
+ </div>
601
+ </div>
602
+ </div>
603
+ </section>
604
+
605
+ <!-- Convert Models Section -->
606
+ <section class="py-20">
607
+ <div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
608
+ <div class="glass p-8 rounded-xl border border-slate-700">
609
+ <h2 class="text-2xl font-bold mb-4 text-white">Converting Models</h2>
610
+ <p class="text-slate-400 mb-4">Converting Safetensors/PyTorch models to GGUF format for llama.cpp:</p>
611
+ <div class="relative code-block p-4">
612
+ <button class="copy-btn p-2 bg-slate-700 rounded hover:bg-slate-600 text-white" onclick="copyCode(this)">
613
+ <i data-lucide="copy" class="w-4 h-4"></i>
614
+ </button>
615
+ <pre class="font-mono text-sm text-slate-300 pre-wrap"># Install dependencies
616
+ python -m pip install gguf protobuf
617
+
618
+ # Convert HuggingFace model to GGUF
619
+ python convert-hf-to-gguf.py /path/to/model \
620
+ --outfile /path/to/output/model.gguf \
621
+ --outtype q4_k_m</pre>
622
+ </div>
623
+ <p class="text-xs text-slate-500 mt-3">Available outtypes: f32, f16, bf16, q8_0, q4_0, q4_1, q4_k_s, q4_k_m, q5_k_s, q5_k_m, q6_k</p>
624
+ </div>
625
+ </div>
626
+ </section>
627
+
628
+ <!-- Model Sources Section -->
629
+ <section class="py-20 bg-slate-900/50">
630
+ <div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
631
+ <div class="text-center mb-12">
632
+ <h2 class="text-3xl md:text-4xl font-bold mb-4 text-white">Where to Download Models</h2>
633
+ <p class="text-slate-400">Pre-converted GGUF models ready to use.</p>
634
+ </div>
635
+
636
+ <div class="grid md:grid-cols-3 gap-6">
637
+ <div class="glass p-6 rounded-xl glass-hover transition duration-300">
638
+ <div class="flex items-center gap-3 mb-4">
639
+ <div class="w-10 h-10 bg-yellow-500/20 rounded-lg flex items-center justify-center">
640
+ <i data-lucide="database" class="w-5 h-5 text-yellow-500"></i>
641
+ </div>
642
+ <h3 class="text-lg font-bold text-white">TheBloke</h3>
643
+ </div>
644
+ <p class="text-slate-400 text-sm mb-4">The most popular source for quantized GGUF models. Hundreds of models including Llama 2, Mistral, CodeLlama, and more.</p>
645
+ <a href="https://huggingface.co/TheBloke" target="_blank" class="text-primary hover:text-primary/80 text-sm flex items-center gap-1">
646
+ View on HuggingFace <i data-lucide="external-link" class="w-3 h-3"></i>
647
+ </a>
648
+ </div>
649
+
650
+ <div class="glass p-6 rounded-xl glass-hover transition duration-300">
651
+ <div class="flex items-center gap-3 mb-4">
652
+ <div class="w-10 h-10 bg-blue-500/20 rounded-lg flex items-center justify-center">
653
+ <i data-lucide="layers" class="w-5 h-5 text-blue-500"></i>
654
+ </div>
655
+ <h3 class="text-lg font-bold text-white">NousResearch</h3>
656
+ </div>
657
+ <p class="text-slate-400 text-sm mb-4">Research-focused models including Hermes, Synthia, and other fine-tuned versions with GGUF support.</p>
658
+ <a href="https://huggingface.co/NousResearch" target="_blank" class="text-blue-500 hover:text-blue-400 text-sm flex items-center gap-1">
659
+ View on HuggingFace <i data-lucide="external-link" class="w-3 h-3"></i>
660
+ </a>
661
+ </div>
662
+
663
+ <div class="glass p-6 rounded-xl glass-hover transition duration-300">
664
+ <div class="flex items-center gap-3 mb-4">
665
+ <div class="w-10 h-10 bg-purple-500/20 rounded-lg flex items-center justify-center">
666
+ <i data-lucide="cloud" class="w-5 h-5 text-purple-500"></i>
667
+ </div>
668
+ <h3 class="text-lg font-bold text-white">LWDW (RunPod)</h3>
669
+ </div>
670
+ <p class="text-slate-400 text-sm mb-4">Specialized in large model variants (70B+) and unique quantizations. Great for GPU cloud inference.</p>
671
+ <a href="https://huggingface.co/LWDW" target="_blank" class="text-purple-500 hover:text-purple-400 text-sm flex items-center gap-1">
672
+ View on HuggingFace <i data-lucide="external-link" class="w-3 h-3"></i>
673
+ </a>
674
+ </div>
675
+ </div>
676
+
677
+ <div class="mt-8 glass p-6 rounded-xl">
678
+ <h3 class="text-lg font-semibold mb-3 text-white">Popular Models to Try</h3>
679
+ <div class="grid md:grid-cols-2 lg:grid-cols-4 gap-4 text-sm">
680
+ <div class="p-3 bg-slate-800/50 rounded border border-slate-700">
681
+ <code class="text-primary font-mono">mistral-7b-instruct</code>
682
+ <p class="text-slate-500 text-xs mt-1">Fast, great quality</p>
683
+ </div>
684
+ <div class="p-3 bg-slate-800/50 rounded border border-slate-700">
685
+ <code class="text-primary font-mono">llama-2-7b/13b-chat</code>
686
+ <p class="text-slate-500 text-xs mt-1">All-purpose, balanced</p>
687
+ </div>
688
+ <div class="p-3 bg-slate-800/50 rounded border border-slate-700">
689
+ <code class="text-primary font-mono">codellama-7b/13b</code>
690
+ <p class="text-slate-500 text-xs mt-1">Code generation</p>
691
+ </div>
692
+ <div class="p-3 bg-slate-800/50 rounded border border-slate-700">
693
+ <code class="text-primary font-mono">neural-chat-7b</code>
694
+ <p class="text-slate-500 text-xs mt-1">Conversations</p>
695
+ </div>
696
+ </div>
697
+ </div>
698
+ </div>
699
+ </section>
700
+
701
+ <!-- Advanced Features Section -->
702
+ <section class="py-20">
703
+ <div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
704
+ <div class="text-center mb-12">
705
+ <h2 class="text-3xl md:text-4xl font-bold mb-4 text-white">Advanced Features</h2>
706
+ <p class="text-slate-400">Unlock the full potential of llama.cpp.</p>
707
+ </div>
708
+
709
+ <div class="space-y-6">
710
+ <div class="grid md:grid-cols-2 gap-6">
711
+ <div class="glass p-6 rounded-xl">
712
+ <h3 class="text-xl font-semibold mb-3 text-white flex items-center gap-2">
713
+ <i data-lucide="zap" class="w-5 h-5 text-yellow-500"></i>
714
+ Speculative Decoding
715
+ </h3>
716
+ <p class="text-slate-400 text-sm mb-3">Use a smaller draft model to speed up token generation. Can achieve 2-3x speedup on supported hardware.</p>
717
+ <div class="relative code-block p-3">
718
+ <button class="copy-btn p-1.5 bg-slate-700 rounded hover:bg-slate-600 text-white" onclick="copyCode(this)">
719
+ <i data-lucide="copy" class="w-3 h-3"></i>
720
+ </button>
721
+ <code class="text-xs font-mono text-slate-300">./speculative -m large_model.gguf -md small_model.gguf -ngl 35 --draft 10</code>
722
+ </div>
723
+ </div>
724
+
725
+ <div class="glass p-6 rounded-xl">
726
+ <h3 class="text-xl font-semibold mb-3 text-white flex items-center gap-2">
727
+ <i data-lucide="terminal" class="w-5 h-5 text-green-500"></i>
728
+ Grammar-Based Sampling
729
+ </h3>
730
+ <p class="text-slate-400 text-sm mb-3">Force JSON output or specific formats using GBNF grammar files. Perfect for structured output.</p>
731
+ <div class="relative code-block p-3">
732
+ <button class="copy-btn p-1.5 bg-slate-700 rounded hover:bg-slate-600 text-white" onclick="copyCode(this)">
733
+ <i data-lucide="copy" class="w-3 h-3"></i>
734
+ </button>
735
+ <code class="text-xs font-mono text-slate-300">./main -m model.gguf --grammar-file json.gbnf -p "Generate JSON:"</code>
736
+ </div>
737
+ </div>
738
+ </div>
739
+
740
+ <div class="grid md:grid-cols-2 gap-6">
741
+ <div class="glass p-6 rounded-xl">
742
+ <h3 class="text-xl font-semibold mb-3 text-white flex items-center gap-2">
743
+ <i data-lucide="cpu" class="w-5 h-5 text-blue-500"></i>
744
+ Continuous Batching
745
+ </h3>
746
+ <p class="text-slate-400 text-sm">Process multiple prompts simultaneously in server mode for higher throughput in production environments.</p>
747
+ </div>
748
+
749
+ <div class="glass p-6 rounded-xl">
750
+ <h3 class="text-xl font-semibold mb-3 text-white flex items-center gap-2">
751
+ <i data-lucide="layers" class="w-5 h-5 text-purple-500"></i>
752
+ LoRA Support
753
+ </h3>
754
+ <p class="text-slate-400 text-sm mb-3">Load LoRA adapters on top of base models without merging. Hot-swap adapters at runtime.</p>
755
+ <div class="relative code-block p-3">
756
+ <button class="copy-btn p-1.5 bg-slate-700 rounded hover:bg-slate-600 text-white" onclick="copyCode(this)">
757
+ <i data-lucide="copy" class="w-3 h-3"></i>
758
+ </button>
759
+ <code class="text-xs font-mono text-slate-300">./main -m base.gguf --lora-scaled adapter.bin 0.8</code>
760
+ </div>
761
+ </div>
762
+ </div>
763
+ </div>
764
+ </div>
765
+ </section>
766
+
767
+ <!-- Troubleshooting Section -->
768
+ <section class="py-20 bg-slate-900/50">
769
+ <div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
770
+ <div class="text-center mb-12">
771
+ <h2 class="text-3xl md:text-4xl font-bold mb-4 text-white">Troubleshooting</h2>
772
+ <p class="text-slate-400">Common issues and solutions.</p>
773
+ </div>
774
+
775
+ <div class="space-y-4">
776
+ <div class="glass p-6 rounded-xl border-l-4 border-red-500">
777
+ <h3 class="text-lg font-semibold text-white mb-2">CUDA Out of Memory</h3>
778
+ <p class="text-slate-400 text-sm mb-2">Error: <code class="bg-red-900/30 text-red-300 px-1 rounded">CUDA out of memory</code></p>
779
+ <p class="text-slate-400 text-sm">Reduce GPU layers or use a smaller model. Try <code class="text-primary">-ngl 20</code> instead of <code class="text-primary">-ngl 35</code>, or use a Q4_K_M quantized model instead of Q5.</p>
780
+ </div>
781
+
782
+ <div class="glass p-6 rounded-xl border-l-4 border-yellow-500">
783
+ <h3 class="text-lg font-semibold text-white mb-2">Slow Token Generation</h3>
784
+ <p class="text-slate-400 text-sm mb-2">Model is running on CPU instead of GPU.</p>
785
+ <p class="text-slate-400 text-sm">Ensure you built with CUDA/Metal support. Check <code class="text-primary">nvidia-smi</code> or Activity Monitor to verify GPU usage. Increase <code class="text-primary">-ngl</code> to offload more layers.</p>
786
+ </div>
787
+
788
+ <div class="glass p-6 rounded-xl border-l-4 border-blue-500">
789
+ <h3 class="text-lg font-semibold text-white mb-2">GGUF Format Errors</h3>
790
+ <p class="text-slate-400 text-sm mb-2">Error: <code class="bg-blue-900/30 text-blue-300 px-1 rounded">invalid magic</code></p>
791
+ <p class="text-slate-400 text-sm">Your llama.cpp version is too old for this GGUF file. Pull latest changes and rebuild, or download an older GGUF version (v1 or v2).</p>
792
+ </div>
793
+
794
+ <div class="glass p-6 rounded-xl border-l-4 border-green-500">
795
+ <h3 class="text-lg font-semibold text-white mb-2">Model Output is Gibberish</h3>
796
+ <p class="text-slate-400 text-sm mb-2">Random characters or nonsensical output.</p>
797
+ <p class="text-slate-400 text-sm">Usually indicates wrong tokenizer or incompatible model. Ensure you're using the correct prompt template for the model (e.g., [INST] ... [/INST] for Mistral Instruct, ChatML for OpenHermes/Qwen-style fine-tunes).</p>
798
+ </div>
799
+ </div>
800
+ </div>
801
+ </section>
802
+
803
+ <!-- Community Section -->
804
+ <section class="py-20">
805
+ <div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
806
+ <div class="text-center mb-12">
807
+ <h2 class="text-3xl md:text-4xl font-bold mb-4 text-white">Community & Resources</h2>
808
+ <p class="text-slate-400">Get help and stay updated.</p>
809
+ </div>
810
+
811
+ <div class="grid md:grid-cols-4 gap-6">
812
+ <a href="https://github.com/ggerganov/llama.cpp/discussions" target="_blank" class="glass p-6 rounded-xl glass-hover transition duration-300 text-center">
813
+ <i data-lucide="message-square" class="w-8 h-8 text-primary mx-auto mb-3"></i>
814
+ <h3 class="text-lg font-semibold text-white mb-1">GitHub Discussions</h3>
815
+ <p class="text-slate-400 text-sm">Community support</p>
816
+ </a>
817
+
818
+ <a href="https://discord.gg/llama-cpp" target="_blank" class="glass p-6 rounded-xl glass-hover transition duration-300 text-center">
819
+ <i data-lucide="message-circle" class="w-8 h-8 text-indigo-500 mx-auto mb-3"></i>
820
+ <h3 class="text-lg font-semibold text-white mb-1">Discord</h3>
821
+ <p class="text-slate-400 text-sm">Real-time chat</p>
822
+ </a>
823
+
824
+ <a href="https://www.reddit.com/r/LocalLLaMA/" target="_blank" class="glass p-6 rounded-xl glass-hover transition duration-300 text-center">
825
+ <i data-lucide="users" class="w-8 h-8 text-orange-500 mx-auto mb-3"></i>
826
+ <h3 class="text-lg font-semibold text-white mb-1">r/LocalLLaMA</h3>
827
+ <p class="text-slate-400 text-sm">Reddit community</p>
828
+ </a>
829
+
830
+ <a href="https://github.com/ggerganov/llama.cpp/blob/master/README.md" target="_blank" class="glass p-6 rounded-xl glass-hover transition duration-300 text-center">
831
+ <i data-lucide="book" class="w-8 h-8 text-blue-500 mx-auto mb-3"></i>
832
+ <h3 class="text-lg font-semibold text-white mb-1">Documentation</h3>
833
+ <p class="text-slate-400 text-sm">Official wiki</p>
834
+ </a>
835
+ </div>
836
+ </div>
837
+ </section>
838
+
839
+ <!-- Footer -->
840
+ <footer class="bg-slate-900 border-t border-slate-800 py-12">
841
+ <div class="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8">
842
+ <div class="flex flex-col md:flex-row justify-between items-center">
843
+ <div class="mb-4 md:mb-0">
844
+ <div class="flex items-center space-x-2 mb-2">
845
+ <i data-lucide="brain-circuit" class="w-6 h-6 text-primary"></i>
846
+ <span class="text-lg font-bold text-white">llama.cpp</span>
847
+ </div>
848
+ <p class="text-slate-400 text-sm">The ultimate guide to running LLMs locally</p>
849
+ </div>
850
+ <div class="flex space-x-6">
851
+ <a href="https://github.com/ggerganov/llama.cpp" target="_blank" rel="noopener noreferrer" aria-label="llama.cpp on GitHub" class="text-slate-400 hover:text-white transition">
852
+ <i data-lucide="github" class="w-6 h-6" aria-hidden="true"></i>
853
+ </a>
854
+ <a href="https://huggingface.co/TheBloke" target="_blank" rel="noopener noreferrer" aria-label="TheBloke on Hugging Face" class="text-slate-400 hover:text-white transition">
855
+ <i data-lucide="database" class="w-6 h-6" aria-hidden="true"></i>
856
+ </a>
857
+ <a href="#" aria-label="Twitter (link not yet available)" class="text-slate-400 hover:text-white transition">
858
+ <i data-lucide="twitter" class="w-6 h-6" aria-hidden="true"></i>
859
+ </a>
860
+ </div>
861
+ </div>
862
+ <div class="mt-8 pt-8 border-t border-slate-800 text-center text-slate-500 text-sm">
863
+ <p>Not affiliated with Meta or Facebook. llama.cpp is created by Georgi Gerganov and community.</p>
864
+ </div>
865
+ </div>
866
+ </footer>
867
+
868
+ <script>
869
+ // Initialize Lucide icons
870
+ lucide.createIcons();
871
+
872
+ // Mobile menu toggle
873
+ const mobileMenuBtn = document.getElementById('mobile-menu-btn');
874
+ const mobileMenu = document.getElementById('mobile-menu');
875
+
876
+ mobileMenuBtn.addEventListener('click', () => {
877
+ mobileMenu.classList.toggle('hidden');
878
+ });
879
+
880
+ // Close mobile menu when clicking a link
881
+ mobileMenu.querySelectorAll('a').forEach(link => {
882
+ link.addEventListener('click', () => {
883
+ mobileMenu.classList.add('hidden');
884
+ });
885
+ });
886
+
887
+ // Copy code functionality
888
+ function copyCode(btn) {
889
+ const codeBlock = btn.parentElement.querySelector('code, pre');
890
+ const text = codeBlock.textContent;
891
+
892
+ navigator.clipboard.writeText(text).then(() => {
893
+ const originalIcon = btn.innerHTML;
894
+ btn.innerHTML = '<i data-lucide="check" class="w-4 h-4"></i>';
895
+ lucide.createIcons();
896
+
897
+ setTimeout(() => {
898
+ btn.innerHTML = originalIcon;
899
+ lucide.createIcons();
900
+ }, 2000);
901
+ });
902
+ }
903
+
904
+ // Smooth scroll for anchor links
905
+ document.querySelectorAll('a[href^="#"]').forEach(anchor => {
906
+ anchor.addEventListener('click', function (e) {
907
+ e.preventDefault();
908
+ const target = document.querySelector(this.getAttribute('href'));
909
+ if (target) {
910
+ target.scrollIntoView({ behavior: 'smooth', block: 'start' });
911
+ }
912
+ });
913
+ });
914
+ </script>
915
+ <script src="https://deepsite.hf.co/deepsite-badge.js"></script>
916
+ </body>
917
+ </html>