MukulRay committed on
Commit
00e4869
·
1 Parent(s): d0eeb73

feat: Groq backend, guardrails module, Sumeru UI redesign

Browse files
Files changed (6) hide show
  1. guardrails.py +100 -0
  2. index.html +441 -341
  3. main.py +46 -19
  4. pyvenv.cfg +5 -0
  5. rag.py +83 -57
  6. requirements.txt +6 -3
guardrails.py ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import logging
3
+
4
+ from sentence_transformers import SentenceTransformer, util
5
+
6
# Module-level logger; handlers and levels are left to the host application.
logger = logging.getLogger(__name__)

# Sentence-embedding model name, overridable through the EMBED_MODEL env var.
EMBED_MODEL = os.environ.get(
    "EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2"
)

# Reference phrases describing the supported topic area. They are embedded
# once by _get_embedder() and compared against each query in is_in_domain().
GENSHIN_ANCHORS = [
    # general topics
    "Genshin Impact character build",
    "elemental reaction damage",
    "Archon quest lore",
    "artifact set bonus",
    "team composition synergy",
    "vision and gnosis",
    # regions, weapon types, elements
    "Mondstadt Liyue Inazuma Sumeru Fontaine Natlan",
    "polearm sword claymore bow catalyst",
    "Pyro Hydro Cryo Electro Anemo Geo Dendro",
    "Genshin resin domain spiral abyss",
    # character names
    "Hu Tao Zhongli Venti Kazuha Raiden Shogun",
    "Neuvillette Furina Arlecchino Navia Wriothesley",
    # typical question shapes
    "who is this Genshin character",
    "best build for Genshin character",
    "Genshin lore story explained",
    "where to find character in Genshin",
]

# Phrases has_injection() looks for in the lower-cased query, so every entry
# here must itself be lower case.
INJECTION_PATTERNS = [
    "ignore previous instructions",
    "ignore your instructions",
    "you are now",
    "pretend you are",
    "act as",
    "forget everything",
    "jailbreak",
    "dan mode",
    "do anything now",
]

# Lazily populated by _get_embedder() on first use.
_embedder = None
_anchor_embeddings = None
43
+
44
+
45
def _get_embedder():
    """Return the shared embedding model and the cached anchor embeddings.

    Both objects are created on first use and stored in the module-level
    ``_embedder`` / ``_anchor_embeddings`` globals so later calls reuse them.

    The two caches are initialised independently: if the model loads but
    encoding the anchors raises, the next call retries only the encoding
    step instead of returning a ``None`` anchor tensor forever.

    Returns:
        A ``(embedder, anchor_embeddings)`` tuple, where ``anchor_embeddings``
        is the result of encoding ``GENSHIN_ANCHORS`` with
        ``convert_to_tensor=True``.
    """
    global _embedder, _anchor_embeddings

    if _embedder is None:
        _embedder = SentenceTransformer(EMBED_MODEL)

    # Checked separately from the model so a failed first encode is retried.
    if _anchor_embeddings is None:
        _anchor_embeddings = _embedder.encode(
            GENSHIN_ANCHORS, convert_to_tensor=True
        )

    return _embedder, _anchor_embeddings
53
+
54
+
55
def is_in_domain(query: str, threshold: float = 0.15) -> bool:
    """Decide whether *query* is close enough to the supported topic area.

    The query is embedded and compared (cosine similarity) against every
    anchor embedding; the best similarity is logged and compared with
    *threshold*.

    Args:
        query: The user's question.
        threshold: Minimum best-match similarity for the query to count as
            in-domain.

    Returns:
        ``True`` when the best similarity is at least *threshold*. Also
        ``True`` when anything in the check raises: the guard deliberately
        fails open so a broken embedder does not block every request.
    """
    try:
        model, anchor_vectors = _get_embedder()
        encoded_query = model.encode(query, convert_to_tensor=True)
        similarities = util.cos_sim(encoded_query, anchor_vectors)
        score = similarities.max().item()
        logger.info(f"Domain score: {score:.3f}")
        return score >= threshold
    except Exception as e:
        # Best-effort guard: log the failure and let the query through.
        logger.warning(f"Domain check failed ({e}) — passing query through")
        return True
65
+
66
+
67
def has_injection(query: str) -> bool:
    """Return True if *query* contains a known prompt-injection phrase.

    Matching is case-insensitive (the query is lower-cased and the entries
    of ``INJECTION_PATTERNS`` are lower case). Compared with a raw substring
    test, two things differ:

    * A phrase must start at a word boundary, so harmless text such as
      "react as" or "interact as" no longer triggers the "act as" pattern.
      Inflected forms ("jailbreaking") still match because no trailing
      boundary is required.
    * Any run of whitespace between the words of a phrase is accepted, so
      padding a phrase with extra spaces, tabs or newlines does not bypass
      the check.

    Args:
        query: The user's question.

    Returns:
        ``True`` when at least one pattern is found, ``False`` otherwise.
    """
    # Function-scope import keeps this block self-contained.
    import re

    text = query.lower()
    for phrase in INJECTION_PATTERNS:
        words = phrase.split()
        if not words:
            # An empty pattern would match every query; ignore it.
            continue
        # (?<!\w) = not preceded by a word character (leading word boundary).
        pattern = r"(?<!\w)" + r"\s+".join(re.escape(w) for w in words)
        if re.search(pattern, text):
            return True
    return False
70
+
71
+
72
def validate_input(query: str) -> tuple[bool, str]:
    """Run every input guardrail on *query*, cheapest checks first.

    Surrounding whitespace is stripped before any check. The checks run in
    this order and the first failure wins: empty query, length above 500
    characters, injection phrase, out-of-domain topic.

    Args:
        query: The raw user question.

    Returns:
        ``(True, "")`` when the query passes all checks, otherwise
        ``(False, message)`` where *message* is the user-facing reason.
    """
    cleaned = query.strip()
    length = len(cleaned)

    if length == 0:
        return False, "Query cannot be empty."

    if length > 500:
        return False, "Query is too long — please keep it under 500 characters."

    if has_injection(cleaned):
        return False, "That type of query isn't something I can help with."

    if is_in_domain(cleaned):
        return True, ""

    return (
        False,
        "I'm specialized in Genshin Impact — ask me about characters, "
        "builds, lore, or elemental mechanics!",
    )
92
+
93
+
94
def validate_output(answer: str) -> tuple[bool, str]:
    """Clean a model answer and reject it if too little text remains.

    Surrounding whitespace and every literal ``</s>`` marker are removed.
    An answer shorter than 10 characters after cleaning is treated as
    unusable.

    Args:
        answer: The raw text produced by the model.

    Returns:
        ``(True, cleaned_answer)`` when the cleaned answer has at least 10
        characters, otherwise ``(False, message)`` with a user-facing
        fallback message.
    """
    trimmed = answer.strip()
    without_marker = trimmed.replace("</s>", "")
    cleaned = without_marker.strip()

    if len(cleaned) >= 10:
        return True, cleaned

    return False, "I couldn't find enough context to answer that — try rephrasing."
index.html CHANGED
@@ -3,26 +3,32 @@
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>LLMOpsLlama 3.1 RAG</title>
7
  <link rel="preconnect" href="https://fonts.googleapis.com">
8
- <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@300;400;500&family=Syne:wght@400;600;700&display=swap" rel="stylesheet">
9
  <style>
10
  :root {
11
- --bg: #0a0a0b;
12
- --surface: #111113;
13
- --surface2: #18181c;
14
- --border: #2a2a30;
15
- --border2: #3a3a42;
16
- --accent: #7c6af7;
17
- --accent2: #a89cf7;
18
- --green: #3dd68c;
19
- --red: #f76a6a;
20
- --amber: #f7c26a;
21
- --text: #e8e8f0;
22
- --text2: #8888a0;
23
- --text3: #555568;
24
- --mono: 'JetBrains Mono', monospace;
25
- --sans: 'Syne', sans-serif;
 
 
 
 
 
 
26
  }
27
 
28
  * { box-sizing: border-box; margin: 0; padding: 0; }
@@ -30,442 +36,447 @@
30
  body {
31
  background: var(--bg);
32
  color: var(--text);
33
- font-family: var(--sans);
34
  min-height: 100vh;
35
  display: flex;
36
  flex-direction: column;
 
37
  }
38
 
39
- /* subtle grid background */
40
- body::before {
41
- content: '';
 
42
  position: fixed;
43
- inset: 0;
44
- background-image:
45
- linear-gradient(rgba(124,106,247,0.03) 1px, transparent 1px),
46
- linear-gradient(90deg, rgba(124,106,247,0.03) 1px, transparent 1px);
47
- background-size: 40px 40px;
48
  pointer-events: none;
49
  z-index: 0;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  }
51
 
52
  header {
53
- position: relative;
54
- z-index: 1;
55
- padding: 28px 40px 24px;
56
- border-bottom: 1px solid var(--border);
57
  display: flex;
58
  align-items: center;
59
  justify-content: space-between;
 
 
 
60
  }
61
 
62
- .logo {
63
- display: flex;
64
- align-items: center;
65
- gap: 14px;
66
- }
67
 
68
  .logo-mark {
69
- width: 36px;
70
- height: 36px;
71
- border-radius: 8px;
72
- background: var(--accent);
73
- display: flex;
74
- align-items: center;
75
- justify-content: center;
76
- font-family: var(--mono);
77
- font-size: 14px;
78
- font-weight: 500;
79
- color: #fff;
80
- letter-spacing: -0.5px;
 
81
  }
82
 
83
  .logo-text {
84
- font-size: 16px;
85
- font-weight: 700;
86
- color: var(--text);
87
- letter-spacing: -0.3px;
88
  }
89
 
90
  .logo-sub {
91
- font-size: 11px;
92
- color: var(--text3);
93
  font-family: var(--mono);
94
- margin-top: 1px;
95
  }
96
 
97
  .status-pill {
98
- display: flex;
99
- align-items: center;
100
- gap: 7px;
101
- padding: 6px 12px;
102
- border-radius: 20px;
103
  border: 1px solid var(--border);
104
- background: var(--surface);
105
- font-family: var(--mono);
106
- font-size: 11px;
107
- color: var(--text2);
108
  }
109
 
110
  .status-dot {
111
- width: 7px;
112
- height: 7px;
113
- border-radius: 50%;
114
- background: var(--text3);
115
- transition: background 0.3s;
116
  }
117
- .status-dot.online { background: var(--green); box-shadow: 0 0 6px var(--green); }
118
- .status-dot.error { background: var(--red); }
119
  .status-dot.loading { background: var(--amber); animation: pulse 1s ease-in-out infinite; }
120
-
121
- @keyframes pulse { 0%,100%{opacity:1} 50%{opacity:0.4} }
122
 
123
  main {
124
- position: relative;
125
- z-index: 1;
126
- flex: 1;
127
- display: flex;
128
- flex-direction: column;
129
- max-width: 860px;
130
- width: 100%;
131
- margin: 0 auto;
132
- padding: 40px 40px 0;
133
  }
134
 
 
135
  .hero {
136
- margin-bottom: 40px;
137
- animation: fadeUp 0.5s ease both;
 
138
  }
139
 
140
  @keyframes fadeUp {
141
- from { opacity:0; transform: translateY(12px); }
142
  to { opacity:1; transform: translateY(0); }
143
  }
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  .hero h1 {
146
- font-size: 32px;
147
- font-weight: 700;
148
- color: var(--text);
149
- letter-spacing: -0.8px;
150
- line-height: 1.2;
151
- margin-bottom: 10px;
152
  }
153
 
154
- .hero h1 span { color: var(--accent2); }
 
 
 
 
155
 
156
- .hero p {
157
- font-size: 14px;
158
- color: var(--text2);
159
- font-family: var(--mono);
160
- line-height: 1.6;
161
  }
162
 
163
  .model-tag {
164
- display: inline-flex;
165
- align-items: center;
166
- gap: 6px;
167
- margin-top: 12px;
168
- padding: 4px 10px;
169
- border-radius: 4px;
170
- border: 1px solid var(--border2);
171
- background: var(--surface2);
172
- font-family: var(--mono);
173
- font-size: 11px;
174
- color: var(--accent2);
175
  }
176
 
177
- /* Query input area */
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  .query-box {
179
  background: var(--surface);
180
  border: 1px solid var(--border);
181
- border-radius: 12px;
182
- padding: 0;
183
  overflow: hidden;
184
- transition: border-color 0.2s;
185
- animation: fadeUp 0.5s 0.1s ease both;
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  }
 
 
187
 
188
  .query-box:focus-within {
189
- border-color: var(--accent);
 
190
  }
191
 
192
  .query-label {
193
- padding: 12px 16px 0;
194
- font-family: var(--mono);
195
- font-size: 11px;
196
- color: var(--text3);
197
- letter-spacing: 0.05em;
198
- text-transform: uppercase;
199
  }
200
 
201
  textarea {
202
- width: 100%;
203
- background: transparent;
204
- border: none;
205
- outline: none;
206
- resize: none;
207
- padding: 10px 16px 14px;
208
- font-family: var(--mono);
209
- font-size: 14px;
210
- color: var(--text);
211
- line-height: 1.6;
212
- min-height: 90px;
213
- caret-color: var(--accent);
214
  }
215
 
216
- textarea::placeholder { color: var(--text3); }
217
 
218
  .query-footer {
219
- display: flex;
220
- align-items: center;
221
  justify-content: space-between;
222
- padding: 10px 14px;
223
  border-top: 1px solid var(--border);
224
  background: var(--surface2);
225
  }
226
 
227
  .top-k-wrap {
228
- display: flex;
229
- align-items: center;
230
- gap: 8px;
231
- font-family: var(--mono);
232
- font-size: 12px;
233
- color: var(--text2);
234
  }
235
 
236
  .top-k-wrap select {
237
- background: var(--surface);
238
  border: 1px solid var(--border2);
239
- border-radius: 4px;
240
- color: var(--text);
241
- font-family: var(--mono);
242
- font-size: 12px;
243
- padding: 3px 8px;
244
- cursor: pointer;
245
- outline: none;
246
  }
247
 
248
  .send-btn {
249
- display: flex;
250
- align-items: center;
251
- gap: 8px;
252
- padding: 8px 18px;
253
- background: var(--accent);
254
- border: none;
255
- border-radius: 6px;
256
- color: #fff;
257
- font-family: var(--sans);
258
- font-size: 13px;
259
- font-weight: 600;
260
  cursor: pointer;
261
- transition: background 0.15s, transform 0.1s;
262
- letter-spacing: 0.2px;
 
263
  }
264
 
265
- .send-btn:hover { background: var(--accent2); }
266
- .send-btn:active { transform: scale(0.97); }
267
- .send-btn:disabled { background: var(--border2); color: var(--text3); cursor: not-allowed; transform: none; }
 
 
 
268
 
269
- .send-btn .arrow { font-size: 14px; transition: transform 0.15s; }
270
- .send-btn:not(:disabled):hover .arrow { transform: translateX(3px); }
 
 
 
 
 
271
 
272
- /* Response area */
273
  #response-area {
274
  margin-top: 28px;
275
- animation: fadeUp 0.4s ease both;
276
  display: none;
277
  }
278
-
279
  #response-area.visible { display: block; }
280
 
281
  .response-card {
282
  background: var(--surface);
283
  border: 1px solid var(--border);
284
- border-radius: 12px;
285
- overflow: hidden;
286
  }
287
 
288
  .response-header {
289
- display: flex;
290
- align-items: center;
291
  justify-content: space-between;
292
- padding: 12px 16px;
293
  border-bottom: 1px solid var(--border);
294
  background: var(--surface2);
295
  }
296
 
297
  .response-label {
298
- font-family: var(--mono);
299
- font-size: 11px;
300
- color: var(--text3);
301
- text-transform: uppercase;
302
- letter-spacing: 0.05em;
303
- display: flex;
304
- align-items: center;
305
- gap: 7px;
306
  }
307
 
308
  .response-label .dot {
309
  width: 6px; height: 6px; border-radius: 50%;
310
- background: var(--green);
 
 
 
 
 
 
311
  }
312
 
313
  .latency-tag {
314
- font-family: var(--mono);
315
- font-size: 11px;
316
- color: var(--text3);
317
- padding: 2px 8px;
318
- border-radius: 3px;
319
- border: 1px solid var(--border);
320
  }
321
 
322
  .response-body {
323
- padding: 20px;
324
- font-size: 15px;
325
- line-height: 1.75;
326
- color: var(--text);
327
- min-height: 60px;
328
- white-space: pre-wrap;
329
- word-break: break-word;
330
  }
331
 
332
- /* loading state */
333
  .thinking {
334
- display: flex;
335
- align-items: center;
336
- gap: 10px;
337
- padding: 20px;
338
- font-family: var(--mono);
339
- font-size: 13px;
340
- color: var(--text2);
341
  }
342
 
343
  .thinking-dots span {
344
  display: inline-block;
345
- width: 5px; height: 5px;
346
- border-radius: 50%;
347
- background: var(--accent);
348
- margin: 0 2px;
349
- animation: bounce 1.2s ease-in-out infinite;
 
 
 
 
 
350
  }
351
- .thinking-dots span:nth-child(2) { animation-delay: 0.2s; }
352
- .thinking-dots span:nth-child(3) { animation-delay: 0.4s; }
353
- @keyframes bounce { 0%,80%,100%{transform:translateY(0)} 40%{transform:translateY(-6px)} }
354
 
355
  .sources-section {
356
  border-top: 1px solid var(--border);
357
- padding: 12px 20px;
358
  background: var(--surface2);
359
  }
360
 
361
  .sources-label {
362
- font-family: var(--mono);
363
- font-size: 11px;
364
- color: var(--text3);
365
- text-transform: uppercase;
366
- letter-spacing: 0.05em;
367
- margin-bottom: 8px;
368
  }
369
 
370
  .source-chip {
371
- display: inline-flex;
372
- align-items: center;
373
- gap: 5px;
374
- padding: 3px 9px;
375
- border-radius: 4px;
376
- border: 1px solid var(--border2);
377
- background: var(--surface);
378
- font-family: var(--mono);
379
- font-size: 11px;
380
- color: var(--accent2);
381
- margin: 3px 4px 3px 0;
382
  }
383
 
384
  .no-sources {
385
- font-family: var(--mono);
386
- font-size: 12px;
387
- color: var(--text3);
388
- font-style: italic;
389
  }
390
 
391
  .error-card {
392
- background: rgba(247,106,106,0.06);
393
- border: 1px solid rgba(247,106,106,0.25);
394
- border-radius: 8px;
395
- padding: 14px 16px;
396
- font-family: var(--mono);
397
- font-size: 13px;
398
- color: var(--red);
399
- margin-top: 16px;
400
- display: none;
401
  }
402
  .error-card.visible { display: block; }
403
 
404
- /* history */
405
  .history-section {
406
- margin-top: 32px;
407
- padding-bottom: 40px;
408
- animation: fadeUp 0.4s 0.15s ease both;
409
  }
410
 
411
  .history-label {
412
- font-family: var(--mono);
413
- font-size: 11px;
414
- color: var(--text3);
415
- text-transform: uppercase;
416
- letter-spacing: 0.05em;
417
- margin-bottom: 12px;
418
  }
419
 
420
  .history-item {
421
- background: var(--surface);
422
- border: 1px solid var(--border);
423
- border-radius: 8px;
424
- padding: 12px 16px;
425
- margin-bottom: 8px;
426
- cursor: pointer;
427
- transition: border-color 0.15s;
428
  }
429
 
430
- .history-item:hover { border-color: var(--border2); }
 
 
 
431
 
432
  .history-q {
433
- font-size: 13px;
434
- color: var(--text2);
435
- font-family: var(--mono);
436
- white-space: nowrap;
437
- overflow: hidden;
438
- text-overflow: ellipsis;
439
  }
440
 
441
  .history-meta {
442
- font-size: 11px;
443
- color: var(--text3);
444
- font-family: var(--mono);
445
- margin-top: 4px;
446
  }
447
 
448
  footer {
449
- position: relative;
450
- z-index: 1;
451
- text-align: center;
452
- padding: 20px;
453
- font-family: var(--mono);
454
- font-size: 11px;
455
- color: var(--text3);
456
  border-top: 1px solid var(--border);
 
 
457
  margin-top: auto;
458
  }
 
 
459
  </style>
460
  </head>
461
  <body>
462
 
 
 
 
 
 
463
  <header>
464
  <div class="logo">
465
- <div class="logo-mark">λ</div>
466
  <div>
467
- <div class="logo-text">LLMOps RAG</div>
468
- <div class="logo-sub">Llama 3.1 · QLoRA · Pinecone</div>
469
  </div>
470
  </div>
471
  <div class="status-pill">
@@ -476,18 +487,27 @@
476
 
477
  <main>
478
  <div class="hero">
479
- <h1>Ask your <span>fine-tuned</span> model</h1>
480
- <p>Llama 3.1 8B · QLoRA exp2_lr2e-4_r16 · RTX 3060 · 4-bit NF4</p>
 
 
 
481
  <div class="model-tag">
482
- <span>●</span> Retrieval-augmented · semantic search over ingested docs
483
  </div>
484
  </div>
485
 
 
 
 
 
 
 
486
  <div class="query-box">
487
- <div class="query-label">query</div>
488
  <textarea
489
  id="query-input"
490
- placeholder="What is QLoRA and how does it differ from full fine-tuning?"
491
  rows="3"
492
  ></textarea>
493
  <div class="query-footer">
@@ -499,10 +519,10 @@
499
  <option value="3" selected>3</option>
500
  <option value="5">5</option>
501
  </select>
502
- <span style="color:var(--text3)">retrieved chunks</span>
503
  </div>
504
  <button class="send-btn" id="send-btn" onclick="submitQuery()">
505
- Run query <span class="arrow"></span>
506
  </button>
507
  </div>
508
  </div>
@@ -514,29 +534,123 @@
514
  <div class="response-header">
515
  <div class="response-label">
516
  <div class="dot"></div>
517
- response
518
  </div>
519
  <div class="latency-tag" id="latency-tag">—</div>
520
  </div>
521
  <div id="response-body" class="response-body"></div>
522
  <div class="sources-section">
523
- <div class="sources-label">sources</div>
524
- <div id="sources-list"><span class="no-sources">no documents ingested yet</span></div>
525
  </div>
526
  </div>
527
  </div>
528
 
529
  <div class="history-section" id="history-section" style="display:none">
530
- <div class="history-label">recent queries</div>
531
  <div id="history-list"></div>
532
  </div>
533
  </main>
534
 
535
  <footer>
536
- running locally · http://localhost:8000 · <span id="footer-model">exp2_lr2e-4_r16</span>
537
  </footer>
538
 
539
  <script>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
540
  const API = 'http://localhost:8000';
541
  const history = [];
542
 
@@ -550,15 +664,15 @@ async function checkHealth() {
550
  const d = await r.json();
551
  if (d.model_loaded) {
552
  dot.className = 'status-dot online';
553
- txt.textContent = 'model ready';
554
  } else {
555
  dot.className = 'status-dot loading';
556
- txt.textContent = 'loading model...';
557
  setTimeout(checkHealth, 3000);
558
  }
559
  } catch {
560
  dot.className = 'status-dot error';
561
- txt.textContent = 'server offline';
562
  setTimeout(checkHealth, 5000);
563
  }
564
  }
@@ -567,29 +681,25 @@ async function submitQuery() {
567
  const query = document.getElementById('query-input').value.trim();
568
  if (!query) return;
569
 
570
- const top_k = parseInt(document.getElementById('top-k').value);
571
- const btn = document.getElementById('send-btn');
572
- const responseArea = document.getElementById('response-area');
573
- const responseBody = document.getElementById('response-body');
574
- const errorCard = document.getElementById('error-card');
575
- const latencyTag = document.getElementById('latency-tag');
576
- const sourcesList = document.getElementById('sources-list');
577
-
578
- // reset
579
- errorCard.className = 'error-card';
580
- responseArea.className = 'response-area visible';
581
- responseArea.style.display = 'block';
582
- latencyTag.textContent = '';
583
- sourcesList.innerHTML = '';
584
  btn.disabled = true;
585
 
586
- // show thinking
587
- responseBody.innerHTML = `
588
  <div class="thinking">
589
- <div class="thinking-dots">
590
- <span></span><span></span><span></span>
591
- </div>
592
- generating response...
593
  </div>`;
594
 
595
  try {
@@ -605,49 +715,41 @@ async function submitQuery() {
605
  }
606
 
607
  const data = await res.json();
 
608
 
609
- // render answer
610
- responseBody.textContent = data.answer;
611
-
612
- // latency
613
  const ms = Math.round(data.latency_ms);
614
- latencyTag.textContent = ms >= 1000 ? `${(ms/1000).toFixed(1)}s` : `${ms}ms`;
615
 
616
- // sources
617
  if (data.sources && data.sources.length > 0) {
618
- sourcesList.innerHTML = data.sources.map(s => {
619
  const name = s.split(/[\\/]/).pop();
620
- return `<span class="source-chip">📄 ${name}</span>`;
621
  }).join('');
622
  } else {
623
- sourcesList.innerHTML = '<span class="no-sources">no docs ingested — run ingest.py to add documents</span>';
624
  }
625
 
626
- // add to history
627
- addHistory(query, ms);
628
 
629
  } catch (err) {
630
- responseBody.innerHTML = '';
631
- responseArea.style.display = 'none';
632
- errorCard.className = 'error-card visible';
633
- errorCard.textContent = `Error: ${err.message}`;
634
  }
635
 
636
  btn.disabled = false;
637
  }
638
 
639
- function addHistory(query, latency_ms) {
640
  history.unshift({ query, latency_ms, time: new Date().toLocaleTimeString() });
641
  if (history.length > 5) history.pop();
642
 
643
- const section = document.getElementById('history-section');
644
- const list = document.getElementById('history-list');
645
-
646
- section.style.display = 'block';
647
- list.innerHTML = history.map((h, i) => `
648
  <div class="history-item" onclick="rerun(${i})">
649
  <div class="history-q">${h.query}</div>
650
- <div class="history-meta">${h.time} · ${Math.round(h.latency_ms/1000).toFixed(1)}s</div>
651
  </div>
652
  `).join('');
653
  }
@@ -657,7 +759,6 @@ function rerun(i) {
657
  submitQuery();
658
  }
659
 
660
- // Ctrl+Enter to submit
661
  document.getElementById('query-input').addEventListener('keydown', e => {
662
  if (e.key === 'Enter' && (e.ctrlKey || e.metaKey)) {
663
  e.preventDefault();
@@ -665,7 +766,6 @@ document.getElementById('query-input').addEventListener('keydown', e => {
665
  }
666
  });
667
 
668
- // check health on load
669
  checkHealth();
670
  </script>
671
  </body>
 
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>IrminsulGenshin Impact AI Assistant</title>
7
  <link rel="preconnect" href="https://fonts.googleapis.com">
8
+ <link href="https://fonts.googleapis.com/css2?family=Cinzel:wght@400;500;600&family=EB+Garamond:ital,wght@0,400;0,500;1,400&family=JetBrains+Mono:wght@300;400&display=swap" rel="stylesheet">
9
  <style>
10
  :root {
11
+ --bg: #060d08;
12
+ --bg2: #0a1410;
13
+ --surface: rgba(8,18,12,0.88);
14
+ --surface2: rgba(12,24,16,0.92);
15
+ --border: rgba(80,180,100,0.12);
16
+ --border2: rgba(80,180,100,0.22);
17
+ --dendro: #7ecb6a;
18
+ --dendro2: #a8e090;
19
+ --dendro3: #c8f0b0;
20
+ --parchment: #c8b888;
21
+ --parchment2:#e0d0a8;
22
+ --teal: #4ab890;
23
+ --text: #d8e8d0;
24
+ --text2: #7a9878;
25
+ --text3: #3a5038;
26
+ --green: #5dd68c;
27
+ --red: #e87878;
28
+ --amber: #d8a84a;
29
+ --mono: 'JetBrains Mono', monospace;
30
+ --serif: 'EB Garamond', serif;
31
+ --display: 'Cinzel', serif;
32
  }
33
 
34
  * { box-sizing: border-box; margin: 0; padding: 0; }
 
36
  body {
37
  background: var(--bg);
38
  color: var(--text);
39
+ font-family: var(--serif);
40
  min-height: 100vh;
41
  display: flex;
42
  flex-direction: column;
43
+ overflow-x: hidden;
44
  }
45
 
46
+ canvas#bg { position: fixed; inset: 0; z-index: 0; pointer-events: none; }
47
+
48
+ /* deep forest atmospheric glow */
49
+ .atmo {
50
  position: fixed;
51
+ border-radius: 50%;
 
 
 
 
52
  pointer-events: none;
53
  z-index: 0;
54
+ filter: blur(80px);
55
+ }
56
+ .atmo-1 {
57
+ width: 700px; height: 500px;
58
+ top: -100px; left: -100px;
59
+ background: radial-gradient(ellipse, rgba(40,100,50,0.18) 0%, transparent 70%);
60
+ }
61
+ .atmo-2 {
62
+ width: 500px; height: 400px;
63
+ top: 30%; right: -80px;
64
+ background: radial-gradient(ellipse, rgba(60,160,80,0.1) 0%, transparent 70%);
65
+ }
66
+ .atmo-3 {
67
+ width: 600px; height: 300px;
68
+ bottom: 0; left: 20%;
69
+ background: radial-gradient(ellipse, rgba(30,80,40,0.14) 0%, transparent 70%);
70
  }
71
 
72
  header {
73
+ position: relative; z-index: 10;
74
+ padding: 20px 48px;
 
 
75
  display: flex;
76
  align-items: center;
77
  justify-content: space-between;
78
+ border-bottom: 1px solid var(--border);
79
+ backdrop-filter: blur(20px);
80
+ background: rgba(6,13,8,0.7);
81
  }
82
 
83
+ .logo { display: flex; align-items: center; gap: 14px; }
 
 
 
 
84
 
85
  .logo-mark {
86
+ width: 38px; height: 38px;
87
+ border-radius: 50%;
88
+ border: 1px solid rgba(126,203,106,0.3);
89
+ background: radial-gradient(circle at 40% 35%, rgba(80,180,80,0.15), rgba(6,13,8,0.95));
90
+ display: flex; align-items: center; justify-content: center;
91
+ font-size: 18px;
92
+ box-shadow: 0 0 16px rgba(80,200,80,0.12), inset 0 0 12px rgba(80,200,80,0.06);
93
+ animation: runeGlow 3s ease-in-out infinite;
94
+ }
95
+
96
+ @keyframes runeGlow {
97
+ 0%,100% { box-shadow: 0 0 16px rgba(80,200,80,0.12), inset 0 0 12px rgba(80,200,80,0.06); }
98
+ 50% { box-shadow: 0 0 28px rgba(80,200,80,0.25), inset 0 0 16px rgba(80,200,80,0.12); }
99
  }
100
 
101
  .logo-text {
102
+ font-family: var(--display);
103
+ font-size: 17px; font-weight: 500;
104
+ color: var(--dendro2);
105
+ letter-spacing: 0.08em;
106
  }
107
 
108
  .logo-sub {
109
+ font-size: 10px; color: var(--text3);
 
110
  font-family: var(--mono);
111
+ margin-top: 2px; letter-spacing: 0.04em;
112
  }
113
 
114
  .status-pill {
115
+ display: flex; align-items: center; gap: 7px;
116
+ padding: 5px 13px; border-radius: 20px;
 
 
 
117
  border: 1px solid var(--border);
118
+ background: rgba(8,18,12,0.8);
119
+ font-family: var(--mono); font-size: 11px; color: var(--text2);
120
+ backdrop-filter: blur(8px);
 
121
  }
122
 
123
  .status-dot {
124
+ width: 6px; height: 6px; border-radius: 50%;
125
+ background: var(--text3); transition: background 0.3s;
 
 
 
126
  }
127
+ .status-dot.online { background: var(--green); box-shadow: 0 0 7px var(--green); }
128
+ .status-dot.error { background: var(--red); }
129
  .status-dot.loading { background: var(--amber); animation: pulse 1s ease-in-out infinite; }
130
+ @keyframes pulse { 0%,100%{opacity:1} 50%{opacity:0.3} }
 
131
 
132
  main {
133
+ position: relative; z-index: 10;
134
+ flex: 1; display: flex; flex-direction: column;
135
+ max-width: 800px; width: 100%;
136
+ margin: 0 auto; padding: 52px 40px 0;
 
 
 
 
 
137
  }
138
 
139
+ /* parchment scroll hero */
140
  .hero {
141
+ margin-bottom: 44px;
142
+ text-align: center;
143
+ animation: fadeUp 0.7s ease both;
144
  }
145
 
146
  @keyframes fadeUp {
147
+ from { opacity:0; transform: translateY(14px); }
148
  to { opacity:1; transform: translateY(0); }
149
  }
150
 
151
+ .rune-row {
152
+ display: flex; align-items: center; justify-content: center;
153
+ gap: 12px; margin-bottom: 18px;
154
+ font-size: 14px; color: var(--dendro);
155
+ opacity: 0.6; letter-spacing: 0.3em;
156
+ }
157
+
158
+ .rune-row span { animation: runeFlicker 4s ease-in-out infinite; }
159
+ .rune-row span:nth-child(2) { animation-delay: 0.8s; }
160
+ .rune-row span:nth-child(3) { animation-delay: 1.6s; }
161
+ @keyframes runeFlicker {
162
+ 0%,100%{opacity:0.5} 50%{opacity:1;text-shadow:0 0 8px var(--dendro);}
163
+ }
164
+
165
  .hero h1 {
166
+ font-family: var(--display);
167
+ font-size: 40px; font-weight: 500;
168
+ color: var(--parchment2);
169
+ letter-spacing: 0.06em; line-height: 1.2;
170
+ margin-bottom: 8px;
171
+ text-shadow: 0 0 60px rgba(100,200,80,0.15);
172
  }
173
 
174
+ .hero h1 em {
175
+ font-style: normal;
176
+ color: var(--dendro2);
177
+ text-shadow: 0 0 30px rgba(126,203,106,0.5);
178
+ }
179
 
180
+ .hero-sub {
181
+ font-size: 16px; font-style: italic;
182
+ color: var(--text2); line-height: 1.75;
183
+ max-width: 480px; margin: 0 auto 20px;
 
184
  }
185
 
186
  .model-tag {
187
+ display: inline-flex; align-items: center; gap: 8px;
188
+ padding: 5px 16px; border-radius: 3px;
189
+ border: 1px solid rgba(126,203,106,0.2);
190
+ background: rgba(40,80,30,0.2);
191
+ font-family: var(--mono); font-size: 11px;
192
+ color: var(--dendro); letter-spacing: 0.04em;
 
 
 
 
 
193
  }
194
 
195
+ /* ornamental divider */
196
+ .ornament {
197
+ display: flex; align-items: center;
198
+ gap: 12px; margin-bottom: 36px; opacity: 0.25;
199
+ }
200
+ .ornament-line {
201
+ flex: 1; height: 1px;
202
+ background: linear-gradient(90deg, transparent, var(--dendro), transparent);
203
+ }
204
+ .ornament-glyph {
205
+ font-size: 12px; color: var(--dendro);
206
+ letter-spacing: 4px;
207
+ }
208
+
209
+ /* query box — parchment card feel */
210
  .query-box {
211
  background: var(--surface);
212
  border: 1px solid var(--border);
213
+ border-radius: 4px;
 
214
  overflow: hidden;
215
+ transition: border-color 0.3s, box-shadow 0.3s;
216
+ animation: fadeUp 0.7s 0.1s ease both;
217
+ backdrop-filter: blur(20px);
218
+ position: relative;
219
+ }
220
+
221
+ /* corner rune accents */
222
+ .query-box::before,
223
+ .query-box::after {
224
+ content: '✦';
225
+ position: absolute;
226
+ font-size: 10px;
227
+ color: var(--dendro);
228
+ opacity: 0.3;
229
+ top: 8px;
230
  }
231
+ .query-box::before { left: 10px; }
232
+ .query-box::after { right: 10px; }
233
 
234
  .query-box:focus-within {
235
+ border-color: rgba(126,203,106,0.35);
236
+ box-shadow: 0 0 32px rgba(80,180,60,0.08);
237
  }
238
 
239
  .query-label {
240
+ padding: 14px 18px 0;
241
+ font-family: var(--display);
242
+ font-size: 9px; color: var(--text3);
243
+ letter-spacing: 0.18em; text-transform: uppercase;
 
 
244
  }
245
 
246
  textarea {
247
+ width: 100%; background: transparent;
248
+ border: none; outline: none; resize: none;
249
+ padding: 10px 18px 16px;
250
+ font-family: var(--serif); font-size: 15px;
251
+ color: var(--text); line-height: 1.75;
252
+ min-height: 100px; caret-color: var(--dendro);
 
 
 
 
 
 
253
  }
254
 
255
+ textarea::placeholder { color: var(--text3); font-style: italic; }
256
 
257
  .query-footer {
258
+ display: flex; align-items: center;
 
259
  justify-content: space-between;
260
+ padding: 10px 16px;
261
  border-top: 1px solid var(--border);
262
  background: var(--surface2);
263
  }
264
 
265
  .top-k-wrap {
266
+ display: flex; align-items: center;
267
+ gap: 8px; font-family: var(--mono);
268
+ font-size: 11px; color: var(--text2);
 
 
 
269
  }
270
 
271
  .top-k-wrap select {
272
+ background: rgba(6,13,8,0.8);
273
  border: 1px solid var(--border2);
274
+ border-radius: 3px; color: var(--text);
275
+ font-family: var(--mono); font-size: 11px;
276
+ padding: 3px 8px; cursor: pointer; outline: none;
 
 
 
 
277
  }
278
 
279
  .send-btn {
280
+ display: flex; align-items: center; gap: 8px;
281
+ padding: 9px 22px;
282
+ background: linear-gradient(135deg, rgba(60,140,50,0.8), rgba(40,100,35,0.9));
283
+ border: 1px solid rgba(126,203,106,0.3);
284
+ border-radius: 3px;
285
+ color: var(--dendro3);
286
+ font-family: var(--display);
287
+ font-size: 11px; font-weight: 500;
 
 
 
288
  cursor: pointer;
289
+ transition: all 0.2s;
290
+ letter-spacing: 0.1em;
291
+ box-shadow: 0 2px 20px rgba(60,180,50,0.15);
292
  }
293
 
294
+ .send-btn:hover {
295
+ background: linear-gradient(135deg, rgba(80,160,60,0.9), rgba(55,120,45,0.95));
296
+ box-shadow: 0 4px 28px rgba(60,200,60,0.25);
297
+ border-color: rgba(126,203,106,0.5);
298
+ transform: translateY(-1px);
299
+ }
300
 
301
+ .send-btn:active { transform: scale(0.97); }
302
+ .send-btn:disabled {
303
+ background: rgba(20,35,20,0.7);
304
+ color: var(--text3); cursor: not-allowed;
305
+ transform: none; box-shadow: none;
306
+ border-color: var(--border);
307
+ }
308
 
 
309
  #response-area {
310
  margin-top: 28px;
311
+ animation: fadeUp 0.5s ease both;
312
  display: none;
313
  }
 
314
  #response-area.visible { display: block; }
315
 
316
  .response-card {
317
  background: var(--surface);
318
  border: 1px solid var(--border);
319
+ border-radius: 4px; overflow: hidden;
320
+ backdrop-filter: blur(20px);
321
  }
322
 
323
  .response-header {
324
+ display: flex; align-items: center;
 
325
  justify-content: space-between;
326
+ padding: 11px 18px;
327
  border-bottom: 1px solid var(--border);
328
  background: var(--surface2);
329
  }
330
 
331
  .response-label {
332
+ font-family: var(--display); font-size: 9px;
333
+ color: var(--text3); text-transform: uppercase;
334
+ letter-spacing: 0.14em;
335
+ display: flex; align-items: center; gap: 8px;
 
 
 
 
336
  }
337
 
338
  .response-label .dot {
339
  width: 6px; height: 6px; border-radius: 50%;
340
+ background: var(--dendro);
341
+ box-shadow: 0 0 8px var(--dendro);
342
+ animation: dendroFlicker 2s ease-in-out infinite;
343
+ }
344
+ @keyframes dendroFlicker {
345
+ 0%,100%{opacity:1;box-shadow:0 0 8px var(--dendro);}
346
+ 50%{opacity:0.6;box-shadow:0 0 4px var(--dendro);}
347
  }
348
 
349
  .latency-tag {
350
+ font-family: var(--mono); font-size: 11px;
351
+ color: var(--text3); padding: 2px 8px;
352
+ border-radius: 2px; border: 1px solid var(--border);
 
 
 
353
  }
354
 
355
  .response-body {
356
+ padding: 22px 24px;
357
+ font-size: 15.5px; line-height: 1.9;
358
+ color: var(--text); font-family: var(--serif);
359
+ white-space: pre-wrap; word-break: break-word;
 
 
 
360
  }
361
 
 
362
  .thinking {
363
+ display: flex; align-items: center; gap: 12px;
364
+ padding: 22px 24px;
365
+ font-family: var(--serif); font-size: 14px;
366
+ color: var(--text3); font-style: italic;
 
 
 
367
  }
368
 
369
  .thinking-dots span {
370
  display: inline-block;
371
+ width: 5px; height: 5px; border-radius: 50%;
372
+ background: var(--dendro); margin: 0 2px;
373
+ animation: dendroOrb 1.6s ease-in-out infinite;
374
+ opacity: 0.4;
375
+ }
376
+ .thinking-dots span:nth-child(2) { animation-delay: 0.25s; }
377
+ .thinking-dots span:nth-child(3) { animation-delay: 0.5s; }
378
+ @keyframes dendroOrb {
379
+ 0%,100%{opacity:0.3;transform:scale(0.9);}
380
+ 50%{opacity:1;transform:scale(1.3);box-shadow:0 0 6px var(--dendro);}
381
  }
 
 
 
382
 
383
  .sources-section {
384
  border-top: 1px solid var(--border);
385
+ padding: 12px 22px;
386
  background: var(--surface2);
387
  }
388
 
389
  .sources-label {
390
+ font-family: var(--display); font-size: 9px;
391
+ color: var(--text3); text-transform: uppercase;
392
+ letter-spacing: 0.14em; margin-bottom: 8px;
 
 
 
393
  }
394
 
395
  .source-chip {
396
+ display: inline-flex; align-items: center; gap: 5px;
397
+ padding: 3px 10px; border-radius: 2px;
398
+ border: 1px solid rgba(80,180,80,0.18);
399
+ background: rgba(40,80,30,0.15);
400
+ font-family: var(--mono); font-size: 11px;
401
+ color: var(--dendro); margin: 3px 4px 3px 0;
 
 
 
 
 
402
  }
403
 
404
  .no-sources {
405
+ font-family: var(--mono); font-size: 11px;
406
+ color: var(--text3); font-style: italic;
 
 
407
  }
408
 
409
  .error-card {
410
+ background: rgba(232,120,120,0.05);
411
+ border: 1px solid rgba(232,120,120,0.18);
412
+ border-radius: 4px; padding: 14px 18px;
413
+ font-family: var(--mono); font-size: 12px;
414
+ color: var(--red); margin-top: 16px; display: none;
 
 
 
 
415
  }
416
  .error-card.visible { display: block; }
417
 
 
418
  .history-section {
419
+ margin-top: 32px; padding-bottom: 48px;
420
+ animation: fadeUp 0.5s ease both;
 
421
  }
422
 
423
  .history-label {
424
+ font-family: var(--display); font-size: 9px;
425
+ color: var(--text3); text-transform: uppercase;
426
+ letter-spacing: 0.14em; margin-bottom: 12px;
 
 
 
427
  }
428
 
429
  .history-item {
430
+ background: var(--surface); border: 1px solid var(--border);
431
+ border-radius: 3px; padding: 11px 16px;
432
+ margin-bottom: 6px; cursor: pointer;
433
+ transition: border-color 0.2s, background 0.2s;
434
+ backdrop-filter: blur(8px);
 
 
435
  }
436
 
437
+ .history-item:hover {
438
+ border-color: rgba(80,180,80,0.28);
439
+ background: rgba(30,60,25,0.3);
440
+ }
441
 
442
  .history-q {
443
+ font-size: 13.5px; color: var(--text2);
444
+ font-family: var(--serif);
445
+ white-space: nowrap; overflow: hidden; text-overflow: ellipsis;
 
 
 
446
  }
447
 
448
  .history-meta {
449
+ font-size: 10px; color: var(--text3);
450
+ font-family: var(--mono); margin-top: 4px;
 
 
451
  }
452
 
453
  footer {
454
+ position: relative; z-index: 10;
455
+ text-align: center; padding: 18px;
456
+ font-family: var(--mono); font-size: 10px;
457
+ color: var(--text3); letter-spacing: 0.06em;
 
 
 
458
  border-top: 1px solid var(--border);
459
+ backdrop-filter: blur(8px);
460
+ background: rgba(6,13,8,0.6);
461
  margin-top: auto;
462
  }
463
+
464
+ footer span { color: var(--dendro); opacity: 0.6; }
465
  </style>
466
  </head>
467
  <body>
468
 
469
+ <canvas id="bg"></canvas>
470
+ <div class="atmo atmo-1"></div>
471
+ <div class="atmo atmo-2"></div>
472
+ <div class="atmo atmo-3"></div>
473
+
474
  <header>
475
  <div class="logo">
476
+ <div class="logo-mark"></div>
477
  <div>
478
+ <div class="logo-text">Irminsul</div>
479
+ <div class="logo-sub">Llama 3.1 · QLoRA · Pinecone RAG</div>
480
  </div>
481
  </div>
482
  <div class="status-pill">
 
487
 
488
  <main>
489
  <div class="hero">
490
+ <div class="rune-row">
491
+ <span></span><span>✦</span><span>᪥</span>
492
+ </div>
493
+ <h1>The <em>Akasha</em> Speaks</h1>
494
+ <p class="hero-sub">All knowledge flows through the tree. Ask of Teyvat's lore, its people, their battles, and the elements that bind this world.</p>
495
  <div class="model-tag">
496
+ &nbsp; Dendro-augmented retrieval · Genshin Impact corpus
497
  </div>
498
  </div>
499
 
500
+ <div class="ornament">
501
+ <div class="ornament-line"></div>
502
+ <div class="ornament-glyph">✦ ᪥ ✦</div>
503
+ <div class="ornament-line"></div>
504
+ </div>
505
+
506
  <div class="query-box">
507
+ <div class="query-label">Inscribe your query</div>
508
  <textarea
509
  id="query-input"
510
+ placeholder="What does the Irminsul record of Nahida's imprisonment reveal..."
511
  rows="3"
512
  ></textarea>
513
  <div class="query-footer">
 
519
  <option value="3" selected>3</option>
520
  <option value="5">5</option>
521
  </select>
522
+ <span style="color:var(--text3)">retrieved branches</span>
523
  </div>
524
  <button class="send-btn" id="send-btn" onclick="submitQuery()">
525
+ Query the Tree
526
  </button>
527
  </div>
528
  </div>
 
534
  <div class="response-header">
535
  <div class="response-label">
536
  <div class="dot"></div>
537
+ irminsul responds
538
  </div>
539
  <div class="latency-tag" id="latency-tag">—</div>
540
  </div>
541
  <div id="response-body" class="response-body"></div>
542
  <div class="sources-section">
543
+ <div class="sources-label">branches consulted</div>
544
+ <div id="sources-list"><span class="no-sources">no records ingested</span></div>
545
  </div>
546
  </div>
547
  </div>
548
 
549
  <div class="history-section" id="history-section" style="display:none">
550
+ <div class="history-label">Recent queries</div>
551
  <div id="history-list"></div>
552
  </div>
553
  </main>
554
 
555
  <footer>
556
+ Irminsul &nbsp;·&nbsp; <span>Genshin Impact AI Assistant</span> &nbsp;·&nbsp; exp2_lr2e-4_r16 + Groq
557
  </footer>
558
 
559
  <script>
560
+ // ── Floating spore particles + Dendro wisps ────────────────────────────────────
561
(() => {
  // Background animation drawn on the #bg canvas: a handful of large, soft
  // "wisps" with many small "spores" drifting upward on top of them.
  const surface = document.getElementById('bg');
  const pen = surface.getContext('2d');
  const TAU = Math.PI * 2;
  const WISP_COUNT = 6;

  // Canvas size in pixels and the two live object pools.
  let W, H, particles = [], wisps = [];

  // Uniform random number in [lo, hi).
  const rand = (lo, hi) => lo + Math.random() * (hi - lo);

  // Size the canvas to the current window.
  const resize = () => {
    W = window.innerWidth;
    H = window.innerHeight;
    surface.width = W;
    surface.height = H;
  };

  // One spore per 8000 px² of canvas; vy is always negative so spores rise.
  const initParticles = () => {
    const count = Math.floor(W * H / 8000);
    particles = Array.from({ length: Math.max(count, 0) }, () => ({
      x: rand(0, W), y: rand(0, H),
      r: rand(0.8, 2.5),
      vx: rand(-0.15, 0.15),
      vy: rand(-0.4, -0.1),
      alpha: rand(0.1, 0.5),
      flicker: rand(0.002, 0.006),
      phase: rand(0, TAU),
      hue: rand(100, 150),
    }));
  };

  // A fixed number of wisps, seeded between 30% and 90% of the canvas height.
  const initWisps = () => {
    wisps = Array.from({ length: WISP_COUNT }, () => ({
      x: rand(0, W), y: rand(H * 0.3, H * 0.9),
      r: rand(60, 140),
      alpha: rand(0.02, 0.06),
      speed: rand(0.0003, 0.0008),
      phase: rand(0, TAU),
      hue: rand(110, 145),
    }));
  };

  // Paint one wisp as a radial gradient fading to transparent, then nudge it.
  const paintWisp = (w, t) => {
    const pulse = 0.6 + 0.4 * Math.sin(t * w.speed * 1000 + w.phase);
    const a = w.alpha * pulse;
    const glow = pen.createRadialGradient(w.x, w.y, 0, w.x, w.y, w.r);
    glow.addColorStop(0, `hsla(${w.hue},70%,55%,${a})`);
    glow.addColorStop(1, `hsla(${w.hue},70%,40%,0)`);
    pen.beginPath();
    pen.arc(w.x, w.y, w.r, 0, TAU);
    pen.fillStyle = glow;
    pen.fill();
    w.x += Math.sin(t * 0.0003 + w.phase) * 0.3;
    w.y += Math.cos(t * 0.0002 + w.phase) * 0.2;
  };

  // Paint one spore (plus a faint halo on the larger ones), then move it and
  // wrap it around the canvas edges.
  const paintSpore = (p, t) => {
    const twinkle = 0.5 + 0.5 * Math.sin(t * p.flicker * 1000 + p.phase);
    const a = p.alpha * twinkle;

    pen.beginPath();
    pen.arc(p.x, p.y, p.r, 0, TAU);
    pen.fillStyle = `hsla(${p.hue},80%,70%,${a})`;
    pen.fill();

    if (p.r > 1.8) {
      pen.beginPath();
      pen.arc(p.x, p.y, p.r * 2.5, 0, TAU);
      pen.fillStyle = `hsla(${p.hue},80%,70%,${a * 0.15})`;
      pen.fill();
    }

    p.x += p.vx + Math.sin(t * 0.0005 + p.phase) * 0.2;
    p.y += p.vy;

    // Spores leaving the top re-enter at the bottom at a random x position;
    // spores leaving either side re-enter from the opposite side.
    if (p.y < -10) {
      p.y = H + 10;
      p.x = rand(0, W);
    }
    if (p.x < -10) p.x = W + 10;
    if (p.x > W + 10) p.x = -10;
  };

  // Frame callback: clear, draw wisps first so spores sit on top, reschedule.
  const draw = (t) => {
    pen.clearRect(0, 0, W, H);
    for (const w of wisps) paintWisp(w, t);
    for (const p of particles) paintSpore(p, t);
    requestAnimationFrame(draw);
  };

  // (Re)build everything for the current window size.
  const rebuild = () => {
    resize();
    initParticles();
    initWisps();
  };

  window.addEventListener('resize', () => { rebuild(); });
  rebuild();
  requestAnimationFrame(draw);
})();
652
+
653
+ // ── API ────────────────────────────────────────────────────────────────────────
654
  const API = 'http://localhost:8000';
655
  const history = [];
656
 
 
664
  const d = await r.json();
665
  if (d.model_loaded) {
666
  dot.className = 'status-dot online';
667
+ txt.textContent = 'akasha linked';
668
  } else {
669
  dot.className = 'status-dot loading';
670
+ txt.textContent = 'loading...';
671
  setTimeout(checkHealth, 3000);
672
  }
673
  } catch {
674
  dot.className = 'status-dot error';
675
+ txt.textContent = 'offline';
676
  setTimeout(checkHealth, 5000);
677
  }
678
  }
 
681
  const query = document.getElementById('query-input').value.trim();
682
  if (!query) return;
683
 
684
+ const top_k = parseInt(document.getElementById('top-k').value);
685
+ const btn = document.getElementById('send-btn');
686
+ const respArea = document.getElementById('response-area');
687
+ const respBody = document.getElementById('response-body');
688
+ const errCard = document.getElementById('error-card');
689
+ const latTag = document.getElementById('latency-tag');
690
+ const srcList = document.getElementById('sources-list');
691
+
692
+ errCard.className = 'error-card';
693
+ respArea.style.display = 'block';
694
+ respArea.className = 'response-area visible';
695
+ latTag.textContent = '';
696
+ srcList.innerHTML = '';
 
697
  btn.disabled = true;
698
 
699
+ respBody.innerHTML = `
 
700
  <div class="thinking">
701
+ <div class="thinking-dots"><span></span><span></span><span></span></div>
702
+ searching the branches...
 
 
703
  </div>`;
704
 
705
  try {
 
715
  }
716
 
717
  const data = await res.json();
718
+ respBody.textContent = data.answer;
719
 
 
 
 
 
720
  const ms = Math.round(data.latency_ms);
721
+ latTag.textContent = ms >= 1000 ? `${(ms/1000).toFixed(1)}s` : `${ms}ms`;
722
 
 
723
  if (data.sources && data.sources.length > 0) {
724
+ srcList.innerHTML = data.sources.map(s => {
725
  const name = s.split(/[\\/]/).pop();
726
+ return `<span class="source-chip"> ${name}</span>`;
727
  }).join('');
728
  } else {
729
+ srcList.innerHTML = '<span class="no-sources">no records found — run ingest.py</span>';
730
  }
731
 
732
+ addToHistory(query, ms);
 
733
 
734
  } catch (err) {
735
+ respBody.innerHTML = '';
736
+ respArea.style.display = 'none';
737
+ errCard.className = 'error-card visible';
738
+ errCard.textContent = `Error: ${err.message}`;
739
  }
740
 
741
  btn.disabled = false;
742
  }
743
 
744
// Record a completed query and re-render the "Recent queries" list.
//
//   query      - the text the user submitted
//   latency_ms - latency reported for that query, in milliseconds
//
// Only the five most recent entries are kept, newest first. The list is
// rebuilt from DOM nodes with textContent instead of an innerHTML template:
// the query is user-supplied text, and interpolating it into markup would let
// a query containing HTML be parsed (and executed) by the browser.
function addToHistory(query, latency_ms) {
  history.unshift({ query, latency_ms, time: new Date().toLocaleTimeString() });
  if (history.length > 5) history.pop();

  document.getElementById('history-section').style.display = 'block';

  const list = document.getElementById('history-list');
  list.textContent = '';  // drop the previously rendered entries

  history.forEach((h, i) => {
    const item = document.createElement('div');
    item.className = 'history-item';
    // Clicking an entry re-runs it by its index in the history array.
    item.addEventListener('click', () => rerun(i));

    const question = document.createElement('div');
    question.className = 'history-q';
    question.textContent = h.query;

    const meta = document.createElement('div');
    meta.className = 'history-meta';
    meta.textContent = `${h.time} · ${(h.latency_ms / 1000).toFixed(1)}s`;

    item.append(question, meta);
    list.appendChild(item);
  });
}
 
759
  submitQuery();
760
  }
761
 
 
762
  document.getElementById('query-input').addEventListener('keydown', e => {
763
  if (e.key === 'Enter' && (e.ctrlKey || e.metaKey)) {
764
  e.preventDefault();
 
766
  }
767
  });
768
 
 
769
  checkHealth();
770
  </script>
771
  </body>
main.py CHANGED
@@ -1,16 +1,19 @@
1
- from fastapi import FastAPI, HTTPException
2
- from fastapi.middleware.cors import CORSMiddleware
3
- from pydantic import BaseModel
4
- from contextlib import asynccontextmanager
5
  import logging
6
  import time
 
 
 
 
 
7
  from fastapi.responses import FileResponse
 
 
 
8
  from rag import RAGChain
9
 
10
  logging.basicConfig(level=logging.INFO)
11
  logger = logging.getLogger(__name__)
12
 
13
- from typing import Optional
14
  rag_chain: Optional[RAGChain] = None
15
 
16
 
@@ -26,9 +29,9 @@ async def lifespan(app: FastAPI):
26
 
27
 
28
  app = FastAPI(
29
- title="LLMOps RAG API",
30
- description="Llama 3.1 8B QLoRA fine-tuned + Pinecone RAG",
31
- version="1.0.0",
32
  lifespan=lifespan,
33
  )
34
 
@@ -43,37 +46,61 @@ app.add_middleware(
43
  class GenerateRequest(BaseModel):
44
  query: str
45
  top_k: int = 3
46
- max_new_tokens: int = 512
47
 
48
 
49
  class GenerateResponse(BaseModel):
50
  answer: str
51
  sources: list[str]
52
  latency_ms: float
 
53
 
54
 
55
- @app.get("/health")
56
- def health():
57
- return {"status": "ok", "model_loaded": rag_chain is not None and rag_chain.ready}
58
-
59
-
60
  @app.get("/")
61
  def ui():
62
  return FileResponse("index.html")
63
 
64
 
 
 
 
 
 
 
 
 
65
  @app.post("/generate", response_model=GenerateResponse)
66
  def generate(req: GenerateRequest):
67
  if not rag_chain or not rag_chain.ready:
68
- raise HTTPException(status_code=503, detail="Model not loaded yet")
69
- if not req.query.strip():
70
- raise HTTPException(status_code=400, detail="Query cannot be empty")
 
 
 
 
 
 
 
71
 
72
  start = time.time()
73
- answer, sources = rag_chain.query(req.query, top_k=req.top_k, max_new_tokens=req.max_new_tokens)
74
  latency_ms = (time.time() - start) * 1000
75
 
76
- return GenerateResponse(answer=answer, sources=sources, latency_ms=round(latency_ms, 1))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
 
78
 
79
  @app.post("/ingest")
 
 
 
 
 
1
  import logging
2
  import time
3
+ from contextlib import asynccontextmanager
4
+ from typing import Optional
5
+
6
+ from fastapi import FastAPI, HTTPException
7
+ from fastapi.middleware.cors import CORSMiddleware
8
  from fastapi.responses import FileResponse
9
+ from pydantic import BaseModel
10
+
11
+ from guardrails import validate_input, validate_output
12
  from rag import RAGChain
13
 
14
  logging.basicConfig(level=logging.INFO)
15
  logger = logging.getLogger(__name__)
16
 
 
17
  rag_chain: Optional[RAGChain] = None
18
 
19
 
 
29
 
30
 
31
  app = FastAPI(
32
+ title="Irminsul Genshin Impact AI Assistant",
33
+ description="RAG-powered assistant for Genshin Impact lore, builds, and mechanics.",
34
+ version="2.0.0",
35
  lifespan=lifespan,
36
  )
37
 
 
46
  class GenerateRequest(BaseModel):
47
  query: str
48
  top_k: int = 3
 
49
 
50
 
51
  class GenerateResponse(BaseModel):
52
  answer: str
53
  sources: list[str]
54
  latency_ms: float
55
+ blocked: bool = False
56
 
57
 
 
 
 
 
 
58
  @app.get("/")
59
  def ui():
60
  return FileResponse("index.html")
61
 
62
 
63
@app.get("/health")
def health():
    """Return a status payload for the UI's health poll.

    ``model_loaded`` is truthy only when the module-level ``rag_chain`` has
    been created and reports itself as ready.
    """
    chain_ready = rag_chain is not None and rag_chain.ready
    return {"status": "ok", "model_loaded": chain_ready}
69
+
70
+
71
@app.post("/generate", response_model=GenerateResponse)
def generate(req: GenerateRequest):
    """Answer a query through the RAG chain, with guardrail checks on both sides.

    Flow:
      1. Refuse with 503 while the chain is not ready.
      2. Refuse with 400 when ``top_k`` is not a positive number.
      3. Run ``validate_input`` on the query; a rejected query is returned as a
         normal response with ``blocked=True`` and the rejection reason as the
         answer (no retrieval is performed, so latency is reported as 0).
      4. Query the chain and time only that call.
      5. Run ``validate_output`` on the answer; a rejected answer is replaced
         by whatever text the validator returns, sources are withheld and
         ``blocked=True`` is set.

    Raises:
        HTTPException: 503 if the chain is not loaded, 400 if ``top_k`` < 1.
    """
    if not rag_chain or not rag_chain.ready:
        raise HTTPException(status_code=503, detail="Model not loaded yet.")

    # top_k is handed to the retriever as the number of documents to fetch;
    # reject nonsensical values here instead of letting them fail downstream
    # as an unhandled server error.
    if req.top_k < 1:
        raise HTTPException(status_code=400, detail="top_k must be at least 1.")

    allowed, reason = validate_input(req.query)
    if not allowed:
        return GenerateResponse(
            answer=reason,
            sources=[],
            latency_ms=0.0,
            blocked=True,
        )

    # perf_counter is monotonic, so the measured latency cannot be skewed (or
    # go negative) if the system clock is adjusted mid-request.
    start = time.perf_counter()
    answer, sources = rag_chain.query(req.query, top_k=req.top_k)
    latency_ms = round((time.perf_counter() - start) * 1000, 1)

    is_clean, answer = validate_output(answer)
    if not is_clean:
        return GenerateResponse(
            answer=answer,
            sources=[],
            latency_ms=latency_ms,
            blocked=True,
        )

    return GenerateResponse(
        answer=answer,
        sources=sources,
        latency_ms=latency_ms,
        blocked=False,
    )
104
 
105
 
106
  @app.post("/ingest")
pyvenv.cfg ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ home = C:\Users\mukul\AppData\Local\Programs\Python\Python312
2
+ include-system-site-packages = false
3
+ version = 3.12.9
4
+ executable = C:\Users\mukul\AppData\Local\Programs\Python\Python312\python.exe
5
+ command = C:\Users\mukul\AppData\Local\Programs\Python\Python312\python.exe -m venv E:\Projects\llmops-serve\venv
rag.py CHANGED
@@ -4,32 +4,98 @@ import logging
4
  import torch
5
  from dotenv import load_dotenv
6
 
7
- load_dotenv()
8
 
9
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, BitsAndBytesConfig
10
- from langchain_community.vectorstores import Pinecone as LangchainPinecone
11
  from langchain_community.embeddings import HuggingFaceEmbeddings
12
- from langchain.chains import RetrievalQA
13
- from langchain_community.llms import HuggingFacePipeline
14
- from langchain.prompts import PromptTemplate
15
  from pinecone import Pinecone
16
 
17
  logger = logging.getLogger(__name__)
18
 
19
- LOCAL_MODEL = os.getenv("MODEL_PATH", "./models/merged/exp2_lr2e-4_r16")
20
- EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
 
21
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
22
- PINECONE_INDEX = os.getenv("PINECONE_INDEX", "llmops-rag")
 
 
23
 
24
- PROMPT_TEMPLATE = """You are a precise Genshin Impact assistant. Answer ONLY using the context below.
25
- If specific details like weapon names or artifact sets are not in the context, say so — do not invent them.
 
 
26
 
27
  Context:
28
  {context}
29
 
30
  Question: {question}
31
 
32
- Answer (use only information from the context above):"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
 
35
  class RAGChain:
@@ -39,45 +105,8 @@ class RAGChain:
39
  self.vectorstore = None
40
 
41
  def load(self):
42
- logger.info(f"Loading model from {LOCAL_MODEL}")
43
-
44
- # ---- 4-bit quant for 6GB VRAM ----
45
- bnb_config = BitsAndBytesConfig(
46
- load_in_4bit=True,
47
- bnb_4bit_quant_type="nf4",
48
- bnb_4bit_compute_dtype=torch.bfloat16,
49
- bnb_4bit_use_double_quant=True,
50
- )
51
 
52
- tokenizer = AutoTokenizer.from_pretrained(LOCAL_MODEL)
53
- model = AutoModelForCausalLM.from_pretrained(
54
- LOCAL_MODEL,
55
- quantization_config=bnb_config,
56
- device_map="auto",
57
- torch_dtype=torch.bfloat16,
58
- max_memory={0: "5.5GiB", "cpu": "24GiB"},
59
- )
60
- model.eval()
61
-
62
- tokenizer.pad_token = tokenizer.eos_token
63
-
64
- hf_pipe = pipeline(
65
- "text-generation",
66
- model=model,
67
- tokenizer=tokenizer,
68
- max_new_tokens=256,
69
- do_sample=False,
70
- temperature=None,
71
- top_p=None,
72
- repetition_penalty=1.3,
73
- return_full_text=False,
74
- eos_token_id=tokenizer.eos_token_id,
75
- pad_token_id=tokenizer.eos_token_id,
76
- )
77
- llm = HuggingFacePipeline(pipeline=hf_pipe)
78
- logger.info("Model loaded.")
79
-
80
- # ---- Embeddings + Pinecone ----
81
  logger.info("Connecting to Pinecone...")
82
  embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
83
  pc = Pinecone(api_key=PINECONE_API_KEY)
@@ -85,7 +114,6 @@ class RAGChain:
85
  self.vectorstore = LangchainPinecone(index, embeddings, "text")
86
  logger.info("Pinecone connected.")
87
 
88
- # ---- RetrievalQA chain ----
89
  prompt = PromptTemplate(
90
  template=PROMPT_TEMPLATE,
91
  input_variables=["context", "question"],
@@ -98,19 +126,17 @@ class RAGChain:
98
  chain_type_kwargs={"prompt": prompt},
99
  )
100
  self.ready = True
101
- logger.info("RAG chain ready.")
102
 
103
- def query(self, question: str, top_k: int = 3, max_new_tokens: int = 512) -> tuple[str, list[str]]:
104
  if not self.ready:
105
- raise RuntimeError("Chain not loaded")
106
 
107
- # Override retriever k at query time
108
  self.chain.retriever.search_kwargs["k"] = top_k
109
-
110
  result = self.chain.invoke({"query": question})
111
  answer = result["result"].strip().replace("</s>", "").strip()
112
  sources = [
113
  doc.metadata.get("source", "unknown")
114
  for doc in result.get("source_documents", [])
115
  ]
116
- return answer, list(dict.fromkeys(sources)) # deduplicated, order preserved
 
4
  import torch
5
  from dotenv import load_dotenv
6
 
7
+ load_dotenv()
8
 
9
+ from langchain_classic.chains import RetrievalQA
10
+ from langchain_core.prompts import PromptTemplate
11
  from langchain_community.embeddings import HuggingFaceEmbeddings
12
+ from langchain_community.vectorstores import Pinecone as LangchainPinecone
 
 
13
  from pinecone import Pinecone
14
 
15
  logger = logging.getLogger(__name__)
16
 
17
+ LLM_BACKEND = os.getenv("LLM_BACKEND", "groq")
18
+ LOCAL_MODEL = os.getenv("MODEL_PATH", "./models/merged/exp2_lr2e-4_r16")
19
+ EMBED_MODEL = os.getenv("EMBED_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
20
  PINECONE_API_KEY = os.getenv("PINECONE_API_KEY")
21
+ PINECONE_INDEX = os.getenv("PINECONE_INDEX", "llmops-rag")
22
+ GROQ_API_KEY = os.getenv("GROQ_API_KEY")
23
+ GROQ_MODEL = "llama-3.1-8b-instant"
24
 
25
+ PROMPT_TEMPLATE = """You are a knowledgeable Genshin Impact assistant. \
26
+ Answer using ONLY the context provided below. If the context does not \
27
+ contain enough information to answer confidently, say so — do not invent \
28
+ weapon names, artifact sets, or lore details.
29
 
30
  Context:
31
  {context}
32
 
33
  Question: {question}
34
 
35
+ Answer:"""
36
+
37
+
38
def _build_groq_llm():
    """Create the hosted Groq chat model used as the LLM backend.

    ``langchain_groq`` is imported here rather than at module level, so it is
    only needed when this backend is actually selected.

    Raises:
        EnvironmentError: if ``GROQ_API_KEY`` is empty or unset.
    """
    from langchain_groq import ChatGroq

    if GROQ_API_KEY:
        logger.info(f"Using Groq backend — model: {GROQ_MODEL}")
        groq_settings = {
            "api_key": GROQ_API_KEY,
            "model_name": GROQ_MODEL,
            "temperature": 0.2,
            "max_tokens": 512,
        }
        return ChatGroq(**groq_settings)

    raise EnvironmentError("GROQ_API_KEY not set in environment.")
51
+
52
+
53
def _build_local_llm():
    """Load the local checkpoint at ``LOCAL_MODEL`` and wrap it for LangChain.

    The model is loaded with 4-bit NF4 quantization (double quantization,
    bfloat16 compute) and an explicit per-device memory budget, then exposed
    through a greedy ``text-generation`` pipeline. ``transformers`` and the
    LangChain wrapper are imported inside the function, so they are only
    needed when this backend is selected.
    """
    from transformers import (
        AutoModelForCausalLM,
        AutoTokenizer,
        BitsAndBytesConfig,
        pipeline,
    )
    from langchain_community.llms import HuggingFacePipeline

    logger.info(f"Loading local model from {LOCAL_MODEL}")

    quantization = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        bnb_4bit_use_double_quant=True,
    )

    # Reuse the end-of-sequence token for padding.
    tok = AutoTokenizer.from_pretrained(LOCAL_MODEL)
    tok.pad_token = tok.eos_token

    load_options = {
        "quantization_config": quantization,
        "device_map": "auto",
        "torch_dtype": torch.bfloat16,
        "max_memory": {0: "5.5GiB", "cpu": "24GiB"},
    }
    causal_lm = AutoModelForCausalLM.from_pretrained(LOCAL_MODEL, **load_options)
    causal_lm.eval()

    # Greedy decoding: sampling is off, so temperature/top_p are cleared.
    generation_options = {
        "model": causal_lm,
        "tokenizer": tok,
        "max_new_tokens": 256,
        "do_sample": False,
        "temperature": None,
        "top_p": None,
        "repetition_penalty": 1.3,
        "return_full_text": False,
        "eos_token_id": tok.eos_token_id,
        "pad_token_id": tok.eos_token_id,
    }
    text_generator = pipeline("text-generation", **generation_options)

    logger.info("Local model loaded.")
    return HuggingFacePipeline(pipeline=text_generator)
99
 
100
 
101
  class RAGChain:
 
105
  self.vectorstore = None
106
 
107
  def load(self):
108
+ llm = _build_groq_llm() if LLM_BACKEND == "groq" else _build_local_llm()
 
 
 
 
 
 
 
 
109
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  logger.info("Connecting to Pinecone...")
111
  embeddings = HuggingFaceEmbeddings(model_name=EMBED_MODEL)
112
  pc = Pinecone(api_key=PINECONE_API_KEY)
 
114
  self.vectorstore = LangchainPinecone(index, embeddings, "text")
115
  logger.info("Pinecone connected.")
116
 
 
117
  prompt = PromptTemplate(
118
  template=PROMPT_TEMPLATE,
119
  input_variables=["context", "question"],
 
126
  chain_type_kwargs={"prompt": prompt},
127
  )
128
  self.ready = True
129
+ logger.info(f"RAG chain ready — backend: {LLM_BACKEND}")
130
 
131
+ def query(self, question: str, top_k: int = 3) -> tuple[str, list[str]]:
132
  if not self.ready:
133
+ raise RuntimeError("RAG chain is not loaded.")
134
 
 
135
  self.chain.retriever.search_kwargs["k"] = top_k
 
136
  result = self.chain.invoke({"query": question})
137
  answer = result["result"].strip().replace("</s>", "").strip()
138
  sources = [
139
  doc.metadata.get("source", "unknown")
140
  for doc in result.get("source_documents", [])
141
  ]
142
+ return answer, list(dict.fromkeys(sources))
requirements.txt CHANGED
@@ -11,10 +11,13 @@ bitsandbytes==0.43.3 # CPU-compatible for Docker; swap 0.49.2 if CUDA 12.8
11
  accelerate==1.6.0
12
 
13
  # RAG
14
- langchain==0.3.25
15
- langchain-community==0.3.23
16
  pinecone-client==5.0.1
17
  sentence-transformers==4.1.0
18
 
19
  # Utilities
20
- python-dotenv==1.0.1
 
 
 
 
11
  accelerate==1.6.0
12
 
13
  # RAG
14
+ langchain==1.2.13
15
+ langchain-community==0.4.1
16
  pinecone-client==5.0.1
17
  sentence-transformers==4.1.0
18
 
19
  # Utilities
20
+ python-dotenv==1.0.1
21
+
22
+ groq==0.37.1
23
+ langchain-groq==1.1.2