owenisas committed on
Commit
cf3dbbf
·
verified ·
1 Parent(s): cd2e1fb

Polish First-Principle AI interface

Browse files
Files changed (1) hide show
  1. app.py +100 -55
app.py CHANGED
@@ -238,29 +238,22 @@ def _status_markdown() -> str:
238
  total_gb, available_gb = _meminfo_gb()
239
  size = _repo_file_size()
240
  size_text = f"{size / (1024 ** 3):.1f} GB" if size else "unknown"
241
- local_state = "present" if LOCAL_MODEL_PATH.exists() else "not present"
242
  llama_state = "importable" if Llama is not None else f"missing ({LLAMA_IMPORT_ERROR})"
243
  spaces_state = "importable" if spaces is not None else "not importable"
244
- model_state = "loaded" if MODEL is not None else ("error" if MODEL_ERROR else "not loaded")
245
- model_path = str(MODEL_PATH) if MODEL_PATH else "not resolved"
246
- total_text = f"{total_gb:.1f} GB" if total_gb is not None else "unknown"
247
  available_text = f"{available_gb:.1f} GB" if available_gb is not None else "unknown"
248
- env = _safe_env_summary()
249
- env_lines = "\n".join(f"- `{k}`: `{v}`" for k, v in env.items()) or "- No selected runtime env vars exposed."
250
-
251
- return f"""### Runtime
252
- - Model repo: `{MODEL_REPO}`
253
- - Model file: `{MODEL_FILE}` ({size_text})
254
- - Local development source: `{local_state}`
255
- - Resolved model path: `{model_path}`
256
- - llama.cpp runtime: `{llama_state}`
257
- - ZeroGPU helper package: `{spaces_state}`
258
- - Model state: `{model_state}`
259
- - RAM: `{available_text}` available / `{total_text}` total
260
- - Python: `{platform.python_version()}`
261
-
262
- ### Selected Environment
263
- {env_lines}
264
  """
265
 
266
 
@@ -314,13 +307,11 @@ def respond(
314
  text, meta = _complete(prompt, max_tokens, temperature, top_p, repeat_penalty)
315
  except Exception as exc:
316
  text = (
317
- "Runtime compatibility check failed.\n\n"
318
  f"{exc}\n\n"
319
- "This Space is configured for ZeroGPU, but the uploaded asset is a 31 GB Q8 GGUF. "
320
- "ZeroGPU is primarily a Gradio/PyTorch dynamic GPU runtime, while this app uses "
321
- "llama.cpp through llama-cpp-python. If the runtime cannot expose enough RAM or a "
322
- "compatible llama.cpp CUDA backend, the model is intentionally not loaded instead "
323
- "of crashing the Space."
324
  )
325
  meta = {"elapsed": 0.0, "completion_tokens": len(text.split()), "tokens_per_second": 0.0}
326
 
@@ -331,40 +322,46 @@ def respond(
331
 
332
  CSS = """
333
  :root {
334
- --phase-bg: #080b10;
335
- --phase-panel: #111820;
336
- --phase-panel-2: #0d131a;
337
- --phase-border: #26323f;
338
- --phase-text: #e8edf2;
339
- --phase-muted: #9aa8b5;
340
- --phase-accent: #4fb3ff;
341
- --phase-good: #66d68a;
342
  }
343
  .gradio-container {
344
  background: var(--phase-bg) !important;
345
  color: var(--phase-text) !important;
346
  max-width: none !important;
 
347
  }
348
  .phase-shell {
349
- max-width: 1440px;
350
  margin: 0 auto;
 
351
  }
352
  .phase-title {
353
  border: 1px solid var(--phase-border);
354
- background: linear-gradient(180deg, #121a23, #0b1118);
355
- padding: 16px 18px;
356
- border-radius: 8px;
357
- margin-bottom: 12px;
 
358
  }
359
  .phase-title h1 {
360
- font-size: 24px;
 
361
  line-height: 1.15;
362
- margin: 0 0 6px;
363
  letter-spacing: 0;
364
  }
365
  .phase-title p {
366
  color: var(--phase-muted);
 
367
  margin: 0;
 
368
  }
369
  .phase-badge-row {
370
  display: flex;
@@ -374,43 +371,84 @@ CSS = """
374
  }
375
  .phase-badge {
376
  border: 1px solid var(--phase-border);
377
- background: #0c131b;
378
  color: var(--phase-muted);
379
- border-radius: 999px;
380
- padding: 5px 9px;
381
  font-size: 12px;
382
  }
383
  .phase-badge strong {
384
  color: var(--phase-text);
385
  font-weight: 650;
386
  }
387
- .panel {
388
  border-color: var(--phase-border) !important;
389
- background: var(--phase-panel) !important;
390
- border-radius: 8px !important;
391
  }
392
- label, .wrap, .prose, .markdown-body {
 
 
 
 
 
393
  color: var(--phase-text) !important;
394
  }
395
- textarea, input {
396
- background: #0b1118 !important;
 
397
  color: var(--phase-text) !important;
398
  border-color: var(--phase-border) !important;
399
  }
 
 
 
400
  button.primary {
401
  background: var(--phase-accent) !important;
402
- color: #06101a !important;
 
 
 
 
403
  }
404
  .message {
405
  border-radius: 8px !important;
406
  }
407
  .chatbot {
408
- background: var(--phase-panel-2) !important;
409
  border: 1px solid var(--phase-border) !important;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
  }
411
  @media (max-width: 900px) {
412
  .phase-title h1 {
413
- font-size: 20px;
414
  }
415
  }
416
  """
@@ -422,11 +460,11 @@ with gr.Blocks(title="First-Principle AI", fill_width=True) as demo:
422
  """
423
  <div class="phase-title">
424
  <h1>First-Principle AI</h1>
425
- <p>A dense Gradio console for probing the Phase-3 Q8 GGUF with visible runtime diagnostics.</p>
426
  <div class="phase-badge-row">
427
  <span class="phase-badge"><strong>Model</strong> build-small-hackathon/phase-3-gguf</span>
428
  <span class="phase-badge"><strong>Runtime</strong> llama.cpp via llama-cpp-python</span>
429
- <span class="phase-badge"><strong>Hardware target</strong> ZeroGPU with guarded fallback</span>
430
  </div>
431
  </div>
432
  """
@@ -463,6 +501,13 @@ with gr.Blocks(title="First-Principle AI", fill_width=True) as demo:
463
  )
464
 
465
  with gr.Column(scale=4, min_width=320):
 
 
 
 
 
 
 
466
  system_prompt = gr.Textbox(
467
  label="System prompt",
468
  value="You are First-Principle AI in a model lab. Be direct, technical, and evidence-oriented.",
 
238
  total_gb, available_gb = _meminfo_gb()
239
  size = _repo_file_size()
240
  size_text = f"{size / (1024 ** 3):.1f} GB" if size else "unknown"
 
241
  llama_state = "importable" if Llama is not None else f"missing ({LLAMA_IMPORT_ERROR})"
242
  spaces_state = "importable" if spaces is not None else "not importable"
243
+ model_state = "Loaded" if MODEL is not None else ("Guarded" if MODEL_ERROR else "Standby")
 
 
244
  available_text = f"{available_gb:.1f} GB" if available_gb is not None else "unknown"
245
+
246
+ return f"""### Model Status
247
+ **{model_state}** - public demo mode keeps the Space responsive.
248
+
249
+ | Check | Value |
250
+ | --- | --- |
251
+ | Model | `{MODEL_REPO}` |
252
+ | File | `{MODEL_FILE}` ({size_text}) |
253
+ | Runtime | `llama.cpp` {llama_state}; ZeroGPU helper {spaces_state} |
254
+ | Available RAM | {available_text} |
255
+
256
+ The model is a large Q8 GGUF. This Space does not automatically pull and load it unless `PHASE3_FORCE_LOAD=1` is set by the Space owner.
 
 
 
 
257
  """
258
 
259
 
 
307
  text, meta = _complete(prompt, max_tokens, temperature, top_p, repeat_penalty)
308
  except Exception as exc:
309
  text = (
310
+ "Model loading is intentionally gated.\n\n"
311
  f"{exc}\n\n"
312
+ "The UI is live and the model artifact is published, but this public Space is configured "
313
+ "to avoid an automatic 31 GB runtime download. To enable real inference, set "
314
+ "`PHASE3_FORCE_LOAD=1` after confirming the llama.cpp backend and Space hardware."
 
 
315
  )
316
  meta = {"elapsed": 0.0, "completion_tokens": len(text.split()), "tokens_per_second": 0.0}
317
 
 
322
 
323
  CSS = """
324
  :root {
325
+ --phase-bg: #f6f8fb;
326
+ --phase-panel: #ffffff;
327
+ --phase-panel-soft: #f9fafb;
328
+ --phase-border: #d8dee8;
329
+ --phase-text: #111827;
330
+ --phase-muted: #5f6b7a;
331
+ --phase-accent: #2563eb;
332
+ --phase-accent-dark: #1d4ed8;
333
  }
334
  .gradio-container {
335
  background: var(--phase-bg) !important;
336
  color: var(--phase-text) !important;
337
  max-width: none !important;
338
+ font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif !important;
339
  }
340
  .phase-shell {
341
+ max-width: 1180px;
342
  margin: 0 auto;
343
+ padding: 24px 18px 40px;
344
  }
345
  .phase-title {
346
  border: 1px solid var(--phase-border);
347
+ background: linear-gradient(180deg, #ffffff, #eef4ff);
348
+ padding: 22px 24px;
349
+ border-radius: 10px;
350
+ margin-bottom: 18px;
351
+ box-shadow: 0 12px 34px rgba(31, 41, 55, 0.08);
352
  }
353
  .phase-title h1 {
354
+ color: var(--phase-text);
355
+ font-size: 30px;
356
  line-height: 1.15;
357
+ margin: 0 0 8px;
358
  letter-spacing: 0;
359
  }
360
  .phase-title p {
361
  color: var(--phase-muted);
362
+ font-size: 15px;
363
  margin: 0;
364
+ max-width: 760px;
365
  }
366
  .phase-badge-row {
367
  display: flex;
 
371
  }
372
  .phase-badge {
373
  border: 1px solid var(--phase-border);
374
+ background: #ffffff;
375
  color: var(--phase-muted);
376
+ border-radius: 7px;
377
+ padding: 7px 10px;
378
  font-size: 12px;
379
  }
380
  .phase-badge strong {
381
  color: var(--phase-text);
382
  font-weight: 650;
383
  }
384
+ .gradio-container .block {
385
  border-color: var(--phase-border) !important;
386
+ border-radius: 10px !important;
387
+ box-shadow: none !important;
388
  }
389
+ .gradio-container label,
390
+ .gradio-container .wrap,
391
+ .gradio-container .prose,
392
+ .gradio-container .markdown-body,
393
+ .gradio-container .svelte-1gfkn6j,
394
+ .gradio-container .svelte-1hguek3 {
395
  color: var(--phase-text) !important;
396
  }
397
+ textarea,
398
+ input {
399
+ background: #ffffff !important;
400
  color: var(--phase-text) !important;
401
  border-color: var(--phase-border) !important;
402
  }
403
+ textarea::placeholder {
404
+ color: #8a95a5 !important;
405
+ }
406
  button.primary {
407
  background: var(--phase-accent) !important;
408
+ color: #ffffff !important;
409
+ border-color: var(--phase-accent) !important;
410
+ }
411
+ button.primary:hover {
412
+ background: var(--phase-accent-dark) !important;
413
  }
414
  .message {
415
  border-radius: 8px !important;
416
  }
417
  .chatbot {
418
+ background: #ffffff !important;
419
  border: 1px solid var(--phase-border) !important;
420
+ min-height: 560px;
421
+ }
422
+ .chatbot .message,
423
+ .chatbot .bubble-wrap {
424
+ color: var(--phase-text) !important;
425
+ }
426
+ .phase-side-note {
427
+ border: 1px solid #bfdbfe;
428
+ background: #eff6ff;
429
+ color: #1e3a8a;
430
+ border-radius: 10px;
431
+ padding: 12px 14px;
432
+ margin-bottom: 12px;
433
+ font-size: 13px;
434
+ line-height: 1.45;
435
+ }
436
+ .phase-side-note strong {
437
+ color: #1e40af;
438
+ }
439
+ .gradio-container table {
440
+ background: #ffffff !important;
441
+ color: var(--phase-text) !important;
442
+ }
443
+ .gradio-container code {
444
+ background: #eef2f7 !important;
445
+ color: #111827 !important;
446
+ border-radius: 4px;
447
+ padding: 1px 4px;
448
  }
449
  @media (max-width: 900px) {
450
  .phase-title h1 {
451
+ font-size: 24px;
452
  }
453
  }
454
  """
 
460
  """
461
  <div class="phase-title">
462
  <h1>First-Principle AI</h1>
463
+ <p>A clean model-console interface for probing the Phase-3 Q8 GGUF with transparent runtime status.</p>
464
  <div class="phase-badge-row">
465
  <span class="phase-badge"><strong>Model</strong> build-small-hackathon/phase-3-gguf</span>
466
  <span class="phase-badge"><strong>Runtime</strong> llama.cpp via llama-cpp-python</span>
467
+ <span class="phase-badge"><strong>Mode</strong> guarded public demo</span>
468
  </div>
469
  </div>
470
  """
 
501
  )
502
 
503
  with gr.Column(scale=4, min_width=320):
504
+ gr.HTML(
505
+ """
506
+ <div class="phase-side-note">
507
+ <strong>Status:</strong> UI is live. The large Q8 GGUF is published in the org model repo, and automatic loading is gated to keep the public Space responsive.
508
+ </div>
509
+ """
510
+ )
511
  system_prompt = gr.Textbox(
512
  label="System prompt",
513
  value="You are First-Principle AI in a model lab. Be direct, technical, and evidence-oriented.",