Spaces:

build-small-hackathon
/

First-Principle-AI

Paused

App Files Files Community

owenisas committed 7 days ago

Commit

cf3dbbf

verified ·

1 Parent(s): cd2e1fb

Polish First-Principle AI interface

Browse files

Files changed (1) hide show

app.py +100 -55

app.py CHANGED Viewed

@@ -238,29 +238,22 @@ def _status_markdown() -> str:
     total_gb, available_gb = _meminfo_gb()
     size = _repo_file_size()
     size_text = f"{size / (1024 ** 3):.1f} GB" if size else "unknown"
-    local_state = "present" if LOCAL_MODEL_PATH.exists() else "not present"
     llama_state = "importable" if Llama is not None else f"missing ({LLAMA_IMPORT_ERROR})"
     spaces_state = "importable" if spaces is not None else "not importable"
-    model_state = "loaded" if MODEL is not None else ("error" if MODEL_ERROR else "not loaded")
-    model_path = str(MODEL_PATH) if MODEL_PATH else "not resolved"
-    total_text = f"{total_gb:.1f} GB" if total_gb is not None else "unknown"
     available_text = f"{available_gb:.1f} GB" if available_gb is not None else "unknown"
-    env = _safe_env_summary()
-    env_lines = "\n".join(f"- `{k}`: `{v}`" for k, v in env.items()) or "- No selected runtime env vars exposed."
-    return f"""### Runtime
-- Model repo: `{MODEL_REPO}`
-- Model file: `{MODEL_FILE}` ({size_text})
-- Local development source: `{local_state}`
-- Resolved model path: `{model_path}`
-- llama.cpp runtime: `{llama_state}`
-- ZeroGPU helper package: `{spaces_state}`
-- Model state: `{model_state}`
-- RAM: `{available_text}` available / `{total_text}` total
-- Python: `{platform.python_version()}`
-### Selected Environment
-{env_lines}
 """
@@ -314,13 +307,11 @@ def respond(
         text, meta = _complete(prompt, max_tokens, temperature, top_p, repeat_penalty)
     except Exception as exc:
         text = (
-            "Runtime compatibility check failed.\n\n"
             f"{exc}\n\n"
-            "This Space is configured for ZeroGPU, but the uploaded asset is a 31 GB Q8 GGUF. "
-            "ZeroGPU is primarily a Gradio/PyTorch dynamic GPU runtime, while this app uses "
-            "llama.cpp through llama-cpp-python. If the runtime cannot expose enough RAM or a "
-            "compatible llama.cpp CUDA backend, the model is intentionally not loaded instead "
-            "of crashing the Space."
         )
         meta = {"elapsed": 0.0, "completion_tokens": len(text.split()), "tokens_per_second": 0.0}
@@ -331,40 +322,46 @@ def respond(
 CSS = """
 :root {
-  --phase-bg: #080b10;
-  --phase-panel: #111820;
-  --phase-panel-2: #0d131a;
-  --phase-border: #26323f;
-  --phase-text: #e8edf2;
-  --phase-muted: #9aa8b5;
-  --phase-accent: #4fb3ff;
-  --phase-good: #66d68a;
 }
 .gradio-container {
   background: var(--phase-bg) !important;
   color: var(--phase-text) !important;
   max-width: none !important;
 }
 .phase-shell {
-  max-width: 1440px;
   margin: 0 auto;
 }
 .phase-title {
   border: 1px solid var(--phase-border);
-  background: linear-gradient(180deg, #121a23, #0b1118);
-  padding: 16px 18px;
-  border-radius: 8px;
-  margin-bottom: 12px;
 }
 .phase-title h1 {
-  font-size: 24px;
   line-height: 1.15;
-  margin: 0 0 6px;
   letter-spacing: 0;
 }
 .phase-title p {
   color: var(--phase-muted);
   margin: 0;
 }
 .phase-badge-row {
   display: flex;
@@ -374,43 +371,84 @@ CSS = """
 }
 .phase-badge {
   border: 1px solid var(--phase-border);
-  background: #0c131b;
   color: var(--phase-muted);
-  border-radius: 999px;
-  padding: 5px 9px;
   font-size: 12px;
 }
 .phase-badge strong {
   color: var(--phase-text);
   font-weight: 650;
 }
-.panel {
   border-color: var(--phase-border) !important;
-  background: var(--phase-panel) !important;
-  border-radius: 8px !important;
 }
-label, .wrap, .prose, .markdown-body {
   color: var(--phase-text) !important;
 }
-textarea, input {
-  background: #0b1118 !important;
   color: var(--phase-text) !important;
   border-color: var(--phase-border) !important;
 }
 button.primary {
   background: var(--phase-accent) !important;
-  color: #06101a !important;
 }
 .message {
   border-radius: 8px !important;
 }
 .chatbot {
-  background: var(--phase-panel-2) !important;
   border: 1px solid var(--phase-border) !important;
 }
 @media (max-width: 900px) {
   .phase-title h1 {
-    font-size: 20px;
   }
 }
 """
@@ -422,11 +460,11 @@ with gr.Blocks(title="First-Principle AI", fill_width=True) as demo:
             """
             <div class="phase-title">
               <h1>First-Principle AI</h1>
-              <p>A dense Gradio console for probing the Phase-3 Q8 GGUF with visible runtime diagnostics.</p>
               <div class="phase-badge-row">
                 <span class="phase-badge"><strong>Model</strong> build-small-hackathon/phase-3-gguf</span>
                 <span class="phase-badge"><strong>Runtime</strong> llama.cpp via llama-cpp-python</span>
-                <span class="phase-badge"><strong>Hardware target</strong> ZeroGPU with guarded fallback</span>
               </div>
             </div>
             """
@@ -463,6 +501,13 @@ with gr.Blocks(title="First-Principle AI", fill_width=True) as demo:
                 )
             with gr.Column(scale=4, min_width=320):
                 system_prompt = gr.Textbox(
                     label="System prompt",
                     value="You are First-Principle AI in a model lab. Be direct, technical, and evidence-oriented.",

     total_gb, available_gb = _meminfo_gb()
     size = _repo_file_size()
     size_text = f"{size / (1024 ** 3):.1f} GB" if size else "unknown"
     llama_state = "importable" if Llama is not None else f"missing ({LLAMA_IMPORT_ERROR})"
     spaces_state = "importable" if spaces is not None else "not importable"
+    model_state = "Loaded" if MODEL is not None else ("Guarded" if MODEL_ERROR else "Standby")
     available_text = f"{available_gb:.1f} GB" if available_gb is not None else "unknown"
+    return f"""### Model Status
+**{model_state}** - public demo mode keeps the Space responsive.
+| Check | Value |
+| --- | --- |
+| Model | `{MODEL_REPO}` |
+| File | `{MODEL_FILE}` ({size_text}) |
+| Runtime | `llama.cpp` {llama_state}; ZeroGPU helper {spaces_state} |
+| Available RAM | {available_text} |
+The model is a large Q8 GGUF. This Space does not automatically pull and load it unless `PHASE3_FORCE_LOAD=1` is set by the Space owner.
 """
         text, meta = _complete(prompt, max_tokens, temperature, top_p, repeat_penalty)
     except Exception as exc:
         text = (
+            "Model loading is intentionally gated.\n\n"
             f"{exc}\n\n"
+            "The UI is live and the model artifact is published, but this public Space is configured "
+            "to avoid an automatic 31 GB runtime download. To enable real inference, set "
+            "`PHASE3_FORCE_LOAD=1` after confirming the llama.cpp backend and Space hardware."
         )
         meta = {"elapsed": 0.0, "completion_tokens": len(text.split()), "tokens_per_second": 0.0}
 CSS = """
 :root {
+  --phase-bg: #f6f8fb;
+  --phase-panel: #ffffff;
+  --phase-panel-soft: #f9fafb;
+  --phase-border: #d8dee8;
+  --phase-text: #111827;
+  --phase-muted: #5f6b7a;
+  --phase-accent: #2563eb;
+  --phase-accent-dark: #1d4ed8;
 }
 .gradio-container {
   background: var(--phase-bg) !important;
   color: var(--phase-text) !important;
   max-width: none !important;
+  font-family: Inter, ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif !important;
 }
 .phase-shell {
+  max-width: 1180px;
   margin: 0 auto;
+  padding: 24px 18px 40px;
 }
 .phase-title {
   border: 1px solid var(--phase-border);
+  background: linear-gradient(180deg, #ffffff, #eef4ff);
+  padding: 22px 24px;
+  border-radius: 10px;
+  margin-bottom: 18px;
+  box-shadow: 0 12px 34px rgba(31, 41, 55, 0.08);
 }
 .phase-title h1 {
+  color: var(--phase-text);
+  font-size: 30px;
   line-height: 1.15;
+  margin: 0 0 8px;
   letter-spacing: 0;
 }
 .phase-title p {
   color: var(--phase-muted);
+  font-size: 15px;
   margin: 0;
+  max-width: 760px;
 }
 .phase-badge-row {
   display: flex;
 }
 .phase-badge {
   border: 1px solid var(--phase-border);
+  background: #ffffff;
   color: var(--phase-muted);
+  border-radius: 7px;
+  padding: 7px 10px;
   font-size: 12px;
 }
 .phase-badge strong {
   color: var(--phase-text);
   font-weight: 650;
 }
+.gradio-container .block {
   border-color: var(--phase-border) !important;
+  border-radius: 10px !important;
+  box-shadow: none !important;
 }
+.gradio-container label,
+.gradio-container .wrap,
+.gradio-container .prose,
+.gradio-container .markdown-body,
+.gradio-container .svelte-1gfkn6j,
+.gradio-container .svelte-1hguek3 {
   color: var(--phase-text) !important;
 }
+textarea,
+input {
+  background: #ffffff !important;
   color: var(--phase-text) !important;
   border-color: var(--phase-border) !important;
 }
+textarea::placeholder {
+  color: #8a95a5 !important;
+}
 button.primary {
   background: var(--phase-accent) !important;
+  color: #ffffff !important;
+  border-color: var(--phase-accent) !important;
+}
+button.primary:hover {
+  background: var(--phase-accent-dark) !important;
 }
 .message {
   border-radius: 8px !important;
 }
 .chatbot {
+  background: #ffffff !important;
   border: 1px solid var(--phase-border) !important;
+  min-height: 560px;
+}
+.chatbot .message,
+.chatbot .bubble-wrap {
+  color: var(--phase-text) !important;
+}
+.phase-side-note {
+  border: 1px solid #bfdbfe;
+  background: #eff6ff;
+  color: #1e3a8a;
+  border-radius: 10px;
+  padding: 12px 14px;
+  margin-bottom: 12px;
+  font-size: 13px;
+  line-height: 1.45;
+}
+.phase-side-note strong {
+  color: #1e40af;
+}
+.gradio-container table {
+  background: #ffffff !important;
+  color: var(--phase-text) !important;
+}
+.gradio-container code {
+  background: #eef2f7 !important;
+  color: #111827 !important;
+  border-radius: 4px;
+  padding: 1px 4px;
 }
 @media (max-width: 900px) {
   .phase-title h1 {
+    font-size: 24px;
   }
 }
 """
             """
             <div class="phase-title">
               <h1>First-Principle AI</h1>
+              <p>A clean model-console interface for probing the Phase-3 Q8 GGUF with transparent runtime status.</p>
               <div class="phase-badge-row">
                 <span class="phase-badge"><strong>Model</strong> build-small-hackathon/phase-3-gguf</span>
                 <span class="phase-badge"><strong>Runtime</strong> llama.cpp via llama-cpp-python</span>
+                <span class="phase-badge"><strong>Mode</strong> guarded public demo</span>
               </div>
             </div>
             """
                 )
             with gr.Column(scale=4, min_width=320):
+                gr.HTML(
+                    """
+                    <div class="phase-side-note">
+                      <strong>Status:</strong> UI is live. The large Q8 GGUF is published in the org model repo, and automatic loading is gated to keep the public Space responsive.
+                    </div>
+                    """
+                )
                 system_prompt = gr.Textbox(
                     label="System prompt",
                     value="You are First-Principle AI in a model lab. Be direct, technical, and evidence-oriented.",