Spaces:

Inpris
/

Humains-Junior

Sleeping

App Files Community

NS-Y committed on Nov 3, 2025

Commit

7d89622

verified ·

1 Parent(s): e79fc25

Update app.py

Browse files

Files changed (1) hide show

app.py +10 -22

app.py CHANGED Viewed

@@ -14,7 +14,7 @@ FRIENDLI_MODEL_ID = os.getenv("FRIENDLI_MODEL_ID", "")   # REQUIRED (Secret)
 # ==============================
 # Tunables (Variables or Secrets)
 # ==============================
-# Per-attempt request timeout (keep modest so we can poll repeatedly)
 PER_REQUEST_TIMEOUT_SEC = int(os.getenv("FRIENDLI_PER_REQUEST_TIMEOUT_SEC", "30"))
 # Total time budget to wait for cold start + retries
 COLD_START_BUDGET_SEC = int(os.getenv("FRIENDLI_COLD_START_BUDGET_SEC", "180"))
@@ -26,8 +26,8 @@ DEFAULT_TEMPERATURE = float(os.getenv("FRIENDLI_TEMPERATURE", "0.0"))
 # Backoff tuning
 BACKOFF_BASE_SEC = float(os.getenv("FRIENDLI_BACKOFF_BASE_SEC", "2.0"))
-BACKOFF_CAP_SEC = float(os.getenv("FRIENDLI_BACKOFF_CAP_SEC", "20.0"))
-JITTER_SEC = float(os.getenv("FRIENDLI_JITTER_SEC", "0.5"))
 # ==============================
 # Appendix-style system prompt (general instructions)
@@ -83,7 +83,6 @@ Context:
 RETRYABLE_HTTP = {408, 429, 500, 502, 503, 504, 522, 524}
 def _sleep_with_budget(seconds, deadline):
-    # Sleep but never go beyond the overall budget
     now = time.monotonic()
     remaining = max(0.0, deadline - now)
     time.sleep(max(0.0, min(seconds, remaining)))
@@ -93,7 +92,6 @@ def _retry_after_seconds(resp):
         ra = resp.headers.get("Retry-After")
         if not ra:
             return None
-        # Retry-After can be seconds or an HTTP-date; treat as seconds if numeric
         return float(ra)
     except Exception:
         return None
@@ -134,25 +132,24 @@ def call_friendly_with_time_budget(messages, max_tokens, temperature):
                 timeout=PER_REQUEST_TIMEOUT_SEC,
             )
             if resp.status_code == 503:
-                # Cold start; honor Retry-After if provided, otherwise use configured wait
                 ra = _retry_after_seconds(resp)
                 wait = ra if ra is not None else (INITIAL_503_WAIT_SEC if not saw_first_503 else BACKOFF_BASE_SEC)
                 saw_first_503 = True
                 if time.monotonic() + wait > deadline:
-                    resp.raise_for_status()  # will throw and exit loop to error
                 _sleep_with_budget(wait, deadline)
                 continue
-            # For other retryable status codes
             if resp.status_code in RETRYABLE_HTTP and time.monotonic() < deadline:
-                # exponential backoff with jitter
                 exp = min(BACKOFF_CAP_SEC, BACKOFF_BASE_SEC * (2 ** min(6, attempt)))
                 wait = exp + random.uniform(0, JITTER_SEC)
                 _sleep_with_budget(wait, deadline)
                 continue
-            # Non-OK without retries left → raise
             resp.raise_for_status()
             data = resp.json()
@@ -170,7 +167,6 @@ def call_friendly_with_time_budget(messages, max_tokens, temperature):
                 wait = exp + random.uniform(0, JITTER_SEC)
                 _sleep_with_budget(wait, deadline)
                 continue
-            # Budget exhausted
             raise gr.Error(
                 f"Friendly API: retry budget exceeded after ~{COLD_START_BUDGET_SEC}s. "
                 "Please try again; the model may have just finished warming."
@@ -206,25 +202,17 @@ PRESET_CTX = (
 with gr.Blocks(title="Humains-Junior (Humains.com) — Exoskeleton Reasoning") as demo:
     gr.Markdown(
-        "# Humains-Junior by Humains.com — Exoskeleton Reasoning (Friendly API)\n"
-        "\n"
-        "### How this demo enforces the prompt rules\n"
-        "- **Message format**: system prompt (general instructions) → a single **User:** block containing **Question + Context**.\n"
         "- **Model behavior**:\n"
         "  1. Outputs two plain-text sections: **Analysis** then **Response**.\n"
-        "  2. When the **question is related to the Context**, it **prioritizes the Context** over internal knowledge—even if the Context is factually wrong.\n"
         "  3. If the **question is unrelated to the Context**, it **may answer normally** (not forced to follow the Context).\n"
-        "  4. Uses **plain text only** in the model output (no bullets/numbering/markdown characters).\n"
-        "- **Resilience**: time-budgeted retries handle long cold starts (default ~180s total budget).\n"
-        "- **Security**: API key, endpoint, and model ID are **server-side secrets**."
-    )
     )
     with gr.Row():
         with gr.Column(scale=3):
             q = gr.Textbox(label="Question", value=PRESET_Q, lines=3)
-            ctx = gr.Textbox(label="Context (only source of truth)", value=PRESET_CTX, lines=8)
             with gr.Row():
                 temp = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")

 # ==============================
 # Tunables (Variables or Secrets)
 # ==============================
+# Per-attempt request timeout (keep modest so we can poll repeatedly during warmup)
 PER_REQUEST_TIMEOUT_SEC = int(os.getenv("FRIENDLI_PER_REQUEST_TIMEOUT_SEC", "30"))
 # Total time budget to wait for cold start + retries
 COLD_START_BUDGET_SEC = int(os.getenv("FRIENDLI_COLD_START_BUDGET_SEC", "180"))
 # Backoff tuning
 BACKOFF_BASE_SEC = float(os.getenv("FRIENDLI_BACKOFF_BASE_SEC", "2.0"))
+BACKOFF_CAP_SEC  = float(os.getenv("FRIENDLI_BACKOFF_CAP_SEC", "20.0"))
+JITTER_SEC       = float(os.getenv("FRIENDLI_JITTER_SEC", "0.5"))
 # ==============================
 # Appendix-style system prompt (general instructions)
 RETRYABLE_HTTP = {408, 429, 500, 502, 503, 504, 522, 524}
 # Sleep for up to `seconds`, truncated so the sleep never runs past `deadline`.
 # `deadline` is compared against time.monotonic(), so it must be a timestamp
 # taken from that same clock. If the deadline has already passed, this sleeps 0s.
 def _sleep_with_budget(seconds, deadline):
     now = time.monotonic()
     # Time left before the deadline, clamped so it is never negative.
     remaining = max(0.0, deadline - now)
     # Sleep the shorter of the requested wait and the remaining time (never < 0).
     time.sleep(max(0.0, min(seconds, remaining)))
         ra = resp.headers.get("Retry-After")
         if not ra:
             return None
         return float(ra)
     except Exception:
         return None
                 timeout=PER_REQUEST_TIMEOUT_SEC,
             )
+            # 503: cold start; wait then retry (honor Retry-After if provided)
             if resp.status_code == 503:
                 ra = _retry_after_seconds(resp)
                 wait = ra if ra is not None else (INITIAL_503_WAIT_SEC if not saw_first_503 else BACKOFF_BASE_SEC)
                 saw_first_503 = True
                 if time.monotonic() + wait > deadline:
+                    resp.raise_for_status()
                 _sleep_with_budget(wait, deadline)
                 continue
+            # Other retryable statuses (rate limit / transient errors)
             if resp.status_code in RETRYABLE_HTTP and time.monotonic() < deadline:
                 exp = min(BACKOFF_CAP_SEC, BACKOFF_BASE_SEC * (2 ** min(6, attempt)))
                 wait = exp + random.uniform(0, JITTER_SEC)
                 _sleep_with_budget(wait, deadline)
                 continue
+            # Non-OK without remaining budget → raise
             resp.raise_for_status()
             data = resp.json()
                 wait = exp + random.uniform(0, JITTER_SEC)
                 _sleep_with_budget(wait, deadline)
                 continue
             raise gr.Error(
                 f"Friendly API: retry budget exceeded after ~{COLD_START_BUDGET_SEC}s. "
                 "Please try again; the model may have just finished warming."
 with gr.Blocks(title="Humains-Junior (Humains.com) — Exoskeleton Reasoning") as demo:
     gr.Markdown(
+        "# Humains-Junior by Humains.com — Exoskeleton Reasoning (Friendly API)\n\n"
         "- **Model behavior**:\n"
         "  1. Outputs two plain-text sections: **Analysis** then **Response**.\n"
+        "  2. When the **question is related to the Context**, it **prioritizes the Context** over internal knowledge, even if the Context is factually wrong.\n"
         "  3. If the **question is unrelated to the Context**, it **may answer normally** (not forced to follow the Context).\n"
     )
     with gr.Row():
         with gr.Column(scale=3):
             q = gr.Textbox(label="Question", value=PRESET_Q, lines=3)
+            ctx = gr.Textbox(label="Context (only source of truth when related)", value=PRESET_CTX, lines=8)
             with gr.Row():
                 temp = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")