Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -14,7 +14,7 @@ FRIENDLI_MODEL_ID = os.getenv("FRIENDLI_MODEL_ID", "") # REQUIRED (Secret)
|
|
| 14 |
# ==============================
|
| 15 |
# Tunables (Variables or Secrets)
|
| 16 |
# ==============================
|
| 17 |
-
# Per-attempt request timeout (keep modest so we can poll repeatedly)
|
| 18 |
PER_REQUEST_TIMEOUT_SEC = int(os.getenv("FRIENDLI_PER_REQUEST_TIMEOUT_SEC", "30"))
|
| 19 |
# Total time budget to wait for cold start + retries
|
| 20 |
COLD_START_BUDGET_SEC = int(os.getenv("FRIENDLI_COLD_START_BUDGET_SEC", "180"))
|
|
@@ -26,8 +26,8 @@ DEFAULT_TEMPERATURE = float(os.getenv("FRIENDLI_TEMPERATURE", "0.0"))
|
|
| 26 |
|
| 27 |
# Backoff tuning
|
| 28 |
BACKOFF_BASE_SEC = float(os.getenv("FRIENDLI_BACKOFF_BASE_SEC", "2.0"))
|
| 29 |
-
BACKOFF_CAP_SEC
|
| 30 |
-
JITTER_SEC
|
| 31 |
|
| 32 |
# ==============================
|
| 33 |
# Appendix-style system prompt (general instructions)
|
|
@@ -83,7 +83,6 @@ Context:
|
|
| 83 |
RETRYABLE_HTTP = {408, 429, 500, 502, 503, 504, 522, 524}
|
| 84 |
|
| 85 |
def _sleep_with_budget(seconds, deadline):
|
| 86 |
-
# Sleep but never go beyond the overall budget
|
| 87 |
now = time.monotonic()
|
| 88 |
remaining = max(0.0, deadline - now)
|
| 89 |
time.sleep(max(0.0, min(seconds, remaining)))
|
|
@@ -93,7 +92,6 @@ def _retry_after_seconds(resp):
|
|
| 93 |
ra = resp.headers.get("Retry-After")
|
| 94 |
if not ra:
|
| 95 |
return None
|
| 96 |
-
# Retry-After can be seconds or an HTTP-date; treat as seconds if numeric
|
| 97 |
return float(ra)
|
| 98 |
except Exception:
|
| 99 |
return None
|
|
@@ -134,25 +132,24 @@ def call_friendly_with_time_budget(messages, max_tokens, temperature):
|
|
| 134 |
timeout=PER_REQUEST_TIMEOUT_SEC,
|
| 135 |
)
|
| 136 |
|
|
|
|
| 137 |
if resp.status_code == 503:
|
| 138 |
-
# Cold start; honor Retry-After if provided, otherwise use configured wait
|
| 139 |
ra = _retry_after_seconds(resp)
|
| 140 |
wait = ra if ra is not None else (INITIAL_503_WAIT_SEC if not saw_first_503 else BACKOFF_BASE_SEC)
|
| 141 |
saw_first_503 = True
|
| 142 |
if time.monotonic() + wait > deadline:
|
| 143 |
-
resp.raise_for_status()
|
| 144 |
_sleep_with_budget(wait, deadline)
|
| 145 |
continue
|
| 146 |
|
| 147 |
-
#
|
| 148 |
if resp.status_code in RETRYABLE_HTTP and time.monotonic() < deadline:
|
| 149 |
-
# exponential backoff with jitter
|
| 150 |
exp = min(BACKOFF_CAP_SEC, BACKOFF_BASE_SEC * (2 ** min(6, attempt)))
|
| 151 |
wait = exp + random.uniform(0, JITTER_SEC)
|
| 152 |
_sleep_with_budget(wait, deadline)
|
| 153 |
continue
|
| 154 |
|
| 155 |
-
# Non-OK without
|
| 156 |
resp.raise_for_status()
|
| 157 |
|
| 158 |
data = resp.json()
|
|
@@ -170,7 +167,6 @@ def call_friendly_with_time_budget(messages, max_tokens, temperature):
|
|
| 170 |
wait = exp + random.uniform(0, JITTER_SEC)
|
| 171 |
_sleep_with_budget(wait, deadline)
|
| 172 |
continue
|
| 173 |
-
# Budget exhausted
|
| 174 |
raise gr.Error(
|
| 175 |
f"Friendly API: retry budget exceeded after ~{COLD_START_BUDGET_SEC}s. "
|
| 176 |
"Please try again; the model may have just finished warming."
|
|
@@ -206,25 +202,17 @@ PRESET_CTX = (
|
|
| 206 |
|
| 207 |
with gr.Blocks(title="Humains-Junior (Humains.com) — Exoskeleton Reasoning") as demo:
|
| 208 |
gr.Markdown(
|
| 209 |
-
"# Humains-Junior by Humains.com — Exoskeleton Reasoning (Friendly API)\n"
|
| 210 |
-
"\n"
|
| 211 |
-
"### How this demo enforces the prompt rules\n"
|
| 212 |
-
"- **Message format**: system prompt (general instructions) → a single **User:** block containing **Question + Context**.\n"
|
| 213 |
"- **Model behavior**:\n"
|
| 214 |
" 1. Outputs two plain-text sections: **Analysis** then **Response**.\n"
|
| 215 |
-
" 2. When the **question is related to the Context**, it **prioritizes the Context** over internal knowledge
|
| 216 |
" 3. If the **question is unrelated to the Context**, it **may answer normally** (not forced to follow the Context).\n"
|
| 217 |
-
" 4. Uses **plain text only** in the model output (no bullets/numbering/markdown characters).\n"
|
| 218 |
-
"- **Resilience**: time-budgeted retries handle long cold starts (default ~180s total budget).\n"
|
| 219 |
-
"- **Security**: API key, endpoint, and model ID are **server-side secrets**."
|
| 220 |
-
)
|
| 221 |
-
|
| 222 |
)
|
| 223 |
|
| 224 |
with gr.Row():
|
| 225 |
with gr.Column(scale=3):
|
| 226 |
q = gr.Textbox(label="Question", value=PRESET_Q, lines=3)
|
| 227 |
-
ctx = gr.Textbox(label="Context (only source of truth)", value=PRESET_CTX, lines=8)
|
| 228 |
|
| 229 |
with gr.Row():
|
| 230 |
temp = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")
|
|
|
|
| 14 |
# ==============================
|
| 15 |
# Tunables (Variables or Secrets)
|
| 16 |
# ==============================
|
| 17 |
+
# Per-attempt request timeout (keep modest so we can poll repeatedly during warmup)
|
| 18 |
PER_REQUEST_TIMEOUT_SEC = int(os.getenv("FRIENDLI_PER_REQUEST_TIMEOUT_SEC", "30"))
|
| 19 |
# Total time budget to wait for cold start + retries
|
| 20 |
COLD_START_BUDGET_SEC = int(os.getenv("FRIENDLI_COLD_START_BUDGET_SEC", "180"))
|
|
|
|
| 26 |
|
| 27 |
# Backoff tuning
|
| 28 |
BACKOFF_BASE_SEC = float(os.getenv("FRIENDLI_BACKOFF_BASE_SEC", "2.0"))
|
| 29 |
+
BACKOFF_CAP_SEC = float(os.getenv("FRIENDLI_BACKOFF_CAP_SEC", "20.0"))
|
| 30 |
+
JITTER_SEC = float(os.getenv("FRIENDLI_JITTER_SEC", "0.5"))
|
| 31 |
|
| 32 |
# ==============================
|
| 33 |
# Appendix-style system prompt (general instructions)
|
|
|
|
| 83 |
RETRYABLE_HTTP = {408, 429, 500, 502, 503, 504, 522, 524}
|
| 84 |
|
| 85 |
def _sleep_with_budget(seconds, deadline):
|
|
|
|
| 86 |
now = time.monotonic()
|
| 87 |
remaining = max(0.0, deadline - now)
|
| 88 |
time.sleep(max(0.0, min(seconds, remaining)))
|
|
|
|
| 92 |
ra = resp.headers.get("Retry-After")
|
| 93 |
if not ra:
|
| 94 |
return None
|
|
|
|
| 95 |
return float(ra)
|
| 96 |
except Exception:
|
| 97 |
return None
|
|
|
|
| 132 |
timeout=PER_REQUEST_TIMEOUT_SEC,
|
| 133 |
)
|
| 134 |
|
| 135 |
+
# 503: cold start; wait then retry (honor Retry-After if provided)
|
| 136 |
if resp.status_code == 503:
|
|
|
|
| 137 |
ra = _retry_after_seconds(resp)
|
| 138 |
wait = ra if ra is not None else (INITIAL_503_WAIT_SEC if not saw_first_503 else BACKOFF_BASE_SEC)
|
| 139 |
saw_first_503 = True
|
| 140 |
if time.monotonic() + wait > deadline:
|
| 141 |
+
resp.raise_for_status()
|
| 142 |
_sleep_with_budget(wait, deadline)
|
| 143 |
continue
|
| 144 |
|
| 145 |
+
# Other retryable statuses (rate limit / transient errors)
|
| 146 |
if resp.status_code in RETRYABLE_HTTP and time.monotonic() < deadline:
|
|
|
|
| 147 |
exp = min(BACKOFF_CAP_SEC, BACKOFF_BASE_SEC * (2 ** min(6, attempt)))
|
| 148 |
wait = exp + random.uniform(0, JITTER_SEC)
|
| 149 |
_sleep_with_budget(wait, deadline)
|
| 150 |
continue
|
| 151 |
|
| 152 |
+
# Non-OK without remaining budget → raise
|
| 153 |
resp.raise_for_status()
|
| 154 |
|
| 155 |
data = resp.json()
|
|
|
|
| 167 |
wait = exp + random.uniform(0, JITTER_SEC)
|
| 168 |
_sleep_with_budget(wait, deadline)
|
| 169 |
continue
|
|
|
|
| 170 |
raise gr.Error(
|
| 171 |
f"Friendly API: retry budget exceeded after ~{COLD_START_BUDGET_SEC}s. "
|
| 172 |
"Please try again; the model may have just finished warming."
|
|
|
|
| 202 |
|
| 203 |
with gr.Blocks(title="Humains-Junior (Humains.com) — Exoskeleton Reasoning") as demo:
|
| 204 |
gr.Markdown(
|
| 205 |
+
"# Humains-Junior by Humains.com — Exoskeleton Reasoning (Friendly API)\n\n"
|
|
|
|
|
|
|
|
|
|
| 206 |
"- **Model behavior**:\n"
|
| 207 |
" 1. Outputs two plain-text sections: **Analysis** then **Response**.\n"
|
| 208 |
+
" 2. When the **question is related to the Context**, it **prioritizes the Context** over internal knowledge, even if the Context is factually wrong.\n"
|
| 209 |
" 3. If the **question is unrelated to the Context**, it **may answer normally** (not forced to follow the Context).\n"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 210 |
)
|
| 211 |
|
| 212 |
with gr.Row():
|
| 213 |
with gr.Column(scale=3):
|
| 214 |
q = gr.Textbox(label="Question", value=PRESET_Q, lines=3)
|
| 215 |
+
ctx = gr.Textbox(label="Context (only source of truth when related)", value=PRESET_CTX, lines=8)
|
| 216 |
|
| 217 |
with gr.Row():
|
| 218 |
temp = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")
|