Spaces:

Inpris
/

Humains-Junior

Sleeping

App Files Community

NS-Y committed on Nov 3, 2025

Commit

4f672c8

verified ·

1 Parent(s): 76da580

Update app.py

Browse files

Larger timeout

Files changed (1) hide show

app.py +95 -48

app.py CHANGED Viewed

@@ -7,14 +7,27 @@ import gradio as gr
 # ==============================
 # Secrets (set in Settings → Variables & secrets → Secrets)
 # ==============================
-FRIENDLI_API_KEY = os.getenv("FRIENDLI_API_KEY", "")                 # REQUIRED (Secret)
-FRIENDLI_ENDPOINT = os.getenv("FRIENDLI_ENDPOINT", "")               # REQUIRED (Secret)
-FRIENDLI_MODEL_ID = os.getenv("FRIENDLI_MODEL_ID", "")               # REQUIRED (Secret)
-# Optional tuning as Variables or Secrets
 DEFAULT_MAX_TOKENS = int(os.getenv("FRIENDLI_MAX_TOKENS", "2000"))
 DEFAULT_TEMPERATURE = float(os.getenv("FRIENDLI_TEMPERATURE", "0.0"))
-DEFAULT_TIMEOUT = int(os.getenv("FRIENDLI_TIMEOUT_SEC", "60"))
 # ==============================
 # Appendix-style system prompt (general instructions)
@@ -50,7 +63,7 @@ Response: The capital of France is London.
 """
 # ==============================
-# Message builder (exact shape requested)
 # system prompt (general instructions)
 # User: question + context
 # ==============================
@@ -65,17 +78,34 @@ Context:
     ]
 # ==============================
-# Friendly API client with retry (503 wake-up aware)
 # ==============================
-def call_friendly_with_retry(messages, max_tokens, temperature,
-                             timeout_sec=DEFAULT_TIMEOUT, max_attempts=5, first_503_wait=10):
-    # Validate secrets exist
     if not FRIENDLI_API_KEY:
-        raise RuntimeError("Missing FRIENDLI_API_KEY (Secret). Add it in Settings → Variables & secrets.")
     if not FRIENDLI_ENDPOINT:
-        raise RuntimeError("Missing FRIENDLI_ENDPOINT (Secret).")
     if not FRIENDLI_MODEL_ID:
-        raise RuntimeError("Missing FRIENDLI_MODEL_ID (Secret).")
     headers = {
         "Content-Type": "application/json",
@@ -88,15 +118,41 @@ def call_friendly_with_retry(messages, max_tokens, temperature,
         "temperature": float(temperature),
     }
-    for attempt in range(1, max_attempts + 1):
         try:
-            resp = requests.post(FRIENDLI_ENDPOINT, headers=headers, json=payload, timeout=timeout_sec)
             if resp.status_code == 503:
-                # cold start: wait then retry
-                if attempt < max_attempts:
-                    time.sleep(first_503_wait)
-                    continue
-                resp.raise_for_status()
             resp.raise_for_status()
             data = resp.json()
@@ -105,24 +161,20 @@ def call_friendly_with_retry(messages, max_tokens, temperature,
                     .get("message", {})
                     .get("content", "")
             )
-            return content if content and content.strip() else "[EMPTY_RESPONSE]"
-        except requests.exceptions.HTTPError as http_err:
-            code = getattr(http_err.response, "status_code", None)
-            if code in (429, 500, 502, 503, 504) and attempt < max_attempts:
-                backoff = min(2 ** attempt, 20) + random.uniform(0, 0.5)
-                time.sleep(backoff)
-                continue
-            raise RuntimeError(f"Friendly API HTTP error (status={code}).") from http_err
-        except requests.exceptions.RequestException as net_err:
-            if attempt < max_attempts:
-                backoff = min(2 ** attempt, 20) + random.uniform(0, 0.5)
-                time.sleep(backoff)
                 continue
-            raise RuntimeError("Friendly API network error.") from net_err
-    raise RuntimeError("Failed to get response from Friendly API after retries.")
 # ==============================
 # Helpers: split Analysis / Response
@@ -155,16 +207,15 @@ PRESET_CTX = (
 with gr.Blocks(title="Exoskeleton Reasoning — Friendly API (Appendix Prompt)") as demo:
     gr.Markdown(
         "# Exoskeleton Reasoning — Friendly API\n"
-        "- **Format enforced**: system prompt (general instructions), then a single **User:** message containing **Question + Context**.\n"
-        "- Built-in **503-aware retries** handle cold starts automatically.\n"
-        "- Keys and endpoints are **server-side secrets**; nothing sensitive is exposed in the UI."
     )
     with gr.Row():
         with gr.Column(scale=3):
-            q = gr.Textbox(label="Question", value=PRESET_Q, lines=3, placeholder="Type your question")
-            ctx = gr.Textbox(label="Context (used as the only source of truth)", value=PRESET_CTX, lines=8,
-                             placeholder="Paste/modify context here")
             with gr.Row():
                 temp = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")
@@ -188,15 +239,11 @@ with gr.Blocks(title="Exoskeleton Reasoning — Friendly API (Appendix Prompt)")
             return "", "", ""
         messages = build_messages(question, context)
-        text = call_friendly_with_retry(
             messages=messages,
             max_tokens=max_tokens,
             temperature=temperature,
-            timeout_sec=DEFAULT_TIMEOUT,
-            max_attempts=5,
-            first_503_wait=10,
         )
         analysis, response = parse_analysis_response(text)
         return analysis, response, text

 # ==============================
 # Secrets (set in Settings → Variables & secrets → Secrets)
 # ==============================
+FRIENDLI_API_KEY = os.getenv("FRIENDLI_API_KEY", "")     # REQUIRED (Secret)
+FRIENDLI_ENDPOINT = os.getenv("FRIENDLI_ENDPOINT", "")   # REQUIRED (Secret)
+FRIENDLI_MODEL_ID = os.getenv("FRIENDLI_MODEL_ID", "")   # REQUIRED (Secret)
+# ==============================
+# Tunables (Variables or Secrets)
+# ==============================
+# Per-attempt request timeout (keep modest so we can poll repeatedly)
+PER_REQUEST_TIMEOUT_SEC = int(os.getenv("FRIENDLI_PER_REQUEST_TIMEOUT_SEC", "30"))
+# Total time budget to wait for cold start + retries
+COLD_START_BUDGET_SEC = int(os.getenv("FRIENDLI_COLD_START_BUDGET_SEC", "180"))
+# Initial fixed wait after the *first* 503 (model waking)
+INITIAL_503_WAIT_SEC = int(os.getenv("FRIENDLI_INITIAL_503_WAIT_SEC", "15"))
+# Max tokens / temperature defaults
 DEFAULT_MAX_TOKENS = int(os.getenv("FRIENDLI_MAX_TOKENS", "2000"))
 DEFAULT_TEMPERATURE = float(os.getenv("FRIENDLI_TEMPERATURE", "0.0"))
+# Backoff tuning
+BACKOFF_BASE_SEC = float(os.getenv("FRIENDLI_BACKOFF_BASE_SEC", "2.0"))
+BACKOFF_CAP_SEC = float(os.getenv("FRIENDLI_BACKOFF_CAP_SEC", "20.0"))
+JITTER_SEC = float(os.getenv("FRIENDLI_JITTER_SEC", "0.5"))
 # ==============================
 # Appendix-style system prompt (general instructions)
 """
 # ==============================
+# Message builder (exact shape)
 # system prompt (general instructions)
 # User: question + context
 # ==============================
     ]
 # ==============================
+# Friendly API client with time-budgeted retry
 # ==============================
+RETRYABLE_HTTP = {408, 429, 500, 502, 503, 504, 522, 524}
+def _sleep_with_budget(seconds, deadline):
+    # Sleep but never go beyond the overall budget
+    now = time.monotonic()
+    remaining = max(0.0, deadline - now)
+    time.sleep(max(0.0, min(seconds, remaining)))
+def _retry_after_seconds(resp):
+    try:
+        ra = resp.headers.get("Retry-After")
+        if not ra:
+            return None
+        # Retry-After can be seconds or an HTTP-date; treat as seconds if numeric
+        return float(ra)
+    except Exception:
+        return None
+def call_friendly_with_time_budget(messages, max_tokens, temperature):
+    # Validate secrets
     if not FRIENDLI_API_KEY:
+        raise gr.Error("Missing FRIENDLI_API_KEY (Secret).")
     if not FRIENDLI_ENDPOINT:
+        raise gr.Error("Missing FRIENDLI_ENDPOINT (Secret).")
     if not FRIENDLI_MODEL_ID:
+        raise gr.Error("Missing FRIENDLI_MODEL_ID (Secret).")
     headers = {
         "Content-Type": "application/json",
         "temperature": float(temperature),
     }
+    session = requests.Session()
+    start = time.monotonic()
+    deadline = start + COLD_START_BUDGET_SEC
+    attempt = 0
+    saw_first_503 = False
+    while True:
+        attempt += 1
         try:
+            resp = session.post(
+                FRIENDLI_ENDPOINT,
+                headers=headers,
+                json=payload,
+                timeout=PER_REQUEST_TIMEOUT_SEC,
+            )
             if resp.status_code == 503:
+                # Cold start; honor Retry-After if provided, otherwise use configured wait
+                ra = _retry_after_seconds(resp)
+                wait = ra if ra is not None else (INITIAL_503_WAIT_SEC if not saw_first_503 else BACKOFF_BASE_SEC)
+                saw_first_503 = True
+                if time.monotonic() + wait > deadline:
+                    resp.raise_for_status()  # will throw and exit loop to error
+                _sleep_with_budget(wait, deadline)
+                continue
+            # For other retryable status codes
+            if resp.status_code in RETRYABLE_HTTP and time.monotonic() < deadline:
+                # exponential backoff with jitter
+                exp = min(BACKOFF_CAP_SEC, BACKOFF_BASE_SEC * (2 ** min(6, attempt)))
+                wait = exp + random.uniform(0, JITTER_SEC)
+                _sleep_with_budget(wait, deadline)
+                continue
+            # Non-OK without retries left → raise
             resp.raise_for_status()
             data = resp.json()
                     .get("message", {})
                     .get("content", "")
             )
+            return content if content and str(content).strip() else "[EMPTY_RESPONSE]"
+        except requests.exceptions.RequestException:
+            # Network / timeout; retry within budget
+            if time.monotonic() < deadline:
+                exp = min(BACKOFF_CAP_SEC, BACKOFF_BASE_SEC * (2 ** min(6, attempt)))
+                wait = exp + random.uniform(0, JITTER_SEC)
+                _sleep_with_budget(wait, deadline)
                 continue
+            # Budget exhausted
+            raise gr.Error(
+                f"Friendly API: retry budget exceeded after ~{COLD_START_BUDGET_SEC}s. "
+                "Please try again; the model may have just finished warming."
+            )
 # ==============================
 # Helpers: split Analysis / Response
 with gr.Blocks(title="Exoskeleton Reasoning — Friendly API (Appendix Prompt)") as demo:
     gr.Markdown(
         "# Exoskeleton Reasoning — Friendly API\n"
+        "- **Format**: system prompt (general instructions), then a single **User:** message containing **Question + Context**.\n"
+        "- Built-in **time-budgeted retries** handle long cold starts (default ~180s budget).\n"
+        "- Keys and endpoints are **server-side secrets**."
     )
     with gr.Row():
         with gr.Column(scale=3):
+            q = gr.Textbox(label="Question", value=PRESET_Q, lines=3)
+            ctx = gr.Textbox(label="Context (only source of truth)", value=PRESET_CTX, lines=8)
             with gr.Row():
                 temp = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")
             return "", "", ""
         messages = build_messages(question, context)
+        text = call_friendly_with_time_budget(
             messages=messages,
             max_tokens=max_tokens,
             temperature=temperature,
         )
         analysis, response = parse_analysis_response(text)
         return analysis, response, text