NS-Y committed on
Commit
7d89622
·
verified ·
1 Parent(s): e79fc25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -22
app.py CHANGED
@@ -14,7 +14,7 @@ FRIENDLI_MODEL_ID = os.getenv("FRIENDLI_MODEL_ID", "") # REQUIRED (Secret)
14
  # ==============================
15
  # Tunables (Variables or Secrets)
16
  # ==============================
17
- # Per-attempt request timeout (keep modest so we can poll repeatedly)
18
  PER_REQUEST_TIMEOUT_SEC = int(os.getenv("FRIENDLI_PER_REQUEST_TIMEOUT_SEC", "30"))
19
  # Total time budget to wait for cold start + retries
20
  COLD_START_BUDGET_SEC = int(os.getenv("FRIENDLI_COLD_START_BUDGET_SEC", "180"))
@@ -26,8 +26,8 @@ DEFAULT_TEMPERATURE = float(os.getenv("FRIENDLI_TEMPERATURE", "0.0"))
26
 
27
  # Backoff tuning
28
  BACKOFF_BASE_SEC = float(os.getenv("FRIENDLI_BACKOFF_BASE_SEC", "2.0"))
29
- BACKOFF_CAP_SEC = float(os.getenv("FRIENDLI_BACKOFF_CAP_SEC", "20.0"))
30
- JITTER_SEC = float(os.getenv("FRIENDLI_JITTER_SEC", "0.5"))
31
 
32
  # ==============================
33
  # Appendix-style system prompt (general instructions)
@@ -83,7 +83,6 @@ Context:
83
  RETRYABLE_HTTP = {408, 429, 500, 502, 503, 504, 522, 524}
84
 
85
  def _sleep_with_budget(seconds, deadline):
86
- # Sleep but never go beyond the overall budget
87
  now = time.monotonic()
88
  remaining = max(0.0, deadline - now)
89
  time.sleep(max(0.0, min(seconds, remaining)))
@@ -93,7 +92,6 @@ def _retry_after_seconds(resp):
93
  ra = resp.headers.get("Retry-After")
94
  if not ra:
95
  return None
96
- # Retry-After can be seconds or an HTTP-date; treat as seconds if numeric
97
  return float(ra)
98
  except Exception:
99
  return None
@@ -134,25 +132,24 @@ def call_friendly_with_time_budget(messages, max_tokens, temperature):
134
  timeout=PER_REQUEST_TIMEOUT_SEC,
135
  )
136
 
 
137
  if resp.status_code == 503:
138
- # Cold start; honor Retry-After if provided, otherwise use configured wait
139
  ra = _retry_after_seconds(resp)
140
  wait = ra if ra is not None else (INITIAL_503_WAIT_SEC if not saw_first_503 else BACKOFF_BASE_SEC)
141
  saw_first_503 = True
142
  if time.monotonic() + wait > deadline:
143
- resp.raise_for_status() # will throw and exit loop to error
144
  _sleep_with_budget(wait, deadline)
145
  continue
146
 
147
- # For other retryable status codes
148
  if resp.status_code in RETRYABLE_HTTP and time.monotonic() < deadline:
149
- # exponential backoff with jitter
150
  exp = min(BACKOFF_CAP_SEC, BACKOFF_BASE_SEC * (2 ** min(6, attempt)))
151
  wait = exp + random.uniform(0, JITTER_SEC)
152
  _sleep_with_budget(wait, deadline)
153
  continue
154
 
155
- # Non-OK without retries left → raise
156
  resp.raise_for_status()
157
 
158
  data = resp.json()
@@ -170,7 +167,6 @@ def call_friendly_with_time_budget(messages, max_tokens, temperature):
170
  wait = exp + random.uniform(0, JITTER_SEC)
171
  _sleep_with_budget(wait, deadline)
172
  continue
173
- # Budget exhausted
174
  raise gr.Error(
175
  f"Friendly API: retry budget exceeded after ~{COLD_START_BUDGET_SEC}s. "
176
  "Please try again; the model may have just finished warming."
@@ -206,25 +202,17 @@ PRESET_CTX = (
206
 
207
  with gr.Blocks(title="Humains-Junior (Humains.com) — Exoskeleton Reasoning") as demo:
208
  gr.Markdown(
209
- "# Humains-Junior by Humains.com — Exoskeleton Reasoning (Friendly API)\n"
210
- "\n"
211
- "### How this demo enforces the prompt rules\n"
212
- "- **Message format**: system prompt (general instructions) → a single **User:** block containing **Question + Context**.\n"
213
  "- **Model behavior**:\n"
214
  " 1. Outputs two plain-text sections: **Analysis** then **Response**.\n"
215
- " 2. When the **question is related to the Context**, it **prioritizes the Context** over internal knowledgeeven if the Context is factually wrong.\n"
216
  " 3. If the **question is unrelated to the Context**, it **may answer normally** (not forced to follow the Context).\n"
217
- " 4. Uses **plain text only** in the model output (no bullets/numbering/markdown characters).\n"
218
- "- **Resilience**: time-budgeted retries handle long cold starts (default ~180s total budget).\n"
219
- "- **Security**: API key, endpoint, and model ID are **server-side secrets**."
220
- )
221
-
222
  )
223
 
224
  with gr.Row():
225
  with gr.Column(scale=3):
226
  q = gr.Textbox(label="Question", value=PRESET_Q, lines=3)
227
- ctx = gr.Textbox(label="Context (only source of truth)", value=PRESET_CTX, lines=8)
228
 
229
  with gr.Row():
230
  temp = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")
 
14
  # ==============================
15
  # Tunables (Variables or Secrets)
16
  # ==============================
17
+ # Per-attempt request timeout (keep modest so we can poll repeatedly during warmup)
18
  PER_REQUEST_TIMEOUT_SEC = int(os.getenv("FRIENDLI_PER_REQUEST_TIMEOUT_SEC", "30"))
19
  # Total time budget to wait for cold start + retries
20
  COLD_START_BUDGET_SEC = int(os.getenv("FRIENDLI_COLD_START_BUDGET_SEC", "180"))
 
26
 
27
  # Backoff tuning
28
  BACKOFF_BASE_SEC = float(os.getenv("FRIENDLI_BACKOFF_BASE_SEC", "2.0"))
29
+ BACKOFF_CAP_SEC = float(os.getenv("FRIENDLI_BACKOFF_CAP_SEC", "20.0"))
30
+ JITTER_SEC = float(os.getenv("FRIENDLI_JITTER_SEC", "0.5"))
31
 
32
  # ==============================
33
  # Appendix-style system prompt (general instructions)
 
83
  RETRYABLE_HTTP = {408, 429, 500, 502, 503, 504, 522, 524}
84
 
85
  def _sleep_with_budget(seconds, deadline):
 
86
  now = time.monotonic()
87
  remaining = max(0.0, deadline - now)
88
  time.sleep(max(0.0, min(seconds, remaining)))
 
92
  ra = resp.headers.get("Retry-After")
93
  if not ra:
94
  return None
 
95
  return float(ra)
96
  except Exception:
97
  return None
 
132
  timeout=PER_REQUEST_TIMEOUT_SEC,
133
  )
134
 
135
+ # 503: cold start; wait then retry (honor Retry-After if provided)
136
  if resp.status_code == 503:
 
137
  ra = _retry_after_seconds(resp)
138
  wait = ra if ra is not None else (INITIAL_503_WAIT_SEC if not saw_first_503 else BACKOFF_BASE_SEC)
139
  saw_first_503 = True
140
  if time.monotonic() + wait > deadline:
141
+ resp.raise_for_status()
142
  _sleep_with_budget(wait, deadline)
143
  continue
144
 
145
+ # Other retryable statuses (rate limit / transient errors)
146
  if resp.status_code in RETRYABLE_HTTP and time.monotonic() < deadline:
 
147
  exp = min(BACKOFF_CAP_SEC, BACKOFF_BASE_SEC * (2 ** min(6, attempt)))
148
  wait = exp + random.uniform(0, JITTER_SEC)
149
  _sleep_with_budget(wait, deadline)
150
  continue
151
 
152
+ # Non-OK without remaining budget → raise
153
  resp.raise_for_status()
154
 
155
  data = resp.json()
 
167
  wait = exp + random.uniform(0, JITTER_SEC)
168
  _sleep_with_budget(wait, deadline)
169
  continue
 
170
  raise gr.Error(
171
  f"Friendly API: retry budget exceeded after ~{COLD_START_BUDGET_SEC}s. "
172
  "Please try again; the model may have just finished warming."
 
202
 
203
  with gr.Blocks(title="Humains-Junior (Humains.com) — Exoskeleton Reasoning") as demo:
204
  gr.Markdown(
205
+ "# Humains-Junior by Humains.com — Exoskeleton Reasoning (Friendly API)\n\n"
 
 
 
206
  "- **Model behavior**:\n"
207
  " 1. Outputs two plain-text sections: **Analysis** then **Response**.\n"
208
+ " 2. When the **question is related to the Context**, it **prioritizes the Context** over internal knowledge, even if the Context is factually wrong.\n"
209
  " 3. If the **question is unrelated to the Context**, it **may answer normally** (not forced to follow the Context).\n"
 
 
 
 
 
210
  )
211
 
212
  with gr.Row():
213
  with gr.Column(scale=3):
214
  q = gr.Textbox(label="Question", value=PRESET_Q, lines=3)
215
+ ctx = gr.Textbox(label="Context (only source of truth when related)", value=PRESET_CTX, lines=8)
216
 
217
  with gr.Row():
218
  temp = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")