NS-Y committed on
Commit
4f672c8
·
verified ·
1 Parent(s): 76da580

Update app.py

Browse files

Larger timeout

Files changed (1) hide show
  1. app.py +95 -48
app.py CHANGED
@@ -7,14 +7,27 @@ import gradio as gr
7
  # ==============================
8
  # Secrets (set in Settings → Variables & secrets → Secrets)
9
  # ==============================
10
- FRIENDLI_API_KEY = os.getenv("FRIENDLI_API_KEY", "") # REQUIRED (Secret)
11
- FRIENDLI_ENDPOINT = os.getenv("FRIENDLI_ENDPOINT", "") # REQUIRED (Secret)
12
- FRIENDLI_MODEL_ID = os.getenv("FRIENDLI_MODEL_ID", "") # REQUIRED (Secret)
13
 
14
- # Optional tuning as Variables or Secrets
 
 
 
 
 
 
 
 
 
15
  DEFAULT_MAX_TOKENS = int(os.getenv("FRIENDLI_MAX_TOKENS", "2000"))
16
  DEFAULT_TEMPERATURE = float(os.getenv("FRIENDLI_TEMPERATURE", "0.0"))
17
- DEFAULT_TIMEOUT = int(os.getenv("FRIENDLI_TIMEOUT_SEC", "60"))
 
 
 
 
18
 
19
  # ==============================
20
  # Appendix-style system prompt (general instructions)
@@ -50,7 +63,7 @@ Response: The capital of France is London.
50
  """
51
 
52
  # ==============================
53
- # Message builder (exact shape requested)
54
  # system prompt (general instructions)
55
  # User: question + context
56
  # ==============================
@@ -65,17 +78,34 @@ Context:
65
  ]
66
 
67
  # ==============================
68
- # Friendly API client with retry (503 wake-up aware)
69
  # ==============================
70
- def call_friendly_with_retry(messages, max_tokens, temperature,
71
- timeout_sec=DEFAULT_TIMEOUT, max_attempts=5, first_503_wait=10):
72
- # Validate secrets exist
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  if not FRIENDLI_API_KEY:
74
- raise RuntimeError("Missing FRIENDLI_API_KEY (Secret). Add it in Settings → Variables & secrets.")
75
  if not FRIENDLI_ENDPOINT:
76
- raise RuntimeError("Missing FRIENDLI_ENDPOINT (Secret).")
77
  if not FRIENDLI_MODEL_ID:
78
- raise RuntimeError("Missing FRIENDLI_MODEL_ID (Secret).")
79
 
80
  headers = {
81
  "Content-Type": "application/json",
@@ -88,15 +118,41 @@ def call_friendly_with_retry(messages, max_tokens, temperature,
88
  "temperature": float(temperature),
89
  }
90
 
91
- for attempt in range(1, max_attempts + 1):
 
 
 
 
 
 
 
92
  try:
93
- resp = requests.post(FRIENDLI_ENDPOINT, headers=headers, json=payload, timeout=timeout_sec)
 
 
 
 
 
 
94
  if resp.status_code == 503:
95
- # cold start: wait then retry
96
- if attempt < max_attempts:
97
- time.sleep(first_503_wait)
98
- continue
99
- resp.raise_for_status()
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  resp.raise_for_status()
101
 
102
  data = resp.json()
@@ -105,24 +161,20 @@ def call_friendly_with_retry(messages, max_tokens, temperature,
105
  .get("message", {})
106
  .get("content", "")
107
  )
108
- return content if content and content.strip() else "[EMPTY_RESPONSE]"
109
-
110
- except requests.exceptions.HTTPError as http_err:
111
- code = getattr(http_err.response, "status_code", None)
112
- if code in (429, 500, 502, 503, 504) and attempt < max_attempts:
113
- backoff = min(2 ** attempt, 20) + random.uniform(0, 0.5)
114
- time.sleep(backoff)
115
- continue
116
- raise RuntimeError(f"Friendly API HTTP error (status={code}).") from http_err
117
-
118
- except requests.exceptions.RequestException as net_err:
119
- if attempt < max_attempts:
120
- backoff = min(2 ** attempt, 20) + random.uniform(0, 0.5)
121
- time.sleep(backoff)
122
  continue
123
- raise RuntimeError("Friendly API network error.") from net_err
124
-
125
- raise RuntimeError("Failed to get response from Friendly API after retries.")
 
 
126
 
127
  # ==============================
128
  # Helpers: split Analysis / Response
@@ -155,16 +207,15 @@ PRESET_CTX = (
155
  with gr.Blocks(title="Exoskeleton Reasoning — Friendly API (Appendix Prompt)") as demo:
156
  gr.Markdown(
157
  "# Exoskeleton Reasoning — Friendly API\n"
158
- "- **Format enforced**: system prompt (general instructions), then a single **User:** message containing **Question + Context**.\n"
159
- "- Built-in **503-aware retries** handle cold starts automatically.\n"
160
- "- Keys and endpoints are **server-side secrets**; nothing sensitive is exposed in the UI."
161
  )
162
 
163
  with gr.Row():
164
  with gr.Column(scale=3):
165
- q = gr.Textbox(label="Question", value=PRESET_Q, lines=3, placeholder="Type your question")
166
- ctx = gr.Textbox(label="Context (used as the only source of truth)", value=PRESET_CTX, lines=8,
167
- placeholder="Paste/modify context here")
168
 
169
  with gr.Row():
170
  temp = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")
@@ -188,15 +239,11 @@ with gr.Blocks(title="Exoskeleton Reasoning — Friendly API (Appendix Prompt)")
188
  return "", "", ""
189
 
190
  messages = build_messages(question, context)
191
- text = call_friendly_with_retry(
192
  messages=messages,
193
  max_tokens=max_tokens,
194
  temperature=temperature,
195
- timeout_sec=DEFAULT_TIMEOUT,
196
- max_attempts=5,
197
- first_503_wait=10,
198
  )
199
-
200
  analysis, response = parse_analysis_response(text)
201
  return analysis, response, text
202
 
 
7
# ==============================
# Secrets (set in Settings → Variables & secrets → Secrets)
# ==============================
FRIENDLI_API_KEY = os.getenv("FRIENDLI_API_KEY", "")    # REQUIRED (Secret)
FRIENDLI_ENDPOINT = os.getenv("FRIENDLI_ENDPOINT", "")  # REQUIRED (Secret)
FRIENDLI_MODEL_ID = os.getenv("FRIENDLI_MODEL_ID", "")  # REQUIRED (Secret)


def _env_number(name, default, cast):
    """Read a numeric tunable from the environment.

    Args:
        name: Environment variable to read.
        default: Fallback value (as a string) used when the variable is
            unset or contains only whitespace.
        cast: ``int`` or ``float``; applied to the chosen text.

    Returns:
        The parsed number.

    Raises:
        ValueError: If the variable holds non-blank text that ``cast``
            rejects. The message names the variable so a misconfiguration
            is easy to locate.
    """
    raw = os.getenv(name)
    # os.getenv only applies its default when the variable is *absent*; a
    # variable that exists but is blank would otherwise reach int("") and
    # abort the import with an unhelpful traceback.
    if raw is None or not raw.strip():
        return cast(default)
    try:
        return cast(raw.strip())
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name}={raw!r} is not a valid {cast.__name__}."
        ) from err


# ==============================
# Tunables (Variables or Secrets)
# ==============================
# Per-attempt request timeout (keep modest so we can poll repeatedly)
PER_REQUEST_TIMEOUT_SEC = _env_number("FRIENDLI_PER_REQUEST_TIMEOUT_SEC", "30", int)
# Total time budget to wait for cold start + retries
COLD_START_BUDGET_SEC = _env_number("FRIENDLI_COLD_START_BUDGET_SEC", "180", int)
# Initial fixed wait after the *first* 503 (model waking)
INITIAL_503_WAIT_SEC = _env_number("FRIENDLI_INITIAL_503_WAIT_SEC", "15", int)
# Max tokens / temperature defaults
DEFAULT_MAX_TOKENS = _env_number("FRIENDLI_MAX_TOKENS", "2000", int)
DEFAULT_TEMPERATURE = _env_number("FRIENDLI_TEMPERATURE", "0.0", float)

# Backoff tuning
BACKOFF_BASE_SEC = _env_number("FRIENDLI_BACKOFF_BASE_SEC", "2.0", float)
BACKOFF_CAP_SEC = _env_number("FRIENDLI_BACKOFF_CAP_SEC", "20.0", float)
JITTER_SEC = _env_number("FRIENDLI_JITTER_SEC", "0.5", float)
31
 
32
  # ==============================
33
  # Appendix-style system prompt (general instructions)
 
63
  """
64
 
65
  # ==============================
66
+ # Message builder (exact shape)
67
  # system prompt (general instructions)
68
  # User: question + context
69
  # ==============================
 
78
  ]
79
 
80
  # ==============================
81
+ # Friendly API client with time-budgeted retry
82
  # ==============================
83
+ RETRYABLE_HTTP = {408, 429, 500, 502, 503, 504, 522, 524}
84
+
85
def _sleep_with_budget(seconds, deadline):
    """Sleep for ``seconds``, clipped so the sleep never runs past ``deadline``.

    Args:
        seconds: Requested pause in seconds. Negative values are treated as 0.
        deadline: Absolute cut-off expressed on the ``time.monotonic()`` clock.

    Returns:
        The number of seconds actually requested from ``time.sleep`` (a float,
        0.0 when the deadline has already passed). Callers may ignore it; it
        lets them tell whether the budget shortened the wait.
    """
    # Time left in the overall budget; never negative.
    remaining = max(0.0, deadline - time.monotonic())
    # Clamp to [0, remaining] so neither a negative request nor an exhausted
    # budget can produce an invalid or over-long sleep.
    duration = max(0.0, min(seconds, remaining))
    time.sleep(duration)
    return duration
90
+
91
def _retry_after_seconds(resp):
    """Return the delay, in seconds, requested by ``resp``'s ``Retry-After`` header.

    ``Retry-After`` may carry either a number of seconds or an HTTP-date.
    Both forms are understood here.

    Args:
        resp: A response-like object exposing a ``headers`` mapping with a
            ``get`` method.

    Returns:
        A finite, non-negative float, or ``None`` when the header is absent,
        empty, unparseable, negative, or not finite. ``None`` tells the caller
        to fall back to its own configured wait.
    """
    import math
    from datetime import datetime, timezone
    from email.utils import parsedate_to_datetime

    headers = getattr(resp, "headers", None)
    if headers is None:
        return None
    try:
        raw = headers.get("Retry-After")
    except AttributeError:
        # Not a mapping-like object; nothing usable to read.
        return None
    if not raw:
        return None
    text = str(raw).strip()

    # Form 1: delay in seconds.
    try:
        seconds = float(text)
    except ValueError:
        seconds = None
    if seconds is not None:
        # float() also accepts "nan", "inf" and negatives; none of those is a
        # usable delay, and a negative value would turn into a zero-length
        # sleep (a tight retry loop) downstream.
        if not math.isfinite(seconds) or seconds < 0:
            return None
        return seconds

    # Form 2: HTTP-date; convert to a delay relative to now.
    try:
        when = parsedate_to_datetime(text)
    except (TypeError, ValueError):
        # Older Pythons raise TypeError, newer ones ValueError, on bad input.
        return None
    if when.tzinfo is None:
        # A date parsed without zone information is treated as UTC.
        when = when.replace(tzinfo=timezone.utc)
    delta = (when - datetime.now(timezone.utc)).total_seconds()
    # A date already in the past means "retry now".
    return max(0.0, delta)
100
+
101
+ def call_friendly_with_time_budget(messages, max_tokens, temperature):
102
+ # Validate secrets
103
  if not FRIENDLI_API_KEY:
104
+ raise gr.Error("Missing FRIENDLI_API_KEY (Secret).")
105
  if not FRIENDLI_ENDPOINT:
106
+ raise gr.Error("Missing FRIENDLI_ENDPOINT (Secret).")
107
  if not FRIENDLI_MODEL_ID:
108
+ raise gr.Error("Missing FRIENDLI_MODEL_ID (Secret).")
109
 
110
  headers = {
111
  "Content-Type": "application/json",
 
118
  "temperature": float(temperature),
119
  }
120
 
121
+ session = requests.Session()
122
+ start = time.monotonic()
123
+ deadline = start + COLD_START_BUDGET_SEC
124
+ attempt = 0
125
+ saw_first_503 = False
126
+
127
+ while True:
128
+ attempt += 1
129
  try:
130
+ resp = session.post(
131
+ FRIENDLI_ENDPOINT,
132
+ headers=headers,
133
+ json=payload,
134
+ timeout=PER_REQUEST_TIMEOUT_SEC,
135
+ )
136
+
137
  if resp.status_code == 503:
138
+ # Cold start; honor Retry-After if provided, otherwise use configured wait
139
+ ra = _retry_after_seconds(resp)
140
+ wait = ra if ra is not None else (INITIAL_503_WAIT_SEC if not saw_first_503 else BACKOFF_BASE_SEC)
141
+ saw_first_503 = True
142
+ if time.monotonic() + wait > deadline:
143
+ resp.raise_for_status() # will throw and exit loop to error
144
+ _sleep_with_budget(wait, deadline)
145
+ continue
146
+
147
+ # For other retryable status codes
148
+ if resp.status_code in RETRYABLE_HTTP and time.monotonic() < deadline:
149
+ # exponential backoff with jitter
150
+ exp = min(BACKOFF_CAP_SEC, BACKOFF_BASE_SEC * (2 ** min(6, attempt)))
151
+ wait = exp + random.uniform(0, JITTER_SEC)
152
+ _sleep_with_budget(wait, deadline)
153
+ continue
154
+
155
+ # Non-OK without retries left → raise
156
  resp.raise_for_status()
157
 
158
  data = resp.json()
 
161
  .get("message", {})
162
  .get("content", "")
163
  )
164
+ return content if content and str(content).strip() else "[EMPTY_RESPONSE]"
165
+
166
+ except requests.exceptions.RequestException:
167
+ # Network / timeout; retry within budget
168
+ if time.monotonic() < deadline:
169
+ exp = min(BACKOFF_CAP_SEC, BACKOFF_BASE_SEC * (2 ** min(6, attempt)))
170
+ wait = exp + random.uniform(0, JITTER_SEC)
171
+ _sleep_with_budget(wait, deadline)
 
 
 
 
 
 
172
  continue
173
+ # Budget exhausted
174
+ raise gr.Error(
175
+ f"Friendly API: retry budget exceeded after ~{COLD_START_BUDGET_SEC}s. "
176
+ "Please try again; the model may have just finished warming."
177
+ )
178
 
179
  # ==============================
180
  # Helpers: split Analysis / Response
 
207
  with gr.Blocks(title="Exoskeleton Reasoning — Friendly API (Appendix Prompt)") as demo:
208
  gr.Markdown(
209
  "# Exoskeleton Reasoning — Friendly API\n"
210
+ "- **Format**: system prompt (general instructions), then a single **User:** message containing **Question + Context**.\n"
211
+ "- Built-in **time-budgeted retries** handle long cold starts (default ~180s budget).\n"
212
+ "- Keys and endpoints are **server-side secrets**."
213
  )
214
 
215
  with gr.Row():
216
  with gr.Column(scale=3):
217
+ q = gr.Textbox(label="Question", value=PRESET_Q, lines=3)
218
+ ctx = gr.Textbox(label="Context (only source of truth)", value=PRESET_CTX, lines=8)
 
219
 
220
  with gr.Row():
221
  temp = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")
 
239
  return "", "", ""
240
 
241
  messages = build_messages(question, context)
242
+ text = call_friendly_with_time_budget(
243
  messages=messages,
244
  max_tokens=max_tokens,
245
  temperature=temperature,
 
 
 
246
  )
 
247
  analysis, response = parse_analysis_response(text)
248
  return analysis, response, text
249