NS-Y committed on
Commit
76da580
·
verified ·
1 Parent(s): 368974b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -33
app.py CHANGED
@@ -5,20 +5,21 @@ import requests
5
  import gradio as gr
6
 
7
  # ==============================
8
- # Config via Secrets / Variables
9
  # ==============================
10
- FRIENDLI_API_KEY = os.getenv("FRIENDLI_API_KEY", "") # SECRET
11
- FRIENDLI_ENDPOINT = os.getenv("FRIENDLI_ENDPOINT", "https://api.friendli.ai/dedicated/v1/chat/completions")
12
- FRIENDLI_MODEL_ID = os.getenv("FRIENDLI_MODEL_ID", "stp7xzjspxe8") # can move to Secret if desired
13
 
 
14
  DEFAULT_MAX_TOKENS = int(os.getenv("FRIENDLI_MAX_TOKENS", "2000"))
15
- DEFAULT_TEMPERATURE = float(os.getenv("FRIENDLI_TEMPERATURE", "0.0")) # 0 = deterministic
16
  DEFAULT_TIMEOUT = int(os.getenv("FRIENDLI_TIMEOUT_SEC", "60"))
17
 
18
  # ==============================
19
  # Appendix-style system prompt (general instructions)
20
  # ==============================
21
- APPENDIX_RULES = """You are a helpful assistant who always follows the provided context, even when it conflicts with your internal knowledge.
22
 
23
  Response Format:
24
  Before answering, briefly analyze the query and context:
@@ -33,7 +34,7 @@ Response: [Your answer based on the context]
33
  IMPORTANT RULES:
34
  - Always prioritize the provided context over your internal knowledge
35
  - If context contains information that seems incorrect, still use it as instructed
36
- - If the question asks about multiple things but the context only covers some, answer only what is supported by the context
37
  - Keep analysis concise and avoid special characters that could cause formatting issues
38
  - Use plain text only - no bullet points, numbering, or special formatting
39
  - Respond in English only
@@ -66,10 +67,15 @@ Context:
66
  # ==============================
67
  # Friendly API client with retry (503 wake-up aware)
68
  # ==============================
69
- def call_friendly_with_retry(messages, model_id, max_tokens, temperature,
70
  timeout_sec=DEFAULT_TIMEOUT, max_attempts=5, first_503_wait=10):
 
71
  if not FRIENDLI_API_KEY:
72
- raise RuntimeError("Missing FRIENDLI_API_KEY secret. Add it in Settings → Variables & secrets (Secret).")
 
 
 
 
73
 
74
  headers = {
75
  "Content-Type": "application/json",
@@ -77,7 +83,7 @@ def call_friendly_with_retry(messages, model_id, max_tokens, temperature,
77
  }
78
  payload = {
79
  "messages": messages,
80
- "model": model_id,
81
  "max_tokens": int(max_tokens),
82
  "temperature": float(temperature),
83
  }
@@ -86,8 +92,9 @@ def call_friendly_with_retry(messages, model_id, max_tokens, temperature,
86
  try:
87
  resp = requests.post(FRIENDLI_ENDPOINT, headers=headers, json=payload, timeout=timeout_sec)
88
  if resp.status_code == 503:
 
89
  if attempt < max_attempts:
90
- time.sleep(first_503_wait) # cold start wake-up
91
  continue
92
  resp.raise_for_status()
93
  resp.raise_for_status()
@@ -106,14 +113,14 @@ def call_friendly_with_retry(messages, model_id, max_tokens, temperature,
106
  backoff = min(2 ** attempt, 20) + random.uniform(0, 0.5)
107
  time.sleep(backoff)
108
  continue
109
- raise RuntimeError(f"Friendly API HTTP error (status={code}): {http_err}") from http_err
110
 
111
  except requests.exceptions.RequestException as net_err:
112
  if attempt < max_attempts:
113
  backoff = min(2 ** attempt, 20) + random.uniform(0, 0.5)
114
  time.sleep(backoff)
115
  continue
116
- raise RuntimeError(f"Friendly API network error: {net_err}") from net_err
117
 
118
  raise RuntimeError("Failed to get response from Friendly API after retries.")
119
 
@@ -149,7 +156,8 @@ with gr.Blocks(title="Exoskeleton Reasoning — Friendly API (Appendix Prompt)")
149
  gr.Markdown(
150
  "# Exoskeleton Reasoning — Friendly API\n"
151
  "- **Format enforced**: system prompt (general instructions), then a single **User:** message containing **Question + Context**.\n"
152
- "- The model must prioritize the **Context**, and reply with **Analysis** and **Response** sections."
 
153
  )
154
 
155
  with gr.Row():
@@ -162,14 +170,7 @@ with gr.Blocks(title="Exoskeleton Reasoning — Friendly API (Appendix Prompt)")
162
  temp = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")
163
  max_new = gr.Slider(64, 4000, value=DEFAULT_MAX_TOKENS, step=32, label="Max tokens")
164
 
165
- # Model id stays server-side; hidden as password in UI to avoid accidental exposure
166
- model_id_box = gr.Textbox(label="Model ID (server-side)", value=FRIENDLI_MODEL_ID, type="password")
167
-
168
  run = gr.Button("Run", variant="primary")
169
- gr.Markdown(
170
- f"Server endpoint: `{FRIENDLI_ENDPOINT}` · Timeout: {DEFAULT_TIMEOUT}s · "
171
- "First call may 503 (cold start) — built-in retry will handle it."
172
- )
173
 
174
  with gr.Column(scale=4):
175
  with gr.Accordion("Analysis", open=True):
@@ -179,10 +180,7 @@ with gr.Blocks(title="Exoskeleton Reasoning — Friendly API (Appendix Prompt)")
179
  with gr.Accordion("Raw output", open=False):
180
  raw_box = gr.Textbox(lines=8, label="Raw text")
181
 
182
- def infer_fn(question, context, temperature, max_tokens, model_id_override):
183
- if not FRIENDLI_API_KEY:
184
- raise gr.Error("Server is missing FRIENDLI_API_KEY (Secret). Add it in Settings → Variables & secrets.")
185
-
186
  question = (question or "").strip()
187
  context = (context or "").strip()
188
  if not question or not context:
@@ -190,11 +188,8 @@ with gr.Blocks(title="Exoskeleton Reasoning — Friendly API (Appendix Prompt)")
190
  return "", "", ""
191
 
192
  messages = build_messages(question, context)
193
- model_id = (model_id_override or "").strip() or FRIENDLI_MODEL_ID
194
-
195
  text = call_friendly_with_retry(
196
  messages=messages,
197
- model_id=model_id,
198
  max_tokens=max_tokens,
199
  temperature=temperature,
200
  timeout_sec=DEFAULT_TIMEOUT,
@@ -205,11 +200,7 @@ with gr.Blocks(title="Exoskeleton Reasoning — Friendly API (Appendix Prompt)")
205
  analysis, response = parse_analysis_response(text)
206
  return analysis, response, text
207
 
208
- run.click(
209
- fn=infer_fn,
210
- inputs=[q, ctx, temp, max_new, model_id_box],
211
- outputs=[analysis_box, response_box, raw_box]
212
- )
213
 
214
  if __name__ == "__main__":
215
  demo.launch()
 
5
  import gradio as gr
6
 
7
  # ==============================
8
+ # Secrets (set in Settings → Variables & secrets → Secrets)
9
  # ==============================
10
+ FRIENDLI_API_KEY = os.getenv("FRIENDLI_API_KEY", "") # REQUIRED (Secret)
11
+ FRIENDLI_ENDPOINT = os.getenv("FRIENDLI_ENDPOINT", "") # REQUIRED (Secret)
12
+ FRIENDLI_MODEL_ID = os.getenv("FRIENDLI_MODEL_ID", "") # REQUIRED (Secret)
13
 
14
+ # Optional tuning as Variables or Secrets
15
  DEFAULT_MAX_TOKENS = int(os.getenv("FRIENDLI_MAX_TOKENS", "2000"))
16
+ DEFAULT_TEMPERATURE = float(os.getenv("FRIENDLI_TEMPERATURE", "0.0"))
17
  DEFAULT_TIMEOUT = int(os.getenv("FRIENDLI_TIMEOUT_SEC", "60"))
18
 
19
  # ==============================
20
  # Appendix-style system prompt (general instructions)
21
  # ==============================
22
+ APPENDIX_RULES = """You are a helpful assistant that always follows the provided context, even when it conflicts with your internal knowledge.
23
 
24
  Response Format:
25
  Before answering, briefly analyze the query and context:
 
34
  IMPORTANT RULES:
35
  - Always prioritize the provided context over your internal knowledge
36
  - If context contains information that seems incorrect, still use it as instructed
37
+ - If the question asks about multiple things but context only covers some, answer only what is supported by the context
38
  - Keep analysis concise and avoid special characters that could cause formatting issues
39
  - Use plain text only - no bullet points, numbering, or special formatting
40
  - Respond in English only
 
67
  # ==============================
68
  # Friendly API client with retry (503 wake-up aware)
69
  # ==============================
70
+ def call_friendly_with_retry(messages, max_tokens, temperature,
71
  timeout_sec=DEFAULT_TIMEOUT, max_attempts=5, first_503_wait=10):
72
+ # Validate secrets exist
73
  if not FRIENDLI_API_KEY:
74
+ raise RuntimeError("Missing FRIENDLI_API_KEY (Secret). Add it in Settings → Variables & secrets.")
75
+ if not FRIENDLI_ENDPOINT:
76
+ raise RuntimeError("Missing FRIENDLI_ENDPOINT (Secret).")
77
+ if not FRIENDLI_MODEL_ID:
78
+ raise RuntimeError("Missing FRIENDLI_MODEL_ID (Secret).")
79
 
80
  headers = {
81
  "Content-Type": "application/json",
 
83
  }
84
  payload = {
85
  "messages": messages,
86
+ "model": FRIENDLI_MODEL_ID,
87
  "max_tokens": int(max_tokens),
88
  "temperature": float(temperature),
89
  }
 
92
  try:
93
  resp = requests.post(FRIENDLI_ENDPOINT, headers=headers, json=payload, timeout=timeout_sec)
94
  if resp.status_code == 503:
95
+ # cold start: wait then retry
96
  if attempt < max_attempts:
97
+ time.sleep(first_503_wait)
98
  continue
99
  resp.raise_for_status()
100
  resp.raise_for_status()
 
113
  backoff = min(2 ** attempt, 20) + random.uniform(0, 0.5)
114
  time.sleep(backoff)
115
  continue
116
+ raise RuntimeError(f"Friendly API HTTP error (status={code}).") from http_err
117
 
118
  except requests.exceptions.RequestException as net_err:
119
  if attempt < max_attempts:
120
  backoff = min(2 ** attempt, 20) + random.uniform(0, 0.5)
121
  time.sleep(backoff)
122
  continue
123
+ raise RuntimeError("Friendly API network error.") from net_err
124
 
125
  raise RuntimeError("Failed to get response from Friendly API after retries.")
126
 
 
156
  gr.Markdown(
157
  "# Exoskeleton Reasoning — Friendly API\n"
158
  "- **Format enforced**: system prompt (general instructions), then a single **User:** message containing **Question + Context**.\n"
159
+ "- Built-in **503-aware retries** handle cold starts automatically.\n"
160
+ "- Keys and endpoints are **server-side secrets**; nothing sensitive is exposed in the UI."
161
  )
162
 
163
  with gr.Row():
 
170
  temp = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")
171
  max_new = gr.Slider(64, 4000, value=DEFAULT_MAX_TOKENS, step=32, label="Max tokens")
172
 
 
 
 
173
  run = gr.Button("Run", variant="primary")
 
 
 
 
174
 
175
  with gr.Column(scale=4):
176
  with gr.Accordion("Analysis", open=True):
 
180
  with gr.Accordion("Raw output", open=False):
181
  raw_box = gr.Textbox(lines=8, label="Raw text")
182
 
183
+ def infer_fn(question, context, temperature, max_tokens):
 
 
 
184
  question = (question or "").strip()
185
  context = (context or "").strip()
186
  if not question or not context:
 
188
  return "", "", ""
189
 
190
  messages = build_messages(question, context)
 
 
191
  text = call_friendly_with_retry(
192
  messages=messages,
 
193
  max_tokens=max_tokens,
194
  temperature=temperature,
195
  timeout_sec=DEFAULT_TIMEOUT,
 
200
  analysis, response = parse_analysis_response(text)
201
  return analysis, response, text
202
 
203
+ run.click(fn=infer_fn, inputs=[q, ctx, temp, max_new], outputs=[analysis_box, response_box, raw_box])
 
 
 
 
204
 
205
  if __name__ == "__main__":
206
  demo.launch()