NS-Y committed on
Commit
368974b
·
verified ·
1 Parent(s): 6ee67a7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +46 -89
app.py CHANGED
@@ -1,33 +1,24 @@
1
  import os
2
  import time
3
  import random
4
- import json
5
  import requests
6
  import gradio as gr
7
 
8
  # ==============================
9
  # Config via Secrets / Variables
10
  # ==============================
11
- # Secrets (Space: Settings → Variables & secrets → Secrets)
12
- FRIENDLI_API_KEY = os.getenv("FRIENDLI_API_KEY", "") # <— SECRET. Do not print/log.
13
-
14
- # Variables (non-secret is okay; keep model id as a secret if you prefer)
15
  FRIENDLI_ENDPOINT = os.getenv("FRIENDLI_ENDPOINT", "https://api.friendli.ai/dedicated/v1/chat/completions")
16
- FRIENDLI_MODEL_ID = os.getenv("FRIENDLI_MODEL_ID", "stp7xzjspxe8") # move to Secret if you want to hide it fully
 
17
  DEFAULT_MAX_TOKENS = int(os.getenv("FRIENDLI_MAX_TOKENS", "2000"))
18
- DEFAULT_TEMPERATURE = float(os.getenv("FRIENDLI_TEMPERATURE", "0.0"))
19
  DEFAULT_TIMEOUT = int(os.getenv("FRIENDLI_TIMEOUT_SEC", "60"))
20
 
21
- # Safety: never leak secrets in logs
22
- def _redact(s: str) -> str:
23
- if not s:
24
- return s
25
- return s[:4] + "****" + s[-4:] if len(s) > 8 else "****"
26
-
27
  # ==============================
28
- # Appendix-style Prompt (Phi 3.5 instruct flavor)
29
  # ==============================
30
- APPENDIX_RULES = """You are a helpful assistant that always follows the provided context, even when it conflicts with your internal knowledge.
31
 
32
  Response Format:
33
  Before answering, briefly analyze the query and context:
@@ -42,14 +33,14 @@ Response: [Your answer based on the context]
42
  IMPORTANT RULES:
43
  - Always prioritize the provided context over your internal knowledge
44
  - If context contains information that seems incorrect, still use it as instructed
45
- - If the question asks about multiple things but context only covers some, answer only what is supported by the context
46
  - Keep analysis concise and avoid special characters that could cause formatting issues
47
  - Use plain text only - no bullet points, numbering, or special formatting
48
  - Respond in English only
49
 
50
  Example 1 - Conflicting information:
51
- Client: What is the capital of France? Answer based on the context.
52
-
53
  Context:
54
  The capital of France is London. It has been the political center of France since 1789 and houses the French Parliament.
55
 
@@ -57,37 +48,28 @@ Analysis: The query asks for the capital of France. The context states it is Lon
57
  Response: The capital of France is London.
58
  """
59
 
 
 
 
 
 
60
  def build_messages(question: str, context: str):
61
- """
62
- Friendly's API expects OpenAI-style 'messages'.
63
- We'll send:
64
- - system: Appendix rules + one-shot example
65
- - user: "Client: ... Answer based on the context.\n\nContext:\n..."
66
- """
67
- system = APPENDIX_RULES
68
- user = f"""Client: {question.strip()} Answer based on the context.
69
-
70
  Context:
71
  {context.strip()}"""
72
  return [
73
- {"role": "system", "content": system},
74
- {"role": "user", "content": user},
75
  ]
76
 
77
  # ==============================
78
- # Friendly API client with retry
79
  # ==============================
80
- def call_friendly_with_retry(messages, model_id, max_tokens, temperature, timeout_sec=DEFAULT_TIMEOUT,
81
- max_attempts=5, first_503_wait=10):
82
- """
83
- Calls Friendly chat completions with:
84
- - 503-aware first retry (server warm-up)
85
- - exponential backoff w/ jitter
86
- - strict timeout
87
- All secrets are read from env; nothing is exposed to the client UI.
88
- """
89
  if not FRIENDLI_API_KEY:
90
- raise RuntimeError("Missing FRIENDLI_API_KEY secret.")
91
 
92
  headers = {
93
  "Content-Type": "application/json",
@@ -100,63 +82,45 @@ def call_friendly_with_retry(messages, model_id, max_tokens, temperature, timeou
100
  "temperature": float(temperature),
101
  }
102
 
103
- # First attempt is often 503 (cold start). Handle specifically.
104
  for attempt in range(1, max_attempts + 1):
105
  try:
106
- resp = requests.post(
107
- FRIENDLI_ENDPOINT,
108
- headers=headers,
109
- json=payload,
110
- timeout=timeout_sec,
111
- )
112
- # If Friendly uses 429/5xx for rate/overload, raise_for_status will catch it
113
  if resp.status_code == 503:
114
- # cold start; wait and retry with fixed small delay
115
  if attempt < max_attempts:
116
- time.sleep(first_503_wait)
117
  continue
118
- else:
119
- resp.raise_for_status()
120
  resp.raise_for_status()
121
 
122
  data = resp.json()
123
- # Defensive parsing
124
  content = (
125
  data.get("choices", [{}])[0]
126
  .get("message", {})
127
  .get("content", "")
128
  )
129
- if not content or not str(content).strip():
130
- return "[EMPTY_RESPONSE]"
131
- return str(content)
132
 
133
  except requests.exceptions.HTTPError as http_err:
134
  code = getattr(http_err.response, "status_code", None)
135
- # Retry strategies:
136
  if code in (429, 500, 502, 503, 504) and attempt < max_attempts:
137
- # Exp backoff with jitter
138
- sleep_s = min(2 ** attempt, 20) + random.uniform(0, 0.5)
139
- time.sleep(sleep_s)
140
  continue
141
- # Non-retryable or exhausted
142
  raise RuntimeError(f"Friendly API HTTP error (status={code}): {http_err}") from http_err
143
 
144
  except requests.exceptions.RequestException as net_err:
145
- # Network timeouts / DNS / connection errors — retry with backoff
146
  if attempt < max_attempts:
147
- sleep_s = min(2 ** attempt, 20) + random.uniform(0, 0.5)
148
- time.sleep(sleep_s)
149
  continue
150
  raise RuntimeError(f"Friendly API network error: {net_err}") from net_err
151
 
152
- # Should not reach here due to raises above, but just in case:
153
  raise RuntimeError("Failed to get response from Friendly API after retries.")
154
 
155
  # ==============================
156
- # Helpers
157
  # ==============================
158
  def parse_analysis_response(text: str):
159
- """Extract 'Analysis:' and 'Response:' blocks from plain text."""
160
  if not text:
161
  return "", ""
162
  a_idx = text.rfind("Analysis:")
@@ -175,39 +139,36 @@ def parse_analysis_response(text: str):
175
  # ==============================
176
  # UI
177
  # ==============================
178
- PRESET_Q = "What are the health effects of coffee? Answer based on the context."
179
  PRESET_CTX = (
180
  "Coffee contains caffeine, which can increase alertness. Excess intake may cause "
181
  "jitteriness and sleep disruption. Moderate consumption is considered safe for most adults."
182
  )
183
 
184
- with gr.Blocks(title="Exoskeleton Reasoning — Appendix Prompt (Friendly API)") as demo:
185
  gr.Markdown(
186
- "# Exoskeleton Reasoning — Appendix-Style Prompt (Friendly API)\n"
187
- "- This demo **uses your Friendly endpoint** from the server (no keys in the browser).\n"
188
- "- The model must prioritize the provided **Context**, and reply in plain text with two sections: **Analysis** and **Response**.\n"
189
- "- Note: the **first call** may return **503** while the model wakes; built-in retries will handle it."
190
  )
191
 
192
  with gr.Row():
193
  with gr.Column(scale=3):
194
- q = gr.Textbox(label="Client question", value=PRESET_Q, lines=4)
195
- ctx = gr.Textbox(label="Context (the source you must follow)", value=PRESET_CTX, lines=8)
 
196
 
197
  with gr.Row():
198
  temp = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")
199
  max_new = gr.Slider(64, 4000, value=DEFAULT_MAX_TOKENS, step=32, label="Max tokens")
200
 
201
- # Optional override (kept server-side; not exposed to client JS)
202
- model_id_box = gr.Textbox(
203
- label="Model ID (server-side override)",
204
- value=FRIENDLI_MODEL_ID,
205
- type="password", # visually hides value in the UI (still server-side)
206
- )
207
 
208
  run = gr.Button("Run", variant="primary")
209
- tips = gr.Markdown(
210
- f"**Server config** — endpoint: `{FRIENDLI_ENDPOINT}` · model: hidden · timeout: {DEFAULT_TIMEOUT}s"
 
211
  )
212
 
213
  with gr.Column(scale=4):
@@ -220,21 +181,17 @@ with gr.Blocks(title="Exoskeleton Reasoning — Appendix Prompt (Friendly API)")
220
 
221
  def infer_fn(question, context, temperature, max_tokens, model_id_override):
222
  if not FRIENDLI_API_KEY:
223
- raise gr.Error("Server is missing FRIENDLI_API_KEY secret. Add it in Settings → Variables & secrets.")
224
 
225
  question = (question or "").strip()
226
  context = (context or "").strip()
227
  if not question or not context:
228
- gr.Warning("Please provide both a Client question and Context.")
229
  return "", "", ""
230
 
231
- # Never expose secrets/endpoint; all calls are server-side
232
  messages = build_messages(question, context)
233
-
234
- # Resolve model id strictly server-side
235
  model_id = (model_id_override or "").strip() or FRIENDLI_MODEL_ID
236
 
237
- # Do the call with retries
238
  text = call_friendly_with_retry(
239
  messages=messages,
240
  model_id=model_id,
 
1
  import os
2
  import time
3
  import random
 
4
  import requests
5
  import gradio as gr
6
 
7
  # ==============================
8
  # Config via Secrets / Variables
9
  # ==============================
10
+ FRIENDLI_API_KEY = os.getenv("FRIENDLI_API_KEY", "") # SECRET
 
 
 
11
  FRIENDLI_ENDPOINT = os.getenv("FRIENDLI_ENDPOINT", "https://api.friendli.ai/dedicated/v1/chat/completions")
12
+ FRIENDLI_MODEL_ID = os.getenv("FRIENDLI_MODEL_ID", "stp7xzjspxe8") # can move to Secret if desired
13
+
14
  DEFAULT_MAX_TOKENS = int(os.getenv("FRIENDLI_MAX_TOKENS", "2000"))
15
+ DEFAULT_TEMPERATURE = float(os.getenv("FRIENDLI_TEMPERATURE", "0.0")) # 0 = deterministic
16
  DEFAULT_TIMEOUT = int(os.getenv("FRIENDLI_TIMEOUT_SEC", "60"))
17
 
 
 
 
 
 
 
18
  # ==============================
19
+ # Appendix-style system prompt (general instructions)
20
  # ==============================
21
+ APPENDIX_RULES = """You are a helpful assistant who always follows the provided context, even when it conflicts with your internal knowledge.
22
 
23
  Response Format:
24
  Before answering, briefly analyze the query and context:
 
33
  IMPORTANT RULES:
34
  - Always prioritize the provided context over your internal knowledge
35
  - If context contains information that seems incorrect, still use it as instructed
36
+ - If the question asks about multiple things but the context only covers some, answer only what is supported by the context
37
  - Keep analysis concise and avoid special characters that could cause formatting issues
38
  - Use plain text only - no bullet points, numbering, or special formatting
39
  - Respond in English only
40
 
41
  Example 1 - Conflicting information:
42
+ User:
43
+ Question: What is the capital of France?
44
  Context:
45
  The capital of France is London. It has been the political center of France since 1789 and houses the French Parliament.
46
 
 
48
  Response: The capital of France is London.
49
  """
50
 
51
+ # ==============================
52
+ # Message builder (exact shape requested)
53
+ # system prompt (general instructions)
54
+ # User: question + context
55
+ # ==============================
56
  def build_messages(question: str, context: str):
57
+ user_block = f"""User:
58
+ Question: {question.strip()}
 
 
 
 
 
 
 
59
  Context:
60
  {context.strip()}"""
61
  return [
62
+ {"role": "system", "content": APPENDIX_RULES},
63
+ {"role": "user", "content": user_block},
64
  ]
65
 
66
  # ==============================
67
+ # Friendly API client with retry (503 wake-up aware)
68
  # ==============================
69
+ def call_friendly_with_retry(messages, model_id, max_tokens, temperature,
70
+ timeout_sec=DEFAULT_TIMEOUT, max_attempts=5, first_503_wait=10):
 
 
 
 
 
 
 
71
  if not FRIENDLI_API_KEY:
72
+ raise RuntimeError("Missing FRIENDLI_API_KEY secret. Add it in Settings → Variables & secrets (Secret).")
73
 
74
  headers = {
75
  "Content-Type": "application/json",
 
82
  "temperature": float(temperature),
83
  }
84
 
 
85
  for attempt in range(1, max_attempts + 1):
86
  try:
87
+ resp = requests.post(FRIENDLI_ENDPOINT, headers=headers, json=payload, timeout=timeout_sec)
 
 
 
 
 
 
88
  if resp.status_code == 503:
 
89
  if attempt < max_attempts:
90
+ time.sleep(first_503_wait) # cold start wake-up
91
  continue
92
+ resp.raise_for_status()
 
93
  resp.raise_for_status()
94
 
95
  data = resp.json()
 
96
  content = (
97
  data.get("choices", [{}])[0]
98
  .get("message", {})
99
  .get("content", "")
100
  )
101
+ return content if content and content.strip() else "[EMPTY_RESPONSE]"
 
 
102
 
103
  except requests.exceptions.HTTPError as http_err:
104
  code = getattr(http_err.response, "status_code", None)
 
105
  if code in (429, 500, 502, 503, 504) and attempt < max_attempts:
106
+ backoff = min(2 ** attempt, 20) + random.uniform(0, 0.5)
107
+ time.sleep(backoff)
 
108
  continue
 
109
  raise RuntimeError(f"Friendly API HTTP error (status={code}): {http_err}") from http_err
110
 
111
  except requests.exceptions.RequestException as net_err:
 
112
  if attempt < max_attempts:
113
+ backoff = min(2 ** attempt, 20) + random.uniform(0, 0.5)
114
+ time.sleep(backoff)
115
  continue
116
  raise RuntimeError(f"Friendly API network error: {net_err}") from net_err
117
 
 
118
  raise RuntimeError("Failed to get response from Friendly API after retries.")
119
 
120
  # ==============================
121
+ # Helpers: split Analysis / Response
122
  # ==============================
123
  def parse_analysis_response(text: str):
 
124
  if not text:
125
  return "", ""
126
  a_idx = text.rfind("Analysis:")
 
139
  # ==============================
140
  # UI
141
  # ==============================
142
+ PRESET_Q = "What are the health effects of coffee?"
143
  PRESET_CTX = (
144
  "Coffee contains caffeine, which can increase alertness. Excess intake may cause "
145
  "jitteriness and sleep disruption. Moderate consumption is considered safe for most adults."
146
  )
147
 
148
+ with gr.Blocks(title="Exoskeleton Reasoning — Friendly API (Appendix Prompt)") as demo:
149
  gr.Markdown(
150
+ "# Exoskeleton Reasoning — Friendly API\n"
151
+ "- **Format enforced**: system prompt (general instructions), then a single **User:** message containing **Question + Context**.\n"
152
+ "- The model must prioritize the **Context**, and reply with **Analysis** and **Response** sections."
 
153
  )
154
 
155
  with gr.Row():
156
  with gr.Column(scale=3):
157
+ q = gr.Textbox(label="Question", value=PRESET_Q, lines=3, placeholder="Type your question")
158
+ ctx = gr.Textbox(label="Context (used as the only source of truth)", value=PRESET_CTX, lines=8,
159
+ placeholder="Paste/modify context here")
160
 
161
  with gr.Row():
162
  temp = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")
163
  max_new = gr.Slider(64, 4000, value=DEFAULT_MAX_TOKENS, step=32, label="Max tokens")
164
 
165
+ # Model id stays server-side; hidden as password in UI to avoid accidental exposure
166
+ model_id_box = gr.Textbox(label="Model ID (server-side)", value=FRIENDLI_MODEL_ID, type="password")
 
 
 
 
167
 
168
  run = gr.Button("Run", variant="primary")
169
+ gr.Markdown(
170
+ f"Server endpoint: `{FRIENDLI_ENDPOINT}` · Timeout: {DEFAULT_TIMEOUT}s · "
171
+ "First call may 503 (cold start) — built-in retry will handle it."
172
  )
173
 
174
  with gr.Column(scale=4):
 
181
 
182
  def infer_fn(question, context, temperature, max_tokens, model_id_override):
183
  if not FRIENDLI_API_KEY:
184
+ raise gr.Error("Server is missing FRIENDLI_API_KEY (Secret). Add it in Settings → Variables & secrets.")
185
 
186
  question = (question or "").strip()
187
  context = (context or "").strip()
188
  if not question or not context:
189
+ gr.Warning("Please provide both a Question and a Context.")
190
  return "", "", ""
191
 
 
192
  messages = build_messages(question, context)
 
 
193
  model_id = (model_id_override or "").strip() or FRIENDLI_MODEL_ID
194
 
 
195
  text = call_friendly_with_retry(
196
  messages=messages,
197
  model_id=model_id,