Spaces:

Inpris
/

Humains-Junior

Sleeping

App Files Files Community

NS-Y committed on Nov 3, 2025

Commit

368974b

verified ·

1 Parent(s): 6ee67a7

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -89

app.py CHANGED Viewed

@@ -1,33 +1,24 @@
 import os
 import time
 import random
-import json
 import requests
 import gradio as gr
 # ==============================
 # Config via Secrets / Variables
 # ==============================
-# Secrets (Space: Settings → Variables & secrets → Secrets)
-FRIENDLI_API_KEY = os.getenv("FRIENDLI_API_KEY", "")  # <— SECRET. Do not print/log.
-# Variables (non-secret is okay; keep model id as a secret if you prefer)
 FRIENDLI_ENDPOINT = os.getenv("FRIENDLI_ENDPOINT", "https://api.friendli.ai/dedicated/v1/chat/completions")
-FRIENDLI_MODEL_ID = os.getenv("FRIENDLI_MODEL_ID", "stp7xzjspxe8")  # move to Secret if you want to hide it fully
 DEFAULT_MAX_TOKENS = int(os.getenv("FRIENDLI_MAX_TOKENS", "2000"))
-DEFAULT_TEMPERATURE = float(os.getenv("FRIENDLI_TEMPERATURE", "0.0"))
 DEFAULT_TIMEOUT = int(os.getenv("FRIENDLI_TIMEOUT_SEC", "60"))
-# Safety: never leak secrets in logs
-def _redact(s: str) -> str:
-    if not s:
-        return s
-    return s[:4] + "****" + s[-4:] if len(s) > 8 else "****"
 # ==============================
-# Appendix-style Prompt (Phi 3.5 instruct flavor)
 # ==============================
-APPENDIX_RULES = """You are a helpful assistant that always follows the provided context, even when it conflicts with your internal knowledge.
 Response Format:
 Before answering, briefly analyze the query and context:
@@ -42,14 +33,14 @@ Response: [Your answer based on the context]
 IMPORTANT RULES:
 - Always prioritize the provided context over your internal knowledge
 - If context contains information that seems incorrect, still use it as instructed
-- If the question asks about multiple things but context only covers some, answer only what is supported by the context
 - Keep analysis concise and avoid special characters that could cause formatting issues
 - Use plain text only - no bullet points, numbering, or special formatting
 - Respond in English only
 Example 1 - Conflicting information:
-Client: What is the capital of France? Answer based on the context.
 Context:
 The capital of France is London. It has been the political center of France since 1789 and houses the French Parliament.
@@ -57,37 +48,28 @@ Analysis: The query asks for the capital of France. The context states it is Lon
 Response: The capital of France is London.
 """
 def build_messages(question: str, context: str):
-    """
-    Friendly's API expects OpenAI-style 'messages'.
-    We'll send:
-      - system: Appendix rules + one-shot example
-      - user:   "Client: ... Answer based on the context.\n\nContext:\n..."
-    """
-    system = APPENDIX_RULES
-    user = f"""Client: {question.strip()} Answer based on the context.
 Context:
 {context.strip()}"""
     return [
-        {"role": "system", "content": system},
-        {"role": "user", "content": user},
     ]
 # ==============================
-# Friendly API client with retry
 # ==============================
-def call_friendly_with_retry(messages, model_id, max_tokens, temperature, timeout_sec=DEFAULT_TIMEOUT,
-                             max_attempts=5, first_503_wait=10):
-    """
-    Calls Friendly chat completions with:
-      - 503-aware first retry (server warm-up)
-      - exponential backoff w/ jitter
-      - strict timeout
-    All secrets are read from env; nothing is exposed to the client UI.
-    """
     if not FRIENDLI_API_KEY:
-        raise RuntimeError("Missing FRIENDLI_API_KEY secret.")
     headers = {
         "Content-Type": "application/json",
@@ -100,63 +82,45 @@ def call_friendly_with_retry(messages, model_id, max_tokens, temperature, timeou
         "temperature": float(temperature),
     }
-    # First attempt is often 503 (cold start). Handle specifically.
     for attempt in range(1, max_attempts + 1):
         try:
-            resp = requests.post(
-                FRIENDLI_ENDPOINT,
-                headers=headers,
-                json=payload,
-                timeout=timeout_sec,
-            )
-            # If Friendly uses 429/5xx for rate/overload, raise_for_status will catch it
             if resp.status_code == 503:
-                # cold start; wait and retry with fixed small delay
                 if attempt < max_attempts:
-                    time.sleep(first_503_wait)
                     continue
-                else:
-                    resp.raise_for_status()
             resp.raise_for_status()
             data = resp.json()
-            # Defensive parsing
             content = (
                 data.get("choices", [{}])[0]
                     .get("message", {})
                     .get("content", "")
             )
-            if not content or not str(content).strip():
-                return "[EMPTY_RESPONSE]"
-            return str(content)
         except requests.exceptions.HTTPError as http_err:
             code = getattr(http_err.response, "status_code", None)
-            # Retry strategies:
             if code in (429, 500, 502, 503, 504) and attempt < max_attempts:
-                # Exp backoff with jitter
-                sleep_s = min(2 ** attempt, 20) + random.uniform(0, 0.5)
-                time.sleep(sleep_s)
                 continue
-            # Non-retryable or exhausted
             raise RuntimeError(f"Friendly API HTTP error (status={code}): {http_err}") from http_err
         except requests.exceptions.RequestException as net_err:
-            # Network timeouts / DNS / connection errors — retry with backoff
             if attempt < max_attempts:
-                sleep_s = min(2 ** attempt, 20) + random.uniform(0, 0.5)
-                time.sleep(sleep_s)
                 continue
             raise RuntimeError(f"Friendly API network error: {net_err}") from net_err
-    # Should not reach here due to raises above, but just in case:
     raise RuntimeError("Failed to get response from Friendly API after retries.")
 # ==============================
-# Helpers
 # ==============================
 def parse_analysis_response(text: str):
-    """Extract 'Analysis:' and 'Response:' blocks from plain text."""
     if not text:
         return "", ""
     a_idx = text.rfind("Analysis:")
@@ -175,39 +139,36 @@ def parse_analysis_response(text: str):
 # ==============================
 # UI
 # ==============================
-PRESET_Q = "What are the health effects of coffee? Answer based on the context."
 PRESET_CTX = (
     "Coffee contains caffeine, which can increase alertness. Excess intake may cause "
     "jitteriness and sleep disruption. Moderate consumption is considered safe for most adults."
 )
-with gr.Blocks(title="Exoskeleton Reasoning — Appendix Prompt (Friendly API)") as demo:
     gr.Markdown(
-        "# Exoskeleton Reasoning — Appendix-Style Prompt (Friendly API)\n"
-        "- This demo **uses your Friendly endpoint** from the server (no keys in the browser).\n"
-        "- The model must prioritize the provided **Context**, and reply in plain text with two sections: **Analysis** and **Response**.\n"
-        "- Note: the **first call** may return **503** while the model wakes; built-in retries will handle it."
     )
     with gr.Row():
         with gr.Column(scale=3):
-            q = gr.Textbox(label="Client question", value=PRESET_Q, lines=4)
-            ctx = gr.Textbox(label="Context (the source you must follow)", value=PRESET_CTX, lines=8)
             with gr.Row():
                 temp = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")
                 max_new = gr.Slider(64, 4000, value=DEFAULT_MAX_TOKENS, step=32, label="Max tokens")
-            # Optional override (kept server-side; not exposed to client JS)
-            model_id_box = gr.Textbox(
-                label="Model ID (server-side override)",
-                value=FRIENDLI_MODEL_ID,
-                type="password",  # visually hides value in the UI (still server-side)
-            )
             run = gr.Button("Run", variant="primary")
-            tips = gr.Markdown(
-                f"**Server config** — endpoint: `{FRIENDLI_ENDPOINT}` · model: hidden · timeout: {DEFAULT_TIMEOUT}s"
             )
         with gr.Column(scale=4):
@@ -220,21 +181,17 @@ with gr.Blocks(title="Exoskeleton Reasoning — Appendix Prompt (Friendly API)")
     def infer_fn(question, context, temperature, max_tokens, model_id_override):
         if not FRIENDLI_API_KEY:
-            raise gr.Error("Server is missing FRIENDLI_API_KEY secret. Add it in Settings → Variables & secrets.")
         question = (question or "").strip()
         context = (context or "").strip()
         if not question or not context:
-            gr.Warning("Please provide both a Client question and Context.")
             return "", "", ""
-        # Never expose secrets/endpoint; all calls are server-side
         messages = build_messages(question, context)
-        # Resolve model id strictly server-side
         model_id = (model_id_override or "").strip() or FRIENDLI_MODEL_ID
-        # Do the call with retries
         text = call_friendly_with_retry(
             messages=messages,
             model_id=model_id,

 import os
 import time
 import random
 import requests
 import gradio as gr
 # ==============================
 # Config via Secrets / Variables
 # ==============================
+FRIENDLI_API_KEY = os.getenv("FRIENDLI_API_KEY", "")  # SECRET
 FRIENDLI_ENDPOINT = os.getenv("FRIENDLI_ENDPOINT", "https://api.friendli.ai/dedicated/v1/chat/completions")
+FRIENDLI_MODEL_ID = os.getenv("FRIENDLI_MODEL_ID", "stp7xzjspxe8")  # can move to Secret if desired
 DEFAULT_MAX_TOKENS = int(os.getenv("FRIENDLI_MAX_TOKENS", "2000"))
+DEFAULT_TEMPERATURE = float(os.getenv("FRIENDLI_TEMPERATURE", "0.0"))  # 0 = deterministic
 DEFAULT_TIMEOUT = int(os.getenv("FRIENDLI_TIMEOUT_SEC", "60"))
 # ==============================
+# Appendix-style system prompt (general instructions)
 # ==============================
+APPENDIX_RULES = """You are a helpful assistant who always follows the provided context, even when it conflicts with your internal knowledge.
 Response Format:
 Before answering, briefly analyze the query and context:
 IMPORTANT RULES:
 - Always prioritize the provided context over your internal knowledge
 - If context contains information that seems incorrect, still use it as instructed
+- If the question asks about multiple things but the context only covers some, answer only what is supported by the context
 - Keep analysis concise and avoid special characters that could cause formatting issues
 - Use plain text only - no bullet points, numbering, or special formatting
 - Respond in English only
 Example 1 - Conflicting information:
+User:
+Question: What is the capital of France?
 Context:
 The capital of France is London. It has been the political center of France since 1789 and houses the French Parliament.
 Response: The capital of France is London.
 """
+# ==============================
+# Message builder (exact shape requested)
+# system prompt (general instructions)
+# User: question + context
+# ==============================
 def build_messages(question: str, context: str):
+    user_block = f"""User:
+Question: {question.strip()}
 Context:
 {context.strip()}"""
     return [
+        {"role": "system", "content": APPENDIX_RULES},
+        {"role": "user", "content": user_block},
     ]
 # ==============================
+# Friendly API client with retry (503 wake-up aware)
 # ==============================
+def call_friendly_with_retry(messages, model_id, max_tokens, temperature,
+                             timeout_sec=DEFAULT_TIMEOUT, max_attempts=5, first_503_wait=10):
     if not FRIENDLI_API_KEY:
+        raise RuntimeError("Missing FRIENDLI_API_KEY secret. Add it in Settings → Variables & secrets (Secret).")
     headers = {
         "Content-Type": "application/json",
         "temperature": float(temperature),
     }
     for attempt in range(1, max_attempts + 1):
         try:
+            resp = requests.post(FRIENDLI_ENDPOINT, headers=headers, json=payload, timeout=timeout_sec)
             if resp.status_code == 503:
                 if attempt < max_attempts:
+                    time.sleep(first_503_wait)  # cold start wake-up
                     continue
+                resp.raise_for_status()
             resp.raise_for_status()
             data = resp.json()
             content = (
                 data.get("choices", [{}])[0]
                     .get("message", {})
                     .get("content", "")
             )
+            return content if content and content.strip() else "[EMPTY_RESPONSE]"
         except requests.exceptions.HTTPError as http_err:
             code = getattr(http_err.response, "status_code", None)
             if code in (429, 500, 502, 503, 504) and attempt < max_attempts:
+                backoff = min(2 ** attempt, 20) + random.uniform(0, 0.5)
+                time.sleep(backoff)
                 continue
             raise RuntimeError(f"Friendly API HTTP error (status={code}): {http_err}") from http_err
         except requests.exceptions.RequestException as net_err:
             if attempt < max_attempts:
+                backoff = min(2 ** attempt, 20) + random.uniform(0, 0.5)
+                time.sleep(backoff)
                 continue
             raise RuntimeError(f"Friendly API network error: {net_err}") from net_err
     raise RuntimeError("Failed to get response from Friendly API after retries.")
 # ==============================
+# Helpers: split Analysis / Response
 # ==============================
 def parse_analysis_response(text: str):
     if not text:
         return "", ""
     a_idx = text.rfind("Analysis:")
 # ==============================
 # UI
 # ==============================
+PRESET_Q = "What are the health effects of coffee?"
 PRESET_CTX = (
     "Coffee contains caffeine, which can increase alertness. Excess intake may cause "
     "jitteriness and sleep disruption. Moderate consumption is considered safe for most adults."
 )
+with gr.Blocks(title="Exoskeleton Reasoning — Friendly API (Appendix Prompt)") as demo:
     gr.Markdown(
+        "# Exoskeleton Reasoning — Friendly API\n"
+        "- **Format enforced**: system prompt (general instructions), then a single **User:** message containing **Question + Context**.\n"
+        "- The model must prioritize the **Context**, and reply with **Analysis** and **Response** sections."
     )
     with gr.Row():
         with gr.Column(scale=3):
+            q = gr.Textbox(label="Question", value=PRESET_Q, lines=3, placeholder="Type your question")
+            ctx = gr.Textbox(label="Context (used as the only source of truth)", value=PRESET_CTX, lines=8,
+                             placeholder="Paste/modify context here")
             with gr.Row():
                 temp = gr.Slider(0.0, 1.0, value=DEFAULT_TEMPERATURE, step=0.05, label="Temperature")
                 max_new = gr.Slider(64, 4000, value=DEFAULT_MAX_TOKENS, step=32, label="Max tokens")
+            # NOTE: type="password" only masks the field on screen; the model id is still passed to the UI component as its value, so it is not strictly server-side
+            model_id_box = gr.Textbox(label="Model ID (server-side)", value=FRIENDLI_MODEL_ID, type="password")
             run = gr.Button("Run", variant="primary")
+            gr.Markdown(
+                f"Server endpoint: `{FRIENDLI_ENDPOINT}` · Timeout: {DEFAULT_TIMEOUT}s · "
+                "First call may 503 (cold start) — built-in retry will handle it."
             )
         with gr.Column(scale=4):
     def infer_fn(question, context, temperature, max_tokens, model_id_override):
         if not FRIENDLI_API_KEY:
+            raise gr.Error("Server is missing FRIENDLI_API_KEY (Secret). Add it in Settings → Variables & secrets.")
         question = (question or "").strip()
         context = (context or "").strip()
         if not question or not context:
+            gr.Warning("Please provide both a Question and a Context.")
             return "", "", ""
         messages = build_messages(question, context)
         model_id = (model_id_override or "").strip() or FRIENDLI_MODEL_ID
         text = call_friendly_with_retry(
             messages=messages,
             model_id=model_id,