Update main.py
main.py CHANGED
@@ -30,9 +30,9 @@ DEFAULT_TEMP = float(os.getenv("DEFAULT_TEMPERATURE", "0.6"))
 DEFAULT_TOP_P = float(os.getenv("DEFAULT_TOP_P", "0.95"))
 DEFAULT_TOKENS = int(os.getenv("DEFAULT_MAX_TOKENS", "32000"))
 
-REQUEST_TIMEOUT = int(os.getenv("REQUEST_TIMEOUT", "
+REQUEST_TIMEOUT = int(os.getenv("REQUEST_TIMEOUT", "120"))
 MAX_RETRIES = int(os.getenv("MAX_RETRIES", "3"))
-RETRY_BASE_DELAY = float(os.getenv("RETRY_BASE_DELAY", "1.
+RETRY_BASE_DELAY = float(os.getenv("RETRY_BASE_DELAY", "1.5"))
 
 logging.basicConfig(level=logging.INFO, format="%(asctime)s %(levelname)s %(message)s")
 log = logging.getLogger(__name__)
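
The backoff is exponential in the attempt number: the retry loop (see the retry hunk further down) sleeps RETRY_BASE_DELAY ** attempt seconds, so the new defaults give waits of 1.5 s and 2.25 s between the three attempts, with the final failure re-raised rather than slept on. A quick stand-alone sanity check of the schedule, assuming the defaults above:

# Sanity check of the retry schedule implied by the new defaults
# (assumes RETRY_BASE_DELAY=1.5 and MAX_RETRIES=3 as configured above).
RETRY_BASE_DELAY = 1.5
MAX_RETRIES = 3

for attempt in range(1, MAX_RETRIES + 1):
    delay = RETRY_BASE_DELAY ** attempt  # same formula as _call_with_retries
    print(f"attempt {attempt}: next delay {delay:.2f}s")
# attempt 1: next delay 1.50s
# attempt 2: next delay 2.25s
# attempt 3: next delay 3.38s  (never slept; the last attempt re-raises)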
@@ -49,7 +49,7 @@ async def get_client() -> Client:
     if _client is None:
         log.info("Connecting to %s", HF_SPACE_URL)
         _client = await asyncio.to_thread(Client, HF_SPACE_URL)
-        log.info("Connected
+        log.info("Connected.")
     return _client
 
 
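
gradio_client.Client performs blocking network I/O in its constructor (it fetches the Space's config before any prediction can run), which is why construction is pushed off the event loop with asyncio.to_thread. A minimal sketch of the pattern in isolation, with a placeholder Space URL:

import asyncio
from gradio_client import Client

async def connect(space_url: str) -> Client:
    # Client(space_url) blocks on HTTP while it resolves the Space's API;
    # running it in a worker thread keeps other requests responsive.
    return await asyncio.to_thread(Client, space_url)

# Example (placeholder URL):
# client = asyncio.run(connect("https://user-space.hf.space"))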
@@ -106,8 +106,8 @@ async def lifespan(app: FastAPI):
 # ---------------------------------------------------------------------------
 
 app = FastAPI(
-    title="
-    version="
+    title="Foc",
+    version="5.0.0",
     lifespan=lifespan,
 )
 
@@ -124,17 +124,27 @@ app.add_middleware(
 # ---------------------------------------------------------------------------
 
 def _content_str(m: Message) -> str:
+    """
+    Extract ONLY text blocks.
+    This preserves Dyad compatibility and filters UI noise.
+    """
     if isinstance(m.content, str):
         return m.content
-
-
-
-    if isinstance(p, dict)
-
+
+    text_parts = []
+    for p in m.content:
+        if isinstance(p, dict) and p.get("type") == "text":
+            text_parts.append(p.get("text", "").strip())
+
+    return "".join(text_parts)
 
 
 def _build_prompt(messages: list[Message]) -> str:
+    """
+    Preserve original F alignment.
+    """
     system, parts = [], []
+
     for m in messages:
         c = _content_str(m).strip()
         if not c:
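
The new _content_str accepts both the plain-string and the OpenAI list-of-blocks content shapes, keeping only "type": "text" blocks. A self-contained illustration of the logic (the Msg dataclass is a stand-in for the app's Message model):

from dataclasses import dataclass
from typing import Any

@dataclass
class Msg:  # stand-in for the app's Message model (assumption)
    role: str
    content: Any

def content_str(m: Msg) -> str:  # mirrors the new _content_str above
    if isinstance(m.content, str):
        return m.content
    text_parts = []
    for p in m.content:
        if isinstance(p, dict) and p.get("type") == "text":
            text_parts.append(p.get("text", "").strip())
    return "".join(text_parts)

print(content_str(Msg("user", "hi")))  # -> hi
print(content_str(Msg("user", [
    {"type": "text", "text": "describe this"},
    {"type": "image_url", "image_url": {"url": "https://example.com/x.png"}},
])))  # -> describe this (the image block is filtered out)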
@@ -144,77 +154,83 @@ def _build_prompt(messages: list[Message]) -> str:
             system.append(c)
         elif m.role == "assistant":
             parts.append(f"[ASSISTANT]\n{c}")
-
+        elif m.role == "user":
             parts.append(c)
 
-    prefix = ""
-
-        prefix = "[SYSTEM]\n" + "\n".join(system) + "\n[/SYSTEM]\n\n"
-
-    return prefix + "\n\n".join(parts)
+    prefix = "[SYSTEM]\n" + "\n".join(system) + "\n[/SYSTEM]\n" if system else ""
+    return prefix + "\n".join(parts)
 
 
 # ---------------------------------------------------------------------------
-# Robust
+# Robust extraction
 # ---------------------------------------------------------------------------
 
 def _extract_text(result: Any) -> str:
-    if hasattr(result, "data"):
-        result = result.data
-
     if isinstance(result, tuple):
-
-        result = result["value"]
-
-            return str(last["content"]).strip()
-        if "value" in last:
-            return str(last["value"]).strip()
-
-        return str(last[1]).strip()
-
-    if isinstance(
-
+        data = result
+    elif hasattr(result, "data"):
+        data = result.data
+    else:
+        data = [result]
+
+    conversation = None
+
+    for item in data:
+        if isinstance(item, dict) and "value" in item:
+            if isinstance(item["value"], list):
+                conversation = item["value"]
+                break
+        elif isinstance(item, list):
+            conversation = item
+            break
+
+    if not conversation:
+        raise ValueError("Cannot extract conversation from result")
+
+    last = conversation[-1]
+
+    if isinstance(last, dict):
+        content = last.get("content", "")
+    elif isinstance(last, (list, tuple)) and len(last) >= 2:
+        content = last[1] or ""
+    else:
+        content = str(last)
+
+    if isinstance(content, list):
+        parts = []
+        for block in content:
+            if isinstance(block, dict) and block.get("type") == "text":
+                parts.append(block.get("content", block.get("text", "")))
+        return "".join(parts).strip()
+
+    return str(content).strip()
 
 
 # ---------------------------------------------------------------------------
-# Retry
+# Retry wrapper
 # ---------------------------------------------------------------------------
 
-async def _call_with_retries(
+async def _call_with_retries(prompt: str, req: ChatCompletionRequest) -> str:
+    last_error = None
+
     for attempt in range(1, MAX_RETRIES + 1):
         try:
-            return await asyncio.wait_for(
+            return await asyncio.wait_for(
+                _call_falcon_once(prompt, req),
+                timeout=REQUEST_TIMEOUT,
+            )
         except Exception as e:
-
-
-
+            last_error = e
+            if attempt == MAX_RETRIES:
+                break
 
             delay = RETRY_BASE_DELAY ** attempt
-            log.warning(
-
-                attempt,
-                str(e),
-                delay,
-            )
+            log.warning("Attempt %d failed: %s | retrying in %.2fs",
+                        attempt, str(e), delay)
             await asyncio.sleep(delay)
 
+    raise last_error
+
 
-# ---------------------------------------------------------------------------
-# Falcon Call
-# ---------------------------------------------------------------------------
-
 async def _call_falcon_once(prompt: str, req: ChatCompletionRequest) -> str:
     client = await get_client()
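
The rewritten _extract_text normalizes the shapes a gradio_client prediction can arrive in — a raw tuple of outputs, an object with a .data attribute, or a bare value — then looks for a Chatbot-style conversation list and reads its last turn. An illustrative trace against two assumed payload shapes (not captured output):

# Assumed example payloads mimicking gradio_client results.
# "messages"-format Chatbot: a dict with a "value" list of role/content turns.
result_a = ({"value": [
    {"role": "user", "content": "hi"},
    {"role": "assistant", "content": "Hello there!"},
]},)
# Older pair format: a list of (user, bot) turns.
result_b = [["hi", "Hello there!"]]

print(_extract_text(result_a))  # -> Hello there!  (dict turn, "content" key)
print(_extract_text(result_b))  # -> Hello there!  (pair turn, index 1)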
@@ -238,65 +254,42 @@ async def _call_falcon_once(prompt: str, req: ChatCompletionRequest) -> str:
     return _extract_text(result)
 
 
-async def _call_falcon(prompt: str, req: ChatCompletionRequest) -> str:
-    return await _call_with_retries(_call_falcon_once, prompt, req)
-
-
 # ---------------------------------------------------------------------------
-#
+# Streaming (buffered safe streaming)
 # ---------------------------------------------------------------------------
 
-async def
-    """
-    Stable streaming for HF Spaces:
-    1. Generate full response with retries
-    2. Stream chunks safely
-    """
-
-    text = await _call_falcon(prompt, req)
-
+async def _stream_sse(text: str, req: ChatCompletionRequest) -> AsyncGenerator[str, None]:
     cid = f"chatcmpl-{uuid.uuid4().hex}"
     created = int(time.time())
 
-
-
-        chunk = {
-            "id": cid,
-            "object": "chat.completion.chunk",
-            "created": created,
-            "model": req.model,
-            "choices": [{
-                "index": 0,
-                "delta": {"content": text[i:i+16]},
-                "finish_reason": None,
-            }],
-        }
-
-        yield f"data: {json.dumps(chunk)}\n\n"
-        await asyncio.sleep(0.02)
-
-    final = {
+    for i in range(0, len(text), 8):
+        chunk = {
             "id": cid,
             "object": "chat.completion.chunk",
             "created": created,
             "model": req.model,
             "choices": [{
                 "index": 0,
-                "delta": {},
-                "finish_reason":
+                "delta": {"content": text[i:i+8]},
+                "finish_reason": None,
             }],
         }
+        yield f"data: {json.dumps(chunk)}\n\n"
+        await asyncio.sleep(0.01)
 
-
-
+    yield f"data: {json.dumps({
+        'id': cid,
+        'object': 'chat.completion.chunk',
+        'created': created,
+        'model': req.model,
+        'choices': [{'index': 0, 'delta': {}, 'finish_reason': 'stop'}],
+    })}\n\n"
 
-
-    log.exception("Streaming crashed unexpectedly.")
-    yield "data: [DONE]\n\n"
+    yield "data: [DONE]\n\n"
 
 
 # ---------------------------------------------------------------------------
-# OpenAI
+# OpenAI response builder
 # ---------------------------------------------------------------------------
 
 def _make_response(text: str, req: ChatCompletionRequest) -> dict:
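
On the wire, _stream_sse emits OpenAI-style SSE frames: one data: line per 8-character delta, a closing frame whose finish_reason is "stop", then the literal [DONE] sentinel. Note the closing frame is built with a multi-line expression inside an f-string, which requires Python 3.12+ (PEP 701). An illustrative transcript for the text "Hello, world!" (id, timestamp, and model are placeholders):

data: {"id": "chatcmpl-abc123", "object": "chat.completion.chunk", "created": 1700000000, "model": "falcon", "choices": [{"index": 0, "delta": {"content": "Hello, w"}, "finish_reason": null}]}

data: {"id": "chatcmpl-abc123", "object": "chat.completion.chunk", "created": 1700000000, "model": "falcon", "choices": [{"index": 0, "delta": {"content": "orld!"}, "finish_reason": null}]}

data: {"id": "chatcmpl-abc123", "object": "chat.completion.chunk", "created": 1700000000, "model": "falcon", "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]}

data: [DONE]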
@@ -330,23 +323,23 @@ async def chat_completions(req: ChatCompletionRequest, _: None = Depends(verify_
     prompt = _build_prompt(req.messages)
 
     try:
-
-            return StreamingResponse(
-                _safe_stream(prompt, req),
-                media_type="text/event-stream",
-                headers={
-                    "Cache-Control": "no-cache",
-                    "Connection": "keep-alive",
-                    "X-Accel-Buffering": "no",
-                },
-            )
-
-        text = await _call_falcon(prompt, req)
-        return JSONResponse(content=_make_response(text, req))
-
+        text = await _call_with_retries(prompt, req)
     except Exception:
-        log.exception("
+        log.exception("Falcon failed after retries")
         raise HTTPException(
             status_code=502,
             detail="Model temporarily unavailable. Please try again.",
-            )
+        )
+
+    if req.stream:
+        return StreamingResponse(
+            _stream_sse(text, req),
+            media_type="text/event-stream",
+            headers={
+                "Cache-Control": "no-cache",
+                "X-Accel-Buffering": "no",
+                "Connection": "keep-alive",
+            },
+        )
+
+    return JSONResponse(content=_make_response(text, req))
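
Since generation now completes (with retries) before req.stream is consulted, streaming only changes delivery, not the model call. A client-side sketch, assuming the route is mounted at /v1/chat/completions and that the truncated verify_ dependency in the signature does API-key auth (base URL, key, and model name are placeholders):

# Hypothetical client usage via the OpenAI SDK.
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8000/v1", api_key="sk-placeholder")

stream = client.chat.completions.create(
    model="falcon",  # placeholder; the server echoes whatever is sent
    messages=[{"role": "user", "content": "Say hello"}],
    stream=True,     # served by _stream_sse above
)
for chunk in stream:
    delta = chunk.choices[0].delta.content
    if delta:
        print(delta, end="", flush=True)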