Spaces:

AdarshJi
/

DDS

Sleeping

App Files Files Community

AdarshJi committed 26 days ago

Commit

3ef5e4f

verified ·

1 Parent(s): b0316b9

Update server.py

Browse files

Files changed (1) hide show

server.py +101 -80

server.py CHANGED Viewed

@@ -534,10 +534,6 @@ def QWEN(
@@ -549,7 +545,7 @@ PROVIDERS: Dict[str, Dict[str, Any]] = {
     "3": {"__func__": FREEGPT, "models": M3},
 }
-# will be filled on startup to avoid per-request introspection
 PROVIDER_META: Dict[str, Dict[str, Any]] = {}
 class Config:
@@ -577,7 +573,7 @@ class ChatRequest:
         messages = payload.get("messages") or payload.get("message") or payload.get("msgs")
         model = payload.get("model_name") or payload.get("model")
         provider = payload.get("provider") or Config.DEFAULT_PROVIDER
-        provider = str(provider)  # keep "1","2","3" style
         max_tokens = payload.get("max_tokens", Config.DEFAULT_MAX_TOKENS)
         temperature = payload.get("temperature", Config.DEFAULT_TEMPERATURE)
         stream = payload.get("stream", Config.STREAM)
@@ -602,31 +598,78 @@ GLOBAL_AIOHTTP: Optional[aiohttp.ClientSession] = None
 @app.on_event("startup")
 async def on_startup():
     global GLOBAL_AIOHTTP, PROVIDER_META
-    logger.info("Starting up - creating global aiohttp session and analyzing providers")
     GLOBAL_AIOHTTP = aiohttp.ClientSession()
     for key, payload in PROVIDERS.items():
         func = payload["__func__"]
-        meta = {
             "func": func,
             "is_async_gen_fn": inspect.isasyncgenfunction(func),
             "is_coroutine_fn": inspect.iscoroutinefunction(func),
             "is_generator_fn": inspect.isgeneratorfunction(func),
-            # mark as sync if not coroutine/asyncgen/generator
-            "is_sync": not (inspect.iscoroutinefunction(func) or inspect.isasyncgenfunction(func) or inspect.isgeneratorfunction(func)),
         }
-        PROVIDER_META[key] = meta
-    logger.info("Provider metadata prepared")
 @app.on_event("shutdown")
 async def on_shutdown():
     global GLOBAL_AIOHTTP
-    logger.info("Shutting down - closing global aiohttp session")
     if GLOBAL_AIOHTTP and not GLOBAL_AIOHTTP.closed:
         await GLOBAL_AIOHTTP.close()
-async def _call_provider_and_iterate(
     provider_key: str,
     messages: List[Dict],
     model: str,
@@ -635,8 +678,7 @@ async def _call_provider_and_iterate(
     timeout: float,
 ) -> AsyncGenerator[bytes, None]:
     """
-    Invoke provider according to metadata and yield raw bytes.
-    We'll transform these bytes into SSE events higher up.
     """
     if provider_key not in PROVIDER_META:
         raise ValueError(f"Unknown provider '{provider_key}'")
@@ -644,15 +686,14 @@ async def _call_provider_and_iterate(
     meta = PROVIDER_META[provider_key]
     func = meta["func"]
-    async def _invoke_async():
-        return func(Requests, Message=messages, Model=model, max_token=max_token, stream=stream_flag, timeout=timeout)
     try:
-        provider_task = _invoke_async()
-        # async generator function
         if meta["is_async_gen_fn"]:
-            agen = await asyncio.wait_for(provider_task, timeout=timeout)
             async for item in agen:
                 if item is None:
                     continue
@@ -664,14 +705,22 @@ async def _call_provider_and_iterate(
                     yield str(item).encode("utf-8")
             return
-        # coroutine function
         if meta["is_coroutine_fn"]:
-            res = await asyncio.wait_for(provider_task, timeout=timeout)
             if res is None:
                 return
-            # list/tuple
-            if isinstance(res, (list, tuple)):
-                for item in res:
                     if item is None:
                         continue
                     if isinstance(item, bytes):
@@ -681,7 +730,7 @@ async def _call_provider_and_iterate(
                     else:
                         yield str(item).encode("utf-8")
                 return
-            # sync-iterable
             if inspect.isgenerator(res) or (hasattr(res, "__iter__") and not isinstance(res, (str, bytes, dict))):
                 for item in res:
                     if item is None:
@@ -702,25 +751,14 @@ async def _call_provider_and_iterate(
                 yield str(res).encode("utf-8")
             return
-        # sync function/generator: run in thread
-        sync_res = await asyncio.wait_for(
-            asyncio.to_thread(func, Requests, messages, model, max_token, stream_flag, timeout),
-            timeout=timeout,
-        )
         if sync_res is None:
             return
-        if isinstance(sync_res, (list, tuple)):
-            for item in sync_res:
-                if item is None:
-                    continue
-                if isinstance(item, bytes):
-                    yield item
-                elif isinstance(item, str):
-                    yield item.encode("utf-8")
-                else:
-                    yield str(item).encode("utf-8")
-            return
         if inspect.isgenerator(sync_res) or (hasattr(sync_res, "__iter__") and not isinstance(sync_res, (str, bytes, dict))):
             for item in sync_res:
                 if item is None:
@@ -744,13 +782,14 @@ async def _call_provider_and_iterate(
         logger.warning(err.strip())
         yield err.encode("utf-8")
     except Exception as e:
-        logger.exception("Provider error")
         err = f"[server_error] {type(e).__name__}: {e}\n"
         yield err.encode("utf-8")
 @app.post("/chat")
 async def chat_endpoint(request: Request):
     try:
         body_bytes = await request.body()
         payload = _loads(body_bytes)
@@ -764,14 +803,9 @@ async def chat_endpoint(request: Request):
     provider_key = req.provider
     if req.stream:
-        async def sse_stream_gen():
-            """
-            For every chunk from provider, send an SSE event line:
-            data: {"response":"..."}\n\n
-            After completion send a final line: [DONE]\n
-            """
-            # iterate provider outputs (raw bytes)
-            async for raw_chunk in _call_provider_and_iterate(
                 provider_key=provider_key,
                 messages=req.messages,
                 model=req.model or Config.DEFAULT_MODEL,
@@ -779,36 +813,28 @@ async def chat_endpoint(request: Request):
                 stream_flag=req.stream,
                 timeout=Config.TIMEOUT,
             ):
-                # decode provider chunk to text
-                if isinstance(raw_chunk, bytes):
-                    text = raw_chunk.decode("utf-8", errors="ignore")
-                else:
-                    text = str(raw_chunk)
-                # build the JSON payload {"response": "<text>"} and serialize
                 payload_obj = {"response": text}
                 try:
                     json_str = _dumps(payload_obj)
                 except Exception:
-                    # fallback to manual safe-escape for string-only payload
-                    import json as _json_fallback
-                    json_str = _json_fallback.dumps(payload_obj)
-                # SSE data line + double newline (SSE event terminator)
                 sse_event = f"data: {json_str}\n\n"
                 yield sse_event.encode("utf-8")
-            # final termination marker exactly as you requested
-            # NOTE: sending it as a line by itself (not prefixed by 'data:')
             yield ("[DONE]\n").encode("utf-8")
-        # content-type text/event-stream (SSE)
-        return StreamingResponse(sse_stream_gen(), media_type="text/event-stream")
     else:
-        # non-stream: collect and return JSON (same as before)
         collected = []
-        async for chunk in _call_provider_and_iterate(
             provider_key=provider_key,
             messages=req.messages,
             model=req.model or Config.DEFAULT_MODEL,
@@ -816,18 +842,13 @@ async def chat_endpoint(request: Request):
             stream_flag=req.stream,
             timeout=Config.TIMEOUT,
         ):
-            if isinstance(chunk, bytes):
-                collected.append(chunk.decode("utf-8", errors="ignore"))
-            else:
-                collected.append(str(chunk))
-        full_text = "".join(collected)
-        return JSONResponse({"text": full_text})
 @app.get("/model")
 async def model():
-    models = [M1, M2, M3]
-    return {"models": models}
 @app.get("/health")

     "3": {"__func__": FREEGPT, "models": M3},
 }
+# precomputed provider metadata for speed
 PROVIDER_META: Dict[str, Dict[str, Any]] = {}
 class Config:
         messages = payload.get("messages") or payload.get("message") or payload.get("msgs")
         model = payload.get("model_name") or payload.get("model")
         provider = payload.get("provider") or Config.DEFAULT_PROVIDER
+        provider = str(provider)
         max_tokens = payload.get("max_tokens", Config.DEFAULT_MAX_TOKENS)
         temperature = payload.get("temperature", Config.DEFAULT_TEMPERATURE)
         stream = payload.get("stream", Config.STREAM)
 @app.on_event("startup")
 async def on_startup():
     global GLOBAL_AIOHTTP, PROVIDER_META
+    logger.info("startup: create global aiohttp session and analyze providers")
     GLOBAL_AIOHTTP = aiohttp.ClientSession()
     for key, payload in PROVIDERS.items():
         func = payload["__func__"]
+        PROVIDER_META[key] = {
             "func": func,
             "is_async_gen_fn": inspect.isasyncgenfunction(func),
             "is_coroutine_fn": inspect.iscoroutinefunction(func),
             "is_generator_fn": inspect.isgeneratorfunction(func),
+            "is_sync_fn": not (inspect.iscoroutinefunction(func) or inspect.isasyncgenfunction(func) or inspect.isgeneratorfunction(func)),
         }
+    logger.info("provider meta ready: %s", {k: {kk: vv for kk, vv in v.items() if kk != "func"} for k, v in PROVIDER_META.items()})
 @app.on_event("shutdown")
 async def on_shutdown():
     global GLOBAL_AIOHTTP
+    logger.info("shutdown: close global aiohttp session")
     if GLOBAL_AIOHTTP and not GLOBAL_AIOHTTP.closed:
         await GLOBAL_AIOHTTP.close()
+async def _stream_sync_generator_in_thread(func, *args, **kwargs) -> AsyncGenerator[bytes, None]:
+    """
+    Run a sync generator in a worker thread and stream its items back as bytes.
+
+    ``func(*args, **kwargs)`` is called inside the default executor and the
+    iterable it returns is consumed there. Each item is handed to the event
+    loop through a bounded asyncio.Queue, so the loop itself never blocks.
+
+    Yields every non-None item as bytes: bytes are passed through, str is
+    UTF-8 encoded, anything else is encoded via ``str(item)``.
+
+    Raises any Exception raised by ``func`` or by iterating its result; the
+    exception object is forwarded from the thread and re-raised here.
+    """
+    import threading
+    from concurrent.futures import CancelledError as _FutureCancelled
+    from concurrent.futures import TimeoutError as _FutureTimeout
+    loop = asyncio.get_running_loop()
+    q: asyncio.Queue = asyncio.Queue(maxsize=32)
+    sentinel = object()
+    # set once the async side stops reading (finished, failed, or closed early)
+    consumer_gone = threading.Event()
+    def put_blocking(obj) -> bool:
+        # Scheduling q.put_nowait() with call_soon_threadsafe raises QueueFull inside
+        # the loop whenever the producer outruns the consumer: the item is dropped,
+        # and if the dropped item is the sentinel the consumer waits forever.
+        # Awaiting q.put() from the thread blocks the worker instead (backpressure).
+        if consumer_gone.is_set():
+            return False
+        try:
+            fut = asyncio.run_coroutine_threadsafe(q.put(obj), loop)
+        except RuntimeError:
+            # the loop is already closed; nobody is left to receive the item
+            return False
+        while True:
+            try:
+                fut.result(timeout=0.1)
+                return True
+            except _FutureTimeout:
+                # wake up periodically so an abandoned stream cannot park this thread forever
+                if consumer_gone.is_set():
+                    fut.cancel()
+                    return False
+            except _FutureCancelled:
+                return False
+    def worker():
+        try:
+            gen = func(*args, **kwargs)
+            # a function that returns nothing simply ends the stream
+            if gen is None:
+                return
+            for item in gen:
+                if not put_blocking(item):
+                    # consumer stopped listening; stop pulling from the provider
+                    return
+        except Exception as e:
+            # pass the exception object forward to the async side
+            put_blocking(e)
+        finally:
+            # always terminate the stream, even after an error
+            put_blocking(sentinel)
+    # start worker in thread
+    thread_task = loop.run_in_executor(None, worker)
+    try:
+        # consume from queue
+        while True:
+            item = await q.get()
+            if item is sentinel:
+                break
+            if isinstance(item, Exception):
+                # re-raise in async context so upstream can handle
+                raise item
+            if item is None:
+                continue
+            if isinstance(item, bytes):
+                yield item
+            elif isinstance(item, str):
+                yield item.encode("utf-8")
+            else:
+                yield str(item).encode("utf-8")
+    finally:
+        # release the worker if we leave early (error, aclose(), cancellation)
+        consumer_gone.set()
+    # ensure worker finished before reporting the stream as complete
+    await asyncio.shield(thread_task)
+async def _call_provider_and_stream(
     provider_key: str,
     messages: List[Dict],
     model: str,
     timeout: float,
 ) -> AsyncGenerator[bytes, None]:
     """
+    Core streaming logic. Yields raw bytes as soon as provider yields items.
     """
     if provider_key not in PROVIDER_META:
         raise ValueError(f"Unknown provider '{provider_key}'")
     meta = PROVIDER_META[provider_key]
     func = meta["func"]
+    # pass arguments using your original parameter names so providers stay unchanged
+    kwargs = dict(messages=messages, model=model, max_token=max_token, stream=stream_flag, timeout=timeout)
     try:
+        # 1) Async generator functions -> call returns an async generator (do NOT await)
         if meta["is_async_gen_fn"]:
+            agen = func(Requests, **kwargs)
+            # iterate immediately (no waiting for full result)
             async for item in agen:
                 if item is None:
                     continue
                     yield str(item).encode("utf-8")
             return
+        # 2) Sync generator functions -> call returns generator; iterate it in background thread
+        if meta["is_generator_fn"]:
+            # Note: call func in thread via helper which will iterate and push items to queue
+            async for item in _stream_sync_generator_in_thread(lambda *a, **k: func(Requests, **kwargs)):
+                yield item
+            return
+        # 3) Coroutine functions (async def) that return final result -> await it (can't stream before it completes)
         if meta["is_coroutine_fn"]:
+            # await the coroutine under timeout (can't stream until it returns)
+            res = await asyncio.wait_for(func(Requests, **kwargs), timeout=timeout)
             if res is None:
                 return
+            # if it returned an async generator (rare), iterate it
+            if inspect.isasyncgen(res):
+                async for item in res:
                     if item is None:
                         continue
                     if isinstance(item, bytes):
                     else:
                         yield str(item).encode("utf-8")
                 return
+            # if it returned a sync iterable -> iterate and yield
             if inspect.isgenerator(res) or (hasattr(res, "__iter__") and not isinstance(res, (str, bytes, dict))):
                 for item in res:
                     if item is None:
                 yield str(res).encode("utf-8")
             return
+        # 4) Sync plain function (not generator) -> run in thread (returns value or iterable)
+        # We call func in a thread and stream results as they appear if it's iterable.
+        def sync_call_wrapper():
+            return func(Requests, **kwargs)
+        sync_res = await asyncio.wait_for(asyncio.to_thread(sync_call_wrapper), timeout=timeout)
         if sync_res is None:
             return
         if inspect.isgenerator(sync_res) or (hasattr(sync_res, "__iter__") and not isinstance(sync_res, (str, bytes, dict))):
             for item in sync_res:
                 if item is None:
         logger.warning(err.strip())
         yield err.encode("utf-8")
     except Exception as e:
+        logger.exception("provider error")
         err = f"[server_error] {type(e).__name__}: {e}\n"
         yield err.encode("utf-8")
 @app.post("/chat")
 async def chat_endpoint(request: Request):
+    # fast load
     try:
         body_bytes = await request.body()
         payload = _loads(body_bytes)
     provider_key = req.provider
     if req.stream:
+        async def sse_stream():
+            # iterate provider stream and immediately send SSE-formatted chunks
+            async for raw_chunk in _call_provider_and_stream(
                 provider_key=provider_key,
                 messages=req.messages,
                 model=req.model or Config.DEFAULT_MODEL,
                 stream_flag=req.stream,
                 timeout=Config.TIMEOUT,
             ):
+                # decode raw chunk to text
+                text = raw_chunk.decode("utf-8", errors="ignore") if isinstance(raw_chunk, (bytes, bytearray)) else str(raw_chunk)
+                # prepare JSON payload object
                 payload_obj = {"response": text}
                 try:
                     json_str = _dumps(payload_obj)
                 except Exception:
+                    # fallback
+                    import json as _fallback_json
+                    json_str = _fallback_json.dumps(payload_obj)
+                # send SSE data line + blank line
                 sse_event = f"data: {json_str}\n\n"
                 yield sse_event.encode("utf-8")
+            # final termination marker exactly as requested
             yield ("[DONE]\n").encode("utf-8")
+        return StreamingResponse(sse_stream(), media_type="text/event-stream")
     else:
+        # non-stream: collect (only for non-stream requests)
         collected = []
+        async for chunk in _call_provider_and_stream(
             provider_key=provider_key,
             messages=req.messages,
             model=req.model or Config.DEFAULT_MODEL,
             stream_flag=req.stream,
             timeout=Config.TIMEOUT,
         ):
+            collected.append(chunk.decode("utf-8", errors="ignore") if isinstance(chunk, (bytes, bytearray)) else str(chunk))
+        return JSONResponse({"text": "".join(collected)})
 @app.get("/model")
 async def model():
+    return {"models": [M1, M2, M3]}
 @app.get("/health")