Update main.py
Browse files
main.py
CHANGED
|
@@ -124,17 +124,13 @@ def _condense_messages(messages: list[Message], max_tokens: int) -> str:
|
|
| 124 |
system_msgs = [m for m in messages if m.role == "system"]
|
| 125 |
user_assistant = [m for m in messages if m.role in ("user", "assistant")]
|
| 126 |
|
| 127 |
-
# Budujemy prompt w kolejności: system + user/assistant
|
| 128 |
condensed_parts = []
|
| 129 |
|
| 130 |
-
# system zawsze pełny
|
| 131 |
for m in system_msgs:
|
| 132 |
condensed_parts.append(_content_str(m))
|
| 133 |
|
| 134 |
-
# dynamiczne skracanie starszych user/assistant
|
| 135 |
tokens_so_far = sum(_token_count(part) for part in condensed_parts)
|
| 136 |
|
| 137 |
-
# jeśli wchodzimy w limity
|
| 138 |
for m in user_assistant:
|
| 139 |
text = _content_str(m)
|
| 140 |
tcount = _token_count(text)
|
|
@@ -144,7 +140,7 @@ def _condense_messages(messages: list[Message], max_tokens: int) -> str:
|
|
| 144 |
if remaining_tokens <= 0:
|
| 145 |
continue
|
| 146 |
approx_chars = remaining_tokens * AVG_CHARS_PER_TOKEN
|
| 147 |
-
text = text[-approx_chars:]
|
| 148 |
tcount = _token_count(text)
|
| 149 |
|
| 150 |
condensed_parts.append(text)
|
|
@@ -228,17 +224,18 @@ async def _call_falcon_once(prompt: str, req: ChatCompletionRequest) -> str:
|
|
| 228 |
"top_p": req.top_p,
|
| 229 |
}
|
| 230 |
|
| 231 |
-
# inicjalizacja nowego chatu z promptem
|
| 232 |
await asyncio.to_thread(
|
| 233 |
client.predict,
|
| 234 |
-
|
| 235 |
settings_form_value=settings,
|
| 236 |
api_name="/new_chat",
|
| 237 |
)
|
| 238 |
|
|
|
|
| 239 |
result = await asyncio.to_thread(
|
| 240 |
client.predict,
|
| 241 |
-
|
| 242 |
settings_form_value=settings,
|
| 243 |
api_name="/add_message",
|
| 244 |
)
|
|
@@ -254,12 +251,22 @@ async def _stream_sse(text: str, req: ChatCompletionRequest) -> AsyncGenerator[s
|
|
| 254 |
cid = f"chatcmpl-{uuid.uuid4().hex}"
|
| 255 |
created = int(time.time())
|
| 256 |
for i in range(0, len(text), 8):
|
| 257 |
-
chunk = {
|
| 258 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
yield f"data: {json.dumps(chunk)}\n\n"
|
| 260 |
await asyncio.sleep(0.01)
|
| 261 |
-
final_chunk = {
|
| 262 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 263 |
yield f"data: {json.dumps(final_chunk)}\n\n"
|
| 264 |
yield "data: [DONE]\n\n"
|
| 265 |
|
|
@@ -291,7 +298,15 @@ app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_credentials=True,
|
|
| 291 |
|
| 292 |
@app.get("/")
|
| 293 |
async def root():
|
| 294 |
-
return {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 295 |
|
| 296 |
|
| 297 |
@app.get("/health")
|
|
|
|
| 124 |
system_msgs = [m for m in messages if m.role == "system"]
|
| 125 |
user_assistant = [m for m in messages if m.role in ("user", "assistant")]
|
| 126 |
|
|
|
|
| 127 |
condensed_parts = []
|
| 128 |
|
|
|
|
| 129 |
for m in system_msgs:
|
| 130 |
condensed_parts.append(_content_str(m))
|
| 131 |
|
|
|
|
| 132 |
tokens_so_far = sum(_token_count(part) for part in condensed_parts)
|
| 133 |
|
|
|
|
| 134 |
for m in user_assistant:
|
| 135 |
text = _content_str(m)
|
| 136 |
tcount = _token_count(text)
|
|
|
|
| 140 |
if remaining_tokens <= 0:
|
| 141 |
continue
|
| 142 |
approx_chars = remaining_tokens * AVG_CHARS_PER_TOKEN
|
| 143 |
+
text = text[-approx_chars:]
|
| 144 |
tcount = _token_count(text)
|
| 145 |
|
| 146 |
condensed_parts.append(text)
|
|
|
|
| 224 |
"top_p": req.top_p,
|
| 225 |
}
|
| 226 |
|
| 227 |
+
# inicjalizacja nowego chatu z promptem (jeśli endpoint wymaga)
|
| 228 |
await asyncio.to_thread(
|
| 229 |
client.predict,
|
| 230 |
+
prompt, # pierwszy argument podawany pozycyjnie
|
| 231 |
settings_form_value=settings,
|
| 232 |
api_name="/new_chat",
|
| 233 |
)
|
| 234 |
|
| 235 |
+
# dodanie wiadomości
|
| 236 |
result = await asyncio.to_thread(
|
| 237 |
client.predict,
|
| 238 |
+
prompt, # pierwszy argument podawany pozycyjnie
|
| 239 |
settings_form_value=settings,
|
| 240 |
api_name="/add_message",
|
| 241 |
)
|
|
|
|
| 251 |
cid = f"chatcmpl-{uuid.uuid4().hex}"
|
| 252 |
created = int(time.time())
|
| 253 |
for i in range(0, len(text), 8):
|
| 254 |
+
chunk = {
|
| 255 |
+
"id": cid,
|
| 256 |
+
"object": "chat.completion.chunk",
|
| 257 |
+
"created": created,
|
| 258 |
+
"model": req.model,
|
| 259 |
+
"choices": [{"index": 0, "delta": {"content": text[i:i+8]}, "finish_reason": None}]
|
| 260 |
+
}
|
| 261 |
yield f"data: {json.dumps(chunk)}\n\n"
|
| 262 |
await asyncio.sleep(0.01)
|
| 263 |
+
final_chunk = {
|
| 264 |
+
"id": cid,
|
| 265 |
+
"object": "chat.completion.chunk",
|
| 266 |
+
"created": created,
|
| 267 |
+
"model": req.model,
|
| 268 |
+
"choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}]
|
| 269 |
+
}
|
| 270 |
yield f"data: {json.dumps(final_chunk)}\n\n"
|
| 271 |
yield "data: [DONE]\n\n"
|
| 272 |
|
|
|
|
| 298 |
|
| 299 |
@app.get("/")
|
| 300 |
async def root():
    """Service metadata endpoint: reports the API name, version, and routes."""
    # Route map advertised to clients; values mirror the registered handlers.
    endpoints = {
        "health": "/health",
        "models": "/v1/models",
        "chat": "/v1/chat/completions",
    }
    return {"service": "FOC API", "version": "5.0.0", "endpoints": endpoints}
|
| 310 |
|
| 311 |
|
| 312 |
@app.get("/health")
|