AnatoliiG committed on
Commit ·
2d1f66e
1
Parent(s): 17af010
multithread
Browse files
- src/api/routes.py +10 -10
- src/core/config.py +3 -1
src/api/routes.py
CHANGED
|
@@ -106,13 +106,13 @@ async def chat_completions(request: Request):
|
|
| 106 |
if stream:
|
| 107 |
return StreamingResponse(stream_generator(), media_type="text/event-stream")
|
| 108 |
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
|
|
|
| 106 |
if stream:
|
| 107 |
return StreamingResponse(stream_generator(), media_type="text/event-stream")
|
| 108 |
|
| 109 |
+
else:
|
| 110 |
+
async with engine.lock:
|
| 111 |
+
result = await asyncio.to_thread(
|
| 112 |
+
engine.generate,
|
| 113 |
+
messages,
|
| 114 |
+
data.get("max_tokens", settings.DEFAULT_MAX_TOKENS),
|
| 115 |
+
data.get("temperature", settings.DEFAULT_TEMP),
|
| 116 |
+
stream=False,
|
| 117 |
+
)
|
| 118 |
+
return result
|
src/core/config.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
from pydantic_settings import BaseSettings
|
| 2 |
|
| 3 |
|
|
@@ -8,7 +10,7 @@ class Settings(BaseSettings):
|
|
| 8 |
CONTEXT_SIZE: int = 8192
|
| 9 |
DEFAULT_MAX_TOKENS: int = 4096
|
| 10 |
DEFAULT_TEMP: float = 0.4
|
| 11 |
-
N_THREADS: int =
|
| 12 |
N_GPU_LAYERS: int = 0
|
| 13 |
|
| 14 |
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
|
| 3 |
from pydantic_settings import BaseSettings
|
| 4 |
|
| 5 |
|
|
|
|
| 10 |
CONTEXT_SIZE: int = 8192
|
| 11 |
DEFAULT_MAX_TOKENS: int = 4096
|
| 12 |
DEFAULT_TEMP: float = 0.4
|
| 13 |
+
N_THREADS: int = os.cpu_count()
|
| 14 |
N_GPU_LAYERS: int = 0
|
| 15 |
|
| 16 |
|