Tim Luka Horstmann
committed on
Commit
·
703cd97
1
Parent(s):
a79e01b
Update stay alive
Browse files
app.py
CHANGED
|
@@ -3,7 +3,7 @@ import json
|
|
| 3 |
import time
|
| 4 |
import numpy as np
|
| 5 |
from sentence_transformers import SentenceTransformer
|
| 6 |
-
from fastapi import FastAPI, HTTPException
|
| 7 |
from fastapi.responses import StreamingResponse
|
| 8 |
from pydantic import BaseModel
|
| 9 |
from llama_cpp import Llama
|
|
@@ -239,4 +239,27 @@ async def warm_up_model():
|
|
| 239 |
logger.info("Model warm-up completed.")
|
| 240 |
# Log initial RAM usage
|
| 241 |
ram_stats = get_ram_usage()
|
| 242 |
-
logger.info(f"Initial RAM usage after startup: {ram_stats}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import time
|
| 4 |
import numpy as np
|
| 5 |
from sentence_transformers import SentenceTransformer
|
| 6 |
+
from fastapi import FastAPI, HTTPException, BackgroundTasks
|
| 7 |
from fastapi.responses import StreamingResponse
|
| 8 |
from pydantic import BaseModel
|
| 9 |
from llama_cpp import Llama
|
|
|
|
| 239 |
logger.info("Model warm-up completed.")
|
| 240 |
# Log initial RAM usage
|
| 241 |
ram_stats = get_ram_usage()
|
| 242 |
+
logger.info(f"Initial RAM usage after startup: {ram_stats}")
|
| 243 |
+
|
| 244 |
+
# Add a background task to keep the model warm
|
| 245 |
+
# Strong references to fire-and-forget background tasks. The event loop only
# keeps weak references to tasks, so a task nobody else references may be
# garbage-collected before it finishes.
_keep_warm_tasks = set()


@app.on_event("startup")
async def setup_periodic_tasks():
    """Schedule the periodic model warm-up loop when the application starts.

    The loop (``keep_model_warm``) is launched as an independent asyncio task
    so that startup is not blocked by it.
    """
    task = asyncio.create_task(keep_model_warm())
    # Hold the task until it completes, then drop it so the set does not grow.
    _keep_warm_tasks.add(task)
    task.add_done_callback(_keep_warm_tasks.discard)
    logger.info("Periodic model warm-up task scheduled")
|
| 249 |
+
|
| 250 |
+
async def keep_model_warm(interval_seconds: float = 13 * 60):
    """Background task that keeps the model warm by sending periodic requests.

    Runs forever: each round streams a dummy query through
    ``stream_response``, discards everything it yields, then sleeps.

    Args:
        interval_seconds: Pause between warm-up rounds, in seconds. Defaults
            to 13 minutes, the interval that was previously hard-coded.
    """
    dummy_query = "Say only the word 'ok.'"
    while True:
        try:
            logger.info("Performing periodic model warm-up")
            # Process a dummy query through the generator to keep it warm.
            # A fresh, empty history is passed each round so nothing can
            # accumulate between rounds.
            async for _ in stream_response(dummy_query, []):
                pass
            logger.info("Periodic warm-up completed")
        except Exception as e:
            # Best-effort: a failed round must not end the loop. Task
            # cancellation is not an ``Exception`` subclass on Python 3.8+,
            # so it still propagates. ``logger.exception`` keeps the
            # traceback alongside the message.
            logger.exception(f"Error in periodic warm-up: {str(e)}")

        # Wait before the next warm-up round.
        await asyncio.sleep(interval_seconds)
|