Update app.py
app.py CHANGED
@@ -2,14 +2,11 @@ from fastapi import FastAPI
 from pydantic import BaseModel
 from typing import List
 from llama_cpp import Llama
+import os
 
 app = FastAPI()
 
-llm = Llama(
-    model_path="phi-2.Q4_K_M.gguf",
-    n_ctx=2048,
-    n_threads=2
-)
+llm = None # Will initialize on startup
 
 class Message(BaseModel):
     role: str
@@ -21,8 +18,24 @@ class ChatRequest(BaseModel):
     temperature: float = 0.7
     max_tokens: int = 256
 
+@app.on_event("startup")
+def load_model():
+    global llm
+    model_path = "phi-2.Q4_K_M.gguf"
+    if not os.path.exists(model_path):
+        raise RuntimeError(f"Model not found: {model_path}")
+    llm = Llama(
+        model_path=model_path,
+        n_ctx=2048,
+        n_threads=2
+    )
+
 @app.post("/v1/chat/completions")
 async def chat_completions(req: ChatRequest):
+    global llm
+    if llm is None:
+        return {"error": "Model not initialized."}
+
     prompt = "\n".join([f"{m.role}: {m.content}" for m in req.messages]) + "\nassistant:"
     output = llm(
         prompt,
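For reference, a minimal sketch of exercising the updated /v1/chat/completions endpoint. It assumes the app is being served locally (for example with uvicorn app:app --port 8000) and that phi-2.Q4_K_M.gguf sits next to app.py; the URL, the uvicorn invocation, and the client code are illustrative and not part of this commit. The request body follows the ChatRequest model shown in the diff (messages, temperature, max_tokens).

# Hypothetical client for the /v1/chat/completions endpoint in app.py.
# Assumes the server is running locally, e.g.: uvicorn app:app --port 8000
import requests

payload = {
    "messages": [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "Say hello in one sentence."},
    ],
    "temperature": 0.7,
    "max_tokens": 64,
}

# POST the ChatRequest body; the server joins the messages into a
# role-prefixed prompt and runs it through the GGUF model loaded at startup.
resp = requests.post("http://localhost:8000/v1/chat/completions", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json())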