# llm.py — minimal FastAPI service wrapping a local llama.cpp model.
# (Source hosted on Hugging Face; web-page chrome removed so the file is valid Python.)
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from llama_cpp import Llama
import subprocess
# Initialize the LLM once when the application starts
# (loading the GGUF weights is expensive, so it must not happen per-request).
llm = Llama(
    # NOTE(review): relative path — assumes the .gguf file sits in the working
    # directory the server is launched from; verify in deployment.
    model_path="llama-3.2-1b-instruct-q4_k_m.gguf"
)
# FastAPI application instance that the route decorators below attach to.
app = FastAPI()
class ChatRequest(BaseModel):
    """Request body for POST /chat."""

    # The user's chat message to send to the model.
    message: str
@app.get("/")
async def test():
    """Liveness probe: confirms the API process is up and serving."""
    return dict(message="endpoint working !!")
@app.post("/chat")
async def chat_completion(request: ChatRequest):
    """Run the user's message through the local Llama model.

    Returns a JSON object ``{"response": <assistant text>}``.
    Any failure (model error or an unexpected completion shape) is
    surfaced to the client as an HTTP 500 with the error text as detail.
    """
    try:
        completion = llm.create_chat_completion(
            messages=[{"role": "user", "content": request.message}],
        )
        # Indexing stays inside the try-block: a malformed completion
        # (missing keys / empty choices) is also reported as a 500.
        reply = completion["choices"][0]["message"]["content"]
        return {"response": reply}
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
# To run the application:
# uvicorn filename:app --reload