from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from llama_cpp import Llama


# Load the quantized GGUF model once at startup so every request
# reuses the same instance instead of reloading it per call
llm = Llama(
    model_path="llama-3.2-1b-instruct-q4_k_m.gguf"
)


app = FastAPI()


# Request body schema: a single user message
class ChatRequest(BaseModel):
    message: str


@app.get("/")
async def test():
    return {"message": "endpoint working"}


# Plain def (not async) so FastAPI runs the blocking llama.cpp call
# in its threadpool instead of stalling the event loop
@app.post("/chat")
def chat_completion(request: ChatRequest):
    try:
        response = llm.create_chat_completion(
            messages=[
                {"role": "user", "content": request.message}
            ]
        )
        # llama-cpp-python returns an OpenAI-style dict; extract the text
        return {
            "response": response['choices'][0]['message']['content']
        }
    except Exception as e:
        # Surface inference errors as a 500 with the error message
        raise HTTPException(status_code=500, detail=str(e))
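To exercise the API, here is a minimal client sketch. It assumes the listing above is saved as main.py and served with uvicorn main:app on the default port 8000, and that the requests package is installed; the module name, port, and prompt are illustrative assumptions, not part of the original listing.

import requests

# Assumes the server above is running locally, e.g. started with:
#   uvicorn main:app
# (the module name "main" and port 8000 are assumptions)
BASE_URL = "http://localhost:8000"

# Sanity check against the root endpoint
print(requests.get(f"{BASE_URL}/", timeout=10).json())

# Send a single-turn chat message and print the model's reply
resp = requests.post(
    f"{BASE_URL}/chat",
    json={"message": "Explain what a GGUF file is in one sentence."},
    timeout=120,  # the first local inference call can be slow
)
resp.raise_for_status()
print(resp.json()["response"])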