from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from llama_cpp import Llama


# Load the quantized GGUF model once at startup so every request
# reuses the same instance instead of reloading it per call
llm = Llama(
    model_path="llama-3.2-1b-instruct-q4_k_m.gguf"
)


app = FastAPI()


# Request body schema: a single user message
class ChatRequest(BaseModel):
    message: str


@app.get("/")
async def test():
    return {"message": "endpoint working"}


# Plain def (not async) so FastAPI runs the blocking llama.cpp call
# in its threadpool instead of stalling the event loop
@app.post("/chat")
def chat_completion(request: ChatRequest):
    try:
        response = llm.create_chat_completion(
            messages=[
                {"role": "user", "content": request.message}
            ]
        )
        # llama-cpp-python returns an OpenAI-style dict; extract the text
        return {
            "response": response['choices'][0]['message']['content']
        }
    except Exception as e:
        # Surface inference errors as a 500 with the error message
        raise HTTPException(status_code=500, detail=str(e))
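To exercise the API, here is a minimal client sketch. It assumes the listing above is saved as main.py and served with uvicorn main:app on the default port 8000, and that the requests package is installed; the module name, port, and prompt are illustrative assumptions, not part of the original listing.

import requests

# Assumes the server above is running locally, e.g. started with:
#   uvicorn main:app
# (the module name "main" and port 8000 are assumptions)
BASE_URL = "http://localhost:8000"

# Sanity check against the root endpoint
print(requests.get(f"{BASE_URL}/", timeout=10).json())

# Send a single-turn chat message and print the model's reply
resp = requests.post(
    f"{BASE_URL}/chat",
    json={"message": "Explain what a GGUF file is in one sentence."},
    timeout=120,  # the first local inference call can be slow
)
resp.raise_for_status()
print(resp.json()["response"])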