Spaces:

ujalaarshad17
/

SQLGPT

Sleeping

SQLGPT / main.py

Set n_threads

eea8e3d over 1 year ago

1.19 kB

	from fastapi import FastAPI, HTTPException
	from fastapi.middleware.cors import CORSMiddleware
	from models import SQLContext
	from llama_cpp import Llama
	import logging

	# Load the quantized GGUF model once at import time; the request handler
	# below reuses this single module-level instance for every call.
	llm = Llama(
	    model_path="./quantized_model/sql_gpt_quantized.gguf",
	    n_ctx=512,  # context window size handed to llama.cpp
	    n_threads=1,  # single inference thread
	    # -1: per the llama-cpp-python docs, offload all layers to the GPU.
	    n_gpu_layers=-1,
	)

	# ASGI application object; the routes in this module are registered on it.
	app = FastAPI()

	# CORS policy: accept cross-origin requests from ANY origin, using any
	# method and any request header.
	#
	# Credentials are disabled on purpose. A wildcard origin must not be
	# combined with allow_credentials=True: browsers reject
	# "Access-Control-Allow-Origin: *" on credentialed responses, and FastAPI's
	# CORS documentation requires explicit origins whenever credentials are
	# allowed. No route in this file reads cookies or authorization headers,
	# so nothing here needs credentialed cross-origin requests. If a specific
	# frontend later needs cookies, list its origin explicitly in
	# allow_origins and re-enable allow_credentials.
	app.add_middleware(
	    CORSMiddleware,
	    allow_origins=["*"],  # every origin may call this API
	    allow_credentials=False,
	    allow_methods=["*"],  # allow all HTTP methods
	    allow_headers=["*"],  # allow all request headers
	)
	@app.get("/")
	def root():
	    """Return a fixed greeting payload for GET requests to the root path."""
	    greeting = {"Hello": "World"}
	    return greeting

	@app.post("/query")
	async def send_response(query: SQLContext):
	    """Generate a model completion for a context/question pair.

	    Builds a prompt from ``query.context`` and ``query.question``, runs the
	    module-level ``llm`` on it, and returns the generated text that precedes
	    the first ``###`` marker with surrounding whitespace and double quotes
	    removed.

	    Args:
	        query: Request body; its ``context`` and ``question`` attributes are
	            interpolated into the prompt.

	    Returns:
	        A dict of the form ``{"response": <cleaned completion text>}``.

	    Raises:
	        HTTPException: With status 500 if generation or post-processing
	            raises any exception. The original error is logged with its
	            traceback and chained as the cause.

	    NOTE(review): ``llm(...)`` is a blocking call made directly inside an
	    ``async`` handler, so the event loop is stalled while it runs. It is left
	    as-is here because moving it to a worker thread would allow concurrent
	    calls into the single shared model instance; confirm that is safe
	    before changing it.
	    """
	    # The prompt literal below is kept byte-for-byte (including its leading
	    # and trailing whitespace and the trailing double quote) because it is
	    # the exact text the model receives. The continuation lines are part of
	    # the string, so they must not be re-indented.
	    query_text = f'''
	### CONTEXT:\n{query.context}\n\n### QUESTION:{query.question}\n\n### [RESPONSE]:\n"
	'''
	    try:
	        output = llm(
	            prompt=query_text,
	            max_tokens=512,
	            # Everything after the first "###" is discarded below, so stop
	            # generating there instead of running on to EOS or max_tokens.
	            stop=["###"],
	        )
	        completion = output["choices"][0]["text"]
	        # The split is kept as a safety net in case a marker still appears.
	        # The final strip() removes whitespace that was enclosed by the
	        # stripped quotes (e.g. a newline before a closing quote).
	        response_text = completion.split("###")[0].strip().strip('"').strip()
	        return {"response": response_text}
	    except Exception as e:
	        # logging.exception records the traceback alongside the message.
	        logging.exception("Error generating response: %s", e)
	        raise HTTPException(
	            status_code=500,
	            detail="Internal server error while processing the query.",
	        ) from e