"""FastAPI service exposing a quantized SQL-generation model (GGUF via llama.cpp)."""

import logging

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from llama_cpp import Llama

from models import SQLContext

logger = logging.getLogger(__name__)

# Model is loaded once at import time; llama.cpp inference is blocking and
# CPU/GPU-bound (n_gpu_layers=-1 offloads all layers to the GPU if available).
llm = Llama(
    model_path="./quantized_model/sql_gpt_quantized.gguf",
    n_ctx=512,
    n_threads=1,
    n_gpu_layers=-1,
)

app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    # NOTE(review): browsers reject the wildcard origin "*" when
    # allow_credentials=True — pin this to the actual frontend origin.
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],  # Allow all methods
    allow_headers=["*"],  # Allow all headers
)


@app.get("/")
def root():
    """Liveness/health-check endpoint."""
    return {"Hello": "World"}


@app.post("/query")
def send_response(query: SQLContext):
    """Generate a SQL answer for the given context/question pair.

    Declared as a plain ``def`` (not ``async def``) on purpose: the
    ``llm(...)`` call blocks for the whole generation, and FastAPI runs
    sync path operations in its threadpool, so the event loop stays free.

    Args:
        query: Request body with ``context`` (schema/DDL text) and
            ``question`` fields — see the ``SQLContext`` model.

    Returns:
        ``{"response": <generated text>}`` with the model output truncated
        at the first ``###`` delimiter and surrounding quotes stripped.

    Raises:
        HTTPException: 500 if inference fails for any reason.
    """
    # Prompt template matches the model's fine-tuning format; the trailing
    # '"' is part of the expected template and is stripped from the output.
    query_text = f'''
### CONTEXT:\n{query.context}\n\n### QUESTION:{query.question}\n\n### [RESPONSE]:\n"
'''
    try:
        output = llm(
            prompt=query_text,
            max_tokens=512,
        )
        # Keep only the text before the next section marker the model emits.
        response_text = output["choices"][0]["text"].split('###')[0].strip().strip('"')
        return {"response": response_text}
    except Exception as e:
        # logger.exception records the full traceback, unlike logging.error.
        logger.exception("Error generating response: %s", e)
        raise HTTPException(status_code=500, detail="Internal server error while processing the query.") from e