"""FastAPI service exposing a quantized SQL-generation model (GGUF via llama.cpp)."""

import logging

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from llama_cpp import Llama

from models import SQLContext

logger = logging.getLogger(__name__)

# Model is loaded once at import time; llama.cpp inference is blocking and
# CPU/GPU-bound (n_gpu_layers=-1 offloads all layers to the GPU if available).
llm = Llama(
    model_path="./quantized_model/sql_gpt_quantized.gguf",
    n_ctx=512,
    n_threads=1,
    n_gpu_layers=-1,
)

app = FastAPI()

app.add_middleware(
    CORSMiddleware,
    # NOTE(review): browsers reject the wildcard origin "*" when
    # allow_credentials=True — pin this to the actual frontend origin.
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],  # Allow all methods
    allow_headers=["*"],  # Allow all headers
)


@app.get("/")
def root():
    """Liveness/health-check endpoint."""
    return {"Hello": "World"}


@app.post("/query")
def send_response(query: SQLContext):
    """Generate a SQL answer for the given context/question pair.

    Declared as a plain ``def`` (not ``async def``) on purpose: the
    ``llm(...)`` call blocks for the whole generation, and FastAPI runs
    sync path operations in its threadpool, so the event loop stays free.

    Args:
        query: Request body with ``context`` (schema/DDL text) and
            ``question`` fields — see the ``SQLContext`` model.

    Returns:
        ``{"response": <generated text>}`` with the model output truncated
        at the first ``###`` delimiter and surrounding quotes stripped.

    Raises:
        HTTPException: 500 if inference fails for any reason.
    """
    # Prompt template matches the model's fine-tuning format; the trailing
    # '"' is part of the expected template and is stripped from the output.
    query_text = f'''
### CONTEXT:\n{query.context}\n\n### QUESTION:{query.question}\n\n### [RESPONSE]:\n"
'''
    try:
        output = llm(
            prompt=query_text,
            max_tokens=512,
        )
        # Keep only the text before the next section marker the model emits.
        response_text = output["choices"][0]["text"].split('###')[0].strip().strip('"')
        return {"response": response_text}
    except Exception as e:
        # logger.exception records the full traceback, unlike logging.error.
        logger.exception("Error generating response: %s", e)
        raise HTTPException(status_code=500, detail="Internal server error while processing the query.") from e