"""Minimal FastAPI service exposing a local GPT4All model.

Endpoints:
    GET  /     -- liveness check.
    POST /ask  -- body {"question": str}; returns {"response": str}.
"""

from pathlib import Path

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from gpt4all import GPT4All

app = FastAPI()

# Full path to the GGUF model (ensure it's inside the container or downloaded).
MODEL_PATH = "/app/Llama-3.2-3B-Instruct-Q4_0.gguf"

try:
    # GPT4All's constructor takes model_name (the file name) and model_path
    # (the directory that contains it) separately -- passing the full path as
    # the model_path keyword alone raises a TypeError (model_name is required).
    # allow_download=False: fail fast if the file is missing rather than
    # silently attempting a download at container start.
    _model_file = Path(MODEL_PATH)
    gpt_model = GPT4All(
        model_name=_model_file.name,
        model_path=str(_model_file.parent),
        allow_download=False,
    )
except Exception as e:
    # HTTPException is only meaningful inside a request handler (FastAPI's
    # middleware turns it into an HTTP response). At import time, raise a
    # plain RuntimeError so the server fails to start with a clear message.
    raise RuntimeError(f"Error loading model: {e}") from e


class Query(BaseModel):
    # The user's question; required by the schema, but may still be blank.
    question: str


@app.get("/")
def home():
    """Liveness check: confirms the API is up and points callers at /ask."""
    return {"message": "API is running! Use /ask to query the model."}


@app.post("/ask")
def ask(query: Query):
    """Generate a model response for *query.question*.

    Raises:
        HTTPException: 400 if the question is empty or whitespace-only.
    """
    # Pydantic guarantees the field exists; guard against blank content too.
    if not query.question.strip():
        raise HTTPException(status_code=400, detail="Missing question in request.")
    # chat_session() applies the model's instruct prompt template around the
    # raw question, which Llama-Instruct models need for sensible output.
    with gpt_model.chat_session():
        response = gpt_model.generate(query.question)
    return {"response": response}