"""Minimal FastAPI service exposing a local GPT4All model.

Endpoints:
    GET  /     -- liveness check.
    POST /ask  -- body {"question": str}; returns {"response": str}.
"""

from pathlib import Path

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from gpt4all import GPT4All

app = FastAPI()

# Full path to the GGUF model (ensure it's inside the container or downloaded).
MODEL_PATH = "/app/Llama-3.2-3B-Instruct-Q4_0.gguf"

try:
    # GPT4All's constructor takes model_name (the file name) and model_path
    # (the directory that contains it) separately -- passing the full path as
    # the model_path keyword alone raises a TypeError (model_name is required).
    # allow_download=False: fail fast if the file is missing rather than
    # silently attempting a download at container start.
    _model_file = Path(MODEL_PATH)
    gpt_model = GPT4All(
        model_name=_model_file.name,
        model_path=str(_model_file.parent),
        allow_download=False,
    )
except Exception as e:
    # HTTPException is only meaningful inside a request handler (FastAPI's
    # middleware turns it into an HTTP response). At import time, raise a
    # plain RuntimeError so the server fails to start with a clear message.
    raise RuntimeError(f"Error loading model: {e}") from e


class Query(BaseModel):
    # The user's question; required by the schema, but may still be blank.
    question: str


@app.get("/")
def home():
    """Liveness check: confirms the API is up and points callers at /ask."""
    return {"message": "API is running! Use /ask to query the model."}


@app.post("/ask")
def ask(query: Query):
    """Generate a model response for *query.question*.

    Raises:
        HTTPException: 400 if the question is empty or whitespace-only.
    """
    # Pydantic guarantees the field exists; guard against blank content too.
    if not query.question.strip():
        raise HTTPException(status_code=400, detail="Missing question in request.")
    # chat_session() applies the model's instruct prompt template around the
    # raw question, which Llama-Instruct models need for sensible output.
    with gpt_model.chat_session():
        response = gpt_model.generate(query.question)
    return {"response": response}