import os

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from gpt4all import GPT4All
| |
|
app = FastAPI()

# Absolute path to the GGUF model file baked into the container image.
MODEL_PATH = "/app/Llama-3.2-3B-Instruct-Q4_0.gguf"

# Load the model once at startup so requests don't pay the load cost.
#
# Fix 1: the original raised HTTPException here, but HTTPException is only
# meaningful inside a request handler — at import time it is just an unhandled
# exception with a misleading traceback. Fail fast with RuntimeError, chained
# to the original cause.
#
# Fix 2: GPT4All() requires the model *file name* as its first positional
# argument (model_name) and takes the containing *directory* as model_path;
# passing only model_path=... raises TypeError. Split MODEL_PATH accordingly.
try:
    gpt_model = GPT4All(
        model_name=os.path.basename(MODEL_PATH),
        model_path=os.path.dirname(MODEL_PATH),
    )
except Exception as e:
    raise RuntimeError(f"Error loading model: {str(e)}") from e
| |
|
class Query(BaseModel):
    """Request body schema for POST /ask."""

    # The user's prompt; forwarded verbatim to gpt_model.generate().
    question: str
| |
|
@app.get("/")
def home():
    """Health-check endpoint confirming the service is up."""
    greeting = {"message": "API is running! Use /ask to query the model."}
    return greeting
| |
|
@app.post("/ask")
def ask(query: Query):
    """Generate a model response for the posted question.

    Returns a JSON object {"response": <generated text>}.
    Raises HTTPException 400 for an empty question and 500 when the model
    fails to generate (previously such failures escaped as raw tracebacks).
    """
    if not query.question:
        raise HTTPException(status_code=400, detail="Missing question in request.")

    try:
        # chat_session() gives the model a fresh conversation context per request.
        with gpt_model.chat_session():
            response = gpt_model.generate(query.question)
    except Exception as e:
        # Surface generation failures as an explicit 500 instead of letting
        # the exception propagate as an opaque server error.
        raise HTTPException(status_code=500, detail=f"Generation failed: {e}") from e

    return {"response": response}
| |
|