"""Minimal Flask service exposing a GPT4All model over a JSON ``/chat`` endpoint."""

import os

from flask import Flask, request, jsonify
from flask_cors import CORS
from gpt4all import GPT4All

app = Flask(__name__)
CORS(app)

# Load model once at startup (ensure model file available on the server)
MODEL_NAME = os.environ.get("GPT4ALL_MODEL", "gpt4all-falcon-newbpe-q4_0")
model = GPT4All(MODEL_NAME)

# Sampling defaults, tuned for speed. The token default stays at 50 because
# that is the limit the endpoint has always applied in practice.
DEFAULT_MAX_TOKENS = 50
DEFAULT_TEMP = 0.1
DEFAULT_TOP_K = 20
DEFAULT_TOP_P = 0.7


def _parse_number(data, key, default, cast):
    """Return ``cast(data[key])``, or ``default`` when *key* is absent.

    Raises:
        ValueError: if the supplied value cannot be converted by *cast*.
    """
    try:
        return cast(data.get(key, default))
    except (TypeError, ValueError):
        raise ValueError(f"'{key}' must be a number") from None


@app.route("/chat", methods=["POST"])
def chat():
    """Generate a completion for the prompt in the JSON request body.

    Request body (JSON object):
        prompt (str): text to send to the model; defaults to ``""``.
        max_tokens (int, optional): generation limit, default 50.
        temp (float, optional): sampling temperature, default 0.1.
        top_k (int, optional): default 20.
        top_p (float, optional): default 0.7.

    Returns:
        ``{"response": <generated text>}`` on success, or
        ``{"error": <message>}`` with status 400 when the body is not a JSON
        object or a parameter has the wrong type.
    """
    data = request.get_json(force=True)
    # force=True accepts any valid JSON, including null, lists and strings;
    # only an object can carry the named parameters.
    if not isinstance(data, dict):
        return jsonify({"error": "request body must be a JSON object"}), 400

    prompt = data.get("prompt", "")
    if not isinstance(prompt, str):
        return jsonify({"error": "'prompt' must be a string"}), 400

    try:
        max_tokens = _parse_number(data, "max_tokens", DEFAULT_MAX_TOKENS, int)
        temp = _parse_number(data, "temp", DEFAULT_TEMP, float)
        top_k = _parse_number(data, "top_k", DEFAULT_TOP_K, int)
        top_p = _parse_number(data, "top_p", DEFAULT_TOP_P, float)
    except ValueError as err:
        return jsonify({"error": str(err)}), 400

    if max_tokens < 1:
        return jsonify({"error": "'max_tokens' must be a positive integer"}), 400

    # NOTE(review): `model` is a single shared instance; whether concurrent
    # chat sessions on it are safe under a threaded server is not visible
    # here -- confirm against the gpt4all documentation.
    with model.chat_session():
        resp = model.generate(
            prompt,
            max_tokens=max_tokens,  # honor the client's value (was hard-coded)
            temp=temp,
            top_k=top_k,
            top_p=top_p,
            streaming=False,
        )
    return jsonify({"response": resp})


if __name__ == "__main__":
    port = int(os.environ.get("PORT", 5000))
    app.run(host="0.0.0.0", port=port)