from flask import Flask, request, jsonify, Response
import subprocess
import json
import logging

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

app = Flask(__name__)

# Model handed to the ollama CLI; replace with your local model name.
OLLAMA_MODEL = "llama2"


@app.route('/completions', methods=['POST'])
def get_completion():
    """Stream a model completion as Server-Sent Events.

    Expects a JSON body ``{"prompt": "<text>"}``, runs the local ``ollama``
    CLI with that prompt, and forwards each JSON output line's ``response``
    field to the client as an SSE ``data:`` event. Returns 400 for a missing
    or invalid prompt, 500 for unexpected server-side failures.
    """
    try:
        # silent=True: a malformed/absent JSON body yields None (-> 400)
        # instead of raising and surfacing as a 500.
        data = request.get_json(silent=True)
        if not data or 'prompt' not in data:
            return jsonify({"error": "Missing 'prompt' in request body"}), 400
        prompt = data['prompt']
        # Validate before placing the value into the subprocess argv.
        if not isinstance(prompt, str) or not prompt.strip():
            return jsonify({"error": "Missing 'prompt' in request body"}), 400

        def generate():
            process = None
            try:
                # BUG FIX: `ollama run` takes the prompt as a positional
                # argument — it has no `-p` flag, so the original command
                # failed with an unknown-flag error before producing output.
                # (List argv + shell=False: the prompt is never shell-parsed.)
                process = subprocess.Popen(
                    ["ollama", "run", OLLAMA_MODEL, prompt],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    text=True,
                    encoding="utf-8",
                )
                for line in process.stdout:
                    try:
                        json_line = json.loads(line)
                    except json.JSONDecodeError:
                        logger.warning("Invalid JSON line from Ollama: %s", line.strip())
                        continue
                    if "response" in json_line:
                        yield f"data: {json.dumps({'text': json_line['response']})}\n\n"
                    elif "done" in json_line:
                        # End-of-stream sentinel from Ollama.
                        break
                # Drain stderr only after stdout is exhausted.
                # NOTE(review): an extremely chatty stderr could fill its pipe
                # and stall the child before stdout closes — confirm ollama's
                # stderr output stays small, or merge it into stdout.
                stderr = process.stderr.read()
                if stderr:
                    logger.error("Ollama stderr: %s", stderr)
            except FileNotFoundError:
                yield f"data: {json.dumps({'text': 'Error: Ollama not found. Is it installed and in your PATH?'})}\n\n"
            except Exception as e:
                logger.exception("Error in Ollama subprocess:")
                yield f"data: {json.dumps({'text': f'Error in Ollama: {e}'})}\n\n"
            finally:
                # BUG FIX: the original leaked the child process when the
                # client disconnected mid-stream (the generator is closed
                # early and the code after the loop never ran).
                if process is not None:
                    if process.poll() is None:
                        process.kill()
                    process.wait()

        return Response(generate(), mimetype='text/event-stream')
    except Exception as e:
        logger.exception("Error in /completions route:")
        return jsonify({"error": "An error occurred during processing."}), 500


if __name__ == "__main__":
    # NOTE(review): host='0.0.0.0' exposes the endpoint on all interfaces —
    # confirm that is intended before deploying.
    app.run(debug=False, host='0.0.0.0', port=5000)