| """ |
| Flask HTTP API for the Criterion-Wise Neural-LLM Hybrid Grading System. |
| |
| Endpoints |
| --------- |
| GET /health — liveness probe |
| GET /info — model / pipeline metadata |
| POST /predict — score an (answer, criteria) payload against a question |
| |
| POST /predict expects JSON of shape: |
| { |
| "question": "…", |
| "answer": "…", |
| "criteria": [ |
| {"name": "Input handling", "max_score": 2, "description": "full rubric text"}, |
| {"name": "Logic", "max_score": 3, "description": "…"} |
| ] |
| } |
| |
| It returns JSON of shape: |
| { |
| "total_score": 3.4, |
| "max_total_score": 5, |
| "percentage": 68.0, |
| "results": [ {criterion, score, max_score, pred_norm, signals, explanation}, ... ], |
| "overall_explanation": "…" |
| } |
| """ |
| import logging |
| import os |
| import time |
| import uuid |
| from flask import Flask, request, jsonify, g |
|
|
| |
# Logging is configured here, before ``model`` is imported below. Presumably
# this is so loggers created while importing the model package pick up the
# configuration -- TODO confirm against model.logging_config.
from model.logging_config import setup_logging
setup_logging()

from model import load_pipeline, predict

# Logger used by the request hooks and the /predict endpoint in this module.
logger = logging.getLogger("app")

app = Flask(__name__)

# load_pipeline() runs once at import time, inside an application context,
# i.e. before any request is served.
# NOTE(review): presumably this loads the model up front so the first request
# does not pay the loading cost -- verify in model.load_pipeline.
with app.app_context():
    load_pipeline()
|
|
|
|
| |
@app.before_request
def _log_request_start():
    """Tag the incoming request with a short id and a start timestamp.

    Stores ``req_id`` (the first 8 hex characters of a random UUID4) and
    ``req_start`` (a ``time.perf_counter()`` reading) on ``flask.g``, then
    logs the request method and path together with the id.
    """
    request_id = uuid.uuid4().hex[:8]
    g.req_id = request_id
    g.req_start = time.perf_counter()
    logger.info(
        "β %s %s [req=%s]",
        request.method,
        request.path,
        request_id,
    )
|
|
|
|
@app.after_request
def _log_request_end(response):
    """Log the response status and elapsed time, then return the response.

    Nothing is logged when ``g.req_start`` is absent. The response object is
    always returned unchanged.
    """
    # No start timestamp recorded for this request: nothing to measure.
    if not hasattr(g, "req_start"):
        return response

    duration_ms = 1000 * (time.perf_counter() - g.req_start)
    request_id = getattr(g, "req_id", "?")
    logger.info(
        "β %s %s [req=%s] status=%d in %.1fms",
        request.method,
        request.path,
        request_id,
        response.status_code,
        duration_ms,
    )
    return response
|
|
|
|
@app.route("/health", methods=["GET"])
def health():
    """Liveness probe: always answer ``{"status": "ok"}`` with HTTP 200."""
    body = {"status": "ok"}
    return jsonify(body), 200
|
|
|
|
@app.route("/info", methods=["GET"])
def info():
    """Report model / pipeline metadata as JSON with HTTP 200.

    The first four fields are read from environment variables (falling back
    to the defaults written below); ``device`` and ``gpu_name`` come from
    ``torch.cuda``.
    """
    import torch  # imported inside the handler, as in the original

    metadata = {
        "encoder_space": os.getenv("ENCODER_SPACE_URL", "(unset)"),
        "llm_model": os.getenv("LLM_MODEL_NAME", "Qwen/Qwen2.5-1.5B-Instruct"),
        # Only the exact string "1" counts as disabled.
        "llm_disabled": os.getenv("DISABLE_LLM_EXPLAINER", "0") == "1",
        "checkpoint": os.getenv("CHECKPOINT_PATH", "best_model_v5.pt"),
    }

    metadata["device"] = "cuda" if torch.cuda.is_available() else "cpu"

    if torch.cuda.is_available():
        metadata["gpu_name"] = torch.cuda.get_device_name(0)
    else:
        metadata["gpu_name"] = None

    return jsonify(metadata), 200
|
|
|
|
@app.route("/predict", methods=["POST"])
def predict_endpoint():
    """Score an answer against a question and a list of rubric criteria.

    The payload is taken from the JSON request body. When the request is not
    JSON but carries an uploaded file in the ``file`` form field, that file
    is parsed as JSON instead.

    Returns:
        A ``(response, status)`` pair:

        * 200 with the value returned by ``predict`` on success;
        * 400 with ``{"error": ...}`` when the body is missing, unparsable,
          or not a JSON object, when the uploaded file is not valid JSON, or
          when ``predict`` raises ``ValueError``;
        * 500 with ``{"error": "Internal error", "detail": ...}`` for any
          other exception.
    """
    if not request.is_json and "file" in request.files:
        import json as _json
        try:
            payload = _json.load(request.files["file"])
        except Exception as e:
            return jsonify({"error": f"Invalid JSON file: {e}"}), 400
    else:
        # silent=True makes a malformed body or a non-JSON content type
        # yield None instead of raising.
        payload = request.get_json(silent=True)

    # A syntactically valid JSON body may be a list, string, number or null.
    # Only an object supports the .get() lookups below, so anything else is
    # rejected here as a client error instead of crashing with a 500.
    if not isinstance(payload, dict):
        return jsonify({
            "error": "Request body must be JSON with keys: question, answer, criteria"
        }), 400

    question = payload.get("question")
    answer = payload.get("answer")
    criteria = payload.get("criteria")

    try:
        result = predict(question=question, answer=answer, criteria=criteria)
        # jsonify stays inside the try so a non-serialisable result is also
        # reported through the JSON 500 path below.
        return jsonify(result), 200
    except ValueError as ve:
        # ValueError from predict is treated as a validation (client) error.
        logger.warning("Validation error: %s", ve)
        return jsonify({"error": str(ve)}), 400
    except Exception as e:
        logger.exception("Unhandled error in /predict")
        # NOTE(review): "detail" exposes the raw exception text to the
        # client; kept for backward compatibility with existing consumers.
        return jsonify({"error": "Internal error", "detail": str(e)}), 500
|
|
|
|
if __name__ == "__main__":
    # Direct-execution entry point: bind address and port come from the
    # HOST / PORT environment variables, with the defaults below.
    host = os.environ.get("HOST", "0.0.0.0")
    port = int(os.environ.get("PORT", "8000"))

    # Flask's built-in server, with debug mode and per-request threading off.
    app.run(
        host=host,
        port=port,
        debug=False,
        threaded=False,
    )