Nanny7's picture
Initial commit: Flask backend for pseudoscore-x
219ee1a
"""
Flask HTTP API for the Criterion-Wise Neural-LLM Hybrid Grading System.
Endpoints
---------
GET /health → liveness probe
GET /info → model / pipeline metadata
POST /predict → score an (answer, criteria) payload against a question
POST /predict expects JSON of shape:
{
"question": "…",
"answer": "…",
"criteria": [
{"name": "Input handling", "max_score": 2, "description": "full rubric text"},
{"name": "Logic", "max_score": 3, "description": "…"}
]
}
It returns JSON of shape:
{
"total_score": 3.4,
"max_total_score": 5,
"percentage": 68.0,
"results": [ {criterion, score, max_score, pred_norm, signals, explanation}, ... ],
"overall_explanation": "…"
}
"""
import logging
import os
import time
import uuid
from flask import Flask, request, jsonify, g
# Configure logging BEFORE importing anything that uses it
from model.logging_config import setup_logging
setup_logging()
from model import load_pipeline, predict
# Module-level logger for this file. The logger name is the fixed string
# "app" rather than being derived from __name__.
logger = logging.getLogger("app")
# The Flask application object; the request hooks and routes below are
# registered on it.
app = Flask(__name__)
# ── Load model once at process start ───────────────────────────────────────
# This statement runs at import time, inside an application context, so simply
# importing this module triggers load_pipeline().
# For production, prefer gunicorn with --preload so this happens pre-fork.
with app.app_context():
    load_pipeline()
# ── Per-request logging hooks ──────────────────────────────────────────────
@app.before_request
def _log_request_start():
    """Tag the incoming request with an id and a start time, then log it.

    Stores two attributes on ``flask.g`` for the rest of the request:

    * ``req_id``    - first 8 hex characters of a random UUID4, used to
      correlate the start and end log lines of one request.
    * ``req_start`` - a ``time.perf_counter()`` reading taken here, from
      which the after-request hook computes the elapsed time.
    """
    g.req_id = uuid.uuid4().hex[:8]
    g.req_start = time.perf_counter()
    # "→" marks an inbound request and pairs with the "←" written by the
    # after-request hook. The arrow had been stored as mis-decoded bytes
    # ("β†’"), which made the log line unreadable and hard to grep for.
    logger.info("→ %s %s [req=%s]", request.method, request.path, g.req_id)
@app.after_request
def _log_request_end(response):
    """Log method, path, status and latency of a finished request.

    Args:
        response: The response object Flask is about to send.

    Returns:
        The same ``response`` object, unmodified.
    """
    # Nothing to time if the before-request hook never stored a start mark.
    if not hasattr(g, "req_start"):
        return response

    duration_ms = (time.perf_counter() - g.req_start) * 1000
    request_id = getattr(g, "req_id", "?")
    logger.info(
        "← %s %s [req=%s] status=%d in %.1fms",
        request.method,
        request.path,
        request_id,
        response.status_code,
        duration_ms,
    )
    return response
@app.route("/health", methods=["GET"])
def health():
    """Liveness probe: always answer 200 with ``{"status": "ok"}``."""
    body = {"status": "ok"}
    return jsonify(body), 200
@app.route("/info", methods=["GET"])
def info():
    """Report model / pipeline metadata as JSON.

    Values come from environment variables (with the defaults shown in the
    code) plus a CUDA availability check. ``llm_disabled`` is true only when
    ``DISABLE_LLM_EXPLAINER`` is exactly ``"1"``; ``gpu_name`` is ``None``
    when CUDA is not available.
    """
    # torch is imported inside the handler rather than at module level.
    import torch

    has_cuda = torch.cuda.is_available()
    if has_cuda:
        device = "cuda"
        gpu_name = torch.cuda.get_device_name(0)
    else:
        device = "cpu"
        gpu_name = None

    metadata = {
        "encoder_space": os.getenv("ENCODER_SPACE_URL", "(unset)"),
        "llm_model": os.getenv("LLM_MODEL_NAME", "Qwen/Qwen2.5-1.5B-Instruct"),
        "llm_disabled": os.getenv("DISABLE_LLM_EXPLAINER", "0") == "1",
        "checkpoint": os.getenv("CHECKPOINT_PATH", "best_model_v5.pt"),
        "device": device,
        "gpu_name": gpu_name,
    }
    return jsonify(metadata), 200
@app.route("/predict", methods=["POST"])
def predict_endpoint():
    """Score an answer against a question's rubric criteria.

    The payload is taken from the first source that applies:

    1. the request body, when the request declares a JSON content type;
    2. an uploaded file in the multipart field ``file`` whose content is JSON;
    3. the raw request body parsed as JSON regardless of content type.

    The payload must be a JSON object; its ``question``, ``answer`` and
    ``criteria`` entries are passed to ``predict`` (missing keys are passed
    as ``None``).

    Returns:
        ``(response, status)`` where status is

        * 200 - body is the ``predict`` result serialised as JSON;
        * 400 - ``{"error": ...}``: the payload is missing, is not a JSON
          object, the uploaded file is not valid JSON, or ``predict``
          raised ``ValueError``;
        * 500 - ``{"error": "Internal error", "detail": ...}`` for any other
          exception raised while predicting or serialising the result.
    """
    if request.is_json:
        payload = request.get_json(silent=True)
    elif "file" in request.files:
        import json as _json

        try:
            payload = _json.load(request.files["file"])
        except Exception as e:
            # Deliberately broad: any failure to read or parse the upload
            # (bad syntax, bad encoding, excessive nesting) is a client error.
            return jsonify({"error": f"Invalid JSON file: {e}"}), 400
    else:
        # Try raw body as JSON anyway. force=True is required here: without
        # it get_json() refuses to parse a body whose content type is not
        # JSON, and this branch is only reached in exactly that case.
        payload = request.get_json(force=True, silent=True)

    # Covers both "no parseable body" (None) and valid JSON that is not an
    # object (list, string, number), which has no .get() to read keys from.
    if not isinstance(payload, dict):
        return jsonify({
            "error": "Request body must be JSON with keys: question, answer, criteria"
        }), 400

    question = payload.get("question")
    answer = payload.get("answer")
    criteria = payload.get("criteria")

    try:
        result = predict(question=question, answer=answer, criteria=criteria)
        return jsonify(result), 200
    except ValueError as ve:
        # Validation errors → 400
        logger.warning("Validation error: %s", ve)
        return jsonify({"error": str(ve)}), 400
    except Exception as e:
        logger.exception("Unhandled error in /predict")
        # NOTE(review): "detail" exposes the raw exception text to the client;
        # kept because callers may read it - confirm before removing.
        return jsonify({"error": "Internal error", "detail": str(e)}), 500
if __name__ == "__main__":
    # Development entry point: bind address and port come from the HOST and
    # PORT environment variables, defaulting to 0.0.0.0:8000.
    # debug=False because we don't want the reloader to load the encoder twice
    app.run(
        host=os.getenv("HOST", "0.0.0.0"),
        port=int(os.getenv("PORT", "8000")),
        debug=False,
        threaded=False,
    )