Spaces:

sahanwickramasinghe
/

pseudoscorex-backend

Sleeping

App Files Files Community

pseudoscorex-backend / app.py

Nanny7

Initial commit: Flask backend for pseudoscore-x

219ee1a 22 days ago

raw

history blame contribute delete

4.25 kB

	"""
	Flask HTTP API for the Criterion-Wise Neural-LLM Hybrid Grading System.

	Endpoints
	---------
	GET /health → liveness probe
	GET /info → model / pipeline metadata
	POST /predict → score an (answer, criteria) payload against a question

	POST /predict expects JSON (sent as an application/json body, or as a
	multipart upload in a form field named "file") of shape:
	{
	"question": "…",
	"answer": "…",
	"criteria": [
	{"name": "Input handling", "max_score": 2, "description": "full rubric text"},
	{"name": "Logic", "max_score": 3, "description": "…"}
	]
	}

	It returns JSON of shape:
	{
	"total_score": 3.4,
	"max_total_score": 5,
	"percentage": 68.0,
	"results": [ {criterion, score, max_score, pred_norm, signals, explanation}, ... ],
	"overall_explanation": "…"
	}
	"""
	import logging
	import os
	import time
	import uuid
	from flask import Flask, request, jsonify, g

	# Configure logging BEFORE importing anything that uses it.
	# The `model` import below is deliberately placed after this call, so do
	# not let an import sorter hoist it above setup_logging().
	from model.logging_config import setup_logging
	setup_logging()

	from model import load_pipeline, predict

	# Logger shared by the request hooks and the /predict handler in this file.
	logger = logging.getLogger("app")


	# The WSGI application object; routes and hooks below register against it.
	app = Flask(__name__)


	# ── Load model once at process start ───────────────────────────────────────
	# The pipeline is loaded at import time, inside an application context, so
	# it is ready before the first request is served. For production, prefer
	# gunicorn with --preload so this happens pre-fork.
	with app.app_context():
	    load_pipeline()


	# ── Per-request logging hooks ──────────────────────────────────────────────
	@app.before_request
	def _log_request_start():
	    """Tag the incoming request with a short id and a start timestamp.

	    Stores ``req_id`` (first 8 hex characters of a random UUID) and
	    ``req_start`` (a ``time.perf_counter()`` reading) on ``flask.g`` and
	    logs the request method and path.
	    """
	    request_id = uuid.uuid4().hex[:8]
	    g.req_id = request_id
	    g.req_start = time.perf_counter()
	    logger.info("→ %s %s [req=%s]", request.method, request.path, request_id)


	@app.after_request
	def _log_request_end(response):
	    """Log the response status and elapsed time, then pass the response on.

	    Nothing is logged when ``g.req_start`` was never set. The response
	    object is always returned unchanged.
	    """
	    if not hasattr(g, "req_start"):
	        return response

	    duration_ms = 1000 * (time.perf_counter() - g.req_start)
	    request_id = getattr(g, "req_id", "?")
	    logger.info(
	        "← %s %s [req=%s] status=%d in %.1fms",
	        request.method,
	        request.path,
	        request_id,
	        response.status_code,
	        duration_ms,
	    )
	    return response


	@app.route("/health", methods=["GET"])
	def health():
	    """Liveness probe: always answers 200 with ``{"status": "ok"}``."""
	    body = {"status": "ok"}
	    return jsonify(body), 200


	@app.route("/info", methods=["GET"])
	def info():
	    """Return model / pipeline metadata as JSON with status 200.

	    Configuration values are read from environment variables, with the
	    defaults shown below; the device fields reflect whether torch reports
	    CUDA as available.
	    """
	    # Imported here rather than at module top, as in the original layout.
	    import torch

	    cuda_ok = torch.cuda.is_available()
	    metadata = {
	        "encoder_space": os.getenv("ENCODER_SPACE_URL", "(unset)"),
	        "llm_model": os.getenv("LLM_MODEL_NAME", "Qwen/Qwen2.5-1.5B-Instruct"),
	        "llm_disabled": os.getenv("DISABLE_LLM_EXPLAINER", "0") == "1",
	        "checkpoint": os.getenv("CHECKPOINT_PATH", "best_model_v5.pt"),
	        "device": "cuda" if cuda_ok else "cpu",
	        # Only query the device name when a GPU is actually present.
	        "gpu_name": torch.cuda.get_device_name(0) if cuda_ok else None,
	    }
	    return jsonify(metadata), 200


	@app.route("/predict", methods=["POST"])
	def predict_endpoint():
	    """Score an answer against rubric criteria and return the result as JSON.

	    The payload may arrive in one of three ways:

	    * an ``application/json`` request body,
	    * a multipart upload whose form field is named ``file`` and whose
	      content is JSON, or
	    * a raw JSON body sent with any other content type.

	    The payload must be a JSON object; its ``question``, ``answer`` and
	    ``criteria`` values are handed to ``predict`` as-is (missing keys are
	    passed as ``None``).

	    Returns:
	        ``(response, 200)`` with the JSON-serialized result of ``predict``;
	        ``(response, 400)`` when the body is missing, is not valid JSON, is
	        not a JSON object, or ``predict`` raises ``ValueError``;
	        ``(response, 500)`` for any other exception raised while predicting.
	    """
	    if request.is_json:
	        payload = request.get_json(silent=True)
	    elif "file" in request.files:
	        import json as _json
	        try:
	            payload = _json.load(request.files["file"])
	        except Exception as e:
	            return jsonify({"error": f"Invalid JSON file: {e}"}), 400
	    else:
	        # Try the raw body as JSON anyway. force=True is required: this
	        # branch is only reached when the content type is not JSON, and
	        # without it get_json() returns None without parsing the body.
	        payload = request.get_json(force=True, silent=True)

	    # Reject a missing/unparseable body and also valid JSON that is not an
	    # object (list, string, number, ...): calling .get() on those would
	    # raise AttributeError and surface as an unhandled 500.
	    if not isinstance(payload, dict):
	        return jsonify({
	            "error": "Request body must be JSON with keys: question, answer, criteria"
	        }), 400

	    question = payload.get("question")
	    answer = payload.get("answer")
	    criteria = payload.get("criteria")

	    try:
	        result = predict(question=question, answer=answer, criteria=criteria)
	        return jsonify(result), 200
	    except ValueError as ve:
	        # Validation errors → 400
	        logger.warning("Validation error: %s", ve)
	        return jsonify({"error": str(ve)}), 400
	    except Exception as e:
	        logger.exception("Unhandled error in /predict")
	        # NOTE(review): "detail" echoes the raw exception text to the client,
	        # which can leak internals. Kept for backward compatibility — consider
	        # dropping it on public deployments.
	        return jsonify({"error": "Internal error", "detail": str(e)}), 500


	if __name__ == "__main__":
	    # Development entry point: bind address and port come from the HOST and
	    # PORT environment variables. debug=False because we don't want the
	    # reloader to load the encoder twice.
	    app.run(
	        host=os.getenv("HOST", "0.0.0.0"),
	        port=int(os.getenv("PORT", "8000")),
	        debug=False,
	        threaded=False,
	    )