# ecom-qa-bert / src/app.py
# rnyx's picture
# Initial deploy: BERT QA app
# 3338b6d
"""
Flask application entrypoint.
Routes
GET / — main UI (index.html)
GET /healthz — liveness probe
POST /api/scrape — scrape a product URL
POST /api/predict — run BERT QA on (question, context)
GET /api/history — list stored Q&A entries
DELETE /api/history/<id> — remove a single entry
DELETE /api/history — clear all entries
"""
import logging
import os
from pathlib import Path
from flask import Flask, jsonify, render_template, request
from . import config, db
from .model import init_model, predict_qa
from .scraper import scrape_url
# Record layout handed to logging.basicConfig below. The separator between the
# logger name and the message is an em dash (previously a mis-encoded sequence).
FORMAT = "%(asctime)s [%(levelname)s] %(name)s — %(message)s"
logging.basicConfig(format=FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)

# Two levels up from this file: the directory that holds templates/ and static/.
_PROJECT_ROOT = Path(__file__).resolve().parent.parent
def _json_object() -> dict:
    """Return the current request's JSON body if it is an object, else ``{}``.

    ``get_json(silent=True)`` may yield any JSON type (list, string, number)
    or ``None``; anything that is not a dict is treated as an empty payload so
    the route's own "field is required" validation answers with a 400.
    """
    payload = request.get_json(silent=True)
    return payload if isinstance(payload, dict) else {}


def _text_field(payload: dict, key: str) -> str:
    """Return ``payload[key]`` stripped of whitespace, or ``""``.

    Missing keys, ``None`` and non-string values all map to the empty string
    instead of raising ``AttributeError`` on ``.strip()``.
    """
    value = payload.get(key)
    return value.strip() if isinstance(value, str) else ""


def create_app() -> Flask:
    """Build and configure the Flask application.

    In order: creates the app with the project's template/static folders,
    attaches flask-limiter when ``config.RATE_LIMIT_ENABLED`` is set and the
    package is importable, calls ``db.init_db()``, registers the UI, health
    and JSON API routes, and finally calls ``init_model()``.

    Returns:
        The configured Flask application.
    """
    app = Flask(
        __name__,
        template_folder=str(_PROJECT_ROOT / "templates"),
        static_folder=str(_PROJECT_ROOT / "static"),
    )

    # ── Rate limiting (optional) ────────────────────────────────
    limiter = None
    if config.RATE_LIMIT_ENABLED:
        try:
            # Imported lazily so the app still boots without flask-limiter.
            from flask_limiter import Limiter
            from flask_limiter.util import get_remote_address

            limiter = Limiter(
                get_remote_address,
                app=app,
                default_limits=[config.RATE_LIMIT_DEFAULT],
                storage_uri="memory://",
                strategy="fixed-window",
            )
            logger.info("Rate limiting enabled")
        except ImportError:
            logger.warning("flask-limiter not installed; rate limiting disabled")

    def _limit(rule: str):
        """Decorator that applies a limit only if the limiter is active."""
        if limiter is None:
            # No limiter configured: hand back the view function untouched.
            def noop(fn):
                return fn

            return noop
        return limiter.limit(rule)

    # ── Init DB early so any failure surfaces at boot, not at first write ──
    db.init_db()

    # ── Routes ──────────────────────────────────────────────────
    @app.get("/")
    def index():
        """Serve the main UI page."""
        return render_template("index.html")

    @app.get("/healthz")
    def healthz():
        """Liveness probe: report a fixed status plus the configured model name."""
        return jsonify({"status": "ok", "model": config.MODEL_NAME}), 200

    @app.post("/api/scrape")
    @_limit(config.RATE_LIMIT_SCRAPE)
    def api_scrape():
        """Scrape the URL given in the JSON body and return the scraper's result.

        Responds 400 when ``url`` is missing/blank/not a string or when the
        scraper reports an ``error`` key, 500 on any unexpected exception.
        """
        url = _text_field(_json_object(), "url")
        if not url:
            return jsonify({"error": "URL is required."}), 400
        try:
            result = scrape_url(url)
            if result.get("error"):
                return jsonify(result), 400
            return jsonify(result)
        except Exception as e:  # noqa: BLE001 — top-level safety net
            # NOTE(review): the response echoes the exception text; consider a
            # generic message if internal details must not reach clients.
            logger.exception("Scraping failed")
            return jsonify({"error": f"Unexpected error: {e}"}), 500

    @app.post("/api/predict")
    @_limit(config.RATE_LIMIT_PREDICT)
    def api_predict():
        """Answer ``question`` against ``context`` and record the exchange.

        Responds 400 for missing or out-of-bounds inputs and for ``ValueError``
        raised by ``predict_qa``, 500 for any other inference failure. A
        failure while saving to history is logged and does not affect the
        response; on success the saved row id is added as ``history_id``.
        """
        payload = _json_object()
        question = _text_field(payload, "question")
        context = _text_field(payload, "context")
        # Optional metadata: blank or non-string values are stored as None.
        source_url = _text_field(payload, "source_url") or None
        source_type = _text_field(payload, "source_type") or None
        product_title = _text_field(payload, "product_title") or None

        if not question or not context:
            return jsonify({"error": "Both question and context are required."}), 400
        if len(context) < 20:
            return jsonify({"error": "Context is too short (minimum 20 characters)."}), 400
        if len(question) > 500:
            return jsonify({"error": "Question is too long (max 500 characters)."}), 400

        try:
            result = predict_qa(question, context)
        except ValueError as e:
            return jsonify({"error": str(e)}), 400
        except Exception as e:  # noqa: BLE001
            logger.exception("Prediction failed")
            return jsonify({"error": f"Inference error: {e}"}), 500

        # Persist — failure here must NOT break the user's response
        try:
            entry_id = db.save_qa(
                question=question,
                answer=result["answer"],
                confidence=result["confidence"],
                confidence_level=result["confidence_level"],
                inference_ms=result["inference_time_ms"],
                source_url=source_url,
                source_type=source_type,
                product_title=product_title,
            )
            result["history_id"] = entry_id
        except Exception:
            logger.exception("Failed to persist Q&A — continuing")
        return jsonify(result)

    @app.get("/api/history")
    def api_history():
        """List stored Q&A entries, newest-first ordering left to ``db.list_history``.

        The optional ``limit`` query parameter falls back to
        ``config.HISTORY_LIMIT`` when absent, unparsable or zero, and is
        clamped to the range 1..500.
        """
        limit = request.args.get("limit", type=int) or config.HISTORY_LIMIT
        limit = max(1, min(limit, 500))
        try:
            return jsonify({"items": db.list_history(limit=limit)})
        except Exception as e:  # noqa: BLE001
            logger.exception("History listing failed")
            return jsonify({"error": str(e)}), 500

    @app.delete("/api/history/<int:entry_id>")
    def api_history_delete(entry_id: int):
        """Delete one history entry; 404 when ``db.delete_entry`` reports nothing removed."""
        try:
            ok = db.delete_entry(entry_id)
            return jsonify({"deleted": ok, "id": entry_id}), (200 if ok else 404)
        except Exception as e:  # noqa: BLE001
            logger.exception("History delete failed")
            return jsonify({"error": str(e)}), 500

    @app.delete("/api/history")
    def api_history_clear():
        """Clear all history entries and return the value from ``db.clear_history``."""
        try:
            n = db.clear_history()
            return jsonify({"cleared": n})
        except Exception as e:  # noqa: BLE001
            logger.exception("History clear failed")
            return jsonify({"error": str(e)}), 500

    # ── Model load at import-time so gunicorn workers are warm ─────
    logger.info("Initializing BERT QA model…")
    init_model()
    logger.info("Model ready.")
    return app
# Module-level application object for WSGI servers, e.g. `gunicorn src.app:app`.
app = create_app()

if __name__ == "__main__":
    # Run directly: start Flask's built-in server using the config settings.
    app.run(debug=config.DEBUG, port=config.PORT, host="0.0.0.0")