| """Flask annotation app for blind summary evaluation. |
| |
| Serves a web UI where annotators evaluate AI-generated summaries. |
| Annotations are anonymous and shared -- each summary is evaluated once. |
| Annotations are persisted in a SQLite database. |
| |
| Optional password protection: set APP_PASSWORD as an environment variable. |
| Authentication uses HMAC tokens stored in the browser via localStorage, |
| avoiding cookies/sessions that can break behind reverse proxies. |
| """ |
|
|
| import hashlib |
| import hmac |
| import json |
| import os |
| import sqlite3 |
| from functools import cache |
| from pathlib import Path |
|
|
| from flask import Flask, Response, jsonify, request, send_file |
|
|
# --- Configuration (each value can be overridden through the environment) ---

# JSONL file containing the evaluation items shown to annotators.
DATASET_PATH = Path(os.getenv("DATASET_PATH", "2026-04-23_prompt_evaluation_dataset.jsonl"))
# SQLite file in which submitted annotations are stored.
DB_PATH = Path(os.getenv("DB_PATH", "/data/annotations.db"))

# Annotation field names, in the column order of the ``annotations`` table.
ANNOTATION_FIELDS = (
    "bewertung",
    "korrekt",
    "relevant",
    "vollstaendig",
    "kohaerenz",
    "anmerkungen",
)

# An empty password switches authentication off.
APP_PASSWORD = os.getenv("APP_PASSWORD", "")
# HMAC key for auth tokens; a random value is generated at import time unless
# SECRET_KEY is provided.
SECRET_KEY = os.getenv("SECRET_KEY", os.urandom(24).hex())
|
|
# Flask application object on which this module's routes and request hooks are registered.
app = Flask(__name__)
|
|
|
|
| |
| |
| |
|
|
def get_db() -> sqlite3.Connection:
    """Open a SQLite connection to ``DB_PATH``, ready for annotation queries.

    The parent directory of ``DB_PATH`` is created on demand, so connecting
    also works when the module is imported by a WSGI server rather than run
    as a script. The connection uses ``sqlite3.Row`` as its row factory,
    requests WAL journal mode, and creates the ``annotations`` table if it
    does not exist yet.

    Returns:
        An open connection. The caller is responsible for closing it.

    Raises:
        OSError: If the parent directory cannot be created.
        sqlite3.Error: If the database cannot be opened or initialised.
    """
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    db = sqlite3.connect(str(DB_PATH))
    try:
        db.row_factory = sqlite3.Row
        db.execute("PRAGMA journal_mode=WAL")
        db.execute("""
            CREATE TABLE IF NOT EXISTS annotations (
                eval_id TEXT PRIMARY KEY,
                bewertung TEXT,
                korrekt TEXT,
                relevant TEXT,
                vollstaendig TEXT,
                kohaerenz TEXT,
                anmerkungen TEXT,
                updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
            )
        """)
    except Exception:
        # Do not leak the handle when setup fails half-way.
        db.close()
        raise
    return db
|
|
|
|
| |
| |
| |
|
|
@cache
def load_dataset() -> tuple[dict, ...]:
    """Read the JSONL dataset and return its rows ordered by ``eval_id``.

    Blank lines are skipped. Every row gains a ``has_prior_judgement`` flag,
    true when its ``bewertung`` value is truthy; flagged rows additionally
    receive a ``prior_<field>`` copy of each annotation field. The result is
    memoised, so the file is parsed only on the first call.
    """
    records: list[dict] = []
    with open(DATASET_PATH, encoding="utf-8") as handle:
        for raw in handle:
            if not raw.strip():
                continue
            record = json.loads(raw)
            judged = bool(record.get("bewertung"))
            record["has_prior_judgement"] = judged
            if judged:
                record.update(
                    {f"prior_{name}": record.get(name) for name in ANNOTATION_FIELDS}
                )
            records.append(record)
    return tuple(sorted(records, key=lambda record: record["eval_id"]))
|
|
|
|
def fetch_annotations(db: sqlite3.Connection) -> dict[str, dict]:
    """Load every stored annotation into a mapping from ``eval_id`` to its row.

    Each value is a plain ``dict`` copy of the row. Rows must be addressable
    by column name (e.g. via the ``sqlite3.Row`` row factory).
    """
    by_id: dict[str, dict] = {}
    for record in db.execute("SELECT * FROM annotations"):
        by_id[record["eval_id"]] = dict(record)
    return by_id
|
|
|
|
def merge_items_with_annotations(
    items: tuple[dict, ...],
    annotations: dict[str, dict],
) -> list[dict]:
    """Build a new list of item copies enriched with their stored annotation.

    Every copy carries an ``evaluated`` flag telling whether an annotation
    exists for its ``eval_id``. When one exists, each annotation field on the
    copy is overwritten with the stored value. The input items are left
    untouched.
    """

    def _combine(item: dict) -> dict:
        stored = annotations.get(item["eval_id"])
        combined = dict(item)
        combined["evaluated"] = stored is not None
        if stored:
            combined.update({name: stored.get(name) for name in ANNOTATION_FIELDS})
        return combined

    return [_combine(item) for item in items]
|
|
|
|
| |
| |
| |
|
|
|
|
| def _make_auth_token() -> str: |
| """Create an HMAC token derived from the app password and secret key.""" |
| key = SECRET_KEY.encode() if isinstance(SECRET_KEY, str) else SECRET_KEY |
| return hmac.new(key, APP_PASSWORD.encode(), hashlib.sha256).hexdigest() |
|
|
|
|
@app.before_request
def check_auth():
    """Reject unauthenticated requests when password protection is enabled.

    Returns:
        ``None`` (the request proceeds) when no ``APP_PASSWORD`` is set, when
        the path is ``/`` or ``/login`` or starts with ``/api/login``, or when
        the ``Authorization: Bearer <token>`` header carries the expected
        token. Otherwise a JSON ``{"error": "unauthorized"}`` body with
        status 401.
    """
    if not APP_PASSWORD:
        return None
    if request.path in ("/", "/login") or request.path.startswith("/api/login"):
        return None
    token = request.headers.get("Authorization", "").removeprefix("Bearer ")
    # Constant-time comparison so response timing does not reveal how much of
    # the token matched. Comparing bytes also accepts non-ASCII header values,
    # which compare_digest rejects for str arguments.
    if hmac.compare_digest(token.encode(), _make_auth_token().encode()):
        return None
    return jsonify({"error": "unauthorized"}), 401
|
|
|
|
@app.route("/api/login", methods=["POST"])
def api_login():
    """Exchange the app password for an auth token.

    Expects a JSON object with a ``password`` string.

    Returns:
        ``{"token": ...}`` when the password matches ``APP_PASSWORD``;
        otherwise ``{"error": "wrong_password"}`` with status 401. A missing,
        malformed, or non-object body is treated as a wrong password.
    """
    # silent=True turns an absent or unparsable JSON body into None, so the
    # client always receives the JSON error below.
    data = request.get_json(silent=True)
    supplied = data.get("password") if isinstance(data, dict) else None
    # Constant-time comparison on bytes (str arguments must be ASCII-only).
    if isinstance(supplied, str) and hmac.compare_digest(
        supplied.encode(), APP_PASSWORD.encode()
    ):
        return jsonify({"token": _make_auth_token()})
    return jsonify({"error": "wrong_password"}), 401
|
|
|
|
| |
| |
| |
|
|
|
|
@app.after_request
def no_cache_api(response):
    """Mark responses for ``/api/`` paths as non-cacheable via ``Cache-Control: no-store``."""
    is_api_call = request.path.startswith("/api/")
    if not is_api_call:
        return response
    response.headers["Cache-Control"] = "no-store"
    return response
|
|
|
|
@app.route("/")
def index():
    """Serve ``index.html`` for the root URL."""
    page = "index.html"
    return send_file(page)
|
|
|
|
@app.route("/api/entries")
def get_entries():
    """Return all evaluation items as JSON, with stored annotations merged in."""
    items = load_dataset()
    db = get_db()
    try:
        annotations = fetch_annotations(db)
    finally:
        # Release the connection even when the query raises.
        db.close()
    return jsonify(merge_items_with_annotations(items, annotations))
|
|
|
|
@app.route("/api/annotate", methods=["POST"])
def annotate():
    """Save an annotation, replacing any existing one for the same ``eval_id``.

    Expects a JSON object with an ``eval_id`` and, optionally, the annotation
    fields; absent fields are stored as NULL.

    Returns:
        ``{"status": "ok"}`` on success, or ``{"error": "invalid_request"}``
        with status 400 when the body is not a JSON object or has no
        ``eval_id``.
    """
    data = request.get_json(silent=True)
    # Reject malformed payloads up front instead of failing with a 500, and
    # never store a row whose primary key is NULL.
    if not isinstance(data, dict) or data.get("eval_id") is None:
        return jsonify({"error": "invalid_request"}), 400

    db = get_db()
    try:
        db.execute(
            """INSERT OR REPLACE INTO annotations
               (eval_id, bewertung, korrekt, relevant, vollstaendig, kohaerenz, anmerkungen)
               VALUES (?, ?, ?, ?, ?, ?, ?)""",
            (
                data["eval_id"],
                data.get("bewertung"),
                data.get("korrekt"),
                data.get("relevant"),
                data.get("vollstaendig"),
                data.get("kohaerenz"),
                data.get("anmerkungen"),
            ),
        )
        db.commit()
    finally:
        # Release the connection even when the write fails.
        db.close()
    return jsonify({"status": "ok"})
|
|
|
|
@app.route("/api/progress")
def progress():
    """Return annotation progress as ``{"total": ..., "annotated": ...}``.

    ``total`` is the number of dataset items; ``annotated`` is the number of
    rows in the ``annotations`` table.
    """
    total = len(load_dataset())
    db = get_db()
    try:
        count = db.execute("SELECT COUNT(*) FROM annotations").fetchone()[0]
    finally:
        # Release the connection even when the query raises.
        db.close()
    return jsonify({"total": total, "annotated": count})
|
|
|
|
@app.route("/api/export")
def export_annotations():
    """Export all annotations, ordered by ``eval_id``, as a JSONL download.

    Each row becomes one JSON object on its own newline-terminated line. With
    no annotations the body is empty, so the file stays valid JSONL.
    """
    db = get_db()
    try:
        rows = db.execute("SELECT * FROM annotations ORDER BY eval_id").fetchall()
    finally:
        # Release the connection even when the query raises.
        db.close()
    # Terminate every record individually so that zero rows yields an empty
    # body rather than a lone blank line.
    payload = "".join(
        json.dumps(dict(row), ensure_ascii=False) + "\n" for row in rows
    )
    return Response(
        payload,
        mimetype="application/jsonl",
        headers={"Content-Disposition": "attachment; filename=annotations.jsonl"},
    )
|
|
|
|
if __name__ == "__main__":
    # Make sure the directory for the SQLite file exists before the first connection.
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Listen on all network interfaces, port 7860.
    app.run(host="0.0.0.0", port=7860)
|
|