Spaces:
Running
Running
| import secrets | |
| import sys | |
| import uuid | |
| from pathlib import Path | |
| from flask import Flask, Response, jsonify, render_template, request, send_file | |
| from newest_model import PREFERRED_PRODUCTION_CHAT_MODELS, select_groq_chat_models | |
| from src.config import ( | |
| APP_PASSWORD, | |
| APP_USERNAME, | |
| AVAILABLE_MODELS, | |
| DATA_DIR, | |
| DEFAULT_OUTPUT_SHEET_NAME, | |
| SPACE_ID, | |
| ) | |
| from src.process_runner import stop_process, stream_process | |
| from src.utils import reference_sync_status, save_manual_references_to_hub | |
| from src.workbook_io import read_workbook_sheets, resolve_allowed_path, save_uploaded_excel | |
# Directory containing this module; every other path below is derived from it.
APP_ROOT = Path(__file__).resolve().parent

# Upload directory under the data dir, created at import time if it is missing.
UPLOAD_DIR = APP_ROOT.joinpath(DATA_DIR, "uploads")
UPLOAD_DIR.mkdir(parents=True, exist_ok=True)

# Roots handed to resolve_allowed_path when a handler validates a file path.
ALLOWED_FILE_ROOTS = [APP_ROOT.joinpath(DATA_DIR)]

app = Flask(
    __name__,
    template_folder=str(APP_ROOT.joinpath("ui", "templates")),
    static_folder=str(APP_ROOT.joinpath("ui", "static")),
)
# Request body size limit: 100 MiB.
app.config["MAX_CONTENT_LENGTH"] = 100 * 1024**2

# Module-level, mutable UI state. Handlers in this file read and overwrite
# these keys in place, so the values are shared by every request served by
# this process.
STATE = {
    # Workbook loaded for the clean step.
    "clean_path": "",
    "clean_filename": "",
    "clean_sheets": [],
    "clean_selected_sheet": "",
    # Options carried over from the submitted form.
    "output_sheet": DEFAULT_OUTPUT_SHEET_NAME,
    "models": "",
    # Workbook and blueprint loaded for the apply step.
    "apply_workbook_path": "",
    "apply_workbook_filename": "",
    "apply_sheets": [],
    "apply_selected_sheet": "",
    "apply_blueprint_path": "",
    "apply_blueprint_filename": "",
}
def auth_required_response() -> Response:
    """Build the 401 response that challenges the client for HTTP Basic credentials."""
    challenge_headers = {"WWW-Authenticate": 'Basic realm="MasterMap Cleaner"'}
    return Response(
        "Authentication required",
        status=401,
        headers=challenge_headers,
    )
def missing_auth_config_response() -> Response:
    """Build the 503 response returned when APP_PASSWORD has not been configured."""
    body = "APP_PASSWORD Space Secret is not configured."
    return Response(body, status=503)
def require_basic_auth():
    """Check the current request's HTTP Basic credentials.

    Returns:
        None when the request may proceed; otherwise a Response (401 when
        credentials are missing or wrong, 503 when APP_PASSWORD is unset while
        SPACE_ID is set).

    NOTE(review): no decorator is visible on this function in this view of the
    file; it is presumably registered as a before-request hook — confirm.
    """
    if not APP_PASSWORD:
        # Without a password, auth is only treated as misconfigured when
        # SPACE_ID is set; otherwise the request is let through unauthenticated.
        return missing_auth_config_response() if SPACE_ID else None

    auth = request.authorization
    if not auth:
        return auth_required_response()

    # compare_digest raises TypeError for str arguments that contain non-ASCII
    # characters, which would turn a bad login into a 500. Comparing UTF-8
    # bytes keeps the constant-time comparison and accepts any input.
    supplied_username = (auth.username or "").encode("utf-8")
    supplied_password = (auth.password or "").encode("utf-8")

    # Evaluate both comparisons before combining them so the username result
    # does not short-circuit the password check.
    valid_username = secrets.compare_digest(supplied_username, APP_USERNAME.encode("utf-8"))
    valid_password = secrets.compare_digest(supplied_password, APP_PASSWORD.encode("utf-8"))
    if valid_username and valid_password:
        return None
    return auth_required_response()
def prevent_browser_cache(response):
    """Set no-cache headers on ``response`` and return the same object.

    Existing values for Cache-Control, Pragma and Expires are overwritten;
    other headers are left alone.
    """
    no_cache_headers = {
        "Cache-Control": "no-store, no-cache, must-revalidate, max-age=0",
        "Pragma": "no-cache",
        "Expires": "0",
    }
    for header_name, header_value in no_cache_headers.items():
        response.headers[header_name] = header_value
    return response
def default_models() -> str:
    """Return the default model list as a comma-separated string.

    Entries of AVAILABLE_MODELS whose lowercase form matches a preferred
    production model are used, in AVAILABLE_MODELS order. When none match,
    the full PREFERRED_PRODUCTION_CHAT_MODELS list is used instead.
    """
    preferred_lowercase = {name.lower() for name in PREFERRED_PRODUCTION_CHAT_MODELS}

    matching = []
    for candidate in AVAILABLE_MODELS:
        if candidate.lower() in preferred_lowercase:
            matching.append(candidate)

    if not matching:
        matching = PREFERRED_PRODUCTION_CHAT_MODELS
    return ",".join(matching)
def render_page(message: str = "", error: str = ""):
    """Render index.html from STATE plus derived values.

    Before rendering, each non-empty sheet list has its selected sheet
    re-validated through pick_sheet, so STATE is updated as a side effect.

    Args:
        message: Informational text passed to the template.
        error: Error text passed to the template.
    """
    sheet_selection_pairs = (
        ("clean_sheets", "clean_selected_sheet"),
        ("apply_sheets", "apply_selected_sheet"),
    )
    for sheets_key, selected_key in sheet_selection_pairs:
        if STATE[sheets_key]:
            STATE[selected_key] = pick_sheet(STATE[sheets_key], STATE[selected_key])

    template_context = dict(STATE)
    template_context.update(
        default_output_sheet=DEFAULT_OUTPUT_SHEET_NAME,
        default_models=default_models(),
        can_apply=can_apply_blueprint(),
        message=message,
        error=error,
    )
    return render_template("index.html", **template_context)
def can_apply_blueprint() -> bool:
    """Return True when STATE holds a workbook, a blueprint, sheets and a selected sheet."""
    required_keys = (
        "apply_workbook_path",
        "apply_blueprint_path",
        "apply_sheets",
        "apply_selected_sheet",
    )
    return all(STATE[key] for key in required_keys)
def wants_json_response() -> bool:
    """Return True when the request's Accept header contains ``application/json``."""
    accept_header = request.headers.get("Accept", "")
    return "application/json" in accept_header
def ui_state_payload(message: str = "", error: str = ""):
    """Build the JSON-serialisable dict describing the apply-step state.

    Args:
        message: Informational text to include.
        error: Error text to include.

    Returns:
        A dict with ``message``, ``error``, the ``apply_*`` entries of STATE
        and ``can_apply``.
    """
    payload = {"message": message, "error": error}
    apply_keys = (
        "apply_workbook_path",
        "apply_workbook_filename",
        "apply_sheets",
        "apply_selected_sheet",
        "apply_blueprint_path",
        "apply_blueprint_filename",
    )
    for key in apply_keys:
        payload[key] = STATE[key]
    payload["can_apply"] = can_apply_blueprint()
    return payload
def pick_sheet(sheets, preferred_sheet=None):
    """Choose which sheet should be selected from ``sheets``.

    Order of preference: ``preferred_sheet`` if it is non-empty and present,
    then STATE["output_sheet"] if present, then the first sheet. Returns an
    empty string when ``sheets`` is empty.
    """
    if preferred_sheet and preferred_sheet in sheets:
        return preferred_sheet

    configured_output = STATE["output_sheet"]
    if configured_output in sheets:
        return configured_output

    if not sheets:
        return ""
    return sheets[0]
def update_ui_state_from_form(form):
    """Copy submitted form values into STATE, keeping current values for empty fields.

    Args:
        form: Mapping-like object with a ``get`` method (the handlers pass
            ``request.form``).
    """
    # These fields simply keep their previous value when the form omits them.
    for field in ("clean_selected_sheet", "models", "apply_selected_sheet"):
        STATE[field] = form.get(field) or STATE[field]

    # The output sheet additionally falls back to the configured default.
    STATE["output_sheet"] = (
        form.get("output_sheet") or STATE["output_sheet"] or DEFAULT_OUTPUT_SHEET_NAME
    )
def index():
    """Render the main page from the current STATE with no message or error."""
    return render_page(message="", error="")
def prepare_clean():
    """Store an uploaded workbook and register it for both the clean and apply steps.

    The upload is saved under UPLOAD_DIR and its sheet names are read. On any
    failure the page is re-rendered with the exception text as the error.
    """
    try:
        update_ui_state_from_form(request.form)
        filename, path = save_uploaded_excel(request.files.get("file"), UPLOAD_DIR)
        sheets = read_workbook_sheets(path)
    except Exception as exc:
        return render_page(error=str(exc))

    saved_path = str(path)
    submitted = request.form
    STATE.update(
        clean_path=saved_path,
        clean_filename=filename,
        clean_sheets=sheets,
        clean_selected_sheet=pick_sheet(sheets, submitted.get("clean_selected_sheet")),
        # The same upload also becomes the apply-step workbook.
        apply_workbook_path=saved_path,
        apply_workbook_filename=filename,
        apply_sheets=sheets,
        apply_selected_sheet=pick_sheet(
            sheets,
            submitted.get("apply_selected_sheet") or STATE["output_sheet"],
        ),
    )
    return render_page(message=f"Loaded {filename}.")
def remove_clean():
    """Forget the clean-step workbook in STATE and re-render the page.

    The apply-step workbook is cleared too, but only when it points at the
    same path as the workbook being removed. No file is deleted here.
    """
    update_ui_state_from_form(request.form)

    removed_path = STATE["clean_path"]
    STATE.update(
        clean_path="",
        clean_filename="",
        clean_sheets=[],
        clean_selected_sheet="",
    )

    if STATE["apply_workbook_path"] == removed_path:
        STATE.update(
            apply_workbook_path="",
            apply_workbook_filename="",
            apply_sheets=[],
            apply_selected_sheet="",
        )
    return render_page(message="File removed.")
def prepare_apply_workbook():
    """Store an uploaded workbook as the apply-step workbook.

    Responds with JSON (ui_state_payload) when the client accepts
    ``application/json``, otherwise with the rendered page. Failures produce
    a 400 in the JSON case and an error banner in the HTML case.
    """
    try:
        update_ui_state_from_form(request.form)
        filename, path = save_uploaded_excel(request.files.get("file"), UPLOAD_DIR)
        sheets = read_workbook_sheets(path)
    except Exception as exc:
        failure = str(exc)
        if not wants_json_response():
            return render_page(error=failure)
        return jsonify(ui_state_payload(error=failure)), 400

    STATE.update(
        apply_workbook_path=str(path),
        apply_workbook_filename=filename,
        apply_sheets=sheets,
        apply_selected_sheet=pick_sheet(sheets, request.form.get("apply_selected_sheet")),
    )

    success = f"Loaded apply workbook {filename}."
    if not wants_json_response():
        return render_page(message=success)
    return jsonify(ui_state_payload(message=success))
def prepare_apply_blueprint():
    """Store an uploaded blueprint file for the apply step.

    If an apply workbook is already registered and still exists on disk, its
    sheet list and selected sheet are refreshed first. Responds with JSON when
    the client accepts ``application/json``, otherwise with the rendered page.
    """
    try:
        update_ui_state_from_form(request.form)

        current_workbook = STATE["apply_workbook_path"]
        if current_workbook and Path(current_workbook).is_file():
            STATE["apply_sheets"] = read_workbook_sheets(Path(current_workbook))
            STATE["apply_selected_sheet"] = pick_sheet(
                STATE["apply_sheets"],
                request.form.get("apply_selected_sheet"),
            )

        filename, path = save_uploaded_excel(request.files.get("file"), UPLOAD_DIR)
    except Exception as exc:
        failure = str(exc)
        if not wants_json_response():
            return render_page(error=failure)
        return jsonify(ui_state_payload(error=failure)), 400

    STATE.update(
        apply_blueprint_path=str(path),
        apply_blueprint_filename=filename,
    )

    success = f"Loaded blueprint {filename}."
    if not wants_json_response():
        return render_page(message=success)
    return jsonify(ui_state_payload(message=success))
def models_endpoint():
    """Return the selected Groq chat models as JSON, or a 500 with the error text."""
    try:
        models = select_groq_chat_models(limit=len(PREFERRED_PRODUCTION_CHAT_MODELS))
    except Exception as exc:
        error_body = {"error": str(exc)}
        return jsonify(error_body), 500
    else:
        return jsonify({"models": models})
def references_status():
    """Return the result of reference_sync_status() as a JSON response."""
    status = reference_sync_status()
    return jsonify(status)
def save_references():
    """Save manual references via save_manual_references_to_hub and report the outcome.

    Returns a JSON body with a confirmation message merged with the helper's
    result, or a 400 with the exception text when the helper raises.
    """
    try:
        result = save_manual_references_to_hub(APP_ROOT)
    except Exception as exc:
        return jsonify({"error": str(exc)}), 400

    body = {"message": "Manual references saved to Hugging Face."}
    body.update(result)
    return jsonify(body)
def sheets_endpoint():
    """Return the sheet names of the workbook named by the ``path`` query parameter.

    Responses:
        200 with ``{"sheets": [...]}`` on success.
        400 when resolve_allowed_path rejects the path with ValueError, matching
            how the run and apply_blueprint handlers report the same failure.
        404 when the resolved path is not an existing file.
        500 for any other failure.
    """
    requested_path = request.args.get("path", "")
    try:
        workbook_path = resolve_allowed_path(requested_path, APP_ROOT, ALLOWED_FILE_ROOTS)
    except ValueError as exc:
        # A rejected path is a client error, not a server fault.
        return jsonify({"error": str(exc)}), 400
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500

    # Kept in a separate try so a ValueError raised while reading the workbook
    # is still reported as a server-side failure.
    try:
        if not workbook_path.is_file():
            return jsonify({"error": "Workbook is not available."}), 404
        return jsonify({"sheets": read_workbook_sheets(workbook_path)})
    except Exception as exc:
        return jsonify({"error": str(exc)}), 500
def download_blueprint():
    """Send Blueprint.xlsx from the data directory as an attachment, or 404 if absent."""
    blueprint_path = APP_ROOT.joinpath(DATA_DIR, "Blueprint.xlsx")
    if blueprint_path.exists():
        return send_file(blueprint_path, as_attachment=True, download_name="Blueprint.xlsx")
    return jsonify({"error": "Blueprint has not been generated yet."}), 404
def download_cleaned_workbook():
    """Send the workbook at STATE["clean_path"] as an attachment.

    Returns a 404 JSON error when no path is recorded or the file is missing.
    The download name is ``cleaned_`` plus the uploaded filename, falling back
    to the on-disk name.
    """
    recorded_path = STATE["clean_path"]
    if recorded_path:
        workbook_path = resolve_allowed_path(recorded_path, APP_ROOT, ALLOWED_FILE_ROOTS)
        if workbook_path.is_file():
            base_name = STATE["clean_filename"] or workbook_path.name
            return send_file(
                workbook_path,
                as_attachment=True,
                download_name=f"cleaned_{base_name}",
            )
    return jsonify({"error": "Cleaned workbook is not available."}), 404
def download_applied_workbook():
    """Send the workbook at STATE["apply_workbook_path"] as an attachment.

    Returns a 404 JSON error when no path is recorded or the file is missing.
    """
    recorded_path = STATE["apply_workbook_path"]
    if recorded_path:
        workbook_path = resolve_allowed_path(recorded_path, APP_ROOT, ALLOWED_FILE_ROOTS)
        if workbook_path.is_file():
            base_name = STATE["apply_workbook_filename"] or workbook_path.name
            # NOTE(review): the prefix is "cleaned_" here as well as in the
            # cleaned-workbook download — confirm that is intended.
            return send_file(
                workbook_path,
                as_attachment=True,
                download_name=f"cleaned_{base_name}",
            )
    return jsonify({"error": "Applied workbook is not available."}), 404
def run():
    """Start main.py on the requested workbook and return its output stream.

    Query parameters:
        job_id: Identifier passed to stream_process; a fresh UUID hex is used
            when it is missing or empty.
        input: Workbook path; must pass resolve_allowed_path.
        sheet: Source sheet name (required).
        output_sheet: Target sheet name; DEFAULT_OUTPUT_SHEET_NAME is used
            when it is missing or empty.
        models: Optional value forwarded as ``--models``.

    Returns:
        A ``text/event-stream`` response built from stream_process, or a 400
        JSON error when required parameters are missing or the path is rejected.
    """
    params = request.args
    # args.get(key, default) only applies the default when the key is absent,
    # so an empty "job_id=" or "output_sheet=" would otherwise be used as-is
    # (passing `--output_sheet ""` to the script). Fall back on empty too.
    job_id = params.get("job_id") or uuid.uuid4().hex
    output_sheet = params.get("output_sheet") or DEFAULT_OUTPUT_SHEET_NAME
    input_path = params.get("input", "")
    sheet = params.get("sheet", "")
    model_list = params.get("models", "")

    if not input_path or not sheet:
        return jsonify({"error": "Input file and source sheet are required."}), 400

    try:
        workbook_path = resolve_allowed_path(input_path, APP_ROOT, ALLOWED_FILE_ROOTS)
    except ValueError as exc:
        return jsonify({"error": str(exc)}), 400

    # Argument list (no shell), with -u passed to the interpreter.
    command = [
        sys.executable,
        "-u",
        str(APP_ROOT / "main.py"),
        "--input",
        str(workbook_path),
        "--sheet",
        sheet,
        "--output_sheet",
        output_sheet,
    ]
    if model_list:
        command.extend(["--models", model_list])

    return Response(
        stream_process(command, cwd=APP_ROOT, job_id=job_id),
        mimetype="text/event-stream",
    )
def stop():
    """Ask stop_process to stop the job named by the ``job_id`` query parameter.

    Returns ``{"stopped": True}`` when stop_process reports success, otherwise
    a 404 JSON body saying no active run was found.
    """
    job_id = request.args.get("job_id", "")
    if stop_process(job_id):
        return jsonify({"stopped": True})
    return jsonify({"stopped": False, "message": "No active run found."}), 404
def apply_blueprint():
    """Start apply_blueprint.py on the requested workbook and return its output stream.

    Query parameters:
        input: Workbook path; must pass resolve_allowed_path.
        blueprint: Blueprint path; must pass resolve_allowed_path.
        sheet: Target sheet name, defaulting to DEFAULT_OUTPUT_SHEET_NAME when
            the parameter is absent.

    Returns:
        A ``text/event-stream`` response built from stream_process, or a 400
        JSON error when a value is missing or a path is rejected.
    """
    params = request.args
    input_path = params.get("input", "")
    blueprint_path = params.get("blueprint", "")
    sheet = params.get("sheet", DEFAULT_OUTPUT_SHEET_NAME)

    if not all((input_path, blueprint_path, sheet)):
        return jsonify({"error": "Workbook, blueprint, and target sheet are required."}), 400

    try:
        workbook_path = resolve_allowed_path(input_path, APP_ROOT, ALLOWED_FILE_ROOTS)
        resolved_blueprint_path = resolve_allowed_path(blueprint_path, APP_ROOT, ALLOWED_FILE_ROOTS)
    except ValueError as exc:
        return jsonify({"error": str(exc)}), 400

    script_path = str(APP_ROOT / "apply_blueprint.py")
    command = [sys.executable, "-u", script_path]
    command += ["--input", str(workbook_path)]
    command += ["--blueprint", str(resolved_blueprint_path)]
    command += ["--sheet", sheet]

    event_stream = stream_process(command, cwd=APP_ROOT)
    return Response(event_stream, mimetype="text/event-stream")
if __name__ == "__main__":
    # Direct execution: start Flask's built-in server with threading enabled
    # and the debugger off.
    app.run(threaded=True, debug=False)