Pikeras committed on
Commit
abb6f17
·
verified ·
1 Parent(s): e03f31a

Create web/app.py

Browse files
Files changed (1) hide show
  1. src/web/app.py +178 -0
src/web/app.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ import shutil
7
+ import time
8
+ from collections import defaultdict, deque
9
+ from pathlib import Path
10
+
11
+ import pandas as pd
12
+ from fastapi import BackgroundTasks, FastAPI, HTTPException, Request
13
+ from fastapi.middleware.cors import CORSMiddleware
14
+ from fastapi.responses import FileResponse, JSONResponse
15
+ from fastapi.staticfiles import StaticFiles
16
+
17
+ from web.job_store import JobStore
18
+ from web.schemas import JobPreview, JobRequest, JobSummary, JobStatus
19
+
20
+
21
+ REPO_ROOT = Path(__file__).resolve().parents[2]
22
+ STATIC_DIR = REPO_ROOT / "web" / "static"
23
+ JOBS_DIR = REPO_ROOT / "tmp" / "web_jobs"
24
+ LOG_DIR = REPO_ROOT / "logs"
25
+ LOG_DIR.mkdir(parents=True, exist_ok=True)
26
+
27
+ logging.basicConfig(
28
+ level=logging.INFO,
29
+ filename=str(LOG_DIR / "webapp.log"),
30
+ filemode="a",
31
+ format="%(asctime)s %(levelname)s %(message)s",
32
+ )
33
+ logger = logging.getLogger("equitia.web")
34
+
35
+ app = FastAPI(title="EQUITIA Web API", version="0.1.0")
36
+ app.add_middleware(
37
+ CORSMiddleware,
38
+ allow_origins=["*"],
39
+ allow_credentials=False,
40
+ allow_methods=["*"],
41
+ allow_headers=["*"],
42
+ )
43
+ MAX_PENDING_JOBS = int(os.getenv("EQUITIA_MAX_PENDING_JOBS", "20"))
44
+ RETENTION_MINUTES = int(os.getenv("EQUITIA_RETENTION_MINUTES", "60"))
45
+ job_store = JobStore(JOBS_DIR, max_pendientes=MAX_PENDING_JOBS, retention_minutes=RETENTION_MINUTES)
46
+
47
+
48
+ MAX_BODY_BYTES = 300_000
49
+ RATE_LIMIT_WINDOW = 60
50
+ RATE_LIMIT_REQUESTS = 30
51
+ request_buckets: dict[str, deque[float]] = defaultdict(deque)
52
+ blocked_ips: dict[str, float] = {}
53
+ BLOCK_SECONDS = 300
54
+ MAX_429_BEFORE_BLOCK = 3
55
+ rate_limit_hits: dict[str, int] = defaultdict(int)
56
+
57
+
58
+ @app.middleware("http")
59
+ async def rate_limit_and_size_guard(request: Request, call_next):
60
+ client_ip = request.client.host if request.client else "unknown"
61
+ now = time.time()
62
+
63
+ blocked_until = blocked_ips.get(client_ip)
64
+ if blocked_until and blocked_until > now:
65
+ return JSONResponse(status_code=429, content={"detail": "IP temporalmente bloqueada por exceso de uso."})
66
+ if blocked_until and blocked_until <= now:
67
+ blocked_ips.pop(client_ip, None)
68
+ rate_limit_hits.pop(client_ip, None)
69
+
70
+ bucket = request_buckets[client_ip]
71
+ while bucket and now - bucket[0] > RATE_LIMIT_WINDOW:
72
+ bucket.popleft()
73
+ if len(bucket) >= RATE_LIMIT_REQUESTS:
74
+ rate_limit_hits[client_ip] += 1
75
+ if rate_limit_hits[client_ip] >= MAX_429_BEFORE_BLOCK:
76
+ blocked_ips[client_ip] = now + BLOCK_SECONDS
77
+ return JSONResponse(status_code=429, content={"detail": "Rate limit excedido. Inténtalo más tarde."})
78
+ rate_limit_hits[client_ip] = 0
79
+ bucket.append(now)
80
+
81
+ content_length = request.headers.get("content-length")
82
+ if content_length and int(content_length) > MAX_BODY_BYTES:
83
+ return JSONResponse(status_code=413, content={"detail": "Payload demasiado grande."})
84
+
85
+ return await call_next(request)
86
+
87
+
88
+ @app.get("/api/health")
89
+ def health() -> dict[str, str]:
90
+ return {"status": "ok"}
91
+
92
+
93
+ @app.get("/api/schema/plantilla-personalizada")
94
+ def obtener_schema_plantilla() -> dict:
95
+ ruta = REPO_ROOT / "config" / "schemas" / "plantilla_general_ejemplo.json"
96
+ if not ruta.exists():
97
+ raise HTTPException(status_code=404, detail="Schema no encontrado.")
98
+ with open(ruta, "r", encoding="utf-8") as f:
99
+ return json.load(f)
100
+
101
+
102
+ @app.post("/api/jobs", response_model=JobSummary)
103
+ def crear_job(payload: JobRequest) -> JobSummary:
104
+ try:
105
+ job = job_store.create_job(payload)
106
+ logger.info("Job creado id=%s modo=%s tipo=%s", job.id, payload.modo_evaluacion, payload.tipo_evaluacion)
107
+ return JobSummary(
108
+ id=job.id,
109
+ estado=job.estado,
110
+ creado_en=job.creado_en,
111
+ actualizado_en=job.actualizado_en,
112
+ error=job.error,
113
+ )
114
+ except RuntimeError as exc:
115
+ raise HTTPException(status_code=429, detail=str(exc)) from exc
116
+
117
+
118
+ @app.get("/api/jobs/{job_id}", response_model=JobSummary)
119
+ def estado_job(job_id: str) -> JobSummary:
120
+ job = job_store.get_job(job_id)
121
+ if not job:
122
+ raise HTTPException(status_code=404, detail="Job no encontrado.")
123
+ return JobSummary(
124
+ id=job.id,
125
+ estado=job.estado,
126
+ creado_en=job.creado_en,
127
+ actualizado_en=job.actualizado_en,
128
+ error=job.error,
129
+ )
130
+
131
+
132
+ @app.get("/api/jobs/{job_id}/preview", response_model=JobPreview)
133
+ def preview_job(job_id: str) -> JobPreview:
134
+ job = job_store.get_job(job_id)
135
+ if not job:
136
+ raise HTTPException(status_code=404, detail="Job no encontrado.")
137
+
138
+ resumen = None
139
+ if job.job_dir and (job.job_dir / "resumen.json").exists():
140
+ with open(job.job_dir / "resumen.json", "r", encoding="utf-8") as f:
141
+ resumen = json.load(f)
142
+
143
+ resultados_csv = job.job_dir / "graficos" / "resultados.csv"
144
+ if resultados_csv.exists():
145
+ df = pd.read_csv(resultados_csv, sep="|")
146
+ resumen["muestra"] = df.head(10).to_dict(orient="records")
147
+
148
+ return JobPreview(id=job.id, estado=job.estado, resumen=resumen)
149
+
150
+
151
+ @app.get("/api/jobs/{job_id}/download")
152
+ def descargar_job(job_id: str, background_tasks: BackgroundTasks):
153
+ job = job_store.get_job(job_id)
154
+ if not job:
155
+ raise HTTPException(status_code=404, detail="Job no encontrado.")
156
+ if job.estado != JobStatus.FINALIZADA:
157
+ raise HTTPException(status_code=409, detail="El job no ha finalizado todavía.")
158
+ if not job.job_dir or not job.job_dir.exists():
159
+ raise HTTPException(status_code=404, detail="No se encontraron artefactos para descargar.")
160
+
161
+ zip_base = job.job_dir.parent / f"{job.id}_resultados"
162
+ zip_path = Path(shutil.make_archive(str(zip_base), "zip", str(job.job_dir)))
163
+
164
+ def _cleanup() -> None:
165
+ try:
166
+ if zip_path.exists():
167
+ zip_path.unlink(missing_ok=True)
168
+ job_store.delete_job_artifacts(job_id)
169
+ logger.info("Artefactos eliminados tras descarga job=%s", job_id)
170
+ except Exception as exc:
171
+ logger.error("Error limpiando artefactos job=%s error=%s", job_id, exc)
172
+
173
+ background_tasks.add_task(_cleanup)
174
+ return FileResponse(path=zip_path, filename=f"resultados_{job.id}.zip", media_type="application/zip")
175
+
176
+
177
+ if STATIC_DIR.exists():
178
+ app.mount("/", StaticFiles(directory=str(STATIC_DIR), html=True), name="static")