Spaces:
Sleeping
Sleeping
Upload 4 files
Browse files- Dockerfile +52 -0
- index.html +0 -0
- main.py +1341 -0
- requirements.txt +15 -0
Dockerfile
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ──────────────────────────────────────────────────────────────────
|
| 2 |
+
# Eatlytic 2.0 — Multi-stage Docker build
|
| 3 |
+
# New in v2: reportlab (PDF export), twilio (WhatsApp bot),
|
| 4 |
+
# Hindi OCR model pre-download, /health endpoint
|
| 5 |
+
# ──────────────────────────────────────────────────────────────────
|
| 6 |
+
|
| 7 |
+
# ── Stage 1: Builder ───────────────────────────────────────────────
|
| 8 |
+
FROM python:3.11-slim AS builder
|
| 9 |
+
|
| 10 |
+
WORKDIR /app
|
| 11 |
+
RUN apt-get update && apt-get install -y --no-install-recommends build-essential && \
|
| 12 |
+
rm -rf /var/lib/apt/lists/*
|
| 13 |
+
|
| 14 |
+
COPY requirements.txt .
|
| 15 |
+
RUN pip install --no-cache-dir --prefix=/install -r requirements.txt
|
| 16 |
+
|
| 17 |
+
# ── Stage 2: Runtime ───────────────────────────────────────────────
|
| 18 |
+
FROM python:3.11-slim
|
| 19 |
+
|
| 20 |
+
WORKDIR /app
|
| 21 |
+
|
| 22 |
+
# Runtime libs required by OpenCV / EasyOCR
|
| 23 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 24 |
+
libgl1 libglib2.0-0 libsm6 libxext6 libxrender1 libgomp1 curl \
|
| 25 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 26 |
+
|
| 27 |
+
# Copy installed Python packages from builder
|
| 28 |
+
COPY --from=builder /install /usr/local
|
| 29 |
+
|
| 30 |
+
# App source
|
| 31 |
+
COPY main.py .
|
| 32 |
+
COPY index.html .
|
| 33 |
+
|
| 34 |
+
# Persistent directories for cache & data
|
| 35 |
+
RUN mkdir -p /app/.cache /app/data && chmod 777 /app/.cache /app/data
|
| 36 |
+
ENV HF_HOME=/app/.cache
|
| 37 |
+
ENV PYTHONUNBUFFERED=1
|
| 38 |
+
|
| 39 |
+
# Non-root user
|
| 40 |
+
RUN useradd -m -u 1000 user && chown -R user:user /app
|
| 41 |
+
USER user
|
| 42 |
+
|
| 43 |
+
# Port
|
| 44 |
+
EXPOSE 7860
|
| 45 |
+
|
| 46 |
+
# Health check — hits the /health endpoint every 30s
|
| 47 |
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
|
| 48 |
+
CMD curl -f http://localhost:7860/health || exit 1
|
| 49 |
+
|
| 50 |
+
# Async-ready startup: 1 worker, asyncio event loop
|
| 51 |
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", \
|
| 52 |
+
"--workers", "1", "--loop", "asyncio", "--log-level", "info"]
|
index.html
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
main.py
ADDED
|
@@ -0,0 +1,1341 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os
|
| 2 |
+
import io
|
| 3 |
+
import json
|
| 4 |
+
import logging
|
| 5 |
+
import hashlib
|
| 6 |
+
import base64
|
| 7 |
+
import secrets
|
| 8 |
+
import datetime
|
| 9 |
+
import easyocr
|
| 10 |
+
import cv2
|
| 11 |
+
import numpy as np
|
| 12 |
+
from PIL import Image, ImageDraw, ImageFont, ImageFilter, ImageEnhance
|
| 13 |
+
from io import BytesIO
|
| 14 |
+
from fastapi import FastAPI, File, UploadFile, Form, Request, HTTPException, Security
|
| 15 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 16 |
+
from fastapi.responses import FileResponse, JSONResponse, Response
|
| 17 |
+
from fastapi.security import APIKeyHeader
|
| 18 |
+
from duckduckgo_search import DDGS
|
| 19 |
+
from groq import Groq
|
| 20 |
+
from slowapi import Limiter, _rate_limit_exceeded_handler
|
| 21 |
+
from slowapi.util import get_remote_address
|
| 22 |
+
from slowapi.errors import RateLimitExceeded
|
| 23 |
+
|
| 24 |
+
logging.basicConfig(level=logging.INFO)
|
| 25 |
+
logger = logging.getLogger(__name__)
|
| 26 |
+
|
| 27 |
+
# --- CONFIGURATION ---
|
| 28 |
+
limiter = Limiter(key_func=get_remote_address)
|
| 29 |
+
app = FastAPI(title="Eatlytic: Startup Scale")
|
| 30 |
+
app.state.limiter = limiter
|
| 31 |
+
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)
|
| 32 |
+
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
|
| 33 |
+
|
| 34 |
+
# --- PERSISTENT STORAGE ---
|
| 35 |
+
DATA_DIR = os.path.join(os.getcwd(), "data")
|
| 36 |
+
CACHE_DIR = os.environ.get("HF_HOME", "/app/.cache")
|
| 37 |
+
MODEL_DIR = os.path.join(CACHE_DIR, "easyocr_models")
|
| 38 |
+
|
| 39 |
+
# Make sure the model and data directories exist before anything reads or
# writes them. exist_ok=True avoids the check-then-create race that occurs
# when another process creates the directory between exists() and makedirs().
for d in [MODEL_DIR, DATA_DIR]:
    os.makedirs(d, exist_ok=True)
|
| 42 |
+
|
| 43 |
+
# --- CACHE SETUP ---
|
| 44 |
+
OCR_CACHE_FILE = os.path.join(DATA_DIR, "ocr_cache.json")
|
| 45 |
+
AI_CACHE_FILE = os.path.join(DATA_DIR, "ai_cache.json")
|
| 46 |
+
|
| 47 |
+
def load_cache(file_path):
    """Load a JSON cache file and return its decoded contents.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON document, or an empty dict when the file does not
        exist, cannot be read, or does not contain valid JSON.
    """
    try:
        with open(file_path, "r") as f:
            return json.load(f)
    except FileNotFoundError:
        # A missing cache is the normal first-run case; nothing to report.
        return {}
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        # Unlike a bare ``except:``, this lets KeyboardInterrupt/SystemExit
        # propagate and leaves a trace of why the cache was discarded.
        logging.getLogger(__name__).warning(
            "Could not load cache %s: %s", file_path, exc)
        return {}
|
| 55 |
+
|
| 56 |
+
def save_cache(cache, file_path):
    """Persist *cache* to *file_path* as JSON (best effort).

    The document is first written to a temporary file in the same directory
    and then moved over the target with ``os.replace``. A failure while
    serialising or writing therefore leaves the previous cache file intact
    instead of a truncated one.

    Args:
        cache: JSON-serialisable object to store.
        file_path: Destination path of the JSON file.

    Returns:
        None. I/O and serialisation errors are logged, not raised.
    """
    import tempfile  # local import: not part of the module's import block

    tmp_path = None
    try:
        fd, tmp_path = tempfile.mkstemp(
            dir=os.path.dirname(file_path) or ".", suffix=".tmp")
        with os.fdopen(fd, "w") as f:
            json.dump(cache, f)
        os.replace(tmp_path, file_path)
    except (OSError, TypeError, ValueError) as exc:
        logging.getLogger(__name__).warning(
            "Could not save cache %s: %s", file_path, exc)
        # Do not leave a half-written temporary file behind.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
|
| 62 |
+
|
| 63 |
+
ocr_cache = load_cache(OCR_CACHE_FILE)
|
| 64 |
+
ai_cache = load_cache(AI_CACHE_FILE)
|
| 65 |
+
|
| 66 |
+
# --- SCAN LIMITS & API KEYS (Task 11 + 13) ---
|
| 67 |
+
SCAN_LIMIT_FILE = os.path.join(DATA_DIR, "scan_limits.json")
|
| 68 |
+
API_KEYS_FILE = os.path.join(DATA_DIR, "api_keys.json")
|
| 69 |
+
FREE_SCAN_LIMIT = 10
|
| 70 |
+
|
| 71 |
+
def load_scan_limits():
    """Return the persisted scan-limit data from SCAN_LIMIT_FILE.

    Delegates to ``load_cache`` so every JSON store in this module shares one
    read path; yields ``{}`` when the file is missing or unreadable.
    """
    return load_cache(SCAN_LIMIT_FILE)
|
| 77 |
+
|
| 78 |
+
def save_scan_limits(data):
    """Write *data* to SCAN_LIMIT_FILE as JSON (best effort, never raises).

    Delegates to ``save_cache`` so every JSON store in this module shares one
    write path.
    """
    save_cache(data, SCAN_LIMIT_FILE)
|
| 82 |
+
|
| 83 |
+
def load_api_keys():
    """Return the persisted API-key records from API_KEYS_FILE.

    Delegates to ``load_cache`` so every JSON store in this module shares one
    read path; yields ``{}`` when the file is missing or unreadable.
    """
    return load_cache(API_KEYS_FILE)
|
| 89 |
+
|
| 90 |
+
def save_api_keys(data):
    """Write *data* to API_KEYS_FILE as JSON (best effort, never raises).

    Delegates to ``save_cache`` so every JSON store in this module shares one
    write path.
    """
    save_cache(data, API_KEYS_FILE)
|
| 94 |
+
|
| 95 |
+
scan_limits = load_scan_limits()
|
| 96 |
+
api_keys_db = load_api_keys()
|
| 97 |
+
|
| 98 |
+
# --- API KEY AUTH (Task 13) ---
|
| 99 |
+
api_key_header = APIKeyHeader(name="X-API-Key", auto_error=False)
|
| 100 |
+
|
| 101 |
+
def verify_api_key(api_key: str = Security(api_key_header)):
    """Look up the record stored for the supplied API key.

    Returns the matching entry of ``api_keys_db``, or ``None`` when no key
    was supplied or the key is not in the database.
    """
    return api_keys_db.get(api_key) if api_key else None
|
| 104 |
+
|
| 105 |
+
def generate_api_key(client_name: str, plan: str = "business") -> str:
    """Create, register and persist a new API key.

    Args:
        client_name: Stored under the record's "name" field.
        plan: Stored under the record's "plan" field.

    Returns:
        The new key: "eak_" followed by a URL-safe random token.
    """
    new_key = "eak_" + secrets.token_urlsafe(32)
    record = {
        "name": client_name,
        "plan": plan,
        "scans_this_month": 0,
        "month": "",
        "active": True,
    }
    api_keys_db[new_key] = record
    save_api_keys(api_keys_db)
    return new_key
|
| 113 |
+
|
| 114 |
+
# --- DEVICE FINGERPRINT + SCAN GATE (Task 11) ---
|
| 115 |
+
def get_device_key(request: Request) -> str:
    """Derive a 16-hex-character device fingerprint from a request.

    The fingerprint is the truncated MD5 of "<client ip>:<user-agent>";
    the IP falls back to "unknown" when the request has no client info.
    """
    peer = request.client
    address = peer.host if peer else "unknown"
    agent = request.headers.get("user-agent", "")
    digest = hashlib.md5(f"{address}:{agent}".encode())
    return digest.hexdigest()[:16]
|
| 119 |
+
|
| 120 |
+
def check_and_increment_scan(device_key: str) -> dict:
    """Apply the monthly free-scan quota for one device.

    The per-device record is reset when the stored month differs from the
    current "YYYY-MM". Devices flagged "pro" are always allowed and their
    counter is not incremented. Other devices are refused once the counter
    reaches FREE_SCAN_LIMIT; otherwise the counter is incremented and the
    limits are persisted.

    Returns:
        Dict with "allowed", "scans_used", "scans_remaining" and "is_pro".
    """
    current_month = datetime.date.today().isoformat()[:7]
    usage = scan_limits.setdefault(device_key, {})

    # New month (or brand-new device): restart the counter, keep pro status.
    if usage.get("month") != current_month:
        usage["month"] = current_month
        usage["count"] = 0
        usage["pro"] = usage.get("pro", False)

    if usage.get("pro"):
        return {
            "allowed": True,
            "scans_used": usage["count"],
            "scans_remaining": 9999,
            "is_pro": True,
        }

    if usage["count"] >= FREE_SCAN_LIMIT:
        return {
            "allowed": False,
            "scans_used": usage["count"],
            "scans_remaining": 0,
            "is_pro": False,
        }

    usage["count"] += 1
    save_scan_limits(scan_limits)
    return {
        "allowed": True,
        "scans_used": usage["count"],
        "scans_remaining": FREE_SCAN_LIMIT - usage["count"],
        "is_pro": False,
    }
|
| 135 |
+
|
| 136 |
+
# --- CLIENTS ---
|
| 137 |
+
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
|
| 138 |
+
if not GROQ_API_KEY:
|
| 139 |
+
logger.warning("⚠️ GROQ_API_KEY missing! App will fail.")
|
| 140 |
+
client = None
|
| 141 |
+
else:
|
| 142 |
+
client = Groq(api_key=GROQ_API_KEY)
|
| 143 |
+
|
| 144 |
+
reader = easyocr.Reader(['en', 'ch_sim'], gpu=False, model_storage_directory=MODEL_DIR)
|
| 145 |
+
|
| 146 |
+
# --- MULTI-LANGUAGE READER CACHE (Task 17) ---
|
| 147 |
+
# Readers are expensive to load; cache by language group
|
| 148 |
+
_LANG_READERS: dict = {}
|
| 149 |
+
_EASYOCR_LANG_MAP = {
|
| 150 |
+
"en": ["en"],
|
| 151 |
+
"hi": ["en", "hi"],
|
| 152 |
+
"zh": ["en", "ch_sim"],
|
| 153 |
+
"ta": ["en", "ta"],
|
| 154 |
+
"te": ["en", "te"],
|
| 155 |
+
"bn": ["en", "bn"],
|
| 156 |
+
}
|
| 157 |
+
|
| 158 |
+
def get_reader_for(lang_hint: str):
    """Return a cached EasyOCR reader for the given language hint.

    The hint is mapped through ``_EASYOCR_LANG_MAP`` (defaulting to English
    only); one reader is created per distinct language set and reused.
    """
    langs = _EASYOCR_LANG_MAP.get(lang_hint, ["en"])
    cache_key = "_".join(sorted(langs))
    cached = _LANG_READERS.get(cache_key)
    if cache_key in _LANG_READERS:
        return cached

    logger.info(f"Loading EasyOCR reader for langs: {langs}")
    created = easyocr.Reader(
        langs, gpu=False, model_storage_directory=MODEL_DIR)
    _LANG_READERS[cache_key] = created
    return created
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
# SECTION 1: MULTI-METHOD BLUR DETECTION
|
| 169 |
+
# ══════════════════════════════════════════════════════════════════════
|
| 170 |
+
|
| 171 |
+
def _laplacian_score(gray: np.ndarray) -> float:
    """Return the variance of the Laplacian response of *gray*.

    Low values (roughly below 100) typically indicate a blurry image.
    """
    response = cv2.Laplacian(gray, cv2.CV_64F)
    return float(response.var())
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
def _tenengrad_score(gray: np.ndarray) -> float:
    """Return the Tenengrad focus measure of *gray*.

    Computed as the mean squared Sobel gradient magnitude (3x3 kernels),
    so the value does not scale with the pixel count.
    """
    grad_x = cv2.Sobel(gray, cv2.CV_64F, 1, 0, ksize=3)
    grad_y = cv2.Sobel(gray, cv2.CV_64F, 0, 1, ksize=3)
    energy = grad_x ** 2 + grad_y ** 2
    return float(np.mean(energy))
|
| 189 |
+
|
| 190 |
+
|
| 191 |
+
def _brenner_score(gray: np.ndarray) -> float:
    """Return the Brenner gradient of *gray*.

    The score is the mean squared difference between pixels two columns
    apart. Images with fewer than three columns (or no rows) have no such
    pixel pairs; they score 0.0 instead of NaN, which would otherwise flow
    into the composite blur score.

    Args:
        gray: 2-D greyscale image.

    Returns:
        Mean squared horizontal difference as a float, 0.0 if undefined.
    """
    # Cast before subtracting so uint8 inputs cannot wrap around.
    pixels = gray.astype(np.float64)
    diff = pixels[:, 2:] - pixels[:, :-2]
    if diff.size == 0:
        return 0.0
    return float(np.mean(diff ** 2))
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def _local_blur_map(gray: np.ndarray, block: int = 64) -> float:
    """Return the median per-block Laplacian variance of *gray*.

    The image is tiled into non-overlapping ``block`` x ``block`` patches.
    Taking the median keeps a few sharp or flat regions from dominating the
    score of a mostly-blurry image.

    Args:
        gray: 2-D greyscale image.
        block: Side length of each square patch in pixels.

    Returns:
        Median patch score, or 0.0 when the image is smaller than one block.
    """
    h, w = gray.shape
    scores = []
    # "+ 1" keeps the last full block when a dimension is an exact multiple
    # of ``block``; without it that block row/column was silently dropped
    # (and a 64x64 image produced no blocks at all).
    for y in range(0, h - block + 1, block):
        for x in range(0, w - block + 1, block):
            patch = gray[y:y + block, x:x + block]
            scores.append(cv2.Laplacian(patch, cv2.CV_64F).var())
    return float(np.median(scores)) if scores else 0.0
|
| 213 |
+
|
| 214 |
+
|
| 215 |
+
def assess_image_quality(content: bytes) -> dict:
    """Estimate how blurry an encoded image is.

    Four focus measures (global Laplacian variance, Tenengrad, Brenner
    gradient and block-median Laplacian variance) are each scaled to 0-100
    and merged into a weighted composite. The composite is mapped to a
    severity band and a coarse quality label.

    Args:
        content: Encoded image bytes readable by PIL.

    Returns:
        Dict with the composite "blur_score", the raw per-method scores,
        "is_blurry", "blur_severity" and "quality". If anything fails, a
        reduced dict with blur_score 999 and "unknown" labels is returned.
    """
    try:
        rgb = np.array(Image.open(BytesIO(content)).convert("RGB"))
        gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)

        lap = _laplacian_score(gray)
        ten = _tenengrad_score(gray)
        bren = _brenner_score(gray)
        local = _local_blur_map(gray)

        # Scale every raw score to 0-100 so the weights are comparable.
        lap_norm = min(lap / 300.0 * 100, 100)
        ten_norm = min(ten / 500.0 * 100, 100)
        bren_norm = min(bren / 200.0 * 100, 100)
        local_norm = min(local / 300.0 * 100, 100)

        # The block-median score carries the largest weight.
        composite = (
            0.25 * lap_norm +
            0.20 * ten_norm +
            0.20 * bren_norm +
            0.35 * local_norm
        )

        # Severity bands; "mild" still counts as blurry so that enhancement
        # is attempted.
        if composite < 15:
            severity = "severe"
        elif composite < 35:
            severity = "moderate"
        elif composite < 55:
            severity = "mild"
        else:
            severity = "none"
        is_blurry = severity != "none"

        quality_by_severity = {
            "severe": "poor",
            "moderate": "poor",
            "mild": "fair",
            "none": "good",
        }

        return {
            "blur_score": round(composite, 2),
            "laplacian_score": round(lap, 2),
            "tenengrad_score": round(ten, 2),
            "brenner_score": round(bren, 2),
            "local_median_score": round(local, 2),
            "is_blurry": is_blurry,
            "blur_severity": severity,
            "quality": quality_by_severity[severity],
        }
    except Exception as e:
        logger.error(f"Blur detection error: {e}")
        return {
            "blur_score": 999,
            "is_blurry": False,
            "blur_severity": "unknown",
            "quality": "unknown",
        }
|
| 281 |
+
|
| 282 |
+
|
| 283 |
+
# ══════════════════════════════════════════════════════════════════════
|
| 284 |
+
# SECTION 2: DEBLURRING & IMAGE ENHANCEMENT PIPELINE
|
| 285 |
+
# ══════════════════════════════════════════════════════════════════════
|
| 286 |
+
|
| 287 |
+
def _wiener_deconvolution(gray: np.ndarray, psf_size: int = 5,
                          noise_ratio: float = 0.02) -> np.ndarray:
    """Sharpen *gray* by Wiener deconvolution with an assumed Gaussian PSF.

    Works in the frequency domain:
        restored = (H* / (|H|^2 + K)) * Y
    where H is the FFT of the PSF, Y the FFT of the blurred image and K the
    noise ratio.

    Args:
        gray: 2-D greyscale image with values in 0-255.
        psf_size: Side length of the Gaussian PSF; forced to an odd value
            of at least 3.
        noise_ratio: Regularisation constant K.

    Returns:
        The restored image as uint8. Images smaller than the PSF are
        returned as an unmodified copy.
    """
    # Force an odd kernel size >= 3 so the PSF has a well-defined centre.
    psf_size = max(3, psf_size | 1)

    # Separable Gaussian -> 2-D PSF, normalised to unit sum.
    kernel_1d = cv2.getGaussianKernel(psf_size, psf_size / 3.0)
    psf = kernel_1d @ kernel_1d.T
    psf /= psf.sum()

    h, w = gray.shape
    ph, pw = psf.shape
    if h < ph or w < pw:
        # The PSF cannot be embedded in an image this small.
        return gray.copy()

    psf_padded = np.zeros_like(gray, dtype=np.float64)
    psf_padded[:ph, :pw] = psf

    # Move the PSF centre to the origin. The parentheses matter:
    # ``-ph // 2`` parses as ``(-ph) // 2`` and rounds towards -inf, which
    # shifts an odd-sized kernel one pixel too far and offsets the result.
    psf_padded = np.roll(psf_padded, -(ph // 2), axis=0)
    psf_padded = np.roll(psf_padded, -(pw // 2), axis=1)

    # Frequency-domain Wiener filter.
    Y = np.fft.fft2(gray.astype(np.float64) / 255.0)
    H = np.fft.fft2(psf_padded)
    W = np.conj(H) / (np.abs(H) ** 2 + noise_ratio)
    restored = np.real(np.fft.ifft2(W * Y))

    # Back to the 0-255 uint8 range.
    return np.clip(restored * 255.0, 0, 255).astype(np.uint8)
|
| 323 |
+
|
| 324 |
+
|
| 325 |
+
def _unsharp_mask(img_np: np.ndarray, strength: float = 1.5,
                  radius: int = 3) -> np.ndarray:
    """Sharpen an image with unsharp masking.

    result = original + strength * (original - gaussian_blur(original)),
    clipped to 0-255 and returned as uint8. The blur kernel is
    (2 * radius + 1) pixels square.
    """
    ksize = radius * 2 + 1
    blurred = cv2.GaussianBlur(img_np, (ksize, ksize), 0)
    # Signed arithmetic: the detail layer can be negative.
    detail = cv2.subtract(img_np.astype(np.int16), blurred.astype(np.int16))
    boosted = img_np.astype(np.float32) + strength * detail
    return np.clip(boosted, 0, 255).astype(np.uint8)
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
def _apply_clahe(img_np: np.ndarray,
                 clip: float = 2.5, tile: int = 8) -> np.ndarray:
    """Apply CLAHE to the lightness channel of an RGB image.

    The image is converted to LAB, only the L channel is equalised (so the
    colour channels are left untouched), and the result is converted back
    to RGB.

    Args:
        img_np: RGB image array.
        clip: CLAHE clip limit.
        tile: Tile grid size along each axis.
    """
    lab = cv2.cvtColor(img_np, cv2.COLOR_RGB2LAB)
    equaliser = cv2.createCLAHE(clipLimit=clip, tileGridSize=(tile, tile))
    lightness = lab[:, :, 0]
    lab[:, :, 0] = equaliser.apply(lightness)
    return cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)
|
| 349 |
+
|
| 350 |
+
|
| 351 |
+
def _denoise(img_np: np.ndarray, h: int = 6) -> np.ndarray:
    """Denoise an RGB image with OpenCV's non-local-means filter.

    *h* is used as the filter strength for both luminance and colour
    components (template window 7, search window 21). Running this before
    sharpening avoids amplifying noise.
    """
    as_bgr = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
    cleaned = cv2.fastNlMeansDenoisingColored(as_bgr, None, h, h, 7, 21)
    return cv2.cvtColor(cleaned, cv2.COLOR_BGR2RGB)
|
| 359 |
+
|
| 360 |
+
|
| 361 |
+
def deblur_and_enhance(content: bytes, severity: str = "moderate") -> tuple[bytes, str]:
    """Run the deblurring and enhancement pipeline on an encoded image.

    Stages, in order:
      1. Upscale so the short side is at least 1200 px.
      2. Non-local-means denoise ("severe" and "moderate" only).
      3. Wiener deconvolution on the grey image, written back into the LAB
         L channel (skipped for "mild").
      4. Colour unsharp masking.
      5. CLAHE contrast enhancement.
      6. A light 3x3 sharpening kernel.
    Parameters of stages 2-5 depend on *severity*.

    Args:
        content: Encoded image bytes readable by PIL.
        severity: "severe", "moderate" or "mild"; other values get the
            "moderate" settings but skip the denoise stage.

    Returns:
        (JPEG bytes of the enhanced image, " → "-joined log of the stages).
    """
    img_np = np.array(Image.open(BytesIO(content)).convert("RGB"))
    methods_used = []
    is_severe = severity == "severe"

    # Stage 1: upscale small images.
    h, w = img_np.shape[:2]
    target_short = 1200
    short_side = min(h, w)
    if short_side < target_short:
        scale = target_short / short_side
        new_h = int(h * scale)
        new_w = int(w * scale)
        img_np = cv2.resize(
            img_np, (new_w, new_h), interpolation=cv2.INTER_LANCZOS4)
        methods_used.append(f"upscale({new_w}×{new_h})")

    # Stage 2: denoise before sharpening.
    if is_severe or severity == "moderate":
        h_param = 8 if is_severe else 5
        img_np = _denoise(img_np, h=h_param)
        methods_used.append(f"NLM-denoise(h={h_param})")

    # Stage 3: Wiener deconvolution on the grey channel.
    if severity != "mild":
        psf_size = 9 if is_severe else 5
        noise_ratio = 0.01 if is_severe else 0.025
        gray = cv2.cvtColor(img_np, cv2.COLOR_RGB2GRAY)
        restored = _wiener_deconvolution(gray, psf_size, noise_ratio)
        # Write the restored grey image into the L channel of LAB.
        lab = cv2.cvtColor(img_np, cv2.COLOR_RGB2LAB)
        lab[:, :, 0] = restored
        img_np = cv2.cvtColor(lab, cv2.COLOR_LAB2RGB)
        methods_used.append(f"Wiener(psf={psf_size},K={noise_ratio})")

    # Stage 4: unsharp masking.
    strength = {"severe": 2.2, "moderate": 1.8, "mild": 1.2}.get(severity, 1.8)
    radius = {"severe": 4, "moderate": 3, "mild": 2}.get(severity, 3)
    img_np = _unsharp_mask(img_np, strength=strength, radius=radius)
    methods_used.append(f"unsharp(s={strength},r={radius})")

    # Stage 5: CLAHE contrast enhancement.
    clip = {"severe": 3.0, "moderate": 2.5, "mild": 1.8}.get(severity, 2.5)
    img_np = _apply_clahe(img_np, clip=clip)
    methods_used.append(f"CLAHE(clip={clip})")

    # Stage 6: final light sharpening; the kernel weights sum to 1.
    sharpen_kernel = np.array(
        [[0, -0.3, 0],
         [-0.3, 2.2, -0.3],
         [0, -0.3, 0]],
        dtype=np.float32,
    )
    img_np = cv2.filter2D(img_np, -1, sharpen_kernel)
    img_np = np.clip(img_np, 0, 255).astype(np.uint8)
    methods_used.append("sharpen-kernel")

    # Encode the result as JPEG.
    out = BytesIO()
    Image.fromarray(img_np).save(out, format="JPEG", quality=92)
    return out.getvalue(), " → ".join(methods_used)
|
| 437 |
+
|
| 438 |
+
|
| 439 |
+
def image_to_b64(content: bytes) -> str:
    """Return *content* as a ``data:image/jpeg;base64,...`` URL string."""
    encoded = base64.b64encode(content).decode()
    return f"data:image/jpeg;base64,{encoded}"
|
| 442 |
+
|
| 443 |
+
|
| 444 |
+
# ══════════════════════════════════════════════════════════════════════
|
| 445 |
+
# SECTION 3: OCR QUALITY COMPARISON HELPER
|
| 446 |
+
# ══════════════════════════════════════════════════════════════════════
|
| 447 |
+
|
| 448 |
+
def _ocr_quality_score(ocr_result: dict) -> float:
    """Score an OCR result; higher is better.

    Combines "word_count" (weight 0.6) with "avg_confidence" scaled to
    0-100 (weight 0.4). Missing keys count as 0.
    """
    words = ocr_result.get("word_count", 0)
    confidence = ocr_result.get("avg_confidence", 0)
    word_part = words * 0.6
    confidence_part = confidence * 100 * 0.4
    return word_part + confidence_part
|
| 455 |
+
|
| 456 |
+
|
| 457 |
+
# ══════════════════════════════════════════════════════════════════════
|
| 458 |
+
# SECTION 4: LABEL CONTENT DETECTION (unchanged logic, kept intact)
|
| 459 |
+
# ══════════════════════════════════════════════════════════════════════
|
| 460 |
+
|
| 461 |
+
LABEL_KEYWORDS = [
|
| 462 |
+
'ingredients', 'nutrition', 'nutritional', 'calories', 'calorie',
|
| 463 |
+
'protein', 'fat', 'carbohydrate', 'carbs', 'sodium', 'sugar', 'sugars',
|
| 464 |
+
'fiber', 'fibre', 'serving', 'cholesterol', 'saturated', 'trans',
|
| 465 |
+
'vitamin', 'calcium', 'iron', 'potassium', 'per 100g', 'per 100 g',
|
| 466 |
+
'daily value', 'daily values', 'amount per', 'total fat',
|
| 467 |
+
'contains', 'may contain', 'preservative', 'flavour', 'flavor',
|
| 468 |
+
'colour', 'color', 'emulsifier', 'stabilizer', 'antioxidant',
|
| 469 |
+
'wheat', 'milk', 'soy', 'salt', 'water', 'oil', 'starch', 'extract',
|
| 470 |
+
'mg', 'mcg', 'kcal', 'kj', '% dv', '%dv', 'g per', 'per serving',
|
| 471 |
+
'fssai', 'veg', 'non-veg', 'best before', 'mfg', 'mrp', 'net wt',
|
| 472 |
+
'manufactured', 'packed', 'distributed',
|
| 473 |
+
]
|
| 474 |
+
|
| 475 |
+
FRONT_PACK_SIGNALS = [
|
| 476 |
+
'new', 'improved', 'original', 'classic', 'natural', 'organic',
|
| 477 |
+
'premium', 'delicious', 'flavoured', 'variety', 'crunchy', 'crispy',
|
| 478 |
+
'fresh', 'tasty', 'yummy', 'light', 'baked', 'roasted',
|
| 479 |
+
]
|
| 480 |
+
|
| 481 |
+
# BUG FIX: words like 'wheat','milk','salt','oil' are in LABEL_KEYWORDS but
|
| 482 |
+
# also appear on the FRONT of a pack. These NUTRITION TABLE ANCHORS are specific
|
| 483 |
+
# to the back label — at least 2 must be present to confirm a nutrition panel.
|
| 484 |
+
NUTRITION_TABLE_ANCHORS = [
|
| 485 |
+
'per 100g', 'per 100 g', 'per serving', 'serving size', 'amount per',
|
| 486 |
+
'daily value', 'daily values', '% dv', '%dv',
|
| 487 |
+
'calories', 'calorie', 'kcal', 'kj', 'energy',
|
| 488 |
+
'nutrition facts', 'nutritional information', 'nutrition information',
|
| 489 |
+
'total fat', 'saturated fat', 'trans fat',
|
| 490 |
+
'total carbohydrate', 'dietary fiber', 'total sugars',
|
| 491 |
+
'ingredients:', 'ingredients list',
|
| 492 |
+
'fssai', 'best before', 'mfg', 'mrp', 'net wt',
|
| 493 |
+
]
|
| 494 |
+
|
| 495 |
+
|
| 496 |
+
def detect_label_presence(ocr_text: str) -> dict:
    """Decide from OCR text whether the photo shows a back-of-pack label.

    The text is lower-cased and searched (plain substring test) for entries of
    LABEL_KEYWORDS, FRONT_PACK_SIGNALS and NUTRITION_TABLE_ANCHORS. A label is
    only accepted when at least two anchors are present.

    Args:
        ocr_text: Raw text returned by OCR; may be empty or None.

    Returns:
        A dict with the keys ``has_label`` (bool), ``confidence``
        ('high' | 'medium' | 'low'), ``label_hits`` and ``front_hits``
        (matched keywords) and ``suggestion`` (None, 'no_text',
        'wrong_side', 'no_label' or 'partial').
    """
    if not ocr_text:
        return {'has_label': False, 'confidence': 'high',
                'label_hits': [], 'front_hits': [], 'suggestion': 'no_text'}

    haystack = ocr_text.lower()

    def found_in_text(vocabulary):
        # Keeps vocabulary order; each entry is reported at most once.
        return [term for term in vocabulary if term in haystack]

    label_hits = found_in_text(LABEL_KEYWORDS)
    front_hits = found_in_text(FRONT_PACK_SIGNALS)
    n_label = len(label_hits)
    n_front = len(front_hits)

    # Generic ingredient words also show up on the front of a pack, so two
    # table-specific anchors are required before a label is accepted.
    has_nutrition_table = len(found_in_text(NUTRITION_TABLE_ANCHORS)) >= 2

    if has_nutrition_table:
        if n_label >= 3:
            confidence = 'high' if n_label >= 6 else 'medium'
            return {'has_label': True,
                    'confidence': confidence,
                    'label_hits': label_hits[:5], 'front_hits': front_hits[:3],
                    'suggestion': None}
        if n_label >= 1 and n_front <= 2:
            return {'has_label': True, 'confidence': 'low',
                    'label_hits': label_hits, 'front_hits': front_hits,
                    'suggestion': None}

    if n_front > n_label or not has_nutrition_table:
        # No nutrition table found at all → likely the wrong side of the pack.
        # NOTE(review): inside this branch the condition below is always true,
        # so 'no_label' is never produced — confirm whether that is intended.
        looks_like_front = n_front > 0 or not has_nutrition_table
        suggestion = 'wrong_side' if looks_like_front else 'no_label'
        return {'has_label': False, 'confidence': 'high',
                'label_hits': label_hits, 'front_hits': front_hits[:3],
                'suggestion': suggestion}

    return {'has_label': True, 'confidence': 'low',
            'label_hits': label_hits, 'front_hits': front_hits,
            'suggestion': 'partial'}
|
| 535 |
+
|
| 536 |
+
|
| 537 |
+
# ══════════════════════════════════════════════════════════════════════
|
| 538 |
+
# SECTION 5: OCR
|
| 539 |
+
# ══════════════════════════════════════════════════════════════════════
|
| 540 |
+
|
| 541 |
+
def get_server_ocr(content: bytes, lang_hint: str = "en") -> dict:
    """Run OCR on an encoded image and return the text with readability stats.

    Results are memoised in ``ocr_cache`` (and persisted through
    ``save_cache``) under the MD5 of the image bytes plus the language hint,
    so the same upload is only recognised once per language.

    Args:
        content: Encoded image bytes (any format PIL can open).
        lang_hint: Language code passed to ``get_reader_for``.

    Returns:
        A dict with ``text`` (space-joined fragments), ``word_count``
        (number of recognised fragments), ``avg_confidence`` (mean
        confidence rounded to 3 decimals) and ``is_readable`` (at least 3
        fragments with mean confidence above 0.15). All values are native
        Python types so the dict can be JSON-serialised.
    """
    # MD5 is only a cache fingerprint here, not a security measure.
    img_hash = hashlib.md5(content).hexdigest()
    cache_key = f"{img_hash}_{lang_hint}"
    if cache_key in ocr_cache:
        return ocr_cache[cache_key]

    # convert() returns an independent image, so the source can be closed.
    with Image.open(BytesIO(content)) as source:
        img = source.convert("RGB")
    img.thumbnail((1200, 1200))  # cap the longest side to bound OCR time
    img_np = np.array(img)

    active_reader = get_reader_for(lang_hint)
    detections = active_reader.readtext(img_np, detail=1)

    # With detail=1 each detection is (bounding_box, text, confidence).
    words = [d[1] for d in detections]
    # Confidences arrive as numpy scalars; float() keeps the derived values
    # (notably the boolean below) plain Python and therefore JSON-safe.
    confidences = [float(d[2]) for d in detections]

    avg_conf = sum(confidences) / len(confidences) if confidences else 0.0
    word_count = len(words)

    result = {
        "text"          : " ".join(words),
        "word_count"    : word_count,
        "avg_confidence": round(avg_conf, 3),
        "is_readable"   : bool(word_count >= 3 and avg_conf > 0.15),
    }
    ocr_cache[cache_key] = result
    save_cache(ocr_cache, OCR_CACHE_FILE)
    return result
|
| 568 |
+
|
| 569 |
+
|
| 570 |
+
|
| 571 |
+
# ══════════════════════════════════════════════════════════════════════
|
| 572 |
+
# SECTION 6: WEB SEARCH & UTILITIES
|
| 573 |
+
# ══════════════════════════════════════════════════════════════════════
|
| 574 |
+
|
| 575 |
+
def get_live_search(query: str) -> str:
    """Fetch up to three DuckDuckGo text results for ``query``.

    Best-effort helper: any search failure is logged and replaced by a fixed
    fallback sentence so that the analysis pipeline can continue.

    Args:
        query: Free-text search query.

    Returns:
        One "title: body" line per result, joined with newlines, or
        "No web data available." when the search fails or yields nothing.
    """
    fallback = "No web data available."
    try:
        with DDGS() as ddgs:
            snippets = [
                f"{hit.get('title', '')}: {hit.get('body', '')}"
                for hit in ddgs.text(query, max_results=3)
            ]
    except Exception as exc:
        # Deliberately broad (network errors, rate limits, API changes), but
        # unlike a bare ``except`` it lets KeyboardInterrupt/SystemExit through.
        logger.warning("Web search failed: %s", exc)
        return fallback
    return "\n".join(snippets) if snippets else fallback
|
| 583 |
+
|
| 584 |
+
|
| 585 |
+
# Maps the `language` form code to the language name that is interpolated
# into the LLM prompt. Callers look codes up with
# LANGUAGE_MAP.get(language, "English"), so unknown codes fall back to English.
LANGUAGE_MAP = {
    "en": "English",
    "zh": "Simplified Chinese (简体中文)",
    "es": "Spanish (Español)",
    "ar": "Arabic (العربية)",
    "fr": "French (Français)",
    "hi": "Hindi (हिन्दी)",
    "pt": "Portuguese (Português)",
    "de": "German (Deutsch)",
}
|
| 595 |
+
|
| 596 |
+
|
| 597 |
+
# ══════════════════════════════════════════════════════════════════════
|
| 598 |
+
# SECTION 7: ROUTES
|
| 599 |
+
# ══════════════════════════════════════════════════════════════════════
|
| 600 |
+
|
| 601 |
+
@app.get("/")
async def home():
    """Serve the frontend page (``index.html``, resolved from the working directory)."""
    landing_page = 'index.html'
    return FileResponse(landing_page)
|
| 604 |
+
|
| 605 |
+
|
| 606 |
+
@app.post("/check-image")
@limiter.limit("30/minute")
async def check_image(request: Request, image: UploadFile = File(...)):
    """
    Pre-flight image quality check.

    Reads the uploaded file fully into memory and returns the report produced
    by ``assess_image_quality`` unchanged (blur scores and severity).
    """
    raw_bytes = await image.read()
    report = assess_image_quality(raw_bytes)
    return report
|
| 615 |
+
|
| 616 |
+
|
| 617 |
+
@app.post("/enhance-preview")
@limiter.limit("20/minute")
async def enhance_preview(request: Request, image: UploadFile = File(...)):
    """
    Deblur an upload and return the enhanced picture as base-64.

    Lets the frontend show what the enhanced image looks like before a full
    analysis is run. Images that are not reported as blurry are left alone
    and only the quality report is returned.
    """
    payload = await image.read()
    quality = assess_image_quality(payload)

    if quality["is_blurry"]:
        severity = quality["blur_severity"]
        enhanced_bytes, method_log = deblur_and_enhance(payload, severity)
        encoded = image_to_b64(enhanced_bytes)
        return JSONResponse({
            "deblurred"     : True,
            "image_b64"     : encoded,
            "method_log"    : method_log,
            "blur_severity" : severity,
            "quality_before": quality,
        })

    # Sharp enough already: nothing to enhance.
    return JSONResponse({
        "deblurred": False,
        "message"  : "Image is already clear — no enhancement needed.",
        "quality"  : quality,
    })
|
| 645 |
+
|
| 646 |
+
|
| 647 |
+
@app.post("/ocr")
@limiter.limit("20/minute")
async def perform_ocr(
    request: Request,
    image: UploadFile = File(...),
    language: str = Form("en"),
):
    """Run OCR on the upload and return the ``get_server_ocr`` result as-is
    (text, word count, average confidence and readability flag)."""
    uploaded_bytes = await image.read()
    return get_server_ocr(uploaded_bytes, language)
|
| 658 |
+
|
| 659 |
+
|
| 660 |
+
def _normalize_chart_data(chart_data):
    """Rescale a 3-item [safe, moderate, risky] list to integers summing to 100.

    Anything that is not a 3-item list of numbers with a positive total is
    returned unchanged, so a malformed LLM answer cannot abort the scan.
    """
    if not isinstance(chart_data, (list, tuple)) or len(chart_data) != 3:
        return chart_data
    try:
        values = [float(v) for v in chart_data]
    except (TypeError, ValueError):
        return chart_data
    total = sum(values)
    if total <= 0:
        return chart_data
    scaled = [round(v * 100 / total) for v in values]
    # Independent rounding can land on 99 or 101; absorb the drift in the
    # largest slice so the chart always adds up to exactly 100.
    drift = 100 - sum(scaled)
    if drift:
        scaled[scaled.index(max(scaled))] += drift
    return scaled


def _request_label_analysis(prompt: str, model: str):
    """Ask the Groq client for a JSON-object completion of ``prompt``."""
    return client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0.1,
        max_tokens=2000,
        response_format={"type": "json_object"},
    )


def _build_label_prompt(*, persona, age_group, product_category, lang_name,
                        output_lang_instr, confidence_note, blur_context,
                        extracted_text, web_context) -> str:
    """Assemble the full instruction prompt sent to the LLM for one label."""
    return f"""
[INST] You are an expert nutritional scientist and health auditor. Analyze the product label below.
{output_lang_instr}
Target Persona: {persona}
Age Group: {age_group}
Product Category: {product_category}
{confidence_note}
{blur_context}
Label Text: "{extracted_text}"
Web Context: "{web_context}"

Return ONLY valid JSON — no markdown, no preamble — with this exact structure:
{{
"product_name": "Short product name from the label",
"product_category": "Detected category (e.g. Snack, Dairy, Beverage)",
"score": <INTEGER 1-10 based on SCORING RUBRIC below — do NOT copy example numbers>,
"verdict": "Two-word verdict",
"chart_data": [<Safe%>, <Moderate%>, <Risky%>],
"summary": "Professional 2-sentence summary in {lang_name}.",
"eli5_explanation": "Explain using simple words and emojis for a child in {lang_name}.",
"molecular_insight": "Explain the biochemical/chemical impact on the body in {lang_name}.",
"paragraph_benefits": "One full paragraph about the product's main benefits in {lang_name}.",
"paragraph_uniqueness": "If this product has unique characteristics, describe them. Otherwise suggest 2 better alternatives. Write in {lang_name}.",
"is_unique": <BOOLEAN true if it has unique characteristics, false otherwise>,
"nutrient_breakdown": [
{{"name": "Protein", "value": <ACTUAL g from label>, "unit": "g", "rating": "good", "impact": "Brief impact note in {lang_name}"}},
{{"name": "Sugar", "value": <ACTUAL g from label>, "unit": "g", "rating": "moderate", "impact": "Brief impact note in {lang_name}"}},
{{"name": "Fat", "value": <ACTUAL g from label>, "unit": "g", "rating": "good", "impact": "Brief impact note in {lang_name}"}},
{{"name": "Sodium", "value": <ACTUAL mg from label>, "unit": "mg", "rating": "caution", "impact": "Brief impact note in {lang_name}"}},
{{"name": "Fiber", "value": <ACTUAL g from label>, "unit": "g", "rating": "good", "impact": "Brief impact note in {lang_name}"}}
],
"pros": ["Benefit 1 in {lang_name}", "Benefit 2", "Benefit 3"],
"cons": ["Risk 1 in {lang_name}", "Risk 2"],
"age_warnings": [
{{"group": "Children", "emoji": "👶", "status": "warning", "message": "Warning or approval in {lang_name}"}},
{{"group": "Adults", "emoji": "🧑", "status": "good", "message": "Info in {lang_name}"}},
{{"group": "Seniors", "emoji": "👴", "status": "caution", "message": "Advice in {lang_name}"}},
{{"group": "Pregnant", "emoji": "🤰", "status": "caution", "message": "Safety info in {lang_name}"}}
],
"better_alternative": "A specific healthier alternative product in {lang_name}."
}}
STRICT SCORING RUBRIC — score MUST reflect actual label nutrition, not examples:
9-10 : Whole food / minimal processing, no added sugar, low sodium, high fiber/protein
7-8 : Moderately processed, low sugar (<5g/100g), reasonable sodium, decent nutrients
5-6 : Processed, moderate sugar (5-15g/100g) OR moderate sodium (400-700mg/100g)
3-4 : High sugar (>15g/100g) OR high sodium (>700mg/100g) OR poor nutrient profile
1-2 : Ultra-processed, very high sugar/sodium/saturated fat, minimal nutritional value

RULES:
- score MUST match the actual nutrient values found — do NOT use 6 or 7 as a default
- chart_data must be [Safe%, Moderate%, Risky%] summing to exactly 100
- nutrient "rating" must be one of: "good", "moderate", "caution", "bad"
- age_warnings "status" must be one of: "good", "caution", "warning"
- All text values MUST be in {lang_name}
- Extract ACTUAL values from the label text, do NOT use placeholder numbers
[/INST]
"""


@app.post("/analyze")
@limiter.limit("15/minute")
async def analyze_product(
    request: Request,
    persona          : str        = Form(...),
    age_group        : str        = Form("adult"),
    product_category : str        = Form("general"),
    language         : str        = Form("en"),
    extracted_text   : str        = Form(None),
    image            : UploadFile = File(...),
):
    """
    Full nutrition-label analysis pipeline with automatic blur correction.

    Processing steps:
    1. Multi-method blur detection.
    2. If blurry → deblur/enhance, run OCR on BOTH versions and keep the
       enhanced one when its OCR quality is at least 85% of the original's.
    3. OCR (skipped when the client supplied ``extracted_text`` and the
       enhanced image was not chosen), then label presence detection.
    4. Cache lookup keyed on the request parameters and the full label text.
    5. Web search + prompt construction + Groq LLM call (with a smaller
       fallback model if the primary call raises).
    6. Returns the analysis JSON with fresh ``blur_info`` and ``scan_meta``.

    Returns a plain ``{"error": ...}`` dict for recoverable problems (missing
    API key, no text, no label, unexpected failure) and a 402 JSON response
    when the device has exhausted its scans.
    """
    if not client:
        return {"error": "Server Error: Missing GROQ_API_KEY in Settings"}

    # ── Scan gate (Task 11) ───────────────────────────────────────────
    # NOTE(review): the scan is checked/incremented before the image is
    # validated, so rejected uploads presumably still count — confirm.
    device_key = get_device_key(request)
    scan_check = check_and_increment_scan(device_key)
    if not scan_check["allowed"]:
        return JSONResponse(status_code=402, content={
            "error"       : "scan_limit_reached",
            "message"     : f"You've used all {FREE_SCAN_LIMIT} free scans this month.",
            "upgrade_url" : "/pro",
            "scans_used"  : scan_check["scans_used"],
        })

    def fresh_scan_meta():
        # Per-device counters: always rebuilt per request, never cached.
        return {
            "scans_remaining": scan_check["scans_remaining"],
            "is_pro"         : scan_check["is_pro"],
            "scans_used"     : scan_check["scans_used"],
        }

    try:
        content = await image.read()

        # ── Step 1: Blur Detection ────────────────────────────────────
        quality = assess_image_quality(content)
        blur_info = {
            "detected"   : quality["is_blurry"],
            "severity"   : quality["blur_severity"],
            "score"      : quality["blur_score"],
            "deblurred"  : False,
            "method_log" : None,
            "image_b64"  : None,
            "ocr_source" : "original",
        }

        working_content = content  # may be swapped for deblurred version

        # ── Step 2: Conditional Deblurring ───────────────────────────
        if quality["is_blurry"]:
            logger.info(
                f"Blur detected — severity={quality['blur_severity']}, "
                f"composite_score={quality['blur_score']}"
            )
            try:
                enhanced_bytes, method_log = deblur_and_enhance(
                    content, quality["blur_severity"]
                )

                # Run OCR on both and compare quality
                ocr_orig = get_server_ocr(content, language)
                ocr_enhanced = get_server_ocr(enhanced_bytes, language)

                orig_score = _ocr_quality_score(ocr_orig)
                enhanced_score = _ocr_quality_score(ocr_enhanced)

                logger.info(
                    f"OCR quality — original: {orig_score:.1f}, "
                    f"enhanced: {enhanced_score:.1f}"
                )

                if enhanced_score >= orig_score * 0.85:
                    # Enhanced is at least 85% as good → prefer it
                    working_content = enhanced_bytes
                    blur_info["deblurred"] = True
                    blur_info["method_log"] = method_log
                    blur_info["image_b64"] = image_to_b64(enhanced_bytes)
                    blur_info["ocr_source"] = "deblurred"
                    extracted_text = None  # force re-OCR from enhanced image
                    logger.info("Using deblurred image for analysis.")
                else:
                    logger.info("Original OCR was better; keeping original.")

            except Exception as e:
                # Deblurring is an optimisation; fall back to the original.
                logger.warning(f"Deblurring failed, using original: {e}")

        # ── Step 3: OCR ───────────────────────────────────────────────
        if not extracted_text:
            ocr_result = get_server_ocr(working_content, language)
            extracted_text = ocr_result["text"]
            ocr_word_count = ocr_result["word_count"]
        else:
            ocr_word_count = len(extracted_text.split())

        # ── Step 3a: Hard-block if truly no text ──────────────────────
        if not extracted_text or ocr_word_count == 0:
            return {
                "error"  : "no_text",
                "message": "No text found on this image. Make sure the label side is facing the camera.",
                "tip"    : "flip_product",
            }

        # ── Step 3b: Label presence check ─────────────────────────────
        label_check = detect_label_presence(extracted_text)
        if not label_check["has_label"]:
            if label_check["suggestion"] == "wrong_side":
                return {
                    "error"            : "no_label",
                    "message"          : "This looks like the front of the product. Please flip it over and scan the back label.",
                    "tip"              : "wrong_side",
                    "front_words_found": label_check.get("front_hits", []),
                }
            return {
                "error"  : "no_label",
                "message": "Could not find nutrition or ingredient information. Please upload a clear photo of the back label.",
                "tip"    : "flip_product",
            }

        label_confidence = label_check.get("confidence", "medium")

        # ── Step 4: Cache lookup ──────────────────────────────────────
        # The key hashes the FULL label text and includes every input that
        # feeds the prompt. The previous key used only the first 80
        # characters, so labels sharing a common header collided. The "v3"
        # prefix retires entries written under the old scheme.
        text_digest = hashlib.sha256(
            extracted_text.encode("utf-8", "replace")
        ).hexdigest()
        cache_key = (
            f"v3:{language}:{persona}:{age_group}:"
            f"{product_category}:{text_digest}"
        )
        if cache_key in ai_cache:
            cached = dict(ai_cache[cache_key])
            cached["blur_info"] = blur_info          # always inject fresh blur_info
            cached["scan_meta"] = fresh_scan_meta()  # and this device's counters
            return cached

        # ── Step 5: Web search ─────────────────────────────────────────
        web_context = get_live_search(
            f"health analysis ingredients {extracted_text[:120]}"
        )

        # ── Step 6: Prompt construction ────────────────────────────────
        lang_name = LANGUAGE_MAP.get(language, "English")
        output_lang_instr = (
            f"CRITICAL: Respond ENTIRELY in {lang_name}. "
            f"Every single field value must be in {lang_name}."
        )
        confidence_note = (
            "Note: label text may be partially visible. Do your best with available information and set confidence=low in response."
            if label_confidence == "low" else ""
        )

        # Blur context for the AI — helps it interpret partially illegible text
        blur_context = ""
        if blur_info["detected"]:
            if blur_info["deblurred"]:
                blur_context = (
                    f"Note: The image was detected as {blur_info['severity']}ly blurry and "
                    f"has been enhanced using advanced deblurring. "
                    f"The OCR text was extracted from the enhanced image. "
                    f"Some characters might still be uncertain — prioritise nutrients "
                    f"and ingredients you can identify with high confidence."
                )
            else:
                blur_context = (
                    f"Note: The image has some blur (severity: {blur_info['severity']}). "
                    f"OCR was run on the original image. Where text is ambiguous, "
                    f"use your domain knowledge to infer likely values."
                )

        prompt = _build_label_prompt(
            persona=persona,
            age_group=age_group,
            product_category=product_category,
            lang_name=lang_name,
            output_lang_instr=output_lang_instr,
            confidence_note=confidence_note,
            blur_context=blur_context,
            extracted_text=extracted_text,
            web_context=web_context,
        )

        # ── Step 7: Groq LLM call ─────────────────────────────────────
        try:
            completion = _request_label_analysis(prompt, "llama-3.3-70b-versatile")
        except Exception as e:
            logger.warning(f"Primary model failed, using fallback: {e}")
            completion = _request_label_analysis(prompt, "llama-3.1-8b-instant")

        result = json.loads(completion.choices[0].message.content)
        if not isinstance(result, dict):
            raise ValueError("LLM response is not a JSON object")

        # ── Step 8: Validate chart_data ───────────────────────────────
        if "chart_data" in result:
            result["chart_data"] = _normalize_chart_data(result["chart_data"])

        # ── Step 9: Cache the device-independent analysis only ───────
        # blur_info (which may carry a base-64 image) and scan_meta belong
        # to this request, so they are attached after the cache write.
        ai_cache[cache_key] = dict(result)
        save_cache(ai_cache, AI_CACHE_FILE)

        # ── Step 10: Attach per-request metadata & return ────────────
        result["blur_info"] = blur_info
        result["scan_meta"] = fresh_scan_meta()
        return result

    except Exception as e:
        # Top-level boundary: log with traceback, return a short message.
        logger.exception(f"Analysis error: {e}")
        return {"error": f"Scan failed: {str(e)[:100]}... Please try again."}
|
| 933 |
+
|
| 934 |
+
# ══════════════════════════════════════════════════════════════════════
|
| 935 |
+
# SECTION 8: NEW ENDPOINTS (Tasks 6, 11, 13, 14, 18)
|
| 936 |
+
# ══════════════════════════════════════════════════════════════════════
|
| 937 |
+
|
| 938 |
+
# ── Health check (for Dockerfile HEALTHCHECK) ─────────────────────────
|
| 939 |
+
@app.get("/health")
async def health():
    """Liveness probe: always reports status "ok" together with version "2.0"."""
    return dict(status="ok", version="2.0")
|
| 942 |
+
|
| 943 |
+
|
| 944 |
+
# ── Pro activation via Razorpay (Task 11) ─────────────────────────────
|
| 945 |
+
@app.post("/activate-pro")
@limiter.limit("10/minute")
async def activate_pro(request: Request, payment_id: str = Form(...)):
    """Mark the calling device as Pro after a Razorpay payment.

    The device record in ``scan_limits`` gets ``pro=True`` and the current
    ``YYYY-MM`` month; an existing scan count is kept (0 for a new device).
    The table is then persisted with ``save_scan_limits``.

    Raises:
        HTTPException: 400 when ``payment_id`` is blank.
    """
    # SECURITY: payment_id is only logged — it is never verified against
    # Razorpay here, so any non-empty value unlocks Pro. Verify the payment
    # server-side before trusting this endpoint in production.
    payment_id = payment_id.strip()
    if not payment_id:
        raise HTTPException(status_code=400, detail="payment_id is required.")

    device_key = get_device_key(request)
    record = scan_limits.setdefault(device_key, {})
    record["pro"] = True
    record["month"] = datetime.date.today().isoformat()[:7]
    record.setdefault("count", 0)
    save_scan_limits(scan_limits)

    # %r keeps user-supplied text from forging extra log lines.
    logger.info("Pro activated for device %s, payment_id=%r", device_key, payment_id)
    return {"status": "activated", "message": "Pro activated! 100 scans/month unlocked."}
|
| 957 |
+
|
| 958 |
+
|
| 959 |
+
# ── Scan status check (for frontend banner) ───────────────────────────
|
| 960 |
+
@app.get("/scan-status")
async def scan_status(request: Request):
    """Report scan usage and Pro status for the calling device.

    A device with no record for the current ``YYYY-MM`` month is reported as
    a fresh free-tier device.
    NOTE(review): that also applies to a Pro device whose stored month is
    stale — confirm Pro is meant to show as inactive until the record is
    refreshed.
    """
    usage = scan_limits.get(get_device_key(request), {})
    current_month = datetime.date.today().isoformat()[:7]

    if usage.get("month") == current_month:
        used = usage.get("count", 0)
        if usage.get("pro"):
            remaining = 9999  # sentinel reported for Pro devices
        else:
            remaining = max(0, FREE_SCAN_LIMIT - used)
        return {
            "scans_used"      : used,
            "scans_remaining" : remaining,
            "is_pro"          : usage.get("pro", False),
            "limit"           : FREE_SCAN_LIMIT,
        }

    # Nothing recorded for this month yet.
    return {"scans_used": 0, "scans_remaining": FREE_SCAN_LIMIT,
            "is_pro": False, "limit": FREE_SCAN_LIMIT}
|
| 976 |
+
|
| 977 |
+
|
| 978 |
+
# ── Shareable PNG card (Task 6) ───────────────────────────────────────
|
| 979 |
+
@app.post("/generate-share-card")
@limiter.limit("20/minute")
async def generate_share_card(
    request     : Request,
    product_name: str = Form(...),
    score       : int = Form(...),
    verdict     : str = Form(...),
    top_warning : str = Form(""),
    top_pro     : str = Form(""),
):
    """Generate a 1080×1080 shareable PNG card for Instagram/WhatsApp.

    The card shows the score inside a coloured ring (green ≥ 7, amber ≥ 4,
    red otherwise), the truncated product name and verdict, optional
    pro/warning banners and a branding line. The score is clamped to 0–10.

    Returns:
        A ``Response`` carrying the PNG bytes as an attachment named
        ``eatlytic-scan.png``.
    """
    W, H = 1080, 1080
    BG = (15, 17, 23)
    img = Image.new("RGB", (W, H), BG)
    draw = ImageDraw.Draw(img)

    def font_at(size):
        # Pillow >= 10.1 can scale its built-in font. Older releases only
        # offer a fixed-size bitmap font and reject the ``size`` argument.
        try:
            return ImageFont.load_default(size=size)
        except (TypeError, OSError, ImportError):
            return ImageFont.load_default()

    def centered(xy, text, fill, size):
        font = font_at(size)
        if isinstance(font, ImageFont.FreeTypeFont):
            draw.text(xy, text, fill=fill, anchor="mm", font=font)
            return
        # Bitmap fallback: no anchor support and Latin-1 glyphs only, so
        # centre manually and replace characters it cannot encode.
        safe = text.encode("latin-1", "replace").decode("latin-1")
        try:
            left, top, right, bottom = font.getbbox(safe)
            width, height = right - left, bottom - top
        except AttributeError:
            width, height = font.getsize(safe)
        draw.text((xy[0] - width / 2, xy[1] - height / 2), safe,
                  fill=fill, font=font)

    score = max(0, min(10, score))
    score_rgb = (34, 197, 94) if score >= 7 else (245, 158, 11) if score >= 4 else (239, 68, 68)

    # Score ring
    ring_box = [340, 160, 740, 560]
    draw.ellipse(ring_box, outline=score_rgb, width=18)
    centered((540, 360), str(score), score_rgb, 96)
    centered((540, 420), "/10", (100, 116, 139), 32)

    # Product name (truncate)
    pname = product_name[:38] + ("…" if len(product_name) > 38 else "")
    centered((540, 610), pname, (255, 255, 255), 44)
    centered((540, 660), verdict[:50], (148, 163, 184), 30)

    # Pro banner
    if top_pro:
        draw.rectangle([60, 700, 1020, 760], fill=(15, 60, 40))
        centered((540, 730), f"✓ {top_pro[:65]}", (74, 222, 128), 26)

    # Warning banner
    if top_warning:
        draw.rectangle([60, 775, 1020, 840], fill=(124, 29, 29))
        centered((540, 807), f"⚠ {top_warning[:65]}", (252, 165, 165), 26)

    # Branding
    centered((540, 1010), "eatlytic.com • scan any food label, no barcode needed",
             (71, 85, 105), 24)

    buf = BytesIO()
    img.save(buf, format="PNG", optimize=True)
    return Response(
        content=buf.getvalue(), media_type="image/png",
        headers={"Content-Disposition": "attachment; filename=eatlytic-scan.png"})
|
| 1031 |
+
|
| 1032 |
+
|
| 1033 |
+
# ── B2B API endpoint with API key auth (Task 13) ──────────────────────
|
| 1034 |
+
@app.post("/api/v1/analyze")
@limiter.limit("60/minute")
async def api_analyze(
    request      : Request,
    image        : UploadFile = File(...),
    language     : str = Form("en"),
    persona      : str = Form("general adult"),
    age_group    : str = Form("adult"),
    api_key_data : dict = Security(verify_api_key),
):
    """B2B API endpoint — requires X-API-Key header.

    Enforces the key's monthly quota, then runs blur check → optional
    deblur → OCR → label detection → LLM analysis. ``api_usage`` and
    ``blur_info`` are attached per request and never stored in the shared
    cache, so one client's name and counters cannot reach another client.

    Raises:
        HTTPException: 401 invalid key, 403 suspended key, 429 quota
            reached, 503 LLM client not configured, 500 analysis failure.
    """
    if not api_key_data:
        raise HTTPException(status_code=401,
                            detail="Invalid API key. Get one at eatlytic.com/api")
    if not api_key_data.get("active"):
        raise HTTPException(status_code=403, detail="API key suspended.")
    # Checked before touching the quota so an unconfigured server does not
    # burn the client's scans.
    if not client:
        raise HTTPException(status_code=503,
                            detail="Server Error: Missing GROQ_API_KEY in Settings")

    month_key = datetime.date.today().isoformat()[:7]
    if api_key_data.get("month") != month_key:
        api_key_data["month"] = month_key
        api_key_data["scans_this_month"] = 0

    LIMITS = {"business": 1000, "enterprise": 99999}
    limit = LIMITS.get(api_key_data.get("plan"), 1000)
    used = api_key_data.get("scans_this_month", 0)
    if used >= limit:
        raise HTTPException(status_code=429,
                            detail=f"Monthly limit ({limit} scans) reached. Upgrade at eatlytic.com/api")

    api_key_data["scans_this_month"] = used + 1
    save_api_keys(api_keys_db)

    def fresh_api_usage():
        # Per-client data: rebuilt on every request, never cached.
        return {"scans_this_month": api_key_data["scans_this_month"],
                "limit": limit, "client": api_key_data.get("name")}

    # Same pipeline as /analyze, with a shorter prompt.
    content = await image.read()
    quality = assess_image_quality(content)
    working = content
    blur_info = {"detected": quality["is_blurry"], "severity": quality["blur_severity"],
                 "score": quality["blur_score"]}
    if quality["is_blurry"]:
        try:
            enhanced, mlog = deblur_and_enhance(content, quality["blur_severity"])
            o_score = _ocr_quality_score(get_server_ocr(content, language))
            e_score = _ocr_quality_score(get_server_ocr(enhanced, language))
            if e_score >= o_score * 0.85:
                working = enhanced
                blur_info["deblurred"] = True
                blur_info["method_log"] = mlog
        except Exception as e:
            # Deblurring is optional; continue with the original image.
            logger.warning(f"B2B deblur failed: {e}")

    ocr = get_server_ocr(working, language)
    text = ocr["text"]
    lc = detect_label_presence(text)
    if not lc["has_label"]:
        return {"error": "no_label", "message": "No nutrition label detected in image."}

    # Hash the full text (the first 80 characters are often a shared header)
    # and include every input that shapes the prompt.
    text_digest = hashlib.sha256(text.encode("utf-8", "replace")).hexdigest()
    cache_key = f"b2b2:{language}:{persona}:{age_group}:{text_digest}"
    if cache_key in ai_cache:
        cached = dict(ai_cache[cache_key])
        cached["blur_info"] = blur_info
        cached["api_usage"] = fresh_api_usage()
        return cached

    web_ctx = get_live_search(f"health analysis ingredients {text[:120]}")
    lang_name = LANGUAGE_MAP.get(language, "English")
    prompt = f"[INST] Analyze: \"{text}\". Web: \"{web_ctx}\". Persona: {persona}. " \
             f"Age group: {age_group}. " \
             f"Respond in {lang_name} as valid JSON with: product_name, score(1-10), " \
             f"verdict, summary, nutrient_breakdown, pros, cons, age_warnings, better_alternative. [/INST]"

    try:
        comp = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.1, max_tokens=2000,
            response_format={"type": "json_object"})
        result = json.loads(comp.choices[0].message.content)
        if not isinstance(result, dict):
            raise ValueError("LLM response is not a JSON object")
        # Cache only the client-independent analysis.
        ai_cache[cache_key] = dict(result)
        save_cache(ai_cache, AI_CACHE_FILE)
        result["blur_info"] = blur_info
        result["api_usage"] = fresh_api_usage()
        return result
    except Exception as e:
        raise HTTPException(status_code=500,
                            detail=f"Analysis failed: {str(e)[:100]}") from e
|
| 1116 |
+
|
| 1117 |
+
|
| 1118 |
+
# ── Admin: create API key (protect with env var in production) ────────
|
| 1119 |
+
@app.post("/admin/create-api-key")
async def create_api_key_endpoint(
    admin_token : str = Form(...),
    client_name : str = Form(...),
    plan        : str = Form("business"),
):
    """Create a B2B API key for ``client_name`` on the given ``plan``.

    The caller must present the value of the ``ADMIN_TOKEN`` environment
    variable. When that variable is unset or empty the endpoint is disabled
    rather than falling back to a guessable default.

    Raises:
        HTTPException: 503 when ADMIN_TOKEN is not configured, 403 when
            the supplied token does not match.
    """
    import hmac  # local import: only this endpoint needs it

    expected = os.environ.get("ADMIN_TOKEN", "")
    if not expected:
        raise HTTPException(
            status_code=503,
            detail="Admin API disabled: ADMIN_TOKEN is not configured.")
    # Constant-time comparison; bytes are used because compare_digest
    # rejects non-ASCII str arguments.
    if not hmac.compare_digest(admin_token.encode("utf-8"),
                               expected.encode("utf-8")):
        raise HTTPException(status_code=403, detail="Invalid admin token.")
    key = generate_api_key(client_name, plan)
    return {"api_key": key, "client": client_name, "plan": plan}
|
| 1130 |
+
|
| 1131 |
+
|
| 1132 |
+
# ── PDF export (Task 18) ──────────────────────────────────────────────
|
| 1133 |
+
@app.post("/export-pdf")
@limiter.limit("10/minute")
async def export_pdf(request: Request, analysis_json: str = Form(...)):
    """Generate a PDF report from analysis JSON. Requires reportlab.

    Args:
        request: Incoming request (needed by the rate limiter decorator).
        analysis_json: JSON object as a string. Keys read here are
            ``product_name``, ``score``, ``verdict``, ``summary``,
            ``nutrient_breakdown``, ``pros``, ``cons`` and ``age_warnings``.

    Returns:
        An ``application/pdf`` response served as an attachment, or a JSON
        error with status 400 (payload is not a JSON object) or 501
        (reportlab is not installed).
    """
    import re
    from xml.sax.saxutils import escape

    try:
        data = json.loads(analysis_json)
    except Exception:
        return JSONResponse({"error": "Invalid JSON"}, status_code=400)
    # Valid JSON can still be a list/number/string; everything below needs
    # a mapping, so reject other shapes up front instead of failing with 500.
    if not isinstance(data, dict):
        return JSONResponse({"error": "analysis_json must be a JSON object"},
                            status_code=400)

    try:
        from reportlab.lib.pagesizes import A4
        from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
        from reportlab.platypus import (SimpleDocTemplate, Paragraph,
                                        Spacer, Table, TableStyle)
        from reportlab.lib import colors as rl_colors
        from reportlab.lib.units import cm
    except ImportError:
        return JSONResponse(
            {"error": "reportlab not installed. Add 'reportlab' to requirements.txt."},
            status_code=501)

    def text(value) -> str:
        """Return value as text that is safe inside Paragraph markup."""
        # Paragraph parses its input as XML-like markup, so caller-supplied
        # '<' and '&' must be escaped or they are treated as tags/entities.
        return escape("" if value is None else str(value))

    def items(key: str) -> list:
        """Return data[key] when it is a list, otherwise an empty list."""
        value = data.get(key)
        return value if isinstance(value, list) else []

    buf = BytesIO()
    doc = SimpleDocTemplate(buf, pagesize=A4,
                            rightMargin=2*cm, leftMargin=2*cm,
                            topMargin=2*cm, bottomMargin=2*cm)
    stys = getSampleStyleSheet()
    story = []

    story.append(Paragraph("Eatlytic Food Label Analysis", stys["Title"]))
    story.append(Paragraph(
        f"Product: {text(data.get('product_name') or 'Unknown')}",
        stys["Heading2"]))
    story.append(Spacer(1, 0.4*cm))

    # The score drives the colour thresholds, so it must be numeric; fall
    # back to 0 when the payload carries something that is not a number.
    score = data.get("score", 0)
    try:
        score_num = float(score)
    except (TypeError, ValueError, OverflowError):
        score, score_num = 0, 0.0
    sc = "22c55e" if score_num >= 7 else "f59e0b" if score_num >= 4 else "ef4444"
    story.append(Paragraph(
        f"<font color='#{sc}'>Health Score: {text(score)}/10 — "
        f"{text(data.get('verdict', ''))}</font>",
        stys["Heading1"]))
    story.append(Spacer(1, 0.4*cm))

    if data.get("summary"):
        story.append(Paragraph("Summary", stys["Heading2"]))
        story.append(Paragraph(text(data["summary"]), stys["Normal"]))
        story.append(Spacer(1, 0.4*cm))

    nutrients = [n for n in items("nutrient_breakdown") if isinstance(n, dict)]
    if nutrients:
        story.append(Paragraph("Nutrient Breakdown", stys["Heading2"]))
        # Plain-string table cells are drawn literally (no markup parsing),
        # so they are stringified but not XML-escaped.
        tbl_data = [["Nutrient", "Amount", "Rating"]]
        for n in nutrients:
            tbl_data.append([
                str(n.get("name") or ""),
                f"{n.get('value', '')} {n.get('unit', '')}",
                str(n.get("rating") or "").upper(),
            ])
        tbl = Table(tbl_data, colWidths=[6*cm, 4*cm, 4*cm])
        tbl.setStyle(TableStyle([
            ("BACKGROUND", (0, 0), (-1, 0), rl_colors.HexColor("#1D9E75")),
            ("TEXTCOLOR", (0, 0), (-1, 0), rl_colors.white),
            ("FONTSIZE", (0, 0), (-1, -1), 10),
            ("ROWBACKGROUNDS", (0, 1), (-1, -1),
             [rl_colors.HexColor("#f8faf8"), rl_colors.white]),
            ("GRID", (0, 0), (-1, -1), 0.4, rl_colors.HexColor("#d0d8d4")),
            # TableStyle documents per-side padding commands only, so the
            # intended uniform 6pt padding is spelled out for each side.
            ("LEFTPADDING", (0, 0), (-1, -1), 6),
            ("RIGHTPADDING", (0, 0), (-1, -1), 6),
            ("TOPPADDING", (0, 0), (-1, -1), 6),
            ("BOTTOMPADDING", (0, 0), (-1, -1), 6),
        ]))
        story.append(tbl)
        story.append(Spacer(1, 0.4*cm))

    pros = items("pros")
    if pros:
        story.append(Paragraph("Benefits", stys["Heading2"]))
        for p in pros:
            story.append(Paragraph(f"✓ {text(p)}", stys["Normal"]))
    cons = items("cons")
    if cons:
        story.append(Spacer(1, 0.3*cm))
        story.append(Paragraph("Concerns", stys["Heading2"]))
        for c in cons:
            story.append(Paragraph(f"✗ {text(c)}", stys["Normal"]))

    warnings = [w for w in items("age_warnings") if isinstance(w, dict)]
    if warnings:
        story.append(Spacer(1, 0.4*cm))
        story.append(Paragraph("Age-Group Warnings", stys["Heading2"]))
        for w in warnings:
            story.append(Paragraph(
                f"{text(w.get('emoji', ''))} {text(w.get('group', ''))} — "
                f"{text(w.get('message', ''))}",
                stys["Normal"]))

    story.append(Spacer(1, 0.6*cm))
    story.append(Paragraph(
        "Generated by Eatlytic — eatlytic.com | AI food label analysis",
        ParagraphStyle("footer", parent=stys["Normal"],
                       fontSize=8, textColor=rl_colors.grey)))

    doc.build(story)
    buf.seek(0)

    # Restrict the filename to a conservative ASCII set: the value lands in
    # an HTTP header, where quotes, control characters or non-latin-1 text
    # would corrupt the header or fail to encode.
    raw_name = str(data.get("product_name") or "scan")
    safe_name = re.sub(r"[^A-Za-z0-9._-]+", "-", raw_name)[:40].strip("-") or "scan"
    return Response(
        content=buf.getvalue(), media_type="application/pdf",
        headers={"Content-Disposition":
                 f'attachment; filename="eatlytic-{safe_name}.pdf"'})
|
| 1227 |
+
|
| 1228 |
+
|
| 1229 |
+
# ── WhatsApp webhook (Task 7) — requires twilio in requirements.txt ───
|
| 1230 |
+
@app.post("/whatsapp-webhook")
async def whatsapp_webhook(request: Request):
    """Twilio WhatsApp sandbox webhook.

    Reads the posted form. When ``MediaUrl0`` is present the image is
    downloaded, enhanced if ``assess_image_quality`` flags it as blurry,
    OCR'd, and summarised by the chat model; otherwise a welcome message is
    returned. Any failure while handling the image is logged and turned into
    a generic apology so the sender always gets a reply.

    Returns:
        A TwiML document (``application/xml``) containing one message.
    """
    try:
        from twilio.twiml.messaging_response import MessagingResponse
    except ImportError:
        return Response(
            content="<Response><Message>twilio not installed.</Message></Response>",
            media_type="application/xml")

    form = await request.form()
    media_url = form.get("MediaUrl0")
    resp = MessagingResponse()
    msg = resp.message()

    if media_url:
        try:
            import httpx
            from urllib.parse import urlsplit

            # The URL comes straight from the request body. Only contact
            # Twilio-hosted HTTPS URLs, otherwise a forged request could make
            # the server send the account credentials (basic auth below) to
            # an arbitrary host or probe internal addresses.
            parts = urlsplit(str(media_url))
            host = (parts.hostname or "").lower()
            is_twilio = host == "twilio.com" or host.endswith(".twilio.com")
            if parts.scheme != "https" or not is_twilio:
                raise ValueError(f"refusing non-Twilio media URL host {host!r}")

            TWILIO_SID = os.environ.get("TWILIO_ACCOUNT_SID", "")
            TWILIO_TOKEN = os.environ.get("TWILIO_AUTH_TOKEN", "")
            # httpx does not follow redirects unless asked to; without this a
            # redirecting media URL would yield the redirect body, not the image.
            async with httpx.AsyncClient(follow_redirects=True) as hc:
                media_resp = await hc.get(media_url,
                                          auth=(TWILIO_SID, TWILIO_TOKEN))
                # Do not feed an error page to the OCR pipeline.
                media_resp.raise_for_status()
                img_bytes = media_resp.content

            quality = assess_image_quality(img_bytes)
            if quality["is_blurry"]:
                img_bytes, _ = deblur_and_enhance(img_bytes, quality["blur_severity"])

            ocr_r = get_server_ocr(img_bytes, "en")
            lc = detect_label_presence(ocr_r["text"])

            if not lc["has_label"]:
                msg.body("❌ Couldn't find a nutrition label. "
                         "Please send the *back* of the packaging.")
            elif not client:
                msg.body("⚠️ AI service unavailable. "
                         "Full analysis at *eatlytic.com*")
            else:
                # The live web search that used to run here was never fed
                # into the prompt, so it only added latency and was dropped.
                prompt = (f"In 5 bullet points, give a plain WhatsApp-friendly health "
                          f"summary of this food label: \"{ocr_r['text'][:400]}\". "
                          f"Start with the health score /10.")
                comp = client.chat.completions.create(
                    model="llama-3.1-8b-instant",
                    messages=[{"role": "user", "content": prompt}],
                    temperature=0.1, max_tokens=400)
                summary = comp.choices[0].message.content.strip()
                msg.body(f"🔍 *Eatlytic Analysis*\n\n{summary}\n\n"
                         f"_Full analysis: eatlytic.com_")
        except Exception as e:
            # Top-level boundary for the webhook: log and reply politely.
            logger.error(f"WhatsApp error: {e}")
            msg.body("⚠️ Something went wrong. Try again or visit *eatlytic.com*")
    else:
        msg.body("👋 Welcome to *Eatlytic*!\n\n"
                 "Send me a photo of any food label (back of pack) "
                 "and I'll analyse it instantly.\n\n"
                 "Works even on blurry photos 📸\nFree — no barcode needed.")

    return Response(content=str(resp), media_type="application/xml")
|
| 1289 |
+
|
| 1290 |
+
|
| 1291 |
+
# ── OCR accuracy test helper (Task 1) ────────────────────────────────
|
| 1292 |
+
@app.post("/test-accuracy")
@limiter.limit("5/minute")
async def test_accuracy(
    request: Request,
    image: UploadFile = File(...),
    ground_truth: str = Form(""),
):
    """Compare OCR output to ground truth. Returns F1 + blur scores.

    Args:
        request: Incoming request (needed by the rate limiter decorator).
        image: Uploaded label photo.
        ground_truth: Expected label text; when empty every F1 value is 0.0.

    Returns:
        A dict with ``blur_score``, ``blur_severity``, ``is_blurry`` and an
        ``original_ocr`` section (word count, average confidence, F1). When
        the image is blurry and the enhancement pass succeeds, an
        ``enhanced_ocr`` section with the same fields plus ``f1_delta``
        (enhanced minus original) is added.
    """
    content = await image.read()
    quality = assess_image_quality(content)

    # Baseline: OCR on the untouched upload.
    ocr_orig = get_server_ocr(content, "en")

    # Second pass on the deblurred image, only when the photo is blurry.
    ocr_enhanced = None
    if quality["is_blurry"]:
        try:
            enhanced_bytes, _ = deblur_and_enhance(content, quality["blur_severity"])
            ocr_enhanced = get_server_ocr(enhanced_bytes, "en")
        except Exception as exc:
            # Best-effort: the baseline result is still returned, but the
            # failure is logged instead of disappearing silently.
            logger.warning(f"test-accuracy: enhancement pass failed: {exc}")

    def f1(pred: str, truth: str) -> float:
        """Return the F1 score over unique lowercase whitespace-split words."""
        if not truth:
            return 0.0
        p_w = set(pred.lower().split())
        t_w = set(truth.lower().split())
        tp = len(p_w & t_w)
        prec = tp / len(p_w) if p_w else 0
        rec = tp / len(t_w) if t_w else 0
        return round(2 * prec * rec / (prec + rec), 3) if (prec + rec) else 0.0

    f1_orig = f1(ocr_orig["text"], ground_truth)
    result = {
        "blur_score": quality["blur_score"],
        "blur_severity": quality["blur_severity"],
        "is_blurry": quality["is_blurry"],
        "original_ocr": {
            "word_count": ocr_orig["word_count"],
            "avg_confidence": ocr_orig["avg_confidence"],
            "f1_vs_truth": f1_orig,
        },
    }
    if ocr_enhanced:
        # Score each OCR result once and reuse the values for the delta.
        f1_enh = f1(ocr_enhanced["text"], ground_truth)
        result["enhanced_ocr"] = {
            "word_count": ocr_enhanced["word_count"],
            "avg_confidence": ocr_enhanced["avg_confidence"],
            "f1_vs_truth": f1_enh,
            "f1_delta": round(f1_enh - f1_orig, 3),
        }
    return result
|
requirements.txt
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi
|
| 2 |
+
uvicorn[standard]
|
| 3 |
+
python-multipart
|
| 4 |
+
requests
|
| 5 |
+
httpx
|
| 6 |
+
pydantic
|
| 7 |
+
pillow
|
| 8 |
+
easyocr
|
| 9 |
+
opencv-python-headless
|
| 10 |
+
numpy
|
| 11 |
+
duckduckgo-search==6.3.7
|
| 12 |
+
groq
|
| 13 |
+
slowapi
|
| 14 |
+
reportlab
|
| 15 |
+
twilio
|