Spaces:

libacc
/

claveros-extract

Sleeping

File size: 8,607 Bytes

"""
Claveros 4-page extraction Space — ZeroGPU on H200.

Processes 4-page slim claveros PDFs:
  Page 0 = Nivelación     → votantes_e11, votos_urna, votos_incinerados
  Page 1 = Verde (3020)   → verde_lista, cand_7, verde_total
  Page 2 = Especiales     → votos_blancos, votos_nulos, votos_no_marcados
  Page 3 = Constancias    → constancias text, hubo_recuento, firmas_count

Call via Gradio Client:
  from gradio_client import Client
  client = Client("libacc/claveros-extract")
  result = client.predict(pdf_file, api_name="/extract")

Co-Authored-By: Oz <oz-agent@warp.dev>
"""

import json
import os
import spaces
import gradio as gr
import torch
import fitz  # PyMuPDF
from PIL import Image
from transformers import AutoModelForImageTextToText, AutoProcessor
from qwen_vl_utils import process_vision_info

# ── Model (loaded at module level for ZeroGPU CUDA emulation) ─────────
# Hugging Face model identifier used for both the weights and the processor.
MODEL_ID = "Qwen/Qwen3-VL-8B-Instruct"
# Rasterisation resolution; render_page scales pages by DPI / 72.
DPI = 300

# The model and processor are created once, when the module is imported, and
# are shared by every vlm_call invocation.
print(f"Loading {MODEL_ID}...")
# Weights are requested in bfloat16 and device placement is left to
# device_map="auto".
model = AutoModelForImageTextToText.from_pretrained(
    MODEL_ID,
    dtype=torch.bfloat16,
    device_map="auto",
)
# NOTE(review): min_pixels / max_pixels presumably bound the resized image
# area, expressed here as multiples of 28x28 patches — confirm against the
# processor documentation.
processor = AutoProcessor.from_pretrained(
    MODEL_ID, min_pixels=256 * 28 * 28, max_pixels=1280 * 28 * 28
)
print("Model loaded.")

# ── Prompts ───────────────────────────────────────────────────────────

# Prompt sent with page 0 (Nivelación). Asks for three handwritten totals plus
# the printed dept / muni / mesa identifiers, returned as one JSON object.
PROMPT_NIV = """\
E-14 CLAVEROS SENADO — NIVELACIÓN page.
Read handwritten digit boxes:
1. "TOTAL VOTANTES FORMULARIO E-11" → votantes_e11
2. "TOTAL VOTOS DE SENADO EN LA URNA" → votos_urna
3. "TOTAL VOTOS INCINERADOS" → votos_incinerados (often 0)
Also read printed: dept (2-digit), muni (3-digit), mesa.
KIT/Form numbers at bottom are NOT votes.
Each row: 3 boxes (hundreds|tens|ones). Empty=0.
Return ONLY:
{"votantes_e11": <int>, "votos_urna": <int>, "votos_incinerados": <int>, "dept": "<str>", "muni": "<str>", "mesa": "<str>"}"""

# Prompt sent with page 1 (list 3020). Asks for the list-only votes, the votes
# for candidate row 7 and the party total; the same two inequalities stated in
# the VERIFY line are re-checked in extract_form to decide whether to retry.
PROMPT_VERDE = """\
E-14 CLAVEROS SENADO — ALIANZA POR COLOMBIA (3020).
Read 3 handwritten values from digit boxes (hundreds|tens|ones, empty=0):
1) "VOTOS SOLO POR LA LISTA" (row 0) → verde_lista
2) Row "7" — handwritten boxes RIGHT of printed "7" → cand_7
3) "TOTAL AGRUPACIÓN POLÍTICA" (bottom) → verde_total
Printed numbers 1-100 are ROW LABELS, not votes. KIT/Form numbers are NOT votes.
VERIFY: verde_lista ≤ verde_total AND cand_7 ≤ verde_total.
Return ONLY:
{"verde_lista": <int>, "cand_7": <int>, "verde_total": <int>}"""

# Follow-up prompt for page 1, used when the first reading looks implausible.
# This is a str.format template: {prev} receives the JSON dump of the previous
# reading, and the doubled braces on the last line become literal braces.
PROMPT_VERDE_RETRY = """\
Re-read. Previous: {prev}. Common errors: 1 misread as 7, 0 as 6, \
printed row label "7" used as vote, KIT number used as total.
Constraints: verde_lista ≤ verde_total, cand_7 ≤ verde_total.
Return ONLY:
{{"verde_lista": <int>, "cand_7": <int>, "verde_total": <int>}}"""

# Prompt sent with page 2 (Especiales). Asks for the blank, null and unmarked
# vote counts as one JSON object.
PROMPT_ESP = """\
E-14 CLAVEROS SENADO — VOTOS ESPECIALES.
Read 3 rows (3 digit boxes each, empty=0):
1) VOTOS EN BLANCO → votos_blancos
2) VOTOS NULOS → votos_nulos
3) VOTOS NO MARCADOS → votos_no_marcados
Handwritten 0 can look like 6 — recheck if values seem high.
Return ONLY:
{"votos_blancos": <int>, "votos_nulos": <int>, "votos_no_marcados": <int>}"""

# Prompt sent with page 3 (Constancias). Asks for a transcription of the
# jurors' notes, the recount answer and the number of signed signature boxes.
# extract_form calls it with a larger token budget than the other prompts.
PROMPT_CONST = """\
E-14 CLAVEROS SENADO — CONSTANCIAS page.
1) Transcribe ALL handwritten text in "CONSTANCIAS DE LOS JURADOS" box. \
Preserve original Spanish exactly. Empty box = "".
2) "¿HUBO RECUENTO DE VOTOS?" — "si", "no", or "unclear".
3) Count signature boxes (FIRMA JURADO 1-6) that have signatures (0-6).
Return ONLY:
{"constancias": "<text>", "hubo_recuento": "si"|"no"|"unclear", "firmas_count": <int>}"""


# ── Helpers ────────────────────────────────────────────────────────────

def render_page(pdf_path, page_idx):
    """Rasterise one page of a PDF into a portrait RGB PIL image.

    Args:
        pdf_path: Location of the PDF, handed to ``fitz.open``.
        page_idx: Zero-based page index. An index past the end of the
            document is clamped to the last page.

    Returns:
        The page rendered at ``DPI`` dots per inch. A page that comes out
        wider than tall is rotated by 90 degrees so the result is portrait.

    Raises:
        Whatever PyMuPDF raises when the file cannot be opened or the page
        cannot be rendered; the document is closed in either case.
    """
    # The context manager closes the document even when rendering fails,
    # which the previous explicit close() call did not guarantee.
    with fitz.open(pdf_path) as doc:
        page_idx = min(page_idx, len(doc) - 1)
        # PDF user space is 72 units per inch, so DPI / 72 is the zoom factor.
        zoom = DPI / 72
        pix = doc[page_idx].get_pixmap(matrix=fitz.Matrix(zoom, zoom))
        # Copy the pixel data into a PIL image while the document is open.
        img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)

    if img.width > img.height:
        img = img.rotate(90, expand=True)
    return img


def vlm_call(img, prompt, max_tokens=120):
    """Run one image + text prompt through the module-level model.

    Args:
        img: Image placed in the user turn ahead of the prompt text.
        prompt: Instruction text for the user turn.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded continuation only (the prompt tokens are sliced off),
        with special tokens removed.
    """
    system_turn = {
        "role": "system",
        "content": [{"type": "text", "text": "You are a careful OCR assistant. /no_think"}],
    }
    user_turn = {
        "role": "user",
        "content": [
            {"type": "image", "image": img},
            {"type": "text", "text": prompt},
        ],
    }
    messages = [system_turn, user_turn]

    chat_text = processor.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    image_inputs, video_inputs = process_vision_info(messages)
    batch = processor(
        text=[chat_text],
        images=image_inputs,
        videos=video_inputs,
        padding=True,
        return_tensors="pt",
    )
    batch = batch.to(model.device)

    # Generation output starts with the prompt; remember where it ends.
    prompt_len = batch["input_ids"].shape[1]
    with torch.no_grad():
        generated = model.generate(**batch, max_new_tokens=max_tokens, do_sample=False)

    new_tokens = generated[0, prompt_len:]
    return processor.decode(new_tokens, skip_special_tokens=True)


def parse_json(text):
    """Extract a JSON object from raw model output.

    The text is cleaned in two steps before parsing: a ``<think>`` preamble
    is dropped (everything up to ``</think>``, or just the opening tag when
    no closing tag exists), and a surrounding markdown code fence is removed.
    The cleaned text is parsed as-is first; if that does not yield an object,
    the span from the first ``{`` to the last ``}`` is tried.

    Args:
        text: Raw string produced by the model.

    Returns:
        The parsed ``dict``. When no JSON object can be recovered, returns
        ``{"_parse_error": True, "_raw": <first 500 chars of text>}``.
        Valid JSON that is not an object (a number, list, string, ...) counts
        as a failure, because callers use ``.get`` on the result.
    """
    clean = text.strip()

    # Drop a reasoning preamble if the model emitted one.
    if "<think>" in clean:
        end = clean.find("</think>")
        if end >= 0:
            clean = clean[end + len("</think>"):].strip()
        else:
            clean = clean[clean.find("<think>") + len("<think>"):].strip()

    # Strip a markdown code fence: the opening line and, if present, the
    # closing ``` line.
    if clean.startswith("```"):
        lines = clean.split("\n")
        inner = lines[1:-1] if lines[-1].strip() == "```" else lines[1:]
        clean = "\n".join(inner).strip()

    # Candidate 1: the whole cleaned text. Candidate 2: outermost braces,
    # which tolerates chatter before or after the object.
    candidates = [clean]
    start, stop = clean.find("{"), clean.rfind("}") + 1
    if start >= 0 and stop > start:
        candidates.append(clean[start:stop])

    for candidate in candidates:
        try:
            value = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        # Only a JSON object satisfies the callers' contract.
        if isinstance(value, dict):
            return value

    return {"_parse_error": True, "_raw": text[:500]}


def to_int(v):
    """Coerce a loosely typed value to ``int``, falling back to 0.

    Args:
        v: Value taken from parsed model output.

    Returns:
        * ``bool`` / ``int`` -> the value as a plain ``int``.
        * ``float`` -> truncated toward zero; NaN and infinities give 0.
        * ``str`` -> parsed after stripping whitespace and removing commas
          (thousands separators); unparseable text gives 0.
        * anything else (``None``, lists, dicts, ...) -> 0.
    """
    if isinstance(v, bool):
        # bool is an int subclass; normalise it to a real int.
        return int(v)
    if isinstance(v, int):
        return v
    if isinstance(v, float):
        try:
            return int(v)
        except (ValueError, OverflowError):
            # int() rejects NaN (ValueError) and +/-inf (OverflowError).
            return 0
    if isinstance(v, str):
        digits = v.strip().replace(",", "")
        try:
            return int(digits)
        except ValueError:
            return 0
    return 0


# ── Main extraction (single GPU burst for all 4 pages) ────────────────

@spaces.GPU(duration=120)
def extract_form(pdf_path):
    """Run the four page-level extractions on one PDF and return JSON text.

    Each page is handled independently: a failure on one page is recorded as
    ``{"_error": <message>}`` under that page's key and the remaining pages
    are still processed.

    Args:
        pdf_path: PDF location, forwarded to ``render_page``.

    Returns:
        A JSON string with the keys ``nivelacion``, ``verde``, ``especiales``
        and ``constancias`` (one parsed object per page) plus ``elapsed_s``,
        the wall-clock duration in seconds rounded to one decimal.
    """
    import time

    started = time.time()
    result = {}

    # Page 0: Nivelación
    try:
        page_img = render_page(pdf_path, 0)
        result["nivelacion"] = parse_json(vlm_call(page_img, PROMPT_NIV))
    except Exception as exc:
        result["nivelacion"] = {"_error": str(exc)}

    # Page 1: Verde
    try:
        page_img = render_page(pdf_path, 1)
        verde = parse_json(vlm_call(page_img, PROMPT_VERDE))

        lista = to_int(verde.get("verde_lista", 0))
        cand = to_int(verde.get("cand_7", 0))
        total = to_int(verde.get("verde_total", 0))

        # Ask again when a component exceeds a non-zero total, or when the
        # candidate count reaches the 50-vote plausibility threshold.
        exceeds_total = total > 0 and (lista > total or cand > total)
        if exceeds_total or cand >= 50:
            retry_prompt = PROMPT_VERDE_RETRY.format(prev=json.dumps(verde))
            second = parse_json(vlm_call(page_img, retry_prompt))
            # Keep the first reading if the retry could not be parsed.
            if not second.get("_parse_error"):
                verde = second

        result["verde"] = verde
    except Exception as exc:
        result["verde"] = {"_error": str(exc)}

    # Page 2: Especiales
    try:
        page_img = render_page(pdf_path, 2)
        result["especiales"] = parse_json(vlm_call(page_img, PROMPT_ESP))
    except Exception as exc:
        result["especiales"] = {"_error": str(exc)}

    # Page 3: Constancias
    try:
        page_img = render_page(pdf_path, 3)
        notes = parse_json(vlm_call(page_img, PROMPT_CONST, max_tokens=1500))
        # Flag transcriptions that mention the list or its candidates.
        lowered = str(notes.get("constancias", "")).lower()
        keywords = ("alianza", "verde", "3020", "candidat")
        notes["constancia_relevant_verde"] = any(kw in lowered for kw in keywords)
        result["constancias"] = notes
    except Exception as exc:
        result["constancias"] = {"_error": str(exc)}

    result["elapsed_s"] = round(time.time() - started, 1)
    return json.dumps(result, ensure_ascii=False)


# ── Gradio Interface ──────────────────────────────────────────────────

# Single-function UI: one PDF upload in, the JSON string returned by
# extract_form out.
# NOTE(review): the module docstring tells clients to call api_name="/extract";
# presumably Gradio derives that from the function name — confirm for the
# installed Gradio version.
demo = gr.Interface(
    fn=extract_form,
    inputs=gr.File(label="Slim 4-page claveros PDF", file_types=[".pdf"]),
    outputs=gr.Textbox(label="Extraction result (JSON)", lines=20),
    title="Claveros 4-Page Extraction",
    description="Upload a 4-page slim claveros PDF. Extracts nivelación, Verde votes, especiales, and constancias.",
)

# Launched unconditionally when the module is executed (no __main__ guard).
demo.launch()