Spaces:
Sleeping
Sleeping
"""
Claveros 4-page extraction Space — ZeroGPU on H200.
Processes 4-page slim claveros PDFs:
    Page 0 = Nivelación → votantes_e11, votos_urna, votos_incinerados
    Page 1 = Verde (3020) → verde_lista, cand_7, verde_total
    Page 2 = Especiales → votos_blancos, votos_nulos, votos_no_marcados
    Page 3 = Constancias → constancias text, hubo_recuento, firmas_count
Call via Gradio Client:
    from gradio_client import Client
    client = Client("libacc/claveros-extract")
    result = client.predict(pdf_file, api_name="/extract")
Co-Authored-By: Oz <oz-agent@warp.dev>
"""
| import json | |
| import os | |
| import spaces | |
| import gradio as gr | |
| import torch | |
| import fitz # PyMuPDF | |
| from PIL import Image | |
| from transformers import AutoModelForImageTextToText, AutoProcessor | |
| from qwen_vl_utils import process_vision_info | |
# ── Model (loaded at module level for ZeroGPU CUDA emulation) ─────────
MODEL_ID = "Qwen/Qwen3-VL-8B-Instruct"
DPI = 300  # rasterisation resolution; render_page scales pages by DPI / 72

print(f"Loading {MODEL_ID}...")

# Weights are requested in bfloat16; placement is delegated to device_map="auto".
model = AutoModelForImageTextToText.from_pretrained(
    MODEL_ID, dtype=torch.bfloat16, device_map="auto"
)

# The pixel bounds are forwarded to the processor as-is.
# NOTE(review): the 28 * 28 factor presumably reflects the model's visual
# patch size — confirm against the processor docs for this checkpoint.
processor = AutoProcessor.from_pretrained(
    MODEL_ID,
    min_pixels=256 * 28 * 28,
    max_pixels=1280 * 28 * 28,
)

print("Model loaded.")
# ── Prompts ───────────────────────────────────────────────────────────
# One instruction per page type. Each ends with the exact JSON shape the
# model must return, which parse_json() then decodes.
#
# The text is sent to the model verbatim, so it must be real Unicode
# (—, →, ≤, ¿, Ó, Í) rather than mis-decoded byte sequences.

# Page 0: levelling totals plus the printed table identifiers.
PROMPT_NIV = """\
E-14 CLAVEROS SENADO — NIVELACIÓN page.
Read handwritten digit boxes:
1. "TOTAL VOTANTES FORMULARIO E-11" → votantes_e11
2. "TOTAL VOTOS DE SENADO EN LA URNA" → votos_urna
3. "TOTAL VOTOS INCINERADOS" → votos_incinerados (often 0)
Also read printed: dept (2-digit), muni (3-digit), mesa.
KIT/Form numbers at bottom are NOT votes.
Each row: 3 boxes (hundreds|tens|ones). Empty=0.
Return ONLY:
{"votantes_e11": <int>, "votos_urna": <int>, "votos_incinerados": <int>, "dept": "<str>", "muni": "<str>", "mesa": "<str>"}"""

# Page 1: list-only votes, candidate 7 and the party total.
PROMPT_VERDE = """\
E-14 CLAVEROS SENADO — ALIANZA POR COLOMBIA (3020).
Read 3 handwritten values from digit boxes (hundreds|tens|ones, empty=0):
1) "VOTOS SOLO POR LA LISTA" (row 0) → verde_lista
2) Row "7" — handwritten boxes RIGHT of printed "7" → cand_7
3) "TOTAL AGRUPACIÓN POLÍTICA" (bottom) → verde_total
Printed numbers 1-100 are ROW LABELS, not votes. KIT/Form numbers are NOT votes.
VERIFY: verde_lista ≤ verde_total AND cand_7 ≤ verde_total.
Return ONLY:
{"verde_lista": <int>, "cand_7": <int>, "verde_total": <int>}"""

# Follow-up for page 1. This is a str.format() template: {prev} receives the
# previous reading, and the literal JSON braces are doubled so they survive
# formatting.
PROMPT_VERDE_RETRY = """\
Re-read. Previous: {prev}. Common errors: 1 misread as 7, 0 as 6, \
printed row label "7" used as vote, KIT number used as total.
Constraints: verde_lista ≤ verde_total, cand_7 ≤ verde_total.
Return ONLY:
{{"verde_lista": <int>, "cand_7": <int>, "verde_total": <int>}}"""

# Page 2: blank, null and unmarked votes.
PROMPT_ESP = """\
E-14 CLAVEROS SENADO — VOTOS ESPECIALES.
Read 3 rows (3 digit boxes each, empty=0):
1) VOTOS EN BLANCO → votos_blancos
2) VOTOS NULOS → votos_nulos
3) VOTOS NO MARCADOS → votos_no_marcados
Handwritten 0 can look like 6 — recheck if values seem high.
Return ONLY:
{"votos_blancos": <int>, "votos_nulos": <int>, "votos_no_marcados": <int>}"""

# Page 3: free-text remarks, recount checkbox and signature count.
PROMPT_CONST = """\
E-14 CLAVEROS SENADO — CONSTANCIAS page.
1) Transcribe ALL handwritten text in "CONSTANCIAS DE LOS JURADOS" box. \
Preserve original Spanish exactly. Empty box = "".
2) "¿HUBO RECUENTO DE VOTOS?" → "si", "no", or "unclear".
3) Count signature boxes (FIRMA JURADO 1-6) that have signatures (0-6).
Return ONLY:
{"constancias": "<text>", "hubo_recuento": "si"|"no"|"unclear", "firmas_count": <int>}"""
# ── Helpers ───────────────────────────────────────────────────────────
def render_page(pdf_path, page_idx):
    """Rasterise one PDF page to an upright RGB PIL image.

    Args:
        pdf_path: Path of the PDF, passed straight to ``fitz.open``.
        page_idx: Zero-based page number. An index past the end is clamped
            to the last page instead of raising.

    Returns:
        A ``PIL.Image.Image`` rendered at ``DPI``. Images wider than tall
        are rotated by 90 degrees so the result is never landscape.

    Raises:
        ValueError: If the document contains no pages.
    """
    # PDF user space is 72 units per inch, so DPI / 72 is the zoom factor.
    zoom = DPI / 72
    # The context manager closes the document even when rendering fails.
    with fitz.open(pdf_path) as doc:
        page_count = len(doc)
        if page_count == 0:
            raise ValueError("PDF has no pages")
        if page_idx >= page_count:
            page_idx = page_count - 1
        pix = doc[page_idx].get_pixmap(matrix=fitz.Matrix(zoom, zoom))
        # frombytes copies the samples, so the image outlives the document.
        img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
    if img.width > img.height:
        img = img.rotate(90, expand=True)
    return img
def vlm_call(img, prompt, max_tokens=120):
    """Run one greedy generation of the vision-language model on an image.

    Args:
        img: Image placed in the user turn as the ``"image"`` entry.
        prompt: Instruction text placed after the image in the same turn.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded completion only: prompt tokens are sliced off and
        special tokens are skipped.
    """
    system_turn = {
        "role": "system",
        "content": [
            {"type": "text", "text": "You are a careful OCR assistant. /no_think"},
        ],
    }
    user_turn = {
        "role": "user",
        "content": [
            {"type": "image", "image": img},
            {"type": "text", "text": prompt},
        ],
    }
    conversation = [system_turn, user_turn]

    chat_text = processor.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )
    images, videos = process_vision_info(conversation)
    batch = processor(
        text=[chat_text],
        images=images,
        videos=videos,
        padding=True,
        return_tensors="pt",
    ).to(model.device)

    with torch.no_grad():
        generated = model.generate(
            **batch, max_new_tokens=max_tokens, do_sample=False
        )

    # generate() returns prompt + completion; keep only what follows the prompt.
    prompt_len = batch["input_ids"].shape[1]
    completion_ids = generated[0, prompt_len:]
    return processor.decode(completion_ids, skip_special_tokens=True)
def _first_json_object(text):
    """Return the first decodable JSON object embedded in *text*, or None."""
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start >= 0:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = text.find("{", start + 1)
    return None


def parse_json(text):
    """Decode a model reply into a dict, tolerating common wrappers.

    Handles, in order: a leading reasoning section (``<think>…</think>``, an
    unterminated ``<think>``, or a bare closing ``</think>``), a Markdown
    code fence, and prose around the JSON object.

    Args:
        text: Raw model output. ``None`` is treated as an empty reply.

    Returns:
        The decoded JSON object. The result is always a dict: when no object
        can be recovered (including replies that are valid JSON but not an
        object), ``{"_parse_error": True, "_raw": <first 500 chars>}``.
    """
    raw = "" if text is None else str(text)
    clean = raw.strip()

    # Drop the reasoning section: everything up to the first closing tag, or
    # just the opening tag when the section was never closed.
    if "</think>" in clean:
        clean = clean.partition("</think>")[2].strip()
    elif "<think>" in clean:
        clean = clean.partition("<think>")[2].strip()

    # Strip a Markdown fence: the opening ```lang line and, if present, the
    # closing ``` line.
    if clean.startswith("```"):
        lines = clean.split("\n")
        inner = lines[1:-1] if lines[-1].strip() == "```" else lines[1:]
        clean = "\n".join(inner).strip()

    try:
        parsed = json.loads(clean)
    except json.JSONDecodeError:
        parsed = None
    # Callers use .get() and item assignment, so only a dict is acceptable.
    if isinstance(parsed, dict):
        return parsed

    # Fall back to scanning for an object; this survives trailing prose even
    # when that prose itself contains braces.
    embedded = _first_json_object(clean)
    if embedded is not None:
        return embedded

    return {"_parse_error": True, "_raw": raw[:500]}
def to_int(v):
    """Coerce a decoded JSON value to an int, falling back to 0.

    Args:
        v: Any value. Ints pass through, floats are truncated, and strings
            are stripped and have commas removed before conversion.

    Returns:
        The integer value, or 0 when ``v`` cannot be converted (non-numeric
        string, NaN/Infinity, ``None``, or any other type).
    """
    if isinstance(v, bool):
        # bool is an int subclass; normalise it to a plain int.
        return int(v)
    if isinstance(v, int):
        return v
    if isinstance(v, float):
        try:
            return int(v)
        except (ValueError, OverflowError):
            # json.loads accepts NaN and Infinity, which int() rejects.
            return 0
    if isinstance(v, str):
        try:
            return int(v.strip().replace(",", ""))
        except ValueError:
            return 0
    return 0
# ── Main extraction (single GPU burst for all 4 pages) ────────────────
def _read_page(pdf_path, page_idx, prompt, **gen_kwargs):
    """Render one page, query the VLM with *prompt*, return (image, parsed dict)."""
    img = render_page(pdf_path, page_idx)
    raw = vlm_call(img, prompt, **gen_kwargs)
    parsed = parse_json(raw)
    if not isinstance(parsed, dict):
        # Valid JSON that is not an object is as unusable as a parse failure.
        parsed = {"_parse_error": True, "_raw": str(raw)[:500]}
    return img, parsed


def _verde_needs_retry(parsed):
    """Tell whether the Verde reading breaks its constraints and should be re-read."""
    lista = to_int(parsed.get("verde_lista", 0))
    cand_7 = to_int(parsed.get("cand_7", 0))
    total = to_int(parsed.get("verde_total", 0))
    exceeds_total = total > 0 and (lista > total or cand_7 > total)
    # A candidate count of 50 or more is also treated as a likely misread.
    return exceeds_total or cand_7 >= 50


# ZeroGPU attaches a GPU only while a function decorated with @spaces.GPU is
# running, so the whole four-page pass is wrapped in a single decorated call.
# NOTE(review): if runs hit the default time budget, pass duration=<seconds>.
@spaces.GPU
def extract_form(pdf_path):
    """Extract all 4 pages from a slim claveros PDF in one GPU burst.

    Each page is handled independently: a failure on one page is recorded
    under that page's key as ``{"_error": <message>}`` and the remaining
    pages are still processed.

    Args:
        pdf_path: Path of the uploaded PDF, as handed over by Gradio.

    Returns:
        A JSON string with the keys ``nivelacion``, ``verde``,
        ``especiales``, ``constancias`` (one object each) and ``elapsed_s``
        (seconds, rounded to one decimal).
    """
    import time

    started = time.perf_counter()
    result = {}

    # Page 0: Nivelación
    try:
        _, result["nivelacion"] = _read_page(pdf_path, 0, PROMPT_NIV)
    except Exception as e:  # per-page boundary: report and keep going
        result["nivelacion"] = {"_error": str(e)}

    # Page 1: Verde
    try:
        img, parsed = _read_page(pdf_path, 1, PROMPT_VERDE)
        if _verde_needs_retry(parsed):
            retry_prompt = PROMPT_VERDE_RETRY.format(prev=json.dumps(parsed))
            second = parse_json(vlm_call(img, retry_prompt))
            # Keep the first reading unless the retry produced a usable object.
            if isinstance(second, dict) and not second.get("_parse_error"):
                parsed = second
        result["verde"] = parsed
    except Exception as e:
        result["verde"] = {"_error": str(e)}

    # Page 2: Especiales
    try:
        _, result["especiales"] = _read_page(pdf_path, 2, PROMPT_ESP)
    except Exception as e:
        result["especiales"] = {"_error": str(e)}

    # Page 3: Constancias (free text, hence the larger token budget)
    try:
        _, parsed = _read_page(pdf_path, 3, PROMPT_CONST, max_tokens=1500)
        remarks = str(parsed.get("constancias", "")).lower()
        # Flag remarks that mention the party, its code, or candidates.
        parsed["constancia_relevant_verde"] = any(
            keyword in remarks
            for keyword in ("alianza", "verde", "3020", "candidat")
        )
        result["constancias"] = parsed
    except Exception as e:
        result["constancias"] = {"_error": str(e)}

    result["elapsed_s"] = round(time.perf_counter() - started, 1)
    return json.dumps(result, ensure_ascii=False)
# ── Gradio Interface ──────────────────────────────────────────────────
# Single-function UI: one PDF upload in, the JSON string from extract_form out.
demo = gr.Interface(
    fn=extract_form,
    inputs=gr.File(label="Slim 4-page claveros PDF", file_types=[".pdf"]),
    outputs=gr.Textbox(label="Extraction result (JSON)", lines=20),
    title="Claveros 4-Page Extraction",
    description=(
        "Upload a 4-page slim claveros PDF. "
        "Extracts nivelación, Verde votes, especiales, and constancias."
    ),
    # The module docstring tells clients to call api_name="/extract"; name the
    # endpoint explicitly so that documented call resolves.
    api_name="extract",
)
demo.launch()