Spaces:

feragostb
/

receiptesting

Sleeping

App Files Files Community

fgost committed on Jan 16

Commit

b167ed6

1 Parent(s): 1a14664

first try

Browse files

Files changed (2) hide show

app.py +195 -4
requirements.txt +0 -0

app.py CHANGED Viewed

@@ -1,7 +1,198 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

+import json
+import re
+from datetime import datetime
 import gradio as gr
+import torch
+from transformers import AutoTokenizer, AutoModelForQuestionAnswering
+# --------- MODELO QA (Kaleidoscope) ----------
# Run on the GPU when torch reports CUDA as available, otherwise on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Question-answering checkpoint; tokenizer and model are loaded from the same id.
qa_model_id = "2KKLabs/Kaleidoscope_small_v1"
tokenizer = AutoTokenizer.from_pretrained(qa_model_id)
model = AutoModelForQuestionAnswering.from_pretrained(qa_model_id)
model.to(device)
# The model is only used for inference in this file, so put it in eval mode.
model.eval()

# Expense categories: offered as the dropdown choices in the UI and used to
# validate the `tipo` value produced by the extraction step.
TIPOS = [
    "coche", "comidas", "envio postal", "estacionamiento", "hoteles",
    "peaje", "taxis", "telefono/celular/internet", "tren", "vuelos",
]
+# --------- OCR: imagen -> texto (placeholder) ----------
def ocr_image_to_text(image):
    """Placeholder OCR step: turn a receipt image into text.

    Replace the body with a real OCR engine (easyocr, paddleocr, etc.).
    For now *image* is ignored and a fixed stub string is returned so the
    rest of the pipeline can be exercised end to end.
    """
    stub_text = (
        "stub text from OCR with date 2024-11-01 amount 23.50 EUR bar Velodromo comidas"
    )
    return stub_text
+# --------- Utilidades de post-procesado ----------
def normalize_date(text):
    """Return the first recognizable date in *text* as ``YYYY-MM-DD``.

    Three layouts are tried in order: ``YYYY-MM-DD``, ``DD/MM/YYYY`` and
    ``DD-MM-YYYY``. For each layout only its first match is considered; if
    that match is not a real calendar date (e.g. month 13) the next layout
    is tried.

    Args:
        text: Free text, typically the OCR output of a receipt. ``None`` and
            the empty string are accepted.

    Returns:
        The ISO-formatted date string, or ``""`` when no valid date is found.
    """
    if not text:
        return ""

    # (pattern, year_first): year_first tells how to map the three captured
    # groups onto (year, month, day).
    layouts = (
        (r"(\d{4})-(\d{2})-(\d{2})", True),    # 2024-11-01
        (r"(\d{2})/(\d{2})/(\d{4})", False),   # 01/11/2024
        (r"(\d{2})-(\d{2})-(\d{4})", False),   # 01-11-2024
    )
    for pattern, year_first in layouts:
        match = re.search(pattern, text)
        if match is None:
            continue
        first, month, last = (int(part) for part in match.groups())
        year, day = (first, last) if year_first else (last, first)
        try:
            # datetime() validates the calendar date; isoformat() zero-pads.
            return datetime(year, month, day).date().isoformat()
        except ValueError:
            # Digits matched the layout but are not a real date; keep looking.
            continue
    return ""
def normalize_amount(text):
    """Extract the first two-decimal money amount from *text*.

    Both ``.`` and ``,`` are accepted as the decimal mark, and an optional
    thousands grouping with the other (or same) mark is removed, so
    ``"1.234,56"`` and ``"1,234.56"`` both yield ``"1234.56"``. Numbers that
    are part of a longer dotted/comma'd sequence (such as the date
    ``01.11.2024``) or that have more than two decimals are skipped.

    Args:
        text: Free text, typically OCR output. ``None`` and ``""`` are accepted.

    Returns:
        The amount as a string using ``.`` as decimal mark (e.g. ``"23.50"``),
        or ``""`` when no amount is found.
    """
    if not text:
        return ""

    match = re.search(
        r"(?<![\d.,])"                       # not in the middle of a number
        r"(\d{1,3}(?:[.,]\d{3})+|\d+)"       # integer part, optionally grouped
        r"[.,](\d{2})"                       # decimal mark + exactly two digits
        r"(?![.,]?\d)",                      # ...and the number ends there
        text,
    )
    if match is None:
        return ""

    # Drop thousands separators so the result is parseable by float().
    integer_part = re.sub(r"[.,]", "", match.group(1))
    return f"{integer_part}.{match.group(2)}"
def best_tipo_from_text(text):
    """Guess the expense category of a receipt from keywords in its text.

    The text is lower-cased and checked against an ordered list of
    (category, keywords) rules; the first rule with any keyword present as a
    substring wins. When nothing matches, ``"comidas"`` is returned as the
    default category.

    Args:
        text: Receipt text. ``None`` and ``""`` are treated as "no keywords".

    Returns:
        One of the category names used by the application.
    """
    # Order matters: earlier rules take priority when several keywords occur.
    rules = (
        ("estacionamiento", ("parking", "aparcamiento")),
        ("peaje", ("peaje", "toll")),
        ("taxis", ("taxi",)),
        ("hoteles", ("hotel",)),
        ("tren", ("train", "renfe", "tren")),
        ("vuelos", ("flight", "vueling", "iberia")),
        ("coche", ("diesel", "fuel", "gasolina")),
        ("telefono/celular/internet", ("internet", "movistar", "vodafone")),
        # Lowest priority so that texts already classified by the rules above
        # keep their category; makes the "envio postal" category reachable.
        ("envio postal", ("correos", "envio", "envío", "postage")),
    )
    lowered = (text or "").lower()
    for tipo, keywords in rules:
        if any(keyword in lowered for keyword in keywords):
            return tipo
    return "comidas"
def truncate_desc(desc, max_words=6):
    """Limit a description to at most *max_words* whitespace-separated words.

    Args:
        desc: The description. ``None`` yields ``""``; other non-string values
            (e.g. a number coming from parsed JSON) are converted with ``str``.
        max_words: Maximum number of words to keep. Values below zero are
            treated as zero.

    Returns:
        *desc* unchanged when it is already short enough; otherwise the first
        *max_words* words joined by single spaces.
    """
    if desc is None:
        return ""
    if not isinstance(desc, str):
        desc = str(desc)

    words = desc.split()
    if len(words) <= max_words:
        return desc
    # Clamp so a negative limit cannot turn into a "drop the last N" slice.
    return " ".join(words[:max(max_words, 0)])
+# --------- Llamada al modelo QA ----------
def qa_answer(context, question, max_length=384):
    """Run the extractive QA model and return the predicted answer span.

    The question/context pair is tokenized (truncated to *max_length*
    tokens), passed through the module-level ``model`` without gradient
    tracking, and the span between the highest-scoring start and end
    positions is decoded back to text.

    Args:
        context: Text to search for the answer (the receipt text).
        question: The question posed to the model.
        max_length: Maximum number of tokens in the encoded pair.

    Returns:
        The decoded answer with surrounding whitespace stripped, or ``""``
        when the predicted end position precedes the start position.
    """
    encoded = tokenizer(
        question,
        context,
        return_tensors="pt",
        truncation=True,
        max_length=max_length,
    )
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    with torch.no_grad():
        outputs = model(**encoded)

    start_index = int(torch.argmax(outputs.start_logits))
    end_index = int(torch.argmax(outputs.end_logits))
    if end_index < start_index:
        # Inconsistent prediction: there is no valid span to decode.
        return ""

    # A single pair is encoded with return_tensors="pt", so the ids carry a
    # leading batch axis; select the only sequence before slicing token
    # positions.
    answer_tokens = encoded["input_ids"][0][start_index : end_index + 1]
    return tokenizer.decode(answer_tokens, skip_special_tokens=True).strip()
+# --------- Pipeline principal ----------
def process_receipt(image):
    """Extract structured expense fields from a receipt image.

    Pipeline: OCR the image, ask the QA model for a JSON object, then fill
    or repair every field with rule-based fallbacks computed from the OCR
    text.

    Args:
        image: The receipt image, passed through to ``ocr_image_to_text``.

    Returns:
        A tuple ``(fecha, tipo, descripcion, amount_val, comments)`` where
        ``fecha`` is a string (``YYYY-MM-DD`` when it could be normalized),
        ``tipo`` is one of ``TIPOS``, ``descripcion`` has at most six words,
        ``amount_val`` is a float (``0.0`` when no amount was found) and
        ``comments`` is a string.
    """
    # 1) Image -> text.
    context = ocr_image_to_text(image)

    # 2) Ask the model for a raw JSON answer.
    question = (
        "From this receipt text extract: "
        "fecha (date), tipo (one of coche, comidas, envio postal, estacionamiento, hoteles, peaje, "
        "taxis, telefono/celular/internet, tren, vuelos), "
        "description (<=6 words), amount (numeric), comments (business name). "
        "Return only a JSON object with keys: fecha, tipo, description, amount, comments."
    )
    raw_answer = qa_answer(context, question)

    # 3) Parse the answer; anything that is not a JSON object goes to fallback.
    try:
        parsed = json.loads(raw_answer)
    except (TypeError, ValueError):
        parsed = None

    if isinstance(parsed, dict):
        # `or ""` maps JSON null / missing keys to an empty string.
        fecha = str(parsed.get("fecha") or "")
        tipo = str(parsed.get("tipo") or "")
        descripcion = str(parsed.get("description") or "")
        amount = parsed.get("amount", "")
        comments = str(parsed.get("comments") or "")
    else:
        fecha = ""
        tipo = ""
        descripcion = "expense item"
        amount = ""
        # Use the first line of the receipt text as the comment.
        context_lines = context.splitlines()
        comments = context_lines[0][:60] if context_lines else ""

    # 4) Normalization, falling back to the OCR text wherever a field is
    #    missing or unusable.
    fecha = normalize_date(fecha) or fecha or normalize_date(context)
    if tipo not in TIPOS:
        tipo = best_tipo_from_text(context)
    descripcion = truncate_desc(descripcion, 6)

    try:
        amount_val = float(amount)
    except (TypeError, ValueError):
        # e.g. "23,50 EUR", null or empty: try to recover a number from the
        # value itself, then from the receipt text.
        recovered = normalize_amount(str(amount)) or normalize_amount(context)
        try:
            amount_val = float(recovered)
        except ValueError:
            amount_val = 0.0

    return fecha, tipo, descripcion, amount_val, comments
+# --------- Interfaz Gradio ----------
# Gradio UI: image upload and trigger button on the left, the five extracted
# fields on the right.
with gr.Blocks(title="Receiptesting - Kaleidoscope") as demo:
    gr.Markdown(
        "## Receiptesting con Kaleidoscope_small_v1\n\n"
        "Sube una imagen de un recibo y se extraerán: **fecha**, **tipo**, "
        "**descripción corta**, **amount** y **comentarios (nombre del negocio)**."
    )

    with gr.Row():
        # Input column.
        with gr.Column():
            image_in = gr.Image(type="pil", label="Imagen del recibo")
            btn = gr.Button("Extraer")

        # Output column; order mirrors the tuple returned by process_receipt.
        with gr.Column():
            fecha_out = gr.Textbox(label="Fecha (YYYY-MM-DD)")
            tipo_out = gr.Dropdown(choices=TIPOS, label="Tipo")
            desc_out = gr.Textbox(label="Descripción (<= 6 palabras)")
            amount_out = gr.Number(label="Amount")
            comments_out = gr.Textbox(label="Comentarios (nombre del negocio)")

    # Clicking the button runs the extraction pipeline on the uploaded image.
    btn.click(
        fn=process_receipt,
        inputs=[image_in],
        outputs=[fecha_out, tipo_out, desc_out, amount_out, comments_out],
    )

# Start the app only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()

requirements.txt ADDED Viewed

Binary file (4.58 kB). View file