PDF_Upload

Sleeping

App Files Files Community

Seth0330 committed on May 30, 2025

Commit

8c52b14

verified ·

1 Parent(s): 0eb1833

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -5

app.py CHANGED Viewed

@@ -116,15 +116,49 @@ def fallback_supplier(text):
     return None
 def get_extraction_prompt(model_choice, txt):
-    # New, broad prompt for all models:
     return (
-        "Extract all possible metadata fields from the following invoice, including but not limited to header information, supplier and customer details, payment terms, tax details, references, and every possible line item with all available attributes. "
-        "Return a detailed JSON object containing every field you can identify, and make sure to include all line items as an array. "
-        "If any field is missing in the invoice, use null. Do not add any explanation or extra text outside the JSON. "
-        "\n\nInvoice Text:\n"
         f"{txt}"
     )
 def extract_invoice_info(model_choice, text):
     prompt = get_extraction_prompt(model_choice, text)
     raw = query_llm(model_choice, prompt)

     return None
 def get_extraction_prompt(model_choice, txt):
+    # Example output shows both header & line items
     return (
+        "Extract every possible piece of metadata and detail from the following invoice text—including all header information, supplier details, customer details, addresses, invoice numbers, dates, tax information, payment terms, references, summary totals, and a full list of line items with as many columns as possible. "
+        "Return a structured JSON with two keys: 'invoice_header' (an object with all header fields found) and 'line_items' (an array of all detected line items and their attributes). "
+        "If any field is not present, use null. Do not invent/hallucinate fields not present. "
+        "Your output must match the format of this example (but include only fields found in the invoice):\n"
+        '{\n'
+        '  "invoice_header": {\n'
+        '    "invoice_number": "string or null",\n'
+        '    "invoice_date": "string or null",\n'
+        '    "supplier_name": "string or null",\n'
+        '    "supplier_address": "string or null",\n'
+        '    "customer_name": "string or null",\n'
+        '    "customer_address": "string or null",\n'
+        '    "po_number": "string or null",\n'
+        '    "tax_id": "string or null",\n'
+        '    "payment_terms": "string or null",\n'
+        '    "total_before_tax": "string or null",\n'
+        '    "tax_amount": "string or null",\n'
+        '    "total_due": "string or null",\n'
+        '    "currency": "string or null",\n'
+        '    "due_date": "string or null",\n'
+        '    "any_other_metadata": "string or null"\n'
+        '  },\n'
+        '  "line_items": [\n'
+        '    {\n'
+        '      "item_number": "string or null",\n'
+        '      "description": "string or null",\n'
+        '      "quantity": "string or null",\n'
+        '      "unit_price": "string or null",\n'
+        '      "total_price": "string or null",\n'
+        '      "tax_rate": "string or null",\n'
+        '      "sku": "string or null",\n'
+        '      "any_other_line_item_field": "string or null"\n'
+        '    }\n'
+        '  ]\n'
+        '}'
+        "\nReturn ONLY the JSON object, no explanations.\n"
+        "\nInvoice Text:\n"
         f"{txt}"
     )
 def extract_invoice_info(model_choice, text):
     prompt = get_extraction_prompt(model_choice, text)
     raw = query_llm(model_choice, prompt)