Update app.py
Browse files
app.py
CHANGED
|
@@ -29,8 +29,11 @@ def upload_pdf(path):
|
|
| 29 |
# ------------------ Prompt ---------------------
|
| 30 |
def build_prompt():
|
| 31 |
return (
|
| 32 |
-
"
|
| 33 |
-
"
|
|
|
|
|
|
|
|
|
|
| 34 |
"{\n"
|
| 35 |
" \"po_number\": string|null,\n"
|
| 36 |
" \"ship_from\": string|null,\n"
|
|
@@ -38,11 +41,41 @@ def build_prompt():
|
|
| 38 |
" \"rail_car_number\": string|null,\n"
|
| 39 |
" \"total_quantity\": number|null,\n"
|
| 40 |
" \"inventories\": [\n"
|
| 41 |
-
" {\
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
" ],\n"
|
| 43 |
" \"custom_fields\": {}\n"
|
| 44 |
-
"}\n"
|
| 45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
)
|
| 47 |
|
| 48 |
|
|
|
|
| 29 |
# ------------------ Prompt ---------------------
|
| 30 |
def build_prompt():
|
| 31 |
return (
|
| 32 |
+
"You are an advanced extraction system for logistics, rail, lumber, shipping, trucking, "
|
| 33 |
+
"inventory, and packing documents. Your task is to read the attached PDF or image and "
|
| 34 |
+
"extract ONLY the information that is explicitly present.\n\n"
|
| 35 |
+
|
| 36 |
+
"You must return STRICT JSON in the EXACT structure below:\n"
|
| 37 |
"{\n"
|
| 38 |
" \"po_number\": string|null,\n"
|
| 39 |
" \"ship_from\": string|null,\n"
|
|
|
|
| 41 |
" \"rail_car_number\": string|null,\n"
|
| 42 |
" \"total_quantity\": number|null,\n"
|
| 43 |
" \"inventories\": [\n"
|
| 44 |
+
" {\n"
|
| 45 |
+
" \"productName\": string,\n"
|
| 46 |
+
" \"productCode\": string|null,\n"
|
| 47 |
+
" \"pcs\": number|null,\n"
|
| 48 |
+
" \"dimensions\": string|null\n"
|
| 49 |
+
" }\n"
|
| 50 |
" ],\n"
|
| 51 |
" \"custom_fields\": {}\n"
|
| 52 |
+
"}\n\n"
|
| 53 |
+
|
| 54 |
+
"YOUR RULES (MUST FOLLOW EXACTLY):\n"
|
| 55 |
+
"1. Do NOT guess or hallucinate. Only extract values explicitly shown in the document.\n"
|
| 56 |
+
"2. If a field is not present or cannot be confirmed → output null.\n"
|
| 57 |
+
"3. PO Number may appear under wording like 'PO', 'Purchase Order', 'P.O.', 'Customer PO', etc.\n"
|
| 58 |
+
"4. Ship From may appear as 'Origin', 'From', 'Exporter', 'Ship From', 'Supplier', etc.\n"
|
| 59 |
+
"5. Carrier Type may appear as 'Carrier', 'Carrier Type', 'Routing', 'Mode', 'Transport Type', "
|
| 60 |
+
"'RAIL', 'TRUCK', 'CN', 'BNSF', 'CP', 'Truckload', etc.\n"
|
| 61 |
+
"6. Rail Car Number may appear as 'Railcar', 'Rail Car #', 'Car Number', 'Car #', etc.\n"
|
| 62 |
+
"7. Total Quantity must be ONLY the explicit total PCS/pieces count if it appears. "
|
| 63 |
+
"If the only total shown is FBM/weight/volume → DO NOT treat that as quantity.\n"
|
| 64 |
+
"8. Inventories must capture every unique product line that appears. Extract product name, "
|
| 65 |
+
"item description, dimensions like '2x4', '23/32', and PCS when available.\n"
|
| 66 |
+
"9. Dimensions may appear as '2 X 4', '2x6', '48x96', '23/32', etc. Normalize to a single "
|
| 67 |
+
"string representation.\n"
|
| 68 |
+
"10. custom_fields must contain ANY additional fields not part of the main schema (dates, mills, "
|
| 69 |
+
"FBM, weights, routing codes, package counts, etc.). Key names must be lower_snake_case.\n"
|
| 70 |
+
"11. JSON MUST be valid, must not include comments, and must not include text outside the JSON object.\n\n"
|
| 71 |
+
|
| 72 |
+
"ADDITIONAL RULES FOR COMPLEX TABLES:\n"
|
| 73 |
+
"- If multiple product variants exist, create multiple inventory objects.\n"
|
| 74 |
+
"- If tables list PCS per package × number of packages, you MAY compute total PCS.\n"
|
| 75 |
+
"- Never compute derived values unless the math is explicitly possible.\n"
|
| 76 |
+
"- If a value is ambiguous, set it to null.\n\n"
|
| 77 |
+
|
| 78 |
+
"Final requirement: Return ONLY the JSON object. No explanation, no markdown.\n"
|
| 79 |
)
|
| 80 |
|
| 81 |
|