credent007 committed on
Commit
acf34d2
·
verified ·
1 Parent(s): ea7b386

Update prompt1.py

Browse files
Files changed (1) hide show
  1. prompt1.py +18 -2
prompt1.py CHANGED
@@ -1,6 +1,6 @@
1
  def get_part_classifier_prompt():
2
  return """
3
- Look at this customs bill page and identify which PART it belongs to.
4
  Return ONLY one of these exact values:
5
  PART-1
6
  PART-2
@@ -26,6 +26,22 @@ You convert unstructured text into a structured JSON object. OUTPUT ONLY VALID J
26
  """,
27
  "PART-6": """You convert unstructured text into a structured JSON object. OUTPUT ONLY VALID JSON. NO extra words. NO explanation. NO commentary. GOAL - Extract required data points as key–value pairs. - Organize data into top-level sections only. - Each section contains either:   (a) a flat object of key–value pairs, or     (b) an array of flat objects for repeated groups. - Depth limit = 2 (root → section → object/array of objects).     Never nest sections inside other sections. NAMING RULES - Use the exact section names and key names defined below. - Keys MUST NOT contain the dot character ".". - Maintain human-readable, normalized whitespace in key names. - Never rename keys, never add extra keys. DATA TYPING - Use JSON native types:   - Numbers as numbers.   - IDs or codes with leading zeros as strings.   - Dates as "YYYY-MM-DD" if clearly parseable; otherwise keep original text.   - Times as "HH:MM" 24-hour if clear. - If a value exists but is unreadable, return "" (not null). - NEVER infer any missing value. NOTIFICATION NUMBER RULE (IMPORTANT) For “CB Code” fields (e.g., "CB Code" inside PART-1 - BILL OF ENTRY SUMMARY) - Valid examples:"CB Code" =  "AAACF2350DCH006" For all “Notn No” fields (e.g., "IGST","G. CESS" inside Item Duty): - Valid examples:"IGST" =  "021/2023", "G CESS" = "001/2017". - Return "IGST" and "G CESS" can not be null or Empty For all “Notn SNo” fields (e.g., "IGST", "G CESS" inside Item Duty): - Valid examples:"IGST" =  "1" or "III70", "G CESS" = "56". For all “Rate” fields (e.g., BCD, SWS inside Item Duty): - Valid examples:BCD =  "7.5", SWS = "10". For all “Notn No” fields (e.g., CAIDC inside Other Duties): - Valid examples:CAIDC =  "011/2021". For all “Notn SNo” fields (e.g., CAIDC inside Other Duties): - Valid examples:CAIDC =  "18". For all “Amount” fields (e.g., CAIDC inside Other Duties): - Valid examples:CAIDC =  "0". 
- Return Exact value or can be 0 or Empty For all “Duty Fg” fields (e.g., CAIDC inside Other Duties): - Valid examples:CAIDC =  "4437540". - Return Exact value which can not be null or empty For all “INVSNO” and “ITEMSN” (inside Part - III - DUTIES) - Full form: “INVSNO” = "Invoice Serial number", “ITEMSN” = "Item Serial Number" - Return “INVSNO” and “ITEMSN” which can not be null or empty Return EXACTLY as shown in the document. REPETITIONS & TABLES - If multiple rows exist, return an array with one object per row. - Each object must remain FLAT. - Do not nest objects deeper than allowed. - Keep column order exactly as defined. MANDATORY BEHAVIOR - Detect which PART the document belongs to. - Output ONLY the JSON object for that PART. - If data for a required field is missing, return "". ============================================= PART-1 — BILL OF ENTRY SUMMARY ============================================= If the document corresponds to PART-1, output: {   "PART-1 - BILL OF ENTRY SUMMARY": {     "Port Code": "",     "BE NO": "",     "BE Date": "",     "BE TYPE": "",     "IEC/Br": "",     "CB Code": "",     "Mode": "",     "DEF BE": "",     "ASSESS": "",     "EXAM": "",     "PROV/FINAL": "",     "COUNTRY OF ORIGIN": "",     "PORT OF LOADING": "",     "PORT OF SHIPMENT": "",     "IMPORTER NAME & ADDRESS": "",     "AD CODE": "",     "CB NAME": "",     "BCD": "",     "SWS": "",     "CVD":"",     "IGST": "",     "TOT ASS VALUE": "",     "TOTAL DUTY": "",     "INT": "",     "PNLTY": "",     "FINE": "",     "TOT AMOUNT": "",     "Submission": "",     "ASSESSMENT": "",     "EXCHNAGE RATE": "",     "OOC NO": "",     "OOC DATE": ""   },   "MANIFEST DETAILS": [     {       "IGM NO": "",       "IGM DATE": "",       "INW DATE": "",       "MAWB NO": "",       "DATE": "",       "HAWB NO": "",       "HAWB DATE": "",       "PKG": "",       "GW": ""     }   ],   "BOND DETAILS": [     {       "BOND NO": "",       "PORT": "",       "BOND CD": "",       "DEBT AMT": ""     }   ],   
"Payment Details": [     {       "SR NO": "",       "CHALLAN NO": "",       "PAID ON": "",       "AMOUNT(RS)": ""     }   ],   "CONTAINER DETAILS": [     {       "S NO": "",       "LCL/FCL": "",       "CONTAINER NUMBER": ""     }   ],   "INVOICE DETAILS-SUMMERY": [     {         "S NO":"",         "INVOICE NO":"",         "INV AMT": "",         "CUR":""     }   ] } ============================================ PART-II — INVOICE & VALUATION DETAILS ============================================ {     "PART -II - INVOICE & VALUATION DETAILS": {},          "Details": {                 "SUPPLIER NAME & ADDRESS": "",                 "FREIGHT": "",                 "INSURANCE": "",                 "LOADING": "",                 "COMMN": "",                 "VALUATION METHOD": "",                 "Cur": "",                 "Term": "",                 "REL TD": "",                 "SVB CH": "",                 "SVB NO": "",                 "DATE": ""                 },     "INVOICE": [                 {                     "S NO": "",                     "INVOICE No Dt": ""                 }                 ],     "Item Details": [                         {                             "S NO": "",                             "CTH": "",                             "DESCRIPTION": "",                             "UNIT PRICE": "",                             "QUANTITY": "",                             "UQC": "",                             "AMOUNT": ""                         }                     ]    } ==================================== PART-III — DUTIES ==================================== ✔ Item Duty section FIXED, simplified, corrected   ✔ Each table is ONE row   ✔ Field order fixed   ✔ Dot-free keys   ✔ No duplication {   "Part - III - DUTIES": {},     "Item Details": [                         {                             "INVSNO": "",                             "ITEMSN": "",                             "CTH": "",                             "ITEM DESCRIPTION": "",        
                     "COO": "",                             "C_QTY": "",                             "C_UQC": "",                             "S_QTY": "",                             "S_UQC": "",                             "SCH": "",                             "END USE": "",                             "ASSESS VALUE": "",                             "TOTAL DUTY": ""                         }                         ],     "Item Duty - Notn No": [                         {                             "INVSNO": "",                             "ITEMSN": "",                             "BCD": "",                             "SWS": "",                             "IGST": "",                             "G CESS": "",                             "ADD": "",                             "T VALUE": ""                         }                         ],     "Item Duty - Notn SNo": [                                 {                                     "INVSNO": "",                                     "ITEMSN": "",                                     "BCD": "",                                     "SWS": "",                                     "IGST": "",                                     "G CESS": "",                                     "ADD": "",                                     "T VALUE": ""                                 }                                 ],     "Item Duty - Rate": [                             {                                 "INVSNO": "",                                 "ITEMSN": "",                                 "BCD": "",                                 "SWS": "",                                 "IGST": "",                                 "G CESS": "",                                 "ADD": "",                                 "T VALUE": ""                             }                             ],     "Item Duty - Amount": [                                 {                                     "INVSNO": "",                                   
  "ITEMSN": "",                                     "BCD": "",                                     "SWS": "",                                     "IGST": "",                                     "G CESS": "",                                     "ADD": "",                                     "T VALUE": ""                                 }                                 ],     "Item Duty - Duty Fg": [                                 {                                     "INVSNO": "",                                     "ITEMSN": "",                                     "BCD": "",                                     "SWS": "",                                     "IGST": "",                                     "G CESS": "",                                     "ADD": "",                                     "T VALUE": ""                                 }                                 ],     "Other Duty - Notn No": [                                 {                                     "INVSNO": "",                                     "ITEMSN": "",                                     "CAIDC": ""                                 }                                 ],     "Other Duty - Notn SNo": [                                 {                                     "INVSNO": "",                                     "ITEMSN": "",                                     "CAIDC": ""                                 }                                 ],     "Other Duty - Rate": [                             {                                 "INVSNO": "",                                 "ITEMSN": "",                                 "CAIDC": ""                             }                             ],     "Other Duty - Amount": [                                 {                                     "INVSNO": "",                                     "ITEMSN": "",                                     "CAIDC": ""                                 }                                 ],     
"Other Duty - Duty Fg": [                                 {                                     "INVSNO": "",                                     "ITEMSN": "",                                     "CAIDC": ""                                 }                                 ] } ======================================== PART-IV — ADDITIONAL DETAILS ======================================== {   "PART - IV - ADDITIONAL DETAILS": {},   "LICENSE DETAILS": [                         {                             "INVSNO": "",                             "ITMSNO": "",                             "LIC SLNO": "",                             "LIC NO": "",                             "LIC DATE": "",                             "CODE": "",                             "PORT": "",                             "DEBIT VALUE": "",                             "QTY": "",                             "UQC": "",                             "DEBIT DUTY": ""                         }                         ] } ======================================== PART-V — OTHER COMPLIANCES ======================================== {   "PART - V - OTHER COMPLIANCES": {     "EXAMINATION ORDER RMS": "",     "EXAMINATION ORDER": "",     "PGA EXAMINATION INSTRUCTIONS": "",     "EXAMINATION REPORT": "",     "SUPERINTENDENT COMMENTS": ""   } } ========================================== PART-VI — DECLARATIONS ========================================== {   "PART-VI- DECLARATIONS": "None" } ========================================== FINAL RULES ========================================== - Detect document PART.   - Return ONLY that PART’s JSON.   - Output MUST be valid JSON.   - No comments or text outside JSON.   - No dots in keys.   - Do NOT infer values; use "" when missing.
28
  """,
29
- "anything":"""You convert unstructured text into a structured JSON object. OUTPUT ONLY VALID JSON. NO extra words. GOAL - Extract EVERY possible data point as key–value pairs. - Organize data into top-level sections (descriptive headings), each containing ONLY: (a) a flat object of key–value pairs, or (b) an array of flat objects for repeated rows/groups. - Depth limit = 2: root → section → (object | array of objects). Do NOT nest a section inside another section. NAMING & CHAR RULES - Use descriptive section names from the document (e.g., "Data", "Vendor Details", "Measurement Data", "Totals"). - Keys MUST NOT contain the "." (dot) character. (Decimal points in numeric VALUES are allowed.) - Normalize whitespace; keep keys human-readable. Avoid cryptic keys. DATA TYPING & FORMATS - Use JSON native types: - Numbers as numbers when unambiguous (e.g., totals, quantities, measurements). - IDs with leading zeros (invoice numbers, codes) as strings. - Dates as "YYYY-MM-DD" when you can parse confidently; otherwise keep original string. - Times as "HH:MM" 24-hour when clear; otherwise keep original string. - If a value exists but is unreadable/blank, use an empty string "" (not null). REPETITIONS & TABLES - If a field or group repeats (e.g., multiple line items, measurements, addresses), represent it as an array of objects within the appropriate section. - For tabular columns whose headers are integers or numeric-like (e.g., sizes 36, 38, 40…), order them ascending and prefix with a stable positional index to preserve column order: Example keys in a row object: "1 Abbr", "2 Measurement", "3 Tol -", "4 Tol +", "5 36", "6 38", "7 40", ... - Keep each row FLAT (no nested objects inside a row). COMPLETENESS - Include ALL values found (integers, dates, times, totals, references, codes, addresses, etc.). - Do NOT omit duplicates; use arrays for multiple occurrences. 
STRUCTURE TO FOLLOW (TEMPLATE) { "Section Name A": { "key1": value, "key2": value }, "Section Name B": [ { "key1": value, "key2": value }, { "key1": value, "key2": value } ], "Section Name C": { "key1": value } } EXAMPLE (illustrative only) { "Data": { "invoice number": "123", "date": "2000-01-01" }, "Measurement Data": [ { "1 Abbr": "", "2 Measurement": "", "3 Tol -": "", "4 Tol +": "", "5 36": "", "6 38": "", "7 40": "", "8 42": "", "9 44": "", "10 46": "", "11 48": "", "12 Comment": "" } ], "Totals": { "Grand Total": 12740.00 } } STRICTLY ENFORCE - Output is a single JSON object. - No text outside JSON. - No sections nested inside other sections. - No dot characters in KEY NAMES anywhere in the JSON. """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  }
31
  return prompts[part_name]
 
1
  def get_part_classifier_prompt():
2
  return """
3
+ Look at this customs bill page and identify which PART it belongs to if no part there return .
4
  Return ONLY one of these exact values:
5
  PART-1
6
  PART-2
 
26
  """,
27
  "PART-6": """You convert unstructured text into a structured JSON object. OUTPUT ONLY VALID JSON. NO extra words. NO explanation. NO commentary. GOAL - Extract required data points as key–value pairs. - Organize data into top-level sections only. - Each section contains either:   (a) a flat object of key–value pairs, or     (b) an array of flat objects for repeated groups. - Depth limit = 2 (root → section → object/array of objects).     Never nest sections inside other sections. NAMING RULES - Use the exact section names and key names defined below. - Keys MUST NOT contain the dot character ".". - Maintain human-readable, normalized whitespace in key names. - Never rename keys, never add extra keys. DATA TYPING - Use JSON native types:   - Numbers as numbers.   - IDs or codes with leading zeros as strings.   - Dates as "YYYY-MM-DD" if clearly parseable; otherwise keep original text.   - Times as "HH:MM" 24-hour if clear. - If a value exists but is unreadable, return "" (not null). - NEVER infer any missing value. NOTIFICATION NUMBER RULE (IMPORTANT) For “CB Code” fields (e.g., "CB Code" inside PART-1 - BILL OF ENTRY SUMMARY) - Valid examples:"CB Code" =  "AAACF2350DCH006" For all “Notn No” fields (e.g., "IGST","G. CESS" inside Item Duty): - Valid examples:"IGST" =  "021/2023", "G CESS" = "001/2017". - Return "IGST" and "G CESS" can not be null or Empty For all “Notn SNo” fields (e.g., "IGST", "G CESS" inside Item Duty): - Valid examples:"IGST" =  "1" or "III70", "G CESS" = "56". For all “Rate” fields (e.g., BCD, SWS inside Item Duty): - Valid examples:BCD =  "7.5", SWS = "10". For all “Notn No” fields (e.g., CAIDC inside Other Duties): - Valid examples:CAIDC =  "011/2021". For all “Notn SNo” fields (e.g., CAIDC inside Other Duties): - Valid examples:CAIDC =  "18". For all “Amount” fields (e.g., CAIDC inside Other Duties): - Valid examples:CAIDC =  "0". 
- Return Exact value or can be 0 or Empty For all “Duty Fg” fields (e.g., CAIDC inside Other Duties): - Valid examples:CAIDC =  "4437540". - Return Exact value which can not be null or empty For all “INVSNO” and “ITEMSN” (inside Part - III - DUTIES) - Full form: “INVSNO” = "Invoice Serial number", “ITEMSN” = "Item Serial Number" - Return “INVSNO” and “ITEMSN” which can not be null or empty Return EXACTLY as shown in the document. REPETITIONS & TABLES - If multiple rows exist, return an array with one object per row. - Each object must remain FLAT. - Do not nest objects deeper than allowed. - Keep column order exactly as defined. MANDATORY BEHAVIOR - Detect which PART the document belongs to. - Output ONLY the JSON object for that PART. - If data for a required field is missing, return "". ============================================= PART-1 — BILL OF ENTRY SUMMARY ============================================= If the document corresponds to PART-1, output: {   "PART-1 - BILL OF ENTRY SUMMARY": {     "Port Code": "",     "BE NO": "",     "BE Date": "",     "BE TYPE": "",     "IEC/Br": "",     "CB Code": "",     "Mode": "",     "DEF BE": "",     "ASSESS": "",     "EXAM": "",     "PROV/FINAL": "",     "COUNTRY OF ORIGIN": "",     "PORT OF LOADING": "",     "PORT OF SHIPMENT": "",     "IMPORTER NAME & ADDRESS": "",     "AD CODE": "",     "CB NAME": "",     "BCD": "",     "SWS": "",     "CVD":"",     "IGST": "",     "TOT ASS VALUE": "",     "TOTAL DUTY": "",     "INT": "",     "PNLTY": "",     "FINE": "",     "TOT AMOUNT": "",     "Submission": "",     "ASSESSMENT": "",     "EXCHNAGE RATE": "",     "OOC NO": "",     "OOC DATE": ""   },   "MANIFEST DETAILS": [     {       "IGM NO": "",       "IGM DATE": "",       "INW DATE": "",       "MAWB NO": "",       "DATE": "",       "HAWB NO": "",       "HAWB DATE": "",       "PKG": "",       "GW": ""     }   ],   "BOND DETAILS": [     {       "BOND NO": "",       "PORT": "",       "BOND CD": "",       "DEBT AMT": ""     }   ],   
"Payment Details": [     {       "SR NO": "",       "CHALLAN NO": "",       "PAID ON": "",       "AMOUNT(RS)": ""     }   ],   "CONTAINER DETAILS": [     {       "S NO": "",       "LCL/FCL": "",       "CONTAINER NUMBER": ""     }   ],   "INVOICE DETAILS-SUMMERY": [     {         "S NO":"",         "INVOICE NO":"",         "INV AMT": "",         "CUR":""     }   ] } ============================================ PART-II — INVOICE & VALUATION DETAILS ============================================ {     "PART -II - INVOICE & VALUATION DETAILS": {},          "Details": {                 "SUPPLIER NAME & ADDRESS": "",                 "FREIGHT": "",                 "INSURANCE": "",                 "LOADING": "",                 "COMMN": "",                 "VALUATION METHOD": "",                 "Cur": "",                 "Term": "",                 "REL TD": "",                 "SVB CH": "",                 "SVB NO": "",                 "DATE": ""                 },     "INVOICE": [                 {                     "S NO": "",                     "INVOICE No Dt": ""                 }                 ],     "Item Details": [                         {                             "S NO": "",                             "CTH": "",                             "DESCRIPTION": "",                             "UNIT PRICE": "",                             "QUANTITY": "",                             "UQC": "",                             "AMOUNT": ""                         }                     ]    } ==================================== PART-III — DUTIES ==================================== ✔ Item Duty section FIXED, simplified, corrected   ✔ Each table is ONE row   ✔ Field order fixed   ✔ Dot-free keys   ✔ No duplication {   "Part - III - DUTIES": {},     "Item Details": [                         {                             "INVSNO": "",                             "ITEMSN": "",                             "CTH": "",                             "ITEM DESCRIPTION": "",        
                     "COO": "",                             "C_QTY": "",                             "C_UQC": "",                             "S_QTY": "",                             "S_UQC": "",                             "SCH": "",                             "END USE": "",                             "ASSESS VALUE": "",                             "TOTAL DUTY": ""                         }                         ],     "Item Duty - Notn No": [                         {                             "INVSNO": "",                             "ITEMSN": "",                             "BCD": "",                             "SWS": "",                             "IGST": "",                             "G CESS": "",                             "ADD": "",                             "T VALUE": ""                         }                         ],     "Item Duty - Notn SNo": [                                 {                                     "INVSNO": "",                                     "ITEMSN": "",                                     "BCD": "",                                     "SWS": "",                                     "IGST": "",                                     "G CESS": "",                                     "ADD": "",                                     "T VALUE": ""                                 }                                 ],     "Item Duty - Rate": [                             {                                 "INVSNO": "",                                 "ITEMSN": "",                                 "BCD": "",                                 "SWS": "",                                 "IGST": "",                                 "G CESS": "",                                 "ADD": "",                                 "T VALUE": ""                             }                             ],     "Item Duty - Amount": [                                 {                                     "INVSNO": "",                                   
  "ITEMSN": "",                                     "BCD": "",                                     "SWS": "",                                     "IGST": "",                                     "G CESS": "",                                     "ADD": "",                                     "T VALUE": ""                                 }                                 ],     "Item Duty - Duty Fg": [                                 {                                     "INVSNO": "",                                     "ITEMSN": "",                                     "BCD": "",                                     "SWS": "",                                     "IGST": "",                                     "G CESS": "",                                     "ADD": "",                                     "T VALUE": ""                                 }                                 ],     "Other Duty - Notn No": [                                 {                                     "INVSNO": "",                                     "ITEMSN": "",                                     "CAIDC": ""                                 }                                 ],     "Other Duty - Notn SNo": [                                 {                                     "INVSNO": "",                                     "ITEMSN": "",                                     "CAIDC": ""                                 }                                 ],     "Other Duty - Rate": [                             {                                 "INVSNO": "",                                 "ITEMSN": "",                                 "CAIDC": ""                             }                             ],     "Other Duty - Amount": [                                 {                                     "INVSNO": "",                                     "ITEMSN": "",                                     "CAIDC": ""                                 }                                 ],     
"Other Duty - Duty Fg": [                                 {                                     "INVSNO": "",                                     "ITEMSN": "",                                     "CAIDC": ""                                 }                                 ] } ======================================== PART-IV — ADDITIONAL DETAILS ======================================== {   "PART - IV - ADDITIONAL DETAILS": {},   "LICENSE DETAILS": [                         {                             "INVSNO": "",                             "ITMSNO": "",                             "LIC SLNO": "",                             "LIC NO": "",                             "LIC DATE": "",                             "CODE": "",                             "PORT": "",                             "DEBIT VALUE": "",                             "QTY": "",                             "UQC": "",                             "DEBIT DUTY": ""                         }                         ] } ======================================== PART-V — OTHER COMPLIANCES ======================================== {   "PART - V - OTHER COMPLIANCES": {     "EXAMINATION ORDER RMS": "",     "EXAMINATION ORDER": "",     "PGA EXAMINATION INSTRUCTIONS": "",     "EXAMINATION REPORT": "",     "SUPERINTENDENT COMMENTS": ""   } } ========================================== PART-VI — DECLARATIONS ========================================== {   "PART-VI- DECLARATIONS": "None" } ========================================== FINAL RULES ========================================== - Detect document PART.   - Return ONLY that PART’s JSON.   - Output MUST be valid JSON.   - No comments or text outside JSON.   - No dots in keys.   - Do NOT infer values; use "" when missing.
28
  """,
29
+ "anything": """
30
+ Extract all visible text from this image.
31
+
32
+ Return ONLY valid JSON in this exact format:
33
+ {
34
+ "FULL_TEXT": ""
35
+ }
36
+
37
+ Rules:
38
+ - Preserve reading order as closely as possible: top-to-bottom, left-to-right.
39
+ - Do not summarize.
40
+ - Do not classify the document.
41
+ - Do not infer missing text.
42
+ - If some text is unreadable, skip it.
43
+ - If nothing is readable, return:
44
+ {"FULL_TEXT": ""}
45
+ """
46
  }
47
  return prompts[part_name]