mlbench123 committed on
Commit
5af3434
·
verified ·
1 Parent(s): aa6d0d8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -65
app.py CHANGED
@@ -11,106 +11,98 @@ MODEL = "gpt-5.1"
11
 
12
  client = OpenAI(api_key=API_KEY)
13
 
14
-
15
  def upload_pdf(path):
16
- f = client.files.create(
17
- file=open(path, "rb"),
18
- purpose="assistants"
19
- )
20
- return f.id
21
-
22
 
23
- def build_prompt():
24
  return (
25
- "Extract structured JSON from this logistics shipping document. "
26
- "Return ONLY valid JSON.\n\n"
27
  "{\n"
28
  " \"po_number\": string|null,\n"
29
- " \"ship_from\": string|null,\n"
 
30
  " \"carrier_type\": string|null,\n"
31
  " \"rail_car_number\": string|null,\n"
32
  " \"total_quantity\": number|null,\n"
33
  " \"inventories\": [\n"
34
- " {\n"
35
- " \"productName\": string,\n"
36
- " \"productCode\": string|null,\n"
37
- " \"variants\": [\n"
38
- " {\n"
39
- " \"dimensions\": string|null,\n"
40
- " \"pcs_per_pkg\": number|null,\n"
41
- " \"length_ft\": number|null,\n"
42
- " \"width\": number|null,\n"
43
- " \"packages\": number|null,\n"
44
- " \"pieces\": number|null,\n"
45
- " \"fbm\": number|null\n"
46
- " }\n"
47
- " ],\n"
48
- " \"total_pcs\": number|null,\n"
49
- " \"total_fbm\": number|null\n"
50
- " }\n"
51
  " ],\n"
52
  " \"custom_fields\": {}\n"
53
  "}\n\n"
54
-
55
- "SHIP_FROM EXTRACTION RULES (MANDATORY):\n"
56
- "1. If document contains explicit Origin/Ship From labels, extract that value.\n"
57
- "2. If it is an email-based inbound notice and no explicit origin exists, use the email 'From:' field.\n"
58
- "3. If both Origin and Mill exist, use Origin.\n"
59
- "4. If only Mill exists and is clearly the shipping point, use Mill.\n"
60
- "5. Priority: Origin Email From Mill Sender block.\n"
61
- "6. If nothing matches, ship_from = null.\n\n"
62
-
63
- "Inventory rules:\n"
64
- "- Never merge different lengths.\n"
65
- "- Extract packages, pcs_per_pkg, pieces, fbm EXACTLY.\n"
66
- "- total_pcs = sum of variant pieces.\n"
67
- "- total_fbm = sum of variant fbm.\n\n"
68
-
69
- "total_quantity rules:\n"
70
- "- Use the explicit total if shown.\n"
71
- "- If not explicitly shown, do not infer.\n\n"
72
-
 
 
 
 
 
 
73
  "Return ONLY the JSON."
74
  )
75
 
76
-
77
  def extract(file):
78
  path = Path(file.name)
79
  suffix = path.suffix.lower()
80
 
81
  if suffix == ".pdf":
82
  fid = upload_pdf(path)
83
- msg = [
84
- {"type": "text", "text": build_prompt()},
85
  {"type": "file", "file": {"file_id": fid}}
86
  ]
87
  else:
88
  b64 = base64.b64encode(path.read_bytes()).decode()
89
- msg = [
90
- {"type": "text", "text": build_prompt()},
91
- {
92
- "type": "image_url",
93
- "image_url": {"url": f"data:image/{suffix[1:]};base64,{b64}"}
94
- }
95
  ]
96
 
97
  r = client.chat.completions.create(
98
  model=MODEL,
99
- messages=[{"role": "user", "content": msg}]
100
  )
101
 
102
- txt = r.choices[0].message.content
103
- s = txt.find("{")
104
- e = txt.rfind("}")
105
- return txt[s:e+1]
106
-
107
 
108
  def ui(file):
109
  return extract(file)
110
 
111
-
112
- # EXAMPLES must be ONLY filenames,
113
- # and these files must exist in the root directory of the Space.
114
  examples = [
115
  "IMG_0001.jpg",
116
  "IMG_0002.jpg"
 
11
 
12
  client = OpenAI(api_key=API_KEY)
13
 
 
14
  def upload_pdf(path):
15
+ return client.files.create(file=open(path, "rb"), purpose="assistants").id
 
 
 
 
 
16
 
17
+ def prompt():
18
  return (
19
+ "Extract structured JSON from the attached logistics document. Return ONLY valid JSON.\n"
 
20
  "{\n"
21
  " \"po_number\": string|null,\n"
22
+ " \"ship_from_name\": string|null,\n"
23
+ " \"ship_from_email\": string|null,\n"
24
  " \"carrier_type\": string|null,\n"
25
  " \"rail_car_number\": string|null,\n"
26
  " \"total_quantity\": number|null,\n"
27
  " \"inventories\": [\n"
28
+ " {\n"
29
+ " \"productName\": string|null,\n"
30
+ " \"productCode\": string|null,\n"
31
+ " \"variants\": [\n"
32
+ " {\n"
33
+ " \"dimensions\": string|null,\n"
34
+ " \"pcs_per_pkg\": number|null,\n"
35
+ " \"length_ft\": number|null,\n"
36
+ " \"width\": number|null,\n"
37
+ " \"packages\": number|null,\n"
38
+ " \"pieces\": number|null,\n"
39
+ " \"fbm\": number|string|null\n"
40
+ " }\n"
41
+ " ],\n"
42
+ " \"total_pcs\": number|null,\n"
43
+ " \"total_fbm\": number|string|null\n"
44
+ " }\n"
45
  " ],\n"
46
  " \"custom_fields\": {}\n"
47
  "}\n\n"
48
+ "SHIP FROM RULES:\n"
49
+ "- If explicit fields like 'Origin', 'Ship From' exist, extract that value.\n"
50
+ "- If the document is an email-style inbound notice (header block) and shows:\n"
51
+ " From: Name <email>\n"
52
+ " then ship_from_name = Name, ship_from_email = email.\n"
53
+ "- If only an email exists and no human name, set both fields to that email.\n"
54
+ "- If both Origin and an email sender exist, use Origin for ship_from_name and still capture the email under ship_from_email.\n"
55
+ "- Priority: Origin Email Name → Mill → Sender block → null.\n\n"
56
+ "CARRIER / EQUIPMENT RULE:\n"
57
+ "- If the table contains:\n"
58
+ " Equipment id = <value>\n"
59
+ " Mark = <value>\n"
60
+ " then ALWAYS treat 'Equipment id' as the railcar number.\n"
61
+ "- NEVER use 'Mark' as railcar number.\n"
62
+ "- Carrier type must match the carrier text exactly (e.g., CHICAGO RAIL LINK).\n\n"
63
+ "INVENTORY RULES:\n"
64
+ "- Do not merge length groups. Each unique length or dimension is its own variant.\n"
65
+ "- Extract pcs_per_pkg, packages, pieces, fbm exactly as written.\n"
66
+ "- total_pcs = sum of pieces.\n"
67
+ "- total_fbm = sum of fbm.\n\n"
68
+ "TOTAL QUANTITY RULE:\n"
69
+ "- Use explicit totals if they appear.\n"
70
+ "- If no explicit total quantity appears, leave null.\n\n"
71
+ "CUSTOM FIELDS RULE:\n"
72
+ "- Capture all meaningful leftover fields not part of main schema.\n\n"
73
  "Return ONLY the JSON."
74
  )
75
 
 
76
  def extract(file):
77
  path = Path(file.name)
78
  suffix = path.suffix.lower()
79
 
80
  if suffix == ".pdf":
81
  fid = upload_pdf(path)
82
+ content = [
83
+ {"type": "text", "text": prompt()},
84
  {"type": "file", "file": {"file_id": fid}}
85
  ]
86
  else:
87
  b64 = base64.b64encode(path.read_bytes()).decode()
88
+ content = [
89
+ {"type": "text", "text": prompt()},
90
+ {"type": "image_url", "image_url": {"url": f"data:image/{suffix[1:]};base64,{b64}"}}
 
 
 
91
  ]
92
 
93
  r = client.chat.completions.create(
94
  model=MODEL,
95
+ messages=[{"role": "user", "content": content}]
96
  )
97
 
98
+ text = r.choices[0].message.content
99
+ s = text.find("{")
100
+ e = text.rfind("}")
101
+ return text[s:e+1]
 
102
 
103
  def ui(file):
104
  return extract(file)
105
 
 
 
 
106
  examples = [
107
  "IMG_0001.jpg",
108
  "IMG_0002.jpg"