vithacocf committed on
Commit
775fa37
·
verified ·
1 Parent(s): 3fd0688

Update app.py

Browse files

using prompt, skip pdf

Files changed (1) hide show
  1. app.py +61 -54
app.py CHANGED
@@ -24,55 +24,60 @@ except AttributeError:
24
  RESAMPLE = Image.LANCZOS
25
 
26
  PROMPT_FREIGHT_JSON = """
27
- Please analyze the freight rate table in the file I provide and convert it into JSON in the following structure:
28
- {
29
- "shipping_line": "...",
30
- "shipping_line_code": "...",
31
- "shipping_line_reason": "Why this carrier is chosen?",
32
- "fee_type": "Air Freight",
33
- "valid_from": ...,
34
- "valid_to": ...,
35
- "charges": [
36
- {
37
- "frequency": "...",
38
- "package_type": "...",
39
- "aircraft_type": "...",
40
- "direction": "Export or Import or null",
41
- "origin": "...",
42
- "destination": "...",
43
- "charge_name": "...",
44
- "charge_code": "...",
45
- "charge_code_reason": "...",
46
- "cargo_type": "...",
47
- "currency": "...",
48
- "transit": "...",
49
- "transit_time": "...",
50
- "weight_breaks": {
51
- "M": ...,
52
- "N": ...,
53
- "+45kg": ...,
54
- "+100kg": ...,
55
- "+300kg": ...,
56
- "+500kg": ...,
57
- "+1000kg": ...,
58
- "other": {
59
- key: value
 
 
 
 
60
  },
61
- "weight_breaks_reason":"Why chosen weight_breaks?"
62
- },
63
- "remark": "..."
64
- }
65
- ],
66
- "local_charges": [
67
- {
68
- "charge_name": "...",
69
- "charge_code": "...",
70
- "unit": "...",
71
- "amount": ...,
72
- "remark": "..."
73
- }
74
- ]
75
- }
 
76
  ### Date rules
77
  - valid_from format:
78
  - `DD/MM/YYYY` (if full date)
@@ -82,20 +87,22 @@ Please analyze the freight rate table in the file I provide and convert it into
82
  - valid_to:
83
  - exact `DD/MM/YYYY` if present
84
  - else `UFN`
85
- STRICT RULES:
86
- - ONLY return a single JSON object as specified above.
87
- - All rates must exactly match the corresponding weight break columns (M,N,45kg, 100kg, 300kg, 500kg, 1000kg, etc.). set null if N/A. No assumptions or interpolations.
 
88
  - If the table shows "RQ" or similar, set value as "RQST".
89
  - Group same-price destinations into one record separated by "/".
90
  - Always use IATA code for origin and destination.
91
  - Flight number (e.g. ZH118) is not charge code.
92
  - Frequency: D[1-7]; 'Daily' = D1234567. Join multiple (e.g. D3,D4→D34).
93
  - If local charges exist, list them.
94
- - If validity missing, set null.
95
  - Direction: Export if origin is Vietnam (SGN, HAN, DAD...), else Import.
96
  - Provide short plain English reasons for "shipping_line_reason" & "charge_code_reason".
97
  - Replace commas in remarks with semicolons.
98
- - Only return JSON.
 
99
  """
100
 
101
  # ================== HELPERS ==================
@@ -246,7 +253,7 @@ def run_process(file, question, model_choice, temperature, top_p, external_api_u
246
  check_result = check_pdf_structure(file_bytes)
247
  print(f"[PDF Check] {filename}: {check_result}")
248
 
249
- if check_result == "có":
250
  try:
251
  print("➡️ PDF có nhiều cột/nhiều trang → dùng pdfplumber extract trước rồi Gemini.")
252
  all_dfs = []
 
24
  RESAMPLE = Image.LANCZOS
25
 
26
  PROMPT_FREIGHT_JSON = """
27
+ Please analyze the freight rate tables in the file I provide. This file may contain **multiple airlines' tariffs**. Your task is to extract **one JSON object per airline**, using the following schema:
28
+
29
+ [
30
+ {
31
+ "shipping_line": "...",
32
+ "shipping_line_code": "...",
33
+ "shipping_line_reason": "Why this carrier is chosen?",
34
+ "fee_type": "Air Freight",
35
+ "valid_from": ...,
36
+ "valid_to": ...,
37
+ "charges": [
38
+ {
39
+ "frequency": "...",
40
+ "package_type": "...",
41
+ "aircraft_type": "...",
42
+ "direction": "Export or Import or null",
43
+ "origin": "...",
44
+ "destination": "...",
45
+ "charge_name": "...",
46
+ "charge_code": "...",
47
+ "charge_code_reason": "...",
48
+ "cargo_type": "...",
49
+ "currency": "...",
50
+ "transit": "...",
51
+ "transit_time": "...",
52
+ "weight_breaks": {
53
+ "M": ...,
54
+ "N": ...,
55
+ "+45kg": ...,
56
+ "+100kg": ...,
57
+ "+300kg": ...,
58
+ "+500kg": ...,
59
+ "+1000kg": ...,
60
+ "other": {
61
+ key: value
62
+ },
63
+ "weight_breaks_reason":"Why chosen weight_breaks?"
64
  },
65
+ "remark": "..."
66
+ }
67
+ ],
68
+ "local_charges": [
69
+ {
70
+ "charge_name": "...",
71
+ "charge_code": "...",
72
+ "unit": "...",
73
+ "amount": ...,
74
+ "remark": "..."
75
+ }
76
+ ]
77
+ },
78
+ ...
79
+ ]
80
+
81
  ### Date rules
82
  - valid_from format:
83
  - `DD/MM/YYYY` (if full date)
 
87
  - valid_to:
88
  - exact `DD/MM/YYYY` if present
89
  - else `UFN`
90
+
91
+ ### STRICT RULES:
92
+ - Return a JSON **array** of airline objects (not just one).
93
+ - All rates must exactly match the corresponding weight break columns (M,N,45kg, 100kg, 300kg, 500kg, 1000kg, etc.). Set null if N/A. No assumptions or interpolations.
94
  - If the table shows "RQ" or similar, set value as "RQST".
95
  - Group same-price destinations into one record separated by "/".
96
  - Always use IATA code for origin and destination.
97
  - Flight number (e.g. ZH118) is not charge code.
98
  - Frequency: D[1-7]; 'Daily' = D1234567. Join multiple (e.g. D3,D4→D34).
99
  - If local charges exist, list them.
100
+ - If validity is missing, set null.
101
  - Direction: Export if origin is Vietnam (SGN, HAN, DAD...), else Import.
102
  - Provide short plain English reasons for "shipping_line_reason" & "charge_code_reason".
103
  - Replace commas in remarks with semicolons.
104
+ - **Only return valid JSON. No text explanation. No markdown.**
105
+
106
  """
107
 
108
  # ================== HELPERS ==================
 
253
  check_result = check_pdf_structure(file_bytes)
254
  print(f"[PDF Check] {filename}: {check_result}")
255
 
256
+ if check_result == "có" and 1 == 2: # bỏ qua if này test thử prompt nhiều hãng
257
  try:
258
  print("➡️ PDF có nhiều cột/nhiều trang → dùng pdfplumber extract trước rồi Gemini.")
259
  all_dfs = []