Spaces:
Paused
Paused
Update app.py
Browse files — using prompt, skip pdf
app.py
CHANGED
|
@@ -24,55 +24,60 @@ except AttributeError:
|
|
| 24 |
RESAMPLE = Image.LANCZOS
|
| 25 |
|
| 26 |
PROMPT_FREIGHT_JSON = """
|
| 27 |
-
Please analyze the freight rate
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
"
|
| 52 |
-
"
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
},
|
| 61 |
-
"
|
| 62 |
-
}
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
|
|
|
| 76 |
### Date rules
|
| 77 |
- valid_from format:
|
| 78 |
- `DD/MM/YYYY` (if full date)
|
|
@@ -82,20 +87,22 @@ Please analyze the freight rate table in the file I provide and convert it into
|
|
| 82 |
- valid_to:
|
| 83 |
- exact `DD/MM/YYYY` if present
|
| 84 |
- else `UFN`
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
-
|
|
|
|
| 88 |
- If the table shows "RQ" or similar, set value as "RQST".
|
| 89 |
- Group same-price destinations into one record separated by "/".
|
| 90 |
- Always use IATA code for origin and destination.
|
| 91 |
- Flight number (e.g. ZH118) is not charge code.
|
| 92 |
- Frequency: D[1-7]; 'Daily' = D1234567. Join multiple (e.g. D3,D4→D34).
|
| 93 |
- If local charges exist, list them.
|
| 94 |
-
- If validity missing, set null.
|
| 95 |
- Direction: Export if origin is Vietnam (SGN, HAN, DAD...), else Import.
|
| 96 |
- Provide short plain English reasons for "shipping_line_reason" & "charge_code_reason".
|
| 97 |
- Replace commas in remarks with semicolons.
|
| 98 |
-
- Only return JSON.
|
|
|
|
| 99 |
"""
|
| 100 |
|
| 101 |
# ================== HELPERS ==================
|
|
@@ -246,7 +253,7 @@ def run_process(file, question, model_choice, temperature, top_p, external_api_u
|
|
| 246 |
check_result = check_pdf_structure(file_bytes)
|
| 247 |
print(f"[PDF Check] {filename}: {check_result}")
|
| 248 |
|
| 249 |
-
if check_result == "có":
|
| 250 |
try:
|
| 251 |
print("➡️ PDF có nhiều cột/nhiều trang → dùng pdfplumber extract trước rồi Gemini.")
|
| 252 |
all_dfs = []
|
|
|
|
| 24 |
RESAMPLE = Image.LANCZOS
|
| 25 |
|
| 26 |
PROMPT_FREIGHT_JSON = """
|
| 27 |
+
Please analyze the freight rate tables in the file I provide. This file may contain **multiple airlines' tariffs**. Your task is to extract **one JSON object per airline**, using the following schema:
|
| 28 |
+
|
| 29 |
+
[
|
| 30 |
+
{
|
| 31 |
+
"shipping_line": "...",
|
| 32 |
+
"shipping_line_code": "...",
|
| 33 |
+
"shipping_line_reason": "Why this carrier is chosen?",
|
| 34 |
+
"fee_type": "Air Freight",
|
| 35 |
+
"valid_from": ...,
|
| 36 |
+
"valid_to": ...,
|
| 37 |
+
"charges": [
|
| 38 |
+
{
|
| 39 |
+
"frequency": "...",
|
| 40 |
+
"package_type": "...",
|
| 41 |
+
"aircraft_type": "...",
|
| 42 |
+
"direction": "Export or Import or null",
|
| 43 |
+
"origin": "...",
|
| 44 |
+
"destination": "...",
|
| 45 |
+
"charge_name": "...",
|
| 46 |
+
"charge_code": "...",
|
| 47 |
+
"charge_code_reason": "...",
|
| 48 |
+
"cargo_type": "...",
|
| 49 |
+
"currency": "...",
|
| 50 |
+
"transit": "...",
|
| 51 |
+
"transit_time": "...",
|
| 52 |
+
"weight_breaks": {
|
| 53 |
+
"M": ...,
|
| 54 |
+
"N": ...,
|
| 55 |
+
"+45kg": ...,
|
| 56 |
+
"+100kg": ...,
|
| 57 |
+
"+300kg": ...,
|
| 58 |
+
"+500kg": ...,
|
| 59 |
+
"+1000kg": ...,
|
| 60 |
+
"other": {
|
| 61 |
+
key: value
|
| 62 |
+
},
|
| 63 |
+
"weight_breaks_reason":"Why chosen weight_breaks?"
|
| 64 |
},
|
| 65 |
+
"remark": "..."
|
| 66 |
+
}
|
| 67 |
+
],
|
| 68 |
+
"local_charges": [
|
| 69 |
+
{
|
| 70 |
+
"charge_name": "...",
|
| 71 |
+
"charge_code": "...",
|
| 72 |
+
"unit": "...",
|
| 73 |
+
"amount": ...,
|
| 74 |
+
"remark": "..."
|
| 75 |
+
}
|
| 76 |
+
]
|
| 77 |
+
},
|
| 78 |
+
...
|
| 79 |
+
]
|
| 80 |
+
|
| 81 |
### Date rules
|
| 82 |
- valid_from format:
|
| 83 |
- `DD/MM/YYYY` (if full date)
|
|
|
|
| 87 |
- valid_to:
|
| 88 |
- exact `DD/MM/YYYY` if present
|
| 89 |
- else `UFN`
|
| 90 |
+
|
| 91 |
+
### STRICT RULES:
|
| 92 |
+
- Return a JSON **array** of airline objects (not just one).
|
| 93 |
+
- All rates must exactly match the corresponding weight break columns (M,N,45kg, 100kg, 300kg, 500kg, 1000kg, etc.). Set null if N/A. No assumptions or interpolations.
|
| 94 |
- If the table shows "RQ" or similar, set value as "RQST".
|
| 95 |
- Group same-price destinations into one record separated by "/".
|
| 96 |
- Always use IATA code for origin and destination.
|
| 97 |
- Flight number (e.g. ZH118) is not charge code.
|
| 98 |
- Frequency: D[1-7]; 'Daily' = D1234567. Join multiple (e.g. D3,D4→D34).
|
| 99 |
- If local charges exist, list them.
|
| 100 |
+
- If validity is missing, set null.
|
| 101 |
- Direction: Export if origin is Vietnam (SGN, HAN, DAD...), else Import.
|
| 102 |
- Provide short plain English reasons for "shipping_line_reason" & "charge_code_reason".
|
| 103 |
- Replace commas in remarks with semicolons.
|
| 104 |
+
- **Only return valid JSON. No text explanation. No markdown.**
|
| 105 |
+
|
| 106 |
"""
|
| 107 |
|
| 108 |
# ================== HELPERS ==================
|
|
|
|
| 253 |
check_result = check_pdf_structure(file_bytes)
|
| 254 |
print(f"[PDF Check] {filename}: {check_result}")
|
| 255 |
|
| 256 |
+
if check_result == "có" and 1 == 2:  # temporarily skip this branch to test the multi-airline prompt
|
| 257 |
try:
|
| 258 |
print("➡️ PDF có nhiều cột/nhiều trang → dùng pdfplumber extract trước rồi Gemini.")
|
| 259 |
all_dfs = []
|