vithacocf committed on
Commit
2e92701
·
verified ·
1 Parent(s): 952d402

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +189 -126
app.py CHANGED
@@ -1,18 +1,15 @@
1
  from __future__ import annotations
2
- import os, io, re, json, tempfile, mimetypes
3
- from typing import Union, Tuple
4
  from PIL import Image
5
  import pandas as pd
6
  import gradio as gr
7
  import google.generativeai as genai
 
8
  import fitz # PyMuPDF
 
9
  import pdfplumber
10
 
11
- try:
12
- import camelot
13
- except Exception:
14
- camelot = None
15
-
16
  # ================== CONFIG ==================
17
  DEFAULT_API_KEY = "AIzaSyBbK-1P3JD6HPyE3QLhkOps6_-Xo3wUFbs"
18
 
@@ -21,27 +18,28 @@ INTERNAL_MODEL_MAP = {
21
  "Gemini 2.5 Pro": "gemini-2.5-pro",
22
  }
23
  EXTERNAL_MODEL_NAME = "prithivMLmods/Camel-Doc-OCR-062825 (External)"
24
-
25
  PROMPT_FREIGHT_JSON = """
26
- Please analyze the freight rate table and convert it into JSON with this schema:
27
  {
28
  "shipping_line": "...",
29
  "shipping_line_code": "...",
 
30
  "fee_type": "Air Freight",
31
- "valid_from": "...",
32
- "valid_to": "...",
33
  "charges": [
34
  {
35
- "origin": "...",
36
- "destination": "...",
37
  "frequency": "...",
38
  "package_type": "...",
39
  "aircraft_type": "...",
40
- "direction": "...",
 
 
41
  "charge_name": "...",
42
- "charge_code": "GCR, DGR, PER, etc.",
43
- "currency": "...",
44
  "cargo_type": "...",
 
45
  "transit": "...",
46
  "transit_time": "...",
47
  "weight_breaks": {
@@ -52,27 +50,50 @@ Please analyze the freight rate table and convert it into JSON with this schema:
52
  "+300kg": ...,
53
  "+500kg": ...,
54
  "+1000kg": ...,
55
- "other": { key: value }
 
 
 
56
  },
57
- "remark": "...",
58
- "pallet_rule": "...",
59
- "additional_cost": "..."
 
 
 
 
 
 
 
60
  }
61
  ]
62
  }
63
- ### RULES
64
- - If remark says "SKID shipment: add 10 cents" → add surcharge line (+0.10 USD/kg) for Pallet (GEN & PER)
65
- - Adjust all weight breaks (+0.1) keeping other keys the same.
66
- - If remark says "Carton = Pallet" → same rates; no extra surcharge.
67
- - If remark says "EU +USD0.30/kg and rest +USD0.20/kg" add 2 surcharge lines.
68
- - Always record Carton rates as base; generate Pallet rates if mentioned.
 
 
 
 
 
 
 
69
  - Group same-price destinations into one record separated by "/".
70
- - Frequency format: D[1-7]; "Daily" = D1234567.
71
- - Direction = Export if origin is Vietnam, else Import.
 
 
 
 
 
72
  - Replace commas in remarks with semicolons.
73
- - Only return valid JSON.
74
  """
75
 
 
76
  # ================== HELPERS ==================
77
  def _read_file_bytes(upload: Union[str, os.PathLike, dict, object] | None) -> bytes:
78
  if upload is None:
@@ -94,56 +115,61 @@ def _guess_name_and_mime(file, file_bytes: bytes) -> Tuple[str, str]:
94
  mime = "application/pdf"
95
  return filename, mime or "application/octet-stream"
96
 
97
- # ================== PDF TABLE EXTRACT ==================
98
  def extract_pdf_tables(file_path: str) -> pd.DataFrame:
99
- """Dùng Camelot trước, fallback pdfplumber nếu lỗi."""
 
 
 
 
 
 
100
  all_dfs = []
101
- try:
102
- total_pages = len(fitz.open(file_path))
103
- print(f"📄 Tổng số trang: {total_pages}")
104
- except:
105
- total_pages = 1
106
-
107
- if camelot is not None:
108
- for page_no in range(1, total_pages + 1):
109
- print(f"🔍 Đang xử lý trang {page_no}...")
110
- dfs_this_page = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  try:
112
- tables = camelot.read_pdf(file_path, flavor="lattice", pages=str(page_no), line_scale=40)
 
 
 
113
  if tables and tables.n > 0:
114
  for t in tables:
115
  if t.shape[0] > 0:
116
  dfs_this_page.append(t.df)
117
- print(f"✅ Lattice OK ({tables.n} bảng).")
118
  except Exception as e:
119
- print(f"⚠️ Lattice lỗi: {e}")
120
-
121
- if not dfs_this_page:
122
- try:
123
- tables = camelot.read_pdf(file_path, flavor="stream", pages=str(page_no), edge_tol=200)
124
- if tables and tables.n > 0:
125
- for t in tables:
126
- if t.shape[0] > 0:
127
- dfs_this_page.append(t.df)
128
- print(f"✅ Stream OK ({tables.n} bảng).")
129
- except Exception as e:
130
- print(f"❌ Stream lỗi: {e}")
131
-
132
- if dfs_this_page:
133
- all_dfs.extend(dfs_this_page)
134
 
135
- if not all_dfs:
136
- print("⚠️ Camelot không tìm thấy bảng → fallback pdfplumber.")
137
- with pdfplumber.open(file_path) as pdf:
138
- for page in pdf.pages:
139
- tables = page.extract_tables()
140
- for tb in tables:
141
- if tb and len(tb) > 2:
142
- df = pd.DataFrame(tb[1:], columns=tb[0])
143
- all_dfs.append(df)
144
 
145
  if not all_dfs:
146
- print("🚫 Không phát hiện bảng trong PDF.")
147
  return pd.DataFrame()
148
 
149
  df_final = pd.concat(all_dfs, ignore_index=True)
@@ -154,123 +180,160 @@ def extract_pdf_tables(file_path: str) -> pd.DataFrame:
154
  print(f"✅ Tổng hợp: {len(df_final)} dòng, {len(df_final.columns)} cột.")
155
  return df_final
156
 
157
- # ================== NOTE EXTRACTION ==================
def extract_pdf_note(file_path: str) -> str:
    """Collect the closing note text of a PDF.

    The text of the last two pages is concatenated and only the final
    15 lines of the result are kept.

    Args:
        file_path: Path of the PDF file on disk.

    Returns:
        The last (up to) 15 text lines joined with newlines, or "" when
        any step of the extraction raises.
    """
    try:
        with pdfplumber.open(file_path) as pdf:
            # Each page contributes "\n" + its text (empty when no text).
            page_texts = [(page.extract_text() or "") for page in pdf.pages[-2:]]
            combined = "".join("\n" + chunk for chunk in page_texts)
            tail = combined.strip().splitlines()[-15:]
            note_text = "\n".join(tail)
            print(f"📝 Note Extracted: {len(note_text)} chars")
            return note_text
    except Exception as e:
        print(f"⚠️ extract_pdf_note lỗi: {e}")
        return ""
172
 
173
- # ================== GEMINI CALL ==================
def call_gemini_with_prompt(content_text: str, note_text: str, question: str, model_choice: str, temperature: float, top_p: float):
    """Send the extracted table and notes to Gemini and return the reply text.

    Args:
        content_text: CSV dump of the extracted table(s).
        note_text: Free-text notes extracted from the PDF.
        question: Optional custom prompt; when blank, PROMPT_FREIGHT_JSON
            is used instead.
        model_choice: UI model label, mapped through INTERNAL_MODEL_MAP
            (unknown labels fall back to "gemini-2.5-flash").
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling value forwarded to the model.

    Returns:
        The response text, or ``str(response)`` when the response exposes
        no usable text.
    """
    api_key = os.environ.get("GOOGLE_API_KEY", DEFAULT_API_KEY)
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel(
        model_name=INTERNAL_MODEL_MAP.get(model_choice, "gemini-2.5-flash"),
        generation_config={"temperature": float(temperature), "top_p": float(top_p)},
    )

    base_prompt = question.strip() if question and question.strip() else PROMPT_FREIGHT_JSON
    prompt = "\n".join([
        "",
        base_prompt,
        "",
        "Below is the extracted CSV data:",
        content_text,
        "",
        "Below are the notes (remark, package type, surcharges, etc.):",
        note_text,
        "",
        "Please analyze everything and generate a valid JSON in the specified format.",
        "",
    ])

    print("🧠 Sending prompt to Gemini...")
    resp = model.generate_content(prompt)
    try:
        return resp.text
    except (AttributeError, ValueError):
        # The `.text` accessor raises ValueError when the response holds no
        # valid text part (e.g. blocked output); getattr's default would
        # not catch that, so fall back explicitly.
        return str(resp)
196
 
197
- # ================== MAIN PROCESS ==================
def run_process(file, question, model_choice, temperature, top_p, external_api_url):
    """Route an uploaded file through table extraction or the OCR fallback.

    PDFs are written to a temporary file so the table/note extractors can
    read them from disk. When tables are found they are sent to Gemini as
    CSV, one request per "CARRIER" block when such rows exist in the first
    column. Everything else goes to run_process_internal_base_v2.

    Args:
        file: Upload object from the UI (None when nothing was uploaded).
        question: Optional custom prompt.
        model_choice: UI model label.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling value.
        external_api_url: Accepted for interface compatibility; not used
            by this function.

    Returns:
        A ``(message, None)`` tuple; on any exception the message is
        ``"ERROR: <ExceptionType>: <detail>"``.
    """
    try:
        if file is None:
            return "❌ No file uploaded.", None

        file_bytes = _read_file_bytes(file)
        filename, mime = _guess_name_and_mime(file, file_bytes)
        print(f"[UPLOAD] {filename} ({mime})")

        if mime == "application/pdf":
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
                tmp.write(file_bytes)
                tmp_path = tmp.name
            try:
                df = extract_pdf_tables(tmp_path)
                note_text = extract_pdf_note(tmp_path)
            finally:
                # delete=False means nobody else removes the file for us.
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass  # best-effort cleanup (file may still be held open)

            if not df.empty:
                # Rows whose first column mentions "CARRIER" start a new block.
                first_col = df.iloc[:, 0].astype(str)
                carrier_rows = df[first_col.str.contains("CARRIER", case=False, na=False)].index.tolist()
                if carrier_rows:
                    results = []
                    for i, start in enumerate(carrier_rows):
                        end = carrier_rows[i + 1] if i + 1 < len(carrier_rows) else len(df)
                        csv_text = df.iloc[start:end].to_csv(index=False)
                        print(f"🚀 Processing carrier block {i+1}/{len(carrier_rows)}...")
                        results.append(
                            call_gemini_with_prompt(csv_text, note_text, question, model_choice, temperature, top_p)
                        )
                    return "\n\n".join(results), None

                csv_text = df.to_csv(index=False)
                print("✅ Gửi Gemini để sinh JSON...")
                message = call_gemini_with_prompt(csv_text, note_text, question, model_choice, temperature, top_p)
                return message, None

            print("⚠️ Không có bảng hợp lệ, fallback OCR Gemini.")

        # Non-PDF uploads and table-less PDFs both use the image-based path.
        return run_process_internal_base_v2(file_bytes, filename, mime, question, model_choice, temperature, top_p)

    except Exception as e:
        return f"ERROR: {type(e).__name__}: {e}", None
 
 
 
 
 
 
 
 
241
 
242
- # ================== FALLBACK OCR ==================
def pdf_to_images(pdf_bytes: bytes) -> list[Image.Image]:
    """Render every page of an in-memory PDF to an RGB PIL image at 200 dpi.

    Args:
        pdf_bytes: Raw bytes of the PDF document.

    Returns:
        One image per page, in page order.
    """
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        images = []
        for page in doc:
            # Rasterise once per page and reuse the pixmap for size and data.
            pix = page.get_pixmap(dpi=200)
            images.append(Image.frombytes("RGB", (pix.width, pix.height), pix.samples))
        return images
    finally:
        doc.close()
 
 
 
 
 
 
 
 
 
 
246
 
def run_process_internal_base_v2(file_bytes, filename, mime, question, model_choice, temperature, top_p, batch_size=3):
    """OCR fallback: send page images to Gemini in batches.

    A PDF (detected by its "%PDF" magic bytes) is rendered to one image per
    page; any other input is opened as a single image. Pages are uploaded
    ``batch_size`` at a time together with the prompt.

    Args:
        file_bytes: Raw bytes of the uploaded file.
        filename: Original file name (not used here).
        mime: Detected MIME type (not used here).
        question: Optional custom prompt; PROMPT_FREIGHT_JSON when falsy.
        model_choice: UI model label mapped through INTERNAL_MODEL_MAP.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling value.
        batch_size: Number of page images per request.

    Returns:
        ``(text, None)`` where text joins the per-batch replies with blank
        lines.
    """
    genai.configure(api_key=os.environ.get("GOOGLE_API_KEY", DEFAULT_API_KEY))
    model = genai.GenerativeModel(
        INTERNAL_MODEL_MAP.get(model_choice, "gemini-2.5-flash"),
        generation_config={"temperature": float(temperature), "top_p": float(top_p)},
    )
    if file_bytes[:4] == b"%PDF":
        pages = pdf_to_images(file_bytes)
    else:
        pages = [Image.open(io.BytesIO(file_bytes))]

    all_text_results = []
    for i in range(0, len(pages), batch_size):
        uploads = []
        for page_image in pages[i:i + batch_size]:
            with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
                png_path = tmp.name
            try:
                # The image must actually be written before it is uploaded;
                # uploading the bare temp file would send an empty PNG.
                page_image.save(png_path)
                uploads.append(genai.upload_file(path=png_path, mime_type="image/png"))
            finally:
                try:
                    os.remove(png_path)
                except OSError:
                    pass  # best-effort local cleanup

        resp = model.generate_content([question or PROMPT_FREIGHT_JSON] + uploads)
        try:
            reply = resp.text
        except (AttributeError, ValueError):
            # `.text` raises ValueError for responses without a text part.
            reply = str(resp)
        all_text_results.append(reply)
    return "\n\n".join(all_text_results), None
 
 
 
 
 
 
 
 
 
 
 
 
 
259
 
 
260
  # ================== UI ==================
def main():
    """Assemble the Gradio interface and return the Blocks app.

    The run button calls run_process with the five visible controls plus a
    State placeholder as the sixth input; the second output is likewise a
    State placeholder.
    """
    model_options = list(INTERNAL_MODEL_MAP) + [EXTERNAL_MODEL_NAME]

    with gr.Blocks(title="📦 Freight JSON Extractor") as demo:
        upload_box = gr.File(label="Upload PDF/Image")
        prompt_box = gr.Textbox(label="Prompt (optional)", lines=2)
        model_picker = gr.Dropdown(choices=model_options, value="Gemini 2.5 Flash", label="Model")
        temperature_slider = gr.Slider(0.0, 2.0, value=0.2, step=0.05)
        top_p_slider = gr.Slider(0.0, 1.0, value=0.95, step=0.01)
        result_view = gr.Code(label="Gemini Output", language="json")
        run_button = gr.Button("🚀 Run Extraction")

        # Placeholders are created in the same order as the inline originals.
        url_placeholder = gr.State()
        extra_output = gr.State()
        run_button.click(
            run_process,
            inputs=[upload_box, prompt_box, model_picker, temperature_slider, top_p_slider, url_placeholder],
            outputs=[result_view, extra_output],
        )
    return demo
273
 
 
274
  demo = main()
 
275
  if __name__ == "__main__":
276
  demo.launch()
 
1
  from __future__ import annotations
2
+ import os, io, re, json, time, mimetypes, tempfile
3
+ from typing import List, Union, Tuple, Any
4
  from PIL import Image
5
  import pandas as pd
6
  import gradio as gr
7
  import google.generativeai as genai
8
+ import requests
9
  import fitz # PyMuPDF
10
+ import camelot
11
  import pdfplumber
12
 
 
 
 
 
 
13
  # ================== CONFIG ==================
14
  DEFAULT_API_KEY = "AIzaSyBbK-1P3JD6HPyE3QLhkOps6_-Xo3wUFbs"
15
 
 
18
  "Gemini 2.5 Pro": "gemini-2.5-pro",
19
  }
20
  EXTERNAL_MODEL_NAME = "prithivMLmods/Camel-Doc-OCR-062825 (External)"
 
21
  PROMPT_FREIGHT_JSON = """
22
+ Please analyze the freight rate table in the file I provide and convert it into JSON in the following structure:
23
  {
24
  "shipping_line": "...",
25
  "shipping_line_code": "...",
26
+ "shipping_line_reason": "Why this carrier is chosen?",
27
  "fee_type": "Air Freight",
28
+ "valid_from": ...,
29
+ "valid_to": ...,
30
  "charges": [
31
  {
 
 
32
  "frequency": "...",
33
  "package_type": "...",
34
  "aircraft_type": "...",
35
+ "direction": "Export or Import or null",
36
+ "origin": "...",
37
+ "destination": "...",
38
  "charge_name": "...",
39
+ "charge_code": "GCR, DGR, PER, etc. (Use IATA Code DO NOT use flight number)",
40
+ "charge_code_reason": "...",
41
  "cargo_type": "...",
42
+ "currency": "...",
43
  "transit": "...",
44
  "transit_time": "...",
45
  "weight_breaks": {
 
50
  "+300kg": ...,
51
  "+500kg": ...,
52
  "+1000kg": ...,
53
+ "other": {
54
+ key: value
55
+ },
56
+ "weight_breaks_reason":"Why chosen weight_breaks?"
57
  },
58
+ "remark": "..."
59
+ }
60
+ ],
61
+ "local_charges": [
62
+ {
63
+ "charge_name": "...",
64
+ "charge_code": "...",
65
+ "unit": "...",
66
+ "amount": ...,
67
+ "remark": "..."
68
  }
69
  ]
70
  }
71
+ ### Date rules
72
+ - valid_from format:
73
+ - `DD/MM/YYYY` (if full date)
74
+ - `01/MM/YYYY` (if month+year only)
75
+ - `01/01/YYYY` (if year only)
76
+ - `UFN` if missing
77
+ - valid_to:
78
+ - exact `DD/MM/YYYY` if present
79
+ - else `UFN`
80
+ STRICT RULES:
81
+ - ONLY return a single JSON object as specified above.
82
+ - All rates must exactly match the corresponding weight break columns (M,N,45kg, 100kg, 300kg, 500kg, 1000kg, etc.). set null if N/A. No assumptions or interpolations.
83
+ - If the table shows "RQ" or similar, set value as "RQST".
84
  - Group same-price destinations into one record separated by "/".
85
+ - Always use IATA code for origin and destination.
86
+ - Flight number (e.g. ZH118) is not charge code.
87
+ - Frequency: D[1-7]; 'Daily' = D1234567. Join multiple (e.g. D3,D4→D34).
88
+ - If local charges exist, list them.
89
+ - If validity missing, set null.
90
+ - Direction: Export if origin is Vietnam (SGN, HAN, DAD...), else Import.
91
+ - Provide short plain English reasons for "shipping_line_reason" & "charge_code_reason".
92
  - Replace commas in remarks with semicolons.
93
+ - Only return JSON.
94
  """
95
 
96
+
97
  # ================== HELPERS ==================
98
  def _read_file_bytes(upload: Union[str, os.PathLike, dict, object] | None) -> bytes:
99
  if upload is None:
 
115
  mime = "application/pdf"
116
  return filename, mime or "application/octet-stream"
117
 
 
118
  def extract_pdf_tables(file_path: str) -> pd.DataFrame:
119
+ """
120
+ Extract bảng PDF bằng Camelot (từng trang):
121
+ - Thử lattice
122
+ - Nếu thất bại → fallback stream
123
+ - Gộp tất cả
124
+ """
125
+ import camelot
126
  all_dfs = []
127
+
128
+ # Đếm tổng số trang
129
+ import fitz
130
+ total_pages = len(fitz.open(file_path))
131
+ print(f"📄 Tổng số trang: {total_pages}")
132
+
133
+ for page_no in range(1, total_pages + 1):
134
+ print(f"🔍 Đang xử trang {page_no}...")
135
+ dfs_this_page = []
136
+
137
+ # --- Thử lattice ---
138
+ try:
139
+ tables = camelot.read_pdf(
140
+ file_path, flavor="lattice",
141
+ pages=str(page_no), strip_text="\n", line_scale=40
142
+ )
143
+ if tables and tables.n > 0:
144
+ for t in tables:
145
+ if t.shape[0] > 0:
146
+ dfs_this_page.append(t.df)
147
+ print(f"✅ Trang {page_no}: Lattice thành công ({tables.n} bảng).")
148
+ except Exception as e:
149
+ print(f"⚠️ Trang {page_no} lattice lỗi: {e}")
150
+
151
+ # --- Fallback stream ---
152
+ if not dfs_this_page:
153
  try:
154
+ tables = camelot.read_pdf(
155
+ file_path, flavor="stream",
156
+ pages=str(page_no), edge_tol=200, row_tol=10
157
+ )
158
  if tables and tables.n > 0:
159
  for t in tables:
160
  if t.shape[0] > 0:
161
  dfs_this_page.append(t.df)
162
+ print(f"✅ Trang {page_no}: Stream thành công ({tables.n} bảng).")
163
  except Exception as e:
164
+ print(f" Trang {page_no} stream lỗi: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
 
166
+ if dfs_this_page:
167
+ all_dfs.extend(dfs_this_page)
168
+ else:
169
+ print(f"🚫 Trang {page_no}: Không phát hiện bảng.")
 
 
 
 
 
170
 
171
  if not all_dfs:
172
+ print(" Không tìm thấy bảng trong toàn bộ PDF.")
173
  return pd.DataFrame()
174
 
175
  df_final = pd.concat(all_dfs, ignore_index=True)
 
180
  print(f"✅ Tổng hợp: {len(df_final)} dòng, {len(df_final.columns)} cột.")
181
  return df_final
182
 
 
def extract_pdf_note(file_path: str) -> str:
    """Return the closing lines of the PDF's last page (notes, remarks, ...).

    The text of the final page is read with pdfplumber and only its last
    12 lines are kept.

    Args:
        file_path: Path of the PDF file on disk.

    Returns:
        The last (up to) 12 lines joined with newlines, or "" when the PDF
        has no pages, the page has no text, or extraction raises.
    """
    tail_line_count = 12
    try:
        with pdfplumber.open(file_path) as pdf:
            if not pdf.pages:
                # Explicit guard rather than relying on an IndexError below.
                return ""
            text = (pdf.pages[-1].extract_text() or "").strip()
            note_text = "\n".join(text.splitlines()[-tail_line_count:])
            print(f"📝 Extracted note text thành công.{note_text}")
            return note_text
    except Exception as e:
        print(f"⚠️ extract_pdf_note lỗi: {e}")
        return ""
199
 
 
def call_gemini_with_prompt(content_text: str, note_text: str, question: str, model_choice: str, temperature: float, top_p: float):
    """Send the table CSV and notes to Gemini, preferring a custom prompt.

    Args:
        content_text: CSV dump of the extracted table(s).
        note_text: Notes extracted from the PDF.
        question: Optional custom prompt; when blank, PROMPT_FREIGHT_JSON
            is used instead.
        model_choice: UI model label, mapped through INTERNAL_MODEL_MAP
            (unknown labels fall back to "gemini-2.5-flash").
        temperature: Sampling temperature forwarded to the model.
        top_p: Nucleus-sampling value forwarded to the model.

    Returns:
        The response text, or ``str(response)`` when the response exposes
        no usable text.
    """
    api_key = os.environ.get("GOOGLE_API_KEY", DEFAULT_API_KEY)
    genai.configure(api_key=api_key)

    model = genai.GenerativeModel(
        model_name=INTERNAL_MODEL_MAP.get(model_choice, "gemini-2.5-flash"),
        generation_config={
            "temperature": float(temperature),
            "top_p": float(top_p),
        },
    )

    # Use the standard freight prompt when the user supplied no question.
    base_prompt = question.strip() if question and question.strip() else PROMPT_FREIGHT_JSON

    prompt = "\n".join([
        "",
        base_prompt,
        "",
        "Below is the extracted CSV data:",
        content_text,
        "",
        "Below are the notes extracted from the PDF (e.g. Valid From, Origin, Remark, Package Type rules):",
        note_text,
        "",
        "Please analyze all data and generate the JSON output following the schema above.",
        "",
    ])

    print("🧠 Sending prompt to Gemini...")
    response = model.generate_content(prompt)
    try:
        return response.text
    except (AttributeError, ValueError):
        # The `.text` accessor raises ValueError when the response holds no
        # valid text part (e.g. blocked output); getattr's default would
        # not catch that, so fall back explicitly.
        return str(response)
 
233
 
 
 
 
 
 
234
 
235
+ # ================== MAIN ROUTER ==================
def run_process(file, question, model_choice, temperature, top_p, external_api_url):
    """Route an uploaded file through table extraction or the OCR fallback.

    PDFs are written to a temporary file so the table/note extractors can
    read them from disk; when tables are found they are sent to Gemini as
    CSV. Table-less PDFs and all other file types go to
    run_process_internal_base_v2.

    Args:
        file: Upload object from the UI (None when nothing was uploaded).
        question: Optional custom prompt.
        model_choice: UI model label.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling value.
        external_api_url: Accepted for interface compatibility; not used
            by this function.

    Returns:
        A ``(message, None)`` tuple; on any exception the message is
        ``"ERROR: <ExceptionType>: <detail>"``.
    """
    try:
        if file is None:
            return "❌ No file uploaded.", None

        file_bytes = _read_file_bytes(file)
        filename, mime = _guess_name_and_mime(file, file_bytes)
        print(f"[UPLOAD] {filename} ({mime})")

        if mime == "application/pdf":
            # The extractors need a real path, so spill the bytes to disk.
            with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as tmp:
                tmp.write(file_bytes)
                tmp_path = tmp.name
            try:
                df = extract_pdf_tables(tmp_path)
                note_text = extract_pdf_note(tmp_path)
            finally:
                # delete=False means nobody else removes the file for us.
                try:
                    os.remove(tmp_path)
                except OSError:
                    pass  # best-effort cleanup (file may still be held open)

            if not df.empty:
                csv_text = df.to_csv(index=False)
                print("✅ Gửi Gemini để sinh JSON...")
                message = call_gemini_with_prompt(csv_text, note_text, question, model_choice, temperature, top_p)
                return message, None

            print("⚠️ Không có bảng hợp lệ, fallback OCR Gemini.")

        # Non-PDF uploads and table-less PDFs both use the image-based path.
        return run_process_internal_base_v2(file_bytes, filename, mime, question, model_choice, temperature, top_p)

    except Exception as e:
        return f"ERROR: {type(e).__name__}: {e}", None
def _render_pdf_pages(pdf_bytes, dpi=200):
    """Render each page of an in-memory PDF to an RGB PIL image."""
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    try:
        images = []
        for page in doc:
            pix = page.get_pixmap(dpi=dpi)
            images.append(Image.frombytes("RGB", (pix.width, pix.height), pix.samples))
        return images
    finally:
        doc.close()


def _upload_page_image(image):
    """Save one page image as a temporary PNG, upload it, and return the file handle."""
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as tmp:
        png_path = tmp.name
    try:
        # Saved after the handle is closed so the path can be reopened on
        # every platform.
        image.save(png_path)
        uploaded = genai.upload_file(path=png_path, mime_type="image/png")
        return genai.get_file(uploaded.name)
    finally:
        try:
            os.remove(png_path)
        except OSError:
            pass  # best-effort local cleanup


def run_process_internal_base_v2(file_bytes, filename, mime, question, model_choice, temperature, top_p, batch_size=3):
    """OCR fallback: send page images to Gemini in batches.

    A PDF (detected by its "%PDF" magic bytes) is rendered to one image per
    page; any other input is opened as a single image. Pages are uploaded
    ``batch_size`` at a time together with the prompt, and the uploaded
    files are deleted again after each request.

    Args:
        file_bytes: Raw bytes of the uploaded file.
        filename: Original file name (not used here).
        mime: Detected MIME type (not used here).
        question: Optional custom prompt; PROMPT_FREIGHT_JSON when blank.
        model_choice: UI model label mapped through INTERNAL_MODEL_MAP.
        temperature: Sampling temperature.
        top_p: Nucleus-sampling value.
        batch_size: Number of page images per request.

    Returns:
        ``(text, None)`` where text joins the per-batch replies with blank
        lines (a batch without readable text contributes ""), or an
        ``("ERROR: ...", None)`` tuple when no API key is available.
    """
    api_key = os.environ.get("GOOGLE_API_KEY", DEFAULT_API_KEY)
    if not api_key:
        return "ERROR: Missing GOOGLE_API_KEY.", None
    genai.configure(api_key=api_key)
    model = genai.GenerativeModel(
        model_name=INTERNAL_MODEL_MAP.get(model_choice, "gemini-2.5-flash"),
        generation_config={"temperature": float(temperature), "top_p": float(top_p)},
    )

    if file_bytes[:4] == b"%PDF":
        # Rendered locally: no `pdf_to_images` helper is visible in this
        # revision of the file.
        pages = _render_pdf_pages(file_bytes)
    else:
        pages = [Image.open(io.BytesIO(file_bytes))]

    user_prompt = (question or "").strip() or PROMPT_FREIGHT_JSON
    all_text_results = []

    for i in range(0, len(pages), batch_size):
        uploaded = []
        try:
            for page_image in pages[i:i + batch_size]:
                uploaded.append(_upload_page_image(page_image))
            resp = model.generate_content([user_prompt] + uploaded)
            try:
                text = resp.text
            except Exception:
                text = ""  # response carried no readable text part
            all_text_results.append(text)
        finally:
            # Remove remote copies even when the request above failed.
            for up in uploaded:
                try:
                    genai.delete_file(up.name)
                except Exception:
                    pass  # best-effort remote cleanup

    return "\n\n".join(all_text_results), None
314
  # ================== UI ==================
def main():
    """Assemble the Gradio interface and return the Blocks app.

    The process button calls run_process with the file, prompt, model,
    temperature, top-p and the hidden external-URL textbox; the reply goes
    to the JSON code view and a State placeholder.
    """
    model_options = list(INTERNAL_MODEL_MAP) + [EXTERNAL_MODEL_NAME]

    with gr.Blocks(title="OCR Multi-Agent System") as demo:
        upload_box = gr.File(label="Upload PDF/Image")
        prompt_box = gr.Textbox(label="Prompt", lines=2)
        model_picker = gr.Dropdown(choices=model_options, value="Gemini 2.5 Flash", label="Model")
        temperature_slider = gr.Slider(0.0, 2.0, value=0.2, step=0.05)
        top_p_slider = gr.Slider(0.0, 1.0, value=0.95, step=0.01)
        url_box = gr.Textbox(label="External API URL", visible=False)
        result_view = gr.Code(label="Output", language="json")
        process_button = gr.Button("🚀 Process")

        # Placeholder for run_process's second return value.
        extra_output = gr.State()
        process_button.click(
            run_process,
            inputs=[upload_box, prompt_box, model_picker, temperature_slider, top_p_slider, url_box],
            outputs=[result_view, extra_output],
        )

    return demo
334
 
335
+
336
  demo = main()
337
+
338
  if __name__ == "__main__":
339
  demo.launch()