fruk19 committed on
Commit
1dc843e
·
verified ·
1 Parent(s): af64003

update resize image

Browse files
Files changed (1) hide show
  1. app.py +48 -68
app.py CHANGED
@@ -14,28 +14,27 @@ def resolve_file(file):
14
  """
15
  Normalize Gradio file object into a real filesystem file path.
16
  Handles:
17
- - dict {name, data} (HF Spaces)
18
  - NamedString
19
  - tempfile object
20
  """
21
-
22
- # Case 1: HF dict format
23
  if isinstance(file, dict) and "data" in file:
24
  raw = file["data"]
25
- fname = file.get("name", f"file_{uuid.uuid4().hex}")
26
- tmp_path = f"/tmp/{uuid.uuid4().hex}_{os.path.basename(fname)}"
27
- with open(tmp_path, "wb") as f:
28
  f.write(raw if isinstance(raw, bytes) else raw.read())
29
- return tmp_path
30
 
31
- # Case 2: Gradio NamedString
32
  if hasattr(file, "name") and not hasattr(file, "path"):
33
  tmp_path = f"/tmp/{uuid.uuid4().hex}_{os.path.basename(file.name)}"
34
  with open(tmp_path, "wb") as f:
35
  f.write(open(file.name, "rb").read())
36
  return tmp_path
37
 
38
- # Case 3: local tempfile
39
  if hasattr(file, "name"):
40
  return file.name
41
 
@@ -43,16 +42,37 @@ def resolve_file(file):
43
 
44
 
45
  # ================================================================
46
- # Helper: Resize (OCR version + Preview version)
47
  # ================================================================
48
- def resize_if_needed(img, max_size=1024):
 
 
 
 
49
  w, h = img.size
50
- if max(w, h) <= max_size:
 
 
 
 
 
 
 
 
 
 
 
 
 
51
  return img
52
- scale = max_size / max(w, h)
53
- return img.resize((int(w * scale), int(h * scale)), Image.Resampling.LANCZOS)
 
54
 
55
 
 
 
 
56
  def resize_preview(img, max_size=400):
57
  w, h = img.size
58
  if max(w, h) <= max_size:
@@ -62,7 +82,7 @@ def resize_preview(img, max_size=400):
62
 
63
 
64
  # ================================================================
65
- # Typhoon OCR call
66
  # ================================================================
67
  def run_typhoon_ocr(img_bytes, api_key, model, task_type,
68
  max_tokens, temperature, top_p, repetition_penalty):
@@ -123,6 +143,7 @@ def pdf_to_images_pymupdf(pdf_path, dpi=220):
123
  # ================================================================
124
  def preview_files(files):
125
  previews = []
 
126
  for file in files:
127
  real_path = resolve_file(file)
128
  fp = real_path.lower()
@@ -130,18 +151,20 @@ def preview_files(files):
130
  if fp.endswith(".pdf"):
131
  pdf_imgs = pdf_to_images_pymupdf(real_path, dpi=120)
132
  for img in pdf_imgs:
 
133
  previews.append(resize_preview(img))
134
  else:
135
  img = Image.open(real_path)
136
  if img.mode == "RGBA":
137
  img = img.convert("RGB")
 
138
  previews.append(resize_preview(img))
139
 
140
  return previews
141
 
142
 
143
  # ================================================================
144
- # OCR 1 page (parallel)
145
  # ================================================================
146
  def ocr_single_page(page_img, label,
147
  api_key, model, task_type, max_tokens,
@@ -152,8 +175,7 @@ def ocr_single_page(page_img, label,
152
  buf.seek(0)
153
 
154
  txt = run_typhoon_ocr(
155
- buf.getvalue(),
156
- api_key, model, task_type,
157
  max_tokens, temperature, top_p, repetition_penalty
158
  )
159
  return label, txt
@@ -173,9 +195,7 @@ def extract_text(files,
173
  images_to_ocr = []
174
  labels = []
175
 
176
- # ---------------------------
177
  # LOAD FILES
178
- # ---------------------------
179
  for file in files:
180
  real_path = resolve_file(file)
181
  fp = real_path.lower()
@@ -183,23 +203,23 @@ def extract_text(files,
183
  if fp.endswith(".pdf"):
184
  pdf_imgs = pdf_to_images_pymupdf(real_path, dpi=220)
185
  for idx, img in enumerate(pdf_imgs, start=1):
186
- images_to_ocr.append(resize_if_needed(img))
 
187
  labels.append(f"{os.path.basename(real_path)} - Page {idx}")
188
  else:
189
  img = Image.open(real_path)
190
  if img.mode == "RGBA":
191
  img = img.convert("RGB")
192
- images_to_ocr.append(resize_if_needed(img))
 
193
  labels.append(os.path.basename(real_path))
194
 
195
  total = len(images_to_ocr)
196
  progress(0.03, desc=f"Loaded {total} pages/images")
197
 
198
- # ---------------------------
199
  # PARALLEL OCR
200
- # ---------------------------
201
- start = time.time()
202
  results = {}
 
203
 
204
  with ThreadPoolExecutor(max_workers=4) as ex:
205
  futures = []
@@ -221,13 +241,12 @@ def extract_text(files,
221
  elapsed = time.time() - start
222
  eta = (total - done) * (elapsed / max(done, 1))
223
 
224
- progress(done / total, desc=f"OCR {done}/{total} | ETA {eta:.1f}s")
 
225
 
226
  progress(1, desc="OCR Completed ✔")
227
 
228
- # ---------------------------
229
  # MERGE RESULT
230
- # ---------------------------
231
  merged = ""
232
  for lbl in sorted(results.keys()):
233
  merged += f"## {lbl}\n{results[lbl]}\n\n"
@@ -242,20 +261,6 @@ def extract_text(files,
242
  # ================================================================
243
  # UI
244
  # ================================================================
245
- # with gr.Blocks() as demo:
246
-
247
- # gr.Markdown("""
248
- # # 🔍 Typhoon OCR v1.5
249
- # ### Multi-file OCR • Parallel Processing • ETA • PDF/Image Support
250
-
251
- # ⚡ **High-speed OCR powered by Typhoon**
252
- # 📄 Upload **multiple images or PDFs**
253
- # 🚀 Parallel OCR with ETA per page
254
- # 🔍 Auto preview grid for all pages
255
-
256
- # 🔑 **Get your API Key:**
257
- # 👉 https://playground.opentyphoon.ai/settings/api-key
258
- # """)
259
  with gr.Blocks() as demo:
260
 
261
  gr.Markdown("""
@@ -264,7 +269,7 @@ with gr.Blocks() as demo:
264
 
265
  ⚡ **High-speed OCR powered by Typhoon**
266
  📄 Upload **multiple images or PDFs**
267
- 🚀 Parallel OCR with ETA per page
268
  πŸ” Auto preview grid for all pages
269
 
270
  ---
@@ -278,29 +283,6 @@ Click it to generate or copy your key.
278
 
279
  gr.Markdown("### 📘 How to get API Key (step-by-step)")
280
 
281
- # gr.HTML("""
282
- # <div style='display:flex; gap:24px; margin-top:10px;'>
283
-
284
- # <div style='text-align:center;'>
285
- # <img src='https://huggingface.co/spaces/fruk19/TYPHOON_OCR_DEMO/resolve/main/ocr_login.png'
286
- # style='width:260px; border-radius:8px; border:1px solid #ccc;'>
287
- # <p><b>1) Login</b></p>
288
- # </div>
289
-
290
- # <div style='text-align:center;'>
291
- # <img src='https://huggingface.co/spaces/fruk19/TYPHOON_OCR_DEMO/resolve/main/ocr_first.png'
292
- # style='width:260px; border-radius:8px; border:1px solid #ccc;'>
293
- # <p><b>2) Find API Key Menu</b></p>
294
- # </div>
295
-
296
- # <div style='text-align:center;'>
297
- # <img src='https://huggingface.co/spaces/fruk19/TYPHOON_OCR_DEMO/resolve/main/ocr_getkey.png'
298
- # style='width:260px; border-radius:8px; border:1px solid #ccc;'>
299
- # <p><b>3) Copy Your Key</b></p>
300
- # </div>
301
-
302
- # </div>
303
- # """)
304
  with gr.Row():
305
  gr.Gallery(
306
  [
@@ -308,12 +290,10 @@ Click it to generate or copy your key.
308
  ("ocr_first.png", "2) Find API Key Menu"),
309
  ("ocr_getkey.png", "3) Copy Your Key"),
310
  ],
311
- label="How to Get Your API Key (click to zoom)",
312
  columns=3,
313
  height=250,
314
  show_label=False,
315
  )
316
-
317
 
318
  file_input = gr.Files(label="Upload images or PDFs", file_count="multiple")
319
 
 
14
  """
15
  Normalize Gradio file object into a real filesystem file path.
16
  Handles:
17
+ - dict {name, data} (HF Spaces)
18
  - NamedString
19
  - tempfile object
20
  """
21
+ # Case 1: HF dict
 
22
  if isinstance(file, dict) and "data" in file:
23
  raw = file["data"]
24
+ fname = file.get("name", f"{uuid.uuid4().hex}.bin")
25
+ path = f"/tmp/{uuid.uuid4().hex}_{os.path.basename(fname)}"
26
+ with open(path, "wb") as f:
27
  f.write(raw if isinstance(raw, bytes) else raw.read())
28
+ return path
29
 
30
+ # Case 2: NamedString (file.name only)
31
  if hasattr(file, "name") and not hasattr(file, "path"):
32
  tmp_path = f"/tmp/{uuid.uuid4().hex}_{os.path.basename(file.name)}"
33
  with open(tmp_path, "wb") as f:
34
  f.write(open(file.name, "rb").read())
35
  return tmp_path
36
 
37
+ # Case 3: normal tempfile with path
38
  if hasattr(file, "name"):
39
  return file.name
40
 
 
42
 
43
 
44
  # ================================================================
45
+ # UNIVERSAL RESIZE: max bounds 800×1800, 1800×800, 1200×1200
46
  # ================================================================
47
+ def resize_to_max_bounds(img,
48
+ max_w1=800, max_h1=1800,
49
+ max_w2=1800, max_h2=800,
50
+ max_ws=1200, max_hs=1200):
51
+ """Resize image so it stays under max bounds while preserving aspect ratio."""
52
  w, h = img.size
53
+
54
+ bounds = [
55
+ (max_w1, max_h1),
56
+ (max_w2, max_h2),
57
+ (max_ws, max_hs),
58
+ ]
59
+
60
+ scale = 1.0
61
+ for max_w, max_h in bounds:
62
+ scale_w = max_w / w
63
+ scale_h = max_h / h
64
+ scale = min(scale, min(scale_w, scale_h))
65
+
66
+ if scale >= 1.0:
67
  return img
68
+
69
+ new_size = (int(w * scale), int(h * scale))
70
+ return img.resize(new_size, Image.Resampling.LANCZOS)
71
 
72
 
73
+ # ================================================================
74
+ # Preview resize
75
+ # ================================================================
76
  def resize_preview(img, max_size=400):
77
  w, h = img.size
78
  if max(w, h) <= max_size:
 
82
 
83
 
84
  # ================================================================
85
+ # Typhoon OCR API call
86
  # ================================================================
87
  def run_typhoon_ocr(img_bytes, api_key, model, task_type,
88
  max_tokens, temperature, top_p, repetition_penalty):
 
143
  # ================================================================
144
  def preview_files(files):
145
  previews = []
146
+
147
  for file in files:
148
  real_path = resolve_file(file)
149
  fp = real_path.lower()
 
151
  if fp.endswith(".pdf"):
152
  pdf_imgs = pdf_to_images_pymupdf(real_path, dpi=120)
153
  for img in pdf_imgs:
154
+ img = resize_to_max_bounds(img)
155
  previews.append(resize_preview(img))
156
  else:
157
  img = Image.open(real_path)
158
  if img.mode == "RGBA":
159
  img = img.convert("RGB")
160
+ img = resize_to_max_bounds(img)
161
  previews.append(resize_preview(img))
162
 
163
  return previews
164
 
165
 
166
  # ================================================================
167
+ # OCR 1 PAGE (PARALLEL)
168
  # ================================================================
169
  def ocr_single_page(page_img, label,
170
  api_key, model, task_type, max_tokens,
 
175
  buf.seek(0)
176
 
177
  txt = run_typhoon_ocr(
178
+ buf.getvalue(), api_key, model, task_type,
 
179
  max_tokens, temperature, top_p, repetition_penalty
180
  )
181
  return label, txt
 
195
  images_to_ocr = []
196
  labels = []
197
 
 
198
  # LOAD FILES
 
199
  for file in files:
200
  real_path = resolve_file(file)
201
  fp = real_path.lower()
 
203
  if fp.endswith(".pdf"):
204
  pdf_imgs = pdf_to_images_pymupdf(real_path, dpi=220)
205
  for idx, img in enumerate(pdf_imgs, start=1):
206
+ img = resize_to_max_bounds(img)
207
+ images_to_ocr.append(img)
208
  labels.append(f"{os.path.basename(real_path)} - Page {idx}")
209
  else:
210
  img = Image.open(real_path)
211
  if img.mode == "RGBA":
212
  img = img.convert("RGB")
213
+ img = resize_to_max_bounds(img)
214
+ images_to_ocr.append(img)
215
  labels.append(os.path.basename(real_path))
216
 
217
  total = len(images_to_ocr)
218
  progress(0.03, desc=f"Loaded {total} pages/images")
219
 
 
220
  # PARALLEL OCR
 
 
221
  results = {}
222
+ start = time.time()
223
 
224
  with ThreadPoolExecutor(max_workers=4) as ex:
225
  futures = []
 
241
  elapsed = time.time() - start
242
  eta = (total - done) * (elapsed / max(done, 1))
243
 
244
+ progress(done / total,
245
+ desc=f"OCR {done}/{total} | ETA {eta:.1f}s")
246
 
247
  progress(1, desc="OCR Completed ✔")
248
 
 
249
  # MERGE RESULT
 
250
  merged = ""
251
  for lbl in sorted(results.keys()):
252
  merged += f"## {lbl}\n{results[lbl]}\n\n"
 
261
  # ================================================================
262
  # UI
263
  # ================================================================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  with gr.Blocks() as demo:
265
 
266
  gr.Markdown("""
 
269
 
270
  ⚡ **High-speed OCR powered by Typhoon**
271
  📄 Upload **multiple images or PDFs**
272
+ 🚀 Parallel OCR with ETA
273
  πŸ” Auto preview grid for all pages
274
 
275
  ---
 
283
 
284
  gr.Markdown("### 📘 How to get API Key (step-by-step)")
285
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  with gr.Row():
287
  gr.Gallery(
288
  [
 
290
  ("ocr_first.png", "2) Find API Key Menu"),
291
  ("ocr_getkey.png", "3) Copy Your Key"),
292
  ],
 
293
  columns=3,
294
  height=250,
295
  show_label=False,
296
  )
 
297
 
298
  file_input = gr.Files(label="Upload images or PDFs", file_count="multiple")
299