Seth0330 committed on
Commit
c2e9904
·
verified ·
1 Parent(s): 7b929ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -24
app.py CHANGED
@@ -24,16 +24,21 @@ except ImportError:
24
  HF_CLIENT_AVAILABLE = False
25
 
26
  # ---------------------------
27
- # Page config
28
  # ---------------------------
29
  st.set_page_config(
30
  page_title="EZOFIS AI OCR",
31
  page_icon="🔍",
32
  layout="wide",
33
-
34
  initial_sidebar_state="expanded"
35
  )
36
- IMAGE_PREVIEW_WIDTH = 1250
 
 
 
 
 
 
37
  # ---------------------------
38
  # Secrets / Tokens
39
  # ---------------------------
@@ -148,8 +153,7 @@ def query_hf_llava_vqa(prompt: str, image_base64: str, model_id: str) -> str:
148
  question=prompt
149
  )
150
  except TypeError:
151
- # Fallback for older/newer client variants that don’t expose the helper
152
- # or expect a different signature. Try the generic .request() path.
153
  result = client.request(
154
  task="visual_question_answering",
155
  data={"inputs": {"question": prompt}},
@@ -157,20 +161,13 @@ def query_hf_llava_vqa(prompt: str, image_base64: str, model_id: str) -> str:
157
  )
158
 
159
  # Normalize result into a string
160
- # Possible shapes:
161
- # - str
162
- # - [{"answer": "..."}]
163
- # - {"answer": "..."}
164
- # - [{"generated_text": "..."}] (some backends)
165
  if isinstance(result, str):
166
  return result
167
-
168
  if isinstance(result, dict):
169
  if "answer" in result:
170
  return result["answer"]
171
  if "generated_text" in result:
172
  return result["generated_text"]
173
-
174
  if isinstance(result, list) and result:
175
  first = result[0]
176
  if isinstance(first, dict):
@@ -178,8 +175,6 @@ def query_hf_llava_vqa(prompt: str, image_base64: str, model_id: str) -> str:
178
  return first["answer"]
179
  if "generated_text" in first:
180
  return first["generated_text"]
181
-
182
- # Last resort
183
  return str(result)
184
 
185
  # ---------------------------
@@ -226,18 +221,22 @@ def process_pdf(file_bytes, filename, fields=None, process_pages_separately=True
226
  pdf_document = fitz.open(stream=file_bytes, filetype="pdf")
227
  page_count = len(pdf_document)
228
 
 
 
 
 
 
 
229
  if process_pages_separately:
230
  for page_num in range(page_count):
231
  page = pdf_document[page_num]
232
- pix = page.get_pixmap(matrix=fitz.Matrix(1.5, 1.5))
233
- img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
234
  page_filename = f"{filename} (Page {page_num+1})"
235
  result, content, structured_data = process_image(img, page_filename, fields, model)
236
  yield page_num, page_count, img, page_filename, content, structured_data
237
  else:
238
  page = pdf_document[0]
239
- pix = page.get_pixmap(matrix=fitz.Matrix(1.5, 1.5))
240
- img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
241
  result, content, structured_data = process_image(img, filename, fields, model)
242
  yield 0, page_count, img, filename, content, structured_data
243
 
@@ -413,7 +412,7 @@ if uploaded_files and process_button:
413
  st.session_state.structured_results.append(structured_data)
414
 
415
  st.subheader(page_filename)
416
- c1, c2 = st.columns([1, 2])
417
  with c1:
418
  st.image(image, width=IMAGE_PREVIEW_WIDTH)
419
  if page_count > 1 and not process_separately:
@@ -443,7 +442,7 @@ if uploaded_files and process_button:
443
  st.session_state.structured_results.append(structured_data)
444
 
445
  st.subheader(f"Image: {f.name}")
446
- c1, c2 = st.columns([1, 2])
447
  with c1:
448
  st.image(image, width=IMAGE_PREVIEW_WIDTH)
449
  with c2:
@@ -473,9 +472,7 @@ if not uploaded_files:
473
  st.write("""
474
  How to use:
475
  1) Upload one or more images or PDFs
476
- 2) Choose a model:
477
- - OpenRouter: Gemma-3 4B IT, Gemma-3 12B IT, GPT-4.1, GPT-4.1-mini
478
- - HF API: LLaVA v1.6 Mistral-7B
479
  3) Pick description or custom field extraction
480
  4) For PDFs, choose page-by-page or first page
481
  5) Click Process Files
@@ -490,4 +487,4 @@ st.markdown(
490
  </div>
491
  """,
492
  unsafe_allow_html=True
493
- )
 
24
  HF_CLIENT_AVAILABLE = False
25
 
26
  # ---------------------------
27
+ # Page config (must be first Streamlit call)
28
  # ---------------------------
29
  st.set_page_config(
30
  page_title="EZOFIS AI OCR",
31
  page_icon="🔍",
32
  layout="wide",
 
33
  initial_sidebar_state="expanded"
34
  )
35
+
36
+ # ---------------------------
37
+ # Global UI / Render constants (NOT args to set_page_config)
38
+ # ---------------------------
39
+ IMAGE_PREVIEW_WIDTH = 1250 # 5x larger preview
40
+ PDF_RENDER_SCALE = 3.0 # higher-res PDF rasterization
41
+
42
  # ---------------------------
43
  # Secrets / Tokens
44
  # ---------------------------
 
153
  question=prompt
154
  )
155
  except TypeError:
156
+ # Fallback for client variants that don’t expose the helper
 
157
  result = client.request(
158
  task="visual_question_answering",
159
  data={"inputs": {"question": prompt}},
 
161
  )
162
 
163
  # Normalize result into a string
 
 
 
 
 
164
  if isinstance(result, str):
165
  return result
 
166
  if isinstance(result, dict):
167
  if "answer" in result:
168
  return result["answer"]
169
  if "generated_text" in result:
170
  return result["generated_text"]
 
171
  if isinstance(result, list) and result:
172
  first = result[0]
173
  if isinstance(first, dict):
 
175
  return first["answer"]
176
  if "generated_text" in first:
177
  return first["generated_text"]
 
 
178
  return str(result)
179
 
180
  # ---------------------------
 
221
  pdf_document = fitz.open(stream=file_bytes, filetype="pdf")
222
  page_count = len(pdf_document)
223
 
224
+ def _render_page(page):
225
+ # Higher-res, no alpha to keep RGB consistent
226
+ pix = page.get_pixmap(matrix=fitz.Matrix(PDF_RENDER_SCALE, PDF_RENDER_SCALE), alpha=False)
227
+ img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
228
+ return img
229
+
230
  if process_pages_separately:
231
  for page_num in range(page_count):
232
  page = pdf_document[page_num]
233
+ img = _render_page(page)
 
234
  page_filename = f"{filename} (Page {page_num+1})"
235
  result, content, structured_data = process_image(img, page_filename, fields, model)
236
  yield page_num, page_count, img, page_filename, content, structured_data
237
  else:
238
  page = pdf_document[0]
239
+ img = _render_page(page)
 
240
  result, content, structured_data = process_image(img, filename, fields, model)
241
  yield 0, page_count, img, filename, content, structured_data
242
 
 
412
  st.session_state.structured_results.append(structured_data)
413
 
414
  st.subheader(page_filename)
415
+ c1, c2 = st.columns([3, 2]) # give image more room
416
  with c1:
417
  st.image(image, width=IMAGE_PREVIEW_WIDTH)
418
  if page_count > 1 and not process_separately:
 
442
  st.session_state.structured_results.append(structured_data)
443
 
444
  st.subheader(f"Image: {f.name}")
445
+ c1, c2 = st.columns([3, 2])
446
  with c1:
447
  st.image(image, width=IMAGE_PREVIEW_WIDTH)
448
  with c2:
 
472
  st.write("""
473
  How to use:
474
  1) Upload one or more images or PDFs
475
+ 2) Choose a model
 
 
476
  3) Pick description or custom field extraction
477
  4) For PDFs, choose page-by-page or first page
478
  5) Click Process Files
 
487
  </div>
488
  """,
489
  unsafe_allow_html=True
490
+ )