Spaces:

Seth0330
/

OCR_VISION

Running

App Files Community

Seth0330 committed on Aug 18, 2025

Commit

c2e9904

verified ·

1 Parent(s): 7b929ef

Update app.py

Browse files

Files changed (1) hide show

app.py +21 -24

app.py CHANGED Viewed

@@ -24,16 +24,21 @@ except ImportError:
     HF_CLIENT_AVAILABLE = False
 # ---------------------------
-# Page config
 # ---------------------------
 st.set_page_config(
     page_title="EZOFIS AI OCR",
     page_icon="🔍",
     layout="wide",
     initial_sidebar_state="expanded"
 )
-    IMAGE_PREVIEW_WIDTH = 1250
 # ---------------------------
 # Secrets / Tokens
 # ---------------------------
@@ -148,8 +153,7 @@ def query_hf_llava_vqa(prompt: str, image_base64: str, model_id: str) -> str:
             question=prompt
         )
     except TypeError:
-        # Fallback for older/newer client variants that don’t expose the helper
-        # or expect a different signature. Try the generic .request() path.
         result = client.request(
             task="visual_question_answering",
             data={"inputs": {"question": prompt}},
@@ -157,20 +161,13 @@ def query_hf_llava_vqa(prompt: str, image_base64: str, model_id: str) -> str:
         )
     # Normalize result into a string
-    # Possible shapes:
-    #  - str
-    #  - [{"answer": "..."}]
-    #  - {"answer": "..."}
-    #  - [{"generated_text": "..."}] (some backends)
     if isinstance(result, str):
         return result
     if isinstance(result, dict):
         if "answer" in result:
             return result["answer"]
         if "generated_text" in result:
             return result["generated_text"]
     if isinstance(result, list) and result:
         first = result[0]
         if isinstance(first, dict):
@@ -178,8 +175,6 @@ def query_hf_llava_vqa(prompt: str, image_base64: str, model_id: str) -> str:
                 return first["answer"]
             if "generated_text" in first:
                 return first["generated_text"]
-    # Last resort
     return str(result)
 # ---------------------------
@@ -226,18 +221,22 @@ def process_pdf(file_bytes, filename, fields=None, process_pages_separately=True
         pdf_document = fitz.open(stream=file_bytes, filetype="pdf")
         page_count = len(pdf_document)
         if process_pages_separately:
             for page_num in range(page_count):
                 page = pdf_document[page_num]
-                pix = page.get_pixmap(matrix=fitz.Matrix(1.5, 1.5))
-                img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
                 page_filename = f"{filename} (Page {page_num+1})"
                 result, content, structured_data = process_image(img, page_filename, fields, model)
                 yield page_num, page_count, img, page_filename, content, structured_data
         else:
             page = pdf_document[0]
-            pix = page.get_pixmap(matrix=fitz.Matrix(1.5, 1.5))
-            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
             result, content, structured_data = process_image(img, filename, fields, model)
             yield 0, page_count, img, filename, content, structured_data
@@ -413,7 +412,7 @@ if uploaded_files and process_button:
                             st.session_state.structured_results.append(structured_data)
                         st.subheader(page_filename)
-                        c1, c2 = st.columns([1, 2])
                         with c1:
                             st.image(image, width=IMAGE_PREVIEW_WIDTH)
                             if page_count > 1 and not process_separately:
@@ -443,7 +442,7 @@ if uploaded_files and process_button:
                         st.session_state.structured_results.append(structured_data)
                     st.subheader(f"Image: {f.name}")
-                    c1, c2 = st.columns([1, 2])
                     with c1:
                         st.image(image, width=IMAGE_PREVIEW_WIDTH)
                     with c2:
@@ -473,9 +472,7 @@ if not uploaded_files:
     st.write("""
     How to use:
     1) Upload one or more images or PDFs
-    2) Choose a model:
-       - OpenRouter: Gemma-3 4B IT, Gemma-3 12B IT, GPT-4.1, GPT-4.1-mini
-       - HF API: LLaVA v1.6 Mistral-7B
     3) Pick description or custom field extraction
     4) For PDFs, choose page-by-page or first page
     5) Click Process Files
@@ -490,4 +487,4 @@ st.markdown(
     </div>
     """,
     unsafe_allow_html=True
-)

     HF_CLIENT_AVAILABLE = False
 # ---------------------------
+# Page config (must be first Streamlit call)
 # ---------------------------
 st.set_page_config(
     page_title="EZOFIS AI OCR",
     page_icon="🔍",
     layout="wide",
     initial_sidebar_state="expanded"
 )
+# ---------------------------
+# Global UI / Render constants (NOT args to set_page_config)
+# ---------------------------
+IMAGE_PREVIEW_WIDTH = 1250         # 5x larger preview
+PDF_RENDER_SCALE = 3.0             # higher-res PDF rasterization
 # ---------------------------
 # Secrets / Tokens
 # ---------------------------
             question=prompt
         )
     except TypeError:
+        # Fallback for client variants that don’t expose the helper
         result = client.request(
             task="visual_question_answering",
             data={"inputs": {"question": prompt}},
         )
     # Normalize result into a string
     if isinstance(result, str):
         return result
     if isinstance(result, dict):
         if "answer" in result:
             return result["answer"]
         if "generated_text" in result:
             return result["generated_text"]
     if isinstance(result, list) and result:
         first = result[0]
         if isinstance(first, dict):
                 return first["answer"]
             if "generated_text" in first:
                 return first["generated_text"]
     return str(result)
 # ---------------------------
         pdf_document = fitz.open(stream=file_bytes, filetype="pdf")
         page_count = len(pdf_document)
+        def _render_page(page):
+            # Higher-res, no alpha to keep RGB consistent
+            pix = page.get_pixmap(matrix=fitz.Matrix(PDF_RENDER_SCALE, PDF_RENDER_SCALE), alpha=False)
+            img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
+            return img
         if process_pages_separately:
             for page_num in range(page_count):
                 page = pdf_document[page_num]
+                img = _render_page(page)
                 page_filename = f"{filename} (Page {page_num+1})"
                 result, content, structured_data = process_image(img, page_filename, fields, model)
                 yield page_num, page_count, img, page_filename, content, structured_data
         else:
             page = pdf_document[0]
+            img = _render_page(page)
             result, content, structured_data = process_image(img, filename, fields, model)
             yield 0, page_count, img, filename, content, structured_data
                             st.session_state.structured_results.append(structured_data)
                         st.subheader(page_filename)
+                        c1, c2 = st.columns([3, 2])  # give image more room
                         with c1:
                             st.image(image, width=IMAGE_PREVIEW_WIDTH)
                             if page_count > 1 and not process_separately:
                         st.session_state.structured_results.append(structured_data)
                     st.subheader(f"Image: {f.name}")
+                    c1, c2 = st.columns([3, 2])
                     with c1:
                         st.image(image, width=IMAGE_PREVIEW_WIDTH)
                     with c2:
     st.write("""
     How to use:
     1) Upload one or more images or PDFs
+    2) Choose a model
     3) Pick description or custom field extraction
     4) For PDFs, choose page-by-page or first page
     5) Click Process Files
     </div>
     """,
     unsafe_allow_html=True
+)