Spaces:

Seth0330
/

OCR_VISION

Sleeping

App Files Community

Seth0330 committed on Aug 18, 2025

Commit

01d9f7e

verified ·

1 Parent(s): 8bc5bd6

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -21

app.py CHANGED Viewed

@@ -47,15 +47,12 @@ def image_to_base64(image):
     return base64.b64encode(buf.getvalue()).decode('utf-8')
 def extract_structured_data(content, fields):
-    """Try to pull a JSON object for the requested fields out of model text."""
     structured_data = {}
     try:
-        # Fenced JSON
         if "```json" in content and "```" in content.split("```json")[1]:
             json_str = content.split("```json")[1].split("```")[0].strip()
             structured_data.update(json.loads(json_str))
         else:
-            # As a fallback, attempt to parse whole content if it looks like JSON
             try:
                 maybe = json.loads(content)
                 if isinstance(maybe, dict):
@@ -78,7 +75,7 @@ def query_openrouter(prompt: str, image_base64: str, model_id: str) -> str:
     data_url = f"data:image/jpeg;base64,{image_base64}"
     payload = {
-        "model": model_id,  # e.g., "google/gemma-3-4b-it"
         "messages": [
             {
                 "role": "user",
@@ -94,7 +91,6 @@ def query_openrouter(prompt: str, image_base64: str, model_id: str) -> str:
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
         "Content-Type": "application/json",
-        # Optional but recommended for attribution
         "HTTP-Referer": st.secrets.get("SPACE_URL", "https://hf.space"),
         "X-Title": "EZOFIS AI OCR"
     }
@@ -133,7 +129,6 @@ def process_image(image, filename, fields=None, model=None):
         return {'filename': filename, 'extraction': content}, content, structured_data
 def process_pdf(file_bytes, filename, fields=None, process_pages_separately=True, model=None):
-    """Rasterize PDF pages and run them through the same image path."""
     if not PDF_SUPPORT:
         yield None, None, None, filename, "PDF support requires PyMuPDF. Install pymupdf.", None
         return
@@ -162,14 +157,11 @@ def process_pdf(file_bytes, filename, fields=None, process_pages_separately=True
 def create_download_buttons(results, structured_results, extraction_mode):
     st.header("Download Results")
-    # Simple CSV of descriptions or raw extraction
     base_csv = io.StringIO()
     base_writer = csv.writer(base_csv)
     base_writer.writerow(['Filename', 'Description/Extraction'])
     for r in results:
         base_writer.writerow([r['filename'], r.get('description', r.get('extraction', ''))])
     ts = datetime.now().strftime("%Y%m%d_%H%M%S")
     base_name = f"image_analysis_{ts}.csv"
@@ -182,7 +174,6 @@ def create_download_buttons(results, structured_results, extraction_mode):
         use_container_width=True
     )
-    # Structured CSV if available
     if extraction_mode == "Custom field extraction" and structured_results:
         all_fields = set(['filename'])
         for row in structured_results:
@@ -206,13 +197,11 @@ def create_download_buttons(results, structured_results, extraction_mode):
 # ---------------------------
 st.title("EZOFIS AI OCR")
-# Session state
 if 'results' not in st.session_state:
     st.session_state.results = []
 if 'structured_results' not in st.session_state:
     st.session_state.structured_results = []
-# Sidebar
 with st.sidebar:
     st.header("Upload Files")
     uploaded_files = st.file_uploader(
@@ -222,10 +211,13 @@ with st.sidebar:
     )
     st.header("Model Settings")
-    # OpenRouter model id for Gemma 3 4B Instruct (vision)
     selected_model = st.selectbox(
         "Choose vision model:",
-        ["google/gemma-3-4b-it"],
         help="OpenRouter model id"
     )
@@ -260,7 +252,7 @@ with st.sidebar:
         process_button = False
         st.info("Upload images or PDFs to begin.")
-# Main processing
 if uploaded_files and process_button:
     if not OPENROUTER_API_KEY:
         st.error("OPENROUTER_API_KEY is not set. Add it in your Space → Settings → Variables & secrets.")
@@ -272,7 +264,6 @@ if uploaded_files and process_button:
         st.session_state.results = []
         st.session_state.structured_results = []
-        # Count items to process
         total_items = 0
         for f in uploaded_files:
             file_bytes = f.read()
@@ -291,7 +282,6 @@ if uploaded_files and process_button:
         processed_count = 0
-        # Process files
         for f in uploaded_files:
             file_bytes = f.read()
             f.seek(0)
@@ -366,7 +356,6 @@ if uploaded_files and process_button:
                 progress_bar.progress(min(processed_count / max(total_items, 1), 1.0))
         status_text.text("Processing complete.")
         if st.session_state.results:
             create_download_buttons(
                 st.session_state.results,
@@ -374,13 +363,12 @@ if uploaded_files and process_button:
                 extraction_mode
             )
-# Empty state
 if not uploaded_files:
     st.info("Upload files using the sidebar to get started.")
     st.write("""
     How to use:
     1) Upload one or more images or PDFs
-    2) Choose the OpenRouter vision model (Gemma 3 4B IT)
     3) Pick description or custom field extraction
     4) For PDFs, choose page-by-page or first page
     5) Click Process Files
@@ -391,7 +379,7 @@ st.markdown("---")
 st.markdown(
     """
     <div style="text-align: center; margin-top: 12px; opacity: 0.7;">
-        EZOFIS AI OCR
     </div>
     """,
     unsafe_allow_html=True

     return base64.b64encode(buf.getvalue()).decode('utf-8')
 def extract_structured_data(content, fields):
     structured_data = {}
     try:
         if "```json" in content and "```" in content.split("```json")[1]:
             json_str = content.split("```json")[1].split("```")[0].strip()
             structured_data.update(json.loads(json_str))
         else:
             try:
                 maybe = json.loads(content)
                 if isinstance(maybe, dict):
     data_url = f"data:image/jpeg;base64,{image_base64}"
     payload = {
+        "model": model_id,
         "messages": [
             {
                 "role": "user",
     headers = {
         "Authorization": f"Bearer {OPENROUTER_API_KEY}",
         "Content-Type": "application/json",
         "HTTP-Referer": st.secrets.get("SPACE_URL", "https://hf.space"),
         "X-Title": "EZOFIS AI OCR"
     }
         return {'filename': filename, 'extraction': content}, content, structured_data
 def process_pdf(file_bytes, filename, fields=None, process_pages_separately=True, model=None):
     if not PDF_SUPPORT:
         yield None, None, None, filename, "PDF support requires PyMuPDF. Install pymupdf.", None
         return
 def create_download_buttons(results, structured_results, extraction_mode):
     st.header("Download Results")
     base_csv = io.StringIO()
     base_writer = csv.writer(base_csv)
     base_writer.writerow(['Filename', 'Description/Extraction'])
     for r in results:
         base_writer.writerow([r['filename'], r.get('description', r.get('extraction', ''))])
     ts = datetime.now().strftime("%Y%m%d_%H%M%S")
     base_name = f"image_analysis_{ts}.csv"
         use_container_width=True
     )
     if extraction_mode == "Custom field extraction" and structured_results:
         all_fields = set(['filename'])
         for row in structured_results:
 # ---------------------------
 st.title("EZOFIS AI OCR")
 if 'results' not in st.session_state:
     st.session_state.results = []
 if 'structured_results' not in st.session_state:
     st.session_state.structured_results = []
 with st.sidebar:
     st.header("Upload Files")
     uploaded_files = st.file_uploader(
     )
     st.header("Model Settings")
     selected_model = st.selectbox(
         "Choose vision model:",
+        [
+            "google/gemma-3-4b-it",
+            "openai/gpt-4.1",
+            "openai/gpt-4.1-mini"
+        ],
         help="OpenRouter model id"
     )
         process_button = False
         st.info("Upload images or PDFs to begin.")
+# Processing loop
 if uploaded_files and process_button:
     if not OPENROUTER_API_KEY:
         st.error("OPENROUTER_API_KEY is not set. Add it in your Space → Settings → Variables & secrets.")
         st.session_state.results = []
         st.session_state.structured_results = []
         total_items = 0
         for f in uploaded_files:
             file_bytes = f.read()
         processed_count = 0
         for f in uploaded_files:
             file_bytes = f.read()
             f.seek(0)
                 progress_bar.progress(min(processed_count / max(total_items, 1), 1.0))
         status_text.text("Processing complete.")
         if st.session_state.results:
             create_download_buttons(
                 st.session_state.results,
                 extraction_mode
             )
 if not uploaded_files:
     st.info("Upload files using the sidebar to get started.")
     st.write("""
     How to use:
     1) Upload one or more images or PDFs
+    2) Choose a model (Gemma-3, GPT-4.1, GPT-4.1-mini)
     3) Pick description or custom field extraction
     4) For PDFs, choose page-by-page or first page
     5) Click Process Files
 st.markdown(
     """
     <div style="text-align: center; margin-top: 12px; opacity: 0.7;">
+        Built for Hugging Face Spaces + OpenRouter (EZOFIS AI OCR)
     </div>
     """,
     unsafe_allow_html=True