Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -24,16 +24,21 @@ except ImportError:
|
|
| 24 |
HF_CLIENT_AVAILABLE = False
|
| 25 |
|
| 26 |
# ---------------------------
|
| 27 |
-
# Page config
|
| 28 |
# ---------------------------
|
| 29 |
st.set_page_config(
|
| 30 |
page_title="EZOFIS AI OCR",
|
| 31 |
page_icon="🔍",
|
| 32 |
layout="wide",
|
| 33 |
-
|
| 34 |
initial_sidebar_state="expanded"
|
| 35 |
)
|
| 36 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
# ---------------------------
|
| 38 |
# Secrets / Tokens
|
| 39 |
# ---------------------------
|
|
@@ -148,8 +153,7 @@ def query_hf_llava_vqa(prompt: str, image_base64: str, model_id: str) -> str:
|
|
| 148 |
question=prompt
|
| 149 |
)
|
| 150 |
except TypeError:
|
| 151 |
-
# Fallback for
|
| 152 |
-
# or expect a different signature. Try the generic .request() path.
|
| 153 |
result = client.request(
|
| 154 |
task="visual_question_answering",
|
| 155 |
data={"inputs": {"question": prompt}},
|
|
@@ -157,20 +161,13 @@ def query_hf_llava_vqa(prompt: str, image_base64: str, model_id: str) -> str:
|
|
| 157 |
)
|
| 158 |
|
| 159 |
# Normalize result into a string
|
| 160 |
-
# Possible shapes:
|
| 161 |
-
# - str
|
| 162 |
-
# - [{"answer": "..."}]
|
| 163 |
-
# - {"answer": "..."}
|
| 164 |
-
# - [{"generated_text": "..."}] (some backends)
|
| 165 |
if isinstance(result, str):
|
| 166 |
return result
|
| 167 |
-
|
| 168 |
if isinstance(result, dict):
|
| 169 |
if "answer" in result:
|
| 170 |
return result["answer"]
|
| 171 |
if "generated_text" in result:
|
| 172 |
return result["generated_text"]
|
| 173 |
-
|
| 174 |
if isinstance(result, list) and result:
|
| 175 |
first = result[0]
|
| 176 |
if isinstance(first, dict):
|
|
@@ -178,8 +175,6 @@ def query_hf_llava_vqa(prompt: str, image_base64: str, model_id: str) -> str:
|
|
| 178 |
return first["answer"]
|
| 179 |
if "generated_text" in first:
|
| 180 |
return first["generated_text"]
|
| 181 |
-
|
| 182 |
-
# Last resort
|
| 183 |
return str(result)
|
| 184 |
|
| 185 |
# ---------------------------
|
|
@@ -226,18 +221,22 @@ def process_pdf(file_bytes, filename, fields=None, process_pages_separately=True
|
|
| 226 |
pdf_document = fitz.open(stream=file_bytes, filetype="pdf")
|
| 227 |
page_count = len(pdf_document)
|
| 228 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
if process_pages_separately:
|
| 230 |
for page_num in range(page_count):
|
| 231 |
page = pdf_document[page_num]
|
| 232 |
-
|
| 233 |
-
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 234 |
page_filename = f"{filename} (Page {page_num+1})"
|
| 235 |
result, content, structured_data = process_image(img, page_filename, fields, model)
|
| 236 |
yield page_num, page_count, img, page_filename, content, structured_data
|
| 237 |
else:
|
| 238 |
page = pdf_document[0]
|
| 239 |
-
|
| 240 |
-
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
|
| 241 |
result, content, structured_data = process_image(img, filename, fields, model)
|
| 242 |
yield 0, page_count, img, filename, content, structured_data
|
| 243 |
|
|
@@ -413,7 +412,7 @@ if uploaded_files and process_button:
|
|
| 413 |
st.session_state.structured_results.append(structured_data)
|
| 414 |
|
| 415 |
st.subheader(page_filename)
|
| 416 |
-
c1, c2 = st.columns([
|
| 417 |
with c1:
|
| 418 |
st.image(image, width=IMAGE_PREVIEW_WIDTH)
|
| 419 |
if page_count > 1 and not process_separately:
|
|
@@ -443,7 +442,7 @@ if uploaded_files and process_button:
|
|
| 443 |
st.session_state.structured_results.append(structured_data)
|
| 444 |
|
| 445 |
st.subheader(f"Image: {f.name}")
|
| 446 |
-
c1, c2 = st.columns([
|
| 447 |
with c1:
|
| 448 |
st.image(image, width=IMAGE_PREVIEW_WIDTH)
|
| 449 |
with c2:
|
|
@@ -473,9 +472,7 @@ if not uploaded_files:
|
|
| 473 |
st.write("""
|
| 474 |
How to use:
|
| 475 |
1) Upload one or more images or PDFs
|
| 476 |
-
2) Choose a model
|
| 477 |
-
- OpenRouter: Gemma-3 4B IT, Gemma-3 12B IT, GPT-4.1, GPT-4.1-mini
|
| 478 |
-
- HF API: LLaVA v1.6 Mistral-7B
|
| 479 |
3) Pick description or custom field extraction
|
| 480 |
4) For PDFs, choose page-by-page or first page
|
| 481 |
5) Click Process Files
|
|
@@ -490,4 +487,4 @@ st.markdown(
|
|
| 490 |
</div>
|
| 491 |
""",
|
| 492 |
unsafe_allow_html=True
|
| 493 |
-
)
|
|
|
|
| 24 |
HF_CLIENT_AVAILABLE = False
|
| 25 |
|
| 26 |
# ---------------------------
|
| 27 |
+
# Page config (must be first Streamlit call)
|
| 28 |
# ---------------------------
|
| 29 |
st.set_page_config(
|
| 30 |
page_title="EZOFIS AI OCR",
|
| 31 |
page_icon="🔍",
|
| 32 |
layout="wide",
|
|
|
|
| 33 |
initial_sidebar_state="expanded"
|
| 34 |
)
|
| 35 |
+
|
| 36 |
+
# ---------------------------
|
| 37 |
+
# Global UI / Render constants (NOT args to set_page_config)
|
| 38 |
+
# ---------------------------
|
| 39 |
+
IMAGE_PREVIEW_WIDTH = 1250 # 5x larger preview
|
| 40 |
+
PDF_RENDER_SCALE = 3.0 # higher-res PDF rasterization
|
| 41 |
+
|
| 42 |
# ---------------------------
|
| 43 |
# Secrets / Tokens
|
| 44 |
# ---------------------------
|
|
|
|
| 153 |
question=prompt
|
| 154 |
)
|
| 155 |
except TypeError:
|
| 156 |
+
# Fallback for client variants that don’t expose the helper
|
|
|
|
| 157 |
result = client.request(
|
| 158 |
task="visual_question_answering",
|
| 159 |
data={"inputs": {"question": prompt}},
|
|
|
|
| 161 |
)
|
| 162 |
|
| 163 |
# Normalize result into a string
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
if isinstance(result, str):
|
| 165 |
return result
|
|
|
|
| 166 |
if isinstance(result, dict):
|
| 167 |
if "answer" in result:
|
| 168 |
return result["answer"]
|
| 169 |
if "generated_text" in result:
|
| 170 |
return result["generated_text"]
|
|
|
|
| 171 |
if isinstance(result, list) and result:
|
| 172 |
first = result[0]
|
| 173 |
if isinstance(first, dict):
|
|
|
|
| 175 |
return first["answer"]
|
| 176 |
if "generated_text" in first:
|
| 177 |
return first["generated_text"]
|
|
|
|
|
|
|
| 178 |
return str(result)
|
| 179 |
|
| 180 |
# ---------------------------
|
|
|
|
| 221 |
pdf_document = fitz.open(stream=file_bytes, filetype="pdf")
|
| 222 |
page_count = len(pdf_document)
|
| 223 |
|
| 224 |
+
def _render_page(page):
|
| 225 |
+
# Higher-res, no alpha to keep RGB consistent
|
| 226 |
+
pix = page.get_pixmap(matrix=fitz.Matrix(PDF_RENDER_SCALE, PDF_RENDER_SCALE), alpha=False)
|
| 227 |
+
img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
|
| 228 |
+
return img
|
| 229 |
+
|
| 230 |
if process_pages_separately:
|
| 231 |
for page_num in range(page_count):
|
| 232 |
page = pdf_document[page_num]
|
| 233 |
+
img = _render_page(page)
|
|
|
|
| 234 |
page_filename = f"{filename} (Page {page_num+1})"
|
| 235 |
result, content, structured_data = process_image(img, page_filename, fields, model)
|
| 236 |
yield page_num, page_count, img, page_filename, content, structured_data
|
| 237 |
else:
|
| 238 |
page = pdf_document[0]
|
| 239 |
+
img = _render_page(page)
|
|
|
|
| 240 |
result, content, structured_data = process_image(img, filename, fields, model)
|
| 241 |
yield 0, page_count, img, filename, content, structured_data
|
| 242 |
|
|
|
|
| 412 |
st.session_state.structured_results.append(structured_data)
|
| 413 |
|
| 414 |
st.subheader(page_filename)
|
| 415 |
+
c1, c2 = st.columns([3, 2]) # give image more room
|
| 416 |
with c1:
|
| 417 |
st.image(image, width=IMAGE_PREVIEW_WIDTH)
|
| 418 |
if page_count > 1 and not process_separately:
|
|
|
|
| 442 |
st.session_state.structured_results.append(structured_data)
|
| 443 |
|
| 444 |
st.subheader(f"Image: {f.name}")
|
| 445 |
+
c1, c2 = st.columns([3, 2])
|
| 446 |
with c1:
|
| 447 |
st.image(image, width=IMAGE_PREVIEW_WIDTH)
|
| 448 |
with c2:
|
|
|
|
| 472 |
st.write("""
|
| 473 |
How to use:
|
| 474 |
1) Upload one or more images or PDFs
|
| 475 |
+
2) Choose a model
|
|
|
|
|
|
|
| 476 |
3) Pick description or custom field extraction
|
| 477 |
4) For PDFs, choose page-by-page or first page
|
| 478 |
5) Click Process Files
|
|
|
|
| 487 |
</div>
|
| 488 |
""",
|
| 489 |
unsafe_allow_html=True
|
| 490 |
+
)
|