Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -47,15 +47,12 @@ def image_to_base64(image):
|
|
| 47 |
return base64.b64encode(buf.getvalue()).decode('utf-8')
|
| 48 |
|
| 49 |
def extract_structured_data(content, fields):
|
| 50 |
-
"""Try to pull a JSON object for the requested fields out of model text."""
|
| 51 |
structured_data = {}
|
| 52 |
try:
|
| 53 |
-
# Fenced JSON
|
| 54 |
if "```json" in content and "```" in content.split("```json")[1]:
|
| 55 |
json_str = content.split("```json")[1].split("```")[0].strip()
|
| 56 |
structured_data.update(json.loads(json_str))
|
| 57 |
else:
|
| 58 |
-
# As a fallback, attempt to parse whole content if it looks like JSON
|
| 59 |
try:
|
| 60 |
maybe = json.loads(content)
|
| 61 |
if isinstance(maybe, dict):
|
|
@@ -78,7 +75,7 @@ def query_openrouter(prompt: str, image_base64: str, model_id: str) -> str:
|
|
| 78 |
data_url = f"data:image/jpeg;base64,{image_base64}"
|
| 79 |
|
| 80 |
payload = {
|
| 81 |
-
"model": model_id,
|
| 82 |
"messages": [
|
| 83 |
{
|
| 84 |
"role": "user",
|
|
@@ -94,7 +91,6 @@ def query_openrouter(prompt: str, image_base64: str, model_id: str) -> str:
|
|
| 94 |
headers = {
|
| 95 |
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
|
| 96 |
"Content-Type": "application/json",
|
| 97 |
-
# Optional but recommended for attribution
|
| 98 |
"HTTP-Referer": st.secrets.get("SPACE_URL", "https://hf.space"),
|
| 99 |
"X-Title": "EZOFIS AI OCR"
|
| 100 |
}
|
|
@@ -133,7 +129,6 @@ def process_image(image, filename, fields=None, model=None):
|
|
| 133 |
return {'filename': filename, 'extraction': content}, content, structured_data
|
| 134 |
|
| 135 |
def process_pdf(file_bytes, filename, fields=None, process_pages_separately=True, model=None):
|
| 136 |
-
"""Rasterize PDF pages and run them through the same image path."""
|
| 137 |
if not PDF_SUPPORT:
|
| 138 |
yield None, None, None, filename, "PDF support requires PyMuPDF. Install pymupdf.", None
|
| 139 |
return
|
|
@@ -162,14 +157,11 @@ def process_pdf(file_bytes, filename, fields=None, process_pages_separately=True
|
|
| 162 |
|
| 163 |
def create_download_buttons(results, structured_results, extraction_mode):
|
| 164 |
st.header("Download Results")
|
| 165 |
-
|
| 166 |
-
# Simple CSV of descriptions or raw extraction
|
| 167 |
base_csv = io.StringIO()
|
| 168 |
base_writer = csv.writer(base_csv)
|
| 169 |
base_writer.writerow(['Filename', 'Description/Extraction'])
|
| 170 |
for r in results:
|
| 171 |
base_writer.writerow([r['filename'], r.get('description', r.get('extraction', ''))])
|
| 172 |
-
|
| 173 |
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 174 |
base_name = f"image_analysis_{ts}.csv"
|
| 175 |
|
|
@@ -182,7 +174,6 @@ def create_download_buttons(results, structured_results, extraction_mode):
|
|
| 182 |
use_container_width=True
|
| 183 |
)
|
| 184 |
|
| 185 |
-
# Structured CSV if available
|
| 186 |
if extraction_mode == "Custom field extraction" and structured_results:
|
| 187 |
all_fields = set(['filename'])
|
| 188 |
for row in structured_results:
|
|
@@ -206,13 +197,11 @@ def create_download_buttons(results, structured_results, extraction_mode):
|
|
| 206 |
# ---------------------------
|
| 207 |
st.title("EZOFIS AI OCR")
|
| 208 |
|
| 209 |
-
# Session state
|
| 210 |
if 'results' not in st.session_state:
|
| 211 |
st.session_state.results = []
|
| 212 |
if 'structured_results' not in st.session_state:
|
| 213 |
st.session_state.structured_results = []
|
| 214 |
|
| 215 |
-
# Sidebar
|
| 216 |
with st.sidebar:
|
| 217 |
st.header("Upload Files")
|
| 218 |
uploaded_files = st.file_uploader(
|
|
@@ -222,10 +211,13 @@ with st.sidebar:
|
|
| 222 |
)
|
| 223 |
|
| 224 |
st.header("Model Settings")
|
| 225 |
-
# OpenRouter model id for Gemma 3 4B Instruct (vision)
|
| 226 |
selected_model = st.selectbox(
|
| 227 |
"Choose vision model:",
|
| 228 |
-
[
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
help="OpenRouter model id"
|
| 230 |
)
|
| 231 |
|
|
@@ -260,7 +252,7 @@ with st.sidebar:
|
|
| 260 |
process_button = False
|
| 261 |
st.info("Upload images or PDFs to begin.")
|
| 262 |
|
| 263 |
-
#
|
| 264 |
if uploaded_files and process_button:
|
| 265 |
if not OPENROUTER_API_KEY:
|
| 266 |
st.error("OPENROUTER_API_KEY is not set. Add it in your Space → Settings → Variables & secrets.")
|
|
@@ -272,7 +264,6 @@ if uploaded_files and process_button:
|
|
| 272 |
st.session_state.results = []
|
| 273 |
st.session_state.structured_results = []
|
| 274 |
|
| 275 |
-
# Count items to process
|
| 276 |
total_items = 0
|
| 277 |
for f in uploaded_files:
|
| 278 |
file_bytes = f.read()
|
|
@@ -291,7 +282,6 @@ if uploaded_files and process_button:
|
|
| 291 |
|
| 292 |
processed_count = 0
|
| 293 |
|
| 294 |
-
# Process files
|
| 295 |
for f in uploaded_files:
|
| 296 |
file_bytes = f.read()
|
| 297 |
f.seek(0)
|
|
@@ -366,7 +356,6 @@ if uploaded_files and process_button:
|
|
| 366 |
progress_bar.progress(min(processed_count / max(total_items, 1), 1.0))
|
| 367 |
|
| 368 |
status_text.text("Processing complete.")
|
| 369 |
-
|
| 370 |
if st.session_state.results:
|
| 371 |
create_download_buttons(
|
| 372 |
st.session_state.results,
|
|
@@ -374,13 +363,12 @@ if uploaded_files and process_button:
|
|
| 374 |
extraction_mode
|
| 375 |
)
|
| 376 |
|
| 377 |
-
# Empty state
|
| 378 |
if not uploaded_files:
|
| 379 |
st.info("Upload files using the sidebar to get started.")
|
| 380 |
st.write("""
|
| 381 |
How to use:
|
| 382 |
1) Upload one or more images or PDFs
|
| 383 |
-
2) Choose
|
| 384 |
3) Pick description or custom field extraction
|
| 385 |
4) For PDFs, choose page-by-page or first page
|
| 386 |
5) Click Process Files
|
|
@@ -391,7 +379,7 @@ st.markdown("---")
|
|
| 391 |
st.markdown(
|
| 392 |
"""
|
| 393 |
<div style="text-align: center; margin-top: 12px; opacity: 0.7;">
|
| 394 |
-
EZOFIS AI OCR
|
| 395 |
</div>
|
| 396 |
""",
|
| 397 |
unsafe_allow_html=True
|
|
|
|
| 47 |
return base64.b64encode(buf.getvalue()).decode('utf-8')
|
| 48 |
|
| 49 |
def extract_structured_data(content, fields):
|
|
|
|
| 50 |
structured_data = {}
|
| 51 |
try:
|
|
|
|
| 52 |
if "```json" in content and "```" in content.split("```json")[1]:
|
| 53 |
json_str = content.split("```json")[1].split("```")[0].strip()
|
| 54 |
structured_data.update(json.loads(json_str))
|
| 55 |
else:
|
|
|
|
| 56 |
try:
|
| 57 |
maybe = json.loads(content)
|
| 58 |
if isinstance(maybe, dict):
|
|
|
|
| 75 |
data_url = f"data:image/jpeg;base64,{image_base64}"
|
| 76 |
|
| 77 |
payload = {
|
| 78 |
+
"model": model_id,
|
| 79 |
"messages": [
|
| 80 |
{
|
| 81 |
"role": "user",
|
|
|
|
| 91 |
headers = {
|
| 92 |
"Authorization": f"Bearer {OPENROUTER_API_KEY}",
|
| 93 |
"Content-Type": "application/json",
|
|
|
|
| 94 |
"HTTP-Referer": st.secrets.get("SPACE_URL", "https://hf.space"),
|
| 95 |
"X-Title": "EZOFIS AI OCR"
|
| 96 |
}
|
|
|
|
| 129 |
return {'filename': filename, 'extraction': content}, content, structured_data
|
| 130 |
|
| 131 |
def process_pdf(file_bytes, filename, fields=None, process_pages_separately=True, model=None):
|
|
|
|
| 132 |
if not PDF_SUPPORT:
|
| 133 |
yield None, None, None, filename, "PDF support requires PyMuPDF. Install pymupdf.", None
|
| 134 |
return
|
|
|
|
| 157 |
|
| 158 |
def create_download_buttons(results, structured_results, extraction_mode):
|
| 159 |
st.header("Download Results")
|
|
|
|
|
|
|
| 160 |
base_csv = io.StringIO()
|
| 161 |
base_writer = csv.writer(base_csv)
|
| 162 |
base_writer.writerow(['Filename', 'Description/Extraction'])
|
| 163 |
for r in results:
|
| 164 |
base_writer.writerow([r['filename'], r.get('description', r.get('extraction', ''))])
|
|
|
|
| 165 |
ts = datetime.now().strftime("%Y%m%d_%H%M%S")
|
| 166 |
base_name = f"image_analysis_{ts}.csv"
|
| 167 |
|
|
|
|
| 174 |
use_container_width=True
|
| 175 |
)
|
| 176 |
|
|
|
|
| 177 |
if extraction_mode == "Custom field extraction" and structured_results:
|
| 178 |
all_fields = set(['filename'])
|
| 179 |
for row in structured_results:
|
|
|
|
| 197 |
# ---------------------------
|
| 198 |
st.title("EZOFIS AI OCR")
|
| 199 |
|
|
|
|
| 200 |
if 'results' not in st.session_state:
|
| 201 |
st.session_state.results = []
|
| 202 |
if 'structured_results' not in st.session_state:
|
| 203 |
st.session_state.structured_results = []
|
| 204 |
|
|
|
|
| 205 |
with st.sidebar:
|
| 206 |
st.header("Upload Files")
|
| 207 |
uploaded_files = st.file_uploader(
|
|
|
|
| 211 |
)
|
| 212 |
|
| 213 |
st.header("Model Settings")
|
|
|
|
| 214 |
selected_model = st.selectbox(
|
| 215 |
"Choose vision model:",
|
| 216 |
+
[
|
| 217 |
+
"google/gemma-3-4b-it",
|
| 218 |
+
"openai/gpt-4.1",
|
| 219 |
+
"openai/gpt-4.1-mini"
|
| 220 |
+
],
|
| 221 |
help="OpenRouter model id"
|
| 222 |
)
|
| 223 |
|
|
|
|
| 252 |
process_button = False
|
| 253 |
st.info("Upload images or PDFs to begin.")
|
| 254 |
|
| 255 |
+
# Processing loop
|
| 256 |
if uploaded_files and process_button:
|
| 257 |
if not OPENROUTER_API_KEY:
|
| 258 |
st.error("OPENROUTER_API_KEY is not set. Add it in your Space → Settings → Variables & secrets.")
|
|
|
|
| 264 |
st.session_state.results = []
|
| 265 |
st.session_state.structured_results = []
|
| 266 |
|
|
|
|
| 267 |
total_items = 0
|
| 268 |
for f in uploaded_files:
|
| 269 |
file_bytes = f.read()
|
|
|
|
| 282 |
|
| 283 |
processed_count = 0
|
| 284 |
|
|
|
|
| 285 |
for f in uploaded_files:
|
| 286 |
file_bytes = f.read()
|
| 287 |
f.seek(0)
|
|
|
|
| 356 |
progress_bar.progress(min(processed_count / max(total_items, 1), 1.0))
|
| 357 |
|
| 358 |
status_text.text("Processing complete.")
|
|
|
|
| 359 |
if st.session_state.results:
|
| 360 |
create_download_buttons(
|
| 361 |
st.session_state.results,
|
|
|
|
| 363 |
extraction_mode
|
| 364 |
)
|
| 365 |
|
|
|
|
| 366 |
if not uploaded_files:
|
| 367 |
st.info("Upload files using the sidebar to get started.")
|
| 368 |
st.write("""
|
| 369 |
How to use:
|
| 370 |
1) Upload one or more images or PDFs
|
| 371 |
+
2) Choose a model (Gemma-3, GPT-4.1, GPT-4.1-mini)
|
| 372 |
3) Pick description or custom field extraction
|
| 373 |
4) For PDFs, choose page-by-page or first page
|
| 374 |
5) Click Process Files
|
|
|
|
| 379 |
st.markdown(
|
| 380 |
"""
|
| 381 |
<div style="text-align: center; margin-top: 12px; opacity: 0.7;">
|
| 382 |
+
Built for Hugging Face Spaces + OpenRouter (EZOFIS AI OCR)
|
| 383 |
</div>
|
| 384 |
""",
|
| 385 |
unsafe_allow_html=True
|