Seth0330 committed on
Commit
01d9f7e
·
verified ·
1 Parent(s): 8bc5bd6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -21
app.py CHANGED
@@ -47,15 +47,12 @@ def image_to_base64(image):
47
  return base64.b64encode(buf.getvalue()).decode('utf-8')
48
 
49
  def extract_structured_data(content, fields):
50
- """Try to pull a JSON object for the requested fields out of model text."""
51
  structured_data = {}
52
  try:
53
- # Fenced JSON
54
  if "```json" in content and "```" in content.split("```json")[1]:
55
  json_str = content.split("```json")[1].split("```")[0].strip()
56
  structured_data.update(json.loads(json_str))
57
  else:
58
- # As a fallback, attempt to parse whole content if it looks like JSON
59
  try:
60
  maybe = json.loads(content)
61
  if isinstance(maybe, dict):
@@ -78,7 +75,7 @@ def query_openrouter(prompt: str, image_base64: str, model_id: str) -> str:
78
  data_url = f"data:image/jpeg;base64,{image_base64}"
79
 
80
  payload = {
81
- "model": model_id, # e.g., "google/gemma-3-4b-it"
82
  "messages": [
83
  {
84
  "role": "user",
@@ -94,7 +91,6 @@ def query_openrouter(prompt: str, image_base64: str, model_id: str) -> str:
94
  headers = {
95
  "Authorization": f"Bearer {OPENROUTER_API_KEY}",
96
  "Content-Type": "application/json",
97
- # Optional but recommended for attribution
98
  "HTTP-Referer": st.secrets.get("SPACE_URL", "https://hf.space"),
99
  "X-Title": "EZOFIS AI OCR"
100
  }
@@ -133,7 +129,6 @@ def process_image(image, filename, fields=None, model=None):
133
  return {'filename': filename, 'extraction': content}, content, structured_data
134
 
135
  def process_pdf(file_bytes, filename, fields=None, process_pages_separately=True, model=None):
136
- """Rasterize PDF pages and run them through the same image path."""
137
  if not PDF_SUPPORT:
138
  yield None, None, None, filename, "PDF support requires PyMuPDF. Install pymupdf.", None
139
  return
@@ -162,14 +157,11 @@ def process_pdf(file_bytes, filename, fields=None, process_pages_separately=True
162
 
163
  def create_download_buttons(results, structured_results, extraction_mode):
164
  st.header("Download Results")
165
-
166
- # Simple CSV of descriptions or raw extraction
167
  base_csv = io.StringIO()
168
  base_writer = csv.writer(base_csv)
169
  base_writer.writerow(['Filename', 'Description/Extraction'])
170
  for r in results:
171
  base_writer.writerow([r['filename'], r.get('description', r.get('extraction', ''))])
172
-
173
  ts = datetime.now().strftime("%Y%m%d_%H%M%S")
174
  base_name = f"image_analysis_{ts}.csv"
175
 
@@ -182,7 +174,6 @@ def create_download_buttons(results, structured_results, extraction_mode):
182
  use_container_width=True
183
  )
184
 
185
- # Structured CSV if available
186
  if extraction_mode == "Custom field extraction" and structured_results:
187
  all_fields = set(['filename'])
188
  for row in structured_results:
@@ -206,13 +197,11 @@ def create_download_buttons(results, structured_results, extraction_mode):
206
  # ---------------------------
207
  st.title("EZOFIS AI OCR")
208
 
209
- # Session state
210
  if 'results' not in st.session_state:
211
  st.session_state.results = []
212
  if 'structured_results' not in st.session_state:
213
  st.session_state.structured_results = []
214
 
215
- # Sidebar
216
  with st.sidebar:
217
  st.header("Upload Files")
218
  uploaded_files = st.file_uploader(
@@ -222,10 +211,13 @@ with st.sidebar:
222
  )
223
 
224
  st.header("Model Settings")
225
- # OpenRouter model id for Gemma 3 4B Instruct (vision)
226
  selected_model = st.selectbox(
227
  "Choose vision model:",
228
- ["google/gemma-3-4b-it"],
 
 
 
 
229
  help="OpenRouter model id"
230
  )
231
 
@@ -260,7 +252,7 @@ with st.sidebar:
260
  process_button = False
261
  st.info("Upload images or PDFs to begin.")
262
 
263
- # Main processing
264
  if uploaded_files and process_button:
265
  if not OPENROUTER_API_KEY:
266
  st.error("OPENROUTER_API_KEY is not set. Add it in your Space → Settings → Variables & secrets.")
@@ -272,7 +264,6 @@ if uploaded_files and process_button:
272
  st.session_state.results = []
273
  st.session_state.structured_results = []
274
 
275
- # Count items to process
276
  total_items = 0
277
  for f in uploaded_files:
278
  file_bytes = f.read()
@@ -291,7 +282,6 @@ if uploaded_files and process_button:
291
 
292
  processed_count = 0
293
 
294
- # Process files
295
  for f in uploaded_files:
296
  file_bytes = f.read()
297
  f.seek(0)
@@ -366,7 +356,6 @@ if uploaded_files and process_button:
366
  progress_bar.progress(min(processed_count / max(total_items, 1), 1.0))
367
 
368
  status_text.text("Processing complete.")
369
-
370
  if st.session_state.results:
371
  create_download_buttons(
372
  st.session_state.results,
@@ -374,13 +363,12 @@ if uploaded_files and process_button:
374
  extraction_mode
375
  )
376
 
377
- # Empty state
378
  if not uploaded_files:
379
  st.info("Upload files using the sidebar to get started.")
380
  st.write("""
381
  How to use:
382
  1) Upload one or more images or PDFs
383
- 2) Choose the OpenRouter vision model (Gemma 3 4B IT)
384
  3) Pick description or custom field extraction
385
  4) For PDFs, choose page-by-page or first page
386
  5) Click Process Files
@@ -391,7 +379,7 @@ st.markdown("---")
391
  st.markdown(
392
  """
393
  <div style="text-align: center; margin-top: 12px; opacity: 0.7;">
394
- EZOFIS AI OCR
395
  </div>
396
  """,
397
  unsafe_allow_html=True
 
47
  return base64.b64encode(buf.getvalue()).decode('utf-8')
48
 
49
  def extract_structured_data(content, fields):
 
50
  structured_data = {}
51
  try:
 
52
  if "```json" in content and "```" in content.split("```json")[1]:
53
  json_str = content.split("```json")[1].split("```")[0].strip()
54
  structured_data.update(json.loads(json_str))
55
  else:
 
56
  try:
57
  maybe = json.loads(content)
58
  if isinstance(maybe, dict):
 
75
  data_url = f"data:image/jpeg;base64,{image_base64}"
76
 
77
  payload = {
78
+ "model": model_id,
79
  "messages": [
80
  {
81
  "role": "user",
 
91
  headers = {
92
  "Authorization": f"Bearer {OPENROUTER_API_KEY}",
93
  "Content-Type": "application/json",
 
94
  "HTTP-Referer": st.secrets.get("SPACE_URL", "https://hf.space"),
95
  "X-Title": "EZOFIS AI OCR"
96
  }
 
129
  return {'filename': filename, 'extraction': content}, content, structured_data
130
 
131
  def process_pdf(file_bytes, filename, fields=None, process_pages_separately=True, model=None):
 
132
  if not PDF_SUPPORT:
133
  yield None, None, None, filename, "PDF support requires PyMuPDF. Install pymupdf.", None
134
  return
 
157
 
158
  def create_download_buttons(results, structured_results, extraction_mode):
159
  st.header("Download Results")
 
 
160
  base_csv = io.StringIO()
161
  base_writer = csv.writer(base_csv)
162
  base_writer.writerow(['Filename', 'Description/Extraction'])
163
  for r in results:
164
  base_writer.writerow([r['filename'], r.get('description', r.get('extraction', ''))])
 
165
  ts = datetime.now().strftime("%Y%m%d_%H%M%S")
166
  base_name = f"image_analysis_{ts}.csv"
167
 
 
174
  use_container_width=True
175
  )
176
 
 
177
  if extraction_mode == "Custom field extraction" and structured_results:
178
  all_fields = set(['filename'])
179
  for row in structured_results:
 
197
  # ---------------------------
198
  st.title("EZOFIS AI OCR")
199
 
 
200
  if 'results' not in st.session_state:
201
  st.session_state.results = []
202
  if 'structured_results' not in st.session_state:
203
  st.session_state.structured_results = []
204
 
 
205
  with st.sidebar:
206
  st.header("Upload Files")
207
  uploaded_files = st.file_uploader(
 
211
  )
212
 
213
  st.header("Model Settings")
 
214
  selected_model = st.selectbox(
215
  "Choose vision model:",
216
+ [
217
+ "google/gemma-3-4b-it",
218
+ "openai/gpt-4.1",
219
+ "openai/gpt-4.1-mini"
220
+ ],
221
  help="OpenRouter model id"
222
  )
223
 
 
252
  process_button = False
253
  st.info("Upload images or PDFs to begin.")
254
 
255
+ # Processing loop
256
  if uploaded_files and process_button:
257
  if not OPENROUTER_API_KEY:
258
  st.error("OPENROUTER_API_KEY is not set. Add it in your Space → Settings → Variables & secrets.")
 
264
  st.session_state.results = []
265
  st.session_state.structured_results = []
266
 
 
267
  total_items = 0
268
  for f in uploaded_files:
269
  file_bytes = f.read()
 
282
 
283
  processed_count = 0
284
 
 
285
  for f in uploaded_files:
286
  file_bytes = f.read()
287
  f.seek(0)
 
356
  progress_bar.progress(min(processed_count / max(total_items, 1), 1.0))
357
 
358
  status_text.text("Processing complete.")
 
359
  if st.session_state.results:
360
  create_download_buttons(
361
  st.session_state.results,
 
363
  extraction_mode
364
  )
365
 
 
366
  if not uploaded_files:
367
  st.info("Upload files using the sidebar to get started.")
368
  st.write("""
369
  How to use:
370
  1) Upload one or more images or PDFs
371
+ 2) Choose a model (Gemma-3, GPT-4.1, GPT-4.1-mini)
372
  3) Pick description or custom field extraction
373
  4) For PDFs, choose page-by-page or first page
374
  5) Click Process Files
 
379
  st.markdown(
380
  """
381
  <div style="text-align: center; margin-top: 12px; opacity: 0.7;">
382
+ Built for Hugging Face Spaces + OpenRouter (EZOFIS AI OCR)
383
  </div>
384
  """,
385
  unsafe_allow_html=True