Seth0330 committed on
Commit
4fcf23d
·
verified ·
1 Parent(s): 919773e

Delete src

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +0 -398
src/streamlit_app.py DELETED
@@ -1,398 +0,0 @@
1
- import streamlit as st
2
- import io
3
- import base64
4
- import pandas as pd
5
- from PIL import Image
6
- from datetime import datetime
7
- import csv
8
- import json
9
- import os
10
- import requests
11
-
12
- # Optional PDF support via PyMuPDF
13
- try:
14
- import fitz # PyMuPDF
15
- PDF_SUPPORT = True
16
- except ImportError:
17
- PDF_SUPPORT = False
18
-
19
- # ---------------------------
20
- # Page config
21
- # ---------------------------
22
# Browser-tab title/icon and overall page layout for the app.
_PAGE_SETTINGS = {
    "page_title": "Curiosity AI Scans",
    "page_icon": "🔍",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
28
-
29
- # ---------------------------
30
- # Helpers
31
- # ---------------------------
32
def resize_image(image, max_size=1920):
    """Downscale *image* so that neither side exceeds *max_size* pixels.

    The aspect ratio is preserved. Images that already fit are returned
    unchanged (the very same object, no copy is made).

    Args:
        image: PIL-style image exposing ``size`` as ``(width, height)`` and a
            ``resize((width, height), resample)`` method.
        max_size: Maximum allowed length, in pixels, of the longer side.

    Returns:
        The original image when it already fits, otherwise a resized image
        produced with Lanczos resampling.
    """
    width, height = image.size
    if width <= max_size and height <= max_size:
        return image

    # The longer side is pinned to max_size exactly; the shorter side is scaled
    # and clamped to 1px, because truncation would otherwise produce a
    # zero-length side for very elongated images (e.g. 10000x1), which is not
    # a valid target size.
    if width > height:
        new_width = max_size
        new_height = max(1, int(height * (max_size / width)))
    else:
        new_height = max_size
        new_width = max(1, int(width * (max_size / height)))
    return image.resize((new_width, new_height), Image.LANCZOS)
43
-
44
def image_to_base64(image):
    """Encode *image* as JPEG and return the bytes as a base64 string.

    Args:
        image: PIL-style image exposing ``mode``, ``convert(mode)`` and
            ``save(fp, format=...)``.

    Returns:
        Base64 text (ASCII, no data-URL prefix) of the JPEG-encoded image.
    """
    # Modes the JPEG writer accepts directly. Anything else (RGBA, P, LA, ...)
    # cannot be saved as JPEG, so it is converted to RGB first instead of
    # letting save() fail.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if image.mode not in jpeg_modes:
        image = image.convert("RGB")

    buf = io.BytesIO()
    image.save(buf, format='JPEG')
    return base64.b64encode(buf.getvalue()).decode('utf-8')
48
-
49
def extract_structured_data(content, fields):
    """Pull a JSON object out of free-form model output.

    Candidates are tried in order and the first one that parses to a JSON
    object wins:

    1. the body of each fenced code block (tagged ``json`` or untagged),
    2. the whole text,
    3. the first JSON object embedded anywhere in the text.

    Args:
        content: Raw text returned by the model. Anything that is not a
            non-blank string yields an empty dict.
        fields: Names of the requested fields. Accepted for interface
            compatibility; it is not used to filter, so every key the model
            returned is kept.

    Returns:
        The parsed object as a ``dict``, or ``{}`` when no JSON object could
        be recovered. JSON values that are not objects (lists, numbers, ...)
        are ignored.
    """
    if not isinstance(content, str) or not content.strip():
        return {}

    fence = "```"
    candidates = []
    # After splitting on the fence marker, odd-numbered pieces are the ones
    # that sit inside a fence.
    for piece in content.split(fence)[1::2]:
        body = piece.strip()
        if body[:4].lower() == "json":
            body = body[4:]  # drop the language tag of a ```json fence
        candidates.append(body.strip())
    candidates.append(content.strip())

    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except ValueError:
            continue
        if isinstance(parsed, dict):
            return parsed

    # Last resort: models often wrap the object in prose ("Sure! {...}").
    # Try decoding from every opening brace until one yields an object.
    decoder = json.JSONDecoder()
    start = content.find("{")
    while start != -1:
        try:
            parsed, _ = decoder.raw_decode(content, start)
        except ValueError:
            pass
        else:
            if isinstance(parsed, dict):
                return parsed
        start = content.find("{", start + 1)

    return {}
68
-
69
- # ---------------------------
70
- # OpenRouter client
71
- # ---------------------------
72
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY")  # set this in Space Secrets


def _referer_url():
    """Return the URL sent as ``HTTP-Referer`` for OpenRouter attribution."""
    default_url = "https://hf.space"
    try:
        return st.secrets.get("SPACE_URL", default_url)
    except Exception:
        # Reading st.secrets can raise when no secrets file is configured.
        # The header is optional attribution, so fall back to the environment
        # rather than failing the whole request.
        return os.getenv("SPACE_URL", default_url)


def query_openrouter(prompt: str, image_base64: str, model_id: str) -> str:
    """Send a text prompt plus one JPEG image to an OpenRouter chat model.

    Args:
        prompt: Instruction text for the model.
        image_base64: Base64-encoded JPEG bytes (no data-URL prefix).
        model_id: OpenRouter model id, e.g. "google/gemma-3-4b-it".

    Returns:
        The text content of the first choice; an empty string when the model
        returned no content.

    Raises:
        RuntimeError: If the API key is missing, the response body reports an
            error, or the body does not have the expected shape.
        requests.HTTPError: If the API answers with an HTTP error status.
    """
    if not OPENROUTER_API_KEY:
        raise RuntimeError("Missing OPENROUTER_API_KEY. Add it in your Space → Settings → Variables & secrets.")

    data_url = f"data:image/jpeg;base64,{image_base64}"

    payload = {
        "model": model_id,  # e.g., "google/gemma-3-4b-it"
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": prompt},
                    {"type": "image_url", "image_url": {"url": data_url}},
                ],
            }
        ],
        "max_tokens": 800,
    }

    headers = {
        "Authorization": f"Bearer {OPENROUTER_API_KEY}",
        "Content-Type": "application/json",
        # Optional but recommended for attribution
        "HTTP-Referer": _referer_url(),
        "X-Title": "Curiosity AI Scans",
    }

    r = requests.post(
        "https://openrouter.ai/api/v1/chat/completions",
        headers=headers,
        json=payload,
        timeout=120,
    )
    r.raise_for_status()
    data = r.json()

    # A 2xx response may still carry an error payload instead of choices;
    # report it explicitly rather than failing with a bare KeyError below.
    if isinstance(data, dict) and data.get("error"):
        raise RuntimeError(f"OpenRouter returned an error: {data['error']}")

    try:
        content = data["choices"][0]["message"]["content"]
    except (KeyError, IndexError, TypeError) as exc:
        raise RuntimeError("Unexpected response format from OpenRouter.") from exc

    # Keep the declared return type: callers display and parse this as text.
    return content if content is not None else ""
111
-
112
- # ---------------------------
113
- # Core processing
114
- # ---------------------------
115
def process_image(image, filename, fields=None, model=None):
    """Run one image through the vision model and package the answer.

    The image is downscaled with ``resize_image``, encoded with
    ``image_to_base64`` and sent through ``query_openrouter``.

    Args:
        image: Image to analyse.
        filename: Label stored alongside the output.
        fields: Names of the fields to extract. ``None`` or an empty list
            requests a free-form description instead.
        model: OpenRouter model id, passed through to ``query_openrouter``.

    Returns:
        A ``(result, content, structured_data)`` tuple:

        * ``result``: dict with ``'filename'`` plus ``'description'``
          (description mode) or ``'extraction'`` (field mode), both holding
          the raw model text.
        * ``content``: the raw model text.
        * ``structured_data``: ``None`` in description mode; in field mode a
          dict with ``'filename'`` plus whatever keys could be parsed from
          the model text.
    """
    img_base64 = image_to_base64(resize_image(image))

    # An empty field list (e.g. the user cleared the text box) carries no
    # extraction request; asking the model for "these exact keys: " would be
    # meaningless, so it is handled like None.
    if not fields:
        prompt = "Describe this image in detail."
        content = query_openrouter(prompt, img_base64, model)
        return {'filename': filename, 'description': content}, content, None

    fields_str = ", ".join(fields)
    prompt = (
        "Extract the following fields from this image and return JSON only "
        f"with these exact keys: {fields_str}. If a field is missing, use an empty string."
    )
    content = query_openrouter(prompt, img_base64, model)

    structured_data = {'filename': filename}
    parsed = extract_structured_data(content, fields)
    if parsed:
        structured_data.update(parsed)
    return {'filename': filename, 'extraction': content}, content, structured_data
134
-
135
def process_pdf(file_bytes, filename, fields=None, process_pages_separately=True, model=None):
    """Rasterize PDF pages and run them through the same image path.

    This is a generator. Each item is a 6-tuple
    ``(page_num, page_count, image, page_filename, content, structured_data)``.
    Failures are reported in-band as ``(None, None, None, filename, message,
    None)`` instead of being raised, and end the iteration.

    Args:
        file_bytes: Raw bytes of the PDF.
        filename: Display name of the PDF; per-page items get a
            `` (Page N)`` suffix.
        fields: Field names forwarded to ``process_image``.
        process_pages_separately: When true every page is processed; when
            false only the first page is sent to the model.
        model: OpenRouter model id forwarded to ``process_image``.
    """
    if not PDF_SUPPORT:
        yield None, None, None, filename, "PDF support requires PyMuPDF. Install pymupdf.", None
        return

    try:
        pdf_document = fitz.open(stream=file_bytes, filetype="pdf")
    except Exception as e:
        yield None, None, None, filename, f"Error processing PDF: {str(e)}", None
        return

    try:
        page_count = len(pdf_document)
        # "Whole document" mode only ever looks at the first page.
        page_numbers = range(page_count) if process_pages_separately else [0]

        for page_num in page_numbers:
            page = pdf_document[page_num]
            # 1.5x zoom when rasterizing the page.
            pix = page.get_pixmap(matrix=fitz.Matrix(1.5, 1.5))
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            if process_pages_separately:
                page_filename = f"{filename} (Page {page_num+1})"
            else:
                page_filename = filename
            _, content, structured_data = process_image(img, page_filename, fields, model)
            yield page_num, page_count, img, page_filename, content, structured_data

    except Exception as e:
        yield None, None, None, filename, f"Error processing PDF: {str(e)}", None
    finally:
        # Release the document even if the consumer stops iterating early.
        pdf_document.close()
162
-
163
def create_download_buttons(results, structured_results, extraction_mode):
    """Render the download section for the collected results.

    Always offers a two-column CSV (filename plus description or raw
    extraction text). When ``extraction_mode`` is "Custom field extraction"
    and ``structured_results`` is non-empty, a second CSV is offered with one
    column per key seen across all structured rows, in sorted order.

    Args:
        results: Dicts with 'filename' and either 'description' or
            'extraction'.
        structured_results: Dicts of parsed fields, one per processed item.
        extraction_mode: The extraction mode label chosen in the UI.
    """
    st.header("Download Results")

    # Simple CSV of descriptions or raw extraction
    summary_buffer = io.StringIO()
    summary_writer = csv.writer(summary_buffer)
    summary_writer.writerow(['Filename', 'Description/Extraction'])
    summary_writer.writerows(
        [entry['filename'], entry.get('description', entry.get('extraction', ''))]
        for entry in results
    )

    # One timestamp shared by both file names
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    st.success("All files processed.")
    st.download_button(
        label="Download Results (CSV)",
        data=summary_buffer.getvalue(),
        file_name=f"image_analysis_{timestamp}.csv",
        mime="text/csv",
        use_container_width=True,
    )

    # Structured CSV if available
    if extraction_mode != "Custom field extraction" or not structured_results:
        return

    column_names = {'filename'}
    for record in structured_results:
        column_names |= set(record.keys())
    column_names = sorted(column_names)

    structured_buffer = io.StringIO()
    structured_writer = csv.writer(structured_buffer)
    structured_writer.writerow(column_names)
    structured_writer.writerows(
        [record.get(column, '') for column in column_names]
        for record in structured_results
    )

    st.download_button(
        label="Download Structured Data (CSV)",
        data=structured_buffer.getvalue(),
        file_name=f"structured_data_{timestamp}.csv",
        mime="text/csv",
        use_container_width=True,
    )
203
-
204
- # ---------------------------
205
- # UI
206
- # ---------------------------
207
st.title("Curiosity AI Scans")

# Session state: make sure both result lists exist before anything reads them
for _state_key in ('results', 'structured_results'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = []
214
-
215
- # Sidebar
216
with st.sidebar:
    st.header("Upload Files")
    uploaded_files = st.file_uploader(
        "Choose images or PDFs",
        type=['png', 'jpg', 'jpeg', 'pdf'],
        accept_multiple_files=True,
    )

    st.header("Model Settings")
    # OpenRouter model id for Gemma 3 4B Instruct (vision)
    selected_model = st.selectbox(
        "Choose vision model:",
        ["google/gemma-3-4b-it"],
        help="OpenRouter model id",
    )

    # Defaults that apply until files are uploaded and options are chosen
    extraction_mode = "General description"
    pdf_process_mode = "Process each page separately"
    fields = None
    process_button = False

    if not uploaded_files:
        st.info("Upload images or PDFs to begin.")
    else:
        st.write(f"Uploaded {len(uploaded_files)} file(s)")

        st.header("Data Extraction Options")
        extraction_mode = st.radio(
            "Choose extraction mode:",
            ["General description", "Custom field extraction"],
        )

        if extraction_mode == "Custom field extraction":
            raw_field_text = st.text_area(
                "Enter fields to extract (comma separated):",
                value="Invoice number, Date, Company name, Total amount",
            )
            # Split on commas and drop blank entries
            fields = []
            for field_name in raw_field_text.split(","):
                field_name = field_name.strip()
                if field_name:
                    fields.append(field_name)

        # Only offer the PDF option when at least one PDF was uploaded
        has_pdf = False
        for upload in uploaded_files:
            if upload.name.lower().endswith('.pdf'):
                has_pdf = True
                break
        if has_pdf:
            pdf_process_mode = st.radio(
                "How to process PDF files:",
                ["Process each page separately", "Process entire PDF as one document"],
            )

        process_button = st.button("Process Files", use_container_width=True)
262
-
263
- # Main processing
264
def _count_work_items(files, pdf_mode):
    """Return the number of progress steps needed for the uploaded batch.

    Every image counts as one step. A PDF counts one step per page when
    PyMuPDF is available and page-by-page mode is selected, and one step
    otherwise (including when it cannot be opened).
    """
    total = 0
    for upload in files:
        per_page = (
            upload.name.lower().endswith('.pdf')
            and PDF_SUPPORT
            and pdf_mode == "Process each page separately"
        )
        if not per_page:
            # No need to read the file just to count it as a single item.
            total += 1
            continue

        upload.seek(0)
        data = upload.read()
        upload.seek(0)  # leave the stream rewound for the processing pass
        try:
            # The context manager closes the document once it has been counted.
            with fitz.open(stream=data, filetype="pdf") as doc:
                total += len(doc)
        except Exception:
            total += 1
    return total


def _record_result(result, structured_data):
    """Append one processed item to the session-state result lists."""
    st.session_state.results.append(result)
    # A dict holding only 'filename' means nothing was parsed, so skip it.
    if structured_data and len(structured_data) > 1:
        st.session_state.structured_results.append(structured_data)


def _render_result(title, image, content, structured_data, note=None):
    """Show one processed item: preview on the left, model output on the right."""
    st.subheader(title)
    c1, c2 = st.columns([1, 2])
    with c1:
        st.image(image, width=250)
        if note:
            st.info(note)
    with c2:
        st.write(content)
        if structured_data and len(structured_data) > 1:
            st.success("Extracted structured data")
            st.json(structured_data)
    st.divider()


if uploaded_files and process_button:
    if not OPENROUTER_API_KEY:
        st.error("OPENROUTER_API_KEY is not set. Add it in your Space → Settings → Variables & secrets.")
    else:
        st.header("Processing Results")
        progress_bar = st.progress(0)
        status_text = st.empty()

        # Each run starts from a clean slate.
        st.session_state.results = []
        st.session_state.structured_results = []

        # max(..., 1) keeps the progress fraction well defined.
        total_items = max(_count_work_items(uploaded_files, pdf_process_mode), 1)
        processed_count = 0
        process_separately = pdf_process_mode == "Process each page separately"

        for f in uploaded_files:
            if f.name.lower().endswith('.pdf'):
                if not PDF_SUPPORT:
                    st.error("PDF support requires PyMuPDF. Add 'pymupdf' to requirements.txt.")
                    processed_count += 1
                    progress_bar.progress(min(processed_count / total_items, 1.0))
                    continue

                f.seek(0)
                file_bytes = f.read()
                f.seek(0)

                try:
                    for page_info in process_pdf(file_bytes, f.name, fields, process_separately, selected_model):
                        page_num, page_count, image, page_filename, content, structured_data = page_info
                        if page_num is None:
                            # process_pdf reports failures in-band; content holds the message.
                            st.error(content)
                        else:
                            status_text.text(f"Processing {page_filename} ({page_num+1}/{page_count})")
                            _record_result(
                                {'filename': page_filename, 'description': content},
                                structured_data,
                            )
                            note = None
                            if page_count > 1 and not process_separately:
                                note = f"PDF has {page_count} pages. Showing first page only."
                            _render_result(page_filename, image, content, structured_data, note)

                        # Failed items count as handled too, so the bar keeps moving.
                        processed_count += 1
                        progress_bar.progress(min(processed_count / total_items, 1.0))

                except Exception as e:
                    st.error(f"Error processing PDF {f.name}: {e}")
                    processed_count += 1
                    progress_bar.progress(min(processed_count / total_items, 1.0))

            else:
                try:
                    status_text.text(f"Processing image {f.name}")
                    f.seek(0)
                    image = Image.open(f).convert("RGB")
                    result, content, structured_data = process_image(image, f.name, fields, selected_model)
                    _record_result(result, structured_data)
                    _render_result(f"Image: {f.name}", image, content, structured_data)
                except Exception as e:
                    st.error(f"Error processing image {f.name}: {e}")

                processed_count += 1
                progress_bar.progress(min(processed_count / total_items, 1.0))

        # A PDF that fails part-way leaves pages uncounted; finish the bar anyway.
        progress_bar.progress(1.0)
        status_text.text("Processing complete.")

# NOTE(review): the original nesting of this section is not recoverable from
# the source. It is kept at top level so the download buttons are rebuilt from
# session state on later reruns, when the "Process Files" button is no longer
# reported as pressed. Confirm this matches the intended behavior.
if st.session_state.results:
    create_download_buttons(
        st.session_state.results,
        st.session_state.structured_results,
        extraction_mode,
    )
376
-
377
- # Empty state
378
# Usage instructions shown in the main area while nothing is uploaded
_HOW_TO_USE = """
How to use:
1) Upload one or more images or PDFs
2) Choose the OpenRouter vision model (Gemma 3 4B IT)
3) Pick description or custom field extraction
4) For PDFs, choose page-by-page or first page
5) Click Process Files
6) Review outputs and download CSVs
"""

# Centered, dimmed footer line rendered as raw HTML
_FOOTER_HTML = """
<div style="text-align: center; margin-top: 12px; opacity: 0.7;">
Built for Hugging Face Spaces + OpenRouter
</div>
"""

if not uploaded_files:
    st.info("Upload files using the sidebar to get started.")
    st.write(_HOW_TO_USE)

st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)