github-actions[bot] committed on
Commit
50fd07f
·
1 Parent(s): a7f1144

Sync from GitHub: 8a50d1a476de09cf21a2f3b86974fb6b18b71f60

Browse files
app.py CHANGED
@@ -98,7 +98,8 @@ async def health_check():
98
  @app.post("/extract")
99
  async def extract_invoice(
100
  file: UploadFile = File(..., description="Invoice image file (JPG, PNG, JPEG)"),
101
- doc_id: Optional[str] = Form(None, description="Optional document identifier")
 
102
  ):
103
  """
104
  Extract information from invoice image
@@ -171,7 +172,7 @@ async def extract_invoice(
171
  doc_id = os.path.splitext(file.filename)[0]
172
 
173
  # Process invoice
174
- result = InferenceProcessor.process_invoice(temp_file, doc_id)
175
 
176
  # Add total request time (includes file I/O)
177
  result['total_request_time_sec'] = round(time.time() - request_start, 2)
@@ -199,7 +200,8 @@ async def extract_invoice(
199
 
200
  @app.post("/process-invoice")
201
  async def process_invoice(
202
- file: UploadFile = File(..., description="Invoice image file")
 
203
  ):
204
  """
205
  Process a single invoice and return extracted information
@@ -207,6 +209,7 @@ async def process_invoice(
207
 
208
  **Parameters:**
209
  - **file**: Invoice image file (required)
 
210
 
211
  **Returns:**
212
  - JSON with extracted_text, signature_coords, stamp_coords
@@ -238,7 +241,7 @@ async def process_invoice(
238
  doc_id = os.path.splitext(file.filename)[0] if file.filename else "invoice"
239
 
240
  # Process invoice
241
- result = InferenceProcessor.process_invoice(temp_file, doc_id)
242
 
243
  # Extract fields from result
244
  fields = result.get("fields", {})
 
98
  @app.post("/extract")
99
  async def extract_invoice(
100
  file: UploadFile = File(..., description="Invoice image file (JPG, PNG, JPEG)"),
101
+ doc_id: Optional[str] = Form(None, description="Optional document identifier"),
102
+ enhance_image: Optional[bool] = Form(False, description="Apply OpenCV enhancement preprocessing")
103
  ):
104
  """
105
  Extract information from invoice image
 
172
  doc_id = os.path.splitext(file.filename)[0]
173
 
174
  # Process invoice
175
+ result = InferenceProcessor.process_invoice(temp_file, doc_id, enhance_image)
176
 
177
  # Add total request time (includes file I/O)
178
  result['total_request_time_sec'] = round(time.time() - request_start, 2)
 
200
 
201
  @app.post("/process-invoice")
202
  async def process_invoice(
203
+ file: UploadFile = File(..., description="Invoice image file"),
204
+ enhance_image: Optional[bool] = Form(False, description="Apply OpenCV enhancement preprocessing")
205
  ):
206
  """
207
  Process a single invoice and return extracted information
 
209
 
210
  **Parameters:**
211
  - **file**: Invoice image file (required)
212
+ - **enhance_image**: Apply OpenCV enhancement preprocessing (optional)
213
 
214
  **Returns:**
215
  - JSON with extracted_text, signature_coords, stamp_coords
 
241
  doc_id = os.path.splitext(file.filename)[0] if file.filename else "invoice"
242
 
243
  # Process invoice
244
+ result = InferenceProcessor.process_invoice(temp_file, doc_id, enhance_image)
245
 
246
  # Extract fields from result
247
  fields = result.get("fields", {})
frontend/src/App.jsx CHANGED
@@ -17,6 +17,7 @@ function App() {
17
  const [processingIndex, setProcessingIndex] = useState(null);
18
  const [resolutionMap, setResolutionMap] = useState({});
19
  const [resultResolutionMap, setResultResolutionMap] = useState({});
 
20
 
21
  const handleFilesSelected = async (files) => {
22
  setProcessing(false);
@@ -25,6 +26,7 @@ function App() {
25
  setImageDataMap({});
26
  setPreviewImages([]);
27
  setResolutionMap({});
 
28
 
29
  try {
30
  // Step 1: Convert all files to images and show previews
@@ -92,8 +94,9 @@ function App() {
92
  // Use resolution-adjusted image if available
93
  const processData = resolutionMap[preview.key] || { dataUrl: preview.dataUrl, resolution: 100 };
94
  const blob = dataUrlToBlob(processData.dataUrl);
 
95
 
96
- const result = await processSingleInvoice(blob, preview.filename);
97
 
98
  const resultWithMetadata = {
99
  ...result,
@@ -139,8 +142,9 @@ function App() {
139
  try {
140
  // Use resolution-adjusted image from ResultCard
141
  const blob = dataUrlToBlob(adjustedDataUrl || imageDataMap[result.key]);
 
142
 
143
- const newResult = await processSingleInvoice(blob, result.filename);
144
 
145
  const resultWithMetadata = {
146
  ...newResult,
@@ -172,6 +176,13 @@ function App() {
172
  }));
173
  };
174
 
 
 
 
 
 
 
 
175
  return (
176
  <div className="min-h-screen py-8 px-4 sm:px-6 lg:px-8">
177
  <div className="max-w-7xl mx-auto">
@@ -224,6 +235,8 @@ function App() {
224
  onResolutionChange={(dataUrl, resolution) =>
225
  handleResolutionChange(preview.key, dataUrl, resolution)
226
  }
 
 
227
  />
228
  ))}
229
  </div>
 
17
  const [processingIndex, setProcessingIndex] = useState(null);
18
  const [resolutionMap, setResolutionMap] = useState({});
19
  const [resultResolutionMap, setResultResolutionMap] = useState({});
20
+ const [enhancedMap, setEnhancedMap] = useState({}); // Track which images are enhanced
21
 
22
  const handleFilesSelected = async (files) => {
23
  setProcessing(false);
 
26
  setImageDataMap({});
27
  setPreviewImages([]);
28
  setResolutionMap({});
29
+ setEnhancedMap({}); // Reset enhanced state
30
 
31
  try {
32
  // Step 1: Convert all files to images and show previews
 
94
  // Use resolution-adjusted image if available
95
  const processData = resolutionMap[preview.key] || { dataUrl: preview.dataUrl, resolution: 100 };
96
  const blob = dataUrlToBlob(processData.dataUrl);
97
+ const isEnhanced = enhancedMap[preview.key] || false;
98
 
99
+ const result = await processSingleInvoice(blob, preview.filename, isEnhanced);
100
 
101
  const resultWithMetadata = {
102
  ...result,
 
142
  try {
143
  // Use resolution-adjusted image from ResultCard
144
  const blob = dataUrlToBlob(adjustedDataUrl || imageDataMap[result.key]);
145
+ const isEnhanced = enhancedMap[result.key] || false;
146
 
147
+ const newResult = await processSingleInvoice(blob, result.filename, isEnhanced);
148
 
149
  const resultWithMetadata = {
150
  ...newResult,
 
176
  }));
177
  };
178
 
179
+ const handleEnhanceToggle = (key) => {
180
+ setEnhancedMap(prev => ({
181
+ ...prev,
182
+ [key]: !prev[key]
183
+ }));
184
+ };
185
+
186
  return (
187
  <div className="min-h-screen py-8 px-4 sm:px-6 lg:px-8">
188
  <div className="max-w-7xl mx-auto">
 
235
  onResolutionChange={(dataUrl, resolution) =>
236
  handleResolutionChange(preview.key, dataUrl, resolution)
237
  }
238
+ onEnhanceToggle={() => handleEnhanceToggle(preview.key)}
239
+ isEnhanced={enhancedMap[preview.key] || false}
240
  />
241
  ))}
242
  </div>
frontend/src/components/ImagePreview.jsx CHANGED
@@ -1,7 +1,7 @@
1
  import React, { useState, useEffect, useRef } from 'react';
2
- import { SlidersHorizontal } from 'lucide-react';
3
 
4
- const ImagePreview = ({ imageData, fileName, onResolutionChange }) => {
5
  const [resolution, setResolution] = useState(100);
6
  const canvasRef = useRef(null);
7
  const [originalDimensions, setOriginalDimensions] = useState({ width: 0, height: 0 });
@@ -68,6 +68,25 @@ const ImagePreview = ({ imageData, fileName, onResolutionChange }) => {
68
  <canvas ref={canvasRef} className="rounded shadow-sm" />
69
  </div>
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  <div className="space-y-2">
72
  <div className="flex items-center justify-between">
73
  <label className="text-sm font-medium text-gray-700 flex items-center gap-2">
 
1
  import React, { useState, useEffect, useRef } from 'react';
2
+ import { SlidersHorizontal, Sparkles } from 'lucide-react';
3
 
4
+ const ImagePreview = ({ imageData, fileName, onResolutionChange, onEnhanceToggle, isEnhanced }) => {
5
  const [resolution, setResolution] = useState(100);
6
  const canvasRef = useRef(null);
7
  const [originalDimensions, setOriginalDimensions] = useState({ width: 0, height: 0 });
 
68
  <canvas ref={canvasRef} className="rounded shadow-sm" />
69
  </div>
70
 
71
+ {/* Enhance Button */}
72
+ <button
73
+ onClick={() => onEnhanceToggle && onEnhanceToggle()}
74
+ className={`w-full py-2 px-4 rounded-lg font-medium transition-all flex items-center justify-center gap-2 ${
75
+ isEnhanced
76
+ ? 'bg-purple-600 hover:bg-purple-700 text-white shadow-lg'
77
+ : 'bg-gradient-to-r from-purple-500 to-pink-500 hover:from-purple-600 hover:to-pink-600 text-white shadow-md'
78
+ }`}
79
+ >
80
+ <Sparkles className="w-4 h-4" />
81
+ {isEnhanced ? 'Enhanced ✓' : 'Enhance Image'}
82
+ </button>
83
+
84
+ {isEnhanced && (
85
+ <div className="bg-purple-50 border border-purple-200 rounded p-2 text-xs text-purple-700">
86
+ ✨ Image will be enhanced with OpenCV (CLAHE, denoising, sharpening) before processing
87
+ </div>
88
+ )}
89
+
90
  <div className="space-y-2">
91
  <div className="flex items-center justify-between">
92
  <label className="text-sm font-medium text-gray-700 flex items-center gap-2">
frontend/src/utils/api.js CHANGED
@@ -7,11 +7,13 @@ const API_BASE_URL = import.meta.env.VITE_API_URL || window.location.origin;
7
  * Process a single invoice image
8
  * @param {Blob} imageBlob - Image blob
9
  * @param {string} filename - Original filename
 
10
  * @returns {Promise<Object>} Processed result
11
  */
12
- export async function processSingleInvoice(imageBlob, filename) {
13
  const formData = new FormData();
14
  formData.append('file', imageBlob, filename);
 
15
 
16
  const response = await axios.post(`${API_BASE_URL}/process-invoice`, formData, {
17
  headers: {
 
7
  * Process a single invoice image
8
  * @param {Blob} imageBlob - Image blob
9
  * @param {string} filename - Original filename
10
+ * @param {boolean} enhanceImage - Whether to apply OpenCV enhancement
11
  * @returns {Promise<Object>} Processed result
12
  */
13
+ export async function processSingleInvoice(imageBlob, filename, enhanceImage = false) {
14
  const formData = new FormData();
15
  formData.append('file', imageBlob, filename);
16
+ formData.append('enhance_image', enhanceImage);
17
 
18
  const response = await axios.post(`${API_BASE_URL}/process-invoice`, formData, {
19
  headers: {
inference.py CHANGED
@@ -7,6 +7,8 @@ import time
7
  import json
8
  import codecs
9
  import re
 
 
10
  from PIL import Image
11
  from qwen_vl_utils import process_vision_info
12
  from typing import Dict, Tuple
@@ -63,6 +65,48 @@ Output rules:
63
  class InferenceProcessor:
64
  """Handles VLM inference, validation, and result processing"""
65
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
  @staticmethod
67
  def preprocess_image(image_path: str) -> Image.Image:
68
  """Load and resize image if needed"""
@@ -284,13 +328,14 @@ class InferenceProcessor:
284
  return validated, field_confidence, warnings
285
 
286
  @staticmethod
287
- def process_invoice(image_path: str, doc_id: str = None) -> Dict:
288
  """
289
  Complete invoice processing pipeline
290
 
291
  Args:
292
  image_path: Path to invoice image
293
  doc_id: Document identifier (optional)
 
294
 
295
  Returns:
296
  dict: Complete JSON output with all fields
@@ -303,6 +348,12 @@ class InferenceProcessor:
303
  import os
304
  doc_id = os.path.splitext(os.path.basename(image_path))[0]
305
 
 
 
 
 
 
 
306
  # Step 1: Preprocess image
307
  t1 = time.time()
308
  image = InferenceProcessor.preprocess_image(image_path)
 
7
  import json
8
  import codecs
9
  import re
10
+ import cv2
11
+ import numpy as np
12
  from PIL import Image
13
  from qwen_vl_utils import process_vision_info
14
  from typing import Dict, Tuple
 
65
  class InferenceProcessor:
66
  """Handles VLM inference, validation, and result processing"""
67
 
68
+ @staticmethod
69
+ def enhance_image_opencv(image_path: str) -> str:
70
+ """
71
+ Apply OpenCV preprocessing to enhance image quality
72
+ Returns path to enhanced image (same as input, modified in place)
73
+ """
74
+ # Load image (BGR)
75
+ img = cv2.imread(image_path)
76
+ if img is None:
77
+ raise ValueError(f"Could not read image from {image_path}")
78
+
79
+ # Convert to LAB color space (better for contrast)
80
+ lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
81
+ l, a, b = cv2.split(lab)
82
+
83
+ # CLAHE on L-channel
84
+ clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8))
85
+ l_enhanced = clahe.apply(l)
86
+
87
+ # Merge back
88
+ lab_enhanced = cv2.merge((l_enhanced, a, b))
89
+ contrast_enhanced = cv2.cvtColor(lab_enhanced, cv2.COLOR_LAB2BGR)
90
+
91
+ # Noise reduction
92
+ denoised = cv2.fastNlMeansDenoisingColored(
93
+ contrast_enhanced,
94
+ None,
95
+ h=10, hColor=10,
96
+ templateWindowSize=7,
97
+ searchWindowSize=21
98
+ )
99
+
100
+ # Sharpening (Unsharp Mask)
101
+ blur = cv2.GaussianBlur(denoised, (0, 0), sigmaX=1.2)
102
+ sharpened = cv2.addWeighted(denoised, 1.5, blur, -0.5, 0)
103
+
104
+ # Save enhanced image back to the same path
105
+ cv2.imwrite(image_path, sharpened)
106
+ print(f"✨ Image enhanced with OpenCV preprocessing")
107
+
108
+ return image_path
109
+
110
  @staticmethod
111
  def preprocess_image(image_path: str) -> Image.Image:
112
  """Load and resize image if needed"""
 
328
  return validated, field_confidence, warnings
329
 
330
  @staticmethod
331
+ def process_invoice(image_path: str, doc_id: str = None, enhance_image: bool = False) -> Dict:
332
  """
333
  Complete invoice processing pipeline
334
 
335
  Args:
336
  image_path: Path to invoice image
337
  doc_id: Document identifier (optional)
338
+ enhance_image: Whether to apply OpenCV enhancement (optional)
339
 
340
  Returns:
341
  dict: Complete JSON output with all fields
 
348
  import os
349
  doc_id = os.path.splitext(os.path.basename(image_path))[0]
350
 
351
+ # Step 0: Apply OpenCV Enhancement if requested
352
+ if enhance_image:
353
+ t0 = time.time()
354
+ image_path = InferenceProcessor.enhance_image_opencv(image_path)
355
+ timing_breakdown['opencv_enhancement'] = round(time.time() - t0, 3)
356
+
357
  # Step 1: Preprocess image
358
  t1 = time.time()
359
  image = InferenceProcessor.preprocess_image(image_path)