Spaces:

quantumbit
/

invoice_extractor

Paused

App Files Community

github-actions[bot] committed 6 days ago

Commit

50fd07f

1 Parent(s): a7f1144

Sync from GitHub: 8a50d1a476de09cf21a2f3b86974fb6b18b71f60

Browse files

Files changed (5) hide show

app.py +7 -4
frontend/src/App.jsx +15 -2
frontend/src/components/ImagePreview.jsx +21 -2
frontend/src/utils/api.js +3 -1
inference.py +52 -1

app.py CHANGED Viewed

@@ -98,7 +98,8 @@ async def health_check():
 @app.post("/extract")
 async def extract_invoice(
     file: UploadFile = File(..., description="Invoice image file (JPG, PNG, JPEG)"),
-    doc_id: Optional[str] = Form(None, description="Optional document identifier")
 ):
     """
     Extract information from invoice image
@@ -171,7 +172,7 @@ async def extract_invoice(
             doc_id = os.path.splitext(file.filename)[0]
         # Process invoice
-        result = InferenceProcessor.process_invoice(temp_file, doc_id)
         # Add total request time (includes file I/O)
         result['total_request_time_sec'] = round(time.time() - request_start, 2)
@@ -199,7 +200,8 @@ async def extract_invoice(
 @app.post("/process-invoice")
 async def process_invoice(
-    file: UploadFile = File(..., description="Invoice image file")
 ):
     """
     Process a single invoice and return extracted information
@@ -207,6 +209,7 @@ async def process_invoice(
     **Parameters:**
     - **file**: Invoice image file (required)
     **Returns:**
     - JSON with extracted_text, signature_coords, stamp_coords
@@ -238,7 +241,7 @@ async def process_invoice(
         doc_id = os.path.splitext(file.filename)[0] if file.filename else "invoice"
         # Process invoice
-        result = InferenceProcessor.process_invoice(temp_file, doc_id)
         # Extract fields from result
         fields = result.get("fields", {})

 @app.post("/extract")
 async def extract_invoice(
     file: UploadFile = File(..., description="Invoice image file (JPG, PNG, JPEG)"),
+    doc_id: Optional[str] = Form(None, description="Optional document identifier"),
+    enhance_image: Optional[bool] = Form(False, description="Apply OpenCV enhancement preprocessing")
 ):
     """
     Extract information from invoice image
             doc_id = os.path.splitext(file.filename)[0]
         # Process invoice
+        result = InferenceProcessor.process_invoice(temp_file, doc_id, enhance_image)
         # Add total request time (includes file I/O)
         result['total_request_time_sec'] = round(time.time() - request_start, 2)
 @app.post("/process-invoice")
 async def process_invoice(
+    file: UploadFile = File(..., description="Invoice image file"),
+    enhance_image: Optional[bool] = Form(False, description="Apply OpenCV enhancement preprocessing")
 ):
     """
     Process a single invoice and return extracted information
     **Parameters:**
     - **file**: Invoice image file (required)
+    - **enhance_image**: Apply OpenCV enhancement preprocessing (optional)
     **Returns:**
     - JSON with extracted_text, signature_coords, stamp_coords
         doc_id = os.path.splitext(file.filename)[0] if file.filename else "invoice"
         # Process invoice
+        result = InferenceProcessor.process_invoice(temp_file, doc_id, enhance_image)
         # Extract fields from result
         fields = result.get("fields", {})

frontend/src/App.jsx CHANGED Viewed

@@ -17,6 +17,7 @@ function App() {
   const [processingIndex, setProcessingIndex] = useState(null);
   const [resolutionMap, setResolutionMap] = useState({});
   const [resultResolutionMap, setResultResolutionMap] = useState({});
   const handleFilesSelected = async (files) => {
     setProcessing(false);
@@ -25,6 +26,7 @@ function App() {
     setImageDataMap({});
     setPreviewImages([]);
     setResolutionMap({});
     try {
       // Step 1: Convert all files to images and show previews
@@ -92,8 +94,9 @@ function App() {
           // Use resolution-adjusted image if available
           const processData = resolutionMap[preview.key] || { dataUrl: preview.dataUrl, resolution: 100 };
           const blob = dataUrlToBlob(processData.dataUrl);
-          const result = await processSingleInvoice(blob, preview.filename);
           const resultWithMetadata = {
             ...result,
@@ -139,8 +142,9 @@ function App() {
     try {
       // Use resolution-adjusted image from ResultCard
       const blob = dataUrlToBlob(adjustedDataUrl || imageDataMap[result.key]);
-      const newResult = await processSingleInvoice(blob, result.filename);
       const resultWithMetadata = {
         ...newResult,
@@ -172,6 +176,13 @@ function App() {
     }));
   };
   return (
     <div className="min-h-screen py-8 px-4 sm:px-6 lg:px-8">
       <div className="max-w-7xl mx-auto">
@@ -224,6 +235,8 @@ function App() {
                     onResolutionChange={(dataUrl, resolution) =>
                       handleResolutionChange(preview.key, dataUrl, resolution)
                     }
                   />
                 ))}
               </div>

   const [processingIndex, setProcessingIndex] = useState(null);
   const [resolutionMap, setResolutionMap] = useState({});
   const [resultResolutionMap, setResultResolutionMap] = useState({});
+  const [enhancedMap, setEnhancedMap] = useState({}); // Track which images are enhanced
   const handleFilesSelected = async (files) => {
     setProcessing(false);
     setImageDataMap({});
     setPreviewImages([]);
     setResolutionMap({});
+    setEnhancedMap({}); // Reset enhanced state
     try {
       // Step 1: Convert all files to images and show previews
           // Use resolution-adjusted image if available
           const processData = resolutionMap[preview.key] || { dataUrl: preview.dataUrl, resolution: 100 };
           const blob = dataUrlToBlob(processData.dataUrl);
+          const isEnhanced = enhancedMap[preview.key] || false;
+          const result = await processSingleInvoice(blob, preview.filename, isEnhanced);
           const resultWithMetadata = {
             ...result,
     try {
       // Use resolution-adjusted image from ResultCard
       const blob = dataUrlToBlob(adjustedDataUrl || imageDataMap[result.key]);
+      const isEnhanced = enhancedMap[result.key] || false;
+      const newResult = await processSingleInvoice(blob, result.filename, isEnhanced);
       const resultWithMetadata = {
         ...newResult,
     }));
   };
+  const handleEnhanceToggle = (key) => {
+    setEnhancedMap(prev => ({
+      ...prev,
+      [key]: !prev[key]
+    }));
+  };
   return (
     <div className="min-h-screen py-8 px-4 sm:px-6 lg:px-8">
       <div className="max-w-7xl mx-auto">
                     onResolutionChange={(dataUrl, resolution) =>
                       handleResolutionChange(preview.key, dataUrl, resolution)
                     }
+                    onEnhanceToggle={() => handleEnhanceToggle(preview.key)}
+                    isEnhanced={enhancedMap[preview.key] || false}
                   />
                 ))}
               </div>

frontend/src/components/ImagePreview.jsx CHANGED Viewed

@@ -1,7 +1,7 @@
 import React, { useState, useEffect, useRef } from 'react';
-import { SlidersHorizontal } from 'lucide-react';
-const ImagePreview = ({ imageData, fileName, onResolutionChange }) => {
   const [resolution, setResolution] = useState(100);
   const canvasRef = useRef(null);
   const [originalDimensions, setOriginalDimensions] = useState({ width: 0, height: 0 });
@@ -68,6 +68,25 @@ const ImagePreview = ({ imageData, fileName, onResolutionChange }) => {
         <canvas ref={canvasRef} className="rounded shadow-sm" />
       </div>
       <div className="space-y-2">
         <div className="flex items-center justify-between">
           <label className="text-sm font-medium text-gray-700 flex items-center gap-2">

 import React, { useState, useEffect, useRef } from 'react';
+import { SlidersHorizontal, Sparkles } from 'lucide-react';
+const ImagePreview = ({ imageData, fileName, onResolutionChange, onEnhanceToggle, isEnhanced }) => {
   const [resolution, setResolution] = useState(100);
   const canvasRef = useRef(null);
   const [originalDimensions, setOriginalDimensions] = useState({ width: 0, height: 0 });
         <canvas ref={canvasRef} className="rounded shadow-sm" />
       </div>
+      {/* Enhance Button */}
+      <button
+        onClick={() => onEnhanceToggle && onEnhanceToggle()}
+        className={`w-full py-2 px-4 rounded-lg font-medium transition-all flex items-center justify-center gap-2 ${
+          isEnhanced
+            ? 'bg-purple-600 hover:bg-purple-700 text-white shadow-lg'
+            : 'bg-gradient-to-r from-purple-500 to-pink-500 hover:from-purple-600 hover:to-pink-600 text-white shadow-md'
+        }`}
+      >
+        <Sparkles className="w-4 h-4" />
+        {isEnhanced ? 'Enhanced ✓' : 'Enhance Image'}
+      </button>
+      {isEnhanced && (
+        <div className="bg-purple-50 border border-purple-200 rounded p-2 text-xs text-purple-700">
+          ✨ Image will be enhanced with OpenCV (CLAHE, denoising, sharpening) before processing
+        </div>
+      )}
       <div className="space-y-2">
         <div className="flex items-center justify-between">
           <label className="text-sm font-medium text-gray-700 flex items-center gap-2">

frontend/src/utils/api.js CHANGED Viewed

@@ -7,11 +7,13 @@ const API_BASE_URL = import.meta.env.VITE_API_URL || window.location.origin;
  * Process a single invoice image
  * @param {Blob} imageBlob - Image blob
  * @param {string} filename - Original filename
  * @returns {Promise<Object>} Processed result
  */
-export async function processSingleInvoice(imageBlob, filename) {
   const formData = new FormData();
   formData.append('file', imageBlob, filename);
   const response = await axios.post(`${API_BASE_URL}/process-invoice`, formData, {
     headers: {

  * Process a single invoice image
  * @param {Blob} imageBlob - Image blob
  * @param {string} filename - Original filename
+ * @param {boolean} enhanceImage - Whether to apply OpenCV enhancement
  * @returns {Promise<Object>} Processed result
  */
+export async function processSingleInvoice(imageBlob, filename, enhanceImage = false) {
   const formData = new FormData();
   formData.append('file', imageBlob, filename);
+  formData.append('enhance_image', enhanceImage);
   const response = await axios.post(`${API_BASE_URL}/process-invoice`, formData, {
     headers: {

inference.py CHANGED Viewed

@@ -7,6 +7,8 @@ import time
 import json
 import codecs
 import re
 from PIL import Image
 from qwen_vl_utils import process_vision_info
 from typing import Dict, Tuple
@@ -63,6 +65,48 @@ Output rules:
 class InferenceProcessor:
     """Handles VLM inference, validation, and result processing"""
     @staticmethod
     def preprocess_image(image_path: str) -> Image.Image:
         """Load and resize image if needed"""
@@ -284,13 +328,14 @@ class InferenceProcessor:
         return validated, field_confidence, warnings
     @staticmethod
-    def process_invoice(image_path: str, doc_id: str = None) -> Dict:
         """
         Complete invoice processing pipeline
         Args:
             image_path: Path to invoice image
             doc_id: Document identifier (optional)
         Returns:
             dict: Complete JSON output with all fields
@@ -303,6 +348,12 @@ class InferenceProcessor:
             import os
             doc_id = os.path.splitext(os.path.basename(image_path))[0]
         # Step 1: Preprocess image
         t1 = time.time()
         image = InferenceProcessor.preprocess_image(image_path)

 import json
 import codecs
 import re
+import cv2
+import numpy as np
 from PIL import Image
 from qwen_vl_utils import process_vision_info
 from typing import Dict, Tuple
 class InferenceProcessor:
     """Handles VLM inference, validation, and result processing"""
+    @staticmethod
+    def enhance_image_opencv(image_path: str) -> str:
+        """
+        Apply OpenCV preprocessing to enhance image quality
+        Returns path to enhanced image (same as input, modified in place)
+        """
+        # Load image (BGR)
+        img = cv2.imread(image_path)
+        if img is None:
+            raise ValueError(f"Could not read image from {image_path}")
+        # Convert to LAB color space (better for contrast)
+        lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB)
+        l, a, b = cv2.split(lab)
+        # CLAHE on L-channel
+        clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8))
+        l_enhanced = clahe.apply(l)
+        # Merge back
+        lab_enhanced = cv2.merge((l_enhanced, a, b))
+        contrast_enhanced = cv2.cvtColor(lab_enhanced, cv2.COLOR_LAB2BGR)
+        # Noise reduction
+        denoised = cv2.fastNlMeansDenoisingColored(
+            contrast_enhanced,
+            None,
+            h=10, hColor=10,
+            templateWindowSize=7,
+            searchWindowSize=21
+        )
+        # Sharpening (Unsharp Mask)
+        blur = cv2.GaussianBlur(denoised, (0, 0), sigmaX=1.2)
+        sharpened = cv2.addWeighted(denoised, 1.5, blur, -0.5, 0)
+        # Save enhanced image back to the same path
+        cv2.imwrite(image_path, sharpened)
+        print(f"✨ Image enhanced with OpenCV preprocessing")
+        return image_path
     @staticmethod
     def preprocess_image(image_path: str) -> Image.Image:
         """Load and resize image if needed"""
         return validated, field_confidence, warnings
     @staticmethod
+    def process_invoice(image_path: str, doc_id: str = None, enhance_image: bool = False) -> Dict:
         """
         Complete invoice processing pipeline
         Args:
             image_path: Path to invoice image
             doc_id: Document identifier (optional)
+            enhance_image: Whether to apply OpenCV enhancement (optional)
         Returns:
             dict: Complete JSON output with all fields
             import os
             doc_id = os.path.splitext(os.path.basename(image_path))[0]
+        # Step 0: Apply OpenCV Enhancement if requested
+        if enhance_image:
+            t0 = time.time()
+            image_path = InferenceProcessor.enhance_image_opencv(image_path)
+            timing_breakdown['opencv_enhancement'] = round(time.time() - t0, 3)
         # Step 1: Preprocess image
         t1 = time.time()
         image = InferenceProcessor.preprocess_image(image_path)