Spaces:

quantumbit
/

invoice_extractor

Paused

App Files Community

github-actions[bot] committed on Feb 6

Commit

ba99b21

1 Parent(s): d062149

Sync from GitHub: 00abff8ce0e5fc17d13c16c7a28b60591519690d

Browse files

Files changed (3) hide show

app.py +2 -1
frontend/src/components/ResultCard.jsx +34 -1
inference.py +25 -130

app.py CHANGED Viewed

@@ -286,7 +286,8 @@ async def process_invoice(
             "processing_time": result.get("processing_time_sec", 0),
             "confidence": result.get("confidence", 0),
             "cost_estimate_usd": result.get("cost_estimate_usd", 0),
-            "fields": fields  # Include raw fields for reference
         }, media_type="application/json; charset=utf-8")
     except Exception as e:

             "processing_time": result.get("processing_time_sec", 0),
             "confidence": result.get("confidence", 0),
             "cost_estimate_usd": result.get("cost_estimate_usd", 0),
+            "fields": fields,  # Include raw fields for reference
+            "timing_breakdown": result.get("timing_breakdown", {})  # Include timing info (with reasoning output if present)
         }, media_type="application/json; charset=utf-8")
     except Exception as e:

frontend/src/components/ResultCard.jsx CHANGED Viewed

@@ -1,5 +1,5 @@
 import React, { useRef, useEffect, useState } from 'react';
-import { SlidersHorizontal } from 'lucide-react';
 const ResultCard = ({ result, imageData, processedImageData, onReprocess, isProcessing }) => {
   const canvasRef = useRef(null);
@@ -11,6 +11,7 @@ const ResultCard = ({ result, imageData, processedImageData, onReprocess, isProc
   const [adjustedDataUrl, setAdjustedDataUrl] = useState(null);
   const [previewDimensions, setPreviewDimensions] = useState({ width: 0, height: 0 });
   const [currentImageData, setCurrentImageData] = useState(processedImageData || imageData);
   // Function to crop image regions
   const cropRegion = (img, coords, scaleX, scaleY) => {
@@ -316,6 +317,38 @@ const ResultCard = ({ result, imageData, processedImageData, onReprocess, isProc
               </div>
             </div>
             {/* Detection Status */}
             <div className="grid grid-cols-2 gap-3">
               <div className="bg-white rounded-lg p-4 shadow-sm">

 import React, { useRef, useEffect, useState } from 'react';
+import { SlidersHorizontal, ChevronDown, ChevronUp, Brain } from 'lucide-react';
 const ResultCard = ({ result, imageData, processedImageData, onReprocess, isProcessing }) => {
   const canvasRef = useRef(null);
   const [adjustedDataUrl, setAdjustedDataUrl] = useState(null);
   const [previewDimensions, setPreviewDimensions] = useState({ width: 0, height: 0 });
   const [currentImageData, setCurrentImageData] = useState(processedImageData || imageData);
+  const [showReasoning, setShowReasoning] = useState(false);
   // Function to crop image regions
   const cropRegion = (img, coords, scaleX, scaleY) => {
               </div>
             </div>
+            {/* Reasoning Output (Chain of Thought) */}
+            {result.timing_breakdown?.reasoning_output && (
+              <div className="bg-blue-50 rounded-lg border border-blue-200 overflow-hidden">
+                <button
+                  onClick={() => setShowReasoning(!showReasoning)}
+                  className="w-full px-4 py-3 flex items-center justify-between hover:bg-blue-100 transition-colors"
+                >
+                  <div className="flex items-center gap-2">
+                    <Brain className="w-5 h-5 text-blue-600" />
+                    <h5 className="text-sm font-semibold text-blue-700 uppercase tracking-wide">
+                      Chain of Thought Reasoning Output
+                    </h5>
+                  </div>
+                  {showReasoning ? (
+                    <ChevronUp className="w-5 h-5 text-blue-600" />
+                  ) : (
+                    <ChevronDown className="w-5 h-5 text-blue-600" />
+                  )}
+                </button>
+                {showReasoning && (
+                  <div className="px-4 pb-4">
+                    <div className="text-xs text-blue-600 mb-2">
+                      This is the model's reasoning before extracting structured fields
+                    </div>
+                    <div className="text-sm text-gray-800 whitespace-pre-wrap max-h-96 overflow-y-auto font-mono bg-white p-3 rounded border border-blue-300">
+                      {result.timing_breakdown.reasoning_output}
+                    </div>
+                  </div>
+                )}
+              </div>
+            )}
             {/* Detection Status */}
             <div className="grid grid-cols-2 gap-3">
               <div className="bg-white rounded-lg p-4 shadow-sm">

inference.py CHANGED Viewed

@@ -63,100 +63,38 @@ Output rules:
 """
-# Two-step Chain of Thought prompts (reasoning mode)
 REASONING_PROMPT = """
-You are an expert at analyzing noisy, handwritten Indian invoices and quotations for tractors.
-Your task is to carefully observe and describe the document structure WITHOUT extracting yet.
-Analyze this tractor invoice image and provide detailed observations about:
-1. DEALER/COMPANY NAME
-   - Where is it located? (top header, letterhead, stamp, footer)
-   - What language is it written in?
-   - Is it printed or handwritten?
-   - Exact text you see (preserve original language)
-2. MODEL INFORMATION
-   - Where is the model mentioned? (checkbox list, handwritten field, printed table, near "Model:" label)
-   - Are there multiple model options shown?
-   - If checkboxes exist, which one is marked? (look for ✓, ✗, [X], ●, ☑, filled boxes)
-   - Is the model name in English or regional language?
-   - Exact text you see for the selected/mentioned model
-3. HORSE POWER (HP)
-   - Where is HP information located? (separate field, within model name, checkbox list, specifications table)
-   - Is HP explicitly written or implied from model code?
-   - If there's a checkbox list with HP options, which one is selected?
-   - Are there multiple HP values shown? Which one corresponds to the selected model?
-   - Exact HP text you see (e.g., "49 HP", "63hp", "HP-30")
-4. TOTAL AMOUNT/ASSET COST
-   - Where is the final total located? (bottom of page, after tax section, grand total line)
-   - What label is used? (Total, Grand Total, Final Amount, कुल राशि, etc.)
-   - Are there multiple amount fields? Which is the final one after all taxes/charges?
-   - Exact amount you see with any currency symbols
-5. CHECKBOX SELECTIONS (if applicable)
-   - Are there any checkbox lists on the page?
-   - What options are available in these lists?
-   - Which options are clearly marked/selected? (describe the selection mark)
-   - Which options are clearly unmarked/unselected?
-6. AMBIGUITIES OR CHALLENGES
-   - Is any handwriting difficult to read?
-   - Are any fields unclear or could have multiple interpretations?
-   - Are there any conflicting pieces of information?
-Return ONLY valid JSON in this exact format:
 {
-  "dealer_location": string,
-  "dealer_text_observed": string,
-  "dealer_language": string,
-  "model_location": string,
-  "model_format": string,
-  "model_text_observed": string,
   "model_is_checkbox": boolean,
-  "model_selected_option": string,
-  "hp_location": string,
-  "hp_format": string,
-  "hp_text_observed": string,
   "hp_is_checkbox": boolean,
-  "hp_value_observed": string,
-  "amount_location": string,
-  "amount_label": string,
-  "amount_text_observed": string,
   "checkboxes_present": boolean,
-  "checkbox_details": string,
-  "ambiguities": string,
-  "overall_document_quality": string
 }
-Guidelines:
-- Be extremely specific about locations (e.g., "top-left header", "middle section below tractor image", "bottom-right in total box")
-- Preserve original language text in observations
-- Describe what you see, don't interpret or extract yet
-- If something is unclear, describe why
-- Focus on SELECTED/MARKED options when checkboxes are present
-Output rules:
-- Output ONLY valid JSON
-- Do NOT include markdown, explanations, or extra text
 """
 EXTRACTION_WITH_CONTEXT_PROMPT = """
-You are an expert at extracting structured data from Indian invoices and quotations.
-You have already analyzed this document. Here is your previous analysis:
-CONTEXT FROM REASONING:
 {reasoning_output}
-Based on your previous analysis, now extract the exact field values.
-Return ONLY valid JSON in this exact format:
 {{
   "dealer_name": string,
@@ -165,56 +103,13 @@ Return ONLY valid JSON in this exact format:
   "asset_cost": number
 }}
-Critical extraction rules:
-1. DEALER NAME
-   - Copy EXACTLY as it appears in the original language and spelling
-   - Do NOT translate from Hindi/Marathi/Kannada to English
-   - Do NOT correct spelling or expand abbreviations
-   - Include any punctuation or special characters as shown
-2. MODEL NAME
-   - Copy EXACTLY as it appears in the original language
-   - If from checkbox selection, extract ONLY the selected/marked option
-   - Do NOT translate or normalize
-   - Preserve numbers, hyphens, and spacing exactly
-   - Do NOT include HP value within model name
-3. HORSE POWER
-   - Must be a number only (integer or decimal)
-   - Extract from explicit HP mentions only (never infer from model codes)
-   - If from checkbox, use only the selected option's HP value
-   - Remove text like "HP", "hp", "हॉर्स पावर" - keep only the number
-   - If HP appears as "49 HP" → extract: 49
-   - If HP appears as "63.5hp" → extract: 63.5
-   - If multiple HP values exist, use the one for the selected model
-4. ASSET COST
-   - Must be a number only (integer or decimal)
-   - Use the FINAL total amount after all taxes and charges
-   - Remove currency symbols (₹, Rs, INR)
-   - Remove commas (e.g., "1,50,000" → 150000)
-   - If amount is "₹ 1,75,500.00" → extract: 175500
-   - Use the largest/final amount if multiple totals exist
-Data validation:
-- dealer_name: Must be non-empty string in original language
-- model_name: Must be non-empty string in original language
-- horse_power: Must be positive number (typically between 15-100 for tractors)
-- asset_cost: Must be positive number (typically between 100000-3000000 for tractors)
-Special handling based on your reasoning:
-- If you noted checkboxes: Extract ONLY marked/selected options
-- If you noted ambiguities: Make best judgment and use most likely value
-- If you noted poor handwriting: Interpret characters as best as possible while preserving language
-- If you noted multiple values: Use the one that matches the selected/final configuration
-Output rules:
-- Output ONLY valid JSON
-- Do NOT include markdown code fences
-- Do NOT include explanations or extra text
-- Ensure all four fields are present
-- Ensure numbers are actual numbers, not strings with currency/commas
 """
@@ -382,8 +277,8 @@ class InferenceProcessor:
         start = time.time()
-        # Generate (allow more tokens for detailed reasoning)
-        generated_ids = model.generate(**inputs, max_new_tokens=512)
         latency = time.time() - start

 """
+# Two-step Chain of Thought prompts (reasoning mode) - OPTIMIZED FOR SPEED
 REASONING_PROMPT = """
+Analyze this Indian tractor invoice. Observe WITHOUT extracting:
+1. DEALER: Location, language, exact text
+2. MODEL: Location, format (checkbox/text), selected option, exact text
+3. HP: Location, format, selected value if checkbox, exact text
+4. TOTAL: Location, label, final amount with currency
+5. CHECKBOXES: Present? Which marked?
+6. CHALLENGES: Unclear handwriting or ambiguities?
+Return ONLY valid JSON:
 {
+  "dealer_text": string,
+  "model_text": string,
   "model_is_checkbox": boolean,
+  "hp_text": string,
   "hp_is_checkbox": boolean,
+  "amount_text": string,
   "checkboxes_present": boolean,
+  "notes": string
 }
+Preserve original language. Be concise.
 """
 EXTRACTION_WITH_CONTEXT_PROMPT = """
+Based on your analysis:
 {reasoning_output}
+Extract these fields:
 {{
   "dealer_name": string,
   "asset_cost": number
 }}
+Rules:
+1. DEALER/MODEL: Copy EXACTLY in original language, don't translate
+2. HP: Number only (e.g., "49 HP" → 49). Use selected checkbox if applicable
+3. ASSET COST: Final total as number (remove ₹, commas: "1,50,000" → 150000)
+4. Checkboxes: Extract only marked options
+Output ONLY valid JSON, no markdown.
 """
         start = time.time()
+        # Generate (reduced tokens for faster processing)
+        generated_ids = model.generate(**inputs, max_new_tokens=256)
         latency = time.time() - start