github-actions[bot] committed on
Commit
ba99b21
·
1 Parent(s): d062149

Sync from GitHub: 00abff8ce0e5fc17d13c16c7a28b60591519690d

Browse files
Files changed (3) hide show
  1. app.py +2 -1
  2. frontend/src/components/ResultCard.jsx +34 -1
  3. inference.py +25 -130
app.py CHANGED
@@ -286,7 +286,8 @@ async def process_invoice(
286
  "processing_time": result.get("processing_time_sec", 0),
287
  "confidence": result.get("confidence", 0),
288
  "cost_estimate_usd": result.get("cost_estimate_usd", 0),
289
- "fields": fields # Include raw fields for reference
 
290
  }, media_type="application/json; charset=utf-8")
291
 
292
  except Exception as e:
 
286
  "processing_time": result.get("processing_time_sec", 0),
287
  "confidence": result.get("confidence", 0),
288
  "cost_estimate_usd": result.get("cost_estimate_usd", 0),
289
+ "fields": fields, # Include raw fields for reference
290
+ "timing_breakdown": result.get("timing_breakdown", {}) # Include timing info (with reasoning output if present)
291
  }, media_type="application/json; charset=utf-8")
292
 
293
  except Exception as e:
frontend/src/components/ResultCard.jsx CHANGED
@@ -1,5 +1,5 @@
1
  import React, { useRef, useEffect, useState } from 'react';
2
- import { SlidersHorizontal } from 'lucide-react';
3
 
4
  const ResultCard = ({ result, imageData, processedImageData, onReprocess, isProcessing }) => {
5
  const canvasRef = useRef(null);
@@ -11,6 +11,7 @@ const ResultCard = ({ result, imageData, processedImageData, onReprocess, isProc
11
  const [adjustedDataUrl, setAdjustedDataUrl] = useState(null);
12
  const [previewDimensions, setPreviewDimensions] = useState({ width: 0, height: 0 });
13
  const [currentImageData, setCurrentImageData] = useState(processedImageData || imageData);
 
14
 
15
  // Function to crop image regions
16
  const cropRegion = (img, coords, scaleX, scaleY) => {
@@ -316,6 +317,38 @@ const ResultCard = ({ result, imageData, processedImageData, onReprocess, isProc
316
  </div>
317
  </div>
318
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  {/* Detection Status */}
320
  <div className="grid grid-cols-2 gap-3">
321
  <div className="bg-white rounded-lg p-4 shadow-sm">
 
1
  import React, { useRef, useEffect, useState } from 'react';
2
+ import { SlidersHorizontal, ChevronDown, ChevronUp, Brain } from 'lucide-react';
3
 
4
  const ResultCard = ({ result, imageData, processedImageData, onReprocess, isProcessing }) => {
5
  const canvasRef = useRef(null);
 
11
  const [adjustedDataUrl, setAdjustedDataUrl] = useState(null);
12
  const [previewDimensions, setPreviewDimensions] = useState({ width: 0, height: 0 });
13
  const [currentImageData, setCurrentImageData] = useState(processedImageData || imageData);
14
+ const [showReasoning, setShowReasoning] = useState(false);
15
 
16
  // Function to crop image regions
17
  const cropRegion = (img, coords, scaleX, scaleY) => {
 
317
  </div>
318
  </div>
319
 
320
+ {/* Reasoning Output (Chain of Thought) */}
321
+ {result.timing_breakdown?.reasoning_output && (
322
+ <div className="bg-blue-50 rounded-lg border border-blue-200 overflow-hidden">
323
+ <button
324
+ onClick={() => setShowReasoning(!showReasoning)}
325
+ className="w-full px-4 py-3 flex items-center justify-between hover:bg-blue-100 transition-colors"
326
+ >
327
+ <div className="flex items-center gap-2">
328
+ <Brain className="w-5 h-5 text-blue-600" />
329
+ <h5 className="text-sm font-semibold text-blue-700 uppercase tracking-wide">
330
+ Chain of Thought Reasoning Output
331
+ </h5>
332
+ </div>
333
+ {showReasoning ? (
334
+ <ChevronUp className="w-5 h-5 text-blue-600" />
335
+ ) : (
336
+ <ChevronDown className="w-5 h-5 text-blue-600" />
337
+ )}
338
+ </button>
339
+ {showReasoning && (
340
+ <div className="px-4 pb-4">
341
+ <div className="text-xs text-blue-600 mb-2">
342
+ This is the model's reasoning before extracting structured fields
343
+ </div>
344
+ <div className="text-sm text-gray-800 whitespace-pre-wrap max-h-96 overflow-y-auto font-mono bg-white p-3 rounded border border-blue-300">
345
+ {result.timing_breakdown.reasoning_output}
346
+ </div>
347
+ </div>
348
+ )}
349
+ </div>
350
+ )}
351
+
352
  {/* Detection Status */}
353
  <div className="grid grid-cols-2 gap-3">
354
  <div className="bg-white rounded-lg p-4 shadow-sm">
inference.py CHANGED
@@ -63,100 +63,38 @@ Output rules:
63
  """
64
 
65
 
66
- # Two-step Chain of Thought prompts (reasoning mode)
67
  REASONING_PROMPT = """
68
- You are an expert at analyzing noisy, handwritten Indian invoices and quotations for tractors.
69
 
70
- Your task is to carefully observe and describe the document structure WITHOUT extracting yet.
71
-
72
- Analyze this tractor invoice image and provide detailed observations about:
73
-
74
- 1. DEALER/COMPANY NAME
75
- - Where is it located? (top header, letterhead, stamp, footer)
76
- - What language is it written in?
77
- - Is it printed or handwritten?
78
- - Exact text you see (preserve original language)
79
-
80
- 2. MODEL INFORMATION
81
- - Where is the model mentioned? (checkbox list, handwritten field, printed table, near "Model:" label)
82
- - Are there multiple model options shown?
83
- - If checkboxes exist, which one is marked? (look for ✓, ✗, [X], ●, ☑, filled boxes)
84
- - Is the model name in English or regional language?
85
- - Exact text you see for the selected/mentioned model
86
-
87
- 3. HORSE POWER (HP)
88
- - Where is HP information located? (separate field, within model name, checkbox list, specifications table)
89
- - Is HP explicitly written or implied from model code?
90
- - If there's a checkbox list with HP options, which one is selected?
91
- - Are there multiple HP values shown? Which one corresponds to the selected model?
92
- - Exact HP text you see (e.g., "49 HP", "63hp", "HP-30")
93
-
94
- 4. TOTAL AMOUNT/ASSET COST
95
- - Where is the final total located? (bottom of page, after tax section, grand total line)
96
- - What label is used? (Total, Grand Total, Final Amount, कुल राशि, etc.)
97
- - Are there multiple amount fields? Which is the final one after all taxes/charges?
98
- - Exact amount you see with any currency symbols
99
-
100
- 5. CHECKBOX SELECTIONS (if applicable)
101
- - Are there any checkbox lists on the page?
102
- - What options are available in these lists?
103
- - Which options are clearly marked/selected? (describe the selection mark)
104
- - Which options are clearly unmarked/unselected?
105
-
106
- 6. AMBIGUITIES OR CHALLENGES
107
- - Is any handwriting difficult to read?
108
- - Are any fields unclear or could have multiple interpretations?
109
- - Are there any conflicting pieces of information?
110
-
111
- Return ONLY valid JSON in this exact format:
112
 
 
113
  {
114
- "dealer_location": string,
115
- "dealer_text_observed": string,
116
- "dealer_language": string,
117
- "model_location": string,
118
- "model_format": string,
119
- "model_text_observed": string,
120
  "model_is_checkbox": boolean,
121
- "model_selected_option": string,
122
- "hp_location": string,
123
- "hp_format": string,
124
- "hp_text_observed": string,
125
  "hp_is_checkbox": boolean,
126
- "hp_value_observed": string,
127
- "amount_location": string,
128
- "amount_label": string,
129
- "amount_text_observed": string,
130
  "checkboxes_present": boolean,
131
- "checkbox_details": string,
132
- "ambiguities": string,
133
- "overall_document_quality": string
134
  }
135
 
136
- Guidelines:
137
- - Be extremely specific about locations (e.g., "top-left header", "middle section below tractor image", "bottom-right in total box")
138
- - Preserve original language text in observations
139
- - Describe what you see, don't interpret or extract yet
140
- - If something is unclear, describe why
141
- - Focus on SELECTED/MARKED options when checkboxes are present
142
-
143
- Output rules:
144
- - Output ONLY valid JSON
145
- - Do NOT include markdown, explanations, or extra text
146
  """
147
 
148
 
149
  EXTRACTION_WITH_CONTEXT_PROMPT = """
150
- You are an expert at extracting structured data from Indian invoices and quotations.
151
-
152
- You have already analyzed this document. Here is your previous analysis:
153
-
154
- CONTEXT FROM REASONING:
155
  {reasoning_output}
156
 
157
- Based on your previous analysis, now extract the exact field values.
158
-
159
- Return ONLY valid JSON in this exact format:
160
 
161
  {{
162
  "dealer_name": string,
@@ -165,56 +103,13 @@ Return ONLY valid JSON in this exact format:
165
  "asset_cost": number
166
  }}
167
 
168
- Critical extraction rules:
169
-
170
- 1. DEALER NAME
171
- - Copy EXACTLY as it appears in the original language and spelling
172
- - Do NOT translate from Hindi/Marathi/Kannada to English
173
- - Do NOT correct spelling or expand abbreviations
174
- - Include any punctuation or special characters as shown
175
-
176
- 2. MODEL NAME
177
- - Copy EXACTLY as it appears in the original language
178
- - If from checkbox selection, extract ONLY the selected/marked option
179
- - Do NOT translate or normalize
180
- - Preserve numbers, hyphens, and spacing exactly
181
- - Do NOT include HP value within model name
182
-
183
- 3. HORSE POWER
184
- - Must be a number only (integer or decimal)
185
- - Extract from explicit HP mentions only (never infer from model codes)
186
- - If from checkbox, use only the selected option's HP value
187
- - Remove text like "HP", "hp", "हॉर्स पावर" - keep only the number
188
- - If HP appears as "49 HP" → extract: 49
189
- - If HP appears as "63.5hp" → extract: 63.5
190
- - If multiple HP values exist, use the one for the selected model
191
 
192
- 4. ASSET COST
193
- - Must be a number only (integer or decimal)
194
- - Use the FINAL total amount after all taxes and charges
195
- - Remove currency symbols (₹, Rs, INR)
196
- - Remove commas (e.g., "1,50,000" → 150000)
197
- - If amount is "₹ 1,75,500.00" → extract: 175500
198
- - Use the largest/final amount if multiple totals exist
199
-
200
- Data validation:
201
- - dealer_name: Must be non-empty string in original language
202
- - model_name: Must be non-empty string in original language
203
- - horse_power: Must be positive number (typically between 15-100 for tractors)
204
- - asset_cost: Must be positive number (typically between 100000-3000000 for tractors)
205
-
206
- Special handling based on your reasoning:
207
- - If you noted checkboxes: Extract ONLY marked/selected options
208
- - If you noted ambiguities: Make best judgment and use most likely value
209
- - If you noted poor handwriting: Interpret characters as best as possible while preserving language
210
- - If you noted multiple values: Use the one that matches the selected/final configuration
211
-
212
- Output rules:
213
- - Output ONLY valid JSON
214
- - Do NOT include markdown code fences
215
- - Do NOT include explanations or extra text
216
- - Ensure all four fields are present
217
- - Ensure numbers are actual numbers, not strings with currency/commas
218
  """
219
 
220
 
@@ -382,8 +277,8 @@ class InferenceProcessor:
382
 
383
  start = time.time()
384
 
385
- # Generate (allow more tokens for detailed reasoning)
386
- generated_ids = model.generate(**inputs, max_new_tokens=512)
387
 
388
  latency = time.time() - start
389
 
 
63
  """
64
 
65
 
66
+ # Two-step Chain of Thought prompts (reasoning mode) - OPTIMIZED FOR SPEED
67
  REASONING_PROMPT = """
68
+ Analyze this Indian tractor invoice. Observe WITHOUT extracting:
69
 
70
+ 1. DEALER: Location, language, exact text
71
+ 2. MODEL: Location, format (checkbox/text), selected option, exact text
72
+ 3. HP: Location, format, selected value if checkbox, exact text
73
+ 4. TOTAL: Location, label, final amount with currency
74
+ 5. CHECKBOXES: Present? Which marked?
75
+ 6. CHALLENGES: Unclear handwriting or ambiguities?
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
+ Return ONLY valid JSON:
78
  {
79
+ "dealer_text": string,
80
+ "model_text": string,
 
 
 
 
81
  "model_is_checkbox": boolean,
82
+ "hp_text": string,
 
 
 
83
  "hp_is_checkbox": boolean,
84
+ "amount_text": string,
 
 
 
85
  "checkboxes_present": boolean,
86
+ "notes": string
 
 
87
  }
88
 
89
+ Preserve original language. Be concise.
 
 
 
 
 
 
 
 
 
90
  """
91
 
92
 
93
  EXTRACTION_WITH_CONTEXT_PROMPT = """
94
+ Based on your analysis:
 
 
 
 
95
  {reasoning_output}
96
 
97
+ Extract these fields:
 
 
98
 
99
  {{
100
  "dealer_name": string,
 
103
  "asset_cost": number
104
  }}
105
 
106
+ Rules:
107
+ 1. DEALER/MODEL: Copy EXACTLY in original language, don't translate
108
+ 2. HP: Number only (e.g., "49 HP" → 49). Use selected checkbox if applicable
109
+ 3. ASSET COST: Final total as number (remove ₹, commas: "1,50,000" → 150000)
110
+ 4. Checkboxes: Extract only marked options
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
+ Output ONLY valid JSON, no markdown.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
113
  """
114
 
115
 
 
277
 
278
  start = time.time()
279
 
280
+ # Generate (reduced tokens for faster processing)
281
+ generated_ids = model.generate(**inputs, max_new_tokens=256)
282
 
283
  latency = time.time() - start
284