Um34ER committed on
Commit
63ae52b
·
verified ·
1 Parent(s): a072884

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -498
app.py CHANGED
@@ -1,549 +1,158 @@
1
  """
2
- Smart Parchi OCR - Working Version for HF CPU Basic
3
- Tested with: Atta-2 kg 200, Bugger 2, Cheeni 21 kg, Total = 950 u dhara
4
  """
5
 
6
- from __future__ import annotations
7
-
8
- import hashlib
9
- import io
10
- import logging
11
- import re
12
- import time
13
- import threading
14
- from typing import List, Tuple, Optional, Dict, Any
15
-
16
  import cv2
17
  import numpy as np
18
  from PIL import Image
19
- from fastapi import FastAPI, File, UploadFile, HTTPException
20
- from fastapi.middleware.cors import CORSMiddleware
 
 
 
21
  from pydantic import BaseModel
22
- import easyocr
23
-
24
- # ============================================================================
25
- # CONFIGURATION
26
- # ============================================================================
27
 
28
  logging.basicConfig(level=logging.INFO)
29
  logger = logging.getLogger(__name__)
30
 
31
- # Image settings
32
- MAX_SIZE = 1200
33
- MIN_CONFIDENCE = 0.05 # Very low for handwriting
34
- TEXT_THRESHOLD = 0.10
35
- LOW_TEXT = 0.15
36
-
37
- # Price validation
38
- MAX_PRICE = 50000
39
- MIN_PRICE = 1
40
 
41
- # Cache
42
- CACHE_TTL = 86400 # 24 hours
 
 
 
 
 
43
 
 
 
44
 
45
- # ============================================================================
46
- # DATA MODELS
47
- # ============================================================================
48
 
49
  class ExtractedItem(BaseModel):
50
  name: str
51
- quantity: float = 1.0
52
  price: float
53
- confidence: float = 0.0
54
- low_confidence: bool = False
55
- unit: str = "pc"
56
 
57
 
58
  class ProcessResponse(BaseModel):
59
- request_id: str
60
  success: bool
61
- customer_name: Optional[str] = None
62
- items: List[ExtractedItem] = []
63
- total: float = 0.0
64
- mismatch: bool = False
65
- transaction_type: str = "unknown"
66
- processing_time_ms: float = 0.0
67
- item_count: int = 0
68
- error: Optional[str] = None
69
-
70
-
71
- # ============================================================================
72
- # SIMPLE ITEM CORRECTIONS
73
- # ============================================================================
74
-
75
- ITEM_CORRECTIONS = {
76
- 'atta': ['atta', 'aata', 'arta', 'ata', 'flour', 'aataa'],
77
- 'cheeni': ['cheeni', 'chini', 'cheeny', 'cheni', 'sugar', 'chinni'],
78
- 'burger': ['burger', 'buger', 'bubiger', 'buggar', 'burjer'],
79
- 'ghee': ['ghee', 'ghi', 'desi ghee'],
80
- 'doodh': ['doodh', 'dudh', 'milk'],
81
- 'chawal': ['chawal', 'rice', 'chawal rice'],
82
- 'daal': ['daal', 'dal', 'lentils'],
83
- 'namak': ['namak', 'salt'],
84
- 'mirch': ['mirch', 'chili'],
85
- 'sabun': ['sabun', 'soap'],
86
- }
87
 
88
- TRANSACTION_WORDS = ['udhaar', 'udhar', 'u dhara', 'wasooli', 'وصولی', 'ادھار']
89
 
90
-
91
- # ============================================================================
92
- # UTILITIES
93
- # ============================================================================
94
-
95
- def normalize_text(text: str) -> str:
96
  """Clean OCR text"""
97
- if not text:
98
- return ""
99
-
100
- # Urdu to English digits
101
- urdu_digits = '۰۱۲۳۴۵۶۷۸۹'
102
- eng_digits = '0123456789'
103
- for u, e in zip(urdu_digits, eng_digits):
104
- text = text.replace(u, e)
105
-
106
- # Fix common confusions
107
- text = text.replace('O', '0').replace('o', '0')
108
- text = text.replace('l', '1').replace('I', '1')
109
- text = text.replace('S', '5').replace('s', '5')
110
- text = text.replace('Z', '2').replace('z', '2')
111
-
112
- # Remove special chars
113
- text = re.sub(r'[^\w\sء-ي0-9]', ' ', text)
114
  text = re.sub(r'\s+', ' ', text).strip()
115
-
116
  return text.lower()
117
 
118
 
119
- def extract_numbers(text: str) -> List[float]:
120
- """Extract numbers from text"""
121
- numbers = []
122
- for match in re.finditer(r'\b(\d+(?:\.\d+)?)\b', text):
123
- try:
124
- num = float(match.group(1))
125
- if 0 < num < 100000:
126
- numbers.append(num)
127
- except:
128
- pass
129
- return numbers
130
-
131
-
132
- def correct_item_name(name: str) -> str:
133
- """Correct common OCR errors"""
134
- name_lower = name.lower().strip()
135
- for correct, variants in ITEM_CORRECTIONS.items():
136
- if name_lower in variants:
137
- return correct
138
- for var in variants:
139
- if var in name_lower or name_lower in var:
140
- if len(var) > 2 and len(name_lower) > 2:
141
- return correct
142
- return name_lower
143
-
144
-
145
- # ============================================================================
146
- # OCR ENGINE
147
- # ============================================================================
148
-
149
- _reader = None
150
- _lock = threading.Lock()
151
-
152
-
153
- def get_reader():
154
- global _reader
155
- if _reader is None:
156
- with _lock:
157
- if _reader is None:
158
- logger.info("Loading EasyOCR (Urdu+English)...")
159
- _reader = easyocr.Reader(['ur', 'en'], gpu=False)
160
- logger.info("Ready!")
161
- return _reader
162
-
163
-
164
- def run_ocr(image: np.ndarray) -> List[Tuple[float, str, float]]:
165
- """Run OCR and return tokens"""
166
- reader = get_reader()
167
-
168
- try:
169
- results = reader.readtext(
170
- image,
171
- detail=1,
172
- paragraph=False,
173
- text_threshold=TEXT_THRESHOLD,
174
- low_text=LOW_TEXT,
175
- width_ths=0.5,
176
- ycenter_ths=0.5
177
- )
178
-
179
- tokens = []
180
- for bbox, text, conf in results:
181
- if conf >= MIN_CONFIDENCE:
182
- cleaned = normalize_text(text)
183
- if cleaned and len(cleaned) > 1:
184
- y_center = (bbox[0][1] + bbox[2][1]) / 2
185
- tokens.append((y_center, cleaned, conf))
186
-
187
- tokens.sort(key=lambda x: x[0])
188
- return tokens
189
- except Exception as e:
190
- logger.error(f"OCR error: {e}")
191
- return []
192
-
193
-
194
- # ============================================================================
195
- # IMAGE PREPROCESSING
196
- # ============================================================================
197
-
198
- def preprocess_image(rgb: np.ndarray) -> List[np.ndarray]:
199
- """Generate preprocessing variants"""
200
- variants = []
201
-
202
- gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
203
- h, w = gray.shape
204
-
205
- # Resize if needed
206
- if max(h, w) > MAX_SIZE:
207
- scale = MAX_SIZE / max(h, w)
208
- gray = cv2.resize(gray, None, fx=scale, fy=scale)
209
-
210
- # Variant 1: CLAHE
211
- clahe = cv2.createCLAHE(clipLimit=2.5, tileGridSize=(8, 8))
212
- enhanced = clahe.apply(gray)
213
- variants.append(cv2.cvtColor(enhanced, cv2.COLOR_GRAY2RGB))
214
-
215
- # Variant 2: Adaptive threshold
216
- thresh = cv2.adaptiveThreshold(
217
- enhanced, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
218
- cv2.THRESH_BINARY, 15, 5
219
- )
220
- variants.append(cv2.cvtColor(thresh, cv2.COLOR_GRAY2RGB))
221
-
222
- # Variant 3: Inverted (for light text)
223
- inverted = cv2.bitwise_not(thresh)
224
- variants.append(cv2.cvtColor(inverted, cv2.COLOR_GRAY2RGB))
225
-
226
- return variants
227
-
228
-
229
- # ============================================================================
230
- # PARSING
231
- # ============================================================================
232
-
233
- def group_into_lines(tokens: List[Tuple[float, str, float]]) -> List[str]:
234
- """Group tokens into lines"""
235
- if not tokens:
236
- return []
237
-
238
- lines = []
239
- current = [tokens[0]]
240
-
241
- for t in tokens[1:]:
242
- if abs(t[0] - current[-1][0]) <= 25:
243
- current.append(t)
244
- else:
245
- lines.append(' '.join(x[1] for x in current))
246
- current = [t]
247
-
248
- if current:
249
- lines.append(' '.join(x[1] for x in current))
250
-
251
- return lines
252
 
253
 
254
- def parse_items_and_total(lines: List[str]) -> Tuple[List[Dict], float]:
255
- """Parse items and extract total"""
256
  items = []
257
- numbers_in_lines = []
258
 
259
  for line in lines:
260
- # Skip header/footer
261
- line_lower = line.lower()
262
- skip = ['date', 'time', 'total', 'udhaar', 'wasooli', 'cash', 'name', 'customer']
263
- if any(k in line_lower for k in skip):
264
- # Check for total in these lines
265
- nums = extract_numbers(line)
266
- if nums and 'total' in line_lower or 'udhaar' in line_lower:
267
- numbers_in_lines.extend(nums)
268
  continue
269
 
270
- # Check for patterns
271
- # Pattern 1: "item - qty - price" or "item - price"
272
- if '-' in line:
273
- parts = line.split('-')
274
- if len(parts) >= 2:
275
- item_name = parts[0].strip()
276
- rest = '-'.join(parts[1:])
277
- nums = extract_numbers(rest)
278
-
279
- if len(nums) >= 2:
280
- # item - qty - price
281
- qty = nums[0]
282
- price = nums[1]
283
- elif len(nums) == 1:
284
- # item - price
285
- qty = 1.0
286
- price = nums[0]
287
- else:
288
- continue
289
-
290
- if price and MIN_PRICE <= price <= MAX_PRICE:
291
- items.append({
292
- 'name': correct_item_name(item_name),
293
- 'quantity': qty,
294
- 'price': price,
295
- 'confidence': 0.75
296
- })
297
- continue
298
 
299
- # Pattern 2: "item qty price"
300
- nums = extract_numbers(line)
301
- if len(nums) >= 2:
302
- # Find text part (remove numbers)
303
- text_part = line
304
- for n in nums:
305
- text_part = text_part.replace(str(int(n)), '', 1)
306
- text_part = re.sub(r'\d+', '', text_part)
307
- text_part = re.sub(r'[^\w\sء-ي]', ' ', text_part).strip()
308
 
309
- if text_part and len(text_part) > 1:
310
- qty = nums[0] if nums[0] > 1 or len(nums) == 2 else 1.0
311
- price = nums[-1]
312
-
313
- if price and MIN_PRICE <= price <= MAX_PRICE:
314
- items.append({
315
- 'name': correct_item_name(text_part),
316
- 'quantity': qty,
317
- 'price': price,
318
- 'confidence': 0.70
319
- })
320
- elif len(nums) == 1:
321
- # Single number - might be total or price without quantity
322
- text_part = re.sub(r'\d+', '', line)
323
- text_part = re.sub(r'[^\w\sء-ي]', ' ', text_part).strip()
324
 
325
- if text_part and len(text_part) > 2 and len(text_part) < 30:
326
- # This is likely an item with implicit quantity=1
327
- price = nums[0]
328
- if price and MIN_PRICE <= price <= MAX_PRICE:
329
- items.append({
330
- 'name': correct_item_name(text_part),
331
- 'quantity': 1.0,
332
- 'price': price,
333
- 'confidence': 0.65
334
- })
335
- else:
336
- # This might be a total
337
- numbers_in_lines.extend(nums)
338
-
339
- # Determine total
340
- total = 0.0
341
- if numbers_in_lines:
342
- # Take the largest number as total
343
- total = max(numbers_in_lines)
344
-
345
- # Also check for explicit total line
346
- for line in lines[-3:]:
347
- if 'total' in line.lower() or 'udhaar' in line.lower() or 'ٹوٹل' in line:
348
- nums = extract_numbers(line)
349
- if nums:
350
- total = max(nums)
351
- break
352
-
353
- # Calculate items sum
354
- items_sum = sum(i['price'] * i['quantity'] for i in items) if items else 0
355
 
356
- # If no total found, use items sum
357
- if total == 0 and items_sum > 0:
358
- total = items_sum
359
 
360
- # Check if mismatch
361
- mismatch = abs(total - items_sum) > 5 if total > 0 and items_sum > 0 else False
362
-
363
- return items, total, mismatch
364
 
365
 
366
- def extract_customer_name(lines: List[str]) -> Optional[str]:
367
- """Extract customer name from top lines"""
368
- for i, line in enumerate(lines[:4]):
369
- cleaned = re.sub(r'[^\w\sء-ي]', ' ', line).strip()
370
-
371
- # Must have no digits
372
- if any(c.isdigit() for c in cleaned):
373
- continue
374
-
375
- # Must have reasonable length
376
- if len(cleaned) < 3 or len(cleaned) > 35:
377
- continue
378
-
379
- cleaned_lower = cleaned.lower()
380
- skip = ['date', 'time', 'total', 'udhaar', 'wasooli', 'cash', 'name', 'customer', 'shop']
381
- if any(k in cleaned_lower for k in skip):
382
- continue
383
-
384
- # Remove extra spaces
385
- cleaned = re.sub(r'\s+', ' ', cleaned).strip()
386
- if cleaned:
387
- words = [w.capitalize() if w[0].isascii() else w for w in cleaned.split()]
388
- return ' '.join(words)
389
-
390
- return None
391
-
392
-
393
- def detect_type(lines: List[str]) -> str:
394
- """Detect transaction type"""
395
- for line in lines[-3:]:
396
- line_lower = line.lower()
397
- if any(w in line_lower for w in ['udhaar', 'udhar', 'u dhara', 'ادھار']):
398
- return 'udhaar'
399
- if any(w in line_lower for w in ['wasooli', 'وصولی']):
400
- return 'wasooli'
401
- return 'unknown'
402
-
403
-
404
- # ============================================================================
405
- # CACHE
406
- # ============================================================================
407
-
408
- result_cache = {}
409
-
410
-
411
- def get_cache_key(data: bytes) -> str:
412
- return hashlib.sha256(data).hexdigest()
413
-
414
-
415
- # ============================================================================
416
- # FASTAPI APP
417
- # ============================================================================
418
-
419
- app = FastAPI(title="Parchi OCR", version="7.0.0")
420
-
421
- app.add_middleware(
422
- CORSMiddleware,
423
- allow_origins=["*"],
424
- allow_credentials=True,
425
- allow_methods=["*"],
426
- allow_headers=["*"],
427
- )
428
-
429
-
430
- @app.on_event("startup")
431
- async def startup():
432
- """Warm up OCR"""
433
- logger.info("Starting Parchi OCR v7.0...")
434
- threading.Thread(target=get_reader).start()
435
-
436
-
437
- @app.get("/health")
438
- async def health():
439
- return {"status": "ok", "version": "7.0.0"}
440
-
441
-
442
- @app.post("/process-parchi", response_model=ProcessResponse)
443
  async def process_parchi(image: UploadFile = File(...)):
444
- """Process a parchi image"""
445
-
446
- if not image.content_type or not image.content_type.startswith("image/"):
447
- raise HTTPException(400, "Must be an image")
448
 
 
449
  contents = await image.read()
450
- request_id = hashlib.md5(contents).hexdigest()[:16]
 
451
 
452
- # Check cache
453
- if request_id in result_cache:
454
- logger.info(f"[{request_id}] Cache hit")
455
- return result_cache[request_id]
456
 
457
- start_time = time.time()
458
-
459
- try:
460
- # Load image
461
- pil_img = Image.open(io.BytesIO(contents)).convert('RGB')
462
- rgb = np.array(pil_img)
463
-
464
- # Preprocess
465
- variants = preprocess_image(rgb)
466
-
467
- # OCR on all variants
468
- all_tokens = []
469
- for variant in variants:
470
- tokens = run_ocr(variant)
471
- all_tokens.extend(tokens)
472
-
473
- # Deduplicate
474
- seen = set()
475
- unique = []
476
- for y, text, conf in all_tokens:
477
- key = (text, int(y // 25))
478
- if key not in seen:
479
- seen.add(key)
480
- unique.append((y, text, conf))
481
-
482
- unique.sort(key=lambda x: x[0])
483
-
484
- # Group into lines
485
- lines = group_into_lines(unique)
486
-
487
- if not lines:
488
- result = ProcessResponse(
489
- request_id=request_id,
490
- success=False,
491
- error="No text detected",
492
- processing_time_ms=(time.time() - start_time) * 1000
493
- )
494
- result_cache[request_id] = result
495
- return result
496
-
497
- # Parse
498
- customer_name = extract_customer_name(lines)
499
- items, total, mismatch = parse_items_and_total(lines)
500
- tx_type = detect_type(lines)
501
-
502
- # Format items
503
- extracted_items = []
504
- for item in items:
505
- extracted_items.append(ExtractedItem(
506
- name=item['name'],
507
- quantity=item['quantity'],
508
- price=round(item['price'], 2),
509
- confidence=item['confidence'],
510
- low_confidence=item['confidence'] < 0.5,
511
- unit='kg' if 'kg' in item['name'] else 'pc'
512
- ))
513
-
514
- processing_time = (time.time() - start_time) * 1000
515
-
516
- result = ProcessResponse(
517
- request_id=request_id,
518
- success=True,
519
- customer_name=customer_name,
520
- items=extracted_items,
521
- total=round(total, 2),
522
- mismatch=mismatch,
523
- transaction_type=tx_type,
524
- processing_time_ms=round(processing_time, 1),
525
- item_count=len(extracted_items)
526
- )
527
-
528
- # Cache
529
- result_cache[request_id] = result
530
-
531
- # Clean old cache
532
- if len(result_cache) > 100:
533
- oldest = min(result_cache.keys())
534
- del result_cache[oldest]
535
-
536
- logger.info(f"[{request_id}] Items: {len(extracted_items)}, Total: {total}, Time: {processing_time:.0f}ms")
537
- return result
538
-
539
- except Exception as e:
540
- logger.error(f"[{request_id}] Error: {e}")
541
- return ProcessResponse(
542
- request_id=request_id,
543
- success=False,
544
- error=str(e),
545
- processing_time_ms=(time.time() - start_time) * 1000
546
- )
547
 
548
 
549
  if __name__ == "__main__":
 
1
  """
2
+ Parchi OCR - PaddleOCR Version (Works for Handwritten Urdu)
 
3
  """
4
 
5
+ from fastapi import FastAPI, File, UploadFile, HTTPException
6
+ from fastapi.middleware.cors import CORSMiddleware
7
+ from paddleocr import PaddleOCR
 
 
 
 
 
 
 
8
  import cv2
9
  import numpy as np
10
  from PIL import Image
11
+ import io
12
+ import re
13
+ import hashlib
14
+ import time
15
+ from typing import List, Dict, Any, Optional
16
  from pydantic import BaseModel
17
+ import logging
 
 
 
 
18
 
19
  logging.basicConfig(level=logging.INFO)
20
  logger = logging.getLogger(__name__)
21
 
22
+ app = FastAPI(title="Parchi OCR Pro")
 
 
 
 
 
 
 
 
23
 
24
+ app.add_middleware(
25
+ CORSMiddleware,
26
+ allow_origins=["*"],
27
+ allow_credentials=True,
28
+ allow_methods=["*"],
29
+ allow_headers=["*"],
30
+ )
31
 
32
+ # Initialize PaddleOCR once
33
+ ocr = PaddleOCR(use_angle_cls=True, lang='en', use_gpu=False, show_log=False)
34
 
 
 
 
35
 
36
  class ExtractedItem(BaseModel):
37
  name: str
38
+ quantity: float
39
  price: float
40
+ confidence: float
 
 
41
 
42
 
43
  class ProcessResponse(BaseModel):
 
44
  success: bool
45
+ items: List[ExtractedItem]
46
+ total: float
47
+ transaction_type: str
48
+ processing_time_ms: float
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
 
50
 
51
+ def clean_text(text: str) -> str:
 
 
 
 
 
52
  """Clean OCR text"""
53
+ # Remove special characters
54
+ text = re.sub(r'[^\w\s]', ' ', text)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  text = re.sub(r'\s+', ' ', text).strip()
 
56
  return text.lower()
57
 
58
 
59
+ def fix_urdu_digits(text: str) -> str:
60
+ """Convert Urdu digits to English"""
61
+ urdu_digits = {
62
+ '۰': '0', '۱': '1', '۲': '2', '۳': '3', '۴': '4',
63
+ '۵': '5', '۶': '6', '۷': '7', '۸': '8', '۹': '9'
64
+ }
65
+ for u, e in urdu_digits.items():
66
+ text = text.replace(u, e)
67
+ return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
 
69
 
70
+ def parse_items_and_total(lines: List[str]) -> tuple:
71
+ """Parse items and total from OCR lines"""
72
  items = []
73
+ total = 0
74
 
75
  for line in lines:
76
+ # Fix digits
77
+ line = fix_urdu_digits(line)
78
+
79
+ # Check for total
80
+ if 'total' in line.lower() or 'udhaar' in line.lower():
81
+ numbers = re.findall(r'\d+', line)
82
+ if numbers:
83
+ total = int(numbers[-1])
84
  continue
85
 
86
+ # Look for pattern: "Item Qty Price" or "Item Price"
87
+ parts = line.split()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
+ # Find numbers
90
+ numbers = [int(n) for n in re.findall(r'\d+', line)]
91
+
92
+ if len(numbers) >= 2:
93
+ # Has both quantity and price
94
+ price = numbers[-1]
95
+ qty = numbers[0] if len(numbers) >= 2 else 1
 
 
96
 
97
+ # Item name is text without numbers
98
+ name = re.sub(r'\d+', '', line)
99
+ name = clean_text(name)
 
 
 
 
 
 
 
 
 
 
 
 
100
 
101
+ if name and price:
102
+ items.append({
103
+ 'name': name[:20],
104
+ 'quantity': qty,
105
+ 'price': price,
106
+ 'confidence': 0.8
107
+ })
108
+ elif len(numbers) == 1 and not total:
109
+ # Single number - might be total
110
+ total = numbers[0]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
+ # If no total found, calculate from items
113
+ if total == 0 and items:
114
+ total = sum(i['price'] * i['quantity'] for i in items)
115
 
116
+ return items, total
 
 
 
117
 
118
 
119
+ @app.post("/process-parchi")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  async def process_parchi(image: UploadFile = File(...)):
121
+ """Process parchi image"""
122
+ start_time = time.time()
 
 
123
 
124
+ # Read image
125
  contents = await image.read()
126
+ img = Image.open(io.BytesIO(contents))
127
+ img = np.array(img)
128
 
129
+ # Run OCR
130
+ result = ocr.ocr(img, cls=True)
 
 
131
 
132
+ # Extract text lines
133
+ lines = []
134
+ if result and result[0]:
135
+ for line in result[0]:
136
+ text = line[1][0]
137
+ if text:
138
+ lines.append(text)
139
+
140
+ # Parse
141
+ items, total = parse_items_and_total(lines)
142
+
143
+ # Detect transaction type
144
+ full_text = ' '.join(lines).lower()
145
+ tx_type = 'udhaar' if 'udhaar' in full_text or 'udhar' in full_text else 'unknown'
146
+
147
+ processing_time = (time.time() - start_time) * 1000
148
+
149
+ return ProcessResponse(
150
+ success=True,
151
+ items=[ExtractedItem(**i) for i in items],
152
+ total=float(total),
153
+ transaction_type=tx_type,
154
+ processing_time_ms=processing_time
155
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
 
158
  if __name__ == "__main__":