Akash076 committed on
Commit
1e7b353
·
verified ·
1 Parent(s): a8e74f4

Upload ocr_processor.py

Browse files
Files changed (1) hide show
  1. src/ocr_processor.py +23 -0
src/ocr_processor.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import easyocr
2
+ from text_cleaner import clean_product_text, extract_keywords
3
+ import re
4
+
5
# Matches standalone runs of 12 or 13 digits (the lengths of UPC-A and EAN-13
# codes). Compiled once so repeated calls do not re-resolve the pattern.
_BARCODE_RE = re.compile(r'\b(\d{12,13})\b')

# Lazily-created EasyOCR reader shared by every call to extract_info().
_READER = None


def _get_reader():
    """Return the shared English EasyOCR reader, creating it on first use."""
    global _READER
    if _READER is None:
        # Constructing a Reader loads the OCR models, which is slow; do it
        # once per process instead of once per image.
        _READER = easyocr.Reader(['en'])
    return _READER


def extract_info(image_path):
    """Run OCR on an image and pull product details out of the text.

    Args:
        image_path: Image to read; passed straight through to
            ``easyocr.Reader.readtext``.

    Returns:
        A dict with the keys:
            ``raw_text``: the recognized text fragments joined by spaces.
            ``clean_text``: ``raw_text`` after ``clean_product_text``.
            ``brand`` / ``product_name``: the two values returned by
                ``extract_keywords(clean_text)``.
            ``barcodes``: list of every 12- or 13-digit run found in
                ``raw_text`` (empty list when none is found).
    """
    # detail=0 asks EasyOCR for the recognized strings only (no boxes/scores).
    results = _get_reader().readtext(image_path, detail=0)
    raw_text = " ".join(results)

    # Clean the OCR output, then extract the brand and product name from it.
    clean_text = clean_product_text(raw_text)
    brand, product_name = extract_keywords(clean_text)

    # Barcode detection runs on the raw text, before any cleaning is applied,
    # so digit runs are matched exactly as OCR produced them.
    barcodes = _BARCODE_RE.findall(raw_text)

    return {
        "raw_text": raw_text,
        "clean_text": clean_text,
        "brand": brand,
        "product_name": product_name,
        "barcodes": barcodes,
    }