Spaces:

Akash076
/

ocr_product_scanner

Sleeping

Akash076 committed on Jun 3, 2025

Commit

b8e8ab6

verified ·

1 Parent(s): 3dbd7f5

Upload 5 files

Files changed (5) hide show

api_client.py ADDED Viewed

+import requests
def fetch_product(info):
    """Look up a product using progressively broader searches.

    The lookup order is: every detected barcode, then "brand + product
    name", then the product name alone, then the brand alone. The first
    search that yields a product wins.

    Args:
        info: Mapping with optional ``'barcodes'`` (iterable of barcodes),
            ``'brand'`` and ``'product_name'`` entries. Missing or ``None``
            entries are treated as empty instead of raising.

    Returns:
        The first truthy result of ``fetch_by_barcode`` / ``fetch_by_query``,
        or ``None`` when every search comes back empty.
    """
    info = info or {}
    barcodes = info.get('barcodes') or []
    brand = info.get('brand') or ""
    product_name = info.get('product_name') or ""

    # Barcode search first.
    for barcode in barcodes:
        product = fetch_by_barcode(barcode)
        if product:
            return product

    # Text searches, ordered from most to least specific.
    queries = []
    if brand and product_name:
        queries.append(f"{brand} {product_name}")
    if product_name:
        queries.append(product_name)
    if brand:
        queries.append(brand)

    for query in queries:
        product = fetch_by_query(query)
        if product:
            return product
    return None
def fetch_by_barcode(barcode, timeout=10):
    """Fetch a product from the Open Food Facts product endpoint by barcode.

    Args:
        barcode: Barcode to look up; it is interpolated into the request URL.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The ``'product'`` entry of the JSON response when its ``'status'``
        field equals 1, otherwise ``None``. ``None`` is also returned for an
        empty barcode or when the decoded body is not a JSON object.

    Raises:
        requests.RequestException: If the request fails or times out.
        ValueError: If the response body is not valid JSON.
    """
    if not barcode:
        # Nothing to look up; avoid a pointless request to ".../product/.json".
        return None
    url = f"https://world.openfoodfacts.org/api/v0/product/{barcode}.json"
    # An explicit timeout keeps a stalled server from blocking the caller forever.
    response = requests.get(url, timeout=timeout)
    data = response.json()
    if not isinstance(data, dict):
        return None
    return data.get('product') if data.get('status') == 1 else None
def fetch_by_query(query, timeout=10):
    """Search Open Food Facts by free text and return the first hit.

    The search is sent with ``sort_by=unique_scans_n`` and ``json=1``, and the
    first element of the response's ``'products'`` list is returned.

    Args:
        query: Free-text search terms (for example OCR output).
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The first entry of the ``'products'`` list, or ``None`` when the list
        is empty or missing, or when ``query`` is blank.

    Raises:
        requests.RequestException: If the request fails or times out.
        ValueError: If the response body is not valid JSON.
    """
    if not query or not query.strip():
        # A blank search would match an arbitrary product, not the scanned one.
        return None
    # Pass the terms via ``params`` so spaces, '&', '#', etc. are URL-encoded
    # instead of corrupting the query string.
    response = requests.get(
        "https://world.openfoodfacts.org/cgi/search.pl",
        params={
            "search_terms": query,
            "sort_by": "unique_scans_n",
            "json": 1,
        },
        timeout=timeout,
    )
    products = response.json().get('products', [])
    return products[0] if products else None

ocr_processor.py ADDED Viewed

+import easyocr
+from text_cleaner import clean_product_text, extract_keywords
+import re
def _get_reader():
    """Return the shared English EasyOCR reader, creating it on first use."""
    reader = getattr(_get_reader, "_reader", None)
    if reader is None:
        reader = easyocr.Reader(['en'])
        # Cached on the function object so later calls reuse the same reader
        # instead of constructing a new one per image.
        _get_reader._reader = reader
    return reader


def extract_info(image_path):
    """Run OCR on an image and derive product search hints from the text.

    Args:
        image_path: Image to read; passed straight to ``Reader.readtext``.

    Returns:
        A dict with these keys:
            ``raw_text``: OCR fragments joined with single spaces.
            ``clean_text``: ``raw_text`` after ``clean_product_text``.
            ``brand`` / ``product_name``: result of ``extract_keywords`` on
                the cleaned text.
            ``barcodes``: list of distinct 12- or 13-digit numbers found in
                ``raw_text``, in order of first appearance.
    """
    results = _get_reader().readtext(image_path, detail=0)
    raw_text = " ".join(results)

    # Clean the text and pull out the brand / product name guesses.
    clean_text = clean_product_text(raw_text)
    brand, product_name = extract_keywords(clean_text)

    # Barcode detection runs on the raw text because cleaning strips digits.
    # dict.fromkeys drops repeats while keeping first-seen order, so the same
    # barcode is not looked up twice downstream.
    barcodes = list(dict.fromkeys(re.findall(r'\b(\d{12,13})\b', raw_text)))

    return {
        "raw_text": raw_text,
        "clean_text": clean_text,
        "brand": brand,
        "product_name": product_name,
        "barcodes": barcodes,
    }

temp_img.jpg ADDED Viewed

text_cleaner.py ADDED Viewed

+import re
def clean_product_text(text):
    """Strip quantities, punctuation, numbers and label boilerplate from text.

    Steps, in order:
        1. Remove quantities such as ``500g``, ``1.5 kg`` or ``12 fl. oz``.
        2. Remove every character that is neither a word character nor
           whitespace.
        3. Remove stand-alone numbers.
        4. Remove the label phrases "net wt", "net weight", "volume",
           "best before" and "expiry", in any letter case and only as
           whole words.
        5. Collapse whitespace runs to single spaces and trim the ends.

    Args:
        text: Raw text, e.g. OCR output. ``None`` or ``""`` yields ``""``.

    Returns:
        The cleaned text.
    """
    if not text:
        return ""

    # Quantities. The optional decimal part keeps "1.5 kg" from leaving a
    # stray "1" behind, and "fl. oz" is matched as one unit.
    text = re.sub(
        r'\d+(?:[.,]\d+)?\s*(?:fl\.?\s*oz|ml|kg|g|oz|lbs?|l|fl\.?)\b',
        '',
        text,
        flags=re.IGNORECASE,
    )
    # Special characters.
    text = re.sub(r'[^\w\s]', '', text)
    # Stand-alone numbers.
    text = re.sub(r'\b\d+\b', '', text)
    # Non-product label phrases. Case-insensitive because labels are usually
    # printed in capitals; word boundaries keep e.g. "volumes" intact.
    text = re.sub(
        r'\b(?:net\s+wt|net\s+weight|volume|best\s+before|expiry)\b',
        ' ',
        text,
        flags=re.IGNORECASE,
    )
    # Whitespace.
    return re.sub(r'\s+', ' ', text).strip()
def extract_keywords(text):
    """Guess a brand and a product name from cleaned label text.

    The first word is taken as the brand. The product name is the longest
    run (by character count) of consecutive remaining words that are either
    all upper-case or longer than three characters; on a tie the earliest
    run wins. If no such run exists, up to three words following the brand
    are used instead.

    Args:
        text: Whitespace-separated text. ``None`` or ``""`` yields empty
            results.

    Returns:
        A ``(brand, product_name)`` tuple of strings; both are ``""`` when
        ``text`` contains no words.
    """
    words = (text or "").split()
    if not words:
        return "", ""

    # The first word is assumed to be the brand.
    brand, *rest = words

    best_phrase = ""
    run = []

    def _longer_of(best, candidate_words):
        """Return the candidate phrase if strictly longer than ``best``."""
        candidate = " ".join(candidate_words)
        return candidate if len(candidate) > len(best) else best

    for word in rest:
        if word.isupper() or len(word) > 3:
            run.append(word)
        else:
            # A short, non-upper-case word ends the current run.
            best_phrase = _longer_of(best_phrase, run)
            run = []
    # The run still open at the end of the text must compete as well;
    # otherwise a longer trailing phrase loses to an earlier shorter one.
    best_phrase = _longer_of(best_phrase, run)

    # Final fallback: the (up to) three words right after the brand.
    product_phrase = best_phrase or " ".join(words[1:4])
    return brand, product_phrase

utils.py ADDED Viewed

+import os
def clean_temp_files(path="temp_img.jpg"):
    """Delete the temporary image file if it exists.

    Args:
        path: File to delete. Defaults to ``"temp_img.jpg"`` in the current
            working directory.

    Returns:
        None. A file that is already gone is not an error.
    """
    try:
        os.remove(path)
    except FileNotFoundError:
        # Removing directly (instead of checking os.path.exists first) avoids
        # failing when the file disappears between the check and the delete.
        pass