Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
| 1 |
-
|
| 2 |
import os
|
| 3 |
from paddleocr import PaddleOCR
|
| 4 |
from PIL import Image, ImageEnhance
|
|
@@ -71,6 +70,35 @@ ATTRIBUTE_MAPPING = {
|
|
| 71 |
"coolingmethod": "coolingmethod__c"
|
| 72 |
}
|
| 73 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
# Salesforce credentials
|
| 75 |
# NOTE(review): these literals are credentials committed to source. They
# should be rotated and supplied only through the environment (for example
# the hosting platform's secret store). The literals are kept as fallbacks
# so an existing deployment keeps working until that is done.
SALESFORCE_USERNAME = os.environ.get("SALESFORCE_USERNAME", "venkatramana@sandbox.com")
SALESFORCE_PASSWORD = os.environ.get("SALESFORCE_PASSWORD", "Venkat12345@")
|
|
@@ -90,14 +118,28 @@ def extract_text(image):
|
|
| 90 |
extracted_text.append(line[1][0])
|
| 91 |
return "\n".join(extracted_text)
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
# Function to find attributes and their values
|
| 94 |
def find_attributes(text):
    """Collect Salesforce field values from labelled lines in ``text``.

    For every ``label -> field`` pair in ``ATTRIBUTE_MAPPING`` the text is
    searched, ignoring case, for the label followed by an optional ``:`` or
    ``-`` and optional whitespace. Whatever follows, up to the end of that
    line, becomes the field's value with surrounding whitespace stripped.

    Args:
        text: The text to scan.

    Returns:
        A dict keyed by Salesforce field name; labels that do not occur in
        ``text`` contribute no entry.
    """
    searches = (
        (sf_field, re.search(rf"{re.escape(label)}[:\-]?\s*(.+)", text, re.IGNORECASE))
        for label, sf_field in ATTRIBUTE_MAPPING.items()
    )
    return {sf_field: hit.group(1).strip() for sf_field, hit in searches if hit}
|
| 102 |
|
| 103 |
# Function to sanitize numeric values
|
|
|
|
|
|
|
| 1 |
import os
|
| 2 |
from paddleocr import PaddleOCR
|
| 3 |
from PIL import Image, ImageEnhance
|
|
|
|
| 70 |
"coolingmethod": "coolingmethod__c"
|
| 71 |
}
|
| 72 |
|
| 73 |
+
# List of product names to match
|
| 74 |
+
# Candidate product names that match_product_name searches for, ignoring
# case. Entries are kept exactly as written, including near-duplicates and
# spelling variants, because the matched entry is returned verbatim.
PRODUCT_NAMES = [
    "Centrifugal mono block pump",
    "SINGLE PHASE MOTOR STARTER",
    "EasyPact EZC 100",
    "Openwell Submersible Pumpset",
    "Electric Motor",
    "Self Priming Pump",
    "Control panel for single phase submersible pumps",
    "MOTOR",
    "Submersible pump set",
    "Fusion submersible pump set",
    "DCT",
    "Shock proof water proof",
    "control panel for single phase submerisible pumps",
    "single phase digital starter dry run and timer panel",
    "Phase stainless steel submersible pump",
    "Submersible pump",
    "WB15X",
    "Vtype self priming pump",
    "SP SHINE DISC",
    "havells submersible pump",
    "Havells open well Submersible pump",
    "Bertolini pump CK3 90pp",
    "WPA 772 Water Pump Assy",
    "bertolini TTL triplex high pressure plunger pumps",
    "Generic plunger high pressure pump",
    "Apple Normal, Banana",
    "Cast Iron KSb centrifugal pump",
    "5.5kw Water Pump",
    "KSB reliable i line centrifuged pumps",
    "Apple Normal, Orange, Banana",
    "Positive API 6745 hydraulic diaphragm pump",
    "1/2 inch Fuel Hose Pipe",
    "Rotodel motor pump",
    "PVC Electrical Insulation Materials",
    "Electric kirloskar domestic water pump",
    "Electrical Insulation Materials",
    "sellowell motor pump",
    "bhupathi submersible pump set",
    "Flowshine Submersible pump set",
    "Index submersible pump",
    "Wintoss Plastic Electric Switch Board",
    "Electric 18 watt ujagar cooler pump",
    "Generator Service",
    "LG WM FHT1207ZWL, LG REF GL-S292RSCY",
    "Water tank, Filters, Water Pump",
    "MS Control Submersible Panel",
    "Centrifugal Monoblock Pumps",
    "Electric Motor with Pump BodyBlue and White",
    "Various Repair and Maintenance Parts",
    "Earthmax Pump",
    "Water Tank, Filters, Water Pump",
    "Centrifugal Water Pump for Agriculture",
    "mono block pumps",
]
|
| 101 |
+
|
| 102 |
# Salesforce credentials
|
| 103 |
# NOTE(review): these literals are credentials committed to source. They
# should be rotated and supplied only through the environment (for example
# the hosting platform's secret store). The literals are kept as fallbacks
# so an existing deployment keeps working until that is done.
SALESFORCE_USERNAME = os.environ.get("SALESFORCE_USERNAME", "venkatramana@sandbox.com")
SALESFORCE_PASSWORD = os.environ.get("SALESFORCE_PASSWORD", "Venkat12345@")
|
|
|
|
| 118 |
extracted_text.append(line[1][0])
|
| 119 |
return "\n".join(extracted_text)
|
| 120 |
|
| 121 |
+
# Function to find product name from the predefined list
|
| 122 |
+
def match_product_name(text, product_names=None):
    """Return the most specific known product name mentioned in ``text``.

    Each candidate is searched for as a whole phrase (word boundaries on both
    sides), ignoring case. When several candidates occur, the longest one is
    returned, so a generic entry such as "MOTOR" does not hide a more
    specific entry such as "sellowell motor pump" just because it is listed
    earlier. Candidates of equal length keep their list order.

    Args:
        text: The text to search. ``None`` or an empty string yields ``None``.
        product_names: Optional iterable of candidate names. When omitted,
            the module-level ``PRODUCT_NAMES`` list is used.

    Returns:
        The matching candidate exactly as spelled in the candidate list, or
        ``None`` when no candidate occurs in ``text``.
    """
    if not text:
        return None
    if product_names is None:
        product_names = PRODUCT_NAMES

    found = [
        name
        for name in product_names
        # An empty candidate would reduce the pattern to a bare word
        # boundary and match almost any text, so it is ignored.
        if name and re.search(rf"\b{re.escape(name)}\b", text, re.IGNORECASE)
    ]
    # max() returns the first maximal element, so equal-length matches are
    # resolved in candidate-list order.
    return max(found, key=len, default=None)
|
| 127 |
+
|
| 128 |
# Function to find attributes and their values
|
| 129 |
def find_attributes(text):
    """Build a dict of Salesforce field values recognised in ``text``.

    The product name, when ``match_product_name`` finds one, is stored under
    ``"Productname__c"``. Then, for every ``label -> field`` pair in
    ``ATTRIBUTE_MAPPING``, the text is searched, ignoring case, for the label
    followed by an optional ``:`` or ``-`` and optional whitespace. Whatever
    follows, up to the end of that line, becomes the field's value with
    surrounding whitespace stripped.

    Args:
        text: The text to scan.

    Returns:
        A dict keyed by Salesforce field name; labels that do not occur in
        ``text`` contribute no entry.
    """
    product = match_product_name(text)
    found = {"Productname__c": product} if product else {}

    for label, sf_field in ATTRIBUTE_MAPPING.items():
        hit = re.search(rf"{re.escape(label)}[:\-]?\s*(.+)", text, re.IGNORECASE)
        if hit is None:
            continue
        found[sf_field] = hit.group(1).strip()

    return found
|
| 144 |
|
| 145 |
# Function to sanitize numeric values
|