gopichandra committed on
Commit
e1dd14d
·
verified ·
1 Parent(s): 4cd6e0f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -1
app.py CHANGED
@@ -1,4 +1,3 @@
1
-
2
  import os
3
  from paddleocr import PaddleOCR
4
  from PIL import Image, ImageEnhance
@@ -71,6 +70,35 @@ ATTRIBUTE_MAPPING = {
71
  "coolingmethod": "coolingmethod__c"
72
  }
73
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  # Salesforce credentials
75
  SALESFORCE_USERNAME = "venkatramana@sandbox.com"
76
  SALESFORCE_PASSWORD = "Venkat12345@"
@@ -90,14 +118,28 @@ def extract_text(image):
90
  extracted_text.append(line[1][0])
91
  return "\n".join(extracted_text)
92
 
 
 
 
 
 
 
 
# Function to find attributes and their values
def find_attributes(text):
    """Collect attribute values from ``text`` keyed by their mapped field name.

    For each ``label -> field`` pair in ``ATTRIBUTE_MAPPING`` the text is
    searched case-insensitively for the label, optionally followed by ``:``
    or ``-``. The value is the remainder of the line that follows the label;
    because the optional whitespace after the label may include a line break,
    the value can come from the next line when the label stands alone.

    Args:
        text: Text to scan (may be empty or ``None``).

    Returns:
        dict mapping field names to the stripped value found for them. Labels
        that are absent, or whose captured value is blank, are left out.
    """
    structured_data = {}
    if not text:
        # Nothing to scan; also avoids a TypeError from re.search(None).
        return structured_data

    for readable_attr, sf_attr in ATTRIBUTE_MAPPING.items():
        pattern = rf"{re.escape(readable_attr)}[:\-]?\s*(.+)"  # Match the attribute and capture its value
        match = re.search(pattern, text, re.IGNORECASE)
        if not match:
            continue
        value = match.group(1).strip()
        # A label followed only by trailing blanks captures whitespace;
        # do not record that as a real (empty) value.
        if value:
            structured_data[sf_attr] = value

    return structured_data
102
 
103
  # Function to sanitize numeric values
 
 
1
  import os
2
  from paddleocr import PaddleOCR
3
  from PIL import Image, ImageEnhance
 
70
  "coolingmethod": "coolingmethod__c"
71
  }
72
 
# Known product names searched for in the extracted text.
# Order is kept exactly as authored; entries differing only in case or
# spelling are retained as-is.
PRODUCT_NAMES = [
    "Centrifugal mono block pump",
    "SINGLE PHASE MOTOR STARTER",
    "EasyPact EZC 100",
    "Openwell Submersible Pumpset",
    "Electric Motor",
    "Self Priming Pump",
    "Control panel for single phase submersible pumps",
    "MOTOR",
    "Submersible pump set",
    "Fusion submersible pump set",
    "DCT",
    "Shock proof water proof",
    "control panel for single phase submerisible pumps",
    "single phase digital starter dry run and timer panel",
    "Phase stainless steel submersible pump",
    "Submersible pump",
    "WB15X",
    "Vtype self priming pump",
    "SP SHINE DISC",
    "havells submersible pump",
    "Havells open well Submersible pump",
    "Bertolini pump CK3 90pp",
    "WPA 772 Water Pump Assy",
    "bertolini TTL triplex high pressure plunger pumps",
    "Generic plunger high pressure pump",
    "Apple Normal, Banana",
    "Cast Iron KSb centrifugal pump",
    "5.5kw Water Pump",
    "KSB reliable i line centrifuged pumps",
    "Apple Normal, Orange, Banana",
    "Positive API 6745 hydraulic diaphragm pump",
    "1/2 inch Fuel Hose Pipe",
    "Rotodel motor pump",
    "PVC Electrical Insulation Materials",
    "Electric kirloskar domestic water pump",
    "Electrical Insulation Materials",
    "sellowell motor pump",
    "bhupathi submersible pump set",
    "Flowshine Submersible pump set",
    "Index submersible pump",
    "Wintoss Plastic Electric Switch Board",
    "Electric 18 watt ujagar cooler pump",
    "Generator Service",
    "LG WM FHT1207ZWL, LG REF GL-S292RSCY",
    "Water tank, Filters, Water Pump",
    "MS Control Submersible Panel",
    "Centrifugal Monoblock Pumps",
    "Electric Motor with Pump BodyBlue and White",
    "Various Repair and Maintenance Parts",
    "Earthmax Pump",
    "Water Tank, Filters, Water Pump",
    "Centrifugal Water Pump for Agriculture",
    "mono block pumps",
]
101
+
102
  # Salesforce credentials
103
  SALESFORCE_USERNAME = "venkatramana@sandbox.com"
104
  SALESFORCE_PASSWORD = "Venkat12345@"
 
118
  extracted_text.append(line[1][0])
119
  return "\n".join(extracted_text)
120
 
# Function to find product name from the predefined list
def match_product_name(text):
    """Return the entry of ``PRODUCT_NAMES`` that appears in ``text``.

    Each name is searched for case-insensitively as a whole phrase.
    Candidates are tried longest first so that a specific name such as
    "Fusion submersible pump set" is preferred over a shorter name contained
    in it ("Submersible pump set"). Trying them in list order, as before,
    made such longer entries unreachable. ``sorted`` is stable, so names of
    equal length keep their list order.

    Args:
        text: Text to scan (may be empty or ``None``).

    Returns:
        The matching product name exactly as written in ``PRODUCT_NAMES``,
        or ``None`` when no name is found.
    """
    if not text:
        # Nothing to scan; also avoids a TypeError from re.search(None).
        return None

    for product_name in sorted(PRODUCT_NAMES, key=len, reverse=True):
        # Lookarounds behave like \b for names that start and end with a
        # word character, and still work if a name ever does not.
        pattern = rf"(?<!\w){re.escape(product_name)}(?!\w)"
        if re.search(pattern, text, re.IGNORECASE):
            return product_name
    return None
127
+
# Function to find attributes and their values
def find_attributes(text):
    """Collect the product name and attribute values found in ``text``.

    The product name, when ``match_product_name`` finds one, is stored under
    ``"Productname__c"``. Then, for each ``label -> field`` pair in
    ``ATTRIBUTE_MAPPING``, the text is searched case-insensitively for the
    label, optionally followed by ``:`` or ``-``. The value is the remainder
    of the line that follows the label; because the optional whitespace after
    the label may include a line break, the value can come from the next line
    when the label stands alone.

    Args:
        text: Text to scan (may be empty or ``None``).

    Returns:
        dict mapping field names to the value found for them. Labels that are
        absent, or whose captured value is blank, are left out.
    """
    structured_data = {}
    if not text:
        # Nothing to scan; also avoids a TypeError from re.search(None).
        return structured_data

    # Match and add product name
    matched_product = match_product_name(text)
    if matched_product:
        structured_data["Productname__c"] = matched_product

    for readable_attr, sf_attr in ATTRIBUTE_MAPPING.items():
        pattern = rf"{re.escape(readable_attr)}[:\-]?\s*(.+)"  # Match the attribute and capture its value
        match = re.search(pattern, text, re.IGNORECASE)
        if not match:
            continue
        value = match.group(1).strip()
        # A label followed only by trailing blanks captures whitespace;
        # do not record that as a real (empty) value.
        if value:
            structured_data[sf_attr] = value

    return structured_data
144
 
145
  # Function to sanitize numeric values