gopichandra committed on
Commit
1fc043a
·
verified ·
1 Parent(s): d856f07

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -16
app.py CHANGED
@@ -11,7 +11,7 @@ from io import BytesIO
11
  from fuzzywuzzy import process
12
  import kaleido # Ensure kaleido is imported
13
 
14
- # Attribute mappings: readable names to Salesforce API names
15
  ATTRIBUTE_MAPPING = {
16
  "Product name": "Productname__c",
17
  "Type": "Type__c",
@@ -23,25 +23,21 @@ ATTRIBUTE_MAPPING = {
23
  "Model": "Model__c"
24
  }
25
 
26
- # List of product names to match
27
  PRODUCT_NAMES = [
28
  "CG COMMERCIAL MOTORS", "Fusion", "Agroking", "Openwell"
29
  ]
30
 
31
- # List of model names to match
32
  MODEL_NAMES = [
33
  "V9", "V4", "V3", "V7", "V6"
34
  ]
35
 
36
- # Initialize PaddleOCR
37
- ocr = PaddleOCR(use_angle_cls=True, lang='en')
38
-
39
- def extract_text_from_image(image_array):
40
  """
41
  Extracts text from an image using PaddleOCR.
42
- Accepts an image as a numpy array (cv2 image or similar).
43
  """
44
- result = ocr.ocr(image_array)
45
  extracted_text = []
46
 
47
  for line in result[0]:
@@ -79,14 +75,29 @@ def match_model_name(extracted_text):
79
 
80
  return best_match if best_score >= 70 else None # Threshold of 70 for a match
81
 
82
- # Example Usage: Load an image file into a numpy array (cv2 format)
83
- def process_image(image_path):
84
- image = cv2.imread(image_path) # Read the image
85
- extracted_text = extract_text_from_image(image) # Extract text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
- # Matching extracted text with product and model names
88
- matched_product = match_product_name(extracted_text)
89
- matched_model = match_model_name(extracted_text)
90
 
91
  # Function to extract attributes and their values
92
  def extract_attributes(extracted_text):
 
11
  from fuzzywuzzy import process
12
  import kaleido # Ensure kaleido is imported
13
 
14
+ # Attribute mappings: readable names to Salesforce API names
15
  ATTRIBUTE_MAPPING = {
16
  "Product name": "Productname__c",
17
  "Type": "Type__c",
 
23
  "Model": "Model__c"
24
  }
25
 
26
+ # List of product names to match
27
  PRODUCT_NAMES = [
28
  "CG COMMERCIAL MOTORS", "Fusion", "Agroking", "Openwell"
29
  ]
30
 
31
+ # List of model names to match
32
  MODEL_NAMES = [
33
  "V9", "V4", "V3", "V7", "V6"
34
  ]
35
 
36
+ def extract_text(image_path):
 
 
 
37
  """
38
  Extracts text from an image using PaddleOCR.
 
39
  """
40
+ result = ocr.ocr(image_path)
41
  extracted_text = []
42
 
43
  for line in result[0]:
 
75
 
76
  return best_match if best_score >= 70 else None # Threshold of 70 for a match
77
 
78
def extract_attributes(extracted_text):
    """
    Extracts key attributes like Model, H.P, Stage, Voltage, etc., from the extracted text.

    Args:
        extracted_text: A single string; it is split on newlines and each
            line is inspected independently.

    Returns:
        A dict mapping an attribute name to whatever follows the last
        occurrence of its label on the line, with surrounding spaces and
        colons stripped. Each line contributes at most one attribute; when
        several lines carry the same label, the last line wins.
    """
    # NOTE(review): the file appears to define another `extract_attributes`
    # further down; if so, that later definition shadows this one - confirm
    # which of the two is meant to be used.

    # Checked in this order for every line; the first attribute whose label
    # occurs in the line wins (same priority as the original if/elif chain).
    # "H.P" is listed before "HP" so the dotted spelling is preferred when
    # both appear on one line.
    labelled_attributes = (
        ("Model", ("Model",)),
        ("H.P", ("H.P", "HP")),
        ("Stage", ("Stage",)),
        ("Outlet", ("Outlet",)),
        ("Voltage", ("Voltage",)),
        ("Phase", ("Phase",)),
        ("Year of Mfg", ("Year of Mfg",)),
    )

    attributes = {}

    for text in extracted_text.split("\n"):
        for name, labels in labelled_attributes:
            label = next(
                (candidate for candidate in labels if candidate in text),
                None,
            )
            if label is None:
                continue
            # Split on the label that actually matched. Splitting on "H.P"
            # for a line that only contains "HP" would leave the whole line
            # (label included) as the value.
            attributes[name] = text.split(label)[-1].strip(" :")
            break

    return attributes
 
 
101
 
102
  # Function to extract attributes and their values
103
  def extract_attributes(extracted_text):