Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,43 +15,44 @@ import kaleido # Ensure kaleido is imported
|
|
| 15 |
ATTRIBUTE_MAPPING = {
|
| 16 |
"Product name": "Productname__c",
|
| 17 |
"Type": "Type__c",
|
| 18 |
-
"model name"
|
| 19 |
-
"Voltage":"Voltage__c",
|
| 20 |
"Outlet": "Outlet__c",
|
| 21 |
-
"Stage":"Stage__c",
|
| 22 |
"H.p": "H_p__c",
|
| 23 |
"Model": "Model__c"
|
| 24 |
}
|
| 25 |
|
| 26 |
# List of product names to match
|
| 27 |
-
PRODUCT_NAMES =
|
| 28 |
"CG COMMERCIAL MOTORS", "Fusion", "Agroking", "Openwell"
|
| 29 |
-
|
| 30 |
|
| 31 |
# List of model names to match
|
| 32 |
-
MODEL_NAMES =
|
| 33 |
"V9", "V4", "V3", "V7", "V6"
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
# Salesforce credentials
|
| 38 |
-
SALESFORCE_USERNAME = "venkatramana@sandbox.com"
|
| 39 |
-
SALESFORCE_PASSWORD = "Venkat12345@"
|
| 40 |
-
SALESFORCE_SECURITY_TOKEN = "GhcJJmjBEefdnukJoz4CAQlR"
|
| 41 |
|
| 42 |
# Initialize PaddleOCR
|
| 43 |
ocr = PaddleOCR(use_angle_cls=True, lang='en')
|
| 44 |
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
| 48 |
extracted_text = []
|
|
|
|
| 49 |
for line in result[0]:
|
| 50 |
-
extracted_text.append(line[1][0])
|
|
|
|
| 51 |
return "\n".join(extracted_text)
|
| 52 |
|
| 53 |
-
# Function to match product name using fuzzy matching
|
| 54 |
def match_product_name(extracted_text):
|
|
|
|
|
|
|
|
|
|
| 55 |
best_match = None
|
| 56 |
best_score = 0
|
| 57 |
|
|
@@ -63,7 +64,6 @@ def match_product_name(extracted_text):
|
|
| 63 |
|
| 64 |
return best_match if best_score >= 70 else None # Threshold of 70 for a match
|
| 65 |
|
| 66 |
-
# Function to match model name using fuzzy matching
|
| 67 |
def match_model_name(extracted_text):
|
| 68 |
"""
|
| 69 |
Uses fuzzy matching to find the best model name match from extracted text.
|
|
@@ -79,6 +79,14 @@ def match_model_name(extracted_text):
|
|
| 79 |
|
| 80 |
return best_match if best_score >= 70 else None # Threshold of 70 for a match
|
| 81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
| 83 |
# Function to extract attributes and their values
|
| 84 |
def extract_attributes(extracted_text):
|
|
|
|
| 15 |
# Maps an attribute label (as matched in the OCR text) to the name of the
# field that stores its value. Most targets carry the `__c` suffix.
# NOTE(review): "Model Name" is the only target without a `__c` suffix, and
# "Model" already maps to "Model__c" -- confirm the intended field name.
ATTRIBUTE_MAPPING = {
    "Product name": "Productname__c",
    "Type": "Type__c",
    "model name": "Model Name",
    "Voltage": "Voltage__c",
    "Outlet": "Outlet__c",
    "Stage": "Stage__c",
    "H.p": "H_p__c",
    "Model": "Model__c",
}
|
| 25 |
|
| 26 |
# List of product names to match
|
| 27 |
+
# Candidate product names; order is preserved from the original list.
PRODUCT_NAMES = [
    "CG COMMERCIAL MOTORS",
    "Fusion",
    "Agroking",
    "Openwell",
]
|
| 30 |
|
| 31 |
# List of model names to match
|
| 32 |
+
# Candidate model names; order is preserved from the original list.
MODEL_NAMES = [
    "V9",
    "V4",
    "V3",
    "V7",
    "V6",
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
|
| 36 |
# Initialize PaddleOCR
|
| 37 |
ocr = PaddleOCR(use_angle_cls=True, lang='en')
|
| 38 |
|
| 39 |
+
def extract_text_from_image(image_array):
    """
    Extract text from an image using PaddleOCR.

    Args:
        image_array: The image as a numpy array (cv2 image or similar).

    Returns:
        The detected text fragments joined with newlines, in the order the
        OCR engine reported them. An empty string is returned when the
        engine reports no detections.
    """
    result = ocr.ocr(image_array)

    # NOTE(review): some PaddleOCR versions report a page with no detected
    # text as None instead of an empty list -- confirm against the installed
    # version. Guarding here avoids a TypeError when iterating the page.
    if not result or not result[0]:
        return ""

    # Each entry's second element holds the recognised text at index 0.
    return "\n".join(line[1][0] for line in result[0])
|
| 51 |
|
|
|
|
| 52 |
def match_product_name(extracted_text):
|
| 53 |
+
"""
|
| 54 |
+
Uses fuzzy matching to find the best product name match from extracted text.
|
| 55 |
+
"""
|
| 56 |
best_match = None
|
| 57 |
best_score = 0
|
| 58 |
|
|
|
|
| 64 |
|
| 65 |
return best_match if best_score >= 70 else None # Threshold of 70 for a match
|
| 66 |
|
|
|
|
| 67 |
def match_model_name(extracted_text):
|
| 68 |
"""
|
| 69 |
Uses fuzzy matching to find the best model name match from extracted text.
|
|
|
|
| 79 |
|
| 80 |
return best_match if best_score >= 70 else None # Threshold of 70 for a match
|
| 81 |
|
| 82 |
+
# Example Usage: Load an image file into a numpy array (cv2 format)
|
| 83 |
+
def process_image(image_path):
    """
    Read an image file, run OCR on it and match product and model names.

    Args:
        image_path: Path of the image file to load with cv2.

    Returns:
        A ``(matched_product, matched_model)`` tuple holding the results of
        ``match_product_name`` and ``match_model_name``. Either element is
        None when no candidate reaches the match threshold.

    Raises:
        ValueError: If cv2 cannot read or decode an image from image_path.
    """
    image = cv2.imread(image_path)
    # cv2.imread returns None instead of raising when the file is missing
    # or cannot be decoded; fail here with a clear message rather than
    # passing None on to the OCR engine.
    if image is None:
        raise ValueError(f"Could not read or decode image: {image_path}")

    extracted_text = extract_text_from_image(image)

    # Match the extracted text against the known product and model names and
    # hand both results back to the caller (previously they were discarded).
    matched_product = match_product_name(extracted_text)
    matched_model = match_model_name(extracted_text)
    return matched_product, matched_model
|
| 90 |
|
| 91 |
# Function to extract attributes and their values
|
| 92 |
def extract_attributes(extracted_text):
|