Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,7 +11,7 @@ from io import BytesIO
|
|
| 11 |
from fuzzywuzzy import process
|
| 12 |
import kaleido # Ensure kaleido is imported
|
| 13 |
|
| 14 |
-
# Attribute mappings: readable names to Salesforce API names
|
| 15 |
ATTRIBUTE_MAPPING = {
|
| 16 |
"Product name": "Productname__c",
|
| 17 |
"Type": "Type__c",
|
|
@@ -23,25 +23,21 @@ ATTRIBUTE_MAPPING = {
|
|
| 23 |
"Model": "Model__c"
|
| 24 |
}
|
| 25 |
|
| 26 |
-
# List of product names to match
|
| 27 |
PRODUCT_NAMES = [
|
| 28 |
"CG COMMERCIAL MOTORS", "Fusion", "Agroking", "Openwell"
|
| 29 |
]
|
| 30 |
|
| 31 |
-
# List of model names to match
|
| 32 |
MODEL_NAMES = [
|
| 33 |
"V9", "V4", "V3", "V7", "V6"
|
| 34 |
]
|
| 35 |
|
| 36 |
-
|
| 37 |
-
ocr = PaddleOCR(use_angle_cls=True, lang='en')
|
| 38 |
-
|
| 39 |
-
def extract_text_from_image(image_array):
|
| 40 |
"""
|
| 41 |
Extracts text from an image using PaddleOCR.
|
| 42 |
-
Accepts an image as a numpy array (cv2 image or similar).
|
| 43 |
"""
|
| 44 |
-
result = ocr.ocr(
|
| 45 |
extracted_text = []
|
| 46 |
|
| 47 |
for line in result[0]:
|
|
@@ -79,14 +75,29 @@ def match_model_name(extracted_text):
|
|
| 79 |
|
| 80 |
return best_match if best_score >= 70 else None # Threshold of 70 for a match
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 86 |
|
| 87 |
-
|
| 88 |
-
matched_product = match_product_name(extracted_text)
|
| 89 |
-
matched_model = match_model_name(extracted_text)
|
| 90 |
|
| 91 |
# Function to extract attributes and their values
|
| 92 |
def extract_attributes(extracted_text):
|
|
|
|
| 11 |
from fuzzywuzzy import process
|
| 12 |
import kaleido # Ensure kaleido is imported
|
| 13 |
|
| 14 |
+
# ✅ Attribute mappings: readable names to Salesforce API names
|
| 15 |
ATTRIBUTE_MAPPING = {
|
| 16 |
"Product name": "Productname__c",
|
| 17 |
"Type": "Type__c",
|
|
|
|
| 23 |
"Model": "Model__c"
|
| 24 |
}
|
| 25 |
|
| 26 |
+
# ✅ List of product names to match
|
| 27 |
PRODUCT_NAMES = [
|
| 28 |
"CG COMMERCIAL MOTORS", "Fusion", "Agroking", "Openwell"
|
| 29 |
]
|
| 30 |
|
| 31 |
+
# ✅ List of model names to match
|
| 32 |
MODEL_NAMES = [
|
| 33 |
"V9", "V4", "V3", "V7", "V6"
|
| 34 |
]
|
| 35 |
|
| 36 |
+
def extract_text(image_path):
|
|
|
|
|
|
|
|
|
|
| 37 |
"""
|
| 38 |
Extracts text from an image using PaddleOCR.
|
|
|
|
| 39 |
"""
|
| 40 |
+
result = ocr.ocr(image_path)
|
| 41 |
extracted_text = []
|
| 42 |
|
| 43 |
for line in result[0]:
|
|
|
|
| 75 |
|
| 76 |
return best_match if best_score >= 70 else None # Threshold of 70 for a match
|
| 77 |
|
| 78 |
+
def extract_attributes(extracted_text):
    """
    Extract key attributes (Model, H.P, Stage, Outlet, Voltage, Phase,
    Year of Mfg) from newline-separated text.

    Each line is checked against the known labels in a fixed priority
    order; the first label found on the line wins, and the text after the
    last occurrence of that label (with surrounding spaces and colons
    stripped) is stored as the value. If the same attribute appears on
    several lines, the last line wins.

    Args:
        extracted_text: Newline-separated string to scan. An empty string
            or None yields an empty dict.

    Returns:
        dict mapping attribute name (e.g. "Model", "H.P") to its string
        value.
    """
    # NOTE(review): another `def extract_attributes` appears further down in
    # this file; a later definition with the same name would shadow this one
    # at import time -- confirm which implementation is intended.

    # (attribute key, markers accepted for it), checked in this order.
    # "H.P" is listed before "HP" so the dotted spelling is preferred when
    # both could apply.
    label_markers = (
        ("Model", ("Model",)),
        ("H.P", ("H.P", "HP")),
        ("Stage", ("Stage",)),
        ("Outlet", ("Outlet",)),
        ("Voltage", ("Voltage",)),
        ("Phase", ("Phase",)),
        ("Year of Mfg", ("Year of Mfg",)),
    )

    attributes = {}
    if not extracted_text:
        return attributes

    for text in extracted_text.split("\n"):
        for key, markers in label_markers:
            # Split on the marker that is actually present in the line;
            # splitting on "H.P" when only "HP" matched would return the
            # whole line (label included) as the value.
            marker = next((m for m in markers if m in text), None)
            if marker is None:
                continue
            attributes[key] = text.split(marker)[-1].strip(" :")
            break  # only the first matching label per line is used

    return attributes
|
|
|
|
|
|
|
| 101 |
|
| 102 |
# Function to extract attributes and their values
|
| 103 |
def extract_attributes(extracted_text):
|