Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -75,29 +75,7 @@ ATTRIBUTE_MAPPING = {
|
|
| 75 |
# Known product names; OCR text lines are fuzzy-matched against this list.
PRODUCT_NAMES = [
    "Centrifugal mono block pump",
    "SINGLE PHASE MOTOR STARTER",
    "EasyPact EZC 100",
    "Openwell Submersible Pumpset",
    "Electric Motor",
    "Self Priming Pump",
    "Fusion submersible pump set",
    "DCT",
    "Shock proof water proof",
    "CG COMMERCIAL MOTORS",
    "Fusion",
    "control panel for single phase submerisible pumps",
    "single phase digital starter dry run and timer panel",
    "5HP AV1 XL Kirloskar Pump",
    "Phase stainless steel submersible pump",
    "Submersible pump",
    "WB15X",
    "Vtype self priming pump",
    "SP SHINE DISC",
    "havells submersible pump",
    "Havells open well Submersible pump",
    "Bertolini pump CK3 90pp",
    "WPA 772 Water Pump Assy",
    "bertolini TTL triplex high pressure plunger pumps",
    "Generic plunger high pressure pump",
    "Apple Normal, Banana",
    "Cast Iron KSb centrifugal pump",
    "5.5kw Water Pump",
    "KSB reliable i line centrifuged pumps",
    "Apple Normal, Orange, Banana",
    "Positive API 6745 hydraulic diaphragm pump",
    "1/2 inch Fuel Hose Pipe",
    "Kirloskar Water Pump",
    "Rotodel motor pump",
    "PVC Electrical Insulation Materials",
    "Electric kirloskar domestic water pump",
    "Electrical Insulation Materials",
    "sellowell motor pump",
    "bhupathi submersible pump set",
    "Flowshine Submersible pump set",
    "Index submersible pump",
    "Wintoss Plastic Electric Switch Board",
    "Electric 18 watt ujagar cooler pump",
    "Generator Service",
    "LG WM FHT1207ZWL, LG REF GL-S292RSCY",
    "Water tank, Filters, Water Pump",
    "MS Control Submersible Panel",
    "Centrifugal Monoblock Pumps",
    "Electric Motor with Pump BodyBlue and White",
    "Various Repair and Maintenance Parts",
    "Earthmax Pump",
    "Water Tank, Filters, Water Pump",
    "Centrifugal Water Pump for Agriculture",
    "mono block pumps",
]
|
| 102 |
|
| 103 |
# Salesforce credentials
|
|
@@ -114,176 +92,82 @@ EXCEL_FILE_PATH = os.getenv("EXCEL_FILE_PATH", "DataStorage.xlsx")
|
|
| 114 |
# Function to extract text using PaddleOCR
|
| 115 |
def extract_text(image):
|
| 116 |
result = ocr.ocr(image)
|
| 117 |
-
extracted_text = []
|
| 118 |
-
|
| 119 |
-
extracted_text.append(line[1][0])
|
| 120 |
-
extracted_text_str = "\n".join(extracted_text)
|
| 121 |
-
print("Extracted Text:", extracted_text_str) # Debug: Log extracted text
|
| 122 |
-
return extracted_text_str
|
| 123 |
|
| 124 |
# Function to find product name from the predefined list using fuzzy matching
|
| 125 |
def match_product_name(text):
    """Return the known product name that best matches any line of *text*.

    Every non-blank line of *text* is fuzzy-matched against
    ``PRODUCT_NAMES`` and the highest-scoring candidate over all lines is
    kept.

    Args:
        text: Newline-separated text (the OCR output). ``None`` is treated
            as empty text.

    Returns:
        The best-matching product name when its score is at least 70,
        otherwise ``None``.
    """
    best_match = None
    best_score = 0

    for line in (text or "").split("\n"):
        # A blank line can never beat the initial score of 0, so scoring it
        # is wasted work (and the matcher may warn about an empty query).
        if not line.strip():
            continue
        candidate = process.extractOne(line, PRODUCT_NAMES)
        if candidate is None:
            # Nothing to compare against; unpacking None would raise.
            continue
        # Index instead of unpacking so extra tuple fields are tolerated.
        match, score = candidate[0], candidate[1]
        if score > best_score:
            best_match = match
            best_score = score

    print(f"Best Match: {best_match}, Score: {best_score}")
    return best_match if best_score >= 70 else None
|
| 137 |
|
| 138 |
# Function to find attributes and their values
|
| 139 |
def find_attributes(text):
    """Extract the product name and labelled attribute values from *text*.

    The product name (when ``match_product_name`` finds one) is stored under
    ``"Productname__c"``. Then, for each ``label -> field`` pair in
    ``ATTRIBUTE_MAPPING``, the text is searched case-insensitively for the
    label, optionally followed by ``:`` or ``-``; the remainder of the line
    after any whitespace is stored under the mapped field name.

    Args:
        text: The text to scan (the OCR output).

    Returns:
        A dict mapping field names to the extracted string values. Labels
        that do not occur in *text* are simply absent.
    """
    found = {}

    product = match_product_name(text)
    if product:
        found["Productname__c"] = product

    for label, field_name in ATTRIBUTE_MAPPING.items():
        hit = re.search(
            rf"{re.escape(label)}[:\-]?\s*(.+)", text, re.IGNORECASE
        )
        if hit is None:
            continue
        found[field_name] = hit.group(1).strip()

    return found
|
| 153 |
|
| 154 |
-
# Function to sanitize numeric values
|
| 155 |
-
def sanitize_numeric(value):
    """Convert a loosely formatted quantity into a number.

    Args:
        value: An ``int`` or ``float`` (returned unchanged), or a string
            such as ``"12"``, ``"3.5 kg"`` or a simple fraction ``"1/2"``.

    Returns:
        The numeric value, or ``None`` when *value* is missing, is neither a
        number nor a string, or cannot be read as a number (this includes a
        fraction with a zero denominator or more than one ``/``).
    """
    if isinstance(value, (int, float)):
        return value
    # None and other non-text inputs used to escape as an uncaught TypeError
    # from the "/" membership test or re.sub; treat them as "no value".
    if not isinstance(value, str):
        return None

    try:
        if "/" in value:
            # Exactly one "/" is expected; anything else fails the unpack
            # with ValueError and is reported as None below.
            numerator, denominator = value.split("/")
            return float(numerator) / float(denominator)
        # Keep only digits, dots and minus signs (drops units, spaces, ...).
        digits = re.sub(r"[^\d\.\-]", "", value)
        return float(digits) if digits else None
    except (ValueError, ZeroDivisionError):
        return None
|
| 166 |
-
|
| 167 |
-
# Function to save structured data to the Excel file
|
| 168 |
-
def save_to_excel(data):
    """Append *data* as one row to the Excel file at ``EXCEL_FILE_PATH``.

    The file is created when it does not exist yet; otherwise its current
    rows are read back and the new row is added after them before the whole
    sheet is rewritten.

    Args:
        data: Mapping of column names to values for the new row.

    Returns:
        ``EXCEL_FILE_PATH`` after writing, or the string
        ``"No data to save."`` when *data* is empty.
    """
    if not data:
        return "No data to save."

    sheet = pd.DataFrame([data])
    if os.path.exists(EXCEL_FILE_PATH):
        earlier_rows = pd.read_excel(EXCEL_FILE_PATH, engine="openpyxl")
        sheet = pd.concat([earlier_rows, sheet], ignore_index=True)

    sheet.to_excel(EXCEL_FILE_PATH, index=False, engine="openpyxl")
    return EXCEL_FILE_PATH
|
| 182 |
-
|
| 183 |
# Unified function for processing images
|
| 184 |
def process_image(image, quantity, mode, entry_type):
|
| 185 |
try:
|
| 186 |
-
extracted_text = extract_text(image)
|
| 187 |
-
attributes = find_attributes(extracted_text)
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
attributes["Quantity__c"] = sanitize_numeric(quantity)
|
| 193 |
-
|
| 194 |
-
if not attributes:
|
| 195 |
-
return f"Extracted Text:\n{extracted_text}\n\nNo attributes found in the image.", None
|
| 196 |
-
|
| 197 |
-
numbered_output = "\n".join(
|
| 198 |
-
[f"{key.replace('__c', '').replace('_', ' ').title()}: {value}" for key, value in attributes.items()]
|
| 199 |
-
)
|
| 200 |
-
|
| 201 |
-
file_path = save_to_excel(attributes)
|
| 202 |
-
|
| 203 |
-
if mode == "Entry":
|
| 204 |
-
if entry_type == "Sales":
|
| 205 |
-
message = "Sales entry added."
|
| 206 |
-
elif entry_type == "Non-Sales":
|
| 207 |
-
message = "Non-Sales entry added."
|
| 208 |
-
else:
|
| 209 |
-
message = "Invalid entry type. Please select Sales or Non-Sales."
|
| 210 |
-
elif mode == "Exit":
|
| 211 |
-
message = "Exit mode is currently unavailable."
|
| 212 |
-
else:
|
| 213 |
-
message = "Invalid mode. Please select Entry or Exit."
|
| 214 |
-
|
| 215 |
-
return f"Extracted Text:\n{extracted_text}\n\n{numbered_output}\n\n{message}", file_path
|
| 216 |
-
|
| 217 |
except Exception as e:
|
| 218 |
-
return f"Error
|
| 219 |
|
| 220 |
-
# Function to pull data from Salesforce
|
| 221 |
-
def
|
| 222 |
try:
|
| 223 |
sf = Salesforce(
|
| 224 |
username=SALESFORCE_USERNAME,
|
| 225 |
password=SALESFORCE_PASSWORD,
|
| 226 |
security_token=SALESFORCE_SECURITY_TOKEN,
|
| 227 |
)
|
| 228 |
-
|
| 229 |
-
return
|
| 230 |
except Exception as e:
|
| 231 |
-
print("Error pulling data from MotorDataAPI:", e)
|
| 232 |
return f"Error: {str(e)}"
|
| 233 |
|
| 234 |
# Function to format Salesforce data for display
|
| 235 |
def format_salesforce_data():
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
for i, item in enumerate(data)
|
| 245 |
-
]
|
| 246 |
-
)
|
| 247 |
-
else:
|
| 248 |
-
return data
|
| 249 |
-
except Exception as e:
|
| 250 |
-
print("Error in format_salesforce_data:", e)
|
| 251 |
-
return f"Error retrieving data: {str(e)}"
|
| 252 |
-
|
| 253 |
-
interface = gr.Interface(
|
| 254 |
-
tabs=[
|
| 255 |
gr.Interface(
|
| 256 |
fn=process_image,
|
| 257 |
inputs=[
|
| 258 |
gr.Image(type="numpy"),
|
| 259 |
-
gr.Number(label="Quantity"
|
| 260 |
-
gr.Dropdown(label="Mode", choices=["Entry", "Exit"]
|
| 261 |
-
gr.Radio(label="Entry Type", choices=["Sales", "Non-Sales"],
|
| 262 |
-
],
|
| 263 |
-
outputs=[
|
| 264 |
-
gr.Text(label="Image Data Viewer"),
|
| 265 |
-
gr.File(label="Data Storage Manager")
|
| 266 |
],
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
),
|
| 270 |
-
gr.Interface(
|
| 271 |
-
fn=lambda: pd.read_excel(EXCEL_FILE_PATH) if os.path.exists(EXCEL_FILE_PATH) else pd.DataFrame(),
|
| 272 |
-
inputs=[],
|
| 273 |
-
outputs=gr.DataFrame(label="Sales Data"),
|
| 274 |
-
title="View Saved Data",
|
| 275 |
-
description="Review previously saved entries in tabular format."
|
| 276 |
),
|
| 277 |
gr.Interface(
|
| 278 |
fn=format_salesforce_data,
|
| 279 |
inputs=[],
|
| 280 |
-
outputs=
|
| 281 |
-
title="
|
| 282 |
-
|
| 283 |
-
)
|
| 284 |
],
|
| 285 |
title="VENKATA RAMANA MOTORS Dashboard",
|
| 286 |
-
description="
|
| 287 |
)
|
| 288 |
|
| 289 |
if __name__ == "__main__":
|
|
|
|
| 75 |
# Known product names; OCR text lines are fuzzy-matched against this list.
PRODUCT_NAMES = [
    "Centrifugal mono block pump",
    "SINGLE PHASE MOTOR STARTER",
    "EasyPact EZC 100",
    "Openwell Submersible Pumpset",
    "Electric Motor",
    "Self Priming Pump",
    # Add more products here
]
|
| 80 |
|
| 81 |
# Salesforce credentials
|
|
|
|
| 92 |
# Function to extract text using PaddleOCR
|
| 93 |
def extract_text(image):
    """Run OCR on *image* and return the recognised text, one item per line.

    Args:
        image: The image to read; passed unchanged to ``ocr.ocr``.

    Returns:
        The recognised strings joined with newlines, or an empty string when
        the OCR result contains no detections.
    """
    result = ocr.ocr(image)
    # The detections for the image live in result[0]. When nothing is
    # recognised that slot can be None (or the result itself empty), which
    # previously crashed the iteration below.
    # NOTE(review): confirm the exact "no text" shape for the installed
    # PaddleOCR version.
    detections = result[0] if result else None
    if not detections:
        return ""
    return "\n".join(line[1][0] for line in detections)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 97 |
|
| 98 |
# Function to find product name from the predefined list using fuzzy matching
|
| 99 |
def match_product_name(text):
    """Return the known product name that best matches any line of *text*.

    Every non-blank line of *text* is fuzzy-matched against
    ``PRODUCT_NAMES`` and the highest-scoring candidate over all lines is
    kept.

    Args:
        text: Newline-separated text (the OCR output). ``None`` is treated
            as empty text.

    Returns:
        The best-matching product name when its score is at least 70,
        otherwise ``None``.
    """
    best_match, best_score = None, 0
    for line in (text or "").split("\n"):
        # A blank line can never beat the initial score of 0, so scoring it
        # is wasted work (and the matcher may warn about an empty query).
        if not line.strip():
            continue
        candidate = process.extractOne(line, PRODUCT_NAMES)
        if candidate is None:
            # Nothing to compare against; unpacking None would raise.
            continue
        # Index instead of unpacking so extra tuple fields are tolerated.
        match, score = candidate[0], candidate[1]
        if score > best_score:
            best_match, best_score = match, score
    return best_match if best_score >= 70 else None
|
| 106 |
|
| 107 |
# Function to find attributes and their values
|
| 108 |
def find_attributes(text):
    """Extract labelled attribute values from *text*.

    For each ``label -> field`` pair in ``ATTRIBUTE_MAPPING`` the text is
    searched case-insensitively for the label, optionally followed by ``:``
    or ``-``; the remainder of the line after any whitespace is stored under
    the mapped field name.

    Args:
        text: The text to scan (the OCR output).

    Returns:
        A dict mapping field names to the extracted string values. Labels
        that do not occur in *text* are simply absent.
    """
    found = {}
    for label, field_name in ATTRIBUTE_MAPPING.items():
        hit = re.search(
            rf"{re.escape(label)}[:\-]?\s*(.+)", text, re.IGNORECASE
        )
        if hit is None:
            continue
        found[field_name] = hit.group(1).strip()
    return found
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
# Unified function for processing images
|
| 118 |
def process_image(image, quantity, mode, entry_type):
    """OCR an image and summarise the attributes found in it.

    Args:
        image: Image handed to ``extract_text``.
        quantity: Stored as-is under ``"Quantity__c"`` in the attributes.
        mode: Accepted for the UI signature; not used here.
        entry_type: Accepted for the UI signature; not used here.

    Returns:
        A ``(message, None)`` pair. On success the message contains the raw
        extracted text followed by one ``key: value`` line per attribute; on
        any exception it is ``"Error: <exception text>"``.
    """
    try:
        ocr_text = extract_text(image)
        fields = find_attributes(ocr_text)
        fields["Quantity__c"] = quantity

        lines = []
        for key, value in fields.items():
            lines.append(f"{key}: {value}")
        listing = "\n".join(lines)

        message = f"Extracted Text:\n{ocr_text}\n\nAttributes:\n{listing}"
        return message, None
    except Exception as exc:
        # UI boundary: report the failure as text instead of raising.
        return f"Error: {str(exc)}", None
|
| 127 |
|
| 128 |
+
# Function to pull stock data from Salesforce
|
| 129 |
+
def pull_stock_data():
    """Fetch stock data from the Salesforce ``MotorDataAPI`` Apex endpoint.

    Logs in with the module-level Salesforce credentials and issues a GET
    request against ``MotorDataAPI``.

    Returns:
        Whatever the endpoint call returns, or the string
        ``"Error: <exception text>"`` if logging in or the call fails.
    """
    try:
        connection = Salesforce(
            username=SALESFORCE_USERNAME,
            password=SALESFORCE_PASSWORD,
            security_token=SALESFORCE_SECURITY_TOKEN,
        )
        return connection.apexecute("MotorDataAPI", method="GET")
    except Exception as exc:
        # UI boundary: surface the failure as text instead of raising.
        return f"Error: {str(exc)}"
|
| 140 |
|
| 141 |
# Function to format Salesforce data for display
|
| 142 |
def format_salesforce_data():
    """Render the pulled stock data as display text.

    Returns:
        When ``pull_stock_data`` yields a list, one
        ``"<Product_Name__c> - <Stock>"`` line per item (``"N/A"`` for a
        missing key), joined with newlines. Any other result (for example
        its error string) is returned unchanged.
    """
    payload = pull_stock_data()
    if not isinstance(payload, list):
        return payload

    rows = []
    for record in payload:
        name = record.get('Product_Name__c', 'N/A')
        stock = record.get('Stock', 'N/A')
        rows.append(f"{name} - {stock}")
    return "\n".join(rows)
|
| 147 |
+
|
| 148 |
+
# Gradio interfaces
|
| 149 |
+
# Dashboard with one tab per workflow: image processing and stock lookup.
interface = gr.TabbedInterface(
    [
        gr.Interface(
            fn=process_image,
            inputs=[
                gr.Image(type="numpy"),
                gr.Number(label="Quantity"),
                gr.Dropdown(label="Mode", choices=["Entry", "Exit"]),
                gr.Radio(label="Entry Type", choices=["Sales", "Non-Sales"]),
            ],
            # process_image returns a (message, file) pair, so it needs two
            # output components; a single "text" output would receive the
            # whole tuple instead of the message.
            outputs=[
                gr.Text(label="Image Data Viewer"),
                gr.File(label="Data Storage Manager"),
            ],
            title="Process Image",
        ),
        gr.Interface(
            fn=format_salesforce_data,
            inputs=[],
            outputs="text",
            title="Stock Updates",
        ),
    ],
    # Without explicit names the tabs get generic default labels.
    tab_names=["Process Image", "Stock Updates"],
    title="VENKATA RAMANA MOTORS Dashboard",
    # TabbedInterface takes no `description` keyword (passing one raises
    # TypeError), so the former text is kept here for reference only:
    # "OCR and Salesforce integration for motor inventory."
)
|
| 172 |
|
| 173 |
if __name__ == "__main__":
|