Spaces:

rairo
/

iris-products-api

Sleeping

App Files Files Community

rairo committed on Sep 20, 2025

Commit

f27ad5a

verified ·

1 Parent(s): a2c8a5f

Update main.py

Browse files

Files changed (1) hide show

main.py +16 -10

main.py CHANGED Viewed

@@ -3,7 +3,6 @@ import io
 import logging
 import re
 import pandas as pd
-import pdfplumber
 from flask import Flask, request, jsonify
 from flask_cors import CORS
 from flask_sqlalchemy import SQLAlchemy
@@ -71,25 +70,32 @@ FUZZY_MATCH_THRESHOLD = 85
 EXISTING_PRODUCT_NAMES = []
 HS_CODE_DESCRIPTIONS = {}
-# --- FIX: Replaced the entire PDF parsing logic with a simple, robust CSV reader ---
-def load_hs_codes(filename="HS_Codes_for_use_under_FDMS.xlsx - Table 1.csv"):
-    """Loads HS codes from the user-provided clean CSV file."""
     log.info(f"Loading HS Codes from '{filename}'...")
     if not os.path.exists(filename):
         log.error(f"HS Code file not found at '{filename}'. Categorization will fail.")
         return
     try:
-        df = pd.read_csv(filename)
-        # Ensure the column names are correct
-        df.columns = ['HS CODE', 'GOODS DESCRIPTION']
         for _, row in df.iterrows():
             code = str(row['HS CODE']).strip()
             desc = str(row['GOODS DESCRIPTION']).strip()
             if code and desc and code != 'nan' and desc != 'nan':
                 HS_CODE_DESCRIPTIONS[desc] = code
-        log.info(f"Successfully parsed {len(HS_CODE_DESCRIPTIONS)} HS codes from CSV.")
     except Exception as e:
-        log.error(f"Failed to load HS codes from CSV: {e}")
 def load_existing_products(filepath='Product List.csv'):
@@ -233,7 +239,7 @@ if __name__ == "__main__":
     with app.app_context():
         log.info("Initializing server...")
         db.create_all()
-        load_hs_codes() # Call the new, corrected function
         EXISTING_PRODUCT_NAMES = load_existing_products()
         log.info(f"Server is ready. Database is at: {DB_PATH}")

 import logging
 import re
 import pandas as pd
 from flask import Flask, request, jsonify
 from flask_cors import CORS
 from flask_sqlalchemy import SQLAlchemy
 EXISTING_PRODUCT_NAMES = []
 HS_CODE_DESCRIPTIONS = {}
+# --- FINAL FIX: Point to the correct .xlsx file and use pandas.read_excel ---
+def load_hs_codes(filename="HS_Codes_for_use_under_FDMS.xlsx"):
+    """Loads HS codes from the user-provided clean Excel file."""
     log.info(f"Loading HS Codes from '{filename}'...")
     if not os.path.exists(filename):
         log.error(f"HS Code file not found at '{filename}'. Categorization will fail.")
         return
     try:
+        # Use read_excel for .xlsx files
+        df = pd.read_excel(filename, engine='openpyxl')
+        # Check for expected columns
+        if 'HS CODE' not in df.columns or 'GOODS DESCRIPTION' not in df.columns:
+            log.error("HS Code Excel file is missing 'HS CODE' or 'GOODS DESCRIPTION' columns.")
+            return
         for _, row in df.iterrows():
+            # Convert to string and strip whitespace to handle potential data issues
             code = str(row['HS CODE']).strip()
             desc = str(row['GOODS DESCRIPTION']).strip()
             if code and desc and code != 'nan' and desc != 'nan':
+                # Ensure the code is treated as a string, preserving leading zeros if any
                 HS_CODE_DESCRIPTIONS[desc] = code
+        log.info(f"Successfully parsed {len(HS_CODE_DESCRIPTIONS)} HS codes from Excel file.")
     except Exception as e:
+        log.error(f"Failed to load HS codes from Excel file: {e}")
 def load_existing_products(filepath='Product List.csv'):
     with app.app_context():
         log.info("Initializing server...")
         db.create_all()
+        load_hs_codes()
         EXISTING_PRODUCT_NAMES = load_existing_products()
         log.info(f"Server is ready. Database is at: {DB_PATH}")