rairo committed on
Commit
f27ad5a
·
verified ·
1 Parent(s): a2c8a5f

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +16 -10
main.py CHANGED
@@ -3,7 +3,6 @@ import io
3
  import logging
4
  import re
5
  import pandas as pd
6
- import pdfplumber
7
  from flask import Flask, request, jsonify
8
  from flask_cors import CORS
9
  from flask_sqlalchemy import SQLAlchemy
@@ -71,25 +70,32 @@ FUZZY_MATCH_THRESHOLD = 85
71
  EXISTING_PRODUCT_NAMES = []
72
  HS_CODE_DESCRIPTIONS = {}
73
 
74
- # --- FIX: Replaced the entire PDF parsing logic with a simple, robust CSV reader ---
75
- def load_hs_codes(filename="HS_Codes_for_use_under_FDMS.xlsx - Table 1.csv"):
76
- """Loads HS codes from the user-provided clean CSV file."""
77
  log.info(f"Loading HS Codes from '{filename}'...")
78
  if not os.path.exists(filename):
79
  log.error(f"HS Code file not found at '{filename}'. Categorization will fail.")
80
  return
81
  try:
82
- df = pd.read_csv(filename)
83
- # Ensure the column names are correct
84
- df.columns = ['HS CODE', 'GOODS DESCRIPTION']
 
 
 
 
 
85
  for _, row in df.iterrows():
 
86
  code = str(row['HS CODE']).strip()
87
  desc = str(row['GOODS DESCRIPTION']).strip()
88
  if code and desc and code != 'nan' and desc != 'nan':
 
89
  HS_CODE_DESCRIPTIONS[desc] = code
90
- log.info(f"Successfully parsed {len(HS_CODE_DESCRIPTIONS)} HS codes from CSV.")
91
  except Exception as e:
92
- log.error(f"Failed to load HS codes from CSV: {e}")
93
 
94
 
95
  def load_existing_products(filepath='Product List.csv'):
@@ -233,7 +239,7 @@ if __name__ == "__main__":
233
  with app.app_context():
234
  log.info("Initializing server...")
235
  db.create_all()
236
- load_hs_codes() # Call the new, corrected function
237
  EXISTING_PRODUCT_NAMES = load_existing_products()
238
  log.info(f"Server is ready. Database is at: {DB_PATH}")
239
 
 
3
  import logging
4
  import re
5
  import pandas as pd
 
6
  from flask import Flask, request, jsonify
7
  from flask_cors import CORS
8
  from flask_sqlalchemy import SQLAlchemy
 
70
  EXISTING_PRODUCT_NAMES = []
71
  HS_CODE_DESCRIPTIONS = {}
72
 
73
+ # --- FINAL FIX: Point to the correct .xlsx file and use pandas.read_excel ---
74
+ def load_hs_codes(filename="HS_Codes_for_use_under_FDMS.xlsx"):
75
+ """Loads HS codes from the user-provided clean Excel file."""
76
  log.info(f"Loading HS Codes from '{filename}'...")
77
  if not os.path.exists(filename):
78
  log.error(f"HS Code file not found at '{filename}'. Categorization will fail.")
79
  return
80
  try:
81
+ # Use read_excel for .xlsx files
82
+ df = pd.read_excel(filename, engine='openpyxl')
83
+
84
+ # Check for expected columns
85
+ if 'HS CODE' not in df.columns or 'GOODS DESCRIPTION' not in df.columns:
86
+ log.error("HS Code Excel file is missing 'HS CODE' or 'GOODS DESCRIPTION' columns.")
87
+ return
88
+
89
  for _, row in df.iterrows():
90
+ # Convert to string and strip whitespace to handle potential data issues
91
  code = str(row['HS CODE']).strip()
92
  desc = str(row['GOODS DESCRIPTION']).strip()
93
  if code and desc and code != 'nan' and desc != 'nan':
94
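+ # Key by description, storing the code as a string. NOTE: str() cannot restore leading zeros if pandas already parsed the cell as a number; they survive only when the Excel cell itself holds text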
95
  HS_CODE_DESCRIPTIONS[desc] = code
96
+ log.info(f"Successfully parsed {len(HS_CODE_DESCRIPTIONS)} HS codes from Excel file.")
97
  except Exception as e:
98
+ log.error(f"Failed to load HS codes from Excel file: {e}")
99
 
100
 
101
  def load_existing_products(filepath='Product List.csv'):
 
239
  with app.app_context():
240
  log.info("Initializing server...")
241
  db.create_all()
242
+ load_hs_codes()
243
  EXISTING_PRODUCT_NAMES = load_existing_products()
244
  log.info(f"Server is ready. Database is at: {DB_PATH}")
245