rairo committed on
Commit
6eb56d5
·
verified ·
1 Parent(s): 3e16bd6

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +120 -434
main.py CHANGED
@@ -10,8 +10,6 @@ from flask_sqlalchemy import SQLAlchemy
10
  from sqlalchemy.exc import IntegrityError
11
  from thefuzz import process, fuzz
12
  from werkzeug.utils import secure_filename
13
- import tempfile
14
- import sqlite3
15
 
16
  # ───────────────────────────────────────────────────────────────────────────────
17
  # CONFIGURATION
@@ -23,34 +21,24 @@ log = logging.getLogger("product-pipeline-api")
23
  app = Flask(__name__)
24
  CORS(app)
25
 
26
- # --- HUGGING FACE SPACES OPTIMIZED DATABASE CONFIGURATION ---
27
- # Use in-memory SQLite database for Hugging Face Spaces compatibility
28
- # This avoids all file permission issues while maintaining full functionality
29
- app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite:///:memory:'
 
 
 
30
  app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
31
-
32
- # --- Upload Folder Configuration ---
33
- def get_upload_folder():
34
- """Get a writable upload folder with fallbacks."""
35
- try:
36
- # Use system temp directory which is always writable
37
- temp_dir = tempfile.mkdtemp(prefix='product_uploads_')
38
- log.info(f"Using upload folder: {temp_dir}")
39
- return temp_dir
40
- except Exception as e:
41
- log.error(f"Failed to create upload folder: {e}")
42
- return tempfile.gettempdir()
43
-
44
- app.config['UPLOAD_FOLDER'] = get_upload_folder()
45
 
46
  # --- File Upload Configuration ---
47
  ALLOWED_EXTENSIONS = {'csv', 'xls', 'xlsx'}
48
 
49
- # --- Database Initialization ---
50
  db = SQLAlchemy(app)
51
 
52
  # ───────────────────────────────────────────────────────────────────────────────
53
- # DATABASE MODEL
54
  # ───────────────────────────────────────────────────────────────────────────────
55
 
56
  class Product(db.Model):
@@ -84,105 +72,41 @@ HS_CODES_DATA = []
84
  EXISTING_PRODUCT_NAMES = []
85
  HS_CODE_DESCRIPTIONS = {}
86
 
87
- def initialize_database():
88
- """Initialize database and load reference data."""
89
- try:
90
- with app.app_context():
91
- # Create all tables
92
- db.create_all()
93
- log.info("In-memory database tables created successfully")
94
-
95
- # Pre-populate with reference data from the CSV
96
- reference_products = [
97
- "RUSSELL HOBBS GLOSSY BLACK TOASTER",
98
- "RUSSELL HOBBS OVAL SLOW COOKER 6.5LTR",
99
- "RUSSELL HOBBS PRESSURE COOKER 6LTR",
100
- "SAACHI SANDWICH MAKER",
101
- "SANFORD BLENDER",
102
- "TOTALLY HOME DINNER PLATE",
103
- # Add more reference products as needed
104
- ]
105
-
106
- for product_name in reference_products:
107
- try:
108
- existing_product = Product.query.filter_by(name=product_name).first()
109
- if not existing_product:
110
- new_product = Product(
111
- name=product_name,
112
- primary_category="Kitchen Appliances",
113
- hs_code="85167200" # Sample HS code for kitchen appliances
114
- )
115
- db.session.add(new_product)
116
- except Exception as e:
117
- log.warning(f"Failed to add reference product {product_name}: {e}")
118
- continue
119
-
120
- try:
121
- db.session.commit()
122
- log.info(f"Database initialized with {len(reference_products)} reference products")
123
- except Exception as e:
124
- db.session.rollback()
125
- log.error(f"Failed to commit reference products: {e}")
126
-
127
- return True
128
-
129
- except Exception as e:
130
- log.error(f"Database initialization failed: {e}")
131
- return False
132
-
133
  def parse_hs_codes_pdf(filepath='HS Codes for use under FDMS.pdf'):
134
- """Parse HS Codes from PDF with error handling."""
135
- log.info(f"Attempting to parse HS Codes from '{filepath}'...")
136
  if not os.path.exists(filepath):
137
- log.warning(f"HS Code PDF not found at '{filepath}'. Using sample HS codes.")
138
- # Provide sample HS codes for kitchen/household items
139
- sample_codes = {
140
- "Kitchen appliances": "85167200",
141
- "Cookware": "73239100",
142
- "Glassware": "70134900",
143
- "Cutlery": "82159900",
144
- "Textiles": "63079000"
145
- }
146
- global HS_CODE_DESCRIPTIONS
147
- HS_CODE_DESCRIPTIONS.update(sample_codes)
148
- return list(sample_codes.items())
149
-
150
  codes = []
151
  try:
152
  with pdfplumber.open(filepath) as pdf:
153
- for page_num, page in enumerate(pdf.pages):
154
- try:
155
- text = page.extract_text()
156
- if not text:
157
- continue
158
-
159
- matches = re.findall(r'\"(\d{8})\"\s*,\s*\"(.*?)\"', text, re.DOTALL)
160
- for code, desc in matches:
161
- clean_desc = desc.replace('\n', ' ').strip()
162
- if code and clean_desc:
163
- codes.append({'code': code, 'description': clean_desc})
164
- HS_CODE_DESCRIPTIONS[clean_desc] = code
165
-
166
- except Exception as e:
167
- log.warning(f"Error processing page {page_num + 1}: {e}")
168
- continue
169
-
170
  except Exception as e:
171
- log.error(f"Failed to parse PDF '{filepath}': {e}")
172
-
173
- log.info(f"Successfully parsed {len(codes)} HS codes from PDF.")
174
  return codes
175
 
176
- def load_existing_products():
177
- """Load existing products from database and CSV reference."""
 
 
 
178
  try:
179
- # Load from database
180
- with app.app_context():
181
- db_products = [p.name for p in Product.query.all()]
182
- log.info(f"Loaded {len(db_products)} products from database")
183
- return db_products
184
  except Exception as e:
185
- log.error(f"Failed to load products from database: {e}")
186
  return []
187
 
188
  # ───────────────────────────────────────────────────────────────────────────────
@@ -190,178 +114,82 @@ def load_existing_products():
190
  # ───────────────────────────────────────────────────────────────────────────────
191
 
192
  def process_uploaded_file(filepath, filename):
193
- """Process uploaded file with comprehensive error handling and validation."""
194
  log.info(f"Starting processing for file: {filepath}")
195
  results = {
196
  "processed": 0, "added": 0, "updated": 0, "skipped_duplicates": 0,
197
  "errors": [], "processed_data": []
198
  }
199
-
200
- # Verify database is accessible
201
- try:
202
- with app.app_context():
203
- db.engine.execute('SELECT 1')
204
- except Exception as e:
205
- results['errors'].append(f"Database connection failed: {str(e)}")
206
- return results
207
-
208
  try:
209
- # Read file with robust error handling
210
- df = None
211
- file_ext = filename.rsplit('.', 1)[1].lower() if '.' in filename else ''
212
-
213
- # Try different encodings for CSV files
214
  if file_ext == 'csv':
215
- for encoding in ['utf-8', 'latin-1', 'cp1252']:
216
- try:
217
- df = pd.read_csv(filepath, encoding=encoding, header=None)
218
- log.info(f"Successfully read CSV with {encoding} encoding")
219
- break
220
- except (UnicodeDecodeError, ValueError):
221
- continue
222
- except Exception as e:
223
- log.warning(f"Error reading CSV with {encoding}: {e}")
224
- continue
225
  elif file_ext in ['xls', 'xlsx']:
226
- try:
227
- df = pd.read_excel(filepath, header=None, engine='openpyxl')
228
- log.info("Successfully read Excel file")
229
- except Exception as e:
230
- log.error(f"Error reading Excel file: {e}")
231
-
232
- if df is None or df.empty:
233
- results['errors'].append("Could not read the uploaded file or file is empty")
234
- return results
235
-
236
- # Determine the correct column for product names
237
- # Based on your CSV structure, product names are in column 1 (second column)
238
- product_column_idx = 1
239
-
240
- if len(df.columns) < 2:
241
- results['errors'].append("File must have at least 2 columns. Product names should be in the second column.")
242
- return results
243
-
244
- log.info(f"Processing {len(df)} rows from uploaded file")
245
- log.info(f"Using column index {product_column_idx} for product names")
246
-
247
- # Load current existing products for fuzzy matching
248
- current_existing = load_existing_products()
249
-
250
- for index, row in df.iterrows():
251
- try:
252
- raw_name = row.iloc[product_column_idx] if len(row) > product_column_idx else None
253
- results['processed'] += 1
254
-
255
- if pd.isna(raw_name) or not str(raw_name).strip():
256
- log.debug(f"Skipping empty product name at row {index + 1}")
257
- continue
258
-
259
- cleaned_name = str(raw_name).strip()
260
-
261
- # Skip if name is too short or invalid
262
- if len(cleaned_name) < 3:
263
- log.debug(f"Skipping too short product name: '{cleaned_name}'")
264
- continue
265
-
266
- # Fuzzy matching with existing products
267
- best_match, score = (cleaned_name, 100)
268
- if current_existing:
269
- try:
270
- match_result = process.extractOne(
271
- cleaned_name, current_existing, scorer=fuzz.token_sort_ratio
272
- )
273
- if match_result:
274
- best_match, score = match_result
275
- except Exception as e:
276
- log.warning(f"Fuzzy matching failed for '{cleaned_name}': {e}")
277
-
278
- validated_name = best_match if score >= FUZZY_MATCH_THRESHOLD else cleaned_name
279
-
280
- # HS Code matching
281
- best_hs_desc = None
282
- hs_code = None
283
- if HS_CODE_DESCRIPTIONS:
284
- try:
285
- hs_match = process.extractOne(
286
- validated_name, list(HS_CODE_DESCRIPTIONS.keys())
287
- )
288
- if hs_match:
289
- best_hs_desc, _ = hs_match
290
- hs_code = HS_CODE_DESCRIPTIONS.get(best_hs_desc)
291
- except Exception as e:
292
- log.warning(f"HS code matching failed for '{validated_name}': {e}")
293
-
294
- processed_entry = {
295
- "raw_name": str(raw_name),
296
- "cleaned_name": validated_name,
297
- "hs_code": hs_code,
298
- "primary_category": best_hs_desc or "N/A",
299
- "status": ""
300
- }
301
-
302
- # Database operations with comprehensive error handling
303
- try:
304
- with app.app_context():
305
- existing_product = Product.query.filter_by(name=validated_name).first()
306
-
307
- if existing_product:
308
- # Update existing product if we have new information
309
- updated = False
310
- if hs_code and existing_product.hs_code != hs_code:
311
- existing_product.hs_code = hs_code
312
- updated = True
313
- if best_hs_desc and existing_product.primary_category != best_hs_desc:
314
- existing_product.primary_category = best_hs_desc
315
- updated = True
316
-
317
- if updated:
318
- db.session.commit()
319
- results['updated'] += 1
320
- processed_entry['status'] = 'Updated'
321
- log.debug(f"Updated product: {validated_name}")
322
- else:
323
- results['skipped_duplicates'] += 1
324
- processed_entry['status'] = 'Skipped (Duplicate)'
325
- log.debug(f"Skipped duplicate: {validated_name}")
326
- else:
327
- # Add new product
328
- new_product = Product(
329
- name=validated_name,
330
- hs_code=hs_code,
331
- primary_category=best_hs_desc or 'N/A'
332
- )
333
- db.session.add(new_product)
334
- db.session.commit()
335
- results['added'] += 1
336
- processed_entry['status'] = 'Added'
337
- current_existing.append(validated_name) # Update local cache
338
- log.debug(f"Added new product: {validated_name}")
339
-
340
- results['processed_data'].append(processed_entry)
341
-
342
- except Exception as e:
343
- try:
344
- db.session.rollback()
345
- except:
346
- pass
347
- error_msg = f"Database error for '{validated_name}': {str(e)}"
348
- log.error(error_msg)
349
- results['errors'].append(error_msg)
350
- processed_entry['status'] = 'Error'
351
- results['processed_data'].append(processed_entry)
352
-
353
- except Exception as e:
354
- error_msg = f"Error processing row {index + 1}: {str(e)}"
355
- log.error(error_msg)
356
- results['errors'].append(error_msg)
357
- continue
358
-
359
  except Exception as e:
360
- error_msg = f"Critical error processing file: {str(e)}"
361
- log.error(error_msg)
362
- results['errors'].append(error_msg)
363
-
364
- log.info(f"Processing complete. Added: {results['added']}, Updated: {results['updated']}, Skipped: {results['skipped_duplicates']}, Errors: {len(results['errors'])}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  return results
366
 
367
  # ───────────────────────────────────────────────────────────────────────────────
@@ -373,191 +201,49 @@ def allowed_file(filename):
373
 
374
  @app.get("/")
375
  def root():
376
- try:
377
- # Test database connection
378
- with app.app_context():
379
- product_count = Product.query.count()
380
-
381
- return jsonify({
382
- "ok": True,
383
- "message": "The Product Validation server is running.",
384
- "database": "in-memory SQLite",
385
- "products_count": product_count,
386
- "upload_folder": app.config['UPLOAD_FOLDER']
387
- })
388
- except Exception as e:
389
- return jsonify({
390
- "ok": False,
391
- "message": "Server running but database error",
392
- "error": str(e)
393
- }), 500
394
-
395
- @app.get("/api/health")
396
- def health_check():
397
- """Comprehensive health check endpoint."""
398
- try:
399
- with app.app_context():
400
- # Test database connection and get counts
401
- product_count = Product.query.count()
402
-
403
- # Test a simple database operation
404
- test_product = Product.query.first()
405
-
406
- return jsonify({
407
- "ok": True,
408
- "database": "connected",
409
- "database_type": "in-memory SQLite",
410
- "products_count": product_count,
411
- "has_sample_data": test_product is not None,
412
- "upload_folder_exists": os.path.exists(app.config['UPLOAD_FOLDER'])
413
- })
414
- except Exception as e:
415
- log.error(f"Health check failed: {e}")
416
- return jsonify({
417
- "ok": False,
418
- "database": "disconnected",
419
- "error": str(e)
420
- }), 500
421
 
422
  @app.post("/api/upload")
423
  def upload_products():
424
- """Upload and process product file with comprehensive validation."""
425
  if 'file' not in request.files:
426
  return jsonify({"ok": False, "error": "No file part in the request"}), 400
427
-
428
  file = request.files['file']
429
  if file.filename == '':
430
  return jsonify({"ok": False, "error": "No file selected"}), 400
431
 
432
- if not allowed_file(file.filename):
433
- return jsonify({
434
- "ok": False,
435
- "error": f"Invalid file type. Allowed types are: {', '.join(ALLOWED_EXTENSIONS)}"
436
- }), 400
437
-
438
- # Verify database is accessible before processing
439
- try:
440
- with app.app_context():
441
- db.engine.execute('SELECT 1')
442
- except Exception as e:
443
- return jsonify({
444
- "ok": False,
445
- "error": f"Database not available: {str(e)}"
446
- }), 500
447
-
448
- try:
449
  filename = secure_filename(file.filename)
450
  filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
451
  file.save(filepath)
452
- log.info(f"File saved to: {filepath}")
453
-
454
- # Process the uploaded file
455
  results = process_uploaded_file(filepath, filename)
456
-
457
- # Clean up uploaded file
458
- try:
459
- os.remove(filepath)
460
- log.debug(f"Cleaned up file: {filepath}")
461
- except Exception as e:
462
- log.warning(f"Failed to clean up file {filepath}: {e}")
463
-
464
- # Return results with proper error status
465
- if results['errors'] and not (results['added'] or results['updated']):
466
- # If we have errors and no successful operations, return error status
467
- return jsonify({
468
- "ok": False,
469
- "message": "File processing failed",
470
- "results": results
471
- }), 400
472
- else:
473
- # Return success even if there were some errors, as long as some operations succeeded
474
- return jsonify({
475
- "ok": True,
476
- "message": "File processed successfully",
477
- "results": results
478
- })
479
-
480
- except Exception as e:
481
- error_msg = f"Upload processing failed: {str(e)}"
482
- log.error(error_msg)
483
- return jsonify({
484
- "ok": False,
485
- "error": error_msg
486
- }), 500
487
 
488
  @app.get("/api/products")
489
  def get_products():
490
- """Get all products from database."""
491
  log.info("Request received to fetch all products.")
492
  try:
493
- with app.app_context():
494
- all_products = Product.query.all()
495
- products_list = [product.to_dict() for product in all_products]
496
-
497
  log.info(f"Successfully retrieved {len(products_list)} products.")
498
- return jsonify({
499
- "ok": True,
500
- "count": len(products_list),
501
- "products": products_list
502
- })
503
- except Exception as e:
504
- error_msg = f"Failed to retrieve products: {str(e)}"
505
- log.error(error_msg)
506
- return jsonify({
507
- "ok": False,
508
- "error": error_msg
509
- }), 500
510
-
511
- @app.delete("/api/products")
512
- def clear_products():
513
- """Clear all products from database (useful for testing)."""
514
- try:
515
- with app.app_context():
516
- count = Product.query.count()
517
- Product.query.delete()
518
- db.session.commit()
519
-
520
- log.info(f"Cleared {count} products from database")
521
- return jsonify({
522
- "ok": True,
523
- "message": f"Cleared {count} products from database"
524
- })
525
  except Exception as e:
526
- try:
527
- db.session.rollback()
528
- except:
529
- pass
530
- error_msg = f"Failed to clear products: {str(e)}"
531
- log.error(error_msg)
532
- return jsonify({
533
- "ok": False,
534
- "error": error_msg
535
- }), 500
536
 
537
  # ───────────────────────────────────────────────────────────────────────────────
538
  # MAIN (Server Initialization)
539
  # ───────────────────────────────────────────────────────────────────────────────
540
 
541
  if __name__ == "__main__":
542
- log.info("===== Application Startup =====")
543
-
544
- # Initialize database
545
- if not initialize_database():
546
- log.error("Failed to initialize database. Exiting.")
547
- exit(1)
548
-
549
- # Load supporting data (non-critical)
550
- try:
551
- log.info("Loading supporting data...")
552
  HS_CODES_DATA = parse_hs_codes_pdf()
553
  EXISTING_PRODUCT_NAMES = load_existing_products()
554
- log.info("Supporting data loaded successfully")
555
- except Exception as e:
556
- log.warning(f"Failed to load supporting data: {e}")
557
- log.info("Server will continue with basic functionality")
558
-
559
- log.info("Server initialization complete")
560
- log.info("Using in-memory SQLite database (data will not persist between restarts)")
561
- log.info("This is optimized for Hugging Face Spaces deployment")
562
-
563
- port = int(os.environ.get("PORT", "7860"))
 
10
  from sqlalchemy.exc import IntegrityError
11
  from thefuzz import process, fuzz
12
  from werkzeug.utils import secure_filename
 
 
13
 
14
  # ───────────────────────────────────────────────────────────────────────────────
15
  # CONFIGURATION
 
21
  app = Flask(__name__)
22
  CORS(app)
23
 
24
+ # --- App Configuration ---
25
+ # --- FIX 1: Switched to a persistent file-based SQLite database ---
26
+ # This ensures data survives between requests on Hugging Face Spaces.
27
+ DB_FOLDER = 'data'
28
+ DB_PATH = os.path.join(DB_FOLDER, 'products.db')
29
+ os.makedirs(DB_FOLDER, exist_ok=True)
30
+ app.config['SQLALCHEMY_DATABASE_URI'] = f'sqlite:///{DB_PATH}'
31
  app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
32
+ app.config['UPLOAD_FOLDER'] = 'uploads'
33
+ os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  # --- File Upload Configuration ---
36
  ALLOWED_EXTENSIONS = {'csv', 'xls', 'xlsx'}
37
 
 
38
  db = SQLAlchemy(app)
39
 
40
  # ───────────────────────────────────────────────────────────────────────────────
41
+ # DATABASE MODEL (Based on products-20.sql)
42
  # ───────────────────────────────────────────────────────────────────────────────
43
 
44
  class Product(db.Model):
 
72
  EXISTING_PRODUCT_NAMES = []
73
  HS_CODE_DESCRIPTIONS = {}
74
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
  def parse_hs_codes_pdf(filepath='HS Codes for use under FDMS.pdf'):
76
+ log.info(f"Parsing HS Codes from '{filepath}'...")
 
77
  if not os.path.exists(filepath):
78
+ log.error(f"HS Code PDF not found at '{filepath}'. Categorization will fail.")
79
+ return []
 
 
 
 
 
 
 
 
 
 
 
80
  codes = []
81
  try:
82
  with pdfplumber.open(filepath) as pdf:
83
+ for page in pdf.pages:
84
+ text = page.extract_text()
85
+ # Improved regex to handle variations in PDF formatting
86
+ matches = re.findall(r'\"(\d{8})\"\s*,\s*\"(.*?)\"', text, re.DOTALL)
87
+ for code, desc in matches:
88
+ clean_desc = desc.replace('\n', ' ').strip()
89
+ if code and clean_desc:
90
+ codes.append({'code': code, 'description': clean_desc})
91
+ HS_CODE_DESCRIPTIONS[clean_desc] = code
 
 
 
 
 
 
 
 
92
  except Exception as e:
93
+ log.error(f"Failed to parse PDF: {e}")
94
+ log.info(f"Successfully parsed {len(codes)} HS codes.")
 
95
  return codes
96
 
97
+ def load_existing_products(filepath='Product List.csv'):
98
+ log.info(f"Loading master product list from '{filepath}'...")
99
+ if not os.path.exists(filepath):
100
+ log.error(f"Master product list not found at '{filepath}'. Validation may be inaccurate.")
101
+ return []
102
  try:
103
+ # Based on the CSV structure, the 'name' is in the second column.
104
+ df = pd.read_csv(filepath, usecols=[1], names=['name'], header=0)
105
+ product_names = df['name'].dropna().unique().tolist()
106
+ log.info(f"Loaded {len(product_names)} unique existing products.")
107
+ return product_names
108
  except Exception as e:
109
+ log.error(f"Failed to load master product list: {e}")
110
  return []
111
 
112
  # ───────────────────────────────────────────────────────────────────────────────
 
114
  # ───────────────────────────────────────────────────────────────────────────────
115
 
116
  def process_uploaded_file(filepath, filename):
117
+ """The main pipeline to validate, clean, categorize, and store product data."""
118
  log.info(f"Starting processing for file: {filepath}")
119
  results = {
120
  "processed": 0, "added": 0, "updated": 0, "skipped_duplicates": 0,
121
  "errors": [], "processed_data": []
122
  }
123
+ df = None
124
+
 
 
 
 
 
 
 
125
  try:
126
+ file_ext = filename.rsplit('.', 1)[1].lower()
127
+ # --- FIX 2: Robustly parse the second column (index 1) for names ---
128
+ # The user's uploaded `list.csv` clearly has the product name in the second column.
 
 
129
  if file_ext == 'csv':
130
+ df = pd.read_csv(filepath, header=None, usecols=[1], names=['product_name'])
 
 
 
 
 
 
 
 
 
131
  elif file_ext in ['xls', 'xlsx']:
132
+ df = pd.read_excel(filepath, header=None, usecols=[1], names=['product_name'], engine='openpyxl')
133
+ except ValueError:
134
+ results['errors'].append("Could not find the product name column. Ensure the product name is in the second column.")
135
+ return results
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  except Exception as e:
137
+ log.error(f"Could not read the uploaded file: {e}")
138
+ results['errors'].append(f"Invalid file format or corrupt file: {e}")
139
+ return results
140
+
141
+ if df.empty:
142
+ results['errors'].append("The uploaded file is empty.")
143
+ return results
144
+
145
+ for index, row in df.iterrows():
146
+ raw_name = row['product_name']
147
+ results['processed'] += 1
148
+
149
+ if not isinstance(raw_name, str) or not raw_name.strip():
150
+ continue
151
+
152
+ cleaned_name = raw_name.strip()
153
+
154
+ best_match, score = process.extractOne(
155
+ cleaned_name, EXISTING_PRODUCT_NAMES, scorer=fuzz.token_sort_ratio
156
+ ) if EXISTING_PRODUCT_NAMES else (cleaned_name, 100)
157
+ validated_name = best_match if score >= FUZZY_MATCH_THRESHOLD else cleaned_name
158
+
159
+ best_hs_desc, _ = process.extractOne(
160
+ validated_name, HS_CODE_DESCRIPTIONS.keys()
161
+ ) if HS_CODE_DESCRIPTIONS else (None, 0)
162
+ hs_code = HS_CODE_DESCRIPTIONS.get(best_hs_desc)
163
+
164
+ processed_entry = {
165
+ "raw_name": raw_name, "cleaned_name": validated_name, "hs_code": hs_code,
166
+ "primary_category": best_hs_desc or "N/A", "status": ""
167
+ }
168
+ try:
169
+ # Each operation needs its own app context to interact with the database
170
+ with app.app_context():
171
+ existing_product = Product.query.filter_by(name=validated_name).first()
172
+ if existing_product:
173
+ if hs_code and existing_product.hs_code != hs_code:
174
+ existing_product.hs_code = hs_code
175
+ existing_product.primary_category = best_hs_desc
176
+ db.session.commit()
177
+ results['updated'] += 1
178
+ processed_entry['status'] = 'Updated'
179
+ else:
180
+ results['skipped_duplicates'] += 1
181
+ processed_entry['status'] = 'Skipped (Duplicate)'
182
+ else:
183
+ new_product = Product(name=validated_name, hs_code=hs_code, primary_category=best_hs_desc or 'N/A')
184
+ db.session.add(new_product)
185
+ db.session.commit()
186
+ results['added'] += 1
187
+ processed_entry['status'] = 'Added'
188
+ results['processed_data'].append(processed_entry)
189
+ except Exception as e:
190
+ db.session.rollback()
191
+ log.error(f"Database error for '{validated_name}': {e}")
192
+ results['errors'].append(f"DB Error on '{validated_name}': {e}")
193
  return results
194
 
195
  # ───────────────────────────────────────────────────────────────────────────────
 
201
 
202
  @app.get("/")
203
  def root():
204
+ return jsonify({"ok": True, "message": "The Product Validation server is running."})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
206
  @app.post("/api/upload")
207
  def upload_products():
 
208
  if 'file' not in request.files:
209
  return jsonify({"ok": False, "error": "No file part in the request"}), 400
 
210
  file = request.files['file']
211
  if file.filename == '':
212
  return jsonify({"ok": False, "error": "No file selected"}), 400
213
 
214
+ if file and allowed_file(file.filename):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
215
  filename = secure_filename(file.filename)
216
  filepath = os.path.join(app.config['UPLOAD_FOLDER'], filename)
217
  file.save(filepath)
 
 
 
218
  results = process_uploaded_file(filepath, filename)
219
+ return jsonify({"ok": True, "message": "File processed successfully", "results": results})
220
+
221
+ return jsonify({"ok": False, "error": f"Invalid file type. Allowed types are: {', '.join(ALLOWED_EXTENSIONS)}"}), 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
 
223
  @app.get("/api/products")
224
  def get_products():
 
225
  log.info("Request received to fetch all products.")
226
  try:
227
+ all_products = Product.query.all()
228
+ products_list = [product.to_dict() for product in all_products]
 
 
229
  log.info(f"Successfully retrieved {len(products_list)} products.")
230
+ return jsonify({"ok": True, "count": len(products_list), "products": products_list})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  except Exception as e:
232
+ log.error(f"Could not retrieve products from database: {e}")
233
+ return jsonify({"ok": False, "error": "Failed to retrieve products from the database."}), 500
 
 
 
 
 
 
 
 
234
 
235
  # ───────────────────────────────────────────────────────────────────────────────
236
  # MAIN (Server Initialization)
237
  # ───────────────────────────────────────────────────────────────────────────────
238
 
239
  if __name__ == "__main__":
240
+ with app.app_context():
241
+ log.info("Initializing server...")
242
+ db.create_all()
 
 
 
 
 
 
 
243
  HS_CODES_DATA = parse_hs_codes_pdf()
244
  EXISTING_PRODUCT_NAMES = load_existing_products()
245
+ log.info(f"Server is ready. Database is at: {DB_PATH}")
246
+
247
+ port = int(os.environ.get("PORT", "7860"))
248
+ app.run(host="0.0.0.0", port=port, debug=False)
249
+