# """ # 🎯 COMPLETE API SERVER - Matches Cross-Store Training System # ============================================================= # ✅ Works with cross-store synonyms (washing machine = laundry machine) # ✅ Uses auto-tags from training # ✅ Single model (fast predictions) # ✅ Guaranteed category_id match # ✅ Real-time classification # """ # from flask import Flask, request, jsonify, render_template_string # from sentence_transformers import SentenceTransformer # import faiss # import pickle # import numpy as np # from pathlib import Path # import time # import re # app = Flask(__name__) # # ============================================================================ # # GLOBAL VARIABLES # # ============================================================================ # CACHE_DIR = Path('cache') # # Model # encoder = None # faiss_index = None # metadata = [] # cross_store_synonyms = {} # # ============================================================================ # # CROSS-STORE SYNONYM DATABASE (Same as training) # # ============================================================================ # def build_cross_store_synonyms(): # """Build cross-store synonym database""" # synonyms = { # # Appliances # 'washing machine': {'laundry machine', 'washer', 'clothes washer', 'washing appliance'}, # 'laundry machine': {'washing machine', 'washer', 'clothes washer'}, # 'dryer': {'drying machine', 'clothes dryer', 'tumble dryer'}, # 'refrigerator': {'fridge', 'cooler', 'ice box', 'cooling appliance'}, # 'dishwasher': {'dish washer', 'dish cleaning machine'}, # 'microwave': {'microwave oven', 'micro wave'}, # 'vacuum': {'vacuum cleaner', 'hoover', 'vac'}, # # Electronics # 'tv': {'television', 'telly', 'smart tv', 'display'}, # 'laptop': {'notebook', 'portable computer', 'laptop computer'}, # 'mobile': {'phone', 'cell phone', 'smartphone', 'cellphone'}, # 'tablet': {'ipad', 'tab', 'tablet computer'}, # 'headphones': {'headset', 'earphones', 'earbuds', 'ear buds'}, # 
'speaker': {'audio speaker', 'sound system', 'speakers'}, # # Furniture # 'sofa': {'couch', 'settee', 'divan'}, # 'wardrobe': {'closet', 'armoire', 'cupboard'}, # 'drawer': {'chest of drawers', 'dresser'}, # # Clothing # 'pants': {'trousers', 'slacks', 'bottoms'}, # 'sweater': {'jumper', 'pullover', 'sweatshirt'}, # 'sneakers': {'trainers', 'tennis shoes', 'running shoes'}, # 'jacket': {'coat', 'blazer', 'outerwear'}, # # Kitchen # 'cooker': {'stove', 'range', 'cooking range'}, # 'blender': {'mixer', 'food processor', 'liquidizer'}, # 'kettle': {'electric kettle', 'water boiler'}, # # Baby/Kids # 'stroller': {'pram', 'pushchair', 'buggy', 'baby carriage'}, # 'diaper': {'nappy', 'nappies'}, # 'pacifier': {'dummy', 'soother'}, # # Tools # 'wrench': {'spanner', 'adjustable wrench'}, # 'flashlight': {'torch', 'flash light'}, # 'screwdriver': {'screw driver'}, # # Home # 'tap': {'faucet', 'water tap'}, # 'bin': {'trash can', 'garbage can', 'waste bin'}, # 'curtain': {'drape', 'window covering'}, # # Crafts/Office # 'guillotine': {'paper cutter', 'paper trimmer', 'blade cutter'}, # 'trimmer': {'cutter', 'cutting tool', 'edge cutter'}, # 'stapler': {'stapling machine', 'staple gun'}, # # Books/Media # 'magazine': {'periodical', 'journal', 'publication'}, # 'comic': {'comic book', 'graphic novel', 'manga'}, # 'ebook': {'e-book', 'digital book', 'electronic book'}, # # General # 'kids': {'children', 'child', 'childrens', 'youth', 'junior'}, # 'women': {'womens', 'ladies', 'female', 'lady'}, # 'men': {'mens', 'male', 'gentleman'}, # 'baby': {'infant', 'newborn', 'toddler'}, # } # # Build bidirectional mapping # expanded = {} # for term, syns in synonyms.items(): # expanded[term] = syns.copy() # for syn in syns: # if syn not in expanded: # expanded[syn] = set() # expanded[syn].add(term) # expanded[syn].update(syns - {syn}) # return expanded # # ============================================================================ # # HELPER FUNCTIONS # # 
============================================================================ # def clean_text(text): # """Clean and normalize text""" # if not text: # return "" # text = str(text).lower() # text = re.sub(r'[^\w\s-]', ' ', text) # text = re.sub(r'\s+', ' ', text).strip() # return text # def extract_cross_store_terms(text): # """Extract terms with cross-store variations""" # cleaned = clean_text(text) # words = cleaned.split() # all_terms = set() # all_terms.add(cleaned) # Full text # # Single words # for word in words: # if len(word) > 2: # all_terms.add(word) # # Add cross-store synonyms # if word in cross_store_synonyms: # all_terms.update(cross_store_synonyms[word]) # # 2-word phrases # for i in range(len(words) - 1): # if len(words[i]) > 2 and len(words[i+1]) > 2: # phrase = f"{words[i]} {words[i+1]}" # all_terms.add(phrase) # if phrase in cross_store_synonyms: # all_terms.update(cross_store_synonyms[phrase]) # # 3-word phrases # if len(words) >= 3: # for i in range(len(words) - 2): # if all(len(w) > 2 for w in words[i:i+3]): # phrase = f"{words[i]} {words[i+1]} {words[i+2]}" # all_terms.add(phrase) # return list(all_terms) # def build_enhanced_query(title, description=""): # """Build enhanced query with cross-store intelligence""" # # Extract terms with variations # all_terms = extract_cross_store_terms(f"{title} {description}") # # Clean product terms # product_terms = [t for t in clean_text(f"{title} {description}").split() if len(t) > 2] # # Build query # # Emphasize original + all variations # product_text = ' '.join(product_terms) # variations_text = ' '.join(all_terms[:30]) # Top 30 variations # # Repeat for emphasis # emphasized = ' '.join([product_text] * 3) # query = f"{emphasized} {variations_text} {title} {description}" # return query, all_terms[:20] # def encode_query(text): # """Encode query using the trained model""" # embedding = encoder.encode( # text, # convert_to_numpy=True, # normalize_embeddings=True # ) # if embedding.ndim == 1: # embedding 
= embedding.reshape(1, -1) # return embedding.astype('float32') # def classify_product(title, description="", top_k=5): # """ # Classify product using trained system # Returns: category_id, category_path, confidence, and alternatives # """ # start_time = time.time() # # Step 1: Build enhanced query with cross-store synonyms # query, matched_terms = build_enhanced_query(title, description) # # Step 2: Encode query # query_embedding = encode_query(query) # # Step 3: Search FAISS index # distances, indices = faiss_index.search(query_embedding, top_k) # # Step 4: Get results # results = [] # for i in range(len(indices[0])): # idx = indices[0][i] # if idx < len(metadata): # meta = metadata[idx] # confidence = float(distances[0][i]) * 100 # # Get final product name # levels = meta.get('levels', []) # final_product = levels[-1] if levels else meta['category_path'].split('/')[-1] # results.append({ # 'rank': i + 1, # 'category_id': meta['category_id'], # 'category_path': meta['category_path'], # 'final_product': final_product, # 'confidence': round(confidence, 2), # 'depth': meta.get('depth', 0) # }) # # Best result # best = results[0] if results else None # if not best: # return { # 'error': 'No results found', # 'product': title # } # # Confidence level # conf_pct = best['confidence'] # if conf_pct >= 90: # conf_level = "EXCELLENT" # elif conf_pct >= 85: # conf_level = "VERY HIGH" # elif conf_pct >= 80: # conf_level = "HIGH" # elif conf_pct >= 75: # conf_level = "GOOD" # elif conf_pct >= 70: # conf_level = "MEDIUM" # else: # conf_level = "LOW" # processing_time = (time.time() - start_time) * 1000 # return { # 'product': title, # 'category_id': best['category_id'], # 'category_path': best['category_path'], # 'final_product': best['final_product'], # 'confidence': f"{conf_level} ({conf_pct:.2f}%)", # 'confidence_percent': conf_pct, # 'depth': best['depth'], # 'matched_terms': matched_terms, # 'top_5_results': results, # 'processing_time_ms': round(processing_time, 2) # } # 
# ============================================================================ # # SERVER INITIALIZATION # # ============================================================================ # def load_server(): # """Load all trained data""" # global encoder, faiss_index, metadata, cross_store_synonyms # print("\n" + "="*80) # print("🔄 LOADING TRAINED MODEL") # print("="*80 + "\n") # # Load model # print("📥 Loading sentence transformer...") # encoder = SentenceTransformer('sentence-transformers/all-mpnet-base-v2') # print("✅ Model loaded\n") # # Load FAISS index # print("📥 Loading FAISS index...") # index_path = CACHE_DIR / 'main_index.faiss' # if not index_path.exists(): # raise FileNotFoundError(f"FAISS index not found: {index_path}\nPlease run training first!") # faiss_index = faiss.read_index(str(index_path)) # print(f"✅ Index loaded ({faiss_index.ntotal:,} vectors)\n") # # Load metadata # print("📥 Loading metadata...") # meta_path = CACHE_DIR / 'metadata.pkl' # if not meta_path.exists(): # raise FileNotFoundError(f"Metadata not found: {meta_path}\nPlease run training first!") # with open(meta_path, 'rb') as f: # metadata = pickle.load(f) # print(f"✅ Metadata loaded ({len(metadata):,} categories)\n") # # Load cross-store synonyms # print("📥 Loading cross-store synonyms...") # syn_path = CACHE_DIR / 'cross_store_synonyms.pkl' # if syn_path.exists(): # with open(syn_path, 'rb') as f: # cross_store_synonyms = pickle.load(f) # print(f"✅ Cross-store synonyms loaded ({len(cross_store_synonyms)} terms)\n") # else: # print("⚠️ Cross-store synonyms not found, building default set...") # cross_store_synonyms = build_cross_store_synonyms() # print(f"✅ Built {len(cross_store_synonyms)} synonym mappings\n") # print("="*80) # print("✅ SERVER READY!") # print("="*80 + "\n") # # ============================================================================ # # HTML INTERFACE # # ============================================================================ # HTML_TEMPLATE = """ # # # # 
🎯 Product Category Classifier # # # # # #
#
#

🎯 Product Category Classifier

#
Cross-Store Intelligence
#
Auto-Tag Support
#
Real-Time
#
#
#
# ✅ Cross-Store Synonyms Active!
# Understands: washing machine = laundry machine | tv = television | kids = children #
#
# # #
#
# # #
# #
#
#

Analyzing...

#
#
#
#

✅ Best Match

#
#
# Product: #
#
# Category ID: # #
#
# Final Product: #
#
# Full Path:
# #
#
# Confidence: # #
#
# Depth: levels | # Time: ms #
#
#
#
#

🔗 Matched Terms (Cross-Store Variations)

#
#
#
#

📋 Top 5 Alternative Matches

#
#
#
#
#
# # # # """ # # ============================================================================ # # FLASK ROUTES # # ============================================================================ # @app.route('/') # def index(): # """Serve the web interface""" # return render_template_string(HTML_TEMPLATE) # @app.route('/classify', methods=['POST']) # def classify_route(): # """API endpoint for classification""" # data = request.json # title = data.get('title', '').strip() # description = data.get('description', '').strip() # if not title: # return jsonify({'error': 'Title required'}), 400 # try: # result = classify_product(title, description) # return jsonify(result) # except Exception as e: # print(f"Error: {e}") # return jsonify({'error': str(e)}), 500 # @app.route('/health') # def health(): # """Health check endpoint""" # return jsonify({ # 'status': 'healthy', # 'categories': len(metadata), # 'cross_store_synonyms': len(cross_store_synonyms), # 'model': 'all-mpnet-base-v2' # }) # # ============================================================================ # # MAIN # # ============================================================================ # if __name__ == '__main__': # try: # load_server() # print("\n🌐 Server starting...") # print(" URL: http://localhost:5000") # print(" Press CTRL+C to stop\n") # app.run(host='0.0.0.0', port=5000, debug=False) # except FileNotFoundError as e: # print(f"\n❌ ERROR: {e}") # print("\n💡 Solution: Run training first:") # print(" python train.py data/category_id_path_only.csv\n") # except Exception as e: # print(f"\n❌ UNEXPECTED ERROR: {e}\n") #!/usr/bin/env python3 """ API Server for product category classification Merged UI + classification logic Model: intfloat/e5-base-v2 (must match training) Usage: python api_server.py Requirements: pip install flask sentence-transformers faiss-cpu numpy pickle5 Files expected in cache/: - main_index.faiss - metadata.pkl - cross_store_synonyms.pkl (optional) """ from flask import Flask, 
request, jsonify, render_template_string from sentence_transformers import SentenceTransformer import faiss import pickle import numpy as np from pathlib import Path import time import re import os from typing import List # ============================================================================ # CONFIG # ============================================================================ CACHE_DIR = Path('cache') MODEL_NAME = 'intfloat/e5-base-v2' # <-- MUST match the model used during training FAISS_INDEX_PATH = CACHE_DIR / 'main_index.faiss' METADATA_PATH = CACHE_DIR / 'metadata.pkl' SYN_PATH = CACHE_DIR / 'cross_store_synonyms.pkl' # Server globals encoder = None faiss_index = None metadata = [] cross_store_synonyms = {} # ============================================================================ # CROSS-STORE SYNONYM FALLBACK # ============================================================================ def build_cross_store_synonyms(): """Default cross-store synonyms fallback (bidirectional mapping). If you have a trained cross_store_synonyms.pkl produced by training, the server will load that file instead. This function only used when no file exists in the cache. 
""" synonyms = { 'washing machine': {'laundry machine', 'washer', 'clothes washer', 'washing appliance'}, 'laundry machine': {'washing machine', 'washer', 'clothes washer'}, 'dryer': {'drying machine', 'clothes dryer', 'tumble dryer'}, 'refrigerator': {'fridge', 'cooler', 'ice box', 'cooling appliance'}, 'dishwasher': {'dish washer', 'dish cleaning machine'}, 'microwave': {'microwave oven', 'micro wave'}, 'vacuum': {'vacuum cleaner', 'hoover', 'vac'}, 'tv': {'television', 'telly', 'smart tv', 'display'}, 'laptop': {'notebook', 'portable computer', 'laptop computer'}, 'mobile': {'phone', 'cell phone', 'smartphone', 'cellphone'}, 'tablet': {'ipad', 'tab', 'tablet computer'}, 'headphones': {'headset', 'earphones', 'earbuds', 'ear buds'}, 'speaker': {'audio speaker', 'sound system', 'speakers'}, 'sofa': {'couch', 'settee', 'divan'}, 'wardrobe': {'closet', 'armoire', 'cupboard'}, 'drawer': {'chest of drawers', 'dresser'}, 'pants': {'trousers', 'slacks', 'bottoms'}, 'sweater': {'jumper', 'pullover', 'sweatshirt'}, 'sneakers': {'trainers', 'tennis shoes', 'running shoes'}, 'jacket': {'coat', 'blazer', 'outerwear'}, 'cooker': {'stove', 'range', 'cooking range'}, 'blender': {'mixer', 'food processor', 'liquidizer'}, 'kettle': {'electric kettle', 'water boiler'}, 'stroller': {'pram', 'pushchair', 'buggy', 'baby carriage'}, 'diaper': {'nappy', 'nappies'}, 'pacifier': {'dummy', 'soother'}, 'wrench': {'spanner', 'adjustable wrench'}, 'flashlight': {'torch', 'flash light'}, 'screwdriver': {'screw driver'}, 'tap': {'faucet', 'water tap'}, 'bin': {'trash can', 'garbage can', 'waste bin'}, 'curtain': {'drape', 'window covering'}, 'guillotine': {'paper cutter', 'paper trimmer', 'blade cutter'}, 'trimmer': {'cutter', 'cutting tool', 'edge cutter'}, 'stapler': {'stapling machine', 'staple gun'}, 'magazine': {'periodical', 'journal', 'publication'}, 'comic': {'comic book', 'graphic novel', 'manga'}, 'ebook': {'e-book', 'digital book', 'electronic book'}, 'kids': {'children', 'child', 
'childrens', 'youth', 'junior'}, 'women': {'womens', 'ladies', 'female', 'lady'}, 'men': {'mens', 'male', 'gentleman'}, 'baby': {'infant', 'newborn', 'toddler'}, } expanded = {} for term, syns in synonyms.items(): expanded[term] = set(syns) for syn in syns: if syn not in expanded: expanded[syn] = set() expanded[syn].add(term) expanded[syn].update(syns - {syn}) return expanded # ============================================================================ # TEXT CLEANING / QUERY BUILDING # ============================================================================ def clean_text(text: str) -> str: if not text: return "" text = str(text).lower() # keep alphanumerics, dashes and spaces text = re.sub(r"[^\w\s-]", " ", text) text = re.sub(r"\s+", " ", text).strip() return text def extract_cross_store_terms(text: str) -> List[str]: cleaned = clean_text(text) words = cleaned.split() all_terms = set() all_terms.add(cleaned) # full cleaned text # single words + synonyms for word in words: if len(word) > 2: all_terms.add(word) if word in cross_store_synonyms: all_terms.update(cross_store_synonyms[word]) # 2-word phrases for i in range(len(words) - 1): if len(words[i]) > 2 and len(words[i + 1]) > 2: phrase = f"{words[i]} {words[i+1]}" all_terms.add(phrase) if phrase in cross_store_synonyms: all_terms.update(cross_store_synonyms[phrase]) # 3-word phrases if len(words) >= 3: for i in range(len(words) - 2): if all(len(w) > 2 for w in words[i:i + 3]): phrase = f"{words[i]} {words[i+1]} {words[i+2]}" all_terms.add(phrase) return list(all_terms) def build_enhanced_query(title, description="", max_synonyms=10): """Build query emphasizing original title and cross-store variations""" title_clean = clean_text(title) description_clean = clean_text(description) # Extract cross-store variations synonyms_list = extract_cross_store_terms(f"{title_clean} {description_clean}") # Emphasize original title 3x, then include top synonyms enhanced_query = ' '.join([title_clean] * 3 + 
synonyms_list[:max_synonyms]) return enhanced_query, synonyms_list[:20] # return top 20 for matched_terms display # ============================================================================ # ENCODER / FAISS # ============================================================================ def encode_query(text: str) -> np.ndarray: embedding = encoder.encode(text, convert_to_numpy=True, normalize_embeddings=True) if embedding.ndim == 1: embedding = embedding.reshape(1, -1) return embedding.astype('float32') def classify_product(title, description="", top_k=5): """Classify product using e5-base embeddings with cross-store optimization""" start_time = time.time() # Step 1: Build enhanced query query_text, matched_terms = build_enhanced_query(title, description) # Step 2: Encode query query_embedding = encoder.encode( query_text, convert_to_numpy=True, normalize_embeddings=True ).astype('float32') if query_embedding.ndim == 1: query_embedding = query_embedding.reshape(1, -1) # Step 3: FAISS search distances, indices = faiss_index.search(query_embedding, top_k) results = [] for i, idx in enumerate(indices[0]): if idx >= len(metadata): continue meta = metadata[idx] # Convert FAISS distance to cosine similarity similarity = 1 - distances[0][i] confidence_pct = float(similarity) * 100 final_product = meta.get('levels', [])[-1] if meta.get('levels') else meta['category_path'].split('/')[-1] results.append({ 'rank': i + 1, 'category_id': meta['category_id'], 'category_path': meta['category_path'], 'final_product': final_product, 'confidence': round(confidence_pct, 2), 'depth': meta.get('depth', 0) }) if not results: return {'error': 'No results found', 'product': title} # Pick best match best = results[0] conf_pct = best['confidence'] if conf_pct >= 90: conf_level = "EXCELLENT" elif conf_pct >= 85: conf_level = "VERY HIGH" elif conf_pct >= 80: conf_level = "HIGH" elif conf_pct >= 75: conf_level = "GOOD" elif conf_pct >= 70: conf_level = "MEDIUM" else: conf_level = "LOW" 
processing_time = (time.time() - start_time) * 1000 return { 'product': title, 'category_id': best['category_id'], 'category_path': best['category_path'], 'final_product': best['final_product'], 'confidence': f"{conf_level} ({conf_pct:.2f}%)", 'confidence_percent': conf_pct, 'depth': best['depth'], 'matched_terms': matched_terms, 'top_5_results': results, 'processing_time_ms': round(processing_time, 2) } # FAISS returns squared L2 distances or inner product depending on index type. # We'll treat lower distance as better. We convert to a 0-100-ish confidence by # using a simple heuristic: score = 100 - normalized_distance*100 (clamped). # Determine a normalization constant: use mean of top distance if available flat_dist = distances[0] max_d = float(np.max(flat_dist)) if flat_dist.size else 1.0 min_d = float(np.min(flat_dist)) if flat_dist.size else 0.0 range_d = max(1e-6, max_d - min_d) for i, idx in enumerate(indices[0]): if idx < 0 or idx >= len(metadata): continue meta = metadata[idx] raw_d = float(distances[0][i]) # normalize and invert to make higher -> better norm = (raw_d - min_d) / range_d conf = max(0.0, min(100.0, 100.0 * (1.0 - norm))) levels = meta.get('levels') or [] final_product = levels[-1] if levels else meta.get('category_path', '').split('/')[-1] results.append({ 'rank': i + 1, 'category_id': meta.get('category_id'), 'category_path': meta.get('category_path'), 'final_product': final_product, 'confidence': round(conf, 2), 'depth': meta.get('depth', 0) }) if not results: return { 'error': 'No results found', 'product': title } best = results[0] conf_pct = best['confidence'] if conf_pct >= 90: conf_level = "EXCELLENT" elif conf_pct >= 85: conf_level = "VERY HIGH" elif conf_pct >= 80: conf_level = "HIGH" elif conf_pct >= 75: conf_level = "GOOD" elif conf_pct >= 70: conf_level = "MEDIUM" else: conf_level = "LOW" processing_time = (time.time() - start_time) * 1000.0 return { 'product': title, 'category_id': best['category_id'], 'category_path': 
best['category_path'], 'final_product': best['final_product'], 'confidence': f"{conf_level} ({conf_pct:.2f}%)", 'confidence_percent': conf_pct, 'depth': best['depth'], 'matched_terms': matched_terms, 'top_5_results': results, 'processing_time_ms': round(processing_time, 2) } # ============================================================================ # SERVER LOAD # ============================================================================ def load_server(): global encoder, faiss_index, metadata, cross_store_synonyms print('\n' + '=' * 80) print('🔄 LOADING TRAINED MODEL') print('=' * 80 + '\n') # Load encoder print('📥 Loading sentence transformer...') encoder = SentenceTransformer(MODEL_NAME) print('✅ Model loaded\n') # Load FAISS index print('📥 Loading FAISS index...') if not FAISS_INDEX_PATH.exists(): raise FileNotFoundError(f"FAISS index not found: {FAISS_INDEX_PATH}\nPlease run training first!") faiss_index = faiss.read_index(str(FAISS_INDEX_PATH)) print(f"✅ Index loaded ({faiss_index.ntotal:,} vectors)\n") # Load metadata print('📥 Loading metadata...') if not METADATA_PATH.exists(): raise FileNotFoundError(f"Metadata not found: {METADATA_PATH}\nPlease run training first!") with open(METADATA_PATH, 'rb') as f: metadata = pickle.load(f) print(f"✅ Metadata loaded ({len(metadata):,} categories)\n") # Load or build cross-store synonyms print('📥 Loading cross-store synonyms...') if SYN_PATH.exists(): with open(SYN_PATH, 'rb') as f: cross_store_synonyms = pickle.load(f) print(f"✅ Cross-store synonyms loaded ({len(cross_store_synonyms)} terms)\n") else: print('⚠️ Cross-store synonyms not found, building default set...') cross_store_synonyms = build_cross_store_synonyms() print(f"✅ Built {len(cross_store_synonyms)} synonym mappings\n") print('=' * 80) print('✅ SERVER READY!') print('=' * 80 + '\n') # ============================================================================ # HTML TEMPLATE (same as provided) # 
HTML_TEMPLATE = r"""<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <title>🎯 Product Category Classifier</title>
  <style>
    body { font-family: Arial, sans-serif; max-width: 900px; margin: 2em auto; }
    .badge { display: inline-block; background: #eef; border-radius: 4px; padding: 2px 8px; margin-right: 6px; }
    .hidden { display: none; }
    .result-box { border: 1px solid #ccc; border-radius: 6px; padding: 1em; margin-top: 1em; }
    table { border-collapse: collapse; width: 100%; }
    th, td { border: 1px solid #ddd; padding: 4px 8px; text-align: left; }
  </style>
</head>
<body>
  <h1>🎯 Product Category Classifier</h1>
  <div>
    <span class="badge">Cross-Store Intelligence</span>
    <span class="badge">Auto-Tag Support</span>
    <span class="badge">Real-Time</span>
  </div>
  <p><b>✅ Cross-Store Synonyms Active!</b><br>
     Understands: washing machine = laundry machine | tv = television | kids = children</p>

  <input id="title" placeholder="Product title" size="60">
  <br><br>
  <textarea id="description" placeholder="Optional description" rows="3" cols="62"></textarea>
  <br><br>
  <button onclick="classify()">Classify</button>
  <span id="loading" class="hidden">Analyzing...</span>

  <div id="error" class="result-box hidden"></div>

  <div id="result" class="result-box hidden">
    <h2>✅ Best Match</h2>
    <p><b>Product:</b> <span id="r-product"></span></p>
    <p><b>Category ID:</b> <span id="r-category-id"></span></p>
    <p><b>Final Product:</b> <span id="r-final"></span></p>
    <p><b>Full Path:</b> <span id="r-path"></span></p>
    <p><b>Confidence:</b> <span id="r-confidence"></span></p>
    <p><b>Depth:</b> <span id="r-depth"></span> levels |
       <b>Time:</b> <span id="r-time"></span> ms</p>
    <h3>🔗 Matched Terms (Cross-Store Variations)</h3>
    <div id="r-terms"></div>
    <h3>📋 Top 5 Alternative Matches</h3>
    <table>
      <thead><tr><th>#</th><th>Category</th><th>Confidence</th></tr></thead>
      <tbody id="r-top5"></tbody>
    </table>
  </div>

<script>
async function classify() {
  const title = document.getElementById('title').value.trim();
  const description = document.getElementById('description').value.trim();
  const loading = document.getElementById('loading');
  const errBox = document.getElementById('error');
  const resBox = document.getElementById('result');
  errBox.classList.add('hidden');
  resBox.classList.add('hidden');
  if (!title) { errBox.textContent = 'Title required'; errBox.classList.remove('hidden'); return; }
  loading.classList.remove('hidden');
  try {
    const resp = await fetch('/classify', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({title: title, description: description})
    });
    const data = await resp.json();
    if (!resp.ok || data.error) {
      errBox.textContent = data.error || ('HTTP ' + resp.status);
      errBox.classList.remove('hidden');
      return;
    }
    document.getElementById('r-product').textContent = data.product;
    document.getElementById('r-category-id').textContent = data.category_id;
    document.getElementById('r-final').textContent = data.final_product;
    document.getElementById('r-path').textContent = data.category_path;
    document.getElementById('r-confidence').textContent = data.confidence;
    document.getElementById('r-depth').textContent = data.depth;
    document.getElementById('r-time').textContent = data.processing_time_ms;
    document.getElementById('r-terms').textContent = (data.matched_terms || []).join(', ');
    const tbody = document.getElementById('r-top5');
    tbody.innerHTML = '';
    (data.top_5_results || []).forEach(function(r) {
      const tr = document.createElement('tr');
      [r.rank, r.category_path, r.confidence + '%'].forEach(function(cell) {
        const td = document.createElement('td');
        td.textContent = cell;
        tr.appendChild(td);
      });
      tbody.appendChild(tr);
    });
    resBox.classList.remove('hidden');
  } catch (e) {
    errBox.textContent = String(e);
    errBox.classList.remove('hidden');
  } finally {
    loading.classList.add('hidden');
  }
}
</script>
</body>
</html>
"""

# ============================================================================
# FLASK APP
# ============================================================================
app = Flask(__name__)


@app.route('/')
def index():
    """Serve the single-page web interface."""
    return render_template_string(HTML_TEMPLATE)


@app.route('/classify', methods=['POST'])
def classify_route():
    """JSON API: {"title": str, "description": str} -> classification result."""
    # silent=True makes a missing/malformed JSON body yield None (instead of
    # raising an HTML 400 page), so the API always answers with JSON.
    data = request.get_json(silent=True) or {}
    # `or ''` guards against explicit null values in the payload.
    title = (data.get('title') or '').strip()
    description = (data.get('description') or '').strip()
    if not title:
        return jsonify({'error': 'Title required'}), 400
    try:
        result = classify_product(title, description)
        return jsonify(result)
    except Exception as e:
        app.logger.exception('Classification error')
        return jsonify({'error': str(e)}), 500


@app.route('/health')
def health():
    """Liveness probe with basic model stats."""
    return jsonify({
        'status': 'healthy',
        'categories': len(metadata),
        'cross_store_synonyms': len(cross_store_synonyms),
        'model': MODEL_NAME
    })


# ============================================================================
# MAIN
# ============================================================================
if __name__ == '__main__':
    try:
        load_server()
        print('\n🌐 Server starting...')
        print('   URL: http://localhost:5000')
        print('   Press CTRL+C to stop\n')
        # Recommended: run with a production server like gunicorn for production use
        app.run(host='0.0.0.0', port=5000, debug=False)
    except FileNotFoundError as e:
        print(f"\n❌ ERROR: {e}")
        print('\n💡 Solution: Run training first to create FAISS index and metadata')
    except Exception as e:
        print(f"\n❌ UNEXPECTED ERROR: {e}\n")