# """
# 🎯 COMPLETE API SERVER - Matches Cross-Store Training System
# =============================================================
# ✅ Works with cross-store synonyms (washing machine = laundry machine)
# ✅ Uses auto-tags from training
# ✅ Single model (fast predictions)
# ✅ Guaranteed category_id match
# ✅ Real-time classification
# """
# from flask import Flask, request, jsonify, render_template_string
# from sentence_transformers import SentenceTransformer
# import faiss
# import pickle
# import numpy as np
# from pathlib import Path
# import time
# import re
# app = Flask(__name__)
# # ============================================================================
# # GLOBAL VARIABLES
# # ============================================================================
# CACHE_DIR = Path('cache')
# # Model
# encoder = None
# faiss_index = None
# metadata = []
# cross_store_synonyms = {}
# # ============================================================================
# # CROSS-STORE SYNONYM DATABASE (Same as training)
# # ============================================================================
# def build_cross_store_synonyms():
# """Build cross-store synonym database"""
# synonyms = {
# # Appliances
# 'washing machine': {'laundry machine', 'washer', 'clothes washer', 'washing appliance'},
# 'laundry machine': {'washing machine', 'washer', 'clothes washer'},
# 'dryer': {'drying machine', 'clothes dryer', 'tumble dryer'},
# 'refrigerator': {'fridge', 'cooler', 'ice box', 'cooling appliance'},
# 'dishwasher': {'dish washer', 'dish cleaning machine'},
# 'microwave': {'microwave oven', 'micro wave'},
# 'vacuum': {'vacuum cleaner', 'hoover', 'vac'},
# # Electronics
# 'tv': {'television', 'telly', 'smart tv', 'display'},
# 'laptop': {'notebook', 'portable computer', 'laptop computer'},
# 'mobile': {'phone', 'cell phone', 'smartphone', 'cellphone'},
# 'tablet': {'ipad', 'tab', 'tablet computer'},
# 'headphones': {'headset', 'earphones', 'earbuds', 'ear buds'},
# 'speaker': {'audio speaker', 'sound system', 'speakers'},
# # Furniture
# 'sofa': {'couch', 'settee', 'divan'},
# 'wardrobe': {'closet', 'armoire', 'cupboard'},
# 'drawer': {'chest of drawers', 'dresser'},
# # Clothing
# 'pants': {'trousers', 'slacks', 'bottoms'},
# 'sweater': {'jumper', 'pullover', 'sweatshirt'},
# 'sneakers': {'trainers', 'tennis shoes', 'running shoes'},
# 'jacket': {'coat', 'blazer', 'outerwear'},
# # Kitchen
# 'cooker': {'stove', 'range', 'cooking range'},
# 'blender': {'mixer', 'food processor', 'liquidizer'},
# 'kettle': {'electric kettle', 'water boiler'},
# # Baby/Kids
# 'stroller': {'pram', 'pushchair', 'buggy', 'baby carriage'},
# 'diaper': {'nappy', 'nappies'},
# 'pacifier': {'dummy', 'soother'},
# # Tools
# 'wrench': {'spanner', 'adjustable wrench'},
# 'flashlight': {'torch', 'flash light'},
# 'screwdriver': {'screw driver'},
# # Home
# 'tap': {'faucet', 'water tap'},
# 'bin': {'trash can', 'garbage can', 'waste bin'},
# 'curtain': {'drape', 'window covering'},
# # Crafts/Office
# 'guillotine': {'paper cutter', 'paper trimmer', 'blade cutter'},
# 'trimmer': {'cutter', 'cutting tool', 'edge cutter'},
# 'stapler': {'stapling machine', 'staple gun'},
# # Books/Media
# 'magazine': {'periodical', 'journal', 'publication'},
# 'comic': {'comic book', 'graphic novel', 'manga'},
# 'ebook': {'e-book', 'digital book', 'electronic book'},
# # General
# 'kids': {'children', 'child', 'childrens', 'youth', 'junior'},
# 'women': {'womens', 'ladies', 'female', 'lady'},
# 'men': {'mens', 'male', 'gentleman'},
# 'baby': {'infant', 'newborn', 'toddler'},
# }
# # Build bidirectional mapping
# expanded = {}
# for term, syns in synonyms.items():
# expanded[term] = syns.copy()
# for syn in syns:
# if syn not in expanded:
# expanded[syn] = set()
# expanded[syn].add(term)
# expanded[syn].update(syns - {syn})
# return expanded
# # ============================================================================
# # HELPER FUNCTIONS
# # ============================================================================
# def clean_text(text):
# """Clean and normalize text"""
# if not text:
# return ""
# text = str(text).lower()
# text = re.sub(r'[^\w\s-]', ' ', text)
# text = re.sub(r'\s+', ' ', text).strip()
# return text
# def extract_cross_store_terms(text):
# """Extract terms with cross-store variations"""
# cleaned = clean_text(text)
# words = cleaned.split()
# all_terms = set()
# all_terms.add(cleaned) # Full text
# # Single words
# for word in words:
# if len(word) > 2:
# all_terms.add(word)
# # Add cross-store synonyms
# if word in cross_store_synonyms:
# all_terms.update(cross_store_synonyms[word])
# # 2-word phrases
# for i in range(len(words) - 1):
# if len(words[i]) > 2 and len(words[i+1]) > 2:
# phrase = f"{words[i]} {words[i+1]}"
# all_terms.add(phrase)
# if phrase in cross_store_synonyms:
# all_terms.update(cross_store_synonyms[phrase])
# # 3-word phrases
# if len(words) >= 3:
# for i in range(len(words) - 2):
# if all(len(w) > 2 for w in words[i:i+3]):
# phrase = f"{words[i]} {words[i+1]} {words[i+2]}"
# all_terms.add(phrase)
# return list(all_terms)
# def build_enhanced_query(title, description=""):
# """Build enhanced query with cross-store intelligence"""
# # Extract terms with variations
# all_terms = extract_cross_store_terms(f"{title} {description}")
# # Clean product terms
# product_terms = [t for t in clean_text(f"{title} {description}").split() if len(t) > 2]
# # Build query
# # Emphasize original + all variations
# product_text = ' '.join(product_terms)
# variations_text = ' '.join(all_terms[:30]) # Top 30 variations
# # Repeat for emphasis
# emphasized = ' '.join([product_text] * 3)
# query = f"{emphasized} {variations_text} {title} {description}"
# return query, all_terms[:20]
# def encode_query(text):
# """Encode query using the trained model"""
# embedding = encoder.encode(
# text,
# convert_to_numpy=True,
# normalize_embeddings=True
# )
# if embedding.ndim == 1:
# embedding = embedding.reshape(1, -1)
# return embedding.astype('float32')
# def classify_product(title, description="", top_k=5):
# """
# Classify product using trained system
# Returns: category_id, category_path, confidence, and alternatives
# """
# start_time = time.time()
# # Step 1: Build enhanced query with cross-store synonyms
# query, matched_terms = build_enhanced_query(title, description)
# # Step 2: Encode query
# query_embedding = encode_query(query)
# # Step 3: Search FAISS index
# distances, indices = faiss_index.search(query_embedding, top_k)
# # Step 4: Get results
# results = []
# for i in range(len(indices[0])):
# idx = indices[0][i]
# if idx < len(metadata):
# meta = metadata[idx]
# confidence = float(distances[0][i]) * 100
# # Get final product name
# levels = meta.get('levels', [])
# final_product = levels[-1] if levels else meta['category_path'].split('/')[-1]
# results.append({
# 'rank': i + 1,
# 'category_id': meta['category_id'],
# 'category_path': meta['category_path'],
# 'final_product': final_product,
# 'confidence': round(confidence, 2),
# 'depth': meta.get('depth', 0)
# })
# # Best result
# best = results[0] if results else None
# if not best:
# return {
# 'error': 'No results found',
# 'product': title
# }
# # Confidence level
# conf_pct = best['confidence']
# if conf_pct >= 90:
# conf_level = "EXCELLENT"
# elif conf_pct >= 85:
# conf_level = "VERY HIGH"
# elif conf_pct >= 80:
# conf_level = "HIGH"
# elif conf_pct >= 75:
# conf_level = "GOOD"
# elif conf_pct >= 70:
# conf_level = "MEDIUM"
# else:
# conf_level = "LOW"
# processing_time = (time.time() - start_time) * 1000
# return {
# 'product': title,
# 'category_id': best['category_id'],
# 'category_path': best['category_path'],
# 'final_product': best['final_product'],
# 'confidence': f"{conf_level} ({conf_pct:.2f}%)",
# 'confidence_percent': conf_pct,
# 'depth': best['depth'],
# 'matched_terms': matched_terms,
# 'top_5_results': results,
# 'processing_time_ms': round(processing_time, 2)
# }
# # ============================================================================
# # SERVER INITIALIZATION
# # ============================================================================
# def load_server():
# """Load all trained data"""
# global encoder, faiss_index, metadata, cross_store_synonyms
# print("\n" + "="*80)
# print("🔄 LOADING TRAINED MODEL")
# print("="*80 + "\n")
# # Load model
# print("📥 Loading sentence transformer...")
# encoder = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
# print("✅ Model loaded\n")
# # Load FAISS index
# print("📥 Loading FAISS index...")
# index_path = CACHE_DIR / 'main_index.faiss'
# if not index_path.exists():
# raise FileNotFoundError(f"FAISS index not found: {index_path}\nPlease run training first!")
# faiss_index = faiss.read_index(str(index_path))
# print(f"✅ Index loaded ({faiss_index.ntotal:,} vectors)\n")
# # Load metadata
# print("📥 Loading metadata...")
# meta_path = CACHE_DIR / 'metadata.pkl'
# if not meta_path.exists():
# raise FileNotFoundError(f"Metadata not found: {meta_path}\nPlease run training first!")
# with open(meta_path, 'rb') as f:
# metadata = pickle.load(f)
# print(f"✅ Metadata loaded ({len(metadata):,} categories)\n")
# # Load cross-store synonyms
# print("📥 Loading cross-store synonyms...")
# syn_path = CACHE_DIR / 'cross_store_synonyms.pkl'
# if syn_path.exists():
# with open(syn_path, 'rb') as f:
# cross_store_synonyms = pickle.load(f)
# print(f"✅ Cross-store synonyms loaded ({len(cross_store_synonyms)} terms)\n")
# else:
# print("⚠️ Cross-store synonyms not found, building default set...")
# cross_store_synonyms = build_cross_store_synonyms()
# print(f"✅ Built {len(cross_store_synonyms)} synonym mappings\n")
# print("="*80)
# print("✅ SERVER READY!")
# print("="*80 + "\n")
# # ============================================================================
# # HTML INTERFACE
# # ============================================================================
# HTML_TEMPLATE = """
#
#
#
# 🎯 Product Category Classifier
#
#
#
#
#
#
#
#
🎯 Product Category Classifier
#
Cross-Store Intelligence
#
Auto-Tag Support
#
Real-Time
#
#
#
# ✅ Cross-Store Synonyms Active!
# Understands: washing machine = laundry machine | tv = television | kids = children
#
#
#
#
#
#
#
#
#
#
#
#
#
Analyzing...
#
#
#
#
✅ Best Match
#
#
# Product:
#
#
# Category ID:
#
#
#
# Final Product:
#
#
# Full Path:
#
#
#
# Confidence:
#
#
#
# Depth: levels |
# Time: ms
#
#
#
#
#
🔗 Matched Terms (Cross-Store Variations)
#
#
#
#
📋 Top 5 Alternative Matches
#
#
#
#
#
#
#
#
# """
# # ============================================================================
# # FLASK ROUTES
# # ============================================================================
# @app.route('/')
# def index():
# """Serve the web interface"""
# return render_template_string(HTML_TEMPLATE)
# @app.route('/classify', methods=['POST'])
# def classify_route():
# """API endpoint for classification"""
# data = request.json
# title = data.get('title', '').strip()
# description = data.get('description', '').strip()
# if not title:
# return jsonify({'error': 'Title required'}), 400
# try:
# result = classify_product(title, description)
# return jsonify(result)
# except Exception as e:
# print(f"Error: {e}")
# return jsonify({'error': str(e)}), 500
# @app.route('/health')
# def health():
# """Health check endpoint"""
# return jsonify({
# 'status': 'healthy',
# 'categories': len(metadata),
# 'cross_store_synonyms': len(cross_store_synonyms),
# 'model': 'all-mpnet-base-v2'
# })
# # ============================================================================
# # MAIN
# # ============================================================================
# if __name__ == '__main__':
# try:
# load_server()
# print("\n🌐 Server starting...")
# print(" URL: http://localhost:5000")
# print(" Press CTRL+C to stop\n")
# app.run(host='0.0.0.0', port=5000, debug=False)
# except FileNotFoundError as e:
# print(f"\n❌ ERROR: {e}")
# print("\n💡 Solution: Run training first:")
# print(" python train.py data/category_id_path_only.csv\n")
# except Exception as e:
# print(f"\n❌ UNEXPECTED ERROR: {e}\n")
#!/usr/bin/env python3
"""
API Server for product category classification
Merged UI + classification logic
Model: intfloat/e5-base-v2 (must match training)
Usage:
python api_server.py
Requirements:
    pip install flask sentence-transformers faiss-cpu numpy
    (pickle is part of the Python standard library; no pickle5 install needed)
Files expected in cache/:
- main_index.faiss
- metadata.pkl
- cross_store_synonyms.pkl (optional)
"""
from flask import Flask, request, jsonify, render_template_string
from sentence_transformers import SentenceTransformer
import faiss
import pickle
import numpy as np
from pathlib import Path
import time
import re
import os
from typing import List
# ============================================================================
# CONFIG
# ============================================================================
CACHE_DIR = Path('cache')  # directory holding all training artifacts
MODEL_NAME = 'intfloat/e5-base-v2'  # <-- MUST match the model used during training
FAISS_INDEX_PATH = CACHE_DIR / 'main_index.faiss'
METADATA_PATH = CACHE_DIR / 'metadata.pkl'
SYN_PATH = CACHE_DIR / 'cross_store_synonyms.pkl'
# Server globals -- populated exactly once by load_server() before any
# request is handled; every route handler reads them.
encoder = None  # SentenceTransformer instance
faiss_index = None  # FAISS index with one vector per category
metadata = []  # per-category dicts: category_id, category_path, levels, depth
cross_store_synonyms = {}  # term -> set of synonym terms
# ============================================================================
# CROSS-STORE SYNONYM FALLBACK
# ============================================================================
def build_cross_store_synonyms():
    """Default cross-store synonyms fallback (bidirectional mapping).

    If a trained cross_store_synonyms.pkl exists in the cache, the server
    loads that file instead; this function is only used when no file exists.

    Returns:
        dict mapping every term (keys AND their synonyms) to a set of all
        terms it is interchangeable with.
    """
    synonyms = {
        'washing machine': {'laundry machine', 'washer', 'clothes washer', 'washing appliance'},
        'laundry machine': {'washing machine', 'washer', 'clothes washer'},
        'dryer': {'drying machine', 'clothes dryer', 'tumble dryer'},
        'refrigerator': {'fridge', 'cooler', 'ice box', 'cooling appliance'},
        'dishwasher': {'dish washer', 'dish cleaning machine'},
        'microwave': {'microwave oven', 'micro wave'},
        'vacuum': {'vacuum cleaner', 'hoover', 'vac'},
        'tv': {'television', 'telly', 'smart tv', 'display'},
        'laptop': {'notebook', 'portable computer', 'laptop computer'},
        'mobile': {'phone', 'cell phone', 'smartphone', 'cellphone'},
        'tablet': {'ipad', 'tab', 'tablet computer'},
        'headphones': {'headset', 'earphones', 'earbuds', 'ear buds'},
        'speaker': {'audio speaker', 'sound system', 'speakers'},
        'sofa': {'couch', 'settee', 'divan'},
        'wardrobe': {'closet', 'armoire', 'cupboard'},
        'drawer': {'chest of drawers', 'dresser'},
        'pants': {'trousers', 'slacks', 'bottoms'},
        'sweater': {'jumper', 'pullover', 'sweatshirt'},
        'sneakers': {'trainers', 'tennis shoes', 'running shoes'},
        'jacket': {'coat', 'blazer', 'outerwear'},
        'cooker': {'stove', 'range', 'cooking range'},
        'blender': {'mixer', 'food processor', 'liquidizer'},
        'kettle': {'electric kettle', 'water boiler'},
        'stroller': {'pram', 'pushchair', 'buggy', 'baby carriage'},
        'diaper': {'nappy', 'nappies'},
        'pacifier': {'dummy', 'soother'},
        'wrench': {'spanner', 'adjustable wrench'},
        'flashlight': {'torch', 'flash light'},
        'screwdriver': {'screw driver'},
        'tap': {'faucet', 'water tap'},
        'bin': {'trash can', 'garbage can', 'waste bin'},
        'curtain': {'drape', 'window covering'},
        'guillotine': {'paper cutter', 'paper trimmer', 'blade cutter'},
        'trimmer': {'cutter', 'cutting tool', 'edge cutter'},
        'stapler': {'stapling machine', 'staple gun'},
        'magazine': {'periodical', 'journal', 'publication'},
        'comic': {'comic book', 'graphic novel', 'manga'},
        'ebook': {'e-book', 'digital book', 'electronic book'},
        'kids': {'children', 'child', 'childrens', 'youth', 'junior'},
        'women': {'womens', 'ladies', 'female', 'lady'},
        'men': {'mens', 'male', 'gentleman'},
        'baby': {'infant', 'newborn', 'toddler'},
    }
    expanded = {}
    for term, syns in synonyms.items():
        # BUGFIX: a term can already be present in `expanded` as a synonym of
        # an earlier key (e.g. 'laundry machine' appears under 'washing
        # machine' first). Plain assignment here would overwrite and drop the
        # reverse mappings collected so far, so merge instead.
        expanded.setdefault(term, set()).update(syns)
        for syn in syns:
            entry = expanded.setdefault(syn, set())
            entry.add(term)
            entry.update(syns - {syn})
    return expanded
# ============================================================================
# TEXT CLEANING / QUERY BUILDING
# ============================================================================
def clean_text(text: str) -> str:
    """Normalize *text*: lowercase, strip punctuation (dashes kept), and
    collapse all runs of whitespace to single spaces.

    Falsy input (None, empty string) yields "".
    """
    if not text:
        return ""
    lowered = str(text).lower()
    # replace anything that is not a word character, whitespace, or dash
    no_punct = re.sub(r"[^\w\s-]", " ", lowered)
    return re.sub(r"\s+", " ", no_punct).strip()
def extract_cross_store_terms(text: str) -> List[str]:
    """Expand *text* into a deterministic, sorted list of search terms.

    Produces the full cleaned text, every word longer than 2 characters,
    all 2- and 3-word phrases of such words, plus any cross-store synonyms
    registered for those words/phrases.

    Returns:
        sorted list of unique terms. Sorting is a BUGFIX: the previous
        `list(set)` order depended on string-hash randomization, and
        downstream code truncates to the top N terms -- so the effective
        query varied from run to run.
    """
    cleaned = clean_text(text)
    words = cleaned.split()
    all_terms = set()
    all_terms.add(cleaned)  # full cleaned text
    # single words + synonyms
    for word in words:
        if len(word) > 2:
            all_terms.add(word)
            if word in cross_store_synonyms:
                all_terms.update(cross_store_synonyms[word])
    # 2- and 3-word phrases. CONSISTENCY FIX: the synonym lookup previously
    # applied only to 2-word phrases; 3-word phrases now get it too, so a
    # trained synonym file with longer keys is honoured.
    for size in (2, 3):
        for i in range(len(words) - size + 1):
            chunk = words[i:i + size]
            if all(len(w) > 2 for w in chunk):
                phrase = ' '.join(chunk)
                all_terms.add(phrase)
                if phrase in cross_store_synonyms:
                    all_terms.update(cross_store_synonyms[phrase])
    return sorted(all_terms)
def build_enhanced_query(title, description="", max_synonyms=10):
    """Compose a search query that emphasizes the cleaned title (repeated
    three times) followed by up to *max_synonyms* cross-store variations.

    Returns:
        tuple of (query_string, top_20_variation_terms) -- the second item
        is used by the API response for the matched-terms display.
    """
    cleaned_title = clean_text(title)
    cleaned_desc = clean_text(description)
    variations = extract_cross_store_terms(f"{cleaned_title} {cleaned_desc}")
    parts = [cleaned_title] * 3
    parts.extend(variations[:max_synonyms])
    return ' '.join(parts), variations[:20]
# ============================================================================
# ENCODER / FAISS
# ============================================================================
def encode_query(text: str) -> np.ndarray:
    """Encode *text* with the loaded sentence transformer.

    Returns a float32 matrix of shape (1, dim), normalized, ready to be
    passed straight to faiss_index.search().
    """
    vector = encoder.encode(text, convert_to_numpy=True, normalize_embeddings=True)
    # atleast_2d turns a single (dim,) embedding into the (1, dim) row
    # matrix FAISS expects; an already-2-D batch passes through unchanged.
    return np.atleast_2d(vector).astype('float32')
def classify_product(title, description="", top_k=5):
"""Classify product using e5-base embeddings with cross-store optimization"""
start_time = time.time()
# Step 1: Build enhanced query
query_text, matched_terms = build_enhanced_query(title, description)
# Step 2: Encode query
query_embedding = encoder.encode(
query_text,
convert_to_numpy=True,
normalize_embeddings=True
).astype('float32')
if query_embedding.ndim == 1:
query_embedding = query_embedding.reshape(1, -1)
# Step 3: FAISS search
distances, indices = faiss_index.search(query_embedding, top_k)
results = []
for i, idx in enumerate(indices[0]):
if idx >= len(metadata):
continue
meta = metadata[idx]
# Convert FAISS distance to cosine similarity
similarity = 1 - distances[0][i]
confidence_pct = float(similarity) * 100
final_product = meta.get('levels', [])[-1] if meta.get('levels') else meta['category_path'].split('/')[-1]
results.append({
'rank': i + 1,
'category_id': meta['category_id'],
'category_path': meta['category_path'],
'final_product': final_product,
'confidence': round(confidence_pct, 2),
'depth': meta.get('depth', 0)
})
if not results:
return {'error': 'No results found', 'product': title}
# Pick best match
best = results[0]
conf_pct = best['confidence']
if conf_pct >= 90:
conf_level = "EXCELLENT"
elif conf_pct >= 85:
conf_level = "VERY HIGH"
elif conf_pct >= 80:
conf_level = "HIGH"
elif conf_pct >= 75:
conf_level = "GOOD"
elif conf_pct >= 70:
conf_level = "MEDIUM"
else:
conf_level = "LOW"
processing_time = (time.time() - start_time) * 1000
return {
'product': title,
'category_id': best['category_id'],
'category_path': best['category_path'],
'final_product': best['final_product'],
'confidence': f"{conf_level} ({conf_pct:.2f}%)",
'confidence_percent': conf_pct,
'depth': best['depth'],
'matched_terms': matched_terms,
'top_5_results': results,
'processing_time_ms': round(processing_time, 2)
}
# FAISS returns squared L2 distances or inner product depending on index type.
# We'll treat lower distance as better. We convert to a 0-100-ish confidence by
# using a simple heuristic: score = 100 - normalized_distance*100 (clamped).
# Determine a normalization constant: use mean of top distance if available
flat_dist = distances[0]
max_d = float(np.max(flat_dist)) if flat_dist.size else 1.0
min_d = float(np.min(flat_dist)) if flat_dist.size else 0.0
range_d = max(1e-6, max_d - min_d)
for i, idx in enumerate(indices[0]):
if idx < 0 or idx >= len(metadata):
continue
meta = metadata[idx]
raw_d = float(distances[0][i])
# normalize and invert to make higher -> better
norm = (raw_d - min_d) / range_d
conf = max(0.0, min(100.0, 100.0 * (1.0 - norm)))
levels = meta.get('levels') or []
final_product = levels[-1] if levels else meta.get('category_path', '').split('/')[-1]
results.append({
'rank': i + 1,
'category_id': meta.get('category_id'),
'category_path': meta.get('category_path'),
'final_product': final_product,
'confidence': round(conf, 2),
'depth': meta.get('depth', 0)
})
if not results:
return {
'error': 'No results found',
'product': title
}
best = results[0]
conf_pct = best['confidence']
if conf_pct >= 90:
conf_level = "EXCELLENT"
elif conf_pct >= 85:
conf_level = "VERY HIGH"
elif conf_pct >= 80:
conf_level = "HIGH"
elif conf_pct >= 75:
conf_level = "GOOD"
elif conf_pct >= 70:
conf_level = "MEDIUM"
else:
conf_level = "LOW"
processing_time = (time.time() - start_time) * 1000.0
return {
'product': title,
'category_id': best['category_id'],
'category_path': best['category_path'],
'final_product': best['final_product'],
'confidence': f"{conf_level} ({conf_pct:.2f}%)",
'confidence_percent': conf_pct,
'depth': best['depth'],
'matched_terms': matched_terms,
'top_5_results': results,
'processing_time_ms': round(processing_time, 2)
}
# ============================================================================
# SERVER LOAD
# ============================================================================
def load_server():
    """Populate the module globals (encoder, faiss_index, metadata,
    cross_store_synonyms) from the cache directory.

    Raises:
        FileNotFoundError: when the FAISS index or metadata produced by
            training is missing from cache/.
    """
    global encoder, faiss_index, metadata, cross_store_synonyms

    def _require(path, kind):
        # shared existence check so both mandatory artifacts fail the same way
        if not path.exists():
            raise FileNotFoundError(f"{kind} not found: {path}\nPlease run training first!")

    bar = '=' * 80
    print('\n' + bar)
    print('🔄 LOADING TRAINED MODEL')
    print(bar + '\n')

    print('📥 Loading sentence transformer...')
    encoder = SentenceTransformer(MODEL_NAME)
    print('✅ Model loaded\n')

    print('📥 Loading FAISS index...')
    _require(FAISS_INDEX_PATH, "FAISS index")
    faiss_index = faiss.read_index(str(FAISS_INDEX_PATH))
    print(f"✅ Index loaded ({faiss_index.ntotal:,} vectors)\n")

    print('📥 Loading metadata...')
    _require(METADATA_PATH, "Metadata")
    with open(METADATA_PATH, 'rb') as f:
        metadata = pickle.load(f)
    print(f"✅ Metadata loaded ({len(metadata):,} categories)\n")

    print('📥 Loading cross-store synonyms...')
    if SYN_PATH.exists():
        with open(SYN_PATH, 'rb') as f:
            cross_store_synonyms = pickle.load(f)
        print(f"✅ Cross-store synonyms loaded ({len(cross_store_synonyms)} terms)\n")
    else:
        print('⚠️ Cross-store synonyms not found, building default set...')
        cross_store_synonyms = build_cross_store_synonyms()
        print(f"✅ Built {len(cross_store_synonyms)} synonym mappings\n")

    print(bar)
    print('✅ SERVER READY!')
    print(bar + '\n')
# ============================================================================
# HTML TEMPLATE (same as provided)
# ============================================================================
# NOTE(review): this "HTML" template contains no markup at all -- only the
# page's visible text survives (tags appear to have been stripped, e.g. by a
# copy/paste), yet it is served via render_template_string in index().
# Restore the original HTML document before relying on the web UI.
HTML_TEMPLATE = r"""
🎯 Product Category Classifier
🎯 Product Category Classifier
Cross-Store Intelligence
Auto-Tag Support
Real-Time
✅ Cross-Store Synonyms Active!
Understands: washing machine = laundry machine | tv = television | kids = children
Analyzing...
✅ Best Match
Product:
Category ID:
Final Product:
Full Path:
Confidence:
Depth: levels |
Time: ms
🔗 Matched Terms (Cross-Store Variations)
📋 Top 5 Alternative Matches
"""
# ============================================================================
# FLASK APP
# ============================================================================
# Flask application object -- created at import time so the route decorators
# below can register against it before load_server() runs in __main__.
app = Flask(__name__)


@app.route('/')
def index():
    """Serve the embedded web interface at the root URL."""
    return render_template_string(HTML_TEMPLATE)
@app.route('/classify', methods=['POST'])
def classify_route():
    """Classify a product from a JSON body {"title": ..., "description": ...}.

    Returns:
        200 with the classify_product() result dict, or a JSON error with
        status 400 (malformed/missing input) or 500 (classification failure).
    """
    # ROBUSTNESS FIX: silent=True makes malformed JSON yield None instead of
    # raising (which previously produced a werkzeug HTML 400 page); the
    # isinstance check also rejects non-object JSON bodies (e.g. a bare
    # list), which previously crashed on .get() and returned a 500.
    data = request.get_json(force=True, silent=True)
    if not isinstance(data, dict):
        return jsonify({'error': 'JSON object body required'}), 400
    # str(... or '') tolerates null / non-string field values
    title = str(data.get('title') or '').strip()
    description = str(data.get('description') or '').strip()
    if not title:
        return jsonify({'error': 'Title required'}), 400
    try:
        result = classify_product(title, description)
        return jsonify(result)
    except Exception as e:
        app.logger.exception('Classification error')
        return jsonify({'error': str(e)}), 500
@app.route('/health')
def health():
    """Liveness probe reporting the sizes of the loaded artifacts."""
    payload = {
        'status': 'healthy',
        'categories': len(metadata),
        'cross_store_synonyms': len(cross_store_synonyms),
        'model': MODEL_NAME,
    }
    return jsonify(payload)
# ============================================================================
# MAIN
# ============================================================================
if __name__ == '__main__':
    try:
        # Startup order matters: load_server() must populate the module
        # globals (encoder, faiss_index, metadata, cross_store_synonyms)
        # before the first request is served.
        load_server()
        print('\n🌐 Server starting...')
        print(' URL: http://localhost:5000')
        print(' Press CTRL+C to stop\n')
        # Recommended: run with a production server like gunicorn for production use
        app.run(host='0.0.0.0', port=5000, debug=False)
    except FileNotFoundError as e:
        # Raised by load_server() when cache/ lacks the FAISS index or metadata
        print(f"\n❌ ERROR: {e}")
        print('\n💡 Solution: Run training first to create FAISS index and metadata')
    except Exception as e:
        print(f"\n❌ UNEXPECTED ERROR: {e}\n")