product-classify / src /config.py
Abhishek7356
Creating new project for product categorisation
d12790d
# """
# Configuration settings for the insurance product classifier
# """
import os
from pathlib import Path
# ---------------------------------------------------------------------------
# Filesystem layout
# ---------------------------------------------------------------------------
# Project root: two levels above this file.
BASE_DIR = Path(__file__).resolve().parent.parent

# Directory holding the model artefacts listed below.
MODEL_DIR = BASE_DIR.joinpath("models")

# Artefacts that must be present in MODEL_DIR.
EMBEDDINGS_FILE = MODEL_DIR.joinpath("category_embeddings_mpnet.npy")
METADATA_FILE = MODEL_DIR.joinpath("category_metadata.pkl")
CONFIG_FILE = MODEL_DIR.joinpath("config.json")

# ---------------------------------------------------------------------------
# Embedding model
# ---------------------------------------------------------------------------
MODEL_NAME: str = "sentence-transformers/all-mpnet-base-v2"
EMBEDDING_DIMENSION: int = 768

# ---------------------------------------------------------------------------
# Classification thresholds (confidence expressed as a fraction of 1.0)
# ---------------------------------------------------------------------------
AUTO_APPROVE_THRESHOLD: float = 0.75  # 75% confidence
QUICK_REVIEW_THRESHOLD: float = 0.60  # 60% confidence

# ---------------------------------------------------------------------------
# Keyword boosting
# ---------------------------------------------------------------------------
BOOST_FACTOR: float = 0.15  # 15% boost for keyword matches
MAX_BOOST: float = 0.30  # Maximum 30% total boost

# ---------------------------------------------------------------------------
# API metadata
# ---------------------------------------------------------------------------
API_TITLE: str = "Insurance Product Classification API"
API_VERSION: str = "1.0.0"
API_DESCRIPTION: str = (
    "ML-powered product categorization for insurance underwriting"
)

# ---------------------------------------------------------------------------
# Processing defaults
# ---------------------------------------------------------------------------
DEFAULT_TOP_K: int = 5  # Return top 5 matches
BATCH_SIZE: int = 32  # For batch processing
# Keywords for boosting, listed per product family and merged into one set.
PRODUCT_KEYWORDS = set().union(
    # Electronics
    (
        "iphone", "ipad", "macbook", "smartphone", "laptop", "tablet",
        "computer", "electronics", "phone", "mobile", "samsung", "apple",
        "dell", "hp",
    ),
    # Appliances
    (
        "refrigerator", "dishwasher", "washing machine", "dryer", "oven",
        "microwave", "coffee maker", "blender", "toaster", "appliance",
    ),
    # Clothing
    (
        "shoes", "shirt", "pants", "dress", "jacket", "sneakers", "boots",
        "clothing", "apparel", "footwear",
    ),
    # Books
    ("book", "novel", "textbook", "ebook", "reading", "literature"),
    # Sports
    (
        "sports", "fitness", "exercise", "gym", "athletic", "running",
        "yoga",
    ),
    # Home
    ("furniture", "decor", "bedding", "kitchen", "home", "garden"),
)
def validate_files(required_files=None) -> bool:
    """Validate that all required model files exist.

    Args:
        required_files: Optional iterable of paths (``str`` or ``Path``) to
            check. When omitted, the embeddings, metadata and config files
            defined in this module are checked.

    Returns:
        True when every path exists.

    Raises:
        FileNotFoundError: If one or more paths do not exist. The message
            lists every missing path, one per line.
    """
    if required_files is None:
        required_files = (EMBEDDINGS_FILE, METADATA_FILE, CONFIG_FILE)

    # Gather all missing paths first so a single error reports every one.
    missing_files = [
        str(file_path)
        for file_path in map(Path, required_files)
        if not file_path.exists()
    ]
    if missing_files:
        raise FileNotFoundError(
            "Missing required files:\n"
            + "\n".join(f" - {f}" for f in missing_files)
        )
    return True
if __name__ == "__main__":
    # Show the key settings, then check that the model artefacts are on disk.
    summary_lines = (
        "Configuration Settings:",
        f" Model Directory: {MODEL_DIR}",
        f" Embeddings File: {EMBEDDINGS_FILE.name}",
        f" Metadata File: {METADATA_FILE.name}",
        f" Auto-Approve Threshold: {AUTO_APPROVE_THRESHOLD * 100}%",
        f" Quick Review Threshold: {QUICK_REVIEW_THRESHOLD * 100}%",
    )
    for line in summary_lines:
        print(line)

    try:
        validate_files()
    except FileNotFoundError as err:
        print(f"\n❌ Error: {err}")
    else:
        print("\n✅ All required files found!")