#!/usr/bin/env python3
"""
Gohan CID Product Recommendation Inference Engine
"""
import os
import sys
import json
import pickle
import torch
import numpy as np
import pandas as pd
from typing import Dict, List, Tuple

# rtdl is an optional dependency: when it is absent the engine still imports
# and runs in the no-model fallback mode (product-master based answers).
try:
    from rtdl import FTTransformer
except ImportError:
    FTTransformer = None
class GohanCIDInferenceEngine:
    """Product-category recommendation engine for Gohan CID customers.

    Loads JSON label-encoder artifacts, a product-master CSV
    (CATEGORY_ID -> CATEGORY_NAME) and, when available, an FT-Transformer
    checkpoint.  ``predict`` returns scored category recommendations and
    falls back to the product master when no model can be loaded.
    """

    def __init__(self, model_path: str, encoders_dir: str, product_master_path: str):
        """
        Args:
            model_path: Path to the saved FT-Transformer ``state_dict``.
            encoders_dir: Directory holding the ``*1.json`` encoder artifacts.
            product_master_path: CSV mapping category ids to display names.
        """
        self.model_path = model_path
        self.encoders_dir = encoders_dir
        self.product_master_path = product_master_path
        self._load_encoders()
        self._load_product_master()
        self.model = self._load_model()

    def _load_encoders(self):
        """Load the JSON encoder artifacts; fall back to empty encoders on any error."""

        def _read_json(name):
            with open(os.path.join(self.encoders_dir, name), 'r', encoding='utf-8') as f:
                return json.load(f)

        try:
            self.idx_to_cid = _read_json('idx_to_cid1.json')
            self.all_cids = _read_json('all_cids1.json')
            self.cat_encoders = _read_json('cat_encoders1.json')
            self.cat_cardinalities = _read_json('cat_cardinalities1.json')
        except Exception as e:
            # Best-effort startup: run in no-model mode rather than crash.
            # (Consistency fix: diagnostics go to stderr like _load_model's.)
            print(f"Error loading encoders: {e}", file=sys.stderr)
            self.idx_to_cid = {}
            self.all_cids = []
            self.cat_encoders = {}
            self.cat_cardinalities = []

    def _load_product_master(self):
        """Load the product-master CSV, normalizing column names to upper case."""
        empty = pd.DataFrame(columns=['CATEGORY_ID', 'CATEGORY_NAME'])
        if not os.path.exists(self.product_master_path):
            self.product_master = empty
            return
        pm = pd.read_csv(self.product_master_path, encoding='utf-8-sig')
        cols = {c.lower(): c for c in pm.columns}
        if 'category_id' in cols and 'category_name' in cols:
            pm = pm.rename(columns={cols['category_id']: 'CATEGORY_ID',
                                    cols['category_name']: 'CATEGORY_NAME'})
        if not {'CATEGORY_ID', 'CATEGORY_NAME'}.issubset(pm.columns):
            # Robustness fix: a CSV lacking the expected columns previously
            # caused a KeyError later in predict(); use an empty master instead.
            pm = empty
        self.product_master = pm

    def _load_model(self):
        """Build the FT-Transformer and load weights; return None for no-model mode."""
        if FTTransformer is None:
            return None
        if not os.path.exists(self.model_path):
            # Fix: a missing checkpoint previously returned an *untrained*
            # model whose scores were meaningless; treat it as no-model mode.
            print(f"⚠️ Could not load weights: file not found: {self.model_path}. "
                  f"Falling back to no-model mode.", file=sys.stderr)
            return None
        # Hyperparameters must match the training script exactly.
        model = FTTransformer.make_baseline(
            n_num_features=5,  # 5 numerical features (age ranges are categorical)
            cat_cardinalities=self.cat_cardinalities,
            d_out=len(self.all_cids),
            d_token=1024,       # matches the saved model's d_token
            n_blocks=8,
            attention_dropout=0.15,
            ffn_d_hidden=1024,  # matches the saved model's ffn_d_hidden
            ffn_dropout=0.15,
            residual_dropout=0.10,
        )
        try:
            # NOTE(review): torch.load unpickles arbitrary objects; consider
            # weights_only=True (torch>=2.0) if the checkpoint format allows it.
            state = torch.load(self.model_path, map_location='cpu')
            model.load_state_dict(state)
        except Exception as e:
            print(f"⚠️ Could not load weights: {e}. Falling back to no-model mode.", file=sys.stderr)
            return None
        model.eval()
        return model

    def _encode_categorical(self, value_map: Dict[str, int], value: str) -> int:
        """Map a categorical value to its index; unknown values map to
        '__UNKNOWN__' when present, else 0."""
        if value in value_map:
            return int(value_map[value])
        return int(value_map.get('__UNKNOWN__', 0))

    def _preprocess(self, data: Dict) -> Tuple[torch.Tensor, torch.Tensor]:
        """Convert one raw input record into ``(X_cat, X_num)`` model tensors.

        Args:
            data: Flat dict with English field names (see ``required_en``).

        Raises:
            ValueError: if any required field is missing.
        """
        required_en = [
            'INDUSTRY', 'EMPLOYEE_RANGE', 'FRIDGE_RANGE', 'PAYMENT_METHOD', 'PREFECTURE',
            'FIRST_YEAR', 'FIRST_MONTH', 'LAT', 'LONG', 'DELIVERY_NUM', 'MEDIAN_GENDER_RATIO',
            'MODE_TOP_AGE_RANGE_1', 'MODE_TOP_AGE_RANGE_2', 'MODE_TOP_AGE_RANGE_3'
        ]
        missing = [k for k in required_en if k not in data]
        if missing:
            raise ValueError(f"Missing required inputs: {missing}")
        df = pd.DataFrame([data])
        # Categorical features: iterate the encoder keys so feature order
        # matches the training-time order.
        X_cat = []
        for col in self.cat_encoders.keys():
            if col in df.columns:
                X_cat.append(self._encode_categorical(self.cat_encoders[col], df[col].iloc[0]))
            else:
                X_cat.append(self._encode_categorical(self.cat_encoders[col], '__UNKNOWN__'))
        X_cat = torch.tensor([X_cat], dtype=torch.long)
        # Numerical features (5, matching training; age ranges are categorical).
        num_cols = ['LAT', 'LONG', 'DELIVERY_NUM', 'MEDIAN_GENDER_RATIO', 'TOTAL_VOLUME']
        X_num = []
        for col in num_cols:
            value = 0.0  # single default for missing or non-numeric fields
            if col in df.columns:
                try:
                    value = float(df[col].iloc[0])
                except (ValueError, TypeError):
                    value = 0.0
            X_num.append(value)
        X_num = torch.tensor([X_num], dtype=torch.float32)
        return X_cat, X_num

    def predict(self, data: Dict) -> List[Dict]:
        """Return the top-K scored category recommendations for one record.

        ``data`` may carry an optional integer ``topK`` (default 30).  With no
        loaded model, the first topK product-master rows are returned with
        score 0.0.  Each result dict has ``category_id``, ``category_name``
        and ``score`` keys.
        """
        topK = int(data.get('topK', 30))
        if self.model is None:
            pm = self.product_master[['CATEGORY_ID', 'CATEGORY_NAME']].dropna()
            return [
                {"category_id": int(r['CATEGORY_ID']),
                 "category_name": str(r['CATEGORY_NAME']),
                 "score": 0.0}
                for r in pm.head(topK).to_dict(orient='records')
            ]
        X_cat, X_num = self._preprocess(data)
        with torch.no_grad():
            logits = self.model(X_num, X_cat)
            scores = torch.sigmoid(logits).flatten().cpu().numpy()
        indices = np.argsort(scores)[::-1]  # descending by score
        name_map = {int(row['CATEGORY_ID']): str(row['CATEGORY_NAME'])
                    for _, row in self.product_master.iterrows()
                    if pd.notna(row['CATEGORY_ID'])}
        results = []
        for idx in indices:
            if idx >= len(self.all_cids):
                continue
            cid = int(self.all_cids[idx])
            results.append({
                "category_id": cid,
                "category_name": name_map.get(cid, "不明"),
                "score": float(scores[idx]),
            })
            # Fix: topK was previously ignored whenever a model was loaded,
            # making the two code paths behave inconsistently.
            if len(results) >= topK:
                break
        return results