Fetching metadata from the HF Docker repository... import re, os import pandas as pd from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.pipeline import Pipeline from sklearn.compose import ColumnTransformer from sklearn.preprocessing import OneHotEncoder from sklearn.linear_model import Ridge def _norm_key(title: str): toks = re.findall(r"[a-z0-9]+", str(title).lower()) toks = [t for t in toks if len(t) > 2] key = " ".join(sorted(dict.fromkeys(toks))[:6]) return key or None class DealValuator: def __init__(self, comps_csv: str, feedback_csv: str | None = None): self.df = pd.read_csv(comps_csv) self.feedback_csv = feedback_csv self.feedback_stats = self._load_feedback(feedback_csv) if feedback_csv else {} self.model = self._train(self.df) def _train(self, df: pd.DataFrame): text_features = 'title' cat_features = ['category','brand','condition'] text_pipe = Pipeline([('tfidf', TfidfVectorizer(ngram_range=(1,2), min_df=1))]) cat_pipe = Pipeline([('ohe', OneHotEncoder(handle_unknown='ignore'))]) pre = ColumnTransformer([('t', text_pipe, text_features), ('c', cat_pipe, cat_features)]) model = Pipeline([('pre', pre), ('reg', Ridge(alpha=1.0))]) X = df[['title','category','brand','condition']] y = df['price'] model.fit(X, y) return model def _load_feedback(self, feedback_csv: str): if not feedback_csv or not os.path.exists(feedback_csv): return {} fb = pd.read_csv(feedback_csv) if fb.empty: return {} fb['key'] = fb['title'].apply(_norm_key) stats = (fb.dropna(subset=['key']) .groupby('key')['correct'] .mean() .to_dict()) return stats def _apply_feedback_adjustment(self, title: str, price: float) -> float: key = _norm_key(title) if not key or key not in self.feedback_stats: return price score = self.feedback_stats[key] # 0..1 if score >= 0.7: return price * 1.10 elif score <= 0.3: return price * 0.90 return price def predict_resale(self, title: str, category: str=None, brand: str=None, condition: str='Used-Good'): guess_cat, guess_brand = 
self._guess_meta(title) category = category or guess_cat brand = brand or guess_brand X = pd.DataFrame([{ 'title': title, 'category': category or 'Unknown', 'brand': brand or 'Unknown', 'condition': condition or 'Used-Good' }]) pred = float(self.model.predict(X)[0]) pred = self._apply_feedback_adjustment(title, pred) return { 'predicted_resale': max(5.0, round(pred, 2)), 'category': category, 'brand': brand, 'condition': condition } @staticmethod def _guess_meta(title: str): title_l = str(title).lower() known_brands = [ 'nintendo','sony','apple','kitchenaid','bose','canon','seiko','ikea','carhartt', 'levis','levi','patagonia','yeti','coach','dansk','dansko','dewalt','makita','all-clad', 'pokemon','herman miller','pyrex','le creuset','marantz','ll bean','ralph lauren','vera bradley','nerf' ] brand = None for b in known_brands: if b in title_l: brand = b.title() break cat_map = { 'console':'Electronics','playstation':'Electronics','wii':'Electronics','iphone':'Electronics','camera':'Electronics', 'headphone':'Electronics','receiver':'Electronics','walkman':'Electronics', 'bowl':'Home & Kitchen','mixer':'Home & Kitchen','pan':'Home & Kitchen','skillet':'Home & Kitchen','dutch oven':'Home & Kitchen','tumbler':'Home & Kitchen', 'jeans':'Clothing','jacket':'Clothing','shirt':'Clothing','dress':'Clothing','fleece':'Clothing', 'watch':'Accessories','handbag':'Accessories','tote':'Accessories', 'boots':'Shoes','clogs':'Shoes', 'chair':'Furniture', 'drill':'Tools','saw':'Tools', 'pokemon':'Collectibles','cards':'Collectibles', 'toy':'Toys','blaster':'Toys' } category = None for k,v in cat_map.items(): if k in title_l: category = v break return category, brand def compute_deal_score(predicted_resale: float, asking_price: float, fees_rate: float=0.13, ship_estimate: float=12.0): fees = predicted_resale * fees_rate net = predicted_resale - fees - ship_estimate profit = net - asking_price margin = profit / asking_price if asking_price > 0 else 0.0 if profit >= 50 and margin >= 
0.8: label = 'Home Run' elif profit >= 25 and margin >= 0.5: label = 'Great' elif profit >= 10 and margin >= 0.3: label = 'Good' elif profit >= 5: label = 'Meh' else: label = 'Pass' return { 'fees': round(fees,2), 'net_after_fees': round(net,2), 'profit': round(profit,2), 'margin': round(margin,2), 'label': label }
# Browse files