HelloWorld0204 committed on
Commit
fea9d68
·
verified ·
1 Parent(s): cc3be8b

Upload 22 files

Browse files
Files changed (1) hide show
  1. app.py +12 -224
app.py CHANGED
@@ -86,22 +86,11 @@ MATCHING_RESULT_CACHE_LOCK = threading.Lock()
86
  MATCHING_RESULT_CACHE_MAX = int(os.getenv("MATCHING_RESULT_CACHE_MAX", "500"))
87
  MATCHING_RESULT_CACHE_TTL_SECONDS = int(os.getenv("MATCHING_RESULT_CACHE_TTL_SECONDS", "86400"))
88
 
89
- SCRAPER_QUERY_CACHE: dict[str, tuple[dict[str, Any], float]] = {}
90
- SCRAPER_QUERY_CACHE_LOCK = threading.Lock()
91
- SCRAPER_QUERY_CACHE_TTL_SECONDS = int(os.getenv("SCRAPER_QUERY_CACHE_TTL_SECONDS", "1296000")) # 15 days
92
-
93
 
94
  def _matching_cache_storage_key(key: str) -> str:
95
  return f"matching:{key}"
96
 
97
 
98
- def _scraper_cache_key(user_prompt: str, store: str, gender: str, target_category: str) -> str:
99
- """Create deterministic cache key for scraper queries"""
100
- import hashlib
101
- key_str = f"{user_prompt.lower().strip()}|{store}|{gender}|{target_category}"
102
- return hashlib.md5(key_str.encode()).hexdigest()
103
-
104
-
105
  def _normalize_cache_category(value: Any) -> str:
106
  category = _norm(value)
107
  if category in {"topwear", "bottomwear", "others"}:
@@ -3118,7 +3107,7 @@ def product_urls(payload: dict[str, Any] = Body(default_factory=dict)) -> dict[s
3118
 
3119
  @app.post("/suggestions")
3120
  @app.post("/api/suggestions")
3121
- def suggestions(payload: dict[str, Any] = Body(default_factory=dict)) -> dict[str, Any]:
3122
  occasion = str(payload.get("occasion") or "casual")
3123
  target_category = str(payload.get("target_category") or payload.get("targetCategory") or "both")
3124
  gender_preference = str(payload.get("gender_preference") or payload.get("genderPreference") or "any")
@@ -3142,99 +3131,12 @@ def suggestions(payload: dict[str, Any] = Body(default_factory=dict)) -> dict[st
3142
  raise HTTPException(status_code=502, detail=str(exc)) from exc
3143
  except NvidiaPayloadError as exc:
3144
  raise HTTPException(status_code=502, detail=str(exc)) from exc
3145
- except requests.RequestException as exc:
3146
- raise HTTPException(status_code=502, detail=f"Failed to fetch {store.title()} pages: {exc}") from exc
3147
-
3148
-
3149
- def _build_static_scraper_result(
3150
- static_plan: dict[str, Any],
3151
- *,
3152
- occasion: str,
3153
- gender: str,
3154
- preferences: str,
3155
- store: str,
3156
- max_products: int | None,
3157
- ) -> dict[str, Any]:
3158
- query = str(static_plan.get("query") or "").strip()
3159
- color = str(static_plan.get("color") or "").strip()
3160
- category = str(static_plan.get("category") or "").strip()
3161
- if not query:
3162
- query = " ".join(part for part in [gender, color, category] if str(part or "").strip()).strip()
3163
- if not query:
3164
- raise HTTPException(status_code=400, detail="static_query_plan.query is required")
3165
-
3166
- plan_occasion = str(static_plan.get("occasion") or occasion or "casual").strip() or "casual"
3167
- plan_gender = _normalize_scraper_gender(gender) or _normalize_scraper_gender(static_plan.get("gender")) or None
3168
- limit = max_products if isinstance(max_products, int) and max_products > 0 else 12
3169
- search_urls = _build_store_search_urls_from_query(query, store=store, gender=plan_gender)
3170
-
3171
- products: list[dict[str, Any]] = []
3172
- seen_links: set[str] = set()
3173
- errors: list[str] = []
3174
- for search_url in search_urls:
3175
- try:
3176
- for product in _extract_store_product_summaries(search_url, store=store):
3177
- item_link = str(product.get("item_link") or "").strip()
3178
- if not item_link or item_link in seen_links:
3179
- continue
3180
- seen_links.add(item_link)
3181
- products.append(product)
3182
- if len(products) >= limit:
3183
- break
3184
- except requests.RequestException as exc:
3185
- errors.append(str(exc))
3186
- if len(products) >= limit:
3187
- break
3188
-
3189
- query_plan_payload = {
3190
- "target_category": static_plan.get("target_category") or _normalize_target_category(static_plan.get("targetCategory")),
3191
- "color": color or "neutral",
3192
- "category": category or "mixed",
3193
- "gender": plan_gender,
3194
- "style_direction": str(static_plan.get("style_direction") or "direct-static").strip() or "direct-static",
3195
- "occasion_bucket": _occasion_bucket(plan_occasion),
3196
- "reference_item_ids": [],
3197
- "query": query,
3198
- "final_query": query,
3199
- "wardrobe_grounding": "Static example query selected from the shopping suggestions page.",
3200
- "reason": "Used a predefined query plan and URL builder without model planning.",
3201
- "source": "static",
3202
- }
3203
-
3204
- response_payload: dict[str, Any] = {
3205
- "runtime_id": str(uuid.uuid4()),
3206
- "created_at": _now_iso(),
3207
- "store": store,
3208
- "occasion": plan_occasion,
3209
- "gender": plan_gender or gender or "",
3210
- "preferences": preferences,
3211
- "wardrobe_snapshot": _wardrobe_metadata_snapshot(limit=12),
3212
- "query_plan": query_plan_payload,
3213
- "search_urls": search_urls,
3214
- "product_urls": [item["item_link"] for item in products if item.get("item_link")],
3215
- "products": products,
3216
- "count": len(products),
3217
- "intermediate_steps": [
3218
- {
3219
- "step": "static_query_plan",
3220
- "query": query,
3221
- "url_count": len(search_urls),
3222
- "new_products": len(products),
3223
- "total_products": len(products),
3224
- "errors": errors,
3225
- "message": "Predefined webpage query used; model planner skipped.",
3226
- }
3227
- ],
3228
- "plan_source": "static",
3229
- "plan_error": None,
3230
- "scrape_error": "; ".join(errors) if errors and not products else None,
3231
- }
3232
- response_payload["saved_json_path"] = _save_scraper_json_payload("product_urls", response_payload)
3233
- return _store_scraper_runtime_result(response_payload)
3234
-
3235
-
3236
- @app.post("/scraper/recommend")
3237
- def scraper_recommend(payload: dict[str, Any] = Body(default_factory=dict)) -> dict[str, Any]:
3238
  user_prompt = str(payload.get("user_prompt") or payload.get("prompt") or "").strip()
3239
  inferred = _infer_structured_request_from_prompt(user_prompt)
3240
  inferred_target_category = _normalize_target_category(inferred.get("target_category"))
@@ -3281,41 +3183,11 @@ def scraper_recommend(payload: dict[str, Any] = Body(default_factory=dict)) -> d
3281
  max_products = int(max_products_raw) if max_products_raw not in {None, ""} else None
3282
  store = _normalize_store_name(str(payload.get("store") or SCRAPER_DEFAULT_STORE or "nike"))
3283
 
3284
- if isinstance(max_products, int) and max_products < 1:
3285
- raise HTTPException(status_code=400, detail="max_products must be at least 1")
3286
-
3287
- static_plan = payload.get("static_query_plan") or payload.get("staticQueryPlan")
3288
- if isinstance(static_plan, dict):
3289
- try:
3290
- return _build_static_scraper_result(
3291
- static_plan,
3292
- occasion=occasion,
3293
- gender=gender,
3294
- preferences=preferences,
3295
- store=store,
3296
- max_products=max_products,
3297
- )
3298
- except requests.RequestException as exc:
3299
- raise HTTPException(status_code=502, detail=f"Failed to fetch {store.title()} pages: {exc}") from exc
3300
-
3301
- # Check cache first for faster repeat queries
3302
- cache_key = _scraper_cache_key(user_prompt, store, gender, target_category)
3303
- with SCRAPER_QUERY_CACHE_LOCK:
3304
- if cache_key in SCRAPER_QUERY_CACHE:
3305
- cached_result, cached_timestamp = SCRAPER_QUERY_CACHE[cache_key]
3306
- if time.time() - cached_timestamp < SCRAPER_QUERY_CACHE_TTL_SECONDS:
3307
- print(f"[CACHE HIT] Returning cached scraper results for: {user_prompt[:50]}...")
3308
- return cached_result
3309
- # Clean up expired cache entries
3310
- expired_keys = [
3311
- k for k, (_, ts) in SCRAPER_QUERY_CACHE.items()
3312
- if time.time() - ts >= SCRAPER_QUERY_CACHE_TTL_SECONDS
3313
- ]
3314
- for k in expired_keys:
3315
- del SCRAPER_QUERY_CACHE[k]
3316
 
3317
  try:
3318
- result = _generate_scraper_plan_with_kimi(
3319
  occasion=occasion,
3320
  gender=gender,
3321
  preferences=preferences,
@@ -3324,12 +3196,8 @@ def scraper_recommend(payload: dict[str, Any] = Body(default_factory=dict)) -> d
3324
  filters=filters,
3325
  max_products=max_products,
3326
  store=store,
3327
- strict_kimi=False,
3328
  )
3329
- # Cache the result
3330
- with SCRAPER_QUERY_CACHE_LOCK:
3331
- SCRAPER_QUERY_CACHE[cache_key] = (result, time.time())
3332
- return result
3333
  except NvidiaGatewayError as exc:
3334
  raise HTTPException(status_code=502, detail=str(exc)) from exc
3335
  except NvidiaPayloadError as exc:
@@ -3417,7 +3285,7 @@ def scraper_page() -> Response:
3417
  </div>
3418
  </div>
3419
  <label for="preferences">Other Preferences</label>
3420
- <textarea id="preferences" placeholder="Example: Category: shirt. Color: navy. Occasion: formal office. Style: structured minimal. Avoid: oversized."></textarea>
3421
  <button id="runBtn">Generate Kimi Query and Scrape</button>
3422
  <div id="status" class="status"></div>
3423
  </div>
@@ -3839,86 +3707,6 @@ def ai_recommend_outfits(payload: dict[str, Any] = Body(default_factory=dict)) -
3839
  bottoms=bottoms,
3840
  others=priority_other_candidates,
3841
  ))
3842
-
3843
-
3844
- @app.post("/ai/classify-item")
3845
- def ai_classify_item(payload: dict[str, Any] = Body(default_factory=dict)) -> dict[str, Any]:
3846
- """
3847
- Classify a fashion item using NVIDIA model (primary) with HuggingFace fallback.
3848
-
3849
- Args:
3850
- item: Wardrobe item dict with metadata and/or image_url
3851
-
3852
- Returns:
3853
- Classification result with category, confidence, and attributes
3854
- """
3855
- try:
3856
- item = payload.get("item")
3857
- if not isinstance(item, dict):
3858
- raise HTTPException(status_code=400, detail="'item' must be a dictionary")
3859
-
3860
- service = get_recommendation_service()
3861
- result = service.classify_item(item)
3862
-
3863
- return {
3864
- "success": True,
3865
- "classification": result,
3866
- "model_backend": result.get("backend", "unknown"),
3867
- }
3868
- except HTTPException:
3869
- raise
3870
- except Exception as e:
3871
- print(f"[classify-item] Error: {e}")
3872
- _raise_http_error(e)
3873
-
3874
-
3875
- @app.post("/ai/match-items")
3876
- def ai_match_items(payload: dict[str, Any] = Body(default_factory=dict)) -> dict[str, Any]:
3877
- """
3878
- Determine if two fashion items match well together.
3879
-
3880
- Uses NVIDIA model as primary with HuggingFace as fallback.
3881
-
3882
- Args:
3883
- item1: First wardrobe item dict
3884
- item2: Second wardrobe item dict
3885
- match_threshold: Confidence threshold (0-1), default 0.5
3886
-
3887
- Returns:
3888
- Match result with compatibility scores and reason
3889
- """
3890
- try:
3891
- item1 = payload.get("item1")
3892
- item2 = payload.get("item2")
3893
- match_threshold = float(payload.get("match_threshold", 0.5))
3894
-
3895
- if not isinstance(item1, dict):
3896
- raise HTTPException(status_code=400, detail="'item1' must be a dictionary")
3897
- if not isinstance(item2, dict):
3898
- raise HTTPException(status_code=400, detail="'item2' must be a dictionary")
3899
-
3900
- if match_threshold < 0 or match_threshold > 1:
3901
- raise HTTPException(status_code=400, detail="'match_threshold' must be between 0 and 1")
3902
-
3903
- service = get_recommendation_service()
3904
- result = service.match_items(item1, item2, match_threshold)
3905
-
3906
- return {
3907
- "success": True,
3908
- "item1_id": item1.get("id", "unknown"),
3909
- "item2_id": item2.get("id", "unknown"),
3910
- "match": result.get("match", False),
3911
- "match_score": result.get("score", 0.0),
3912
- "reason": result.get("reason", ""),
3913
- "compatibility_breakdown": result.get("compatibility", {}),
3914
- }
3915
- except HTTPException:
3916
- raise
3917
- except Exception as e:
3918
- print(f"[match-items] Error: {e}")
3919
- _raise_http_error(e)
3920
-
3921
-
3922
  @app.get("/image-proxy")
3923
  def image_proxy(url: str = Query(..., description="Remote image URL")) -> Response:
3924
  parsed = urlparse(url)
 
86
  MATCHING_RESULT_CACHE_MAX = int(os.getenv("MATCHING_RESULT_CACHE_MAX", "500"))
87
  MATCHING_RESULT_CACHE_TTL_SECONDS = int(os.getenv("MATCHING_RESULT_CACHE_TTL_SECONDS", "86400"))
88
 
 
 
 
 
89
 
90
  def _matching_cache_storage_key(key: str) -> str:
91
  return f"matching:{key}"
92
 
93
 
 
 
 
 
 
 
 
94
  def _normalize_cache_category(value: Any) -> str:
95
  category = _norm(value)
96
  if category in {"topwear", "bottomwear", "others"}:
 
3107
 
3108
  @app.post("/suggestions")
3109
  @app.post("/api/suggestions")
3110
+ def suggestions(payload: dict[str, Any] = Body(default_factory=dict)) -> dict[str, Any]:
3111
  occasion = str(payload.get("occasion") or "casual")
3112
  target_category = str(payload.get("target_category") or payload.get("targetCategory") or "both")
3113
  gender_preference = str(payload.get("gender_preference") or payload.get("genderPreference") or "any")
 
3131
  raise HTTPException(status_code=502, detail=str(exc)) from exc
3132
  except NvidiaPayloadError as exc:
3133
  raise HTTPException(status_code=502, detail=str(exc)) from exc
3134
+ except requests.RequestException as exc:
3135
+ raise HTTPException(status_code=502, detail=f"Failed to fetch {store.title()} pages: {exc}") from exc
3136
+
3137
+
3138
+ @app.post("/scraper/recommend")
3139
+ def scraper_recommend(payload: dict[str, Any] = Body(default_factory=dict)) -> dict[str, Any]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3140
  user_prompt = str(payload.get("user_prompt") or payload.get("prompt") or "").strip()
3141
  inferred = _infer_structured_request_from_prompt(user_prompt)
3142
  inferred_target_category = _normalize_target_category(inferred.get("target_category"))
 
3183
  max_products = int(max_products_raw) if max_products_raw not in {None, ""} else None
3184
  store = _normalize_store_name(str(payload.get("store") or SCRAPER_DEFAULT_STORE or "nike"))
3185
 
3186
+ if isinstance(max_products, int) and max_products < 1:
3187
+ raise HTTPException(status_code=400, detail="max_products must be at least 1")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3188
 
3189
  try:
3190
+ return _generate_scraper_plan_with_kimi(
3191
  occasion=occasion,
3192
  gender=gender,
3193
  preferences=preferences,
 
3196
  filters=filters,
3197
  max_products=max_products,
3198
  store=store,
3199
+ strict_kimi=True,
3200
  )
 
 
 
 
3201
  except NvidiaGatewayError as exc:
3202
  raise HTTPException(status_code=502, detail=str(exc)) from exc
3203
  except NvidiaPayloadError as exc:
 
3285
  </div>
3286
  </div>
3287
  <label for="preferences">Other Preferences</label>
3288
+ <textarea id="preferences" placeholder="Example: formal office look, breathable fabric, neutral tones, regular fit, avoid oversized silhouettes"></textarea>
3289
  <button id="runBtn">Generate Kimi Query and Scrape</button>
3290
  <div id="status" class="status"></div>
3291
  </div>
 
3707
  bottoms=bottoms,
3708
  others=priority_other_candidates,
3709
  ))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3710
  @app.get("/image-proxy")
3711
  def image_proxy(url: str = Query(..., description="Remote image URL")) -> Response:
3712
  parsed = urlparse(url)