Ali Mohsin committed on
Commit
c2644dc
·
1 Parent(s): 55c158e
Files changed (2) hide show
  1. scripts/prepare_polyvore.py +13 -0
  2. utils/data_fetch.py +19 -3
scripts/prepare_polyvore.py CHANGED
@@ -84,6 +84,9 @@ def load_outfits_json(root: str, split: str) -> List[Dict[str, Any]]:
84
  os.path.join(root, f"{split}_no_dup.json"),
85
  os.path.join(root, "splits", f"{split}.json"),
86
  os.path.join(root, "splits", f"{split}_no_dup.json"),
 
 
 
87
  ]
88
  for p in candidates:
89
  if os.path.exists(p):
@@ -114,6 +117,16 @@ def try_load_any_outfits(root: str) -> List[Dict[str, Any]]:
114
  data = _normalize_outfits(raw)
115
  if data:
116
  return data
 
 
 
 
 
 
 
 
 
 
117
  return []
118
 
119
 
 
84
  os.path.join(root, f"{split}_no_dup.json"),
85
  os.path.join(root, "splits", f"{split}.json"),
86
  os.path.join(root, "splits", f"{split}_no_dup.json"),
87
+ # Official Polyvore often ships splits under nondisjoint/ or disjoint/
88
+ os.path.join(root, "nondisjoint", f"{split}.json"),
89
+ os.path.join(root, "disjoint", f"{split}.json"),
90
  ]
91
  for p in candidates:
92
  if os.path.exists(p):
 
117
  data = _normalize_outfits(raw)
118
  if data:
119
  return data
120
+ # Last resort: check nondisjoint/disjoint JSONs directly
121
+ for sub in ("nondisjoint", "disjoint"):
122
+ for name in ("train.json", "valid.json", "test.json"):
123
+ p = os.path.join(root, sub, name)
124
+ if os.path.exists(p):
125
+ with open(p, "r") as f:
126
+ raw = json.load(f)
127
+ data = _normalize_outfits(raw)
128
+ if data:
129
+ return data
130
  return []
131
 
132
 
utils/data_fetch.py CHANGED
@@ -44,9 +44,18 @@ def ensure_dataset_ready() -> Optional[str]:
44
  # Only fetch what's needed to run and prepare splits
45
  allow = [
46
  "images.zip",
 
47
  "train.json",
48
  "valid.json",
49
  "test.json",
 
 
 
 
 
 
 
 
50
  "polyvore_item_metadata.json",
51
  "polyvore_outfit_titles.json",
52
  "categories.csv",
@@ -61,9 +70,16 @@ def ensure_dataset_ready() -> Optional[str]:
61
  "**/*.tar.gz",
62
  "**/*.7z",
63
  ]
64
- need_meta = not all(os.path.exists(os.path.join(root, f)) for f in [
65
- "train.json", "valid.json", "test.json", "categories.csv"
66
- ])
 
 
 
 
 
 
 
67
  if need_meta or not os.path.isdir(os.path.join(root, "images")):
68
  snapshot_download(
69
  "Stylique/Polyvore",
 
44
  # Only fetch what's needed to run and prepare splits
45
  allow = [
46
  "images.zip",
47
+ # root-level (some mirrors place jsons here)
48
  "train.json",
49
  "valid.json",
50
  "test.json",
51
+ # official splits often live here
52
+ "nondisjoint/train.json",
53
+ "nondisjoint/valid.json",
54
+ "nondisjoint/test.json",
55
+ "disjoint/train.json",
56
+ "disjoint/valid.json",
57
+ "disjoint/test.json",
58
+ # light metadata
59
  "polyvore_item_metadata.json",
60
  "polyvore_outfit_titles.json",
61
  "categories.csv",
 
70
  "**/*.tar.gz",
71
  "**/*.7z",
72
  ]
73
+ need_meta = not (
74
+ all(os.path.exists(os.path.join(root, f)) for f in [
75
+ "categories.csv",
76
+ ]) and (
77
+ # any location providing official splits is acceptable
78
+ all(os.path.exists(os.path.join(root, f)) for f in ["train.json", "valid.json", "test.json"]) or
79
+ all(os.path.exists(os.path.join(root, "nondisjoint", f)) for f in ["train.json", "valid.json", "test.json"]) or
80
+ all(os.path.exists(os.path.join(root, "disjoint", f)) for f in ["train.json", "valid.json", "test.json"])
81
+ )
82
+ )
83
  if need_meta or not os.path.isdir(os.path.join(root, "images")):
84
  snapshot_download(
85
  "Stylique/Polyvore",