File size: 884 Bytes
68f893f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import pandas as pd
import numpy as np
import pickle
from tqdm import tqdm
from model.feature_extractor import FeatureExtractor
from utils.image_utils import load_image_from_url

def main():
    df = pd.read_csv("data/product_data.csv")
    fe = FeatureExtractor()
    embeddings = []
    valid_urls = []

    # tqdm wraps the iterable and shows progress bar with description
    for url in tqdm(df['IMAGE_URL'], desc="Extracting embeddings"):
        img = load_image_from_url(url)
        if img is not None:
            emb = fe.extract(img)
            embeddings.append(emb)
            valid_urls.append(url)

    embeddings = np.array(embeddings)
    np.save("data/embeddings.npy", embeddings)

    with open("data/image_urls.pkl", "wb") as f:
        pickle.dump(valid_urls, f)

    print(f"Saved {len(valid_urls)} embeddings and URLs.")

if __name__ == "__main__":
    main()