bobs24 committed on
Commit
7eec21e
·
verified ·
1 Parent(s): cd90af7

Upload 5 files

Browse files
Files changed (5) hide show
  1. .gitattributes +1 -35
  2. .gitignore +8 -0
  3. README.md +0 -13
  4. precompute_embeddings.py +31 -0
  5. requirements.txt +13 -0
.gitattributes CHANGED
@@ -1,35 +1 @@
1
- *.7z filter=lfs diff=lfs merge=lfs -text
2
- *.arrow filter=lfs diff=lfs merge=lfs -text
3
- *.bin filter=lfs diff=lfs merge=lfs -text
4
- *.bz2 filter=lfs diff=lfs merge=lfs -text
5
- *.ckpt filter=lfs diff=lfs merge=lfs -text
6
- *.ftz filter=lfs diff=lfs merge=lfs -text
7
- *.gz filter=lfs diff=lfs merge=lfs -text
8
- *.h5 filter=lfs diff=lfs merge=lfs -text
9
- *.joblib filter=lfs diff=lfs merge=lfs -text
10
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
- *.model filter=lfs diff=lfs merge=lfs -text
13
- *.msgpack filter=lfs diff=lfs merge=lfs -text
14
- *.npy filter=lfs diff=lfs merge=lfs -text
15
- *.npz filter=lfs diff=lfs merge=lfs -text
16
- *.onnx filter=lfs diff=lfs merge=lfs -text
17
- *.ot filter=lfs diff=lfs merge=lfs -text
18
- *.parquet filter=lfs diff=lfs merge=lfs -text
19
- *.pb filter=lfs diff=lfs merge=lfs -text
20
- *.pickle filter=lfs diff=lfs merge=lfs -text
21
- *.pkl filter=lfs diff=lfs merge=lfs -text
22
- *.pt filter=lfs diff=lfs merge=lfs -text
23
- *.pth filter=lfs diff=lfs merge=lfs -text
24
- *.rar filter=lfs diff=lfs merge=lfs -text
25
- *.safetensors filter=lfs diff=lfs merge=lfs -text
26
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
- *.tar.* filter=lfs diff=lfs merge=lfs -text
28
- *.tar filter=lfs diff=lfs merge=lfs -text
29
- *.tflite filter=lfs diff=lfs merge=lfs -text
30
- *.tgz filter=lfs diff=lfs merge=lfs -text
31
- *.wasm filter=lfs diff=lfs merge=lfs -text
32
- *.xz filter=lfs diff=lfs merge=lfs -text
33
- *.zip filter=lfs diff=lfs merge=lfs -text
34
- *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
1
+ data/product_data.csv filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
.gitignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ env/
2
+ __pycache__/
3
+ *.pyc
4
+ *.pyo
5
+ *.pyd
6
+ *.DS_Store
7
+ data/embeddings.npy
8
+ data/image_urls.pkl
README.md CHANGED
@@ -1,13 +0,0 @@
1
- ---
2
- title: Product Recommendation
3
- emoji: 📉
4
- colorFrom: blue
5
- colorTo: pink
6
- sdk: gradio
7
- sdk_version: 5.34.2
8
- app_file: app.py
9
- pinned: false
10
- short_description: Transformer model with vector embedding and faiss index
11
- ---
12
-
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
precompute_embeddings.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ import numpy as np
3
+ import pickle
4
+ from tqdm import tqdm
5
+ from model.feature_extractor import FeatureExtractor
6
+ from utils.image_utils import load_image_from_url
7
+
8
def main():
    """Precompute image embeddings for the product catalogue and save them.

    Reads ``data/product_data.csv``, tries to load the image behind every
    non-missing ``IMAGE_URL`` value, and passes each image that loaded to
    ``FeatureExtractor.extract``. The collected embeddings are written to
    ``data/embeddings.npy`` and the URLs they came from, in the same order,
    to ``data/image_urls.pkl``. A one-line summary is printed at the end.
    """
    df = pd.read_csv("data/product_data.csv")
    # Blank CSV cells are parsed as NaN (a float), not as a string; drop them
    # so the URL loader is only ever handed real values.
    urls = df["IMAGE_URL"].dropna()

    fe = FeatureExtractor()
    embeddings = []
    valid_urls = []

    # tqdm wraps the iterable and shows a progress bar with a description.
    for url in tqdm(urls, desc="Extracting embeddings"):
        img = load_image_from_url(url)
        if img is None:
            # A None result is treated as "no usable image" and skipped, so
            # embeddings and valid_urls always stay index-aligned.
            continue
        embeddings.append(fe.extract(img))
        valid_urls.append(url)

    np.save("data/embeddings.npy", np.array(embeddings))

    with open("data/image_urls.pkl", "wb") as f:
        pickle.dump(valid_urls, f)

    print(f"Saved {len(valid_urls)} embeddings and URLs.")


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ streamlit
2
+ torch
3
+ torchvision
4
+ timm
5
+ faiss-cpu
6
+ pandas
7
+ Pillow
8
+ requests
9
+ tqdm
10
+ numpy
11
+ fastapi
12
+ uvicorn
13
+ openpyxl