Daniel Varga committed on
Commit
2c91769
·
1 Parent(s): d1fe6b0

taking files from the 02_LOCATION_PHOTOS.thumbs folder instead of the non-thumbnailed PhotoLibrary.

Browse files
Files changed (3) hide show
  1. app.ini +2 -2
  2. app.py +9 -2
  3. readme.sh +23 -0
app.ini CHANGED
@@ -1,4 +1,4 @@
1
  [DEFAULT]
2
- # pkl = 02_LOCATION_PHOTOS.deduped.f16.pkl
3
- pkl = PhotoLibrary.854G.deduped.f16.pkl
4
  url = https://static.renyi.hu/ai-shared/daniel/sameenergy/
 
1
  [DEFAULT]
2
+ pkl = 02_LOCATION_PHOTOS.deduped.f16.pkl
3
+ # pkl = PhotoLibrary.854G.deduped.f16.pkl
4
  url = https://static.renyi.hu/ai-shared/daniel/sameenergy/
app.py CHANGED
@@ -68,10 +68,15 @@ def build_ann_index(embeddings):
68
  return annoy_index
69
 
70
 
71
- annoy_index = build_ann_index(embeddings)
 
 
 
 
72
 
73
 
74
- filenames = data["filenames"]
 
75
 
76
  folders = ["/".join(filename.split("/")[:-1]) for filename in filenames]
77
  # to make smart indexing possible:
@@ -81,6 +86,8 @@ urls = [base_url + filename for filename in filenames]
81
  urls = np.array(urls)
82
 
83
 
 
 
84
  model, preprocess = clip.load('RN50', device=device)
85
 
86
 
 
68
  return annoy_index
69
 
70
 
71
+ filenames = data["filenames"]
72
+ def thumb_patch(filename):
73
+ prefix = "02_LOCATION_PHOTOS"
74
+ assert filename.startswith(prefix)
75
+ return prefix + ".thumbs" + filename[len(prefix): ]
76
 
77
 
78
+ print("patching filenames")
79
+ filenames = [thumb_patch(filename) for filename in filenames]
80
 
81
  folders = ["/".join(filename.split("/")[:-1]) for filename in filenames]
82
  # to make smart indexing possible:
 
86
  urls = np.array(urls)
87
 
88
 
89
+ annoy_index = build_ann_index(embeddings)
90
+
91
  model, preprocess = clip.load('RN50', device=device)
92
 
93
 
readme.sh CHANGED
@@ -95,3 +95,26 @@ wc -l 02_and_PhotoLibrary.854G.deduped_md5sums PhotoLibrary.854G.deduped_md5sums
95
  # doing the complete would need a re-hash, is not worth the hassle either. staying with PhotoLibrary.854G.deduped_md5sums
96
 
97
  # TODO I don't think lftp has finished successfully, because the Tünde folder has never arrived.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
  # doing the complete would need a re-hash, is not worth the hassle either. staying with PhotoLibrary.854G.deduped_md5sums
96
 
97
  # TODO I don't think lftp has finished successfully, because the Tünde folder has never arrived.
98
+
99
+
100
+
101
+ #####
102
+ # thumbnailing
103
+
104
+ # on hexagon
105
+ cd ~/ai-shared/daniel/sameenergy
106
+ nohup cp -r 02_LOCATION_PHOTOS 02_LOCATION_PHOTOS.thumbs &
107
+ nohup cp -r PhotoLibrary PhotoLibrary.thumbs &
108
+ # -> this is slow: it takes a day or so.
109
+
110
+ # the following code, located at hexagon:~/ai-shared/daniel/sameenergy/downscale.sh,
111
+ # downscales so that the image fits into 1024x1024
112
+ find $root -type f | grep -i "jpeg\|jpg$" | while read f ; do echo "$f" ; convert "$f" -resize "1024x1024>" "$f" ; done
113
+
114
+ # it was run like this, setting root=02_LOCATION_PHOTOS.thumbs
115
+ nohup bash downscale.sh > 02_LOCATION_PHOTOS.downscale.cout 2> 02_LOCATION_PHOTOS.downscale.cerr &
116
+ # -> took a night or so.
117
+ nohup bash downscale.sh > PhotoLibrary.downscale.cout 2> PhotoLibrary.downscale.cerr &
118
+ # -> will take 4 days or so
119
+
120
+ # added code to app.py that patches the filenames loaded from the pickle, changing the 02_LOCATION_PHOTOS prefix to 02_LOCATION_PHOTOS.thumbs