Spaces:
Sleeping
Sleeping
Daniel Varga
committed on
Commit
·
2c91769
1
Parent(s):
d1fe6b0
taking files from the 02_LOCATION_PHOTOS.thumbs folder instead of non-thumbnailed PhotoLibrary.
Browse files
app.ini
CHANGED
|
@@ -1,4 +1,4 @@
|
|
| 1 |
[DEFAULT]
|
| 2 |
-
|
| 3 |
-
pkl = PhotoLibrary.854G.deduped.f16.pkl
|
| 4 |
url = https://static.renyi.hu/ai-shared/daniel/sameenergy/
|
|
|
|
| 1 |
[DEFAULT]
|
| 2 |
+
pkl = 02_LOCATION_PHOTOS.deduped.f16.pkl
|
| 3 |
+
# pkl = PhotoLibrary.854G.deduped.f16.pkl
|
| 4 |
url = https://static.renyi.hu/ai-shared/daniel/sameenergy/
|
app.py
CHANGED
|
@@ -68,10 +68,15 @@ def build_ann_index(embeddings):
|
|
| 68 |
return annoy_index
|
| 69 |
|
| 70 |
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
|
| 74 |
-
|
|
|
|
| 75 |
|
| 76 |
folders = ["/".join(filename.split("/")[:-1]) for filename in filenames]
|
| 77 |
# to make smart indexing possible:
|
|
@@ -81,6 +86,8 @@ urls = [base_url + filename for filename in filenames]
|
|
| 81 |
urls = np.array(urls)
|
| 82 |
|
| 83 |
|
|
|
|
|
|
|
| 84 |
model, preprocess = clip.load('RN50', device=device)
|
| 85 |
|
| 86 |
|
|
|
|
| 68 |
return annoy_index
|
| 69 |
|
| 70 |
|
| 71 |
+
filenames = data["filenames"]
|
| 72 |
+
def thumb_patch(filename, prefix="02_LOCATION_PHOTOS"):
    """Map a photo path onto its counterpart in the ``.thumbs`` mirror tree.

    The leading ``prefix`` of the path is replaced by ``prefix + ".thumbs"``;
    the remainder of the path is kept unchanged.

    Args:
        filename: Path string that must start with ``prefix``.
        prefix: Leading folder name to redirect. Defaults to
            ``"02_LOCATION_PHOTOS"``.

    Returns:
        The path with ``".thumbs"`` inserted right after ``prefix``.

    Raises:
        ValueError: If ``filename`` does not start with ``prefix``.
    """
    # An explicit raise (instead of ``assert``) keeps the check alive under
    # ``python -O``, where assert statements are stripped and a non-matching
    # path would otherwise be silently mangled.
    if not filename.startswith(prefix):
        raise ValueError(
            f"expected a path starting with {prefix!r}, got {filename!r}"
        )
    return prefix + ".thumbs" + filename[len(prefix):]
|
| 76 |
|
| 77 |
|
| 78 |
+
print("patching filenames")
|
| 79 |
+
# Rewrite every loaded filename via thumb_patch so that the folders and URLs
# derived from `filenames` below use the patched paths.
filenames = [thumb_patch(filename) for filename in filenames]
|
| 80 |
|
| 81 |
folders = ["/".join(filename.split("/")[:-1]) for filename in filenames]
|
| 82 |
# to make smart indexing possible:
|
|
|
|
| 86 |
urls = np.array(urls)
|
| 87 |
|
| 88 |
|
| 89 |
+
annoy_index = build_ann_index(embeddings)
|
| 90 |
+
|
| 91 |
model, preprocess = clip.load('RN50', device=device)
|
| 92 |
|
| 93 |
|
readme.sh
CHANGED
|
@@ -95,3 +95,26 @@ wc -l 02_and_PhotoLibrary.854G.deduped_md5sums PhotoLibrary.854G.deduped_md5sums
|
|
| 95 |
# doing the complete would need a re-hash, is not worth the hassle either. staying with PhotoLibrary.854G.deduped_md5sums
|
| 96 |
|
| 97 |
# TODO I don't think lftp has finished successfully, because the Tünde folder has never arrived.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
# doing the complete would need a re-hash, is not worth the hassle either. staying with PhotoLibrary.854G.deduped_md5sums
|
| 96 |
|
| 97 |
# TODO I don't think lftp has finished successfully, because the Tünde folder has never arrived.
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
#####
|
| 102 |
+
# thumbnailing
|
| 103 |
+
|
| 104 |
+
# on hexagon
|
| 105 |
+
cd ~/ai-shared/daniel/sameenergy
|
| 106 |
+
nohup cp -r 02_LOCATION_PHOTOS 02_LOCATION_PHOTOS.thumbs &
|
| 107 |
+
nohup cp -r PhotoLibrary PhotoLibrary.thumbs &
|
| 108 |
+
# -> this is slooooow, a day or so.
|
| 109 |
+
|
| 110 |
+
# the following code, located at hexagon:~/ai-shared/daniel/sameenergy/downscale.sh ,
|
| 111 |
+
# downscales so that the image fits into 1024x1024
|
| 112 |
+
# Downscale every JPEG under "$root" in place so that it fits into 1024x1024.
# Matching is done by find on the file name itself (case-insensitive *.jpg /
# *.jpeg), so a path that merely contains "jpeg" somewhere is no longer picked
# up. NUL-delimited output plus `IFS= read -r -d ''` keeps paths with spaces,
# backslashes or newlines intact; quoting "$root" avoids word splitting.
find "$root" -type f \( -iname '*.jpg' -o -iname '*.jpeg' \) -print0 |
    while IFS= read -r -d '' f ; do
        # Log the file being processed (ends up in the *.downscale.cout file).
        printf '%s\n' "$f"
        # The ">" geometry flag only shrinks images larger than the box;
        # the result overwrites the original file.
        convert "$f" -resize "1024x1024>" "$f"
    done
|
| 113 |
+
|
| 114 |
+
# it was run like this, setting root=02_LOCATION_PHOTOS.thumbs
|
| 115 |
+
nohup bash downscale.sh > 02_LOCATION_PHOTOS.downscale.cout 2> 02_LOCATION_PHOTOS.downscale.cerr &
|
| 116 |
+
# -> took a night or so.
|
| 117 |
+
nohup bash downscale.sh > PhotoLibrary.downscale.cout 2> PhotoLibrary.downscale.cerr &
|
| 118 |
+
# -> will take 4 days or so
|
| 119 |
+
|
| 120 |
+
# added to app.py to patch the filenames in the pickle to change 02_LOCATION_PHOTOS to 02_LOCATION_PHOTOS.thumbs
|