Anisha Bhatnagar
committed on
Commit
·
0ce5cd2
1
Parent(s):
3ad08b5
download logic
Browse files
- app.py +5 -0
- config/config.yaml +11 -0
- utils/file_download.py +23 -7
app.py
CHANGED
|
@@ -29,6 +29,11 @@ cfg = load_config()
|
|
| 29 |
download_file_override(cfg.get('interp_space_url'), cfg.get('interp_space_path'))
|
| 30 |
download_file_override(cfg.get('instances_to_explain_url'), cfg.get('instances_to_explain_path'))
|
| 31 |
download_file_override(cfg.get('gram2vec_feats_url'), cfg.get('gram2vec_feats_path'))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
from utils.visualizations import *
|
| 34 |
from utils.llm_feat_utils import *
|
|
|
|
| 29 |
download_file_override(cfg.get('interp_space_url'), cfg.get('interp_space_path'))
|
| 30 |
download_file_override(cfg.get('instances_to_explain_url'), cfg.get('instances_to_explain_path'))
|
| 31 |
download_file_override(cfg.get('gram2vec_feats_url'), cfg.get('gram2vec_feats_path'))
|
| 32 |
+
download_file_override(cfg.get('embeddings_cache_url'), cfg.get('embeddings_cache_path'))
|
| 33 |
+
download_file_override(cfg.get('zoom_cache_url'), cfg.get('zoom_cache_path'))
|
| 34 |
+
download_file_override(cfg.get('region_cache_url'), cfg.get('region_cache_path'))
|
| 35 |
+
download_file_override(cfg.get('tsne_cache_url'), cfg.get('tsne_cache_path'))
|
| 36 |
+
download_file_override(cfg.get('llm_style_features_cache_url'), cfg.get('llm_style_features_cache_path'))
|
| 37 |
|
| 38 |
from utils.visualizations import *
|
| 39 |
from utils.llm_feat_utils import *
|
config/config.yaml
CHANGED
|
@@ -6,6 +6,17 @@ interp_space_url: "https://huggingface.co/datasets/miladalsh/explanation_tool
|
|
| 6 |
gram2vec_feats_path: "./datasets/gram2vec_feats.csv"
|
| 7 |
gram2vec_feats_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/gram2vec_feats.csv?download=true"
|
| 8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9 |
style_feat_clm: "llm_tfidf_weights"
|
| 10 |
top_k: 10
|
| 11 |
only_llm_feats: false
|
|
|
|
| 6 |
gram2vec_feats_path: "./datasets/gram2vec_feats.csv"
|
| 7 |
gram2vec_feats_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/gram2vec_feats.csv?download=true"
|
| 8 |
|
| 9 |
+
embeddings_cache_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/embeddings_cache.zip?download=true"
|
| 10 |
+
embeddings_cache_path: "./datasets/embeddings_cache/"
|
| 11 |
+
zoom_cache_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/zoom_cache.zip?download=true"
|
| 12 |
+
zoom_cache_path: "./datasets/zoom_cache/"
|
| 13 |
+
region_cache_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/region_cache.zip?download=true"
|
| 14 |
+
region_cache_path: "./datasets/region_cache/"
|
| 15 |
+
tsne_cache_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/tsne_cache.pkl?download=true"
|
| 16 |
+
tsne_cache_path: "./datasets/tsne_cache.pkl"
|
| 17 |
+
llm_style_features_cache_url: "https://huggingface.co/datasets/miladalsh/explanation_tool_files/resolve/main/feature_spans_cache.zip?download=true"
|
| 18 |
+
llm_style_features_cache_path: "./datasets/feature_spans_cache/"
|
| 19 |
+
|
| 20 |
style_feat_clm: "llm_tfidf_weights"
|
| 21 |
top_k: 10
|
| 22 |
only_llm_feats: false
|
utils/file_download.py
CHANGED
|
@@ -46,14 +46,30 @@ def download_file_override(url: str, dest_path: str):
|
|
| 46 |
with zipfile.ZipFile(tmp_path, 'r') as z:
|
| 47 |
z.extractall(tmp_extract_dir)
|
| 48 |
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
else:
|
| 56 |
-
|
|
|
|
|
|
|
| 57 |
|
| 58 |
print(f"Extracted zip contents into '{dest_path}'.")
|
| 59 |
else:
|
|
|
|
| 46 |
with zipfile.ZipFile(tmp_path, 'r') as z:
|
| 47 |
z.extractall(tmp_extract_dir)
|
| 48 |
|
| 49 |
+
if "cache" not in dest_path:
|
| 50 |
+
# Move *contents* of extracted folder into dest_path
|
| 51 |
+
# cache folders have a different structure, so we skip this step for them
|
| 52 |
+
for item in os.listdir(tmp_extract_dir):
|
| 53 |
+
src = os.path.join(tmp_extract_dir, item)
|
| 54 |
+
dst = os.path.join(dest_path, item)
|
| 55 |
+
if os.path.isdir(src):
|
| 56 |
+
shutil.move(src, dst)
|
| 57 |
+
else:
|
| 58 |
+
shutil.move(src, dst)
|
| 59 |
+
else:
|
| 60 |
+
# processing for cache folders of structure like zoom_cache.zip -> zoom_cache/zoom_cache/*
|
| 61 |
+
# also hold some auto generated macos metadata.
|
| 62 |
+
# Move the entire extracted folder into dest_path
|
| 63 |
+
contents = [x for x in os.listdir(tmp_extract_dir) if not x.startswith('__MACOSX')]
|
| 64 |
+
if len(contents) == 1 and os.path.isdir(os.path.join(tmp_extract_dir, contents[0])):
|
| 65 |
+
# Flatten: Only one top-level dir, move its contents
|
| 66 |
+
only_dir = os.path.join(tmp_extract_dir, contents[0])
|
| 67 |
+
for item in os.listdir(only_dir):
|
| 68 |
+
shutil.move(os.path.join(only_dir, item), os.path.join(dest_path, item))
|
| 69 |
else:
|
| 70 |
+
# Usual: move everything as-is
|
| 71 |
+
for item in contents:
|
| 72 |
+
shutil.move(os.path.join(tmp_extract_dir, item), os.path.join(dest_path, item))
|
| 73 |
|
| 74 |
print(f"Extracted zip contents into '{dest_path}'.")
|
| 75 |
else:
|