Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Alina Lozovskaia
committed on
Commit
·
87e47c2
1
Parent(s):
9b133aa
Updated app.py download_dataset function
Browse files
- app.py +15 -11
- src/populate.py +0 -1
- src/tools/collections.py +1 -1
app.py
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
import logging
|
| 3 |
import gradio as gr
|
| 4 |
import pandas as pd
|
|
@@ -56,13 +57,12 @@ enable_space_ci()
|
|
| 56 |
def restart_space():
|
| 57 |
API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
|
| 58 |
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
"""Attempt to download dataset with retries."""
|
| 62 |
attempt = 0
|
| 63 |
while attempt < max_attempts:
|
| 64 |
try:
|
| 65 |
-
|
| 66 |
snapshot_download(
|
| 67 |
repo_id=repo_id,
|
| 68 |
local_dir=local_dir,
|
|
@@ -71,21 +71,25 @@ def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3):
|
|
| 71 |
etag_timeout=30,
|
| 72 |
max_workers=8,
|
| 73 |
)
|
|
|
|
| 74 |
return
|
| 75 |
except Exception as e:
|
| 76 |
-
|
|
|
|
|
|
|
| 77 |
attempt += 1
|
| 78 |
-
|
| 79 |
-
restart_space()
|
| 80 |
-
|
| 81 |
|
| 82 |
def init_space(full_init: bool = True):
|
| 83 |
"""Initializes the application space, loading only necessary data."""
|
| 84 |
if full_init:
|
| 85 |
# These downloads only occur on full initialization
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
# Always retrieve the leaderboard DataFrame
|
| 91 |
raw_data, original_df = get_leaderboard_df(
|
|
|
|
| 1 |
import os
|
| 2 |
+
import time
|
| 3 |
import logging
|
| 4 |
import gradio as gr
|
| 5 |
import pandas as pd
|
|
|
|
| 57 |
def restart_space():
|
| 58 |
API.restart_space(repo_id=REPO_ID, token=H4_TOKEN)
|
| 59 |
|
| 60 |
+
def download_dataset(repo_id, local_dir, repo_type="dataset", max_attempts=3, backoff_factor=1.5):
|
| 61 |
+
"""Download dataset with exponential backoff retries."""
|
|
|
|
| 62 |
attempt = 0
|
| 63 |
while attempt < max_attempts:
|
| 64 |
try:
|
| 65 |
+
logging.info(f"Downloading {repo_id} to {local_dir}")
|
| 66 |
snapshot_download(
|
| 67 |
repo_id=repo_id,
|
| 68 |
local_dir=local_dir,
|
|
|
|
| 71 |
etag_timeout=30,
|
| 72 |
max_workers=8,
|
| 73 |
)
|
| 74 |
+
logging.info("Download successful")
|
| 75 |
return
|
| 76 |
except Exception as e:
|
| 77 |
+
wait_time = backoff_factor ** attempt
|
| 78 |
+
logging.error(f"Error downloading {repo_id}: {e}, retrying in {wait_time}s")
|
| 79 |
+
time.sleep(wait_time)
|
| 80 |
attempt += 1
|
| 81 |
+
raise Exception(f"Failed to download {repo_id} after {max_attempts} attempts")
|
|
|
|
|
|
|
| 82 |
|
| 83 |
def init_space(full_init: bool = True):
|
| 84 |
"""Initializes the application space, loading only necessary data."""
|
| 85 |
if full_init:
|
| 86 |
# These downloads only occur on full initialization
|
| 87 |
+
try:
|
| 88 |
+
download_dataset(QUEUE_REPO, EVAL_REQUESTS_PATH)
|
| 89 |
+
download_dataset(DYNAMIC_INFO_REPO, DYNAMIC_INFO_PATH)
|
| 90 |
+
download_dataset(RESULTS_REPO, EVAL_RESULTS_PATH)
|
| 91 |
+
except Exception:
|
| 92 |
+
restart_space()
|
| 93 |
|
| 94 |
# Always retrieve the leaderboard DataFrame
|
| 95 |
raw_data, original_df = get_leaderboard_df(
|
src/populate.py
CHANGED
|
@@ -52,4 +52,3 @@ def get_leaderboard_df(results_path, requests_path, dynamic_path, cols, benchmar
|
|
| 52 |
df = df[cols].round(decimals=2)
|
| 53 |
df = df[has_no_nan_values(df, benchmark_cols)]
|
| 54 |
return raw_data, df
|
| 55 |
-
|
|
|
|
| 52 |
df = df[cols].round(decimals=2)
|
| 53 |
df = df[has_no_nan_values(df, benchmark_cols)]
|
| 54 |
return raw_data, df
|
|
|
src/tools/collections.py
CHANGED
|
@@ -73,4 +73,4 @@ def update_collections(df: DataFrame):
|
|
| 73 |
try:
|
| 74 |
delete_collection_item(collection_slug=PATH_TO_COLLECTION, item_object_id=item_id, token=H4_TOKEN)
|
| 75 |
except HfHubHTTPError:
|
| 76 |
-
continue
|
|
|
|
| 73 |
try:
|
| 74 |
delete_collection_item(collection_slug=PATH_TO_COLLECTION, item_object_id=item_id, token=H4_TOKEN)
|
| 75 |
except HfHubHTTPError:
|
| 76 |
+
continue
|