Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit: Make the code robust against HTTP errors

Files changed:
- src/details.py    (+3, −0)
- src/hub.py        (+22, −1)
- src/model_tree.py (+5, −1)
- src/results.py    (+3, −0)
src/details.py
CHANGED
|
@@ -55,6 +55,8 @@ async def load_details_dataframe(model_id, subtask):
|
|
| 55 |
return
|
| 56 |
path = max(paths)
|
| 57 |
data = await load_jsonlines_file(path)
|
|
|
|
|
|
|
| 58 |
df = pd.json_normalize(data)
|
| 59 |
# Keep model_name:
|
| 60 |
df["model_name"] = model_id
|
|
@@ -70,6 +72,7 @@ async def load_details(subtask, *model_ids_lists):
|
|
| 70 |
for model_id in model_ids
|
| 71 |
]
|
| 72 |
)
|
|
|
|
| 73 |
if dfs:
|
| 74 |
return pd.concat(dfs)
|
| 75 |
|
|
|
|
| 55 |
return
|
| 56 |
path = max(paths)
|
| 57 |
data = await load_jsonlines_file(path)
|
| 58 |
+
if not data:
|
| 59 |
+
return
|
| 60 |
df = pd.json_normalize(data)
|
| 61 |
# Keep model_name:
|
| 62 |
df["model_name"] = model_id
|
|
|
|
| 72 |
for model_id in model_ids
|
| 73 |
]
|
| 74 |
)
|
| 75 |
+
dfs = [df for df in dfs if df is not None]
|
| 76 |
if dfs:
|
| 77 |
return pd.concat(dfs)
|
| 78 |
|
src/hub.py
CHANGED
|
@@ -8,7 +8,20 @@ from huggingface_hub.utils import build_hf_headers
|
|
| 8 |
import src.constants as constants
|
| 9 |
|
| 10 |
|
| 11 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
fs = HfFileSystem()
|
| 13 |
|
| 14 |
|
|
@@ -20,12 +33,16 @@ def glob(path):
|
|
| 20 |
async def load_json_file(path):
|
| 21 |
url = to_url(path)
|
| 22 |
r = await client.get(url)
|
|
|
|
|
|
|
| 23 |
return r.json()
|
| 24 |
|
| 25 |
|
| 26 |
async def load_jsonlines_file(path):
|
| 27 |
url = to_url(path)
|
| 28 |
r = await client.get(url, headers=build_hf_headers())
|
|
|
|
|
|
|
| 29 |
f = io.StringIO(r.text)
|
| 30 |
return [json.loads(line) for line in f]
|
| 31 |
|
|
@@ -39,6 +56,8 @@ def to_url(path):
|
|
| 39 |
async def load_model_card(model_id):
|
| 40 |
url = to_url(f"{model_id}/README.md")
|
| 41 |
r = await client.get(url)
|
|
|
|
|
|
|
| 42 |
return ModelCard(r.text, ignore_metadata_errors=True)
|
| 43 |
|
| 44 |
|
|
@@ -47,4 +66,6 @@ async def list_models(filtering=None):
|
|
| 47 |
if filtering:
|
| 48 |
params["filter"] = filtering
|
| 49 |
r = await client.get(f"{constants.HF_API_URL}/models", params=params)
|
|
|
|
|
|
|
| 50 |
return r.json()
|
|
|
|
| 8 |
import src.constants as constants
|
| 9 |
|
| 10 |
|
| 11 |
+
class Client:
    """HTTP client wrapper whose ``get`` never raises on HTTP failures.

    Any transport or status error from httpx is absorbed and surfaces as a
    ``None`` return value, so callers only need a ``None`` check instead of
    try/except around every request.
    """

    def __init__(self):
        # Hub URLs frequently redirect, so follow redirects transparently.
        self.client = httpx.AsyncClient(follow_redirects=True)

    async def get(self, url, headers=None, params=None):
        """GET *url*; return the response, or None on any httpx.HTTPError."""
        try:
            response = await self.client.get(url, headers=headers, params=params)
            response.raise_for_status()
            return response
        except httpx.HTTPError:
            return None


# Module-level singleton shared by all loaders in this module.
client = Client()
|
| 25 |
fs = HfFileSystem()
|
| 26 |
|
| 27 |
|
|
|
|
| 33 |
async def load_json_file(path):
    """Fetch the Hub file at *path* and decode it as JSON.

    Returns the decoded object, or None when the HTTP request failed
    (``client.get`` reports failures as None).
    """
    response = await client.get(to_url(path))
    return None if response is None else response.json()
|
| 39 |
|
| 40 |
|
| 41 |
async def load_jsonlines_file(path):
    """Fetch the Hub file at *path* and parse it as JSON Lines.

    Returns a list with one decoded object per non-blank line, or None when
    the HTTP request failed (``client.get`` reports failures as None).
    Authenticated headers are sent so gated/private files resolve.
    """
    url = to_url(path)
    r = await client.get(url, headers=build_hf_headers())
    if r is None:
        return
    # Skip blank lines (e.g. a trailing newline) so json.loads doesn't
    # raise on empty input; iterating io.StringIO would yield them.
    return [json.loads(line) for line in r.text.splitlines() if line.strip()]
|
| 48 |
|
|
|
|
| 56 |
async def load_model_card(model_id):
    """Download and parse the README.md model card of *model_id*.

    Returns a ModelCard, or None when the card could not be fetched.
    Metadata errors inside the card are ignored rather than raised.
    """
    response = await client.get(to_url(f"{model_id}/README.md"))
    if response is None:
        return None
    return ModelCard(response.text, ignore_metadata_errors=True)
|
| 62 |
|
| 63 |
|
|
|
|
| 66 |
if filtering:
|
| 67 |
params["filter"] = filtering
|
| 68 |
r = await client.get(f"{constants.HF_API_URL}/models", params=params)
|
| 69 |
+
if r is None:
|
| 70 |
+
return
|
| 71 |
return r.json()
|
src/model_tree.py
CHANGED
|
@@ -30,13 +30,17 @@ async def load_model_tree(result_paths_per_model, model_ids):
|
|
| 30 |
|
| 31 |
async def load_base_models(model_id) -> list[str]:
|
| 32 |
card = await load_model_card(model_id)
|
|
|
|
|
|
|
| 33 |
base_models = getattr(card.data, constants.BASE_MODEL_TYPE[1])
|
| 34 |
if not isinstance(base_models, list):
|
| 35 |
base_models = [base_models]
|
| 36 |
return base_models
|
| 37 |
|
| 38 |
|
| 39 |
-
async def load_derived_models_by_type(model_id, derived_model_type):
|
| 40 |
models = await list_models(filtering=f"base_model:{derived_model_type}:{model_id}")
|
|
|
|
|
|
|
| 41 |
models = [model["id"] for model in models]
|
| 42 |
return models
|
|
|
|
| 30 |
|
| 31 |
async def load_base_models(model_id) -> list[str]:
    """Return the base-model ids declared in *model_id*'s model card.

    Always returns a list: empty when the card is missing or unfetchable,
    when the card data lacks the base-model attribute, or when it is unset;
    otherwise the declared id(s), normalized to a list.
    """
    card = await load_model_card(model_id)
    if not card:
        return []
    # Default to None: a card without the attribute must not raise
    # AttributeError, and an unset attribute must not produce [None].
    base_models = getattr(card.data, constants.BASE_MODEL_TYPE[1], None)
    if base_models is None:
        return []
    if not isinstance(base_models, list):
        base_models = [base_models]
    return base_models
|
| 39 |
|
| 40 |
|
| 41 |
+
async def load_derived_models_by_type(model_id, derived_model_type) -> list[str]:
    """List ids of models related to *model_id* through *derived_model_type*,
    using the Hub's ``base_model`` filter.

    Returns an empty list when the listing request failed or found nothing.
    """
    listing = await list_models(filtering=f"base_model:{derived_model_type}:{model_id}")
    if not listing:
        return []
    return [entry["id"] for entry in listing]
|
src/results.py
CHANGED
|
@@ -34,6 +34,9 @@ async def load_results_dataframe(model_id, result_paths_per_model=None):
|
|
| 34 |
return
|
| 35 |
result_paths = result_paths_per_model[model_id]
|
| 36 |
results = await asyncio.gather(*[load_json_file(path) for path in result_paths])
|
|
|
|
|
|
|
|
|
|
| 37 |
data = {"results": {}, "configs": {}}
|
| 38 |
for result in results:
|
| 39 |
data["results"].update(result["results"])
|
|
|
|
| 34 |
return
|
| 35 |
result_paths = result_paths_per_model[model_id]
|
| 36 |
results = await asyncio.gather(*[load_json_file(path) for path in result_paths])
|
| 37 |
+
results = [result for result in results if result]
|
| 38 |
+
if not results:
|
| 39 |
+
return
|
| 40 |
data = {"results": {}, "configs": {}}
|
| 41 |
for result in results:
|
| 42 |
data["results"].update(result["results"])
|