Ark-kun committed on
Commit
1b5e9d9
·
1 Parent(s): bb5af01

feat: Create artifacts repo if needed

Browse files
huggingface_overlay/start_HuggingFace.py CHANGED
@@ -63,8 +63,11 @@ print(f"{os.environ=}")
63
 
64
  print(f'{os.environ["PERSISTENT_STORAGE_ENABLED"]=}')
65
 
 
66
  hf_space_author_name = os.environ.get("SPACE_AUTHOR_NAME")
 
67
  hf_space_creator_user_id = os.environ.get("SPACE_CREATOR_USER_ID")
 
68
  print(f"{hf_space_author_name=}")
69
  print(f"{hf_space_creator_user_id=}")
70
 
@@ -111,6 +114,51 @@ if IS_HUGGINGFACE_SPACE:
111
  # Selecting the tenant. It's the user or org that hosts the space.
112
  tenant_name = hf_space_author_name
113
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
114
  # We need to be careful and prevent public spaces with HF_TOKEN set from letting anyone exploit the HF_TOKEN user.
115
  def get_user_details(request: fastapi.Request):
116
  user_can_read = False
@@ -234,6 +282,9 @@ if IS_HUGGINGFACE_SPACE:
234
 
235
  else:
236
  # We're not in space.
 
 
 
237
  ADMIN_USER_NAME = hf_whoami_user_name or "admin"
238
  print(f"{ADMIN_USER_NAME=}")
239
 
 
63
 
64
  print(f'{os.environ["PERSISTENT_STORAGE_ENABLED"]=}')
65
 
66
+ # user or org name
67
  hf_space_author_name = os.environ.get("SPACE_AUTHOR_NAME")
68
+ # Creator *user* ID (never org ID)
69
  hf_space_creator_user_id = os.environ.get("SPACE_CREATOR_USER_ID")
70
+ # SPACE_ID="TangleML/tangle" == f"{SPACE_AUTHOR_NAME}/{SPACE_REPO_NAME}"
71
  print(f"{hf_space_author_name=}")
72
  print(f"{hf_space_creator_user_id=}")
73
 
 
114
  # Selecting the tenant. It's the user or org that hosts the space.
115
  tenant_name = hf_space_author_name
116
 
117
# Ensure an artifacts repo exists when no artifacts root URI was configured.
#
# The repo is a private Hugging Face *dataset* repo named after the space
# ("<SPACE_ID>_data"); artifacts (and logs) are stored under its "data" dir.
if not artifacts_root_uri:
    if not tenant_name:
        raise ValueError("artifacts_root_uri, tenant_name are None")

    repo_type = "dataset"
    # SPACE_ID == "TangleML/tangle" == f"{SPACE_AUTHOR_NAME}/{SPACE_REPO_NAME}"
    space_repo_id = os.environ["SPACE_ID"]
    artifacts_repo_id = space_repo_id + "_data"
    proposed_artifacts_root_uri = f"hf://{repo_type}s/{artifacts_repo_id}/data"
    print(
        f"Artifact repo is not specified. Checking or creating it. {artifacts_repo_id=}"
    )

    # `repo_exists` returns False for a missing repo instead of raising, so
    # only genuine failures (network, auth, ...) reach the `except` below.
    # The previous `repo_info` + `except Exception` combination turned the
    # expected "repo not found" case into a fatal error, which made the
    # creation branch unreachable.
    try:
        repo_exists = huggingface_hub.repo_exists(
            repo_id=artifacts_repo_id,
            repo_type=repo_type,
        )
    except Exception as ex:
        raise RuntimeError(
            f"Error checking for the artifacts repo existence. {artifacts_repo_id=}"
        ) from ex

    if not repo_exists:
        try:
            huggingface_hub.create_repo(
                repo_id=artifacts_repo_id,
                repo_type=repo_type,
                private=True,
                # Tolerate a concurrent creation between the check and this call.
                exist_ok=True,
            )
        except Exception as ex:
            raise RuntimeError(
                f"Error creating the artifacts repo. {artifacts_repo_id=}"
            ) from ex

    # Point at the repo whether it was just created or already existed;
    # previously the URIs were only set on the freshly-created path, leaving
    # them unset on every subsequent start.
    artifacts_root_uri = proposed_artifacts_root_uri
    logs_root_uri = artifacts_root_uri

print(f"{artifacts_root_uri=}")
161
+
162
  # We need to be careful and prevent public spaces with HF_TOKEN set from letting anyone exploit the HF_TOKEN user.
163
  def get_user_details(request: fastapi.Request):
164
  user_can_read = False
 
282
 
283
  else:
284
  # We're not in space.
285
+ if not artifacts_root_uri:
286
+ raise ValueError("Must provide artifacts repo root URI")
287
+
288
  ADMIN_USER_NAME = hf_whoami_user_name or "admin"
289
  print(f"{ADMIN_USER_NAME=}")
290