Spaces:
Running on Zero
Running on Zero
Add HF login gating + default dataset settings
Browse files
app.py
CHANGED
|
@@ -53,7 +53,7 @@ DEFAULT_MAX_BOOST_DB = 20.0
|
|
| 53 |
DEFAULT_MAX_ATTEN_DB = 10.0
|
| 54 |
DEFAULT_AUTO_RESUME = bool(os.getenv("SPACE_ID"))
|
| 55 |
DEFAULT_ZERO_GPU_SHARD_SIZE = int(
|
| 56 |
-
os.getenv("CHIZZLER_ZERO_GPU_SHARD_SIZE", "
|
| 57 |
)
|
| 58 |
DEFAULT_ZERO_GPU_MAX_SHARDS = int(
|
| 59 |
os.getenv("CHIZZLER_ZERO_GPU_MAX_SHARDS", "1")
|
|
@@ -109,6 +109,39 @@ def get_hf_token() -> Optional[str]:
|
|
| 109 |
)
|
| 110 |
|
| 111 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
def normalize_dataset_id(value: str) -> str:
|
| 113 |
if not value:
|
| 114 |
return ""
|
|
@@ -135,7 +168,7 @@ _ENV_MAX_SHARDS = os.getenv("CHIZZLER_MAX_SHARDS_PER_RUN")
|
|
| 135 |
if _ENV_MAX_SHARDS is not None:
|
| 136 |
DEFAULT_MAX_SHARDS_PER_RUN = int(_ENV_MAX_SHARDS)
|
| 137 |
else:
|
| 138 |
-
DEFAULT_MAX_SHARDS_PER_RUN =
|
| 139 |
|
| 140 |
_ENV_CACHE_TO_HUB = os.getenv("CHIZZLER_CACHE_TO_HUB")
|
| 141 |
if _ENV_CACHE_TO_HUB is None:
|
|
@@ -1146,8 +1179,12 @@ def process_dataset_and_push(
|
|
| 1146 |
shard_size: Optional[float],
|
| 1147 |
cache_on_hub: bool,
|
| 1148 |
max_shards_per_run: Optional[float],
|
|
|
|
| 1149 |
progress=gr.Progress(),
|
| 1150 |
) -> str:
|
|
|
|
|
|
|
|
|
|
| 1151 |
attempts = 0
|
| 1152 |
while True:
|
| 1153 |
try:
|
|
@@ -1340,7 +1377,7 @@ def assemble_cached_dataset_and_push(
|
|
| 1340 |
|
| 1341 |
|
| 1342 |
@gpu_decorator(DEFAULT_GPU_DURATION)
|
| 1343 |
-
def
|
| 1344 |
audio_file,
|
| 1345 |
vad_threshold,
|
| 1346 |
max_silence_gap,
|
|
@@ -1362,6 +1399,30 @@ def gradio_single_file(
|
|
| 1362 |
)
|
| 1363 |
|
| 1364 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1365 |
with gr.Blocks(title="Representation Chizzler") as demo:
|
| 1366 |
gr.Markdown(
|
| 1367 |
"# Representation Chizzler\n"
|
|
@@ -1369,6 +1430,15 @@ with gr.Blocks(title="Representation Chizzler") as demo:
|
|
| 1369 |
"denoising. Use the Single File tab for ad-hoc processing or the Dataset tab "
|
| 1370 |
"to clean and publish a dataset to the Hugging Face Hub."
|
| 1371 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1372 |
|
| 1373 |
with gr.Tab("Single File"):
|
| 1374 |
audio_input = gr.Audio(label="Upload Audio File", type="filepath")
|
|
@@ -1434,10 +1504,10 @@ with gr.Blocks(title="Representation Chizzler") as demo:
|
|
| 1434 |
with gr.Tab("Dataset to Hub"):
|
| 1435 |
dataset_id_input = gr.Textbox(
|
| 1436 |
label="Dataset ID or URL",
|
| 1437 |
-
value="https://huggingface.co/datasets/
|
| 1438 |
)
|
| 1439 |
config_input = gr.Textbox(label="Config (optional)", value="")
|
| 1440 |
-
split_input = gr.Textbox(label="Split (optional, or 'all')", value="")
|
| 1441 |
audio_column_input = gr.Textbox(
|
| 1442 |
label="Audio column (optional, auto-detect if empty)", value=""
|
| 1443 |
)
|
|
@@ -1460,7 +1530,7 @@ with gr.Blocks(title="Representation Chizzler") as demo:
|
|
| 1460 |
value=DEFAULT_CACHE_TO_HUB,
|
| 1461 |
)
|
| 1462 |
shard_size_input = gr.Number(
|
| 1463 |
-
label="Shard size (examples)", value=
|
| 1464 |
)
|
| 1465 |
max_shards_input = gr.Number(
|
| 1466 |
label="Max shards per run (ZeroGPU: 1-5, 0 = no limit)",
|
|
|
|
| 53 |
DEFAULT_MAX_ATTEN_DB = 10.0
|
| 54 |
DEFAULT_AUTO_RESUME = bool(os.getenv("SPACE_ID"))
|
| 55 |
DEFAULT_ZERO_GPU_SHARD_SIZE = int(
|
| 56 |
+
os.getenv("CHIZZLER_ZERO_GPU_SHARD_SIZE", "25")
|
| 57 |
)
|
| 58 |
DEFAULT_ZERO_GPU_MAX_SHARDS = int(
|
| 59 |
os.getenv("CHIZZLER_ZERO_GPU_MAX_SHARDS", "1")
|
|
|
|
| 109 |
)
|
| 110 |
|
| 111 |
|
| 112 |
+
def get_request_username(request: Optional[gr.Request]) -> Optional[str]:
    """Return the username carried by a Gradio request, or ``None``.

    Lookup order:

    1. ``request.username`` when it is present and truthy.
    2. ``request.user`` when it is a dict: its ``"name"`` entry, falling
       back to its ``"username"`` entry.
    3. ``request.user`` when it is any other truthy object: ``str(user)``.

    Args:
        request: The request object for the current event, or ``None``.

    Returns:
        The resolved username, or ``None`` when ``request`` is ``None`` or
        none of the lookups above yields a truthy value.
    """
    if request is None:
        return None

    # getattr with a default keeps this tolerant of request objects that do
    # not define the attribute at all.
    username = getattr(request, "username", None)
    if username:
        return username

    user = getattr(request, "user", None)
    if isinstance(user, dict):
        return user.get("name") or user.get("username")
    if user:
        return str(user)
    return None
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def require_login(request: Optional[gr.Request], action: str) -> Optional[str]:
    """Return a login-required message for ``action``, or ``None`` if allowed.

    The check is only enforced when the ``SPACE_ID`` environment variable is
    set; otherwise every caller is allowed through.

    Args:
        request: The request object for the current event, or ``None``.
        action: Human-readable name of the gated action; it is interpolated
            into the returned message.

    Returns:
        ``None`` when the action may proceed, otherwise the message to show
        to the user.
    """
    # No SPACE_ID in the environment: gating is skipped entirely.
    if not os.getenv("SPACE_ID"):
        return None

    username = get_request_username(request)
    if not username:
        return (
            "Please log in with your Hugging Face account to use "
            f"{action} on ZeroGPU."
        )
    return None
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def render_login_status(request: gr.Request) -> str:
    """Build the Markdown line describing the caller's login state.

    Args:
        request: The request object for the current event.

    Returns:
        ``"Logged in as **<username>**."`` when a username can be resolved
        from ``request``, otherwise a hint to use the login button.
    """
    username = get_request_username(request)
    if username:
        return f"Logged in as **{username}**."
    return "Not logged in. Use the login button above."
|
| 143 |
+
|
| 144 |
+
|
| 145 |
def normalize_dataset_id(value: str) -> str:
|
| 146 |
if not value:
|
| 147 |
return ""
|
|
|
|
| 168 |
if _ENV_MAX_SHARDS is not None:
|
| 169 |
DEFAULT_MAX_SHARDS_PER_RUN = int(_ENV_MAX_SHARDS)
|
| 170 |
else:
|
| 171 |
+
DEFAULT_MAX_SHARDS_PER_RUN = 1 if os.getenv("SPACE_ID") else 0
|
| 172 |
|
| 173 |
_ENV_CACHE_TO_HUB = os.getenv("CHIZZLER_CACHE_TO_HUB")
|
| 174 |
if _ENV_CACHE_TO_HUB is None:
|
|
|
|
| 1179 |
shard_size: Optional[float],
|
| 1180 |
cache_on_hub: bool,
|
| 1181 |
max_shards_per_run: Optional[float],
|
| 1182 |
+
request: gr.Request | None = None,
|
| 1183 |
progress=gr.Progress(),
|
| 1184 |
) -> str:
|
| 1185 |
+
login_error = require_login(request, "dataset processing")
|
| 1186 |
+
if login_error:
|
| 1187 |
+
return login_error
|
| 1188 |
attempts = 0
|
| 1189 |
while True:
|
| 1190 |
try:
|
|
|
|
| 1377 |
|
| 1378 |
|
| 1379 |
@gpu_decorator(DEFAULT_GPU_DURATION)
|
| 1380 |
+
def _gradio_single_file_gpu(
|
| 1381 |
audio_file,
|
| 1382 |
vad_threshold,
|
| 1383 |
max_silence_gap,
|
|
|
|
| 1399 |
)
|
| 1400 |
|
| 1401 |
|
| 1402 |
+
def gradio_single_file(
    audio_file,
    vad_threshold,
    max_silence_gap,
    normalize_audio,
    target_dbfs,
    max_boost_db,
    max_atten_db,
    request: gr.Request | None = None,
):
    """Login-gated entry point for single-file processing.

    Runs ``require_login`` first and only then delegates to
    ``_gradio_single_file_gpu`` with the seven processing arguments
    unchanged (``request`` is not forwarded).

    Returns:
        ``(None, None, None, message)`` when the login check fails,
        otherwise whatever ``_gradio_single_file_gpu`` returns.
    """
    # NOTE(review): the check lives in this undecorated wrapper, presumably
    # so that a rejected call never enters the GPU-decorated function.
    login_error = require_login(request, "single-file processing")
    if login_error:
        return None, None, None, login_error

    return _gradio_single_file_gpu(
        audio_file,
        vad_threshold,
        max_silence_gap,
        normalize_audio,
        target_dbfs,
        max_boost_db,
        max_atten_db,
    )
|
| 1424 |
+
|
| 1425 |
+
|
| 1426 |
with gr.Blocks(title="Representation Chizzler") as demo:
|
| 1427 |
gr.Markdown(
|
| 1428 |
"# Representation Chizzler\n"
|
|
|
|
| 1430 |
"denoising. Use the Single File tab for ad-hoc processing or the Dataset tab "
|
| 1431 |
"to clean and publish a dataset to the Hugging Face Hub."
|
| 1432 |
)
|
| 1433 |
+
with gr.Row():
|
| 1434 |
+
if hasattr(gr, "LoginButton"):
|
| 1435 |
+
gr.LoginButton()
|
| 1436 |
+
else:
|
| 1437 |
+
gr.Markdown(
|
| 1438 |
+
"Sign in using the Hugging Face button in the Space header."
|
| 1439 |
+
)
|
| 1440 |
+
login_status = gr.Markdown()
|
| 1441 |
+
demo.load(render_login_status, None, login_status)
|
| 1442 |
|
| 1443 |
with gr.Tab("Single File"):
|
| 1444 |
audio_input = gr.Audio(label="Upload Audio File", type="filepath")
|
|
|
|
| 1504 |
with gr.Tab("Dataset to Hub"):
|
| 1505 |
dataset_id_input = gr.Textbox(
|
| 1506 |
label="Dataset ID or URL",
|
| 1507 |
+
value="https://huggingface.co/datasets/MohammadGholizadeh/fleurs-farsi",
|
| 1508 |
)
|
| 1509 |
config_input = gr.Textbox(label="Config (optional)", value="")
|
| 1510 |
+
split_input = gr.Textbox(label="Split (optional, or 'all')", value="dev")
|
| 1511 |
audio_column_input = gr.Textbox(
|
| 1512 |
label="Audio column (optional, auto-detect if empty)", value=""
|
| 1513 |
)
|
|
|
|
| 1530 |
value=DEFAULT_CACHE_TO_HUB,
|
| 1531 |
)
|
| 1532 |
shard_size_input = gr.Number(
|
| 1533 |
+
label="Shard size (examples)", value=25
|
| 1534 |
)
|
| 1535 |
max_shards_input = gr.Number(
|
| 1536 |
label="Max shards per run (ZeroGPU: 1-5, 0 = no limit)",
|