Reza2kn committed on
Commit
a98c4d2
·
verified ·
1 Parent(s): 65d5dac

Add HF login gating + default dataset settings

Browse files
Files changed (1) hide show
  1. app.py +76 -6
app.py CHANGED
@@ -53,7 +53,7 @@ DEFAULT_MAX_BOOST_DB = 20.0
53
  DEFAULT_MAX_ATTEN_DB = 10.0
54
  DEFAULT_AUTO_RESUME = bool(os.getenv("SPACE_ID"))
55
  DEFAULT_ZERO_GPU_SHARD_SIZE = int(
56
- os.getenv("CHIZZLER_ZERO_GPU_SHARD_SIZE", "10")
57
  )
58
  DEFAULT_ZERO_GPU_MAX_SHARDS = int(
59
  os.getenv("CHIZZLER_ZERO_GPU_MAX_SHARDS", "1")
@@ -109,6 +109,39 @@ def get_hf_token() -> Optional[str]:
109
  )
110
 
111
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
  def normalize_dataset_id(value: str) -> str:
113
  if not value:
114
  return ""
@@ -135,7 +168,7 @@ _ENV_MAX_SHARDS = os.getenv("CHIZZLER_MAX_SHARDS_PER_RUN")
135
  if _ENV_MAX_SHARDS is not None:
136
  DEFAULT_MAX_SHARDS_PER_RUN = int(_ENV_MAX_SHARDS)
137
  else:
138
- DEFAULT_MAX_SHARDS_PER_RUN = 5 if os.getenv("SPACE_ID") else 0
139
 
140
  _ENV_CACHE_TO_HUB = os.getenv("CHIZZLER_CACHE_TO_HUB")
141
  if _ENV_CACHE_TO_HUB is None:
@@ -1146,8 +1179,12 @@ def process_dataset_and_push(
1146
  shard_size: Optional[float],
1147
  cache_on_hub: bool,
1148
  max_shards_per_run: Optional[float],
 
1149
  progress=gr.Progress(),
1150
  ) -> str:
 
 
 
1151
  attempts = 0
1152
  while True:
1153
  try:
@@ -1340,7 +1377,7 @@ def assemble_cached_dataset_and_push(
1340
 
1341
 
1342
  @gpu_decorator(DEFAULT_GPU_DURATION)
1343
- def gradio_single_file(
1344
  audio_file,
1345
  vad_threshold,
1346
  max_silence_gap,
@@ -1362,6 +1399,30 @@ def gradio_single_file(
1362
  )
1363
 
1364
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1365
  with gr.Blocks(title="Representation Chizzler") as demo:
1366
  gr.Markdown(
1367
  "# Representation Chizzler\n"
@@ -1369,6 +1430,15 @@ with gr.Blocks(title="Representation Chizzler") as demo:
1369
  "denoising. Use the Single File tab for ad-hoc processing or the Dataset tab "
1370
  "to clean and publish a dataset to the Hugging Face Hub."
1371
  )
 
 
 
 
 
 
 
 
 
1372
 
1373
  with gr.Tab("Single File"):
1374
  audio_input = gr.Audio(label="Upload Audio File", type="filepath")
@@ -1434,10 +1504,10 @@ with gr.Blocks(title="Representation Chizzler") as demo:
1434
  with gr.Tab("Dataset to Hub"):
1435
  dataset_id_input = gr.Textbox(
1436
  label="Dataset ID or URL",
1437
- value="https://huggingface.co/datasets/kiarashQ/farsi-asr-unified-cleaned",
1438
  )
1439
  config_input = gr.Textbox(label="Config (optional)", value="")
1440
- split_input = gr.Textbox(label="Split (optional, or 'all')", value="")
1441
  audio_column_input = gr.Textbox(
1442
  label="Audio column (optional, auto-detect if empty)", value=""
1443
  )
@@ -1460,7 +1530,7 @@ with gr.Blocks(title="Representation Chizzler") as demo:
1460
  value=DEFAULT_CACHE_TO_HUB,
1461
  )
1462
  shard_size_input = gr.Number(
1463
- label="Shard size (examples)", value=1000
1464
  )
1465
  max_shards_input = gr.Number(
1466
  label="Max shards per run (ZeroGPU: 1-5, 0 = no limit)",
 
53
  DEFAULT_MAX_ATTEN_DB = 10.0
54
  DEFAULT_AUTO_RESUME = bool(os.getenv("SPACE_ID"))
55
  DEFAULT_ZERO_GPU_SHARD_SIZE = int(
56
+ os.getenv("CHIZZLER_ZERO_GPU_SHARD_SIZE", "25")
57
  )
58
  DEFAULT_ZERO_GPU_MAX_SHARDS = int(
59
  os.getenv("CHIZZLER_ZERO_GPU_MAX_SHARDS", "1")
 
109
  )
110
 
111
 
112
def get_request_username(request: Optional[gr.Request]) -> Optional[str]:
    """Best-effort extraction of the logged-in Hugging Face username.

    Checks ``request.username`` first, then falls back to ``request.user``
    (which may be a dict-like OAuth payload or a plain value). Returns
    ``None`` when there is no request or no identity information.
    """
    if request is None:
        return None
    name = getattr(request, "username", None)
    if name:
        return name
    user_info = getattr(request, "user", None)
    if isinstance(user_info, dict):
        # OAuth-style payloads expose the handle under "name" or "username".
        return user_info.get("name") or user_info.get("username")
    return str(user_info) if user_info else None
124
+
125
+
126
def require_login(request: Optional[gr.Request], action: str) -> Optional[str]:
    """Gate *action* behind a Hugging Face login when running in a Space.

    Returns ``None`` when access is allowed (not running on a Space, or a
    username could be resolved from *request*); otherwise returns a
    user-facing error message naming the blocked *action*.
    """
    # SPACE_ID is only set by the HF Spaces runtime; enforce nothing locally.
    if not os.getenv("SPACE_ID"):
        return None
    if get_request_username(request):
        return None
    return (
        "Please log in with your Hugging Face account to use "
        f"{action} on ZeroGPU."
    )
136
+
137
+
138
def render_login_status(request: gr.Request) -> str:
    """Return a Markdown snippet describing the current login state."""
    username = get_request_username(request)
    return (
        f"Logged in as **{username}**."
        if username
        else "Not logged in. Use the login button above."
    )
143
+
144
+
145
  def normalize_dataset_id(value: str) -> str:
146
  if not value:
147
  return ""
 
168
  if _ENV_MAX_SHARDS is not None:
169
  DEFAULT_MAX_SHARDS_PER_RUN = int(_ENV_MAX_SHARDS)
170
  else:
171
+ DEFAULT_MAX_SHARDS_PER_RUN = 1 if os.getenv("SPACE_ID") else 0
172
 
173
  _ENV_CACHE_TO_HUB = os.getenv("CHIZZLER_CACHE_TO_HUB")
174
  if _ENV_CACHE_TO_HUB is None:
 
1179
  shard_size: Optional[float],
1180
  cache_on_hub: bool,
1181
  max_shards_per_run: Optional[float],
1182
+ request: gr.Request | None = None,
1183
  progress=gr.Progress(),
1184
  ) -> str:
1185
+ login_error = require_login(request, "dataset processing")
1186
+ if login_error:
1187
+ return login_error
1188
  attempts = 0
1189
  while True:
1190
  try:
 
1377
 
1378
 
1379
  @gpu_decorator(DEFAULT_GPU_DURATION)
1380
+ def _gradio_single_file_gpu(
1381
  audio_file,
1382
  vad_threshold,
1383
  max_silence_gap,
 
1399
  )
1400
 
1401
 
1402
def gradio_single_file(
    audio_file,
    vad_threshold,
    max_silence_gap,
    normalize_audio,
    target_dbfs,
    max_boost_db,
    max_atten_db,
    request: gr.Request | None = None,
):
    """Login-gated wrapper around the GPU single-file handler.

    When ``require_login`` denies access, returns three empty outputs plus
    the error message (assumes ``_gradio_single_file_gpu`` also produces
    four outputs with the message last — confirm against the Gradio
    outputs wiring). Otherwise forwards all arguments unchanged.
    """
    denied = require_login(request, "single-file processing")
    if denied:
        return None, None, None, denied
    args = (
        audio_file,
        vad_threshold,
        max_silence_gap,
        normalize_audio,
        target_dbfs,
        max_boost_db,
        max_atten_db,
    )
    return _gradio_single_file_gpu(*args)
1424
+
1425
+
1426
  with gr.Blocks(title="Representation Chizzler") as demo:
1427
  gr.Markdown(
1428
  "# Representation Chizzler\n"
 
1430
  "denoising. Use the Single File tab for ad-hoc processing or the Dataset tab "
1431
  "to clean and publish a dataset to the Hugging Face Hub."
1432
  )
1433
+ with gr.Row():
1434
+ if hasattr(gr, "LoginButton"):
1435
+ gr.LoginButton()
1436
+ else:
1437
+ gr.Markdown(
1438
+ "Sign in using the Hugging Face button in the Space header."
1439
+ )
1440
+ login_status = gr.Markdown()
1441
+ demo.load(render_login_status, None, login_status)
1442
 
1443
  with gr.Tab("Single File"):
1444
  audio_input = gr.Audio(label="Upload Audio File", type="filepath")
 
1504
  with gr.Tab("Dataset to Hub"):
1505
  dataset_id_input = gr.Textbox(
1506
  label="Dataset ID or URL",
1507
+ value="https://huggingface.co/datasets/MohammadGholizadeh/fleurs-farsi",
1508
  )
1509
  config_input = gr.Textbox(label="Config (optional)", value="")
1510
+ split_input = gr.Textbox(label="Split (optional, or 'all')", value="dev")
1511
  audio_column_input = gr.Textbox(
1512
  label="Audio column (optional, auto-detect if empty)", value=""
1513
  )
 
1530
  value=DEFAULT_CACHE_TO_HUB,
1531
  )
1532
  shard_size_input = gr.Number(
1533
+ label="Shard size (examples)", value=25
1534
  )
1535
  max_shards_input = gr.Number(
1536
  label="Max shards per run (ZeroGPU: 1-5, 0 = no limit)",