Spaces:

Reza2kn
/

representation-chizzler

Running on Zero

App Files Files Community

Reza2kn committed on Jan 5

Commit

a98c4d2

verified ·

1 Parent(s): 65d5dac

Add HF login gating + default dataset settings

Browse files

Files changed (1) hide show

app.py +76 -6

app.py CHANGED Viewed

@@ -53,7 +53,7 @@ DEFAULT_MAX_BOOST_DB = 20.0
 DEFAULT_MAX_ATTEN_DB = 10.0
 DEFAULT_AUTO_RESUME = bool(os.getenv("SPACE_ID"))
 DEFAULT_ZERO_GPU_SHARD_SIZE = int(
-    os.getenv("CHIZZLER_ZERO_GPU_SHARD_SIZE", "10")
 )
 DEFAULT_ZERO_GPU_MAX_SHARDS = int(
     os.getenv("CHIZZLER_ZERO_GPU_MAX_SHARDS", "1")
@@ -109,6 +109,39 @@ def get_hf_token() -> Optional[str]:
     )
 def normalize_dataset_id(value: str) -> str:
     if not value:
         return ""
@@ -135,7 +168,7 @@ _ENV_MAX_SHARDS = os.getenv("CHIZZLER_MAX_SHARDS_PER_RUN")
 if _ENV_MAX_SHARDS is not None:
     DEFAULT_MAX_SHARDS_PER_RUN = int(_ENV_MAX_SHARDS)
 else:
-    DEFAULT_MAX_SHARDS_PER_RUN = 5 if os.getenv("SPACE_ID") else 0
 _ENV_CACHE_TO_HUB = os.getenv("CHIZZLER_CACHE_TO_HUB")
 if _ENV_CACHE_TO_HUB is None:
@@ -1146,8 +1179,12 @@ def process_dataset_and_push(
     shard_size: Optional[float],
     cache_on_hub: bool,
     max_shards_per_run: Optional[float],
     progress=gr.Progress(),
 ) -> str:
     attempts = 0
     while True:
         try:
@@ -1340,7 +1377,7 @@ def assemble_cached_dataset_and_push(
 @gpu_decorator(DEFAULT_GPU_DURATION)
-def gradio_single_file(
     audio_file,
     vad_threshold,
     max_silence_gap,
@@ -1362,6 +1399,30 @@ def gradio_single_file(
     )
 with gr.Blocks(title="Representation Chizzler") as demo:
     gr.Markdown(
         "# Representation Chizzler\n"
@@ -1369,6 +1430,15 @@ with gr.Blocks(title="Representation Chizzler") as demo:
         "denoising. Use the Single File tab for ad-hoc processing or the Dataset tab "
         "to clean and publish a dataset to the Hugging Face Hub."
     )
     with gr.Tab("Single File"):
         audio_input = gr.Audio(label="Upload Audio File", type="filepath")
@@ -1434,10 +1504,10 @@ with gr.Blocks(title="Representation Chizzler") as demo:
     with gr.Tab("Dataset to Hub"):
         dataset_id_input = gr.Textbox(
             label="Dataset ID or URL",
-            value="https://huggingface.co/datasets/kiarashQ/farsi-asr-unified-cleaned",
         )
         config_input = gr.Textbox(label="Config (optional)", value="")
-        split_input = gr.Textbox(label="Split (optional, or 'all')", value="")
         audio_column_input = gr.Textbox(
             label="Audio column (optional, auto-detect if empty)", value=""
         )
@@ -1460,7 +1530,7 @@ with gr.Blocks(title="Representation Chizzler") as demo:
             value=DEFAULT_CACHE_TO_HUB,
         )
         shard_size_input = gr.Number(
-            label="Shard size (examples)", value=1000
         )
         max_shards_input = gr.Number(
             label="Max shards per run (ZeroGPU: 1-5, 0 = no limit)",

 DEFAULT_MAX_ATTEN_DB = 10.0
 DEFAULT_AUTO_RESUME = bool(os.getenv("SPACE_ID"))
 DEFAULT_ZERO_GPU_SHARD_SIZE = int(
+    os.getenv("CHIZZLER_ZERO_GPU_SHARD_SIZE", "25")
 )
 DEFAULT_ZERO_GPU_MAX_SHARDS = int(
     os.getenv("CHIZZLER_ZERO_GPU_MAX_SHARDS", "1")
     )
+def get_request_username(request: Optional[gr.Request]) -> Optional[str]:
+    """Return the username carried by a request, or None if there is none.
+
+    ``request.username`` is checked first. If it is missing or falsy, the
+    function falls back to ``request.user``: a dict is searched for the
+    "name" and then the "username" key, and any other truthy value is
+    converted with ``str()``.
+    """
+    if request is None:
+        return None
+    # getattr with a default: a request object lacking the attribute does not raise.
+    username = getattr(request, "username", None)
+    if username:
+        return username
+    user = getattr(request, "user", None)
+    if isinstance(user, dict):
+        # May still evaluate to None (or another falsy value) if neither key is set.
+        return user.get("name") or user.get("username")
+    if user:
+        return str(user)
+    return None
+def require_login(request: Optional[gr.Request], action: str) -> Optional[str]:
+    """Return a login prompt if ``action`` is attempted without a username.
+
+    The check is only enforced when the ``SPACE_ID`` environment variable is
+    set to a non-empty value; otherwise the function always returns None.
+
+    Args:
+        request: Request passed to ``get_request_username``; may be None.
+        action: Short description of the action, interpolated into the message.
+
+    Returns:
+        A user-facing message when no username is found, otherwise None.
+    """
+    # No SPACE_ID: skip gating entirely.
+    if not os.getenv("SPACE_ID"):
+        return None
+    username = get_request_username(request)
+    if not username:
+        return (
+            "Please log in with your Hugging Face account to use "
+            f"{action} on ZeroGPU."
+        )
+    return None
+def render_login_status(request: gr.Request) -> str:
+    """Return a Markdown status line saying whether a username was found.
+
+    The username is obtained via ``get_request_username``; it is rendered in
+    bold when present, otherwise a hint to use the login button is returned.
+    """
+    username = get_request_username(request)
+    if username:
+        return f"Logged in as **{username}**."
+    return "Not logged in. Use the login button above."
 def normalize_dataset_id(value: str) -> str:
     if not value:
         return ""
 if _ENV_MAX_SHARDS is not None:
     DEFAULT_MAX_SHARDS_PER_RUN = int(_ENV_MAX_SHARDS)
 else:
+    DEFAULT_MAX_SHARDS_PER_RUN = 1 if os.getenv("SPACE_ID") else 0
 _ENV_CACHE_TO_HUB = os.getenv("CHIZZLER_CACHE_TO_HUB")
 if _ENV_CACHE_TO_HUB is None:
     shard_size: Optional[float],
     cache_on_hub: bool,
     max_shards_per_run: Optional[float],
+    request: gr.Request | None = None,
     progress=gr.Progress(),
 ) -> str:
+    login_error = require_login(request, "dataset processing")
+    if login_error:
+        return login_error
     attempts = 0
     while True:
         try:
 @gpu_decorator(DEFAULT_GPU_DURATION)
+def _gradio_single_file_gpu(
     audio_file,
     vad_threshold,
     max_silence_gap,
     )
+def gradio_single_file(
+    audio_file,
+    vad_threshold,
+    max_silence_gap,
+    normalize_audio,
+    target_dbfs,
+    max_boost_db,
+    max_atten_db,
+    request: gr.Request | None = None,
+):
+    """Run the login check, then delegate to ``_gradio_single_file_gpu``.
+
+    When ``require_login`` reports an error, the function returns the tuple
+    ``(None, None, None, <message>)`` without calling the GPU function.
+    Otherwise it returns whatever ``_gradio_single_file_gpu`` returns for the
+    seven processing arguments; ``request`` is not forwarded.
+    """
+    login_error = require_login(request, "single-file processing")
+    if login_error:
+        # NOTE(review): presumably the four slots mirror the outputs wired to
+        # this handler, with the message in the last one - confirm at the call site.
+        return None, None, None, login_error
+    return _gradio_single_file_gpu(
+        audio_file,
+        vad_threshold,
+        max_silence_gap,
+        normalize_audio,
+        target_dbfs,
+        max_boost_db,
+        max_atten_db,
+    )
 with gr.Blocks(title="Representation Chizzler") as demo:
     gr.Markdown(
         "# Representation Chizzler\n"
         "denoising. Use the Single File tab for ad-hoc processing or the Dataset tab "
         "to clean and publish a dataset to the Hugging Face Hub."
     )
+    with gr.Row():
+        if hasattr(gr, "LoginButton"):
+            gr.LoginButton()
+        else:
+            gr.Markdown(
+                "Sign in using the Hugging Face button in the Space header."
+            )
+        login_status = gr.Markdown()
+    demo.load(render_login_status, None, login_status)
     with gr.Tab("Single File"):
         audio_input = gr.Audio(label="Upload Audio File", type="filepath")
     with gr.Tab("Dataset to Hub"):
         dataset_id_input = gr.Textbox(
             label="Dataset ID or URL",
+            value="https://huggingface.co/datasets/MohammadGholizadeh/fleurs-farsi",
         )
         config_input = gr.Textbox(label="Config (optional)", value="")
+        split_input = gr.Textbox(label="Split (optional, or 'all')", value="dev")
         audio_column_input = gr.Textbox(
             label="Audio column (optional, auto-detect if empty)", value=""
         )
             value=DEFAULT_CACHE_TO_HUB,
         )
         shard_size_input = gr.Number(
+            label="Shard size (examples)", value=25
         )
         max_shards_input = gr.Number(
             label="Max shards per run (ZeroGPU: 1-5, 0 = no limit)",