Spaces:

Reza2kn
/

representation-chizzler

Running on Zero

App Files Files Community

Reza2kn committed on Jan 4

Commit

c93b3b8

verified ·

1 Parent(s): 29d3797

Upload folder using huggingface_hub

Browse files

Files changed (1) hide show

app.py +90 -12

app.py CHANGED Viewed

@@ -6,6 +6,7 @@ import shutil
 import subprocess
 import sys
 import tempfile
 from datetime import datetime
 from pathlib import Path
 from typing import List, Optional, Tuple
@@ -441,7 +442,71 @@ def load_audio_bytes(audio_bytes: bytes, log: bool = False) -> Tuple[torch.Tenso
     return waveform, sample_rate
-def prepare_waveform_from_entry(entry, log: bool = False) -> Tuple[torch.Tensor, int]:
     if entry is None:
         raise ValueError("Empty audio entry.")
@@ -467,14 +532,23 @@ def prepare_waveform_from_entry(entry, log: bool = False) -> Tuple[torch.Tensor,
                 )
             return waveform, sample_rate
-        if entry.get("path"):
-            return load_audio_file(entry["path"], log=log)
         if entry.get("bytes"):
-            return load_audio_bytes(entry["bytes"], log=log)
     if isinstance(entry, str):
-        return load_audio_file(entry, log=log)
     raise ValueError("Unsupported audio entry format.")
@@ -610,7 +684,7 @@ def process_dataset_and_push(
                 error_message = ""
                 try:
                     waveform, sample_rate = prepare_waveform_from_entry(
-                        entry, log=False
                     )
                     vad_waveform, denoised_waveform, _, has_speech = process_waveform(
                         waveform,
@@ -632,15 +706,19 @@ def process_dataset_and_push(
                         .numpy()
                         .astype(np.float32)
                     )
                 except Exception as exc:
                     ok = False
                     error_message = str(exc)
-                    output_np = np.zeros(1, dtype=np.float32)
-                example[audio_column] = {
-                    "array": output_np,
-                    "sampling_rate": DEFAULT_SAMPLE_RATE,
-                }
                 example["chizzler_ok"] = ok
                 example["chizzler_error"] = error_message

 import subprocess
 import sys
 import tempfile
+import urllib.request
 from datetime import datetime
 from pathlib import Path
 from typing import List, Optional, Tuple
     return waveform, sample_rate
+def _is_http_url(value: str) -> bool:
+    return value.startswith("http://") or value.startswith("https://")
+def _parse_hf_dataset_uri(uri: str) -> Optional[Tuple[str, str, Optional[str]]]:
+    prefix = "hf://datasets/"
+    if not uri.startswith(prefix):
+        return None
+    rest = uri[len(prefix) :]
+    if "/" not in rest:
+        return None
+    repo_id, file_path = rest.split("/", 1)
+    revision = None
+    if "@" in repo_id:
+        repo_id, revision = repo_id.split("@", 1)
+    return repo_id, file_path, revision
def load_audio_url(url: str, token: Optional[str], log: bool = False) -> Tuple[torch.Tensor, int]:
    """Download audio from *url* and decode it with ``load_audio_bytes``.

    The bearer *token* is attached only when the URL uses HTTPS and its host
    is ``huggingface.co`` or a subdomain of it. The host is taken from the
    parsed URL rather than searched for as a substring, so the token is not
    sent to look-alike hosts (``huggingface.co.example.org``) or to URLs that
    merely mention the domain in their path or query string. It is also never
    sent over plain HTTP.

    Args:
        url: HTTP(S) location of the audio file.
        token: Optional Hugging Face access token.
        log: Forwarded to ``load_audio_bytes``.

    Returns:
        Whatever ``load_audio_bytes`` returns for the downloaded payload
        (annotated as a ``(waveform, sample_rate)`` pair).

    Raises:
        urllib.error.URLError: If the request fails.
        ValueError: If *url* cannot be parsed.
    """
    # Local import keeps this block self-contained regardless of which
    # urllib submodules the file imports at the top.
    from urllib.parse import urlsplit

    headers = {}
    if token:
        parts = urlsplit(url)
        host = (parts.hostname or "").lower()
        is_hf_host = host == "huggingface.co" or host.endswith(".huggingface.co")
        if parts.scheme == "https" and is_hf_host:
            headers["Authorization"] = f"Bearer {token}"
    request = urllib.request.Request(url, headers=headers)
    with urllib.request.urlopen(request) as response:
        data = response.read()
    return load_audio_bytes(data, log=log)
def resolve_audio_path(
    path: str, dataset_id: Optional[str], token: Optional[str]
) -> str:
    """Resolve an audio reference to something loadable, best-effort.

    Resolution order:

    1. An existing local path is returned unchanged.
    2. An HTTP(S) URL is returned unchanged. It is never looked up on the
       Hub as a repo-relative filename, which would only produce a failing
       network request.
    3. An ``hf://datasets/...`` URI is downloaded with ``hf_hub_download``.
    4. A non-absolute path is looked up inside *dataset_id*, when given.

    Any download failure is swallowed and the original *path* is returned,
    so the caller's own loader reports the error for that entry.

    Args:
        path: Local path, HTTP(S) URL, ``hf://`` URI, or repo-relative name.
        dataset_id: Dataset repo used to resolve repo-relative paths.
        token: Optional Hugging Face access token passed to the downloader.

    Returns:
        The local path of the resolved file, or *path* itself when nothing
        could be resolved.
    """
    if os.path.exists(path):
        return path
    # URLs are not repo-relative filenames; skip the Hub lookup entirely.
    if path[:8].lower().startswith(("http://", "https://")):
        return path
    parsed = _parse_hf_dataset_uri(path)
    if parsed:
        repo_id, filename, revision = parsed
        try:
            return hf_hub_download(
                repo_id=repo_id,
                repo_type="dataset",
                filename=filename,
                revision=revision,
                token=token,
            )
        except Exception:
            # Deliberate best-effort: fall back to the raw reference.
            return path
    if dataset_id and not os.path.isabs(path):
        try:
            return hf_hub_download(
                repo_id=dataset_id,
                repo_type="dataset",
                filename=path,
                token=token,
            )
        except Exception:
            # Deliberate best-effort: fall back to the raw reference.
            return path
    return path
+def prepare_waveform_from_entry(
+    entry,
+    log: bool = False,
+    dataset_id: Optional[str] = None,
+    token: Optional[str] = None,
+) -> Tuple[torch.Tensor, int]:
     if entry is None:
         raise ValueError("Empty audio entry.")
                 )
             return waveform, sample_rate
         if entry.get("bytes"):
+            audio_bytes = entry["bytes"]
+            if not isinstance(audio_bytes, (bytes, bytearray)):
+                audio_bytes = bytes(audio_bytes)
+            return load_audio_bytes(audio_bytes, log=log)
+        if entry.get("path"):
+            path = resolve_audio_path(entry["path"], dataset_id, token)
+            if _is_http_url(path):
+                return load_audio_url(path, token, log=log)
+            return load_audio_file(path, log=log)
     if isinstance(entry, str):
+        path = resolve_audio_path(entry, dataset_id, token)
+        if _is_http_url(path):
+            return load_audio_url(path, token, log=log)
+        return load_audio_file(path, log=log)
     raise ValueError("Unsupported audio entry format.")
                 error_message = ""
                 try:
                     waveform, sample_rate = prepare_waveform_from_entry(
+                        entry, log=False, dataset_id=dataset_id, token=token
                     )
                     vad_waveform, denoised_waveform, _, has_speech = process_waveform(
                         waveform,
                         .numpy()
                         .astype(np.float32)
                     )
+                    output_entry = {
+                        "array": output_np,
+                        "sampling_rate": DEFAULT_SAMPLE_RATE,
+                    }
                 except Exception as exc:
                     ok = False
                     error_message = str(exc)
+                    output_entry = entry if entry is not None else {
+                        "array": np.zeros(1, dtype=np.float32),
+                        "sampling_rate": DEFAULT_SAMPLE_RATE,
+                    }
+                example[audio_column] = output_entry
                 example["chizzler_ok"] = ok
                 example["chizzler_error"] = error_message