asuni committed · Commit a09b358 (verified) · Parent: 16e12da

Upload app.py

Files changed (1): app.py +69 -1
app.py CHANGED
@@ -67,6 +67,74 @@ def list_samples(samples_dir):
     return files
 
 def save_responses_to_hf(rows, repo_id: str | None = None, token: str | None = None):
+    """
+    Append new rows to a CSV file in a private Hugging Face dataset.
+
+    - Reads the existing CSV (if present).
+    - Appends the new rows.
+    - Uploads the updated file back to the repo.
+
+    Each 'row' should be a dict with consistent keys.
+
+    NOTE:
+    - Replaces the entire CSV on each update (no true append on the server side).
+    - Best for small/medium datasets; for large ones, use the `datasets` library instead.
+    """
+    if HfApi is None:
+        return {"status": "hf_unavailable", "reason": "missing_packages"}
+
+    token = token or os.environ.get("HF_TOKEN")
+    repo_id = repo_id or os.environ.get("HF_DATASET_ID")
+    if not token or not repo_id:
+        return {"status": "hf_skipped", "reason": "missing_token_or_repo_env"}
+
+    api = HfApi(token=token)
+    path_in_repo = "data/responses.csv"  # fixed CSV location in repo
+    repo_err = None
+
+    # Ensure the dataset repo exists
+    try:
+        api.create_repo(repo_id=repo_id, repo_type="dataset", private=True, exist_ok=True)
+    except Exception as e:
+        repo_err = str(e)
+
+    # Try downloading the existing CSV
+    existing_df = pd.DataFrame()
+    try:
+        local_path = hf_hub_download(
+            repo_id=repo_id,
+            filename=path_in_repo,
+            repo_type="dataset",
+            token=token,
+        )
+        existing_df = pd.read_csv(local_path)
+    except Exception:
+        # File doesn't exist or is unreadable; start fresh
+        pass
+
+    # Convert new rows to a DataFrame and append
+    new_df = pd.DataFrame(rows)
+    combined_df = pd.concat([existing_df, new_df], ignore_index=True)
+
+    # Save to memory as CSV
+    csv_buffer = io.StringIO()
+    combined_df.to_csv(csv_buffer, index=False)
+    csv_bytes = csv_buffer.getvalue().encode("utf-8")
+
+    # Upload the updated CSV
+    try:
+        api.upload_file(
+            path_or_fileobj=csv_bytes,
+            path_in_repo=path_in_repo,
+            repo_id=repo_id,
+            repo_type="dataset",
+        )
+    except Exception as e:
+        return {"status": "hf_push_error", "error": str(e), "repo_error": repo_err}
+
+    return {"status": "hf_pushed", "rows_added": len(rows), "repo": repo_id, "repo_error": repo_err}
+
+def _save_responses_to_hf(rows, repo_id: str | None = None, token: str | None = None):
     """
     Push a list of dict rows to a private HF dataset, one JSON file per row.
 
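As a quick sanity check of the new CSV flow, the function can be called with any list of dicts that share keys. A minimal sketch (the field names below are hypothetical, not from app.py; it assumes the module-level imports of os, io, pandas as pd, HfApi, and hf_hub_download that the function body relies on, plus HF_TOKEN and HF_DATASET_ID set in the environment):

    rows = [
        {"sample_id": "s1", "response": "good", "rating": 4},
        {"sample_id": "s2", "response": "bad", "rating": 2},
    ]
    result = save_responses_to_hf(rows)
    # On success: {"status": "hf_pushed", "rows_added": 2, "repo": "<repo_id>", "repo_error": None}
    print(result)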
@@ -108,7 +176,7 @@ def save_responses_to_hf(rows, repo_id: str | None = None, token: str | None = N
         json_bytes = json.dumps(row_dict, indent=2).encode("utf-8")
 
         api.upload_file(
-            path_or_fileobj=json_bytes,
+            path_or_obj=json_bytes,
             path_in_repo=path_in_repo,
             repo_id=repo_id,
             repo_type="dataset",
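For reference, the payload keyword that huggingface_hub's HfApi.upload_file actually accepts is path_or_fileobj (a local path, raw bytes, or a file object), so the path_or_obj spelling introduced in the renamed _save_responses_to_hf helper would raise a TypeError if that code path were ever called. A minimal sketch of the JSON-per-row upload with the documented keyword (the token, repo, and file names are hypothetical):

    import json
    from huggingface_hub import HfApi

    api = HfApi(token="hf_...")  # hypothetical token
    row_dict = {"sample_id": "s1", "response": "good"}  # hypothetical row
    api.upload_file(
        path_or_fileobj=json.dumps(row_dict, indent=2).encode("utf-8"),  # raw bytes are accepted
        path_in_repo="data/row_0001.json",
        repo_id="user/private-responses",
        repo_type="dataset",
    )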