Sulitha committed on
Commit
10b0a14
·
1 Parent(s): 18fe10d

feat: optional Hub upload for recordings + persistence docs

Browse files
Files changed (3) hide show
  1. README.md +25 -0
  2. app.py +74 -7
  3. requirements.txt +2 -1
README.md CHANGED
@@ -12,3 +12,28 @@ short_description: Collect microphone recordings for six spells
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
15
+
16
+ ## Persistence of Recordings
17
+
18
+ Recordings created via the UI are written at runtime into the `recordings/` folder inside the Space container. These files are NOT automatically versioned or shown in the repository file browser. To make them visible in the repo you must either:
19
+
20
+ 1. Commit them manually (e.g., pull the Space locally, copy files, `git add recordings/*.wav`, push).
21
+ 2. Or enable automatic upload using a Hugging Face token.
22
+
23
+ ### Automatic Upload (Recommended)
24
+
25
+ Set a secret named `HF_TOKEN` in the Space settings (must have write access). Optionally set:
26
+
27
+ - `HF_UPLOAD_REPO`: target repo id (recommended: a dataset repo such as `username/spell-recordings`).
28
+ - `HF_UPLOAD_REPO_TYPE`: one of `dataset` (default), `space`, or `model`.
29
+
30
+ If `HF_UPLOAD_REPO` is omitted, the current Space id is used as the target repo. In that case set `HF_UPLOAD_REPO_TYPE=space` so the files are uploaded into the Space repo itself.
31
+
32
+ Then check the "Upload to Hub" box before submitting. Each saved `.wav` file is uploaded via the Hub API as its own commit, with a message like `Add recordings <timestamp>`.
33
+
34
+ Uploads may take a few seconds. Large batches could hit rate limits; keep per-submit sizes modest.
35
+
36
+ ### Why You Don't See Runtime Files
37
+
38
+ The repository view shows only Git-tracked content. Runtime-generated files exist only in the container's ephemeral filesystem and are lost when the Space restarts. Upload or commit them if you need persistence.
39
+
app.py CHANGED
@@ -2,12 +2,17 @@ import os
2
  import re
3
  import time
4
  import math
5
- from typing import List, Tuple, Optional
6
 
7
  import numpy as np
8
  import gradio as gr
9
  import soundfile as sf
10
  from scipy.signal import resample_poly
 
 
 
 
 
11
 
12
  # Output directory for saved recordings
13
  OUT_DIR = "recordings"
@@ -84,6 +89,53 @@ def save_one_from_path(filepath: Optional[str], spell: str, username: str) -> Op
84
  return out_path
85
 
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  def submit_recordings(
88
  username: str,
89
  lumos_path: Optional[str],
@@ -92,6 +144,7 @@ def submit_recordings(
92
  wingardium_path: Optional[str],
93
  accio_path: Optional[str],
94
  reparo_path: Optional[str],
 
95
  ) -> str:
96
  user = sanitize_username(username)
97
 
@@ -107,16 +160,18 @@ def submit_recordings(
107
  saved = []
108
  skipped = []
109
 
 
110
  for spell, path in pairs:
111
  out = save_one_from_path(path, spell, user)
112
  if out:
113
  saved.append(f"{spell} -> {os.path.basename(out)}")
 
114
  else:
115
  skipped.append(spell)
116
 
117
  lines = []
118
  if saved:
119
- lines.append("Saved recordings:")
120
  lines += [f"- {s}" for s in saved]
121
  if skipped:
122
  lines.append("")
@@ -125,6 +180,15 @@ def submit_recordings(
125
  if not lines:
126
  return "No audio captured. Please record at least one spell."
127
 
 
 
 
 
 
 
 
 
 
128
  return "\n".join(lines)
129
 
130
 
@@ -150,20 +214,23 @@ def build_ui() -> gr.Blocks:
150
  accio = gr.Audio(label="Accio", sources=["microphone", "upload"], type="filepath")
151
  reparo = gr.Audio(label="Reparo", sources=["microphone", "upload"], type="filepath")
152
 
 
 
153
  submit = gr.Button("Submit")
154
  result = gr.Markdown()
155
 
156
  submit.click(
157
  fn=submit_recordings,
158
- inputs=[username, lumos, nox, alohomora, wingardium, accio, reparo],
159
  outputs=[result],
160
  )
161
 
162
  gr.Markdown("""
163
- Notes:
164
- - Files are saved in the app's `recordings/` folder using: `<spell>_<username>_<timestamp>.wav`.
165
- - 16 kHz mono WAV is used to make model training consistent.
166
- - You don't have to record all spells at once—submit whatever you have.
 
167
  """)
168
 
169
  return demo
 
2
  import re
3
  import time
4
  import math
5
+ from typing import List, Tuple, Optional, Sequence
6
 
7
  import numpy as np
8
  import gradio as gr
9
  import soundfile as sf
10
  from scipy.signal import resample_poly
11
+ try:
12
+ from huggingface_hub import HfApi, HfFolder
13
+ except Exception: # package might be missing in some local runs
14
+ HfApi = None
15
+ HfFolder = None
16
 
17
  # Output directory for saved recordings
18
  OUT_DIR = "recordings"
 
89
  return out_path
90
 
91
 
92
def upload_recordings(paths: Sequence[str]) -> Tuple[int, Optional[str]]:
    """Upload the given recording files to a Hugging Face Hub repo.

    Configuration is read from environment variables:

    * ``HF_TOKEN``: token used for the upload. When unset, the token stored
      by ``HfFolder`` is used if that helper is available.
    * ``HF_UPLOAD_REPO``: target repo id. When unset, ``SPACE_ID`` (then
      ``REPO_ID``) is used so files go back to the current Space.
    * ``HF_UPLOAD_REPO_TYPE``: ``dataset``, ``space`` or ``model``. When unset
      or invalid it defaults to ``dataset`` for an explicit ``HF_UPLOAD_REPO``
      and to ``space`` when the repo id was inferred from the Space
      environment.

    Args:
        paths: Local file paths to upload. Entries that are not existing
            files are skipped and not counted.

    Returns:
        ``(uploaded_count, error_message)``. ``error_message`` is ``None`` on
        success; on failure ``uploaded_count`` is the number of files that
        were uploaded before the error occurred.
    """
    if not paths:
        return 0, None
    if HfApi is None:
        return 0, "huggingface_hub not installed."
    token = os.getenv("HF_TOKEN") or (HfFolder.get_token() if HfFolder else None)
    if not token:
        return 0, "No HF_TOKEN available (set as Space secret to enable uploads)."

    repo_id = os.getenv("HF_UPLOAD_REPO")
    default_repo_type = "dataset"
    if not repo_id:
        # In Spaces, SPACE_ID is like "username/space_name" for the current
        # Space. An id inferred this way names a Space repo, so the default
        # repo type must be "space"; addressing it as a dataset would fail.
        repo_id = os.getenv("SPACE_ID") or os.getenv("REPO_ID")
        default_repo_type = "space"
    if not repo_id:
        return 0, "Unable to infer target repo id (set HF_UPLOAD_REPO)."

    # An explicit, valid HF_UPLOAD_REPO_TYPE always wins over the default.
    repo_type = (os.getenv("HF_UPLOAD_REPO_TYPE") or default_repo_type).strip().lower()
    if repo_type not in {"dataset", "space", "model"}:
        repo_type = default_repo_type

    api = HfApi(token=token)
    # One shared message for the whole batch; each file is still its own commit.
    commit_msg = f"Add recordings {int(time.time())}"
    uploaded = 0
    try:
        for p in paths:
            if not os.path.isfile(p):
                continue
            api.upload_file(
                path_or_fileobj=p,
                path_in_repo=f"recordings/{os.path.basename(p)}",
                repo_id=repo_id,
                repo_type=repo_type,
                commit_message=commit_msg,
            )
            uploaded += 1
    except Exception as e:  # broad catch to surface error in UI
        return uploaded, f"Upload error: {e}"
    return uploaded, None
137
+
138
+
139
  def submit_recordings(
140
  username: str,
141
  lumos_path: Optional[str],
 
144
  wingardium_path: Optional[str],
145
  accio_path: Optional[str],
146
  reparo_path: Optional[str],
147
+ upload_flag: bool,
148
  ) -> str:
149
  user = sanitize_username(username)
150
 
 
160
  saved = []
161
  skipped = []
162
 
163
+ saved_paths: List[str] = []
164
  for spell, path in pairs:
165
  out = save_one_from_path(path, spell, user)
166
  if out:
167
  saved.append(f"{spell} -> {os.path.basename(out)}")
168
+ saved_paths.append(out)
169
  else:
170
  skipped.append(spell)
171
 
172
  lines = []
173
  if saved:
174
+ lines.append("Saved recordings (local runtime):")
175
  lines += [f"- {s}" for s in saved]
176
  if skipped:
177
  lines.append("")
 
180
  if not lines:
181
  return "No audio captured. Please record at least one spell."
182
 
183
+ if upload_flag:
184
+ uploaded, err = upload_recordings(saved_paths)
185
+ lines.append("")
186
+ if err:
187
+ lines.append(f"Hub upload attempted: {uploaded} succeeded, error: {err}")
188
+ else:
189
+ lines.append(f"Hub upload: {uploaded} file(s) committed to repo.")
190
+ lines.append("(It may take a few seconds to appear in the file browser.)")
191
+
192
  return "\n".join(lines)
193
 
194
 
 
214
  accio = gr.Audio(label="Accio", sources=["microphone", "upload"], type="filepath")
215
  reparo = gr.Audio(label="Reparo", sources=["microphone", "upload"], type="filepath")
216
 
217
+ with gr.Row():
218
+ upload_checkbox = gr.Checkbox(label="Upload to Hub (requires HF_TOKEN)", value=False)
219
  submit = gr.Button("Submit")
220
  result = gr.Markdown()
221
 
222
  submit.click(
223
  fn=submit_recordings,
224
+ inputs=[username, lumos, nox, alohomora, wingardium, accio, reparo, upload_checkbox],
225
  outputs=[result],
226
  )
227
 
228
  gr.Markdown("""
229
+ Notes:
230
+ - Files are saved locally in `recordings/` with `<spell>_<username>_<timestamp>.wav`.
231
+ - Check "Upload to Hub" to commit them to the repo (needs HF_TOKEN secret).
232
+ - 16 kHz mono WAV ensures consistent model training.
233
+ - You can submit partial sets; only provided spells are saved.
234
  """)
235
 
236
  return demo
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  gradio
2
  numpy
3
  soundfile
4
- scipy
 
 
1
  gradio
2
  numpy
3
  soundfile
4
+ scipy
5
+ huggingface_hub