Sulitha committed on
Commit
4ea0a00
·
1 Parent(s): d8d08b1

feat: add optional MongoDB GridFS upload

Browse files
Files changed (3) hide show
  1. README.md +13 -0
  2. app.py +97 -1
  3. requirements.txt +2 -1
README.md CHANGED
@@ -48,3 +48,16 @@ If you prefer uploading to Google Drive:
48
 
49
  The app uses `google-api-python-client` to upload each WAV file into that folder. Errors will be shown in the results area if credentials or permissions are incorrect.
50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
 
49
  The app uses `google-api-python-client` to upload each WAV file into that folder. Errors will be shown in the results area if credentials or permissions are incorrect.
50
 
51
+ ## MongoDB Upload (Alternative)
52
+
53
+ You can also upload recordings to MongoDB using GridFS.
54
+
55
+ Secrets to configure in your Space:
56
+ - `MONGO_URI`: your MongoDB connection string (supports `mongodb+srv://`)
57
+ - `MONGO_DB`: database name (default: `spells`)
58
+ - `GRIDFS_BUCKET`: GridFS bucket prefix (default: `fs`)
59
+
60
+ Then in the UI, tick "Upload to MongoDB (GridFS)" before Submit.
61
+
62
+ Each file is stored in GridFS under its original `filename`, with metadata: `spell`, `username`, `timestamp`, and the local `path` it was uploaded from.
63
+
app.py CHANGED
@@ -25,6 +25,14 @@ except Exception:
25
  build = None
26
  MediaFileUpload = None
27
 
 
 
 
 
 
 
 
 
28
  # Output directory for saved recordings
29
  OUT_DIR = "recordings"
30
  os.makedirs(OUT_DIR, exist_ok=True)
@@ -208,6 +216,83 @@ def upload_recordings_to_gdrive(paths: Sequence[str]) -> Tuple[int, Optional[str
208
  return uploaded, None
209
 
210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
211
  def submit_recordings(
212
  username: str,
213
  lumos_path: Optional[str],
@@ -218,6 +303,7 @@ def submit_recordings(
218
  reparo_path: Optional[str],
219
  upload_flag: bool,
220
  gdrive_flag: bool,
 
221
  ) -> str:
222
  user = sanitize_username(username)
223
 
@@ -270,6 +356,14 @@ def submit_recordings(
270
  else:
271
  lines.append(f"Drive upload: {gup} file(s) uploaded to folder.")
272
 
 
 
 
 
 
 
 
 
273
  return "\n".join(lines)
274
 
275
 
@@ -298,12 +392,13 @@ def build_ui() -> gr.Blocks:
298
  with gr.Row():
299
  upload_checkbox = gr.Checkbox(label="Upload to Hub (requires HF_TOKEN)", value=False)
300
  gdrive_checkbox = gr.Checkbox(label="Upload to Google Drive (service account)", value=False)
 
301
  submit = gr.Button("Submit")
302
  result = gr.Markdown()
303
 
304
  submit.click(
305
  fn=submit_recordings,
306
- inputs=[username, lumos, nox, alohomora, wingardium, accio, reparo, upload_checkbox, gdrive_checkbox],
307
  outputs=[result],
308
  )
309
 
@@ -312,6 +407,7 @@ def build_ui() -> gr.Blocks:
312
  - Files are saved locally in `recordings/` with `<spell>_<username>_<timestamp>.wav`.
313
  - Check "Upload to Hub" to commit them to the repo (needs HF_TOKEN secret).
314
  - Or check "Upload to Google Drive" to upload via a service account.
 
315
  - 16 kHz mono WAV ensures consistent model training.
316
  - You can submit partial sets; only provided spells are saved.
317
  """)
 
25
  build = None
26
  MediaFileUpload = None
27
 
28
+ # MongoDB (GridFS) optional imports
29
+ try:
30
+ from pymongo import MongoClient
31
+ import gridfs
32
+ except Exception:
33
+ MongoClient = None
34
+ gridfs = None
35
+
36
  # Output directory for saved recordings
37
  OUT_DIR = "recordings"
38
  os.makedirs(OUT_DIR, exist_ok=True)
 
216
  return uploaded, None
217
 
218
 
219
+ def _parse_meta_from_filename(basename: str) -> Tuple[str, str, Optional[int]]:
220
+ """Parse (spell_slug, username, timestamp) from `<spell_slug>_<username>_<ts>.wav`.
221
+ Username and spell slug can contain underscores; timestamp is the last token.
222
+ """
223
+ name = basename
224
+ if name.endswith(".wav"):
225
+ name = name[:-4]
226
+ parts = name.split("_")
227
+ if len(parts) < 3:
228
+ return name, "", None
229
+ try:
230
+ ts = int(parts[-1])
231
+ except Exception:
232
+ ts = None
233
+ username = parts[-2]
234
+ spell_slug = "_".join(parts[:-2])
235
+ return spell_slug, username, ts
236
+
237
+
238
def upload_recordings_to_mongo(paths: Sequence[str]) -> Tuple[int, Optional[str]]:
    """Upload files to MongoDB using GridFS.

    Env configuration:
    - MONGO_URI: connection string (e.g., mongodb+srv://user:pass@cluster/)
    - MONGO_DB: database name (default: spells)
    - GRIDFS_BUCKET: GridFS bucket prefix (default: fs)

    Each existing file in `paths` is stored under its base name with content
    type `audio/wav` and metadata keys `spell`, `username`, `timestamp`
    (parsed from the file name) and `path`. Entries that are not regular
    files are skipped silently.

    Args:
        paths: Local file paths to upload.

    Returns:
        `(uploaded_count, error_message)`. `error_message` is `None` on
        success. On failure it describes the first error, and
        `uploaded_count` is the number of files stored before it occurred.
        This function does not raise for connection or upload errors.
    """
    if not paths:
        return 0, None
    if not (MongoClient and gridfs):
        return 0, "pymongo/gridfs not installed."
    uri = os.getenv("MONGO_URI")
    if not uri:
        return 0, "Missing MONGO_URI."
    # `or` so that a variable set to an empty string still gets the default.
    db_name = os.getenv("MONGO_DB") or "spells"
    bucket = os.getenv("GRIDFS_BUCKET") or "fs"

    try:
        client = MongoClient(uri, serverSelectionTimeoutMS=5000)
    except Exception as e:
        return 0, f"Mongo connect error: {e}"

    uploaded = 0
    # From here on the client exists, so every exit path must close it,
    # including a failed ping or GridFS setup.
    try:
        try:
            # quick connectivity check
            client.admin.command("ping")
            db = client[db_name]
            fs = gridfs.GridFS(db, collection=bucket)
        except Exception as e:
            return 0, f"Mongo connect error: {e}"

        try:
            for p in paths:
                if not os.path.isfile(p):
                    continue
                base = os.path.basename(p)
                spell_slug, username, ts = _parse_meta_from_filename(base)
                with open(p, "rb") as f:
                    # Pass the file object so GridFS reads it in chunks
                    # instead of loading the whole recording into memory.
                    fs.put(
                        f,
                        filename=base,
                        contentType="audio/wav",
                        metadata={
                            "spell": spell_slug,
                            "username": username,
                            "timestamp": ts,
                            "path": p,
                        },
                    )
                uploaded += 1
        except Exception as e:
            return uploaded, f"Mongo upload error: {e}"
    finally:
        try:
            client.close()
        except Exception:
            # Best-effort cleanup: a close failure must not mask the result.
            pass
    return uploaded, None
294
+
295
+
296
  def submit_recordings(
297
  username: str,
298
  lumos_path: Optional[str],
 
303
  reparo_path: Optional[str],
304
  upload_flag: bool,
305
  gdrive_flag: bool,
306
+ mongo_flag: bool,
307
  ) -> str:
308
  user = sanitize_username(username)
309
 
 
356
  else:
357
  lines.append(f"Drive upload: {gup} file(s) uploaded to folder.")
358
 
359
+ if mongo_flag:
360
+ mup, merr = upload_recordings_to_mongo(saved_paths)
361
+ lines.append("")
362
+ if merr:
363
+ lines.append(f"Mongo upload attempted: {mup} succeeded, error: {merr}")
364
+ else:
365
+ lines.append(f"Mongo upload: {mup} file(s) stored in GridFS.")
366
+
367
  return "\n".join(lines)
368
 
369
 
 
392
  with gr.Row():
393
  upload_checkbox = gr.Checkbox(label="Upload to Hub (requires HF_TOKEN)", value=False)
394
  gdrive_checkbox = gr.Checkbox(label="Upload to Google Drive (service account)", value=False)
395
+ mongo_checkbox = gr.Checkbox(label="Upload to MongoDB (GridFS)", value=False)
396
  submit = gr.Button("Submit")
397
  result = gr.Markdown()
398
 
399
  submit.click(
400
  fn=submit_recordings,
401
+ inputs=[username, lumos, nox, alohomora, wingardium, accio, reparo, upload_checkbox, gdrive_checkbox, mongo_checkbox],
402
  outputs=[result],
403
  )
404
 
 
407
  - Files are saved locally in `recordings/` with `<spell>_<username>_<timestamp>.wav`.
408
  - Check "Upload to Hub" to commit them to the repo (needs HF_TOKEN secret).
409
  - Or check "Upload to Google Drive" to upload via a service account.
410
+ - Or check "Upload to MongoDB (GridFS)" to store in your database.
411
  - 16 kHz mono WAV ensures consistent model training.
412
  - You can submit partial sets; only provided spells are saved.
413
  """)
requirements.txt CHANGED
@@ -6,4 +6,5 @@ huggingface_hub
6
  google-api-python-client
7
  google-auth
8
  google-auth-httplib2
9
- google-auth-oauthlib
 
 
6
  google-api-python-client
7
  google-auth
8
  google-auth-httplib2
9
+ google-auth-oauthlib
10
+ pymongo