Spaces:
Runtime error
Runtime error
feat: add optional MongoDB GridFS upload
Browse files- README.md +13 -0
- app.py +97 -1
- requirements.txt +2 -1
README.md
CHANGED
|
@@ -48,3 +48,16 @@ If you prefer uploading to Google Drive:
|
|
| 48 |
|
| 49 |
The app uses `google-api-python-client` to upload each WAV file into that folder. Errors will be shown in the results area if credentials or permissions are incorrect.
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
The app uses `google-api-python-client` to upload each WAV file into that folder. Errors will be shown in the results area if credentials or permissions are incorrect.
|
| 50 |
|
| 51 |
+
## MongoDB Upload (Alternative)
|
| 52 |
+
|
| 53 |
+
You can also upload recordings to MongoDB using GridFS.
|
| 54 |
+
|
| 55 |
+
Secrets to configure in your Space:
|
| 56 |
+
- `MONGO_URI`: your MongoDB connection string (supports `mongodb+srv://`)
|
| 57 |
+
- `MONGO_DB`: database name (default: `spells`)
|
| 58 |
+
- `GRIDFS_BUCKET`: GridFS bucket prefix (default: `fs`)
|
| 59 |
+
|
| 60 |
+
Then in the UI, tick "Upload to MongoDB (GridFS)" before Submit.
|
| 61 |
+
|
| 62 |
+
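Each file is stored in GridFS under its original `filename` (content type `audio/wav`), with a `metadata` document containing `spell`, `username`, `timestamp`, and `path` (the local path the file was saved to).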
|
| 63 |
+
|
app.py
CHANGED
|
@@ -25,6 +25,14 @@ except Exception:
|
|
| 25 |
build = None
|
| 26 |
MediaFileUpload = None
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
# Output directory for saved recordings
|
| 29 |
OUT_DIR = "recordings"
|
| 30 |
os.makedirs(OUT_DIR, exist_ok=True)
|
|
@@ -208,6 +216,83 @@ def upload_recordings_to_gdrive(paths: Sequence[str]) -> Tuple[int, Optional[str
|
|
| 208 |
return uploaded, None
|
| 209 |
|
| 210 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
def submit_recordings(
|
| 212 |
username: str,
|
| 213 |
lumos_path: Optional[str],
|
|
@@ -218,6 +303,7 @@ def submit_recordings(
|
|
| 218 |
reparo_path: Optional[str],
|
| 219 |
upload_flag: bool,
|
| 220 |
gdrive_flag: bool,
|
|
|
|
| 221 |
) -> str:
|
| 222 |
user = sanitize_username(username)
|
| 223 |
|
|
@@ -270,6 +356,14 @@ def submit_recordings(
|
|
| 270 |
else:
|
| 271 |
lines.append(f"Drive upload: {gup} file(s) uploaded to folder.")
|
| 272 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 273 |
return "\n".join(lines)
|
| 274 |
|
| 275 |
|
|
@@ -298,12 +392,13 @@ def build_ui() -> gr.Blocks:
|
|
| 298 |
with gr.Row():
|
| 299 |
upload_checkbox = gr.Checkbox(label="Upload to Hub (requires HF_TOKEN)", value=False)
|
| 300 |
gdrive_checkbox = gr.Checkbox(label="Upload to Google Drive (service account)", value=False)
|
|
|
|
| 301 |
submit = gr.Button("Submit")
|
| 302 |
result = gr.Markdown()
|
| 303 |
|
| 304 |
submit.click(
|
| 305 |
fn=submit_recordings,
|
| 306 |
-
inputs=[username, lumos, nox, alohomora, wingardium, accio, reparo, upload_checkbox, gdrive_checkbox],
|
| 307 |
outputs=[result],
|
| 308 |
)
|
| 309 |
|
|
@@ -312,6 +407,7 @@ def build_ui() -> gr.Blocks:
|
|
| 312 |
- Files are saved locally in `recordings/` with `<spell>_<username>_<timestamp>.wav`.
|
| 313 |
- Check "Upload to Hub" to commit them to the repo (needs HF_TOKEN secret).
|
| 314 |
- Or check "Upload to Google Drive" to upload via a service account.
|
|
|
|
| 315 |
- 16 kHz mono WAV ensures consistent model training.
|
| 316 |
- You can submit partial sets; only provided spells are saved.
|
| 317 |
""")
|
|
|
|
| 25 |
build = None
|
| 26 |
MediaFileUpload = None
|
| 27 |
|
| 28 |
+
# MongoDB (GridFS) optional imports
|
| 29 |
+
try:
|
| 30 |
+
from pymongo import MongoClient
|
| 31 |
+
import gridfs
|
| 32 |
+
except Exception:
|
| 33 |
+
MongoClient = None
|
| 34 |
+
gridfs = None
|
| 35 |
+
|
| 36 |
# Output directory for saved recordings
|
| 37 |
OUT_DIR = "recordings"
|
| 38 |
os.makedirs(OUT_DIR, exist_ok=True)
|
|
|
|
| 216 |
return uploaded, None
|
| 217 |
|
| 218 |
|
| 219 |
+
def _parse_meta_from_filename(basename: str) -> Tuple[str, str, Optional[int]]:
|
| 220 |
+
"""Parse (spell_slug, username, timestamp) from `<spell_slug>_<username>_<ts>.wav`.
|
| 221 |
+
Username and spell slug can contain underscores; timestamp is the last token.
|
| 222 |
+
"""
|
| 223 |
+
name = basename
|
| 224 |
+
if name.endswith(".wav"):
|
| 225 |
+
name = name[:-4]
|
| 226 |
+
parts = name.split("_")
|
| 227 |
+
if len(parts) < 3:
|
| 228 |
+
return name, "", None
|
| 229 |
+
try:
|
| 230 |
+
ts = int(parts[-1])
|
| 231 |
+
except Exception:
|
| 232 |
+
ts = None
|
| 233 |
+
username = parts[-2]
|
| 234 |
+
spell_slug = "_".join(parts[:-2])
|
| 235 |
+
return spell_slug, username, ts
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
def upload_recordings_to_mongo(paths: Sequence[str]) -> Tuple[int, Optional[str]]:
    """Upload recording files to MongoDB using GridFS.

    Env configuration:
    - MONGO_URI: connection string (e.g., mongodb+srv://user:pass@cluster/)
    - MONGO_DB: database name (default: spells)
    - GRIDFS_BUCKET: GridFS bucket prefix (default: fs)

    Each file is stored under its base name with content type ``audio/wav``
    and a metadata document holding ``spell``, ``username``, ``timestamp``
    (parsed from the file name) and ``path`` (the local path given).

    Args:
        paths: Local file paths to upload. Entries that are not existing
            regular files are skipped; they are neither counted nor reported.

    Returns:
        ``(uploaded_count, error_message)``. ``error_message`` is ``None``
        on success. On a connection failure the count is 0; on a failure
        during upload the count is the number of files stored before it.
    """
    if not paths:
        return 0, None
    if not (MongoClient and gridfs):
        return 0, "pymongo/gridfs not installed."
    uri = os.getenv("MONGO_URI")
    if not uri:
        return 0, "Missing MONGO_URI."
    db_name = os.getenv("MONGO_DB", "spells")
    bucket = os.getenv("GRIDFS_BUCKET", "fs")

    client = None
    uploaded = 0
    # The outer try/finally guarantees the client is closed on every exit
    # path, including a failed ping after the client object was created.
    try:
        try:
            client = MongoClient(uri, serverSelectionTimeoutMS=5000)
            # quick connectivity check
            client.admin.command("ping")
            fs = gridfs.GridFS(client[db_name], collection=bucket)
        except Exception as e:
            return 0, f"Mongo connect error: {e}"

        try:
            for p in paths:
                if not os.path.isfile(p):
                    continue
                base = os.path.basename(p)
                spell_slug, username, ts = _parse_meta_from_filename(base)
                with open(p, "rb") as f:
                    # Hand GridFS the open file so it is read in chunks
                    # instead of loading the whole recording into memory.
                    fs.put(
                        f,
                        filename=base,
                        contentType="audio/wav",
                        metadata={
                            "spell": spell_slug,
                            "username": username,
                            "timestamp": ts,
                            "path": p,
                        },
                    )
                uploaded += 1
        except Exception as e:
            return uploaded, f"Mongo upload error: {e}"
        return uploaded, None
    finally:
        if client is not None:
            try:
                client.close()
            except Exception:
                # Best-effort cleanup: a failure to close must not mask the
                # upload result already being returned.
                pass
|
| 294 |
+
|
| 295 |
+
|
| 296 |
def submit_recordings(
|
| 297 |
username: str,
|
| 298 |
lumos_path: Optional[str],
|
|
|
|
| 303 |
reparo_path: Optional[str],
|
| 304 |
upload_flag: bool,
|
| 305 |
gdrive_flag: bool,
|
| 306 |
+
mongo_flag: bool,
|
| 307 |
) -> str:
|
| 308 |
user = sanitize_username(username)
|
| 309 |
|
|
|
|
| 356 |
else:
|
| 357 |
lines.append(f"Drive upload: {gup} file(s) uploaded to folder.")
|
| 358 |
|
| 359 |
+
if mongo_flag:
|
| 360 |
+
mup, merr = upload_recordings_to_mongo(saved_paths)
|
| 361 |
+
lines.append("")
|
| 362 |
+
if merr:
|
| 363 |
+
lines.append(f"Mongo upload attempted: {mup} succeeded, error: {merr}")
|
| 364 |
+
else:
|
| 365 |
+
lines.append(f"Mongo upload: {mup} file(s) stored in GridFS.")
|
| 366 |
+
|
| 367 |
return "\n".join(lines)
|
| 368 |
|
| 369 |
|
|
|
|
| 392 |
with gr.Row():
|
| 393 |
upload_checkbox = gr.Checkbox(label="Upload to Hub (requires HF_TOKEN)", value=False)
|
| 394 |
gdrive_checkbox = gr.Checkbox(label="Upload to Google Drive (service account)", value=False)
|
| 395 |
+
mongo_checkbox = gr.Checkbox(label="Upload to MongoDB (GridFS)", value=False)
|
| 396 |
submit = gr.Button("Submit")
|
| 397 |
result = gr.Markdown()
|
| 398 |
|
| 399 |
submit.click(
|
| 400 |
fn=submit_recordings,
|
| 401 |
+
inputs=[username, lumos, nox, alohomora, wingardium, accio, reparo, upload_checkbox, gdrive_checkbox, mongo_checkbox],
|
| 402 |
outputs=[result],
|
| 403 |
)
|
| 404 |
|
|
|
|
| 407 |
- Files are saved locally in `recordings/` with `<spell>_<username>_<timestamp>.wav`.
|
| 408 |
- Check "Upload to Hub" to commit them to the repo (needs HF_TOKEN secret).
|
| 409 |
- Or check "Upload to Google Drive" to upload via a service account.
|
| 410 |
+
- Or check "Upload to MongoDB (GridFS)" to store in your database.
|
| 411 |
- 16 kHz mono WAV ensures consistent model training.
|
| 412 |
- You can submit partial sets; only provided spells are saved.
|
| 413 |
""")
|
requirements.txt
CHANGED
|
@@ -6,4 +6,5 @@ huggingface_hub
|
|
| 6 |
google-api-python-client
|
| 7 |
google-auth
|
| 8 |
google-auth-httplib2
|
| 9 |
-
google-auth-oauthlib
|
|
|
|
|
|
| 6 |
google-api-python-client
|
| 7 |
google-auth
|
| 8 |
google-auth-httplib2
|
| 9 |
+
google-auth-oauthlib
|
| 10 |
+
pymongo
|