Spaces:

Sulitha
/

harry_potter_spells

Runtime error

App Files Files Community

Sulitha committed on Nov 22, 2025

Commit

f5ec16f

1 Parent(s): c0881d7

Store audio in MongoDB GridFS; add pymongo and docs

Browse files

Files changed (3) hide show

README.md +20 -7
app.py +57 -50
requirements.txt +1 -0

README.md CHANGED Viewed

@@ -14,7 +14,7 @@ short_description: Collect spell recordings for model training
 # Spell Recorder (Gradio)
-Collect microphone recordings for a small set of Harry Potter spells and save them to disk for training a classifier.
 Spells collected:
 - Lumos
@@ -25,12 +25,11 @@ Spells collected:
 - Reparo
 ## How it works
-- Enter a username (used in filenames; sanitized to safe characters).
 - Record with your microphone (preferred) or upload an audio file for any spell.
 - Click Submit.
-- The app will save any provided recordings to `recordings/` as 16 kHz mono WAVs named: `<spell>_<username>_<timestamp>.wav`.
 - A live counter shows how many spells are selected (recorded/uploaded) before submitting.
-- A CSV log is written to `recordings/log.csv` with columns: `timestamp_ms, session_id, username, spell, filename`.
 ## Run locally
@@ -51,15 +50,29 @@ python app.py
 Then open the printed local URL in your browser.
-## Deploy on Hugging Face Spaces
 1. Create a new Space (Gradio) in your account.
 2. Upload `app.py`, `requirements.txt`, and optionally `README.md`.
 3. Spaces will auto-build and run the app.
-4. Recordings will be saved inside the Space's `recordings/` directory. You can download them from the Space files tab or via `git lfs` if you commit them.
 Notes:
 - Microphone recording is enabled in the browser; no need to upload.
-- If you need more durable storage or collaboration, consider pushing saved WAVs to a dataset repo programmatically.
 ## Privacy and consent
 - Only collect voices from people who consent to being recorded.

 # Spell Recorder (Gradio)
+Collect microphone recordings for a small set of Harry Potter spells and store them in MongoDB for training a classifier.
 Spells collected:
 - Lumos
 - Reparo
 ## How it works
+- Enter a username (used in the stored filename and metadata; sanitized to safe characters).
 - Record with your microphone (preferred) or upload an audio file for any spell.
 - Click Submit.
+- Audio is resampled to 16 kHz mono and stored in MongoDB GridFS with metadata (username, spell, timestamp).
 - A live counter shows how many spells are selected (recorded/uploaded) before submitting.
 ## Run locally
 Then open the printed local URL in your browser.
 1. Create a new Space (Gradio) in your account.
 2. Upload `app.py`, `requirements.txt`, and optionally `README.md`.
 3. Spaces will auto-build and run the app.
+4. Submissions are stored directly in your MongoDB database (GridFS), not in the Space filesystem.
 Notes:
 - Microphone recording is enabled in the browser; no need to upload.
+- Ensure the MongoDB secrets are configured; otherwise the app will report that the database is not configured.
+## MongoDB configuration (Spaces secrets)
+Set these in your Space → Settings → Variables and secrets:
+- `MONGO_URI`: your MongoDB connection string (e.g., from MongoDB Atlas)
+- `MONGO_DB`: database name (default: `spells`)
+- `MONGO_BUCKET`: GridFS bucket/collection prefix (default: `recordings`)
+Locally (PowerShell), you can set them temporarily for the current session:
+```powershell
+$env:MONGO_URI = "mongodb+srv://user:pass@cluster.mongodb.net/?retryWrites=true&w=majority"
+$env:MONGO_DB = "spells"
+$env:MONGO_BUCKET = "recordings"
+python app.py
+```
 ## Privacy and consent
 - Only collect voices from people who consent to being recorded.

app.py CHANGED Viewed

@@ -2,19 +2,24 @@ import os
 import re
 import time
 import math
-import csv
-import uuid
 from typing import List, Tuple, Optional
 import numpy as np
 import gradio as gr
 import soundfile as sf
 from scipy.signal import resample_poly
-# Output directory for saved recordings
-OUT_DIR = "recordings"
-os.makedirs(OUT_DIR, exist_ok=True)
-LOG_CSV = os.path.join(OUT_DIR, "log.csv")
 # Fixed target sample rate for ML training
 TARGET_SR = 16000
@@ -62,23 +67,21 @@ def resample_to_target(audio: np.ndarray, sr: int, target_sr: int = TARGET_SR) -
     return resample_poly(audio, up=up, down=down)
-def ensure_log_header():
-    if not os.path.exists(LOG_CSV):
-        with open(LOG_CSV, mode="w", newline="", encoding="utf-8") as f:
-            writer = csv.writer(f)
-            writer.writerow(["timestamp_ms", "session_id", "username", "spell", "filename"])  # header
-def log_row(timestamp_ms: int, session_id: str, username: str, spell: str, filename: str) -> None:
-    ensure_log_header()
-    with open(LOG_CSV, mode="a", newline="", encoding="utf-8") as f:
-        writer = csv.writer(f)
-        writer.writerow([timestamp_ms, session_id, username, spell, filename])
-def save_one_from_path(filepath: Optional[str], spell: str, username: str) -> Optional[Tuple[str, int]]:
-    """Load an audio file path (from mic/upload), process to 16k mono, and save.
-    Returns (saved file path, timestamp_ms) or None if no audio provided.
     """
     if not filepath:
         return None
@@ -91,14 +94,28 @@ def save_one_from_path(filepath: Optional[str], spell: str, username: str) -> Op
     audio = resample_to_target(audio, sr, TARGET_SR)
     audio = np.clip(audio, -1.0, 1.0)
-    # Build descriptive filename: spell_username_timestamp.wav
     ts = int(time.time() * 1000)
     spell_slug = re.sub(r"[^a-zA-Z0-9]+", "_", spell).strip("_").lower()
-    fname = f"{spell_slug}_{username}_{ts}.wav"
-    out_path = os.path.join(OUT_DIR, fname)
-    sf.write(out_path, audio, TARGET_SR, subtype="PCM_16")
-    return out_path, ts
 def submit_recordings(
@@ -109,9 +126,7 @@ def submit_recordings(
     wingardium_path: Optional[str],
     accio_path: Optional[str],
     reparo_path: Optional[str],
-    session_id: str,
-    session_files: List[str],
-) -> Tuple[str, List[str], int]:
     user = sanitize_username(username)
     pairs: List[Tuple[str, Optional[str]]] = [
@@ -125,20 +140,19 @@ def submit_recordings(
     saved = []
     skipped = []
-    newly_saved_paths: List[str] = []
     for spell, path in pairs:
-        out = save_one_from_path(path, spell, user)
-        if out:
-            out_path, ts = out
-            saved.append(f"{spell} -> {os.path.basename(out_path)}")
-            newly_saved_paths.append(out_path)
-            # CSV log
-            log_row(ts, session_id, user, spell, os.path.basename(out_path))
         else:
             skipped.append(spell)
     lines = []
     if saved:
         lines.append("Saved recordings:")
         lines += [f"- {s}" for s in saved]
@@ -147,11 +161,9 @@ def submit_recordings(
         lines.append("Missing (not provided):")
         lines += [f"- {s}" for s in skipped]
     if not lines:
-        return "No audio captured. Please record at least one spell.", session_files, 0
-    # Update session files list
-    session_files = list(session_files or []) + newly_saved_paths
-    return "\n".join(lines), session_files, len(newly_saved_paths)
 def count_selected(
@@ -176,10 +188,6 @@ def build_ui() -> gr.Blocks:
         Spells to collect: Lumos, Nox, Alohomora, Wingardium Leviosa, Accio, Reparo.
         """)
-        # Per-session state
-        session_id = gr.State(uuid.uuid4().hex)
-        session_files = gr.State([])  # paths saved during this session
         with gr.Row():
             username = gr.Textbox(label="Your Name (for filename)", placeholder="e.g., harry_p" , autofocus=True)
@@ -202,8 +210,8 @@ def build_ui() -> gr.Blocks:
         submit.click(
             fn=submit_recordings,
-            inputs=[username, lumos, nox, alohomora, wingardium, accio, reparo, session_id, session_files],
-            outputs=[result, session_files, submitted_count],
         )
         # Live counter updates when any audio input changes
@@ -216,10 +224,9 @@ def build_ui() -> gr.Blocks:
         gr.Markdown("""
         Notes:
-        - Files are saved in the app's `recordings/` folder using: `<spell>_<username>_<timestamp>.wav`.
         - 16 kHz mono WAV is used to make model training consistent.
         - You don't have to record all spells at once—submit whatever you have.
-        - A CSV log is kept at `recordings/log.csv` with username, spell, timestamp, filename.
         """)
     return demo

 import re
 import time
 import math
+import io
 from typing import List, Tuple, Optional
 import numpy as np
 import gradio as gr
 import soundfile as sf
 from scipy.signal import resample_poly
+from scipy.io import wavfile as wav_write
+from pymongo import MongoClient
+from gridfs import GridFS
+# MongoDB configuration via environment variables
+MONGO_URI = os.getenv("MONGO_URI", "")
+MONGO_DB = os.getenv("MONGO_DB", "spells")
+MONGO_BUCKET = os.getenv("MONGO_BUCKET", "recordings")
+_mongo_client: Optional[MongoClient] = None
+_mongo_fs: Optional[GridFS] = None
 # Fixed target sample rate for ML training
 TARGET_SR = 16000
     return resample_poly(audio, up=up, down=down)
+def get_gridfs() -> Optional[GridFS]:
+    global _mongo_client, _mongo_fs
+    if not MONGO_URI:
+        return None
+    if _mongo_fs is not None:
+        return _mongo_fs
+    _mongo_client = MongoClient(MONGO_URI)
+    db = _mongo_client[MONGO_DB]
+    _mongo_fs = GridFS(db, collection=MONGO_BUCKET)
+    return _mongo_fs
+def save_one_from_path(filepath: Optional[str], spell: str, username: str) -> Optional[str]:
+    """Load an audio file (from mic/upload), process to 16k mono, and store in MongoDB GridFS.
+    Returns inserted file id (as str) or None if no audio provided / DB not configured.
     """
     if not filepath:
         return None
     audio = resample_to_target(audio, sr, TARGET_SR)
     audio = np.clip(audio, -1.0, 1.0)
+    # Convert to int16 PCM bytes in-memory
+    pcm16 = (audio * 32767.0).astype(np.int16)
+    buf = io.BytesIO()
+    wav_write.write(buf, TARGET_SR, pcm16)
+    wav_bytes = buf.getvalue()
+    fs = get_gridfs()
+    if fs is None:
+        return None
     ts = int(time.time() * 1000)
     spell_slug = re.sub(r"[^a-zA-Z0-9]+", "_", spell).strip("_").lower()
+    filename = f"{spell_slug}_{username}_{ts}.wav"
+    metadata = {
+        "username": username,
+        "spell": spell,
+        "timestamp_ms": ts,
+        "sample_rate": TARGET_SR,
+        "format": "wav",
+    }
+    file_id = fs.put(wav_bytes, filename=filename, contentType="audio/wav", metadata=metadata)
+    return str(file_id)
 def submit_recordings(
     wingardium_path: Optional[str],
     accio_path: Optional[str],
     reparo_path: Optional[str],
+) -> Tuple[str, int]:
     user = sanitize_username(username)
     pairs: List[Tuple[str, Optional[str]]] = [
     saved = []
     skipped = []
+    inserted = 0
     for spell, path in pairs:
+        file_id = save_one_from_path(path, spell, user)
+        if file_id:
+            saved.append(f"{spell} -> id {file_id}")
+            inserted += 1
         else:
             skipped.append(spell)
     lines = []
+    if not MONGO_URI:
+        lines.append("Database not configured: set MONGO_URI secret in the Space.")
     if saved:
         lines.append("Saved recordings:")
         lines += [f"- {s}" for s in saved]
         lines.append("Missing (not provided):")
         lines += [f"- {s}" for s in skipped]
     if not lines:
+        return "No audio captured. Please record at least one spell.", 0
+    return "\n".join(lines), inserted
 def count_selected(
         Spells to collect: Lumos, Nox, Alohomora, Wingardium Leviosa, Accio, Reparo.
         """)
         with gr.Row():
             username = gr.Textbox(label="Your Name (for filename)", placeholder="e.g., harry_p" , autofocus=True)
         submit.click(
             fn=submit_recordings,
+            inputs=[username, lumos, nox, alohomora, wingardium, accio, reparo],
+            outputs=[result, submitted_count],
         )
         # Live counter updates when any audio input changes
         gr.Markdown("""
         Notes:
+        - Submissions are stored directly in MongoDB (GridFS) using environment secrets.
         - 16 kHz mono WAV is used to make model training consistent.
         - You don't have to record all spells at once—submit whatever you have.
         """)
     return demo

requirements.txt CHANGED Viewed

@@ -2,3 +2,4 @@ numpy
 soundfile
 scipy
 huggingface_hub<0.25

 soundfile
 scipy
 huggingface_hub<0.25
+pymongo