Sulitha committed on
Commit
f5ec16f
·
1 Parent(s): c0881d7

Store audio in MongoDB GridFS; add pymongo and docs

Browse files
Files changed (3) hide show
  1. README.md +20 -7
  2. app.py +57 -50
  3. requirements.txt +1 -0
README.md CHANGED
@@ -14,7 +14,7 @@ short_description: Collect spell recordings for model training
14
 
15
  # Spell Recorder (Gradio)
16
 
17
- Collect microphone recordings for a small set of Harry Potter spells and save them to disk for training a classifier.
18
 
19
  Spells collected:
20
  - Lumos
@@ -25,12 +25,11 @@ Spells collected:
25
  - Reparo
26
 
27
  ## How it works
28
- - Enter a username (used in filenames; sanitized to safe characters).
29
  - Record with your microphone (preferred) or upload an audio file for any spell.
30
  - Click Submit.
31
- - The app will save any provided recordings to `recordings/` as 16 kHz mono WAVs named: `<spell>_<username>_<timestamp>.wav`.
32
  - A live counter shows how many spells are selected (recorded/uploaded) before submitting.
33
- - A CSV log is written to `recordings/log.csv` with columns: `timestamp_ms, session_id, username, spell, filename`.
34
 
35
  ## Run locally
36
 
@@ -51,15 +50,29 @@ python app.py
51
 
52
  Then open the printed local URL in your browser.
53
 
54
- ## Deploy on Hugging Face Spaces
55
  1. Create a new Space (Gradio) in your account.
56
  2. Upload `app.py`, `requirements.txt`, and optionally `README.md`.
57
  3. Spaces will auto-build and run the app.
58
- 4. Recordings will be saved inside the Space's `recordings/` directory. You can download them from the Space files tab or via `git lfs` if you commit them.
59
 
60
  Notes:
61
  - Microphone recording is enabled in the browser; no need to upload.
62
- - If you need more durable storage or collaboration, consider pushing saved WAVs to a dataset repo programmatically.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
63
 
64
  ## Privacy and consent
65
  - Only collect voices from people who consent to being recorded.
 
14
 
15
  # Spell Recorder (Gradio)
16
 
17
+ Collect microphone recordings for a small set of Harry Potter spells and store them in MongoDB for training a classifier.
18
 
19
  Spells collected:
20
  - Lumos
 
25
  - Reparo
26
 
27
  ## How it works
28
+ - Enter a username (used in the stored filename and metadata; sanitized to safe characters).
29
  - Record with your microphone (preferred) or upload an audio file for any spell.
30
  - Click Submit.
31
+ - Audio is resampled to 16 kHz mono, encoded as a 16-bit PCM WAV, and stored in MongoDB GridFS with metadata (username, spell, timestamp, sample rate, format).
32
  - A live counter shows how many spells are selected (recorded/uploaded) before submitting.
 
33
 
34
  ## Run locally
35
 
 
50
 
51
  Then open the printed local URL in your browser.
52
 
 
53
  1. Create a new Space (Gradio) in your account.
54
  2. Upload `app.py`, `requirements.txt`, and optionally `README.md`.
55
  3. Spaces will auto-build and run the app.
56
+ 4. Submissions are stored directly in your MongoDB (GridFS), not in the Space filesystem.
57
 
58
  Notes:
59
  - Microphone recording is enabled in the browser; no need to upload.
60
+ - Ensure the MongoDB secrets are configured; otherwise the app reports that the database is not configured and no submissions are stored.
61
+
62
+ ## MongoDB configuration (Spaces secrets)
63
+ Set these in your Space → Settings → Variables and secrets:
64
+ - `MONGO_URI`: your MongoDB connection string (e.g., from MongoDB Atlas)
65
+ - `MONGO_DB`: database name (default: `spells`)
66
+ - `MONGO_BUCKET`: GridFS bucket/collection prefix (default: `recordings`)
67
+
68
+ Locally (PowerShell), you can set them temporarily for the current session:
69
+
70
+ ```powershell
71
+ $env:MONGO_URI = "mongodb+srv://user:pass@cluster.mongodb.net/?retryWrites=true&w=majority"
72
+ $env:MONGO_DB = "spells"
73
+ $env:MONGO_BUCKET = "recordings"
74
+ python app.py
75
+ ```
76
 
77
  ## Privacy and consent
78
  - Only collect voices from people who consent to being recorded.
app.py CHANGED
@@ -2,19 +2,24 @@ import os
2
  import re
3
  import time
4
  import math
5
- import csv
6
- import uuid
7
  from typing import List, Tuple, Optional
8
 
9
  import numpy as np
10
  import gradio as gr
11
  import soundfile as sf
12
  from scipy.signal import resample_poly
 
 
 
13
 
14
- # Output directory for saved recordings
15
- OUT_DIR = "recordings"
16
- os.makedirs(OUT_DIR, exist_ok=True)
17
- LOG_CSV = os.path.join(OUT_DIR, "log.csv")
 
 
 
18
 
19
  # Fixed target sample rate for ML training
20
  TARGET_SR = 16000
@@ -62,23 +67,21 @@ def resample_to_target(audio: np.ndarray, sr: int, target_sr: int = TARGET_SR) -
62
  return resample_poly(audio, up=up, down=down)
63
 
64
 
65
- def ensure_log_header():
66
- if not os.path.exists(LOG_CSV):
67
- with open(LOG_CSV, mode="w", newline="", encoding="utf-8") as f:
68
- writer = csv.writer(f)
69
- writer.writerow(["timestamp_ms", "session_id", "username", "spell", "filename"]) # header
70
-
71
-
72
- def log_row(timestamp_ms: int, session_id: str, username: str, spell: str, filename: str) -> None:
73
- ensure_log_header()
74
- with open(LOG_CSV, mode="a", newline="", encoding="utf-8") as f:
75
- writer = csv.writer(f)
76
- writer.writerow([timestamp_ms, session_id, username, spell, filename])
77
 
78
 
79
- def save_one_from_path(filepath: Optional[str], spell: str, username: str) -> Optional[Tuple[str, int]]:
80
- """Load an audio file path (from mic/upload), process to 16k mono, and save.
81
- Returns (saved file path, timestamp_ms) or None if no audio provided.
82
  """
83
  if not filepath:
84
  return None
@@ -91,14 +94,28 @@ def save_one_from_path(filepath: Optional[str], spell: str, username: str) -> Op
91
  audio = resample_to_target(audio, sr, TARGET_SR)
92
  audio = np.clip(audio, -1.0, 1.0)
93
 
94
- # Build descriptive filename: spell_username_timestamp.wav
 
 
 
 
 
 
 
 
 
95
  ts = int(time.time() * 1000)
96
  spell_slug = re.sub(r"[^a-zA-Z0-9]+", "_", spell).strip("_").lower()
97
- fname = f"{spell_slug}_{username}_{ts}.wav"
98
- out_path = os.path.join(OUT_DIR, fname)
99
-
100
- sf.write(out_path, audio, TARGET_SR, subtype="PCM_16")
101
- return out_path, ts
 
 
 
 
 
102
 
103
 
104
  def submit_recordings(
@@ -109,9 +126,7 @@ def submit_recordings(
109
  wingardium_path: Optional[str],
110
  accio_path: Optional[str],
111
  reparo_path: Optional[str],
112
- session_id: str,
113
- session_files: List[str],
114
- ) -> Tuple[str, List[str], int]:
115
  user = sanitize_username(username)
116
 
117
  pairs: List[Tuple[str, Optional[str]]] = [
@@ -125,20 +140,19 @@ def submit_recordings(
125
 
126
  saved = []
127
  skipped = []
128
- newly_saved_paths: List[str] = []
129
 
130
  for spell, path in pairs:
131
- out = save_one_from_path(path, spell, user)
132
- if out:
133
- out_path, ts = out
134
- saved.append(f"{spell} -> {os.path.basename(out_path)}")
135
- newly_saved_paths.append(out_path)
136
- # CSV log
137
- log_row(ts, session_id, user, spell, os.path.basename(out_path))
138
  else:
139
  skipped.append(spell)
140
 
141
  lines = []
 
 
142
  if saved:
143
  lines.append("Saved recordings:")
144
  lines += [f"- {s}" for s in saved]
@@ -147,11 +161,9 @@ def submit_recordings(
147
  lines.append("Missing (not provided):")
148
  lines += [f"- {s}" for s in skipped]
149
  if not lines:
150
- return "No audio captured. Please record at least one spell.", session_files, 0
151
 
152
- # Update session files list
153
- session_files = list(session_files or []) + newly_saved_paths
154
- return "\n".join(lines), session_files, len(newly_saved_paths)
155
 
156
 
157
  def count_selected(
@@ -176,10 +188,6 @@ def build_ui() -> gr.Blocks:
176
  Spells to collect: Lumos, Nox, Alohomora, Wingardium Leviosa, Accio, Reparo.
177
  """)
178
 
179
- # Per-session state
180
- session_id = gr.State(uuid.uuid4().hex)
181
- session_files = gr.State([]) # paths saved during this session
182
-
183
  with gr.Row():
184
  username = gr.Textbox(label="Your Name (for filename)", placeholder="e.g., harry_p" , autofocus=True)
185
 
@@ -202,8 +210,8 @@ def build_ui() -> gr.Blocks:
202
 
203
  submit.click(
204
  fn=submit_recordings,
205
- inputs=[username, lumos, nox, alohomora, wingardium, accio, reparo, session_id, session_files],
206
- outputs=[result, session_files, submitted_count],
207
  )
208
 
209
  # Live counter updates when any audio input changes
@@ -216,10 +224,9 @@ def build_ui() -> gr.Blocks:
216
 
217
  gr.Markdown("""
218
  Notes:
219
- - Files are saved in the app's `recordings/` folder using: `<spell>_<username>_<timestamp>.wav`.
220
  - 16 kHz mono WAV is used to make model training consistent.
221
  - You don't have to record all spells at once—submit whatever you have.
222
- - A CSV log is kept at `recordings/log.csv` with username, spell, timestamp, filename.
223
  """)
224
 
225
  return demo
 
2
  import re
3
  import time
4
  import math
5
+ import io
 
6
  from typing import List, Tuple, Optional
7
 
8
  import numpy as np
9
  import gradio as gr
10
  import soundfile as sf
11
  from scipy.signal import resample_poly
12
+ from scipy.io import wavfile as wav_write
13
+ from pymongo import MongoClient
14
+ from gridfs import GridFS
15
 
16
+ # MongoDB configuration via environment variables
17
+ MONGO_URI = os.getenv("MONGO_URI", "")
18
+ MONGO_DB = os.getenv("MONGO_DB", "spells")
19
+ MONGO_BUCKET = os.getenv("MONGO_BUCKET", "recordings")
20
+
21
+ _mongo_client: Optional[MongoClient] = None
22
+ _mongo_fs: Optional[GridFS] = None
23
 
24
  # Fixed target sample rate for ML training
25
  TARGET_SR = 16000
 
67
  return resample_poly(audio, up=up, down=down)
68
 
69
 
70
+ def get_gridfs() -> Optional[GridFS]:
71
+ global _mongo_client, _mongo_fs
72
+ if not MONGO_URI:
73
+ return None
74
+ if _mongo_fs is not None:
75
+ return _mongo_fs
76
+ _mongo_client = MongoClient(MONGO_URI)
77
+ db = _mongo_client[MONGO_DB]
78
+ _mongo_fs = GridFS(db, collection=MONGO_BUCKET)
79
+ return _mongo_fs
 
 
80
 
81
 
82
+ def save_one_from_path(filepath: Optional[str], spell: str, username: str) -> Optional[str]:
83
+ """Load an audio file (from mic/upload), process to 16k mono, and store in MongoDB GridFS.
84
+ Returns inserted file id (as str) or None if no audio provided / DB not configured.
85
  """
86
  if not filepath:
87
  return None
 
94
  audio = resample_to_target(audio, sr, TARGET_SR)
95
  audio = np.clip(audio, -1.0, 1.0)
96
 
97
+ # Convert to int16 PCM bytes in-memory
98
+ pcm16 = (audio * 32767.0).astype(np.int16)
99
+ buf = io.BytesIO()
100
+ wav_write.write(buf, TARGET_SR, pcm16)
101
+ wav_bytes = buf.getvalue()
102
+
103
+ fs = get_gridfs()
104
+ if fs is None:
105
+ return None
106
+
107
  ts = int(time.time() * 1000)
108
  spell_slug = re.sub(r"[^a-zA-Z0-9]+", "_", spell).strip("_").lower()
109
+ filename = f"{spell_slug}_{username}_{ts}.wav"
110
+ metadata = {
111
+ "username": username,
112
+ "spell": spell,
113
+ "timestamp_ms": ts,
114
+ "sample_rate": TARGET_SR,
115
+ "format": "wav",
116
+ }
117
+ file_id = fs.put(wav_bytes, filename=filename, contentType="audio/wav", metadata=metadata)
118
+ return str(file_id)
119
 
120
 
121
  def submit_recordings(
 
126
  wingardium_path: Optional[str],
127
  accio_path: Optional[str],
128
  reparo_path: Optional[str],
129
+ ) -> Tuple[str, int]:
 
 
130
  user = sanitize_username(username)
131
 
132
  pairs: List[Tuple[str, Optional[str]]] = [
 
140
 
141
  saved = []
142
  skipped = []
143
+ inserted = 0
144
 
145
  for spell, path in pairs:
146
+ file_id = save_one_from_path(path, spell, user)
147
+ if file_id:
148
+ saved.append(f"{spell} -> id {file_id}")
149
+ inserted += 1
 
 
 
150
  else:
151
  skipped.append(spell)
152
 
153
  lines = []
154
+ if not MONGO_URI:
155
+ lines.append("Database not configured: set MONGO_URI secret in the Space.")
156
  if saved:
157
  lines.append("Saved recordings:")
158
  lines += [f"- {s}" for s in saved]
 
161
  lines.append("Missing (not provided):")
162
  lines += [f"- {s}" for s in skipped]
163
  if not lines:
164
+ return "No audio captured. Please record at least one spell.", 0
165
 
166
+ return "\n".join(lines), inserted
 
 
167
 
168
 
169
  def count_selected(
 
188
  Spells to collect: Lumos, Nox, Alohomora, Wingardium Leviosa, Accio, Reparo.
189
  """)
190
 
 
 
 
 
191
  with gr.Row():
192
  username = gr.Textbox(label="Your Name (for filename)", placeholder="e.g., harry_p" , autofocus=True)
193
 
 
210
 
211
  submit.click(
212
  fn=submit_recordings,
213
+ inputs=[username, lumos, nox, alohomora, wingardium, accio, reparo],
214
+ outputs=[result, submitted_count],
215
  )
216
 
217
  # Live counter updates when any audio input changes
 
224
 
225
  gr.Markdown("""
226
  Notes:
227
+ - Submissions are stored directly in MongoDB (GridFS) using environment secrets.
228
  - 16 kHz mono WAV is used to make model training consistent.
229
  - You don't have to record all spells at once—submit whatever you have.
 
230
  """)
231
 
232
  return demo
requirements.txt CHANGED
@@ -2,3 +2,4 @@ numpy
2
  soundfile
3
  scipy
4
  huggingface_hub<0.25
 
 
2
  soundfile
3
  scipy
4
  huggingface_hub<0.25
5
+ pymongo