Sulitha committed on
Commit
d61c73b
·
1 Parent(s): b9f274b

initial commit

Browse files
Files changed (3) hide show
  1. README.md +7 -6
  2. app.py +176 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,13 +1,14 @@
1
  ---
2
- title: Harry Potter Spells
3
- emoji: 🐨
4
- colorFrom: yellow
5
- colorTo: gray
 
6
  sdk: gradio
7
- sdk_version: 5.49.1
8
  app_file: app.py
9
  pinned: false
10
- short_description: This space is for collect Harry potter spell voices
 
11
  ---
12
 
13
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: Spell Recorder
3
+ emoji:
4
+ colorFrom: indigo
5
+ colorTo: purple
6
+ colorBottom: purple
7
  sdk: gradio
 
8
  app_file: app.py
9
  pinned: false
10
+ license: mit
11
+ short_description: Collect microphone recordings for six spells (Lumos, Nox, Alohomora, Wingardium Leviosa, Accio, Reparo)
12
  ---
13
 
14
  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+ import time
4
+ import math
5
+ from typing import List, Tuple, Optional
6
+
7
+ import numpy as np
8
+ import gradio as gr
9
+ import soundfile as sf
10
+ from scipy.signal import resample_poly
11
+
12
# Output directory for saved recordings.
# NOTE: the directory is created as a side effect of importing this module.
OUT_DIR = "recordings"
os.makedirs(OUT_DIR, exist_ok=True)

# Fixed target sample rate (Hz) for ML training; every saved file is written
# at this rate.
TARGET_SR = 16000

# Spells to collect
SPELLS = [
    "Lumos",
    "Nox",
    "Alohomora",
    "Wingardium Leviosa",
    "Accio",
    "Reparo",
]
28
+
29
+
30
def sanitize_username(name: Optional[str]) -> str:
    """Turn a free-form name into a token that is safe to embed in a filename.

    Steps, in order:
    - trim surrounding whitespace and replace each remaining whitespace run
      with a single underscore
    - drop every character that is not an ASCII letter, digit, ``_`` or ``-``
    - lower-case the result

    Returns ``"anon"`` when *name* is empty/``None`` or when nothing survives
    the filtering.
    """
    fallback = "anon"
    if not name:
        return fallback

    # Whitespace must be converted before filtering, otherwise the filter
    # below would simply delete it instead of turning it into underscores.
    underscored = re.sub(r"\s+", "_", name.strip())
    cleaned = re.sub(r"[^a-zA-Z0-9_-]", "", underscored).lower()
    return cleaned if cleaned else fallback
43
+
44
+
45
def to_mono(audio: np.ndarray) -> np.ndarray:
    """Collapse a two-dimensional sample array to a single channel.

    A 2-D input is averaged along axis 1 (assumed to be the channel axis,
    i.e. a ``(frames, channels)`` layout -- confirm against the loader).
    Input of any other dimensionality is returned unchanged, as the same
    object.
    """
    return audio.mean(axis=1) if audio.ndim == 2 else audio
50
+
51
+
52
def resample_to_target(audio: np.ndarray, sr: int, target_sr: int = TARGET_SR) -> np.ndarray:
    """Resample *audio* from *sr* Hz to *target_sr* Hz.

    When the two rates already match, the input array is returned untouched.
    Otherwise the rate ratio is reduced to lowest terms with ``math.gcd`` and
    handed to ``scipy.signal.resample_poly`` as its up/down factors.
    """
    if sr != target_sr:
        # Dividing both rates by their gcd gives the smallest integer
        # up/down pair describing the same ratio.
        divisor = math.gcd(sr, target_sr)
        audio = resample_poly(audio, up=target_sr // divisor, down=sr // divisor)
    return audio
60
+
61
+
62
def save_one_from_path(filepath: Optional[str], spell: str, username: str) -> Optional[str]:
    """Convert one recorded/uploaded audio file to 16 kHz mono and store it.

    The file at *filepath* is read as float32, down-mixed with ``to_mono``,
    resampled to ``TARGET_SR``, clamped to [-1.0, 1.0] and written as 16-bit
    PCM WAV into ``OUT_DIR`` under the name
    ``<spell-slug>_<username>_<millisecond timestamp>.wav``.

    *username* is inserted into the filename as-is, so callers are expected
    to pass an already sanitized value.

    Returns the path of the written file, or ``None`` when *filepath* is
    empty or the file contains no samples.
    """
    if not filepath:
        return None

    audio, sr = sf.read(filepath, dtype="float32", always_2d=False)
    has_no_samples = isinstance(audio, np.ndarray) and audio.size == 0
    if audio is None or has_no_samples:
        return None

    mono = to_mono(np.asarray(audio))
    # Clamp after resampling so the 16-bit PCM write only ever sees samples
    # inside the nominal [-1, 1] range.
    processed = np.clip(resample_to_target(mono, sr, TARGET_SR), -1.0, 1.0)

    # Descriptive filename: spell_username_timestamp.wav
    slug = re.sub(r"[^a-zA-Z0-9]+", "_", spell).strip("_").lower()
    stamp_ms = int(time.time() * 1000)
    out_path = os.path.join(OUT_DIR, f"{slug}_{username}_{stamp_ms}.wav")

    sf.write(out_path, processed, TARGET_SR, subtype="PCM_16")
    return out_path
85
+
86
+
87
def submit_recordings(
    username: str,
    lumos_path: Optional[str],
    nox_path: Optional[str],
    alohomora_path: Optional[str],
    wingardium_path: Optional[str],
    accio_path: Optional[str],
    reparo_path: Optional[str],
) -> str:
    """Save every provided recording and return a Markdown status report.

    Args:
        username: Free-form name typed by the user; sanitized before it is
            used in filenames.
        lumos_path, nox_path, alohomora_path, wingardium_path, accio_path,
        reparo_path: File path of the recording for the corresponding spell,
            or ``None``/empty when the user supplied nothing for it.

    Returns:
        A multi-line string listing the saved files, any recordings that
        could not be processed, and the spells for which nothing was saved
        because no (or empty) audio was supplied. When nothing was saved and
        nothing failed, a single prompt asking the user to record at least
        one spell is returned instead.
    """
    user = sanitize_username(username)

    pairs: List[Tuple[str, Optional[str]]] = [
        ("Lumos", lumos_path),
        ("Nox", nox_path),
        ("Alohomora", alohomora_path),
        ("Wingardium Leviosa", wingardium_path),
        ("Accio", accio_path),
        ("Reparo", reparo_path),
    ]

    saved: List[str] = []
    failed: List[str] = []
    skipped: List[str] = []

    for spell, path in pairs:
        try:
            out = save_one_from_path(path, spell, user)
        except (OSError, RuntimeError, ValueError) as exc:
            # An unreadable or unsupported upload must not abort the batch:
            # earlier recordings are already on disk and the user still needs
            # the report for them. soundfile's open/decode errors derive from
            # RuntimeError; OSError/ValueError cover filesystem and
            # resampling problems.
            failed.append(f"{spell} ({type(exc).__name__})")
            continue
        if out:
            saved.append(f"{spell} -> {os.path.basename(out)}")
        else:
            skipped.append(spell)

    # Every spell ends up in exactly one of the three lists, so "nothing to
    # report" has to be detected from the lists themselves rather than from
    # an empty report.
    if not saved and not failed:
        return "No audio captured. Please record at least one spell."

    sections: List[str] = []
    for heading, items in (
        ("Saved recordings:", saved),
        ("Failed (could not process audio):", failed),
        ("Missing (not provided):", skipped),
    ):
        if items:
            sections.append("\n".join([heading, *(f"- {item}" for item in items)]))

    # A blank line between sections keeps them as separate Markdown lists.
    return "\n\n".join(sections)
129
+
130
+
131
def build_ui() -> gr.Blocks:
    """Assemble and return the Gradio Blocks interface for the recorder.

    Layout: an intro text, a name textbox, two columns holding three audio
    inputs each (one per spell), a Submit button wired to
    ``submit_recordings``, a Markdown area for the result, and closing notes.
    """

    def spell_audio(label: str) -> gr.Audio:
        # Every spell uses the same input configuration; only the label
        # differs. type="filepath" matches the path arguments that
        # submit_recordings declares.
        return gr.Audio(label=label, sources=["microphone", "upload"], type="filepath")

    with gr.Blocks(title="Spell Recorder") as demo:
        gr.Markdown("""
        # Spell Recorder
        Record any of the listed spells and press Submit. You can use your microphone directly (preferred) or upload a file.

        Spells to collect: Lumos, Nox, Alohomora, Wingardium Leviosa, Accio, Reparo.
        """)

        with gr.Row():
            username = gr.Textbox(
                label="Your Name (for filename)",
                placeholder="e.g., harry_p",
                autofocus=True,
            )

        with gr.Row():
            with gr.Column():
                lumos = spell_audio("Lumos")
                nox = spell_audio("Nox")
                alohomora = spell_audio("Alohomora")
            with gr.Column():
                wingardium = spell_audio("Wingardium Leviosa")
                accio = spell_audio("Accio")
                reparo = spell_audio("Reparo")

        submit = gr.Button("Submit")
        result = gr.Markdown()

        # Input order must match the positional parameters of
        # submit_recordings.
        audio_inputs = [lumos, nox, alohomora, wingardium, accio, reparo]
        submit.click(
            fn=submit_recordings,
            inputs=[username, *audio_inputs],
            outputs=[result],
        )

        gr.Markdown("""
        Notes:
        - Files are saved in the app's `recordings/` folder using: `<spell>_<username>_<timestamp>.wav`.
        - 16 kHz mono WAV is used to make model training consistent.
        - You don't have to record all spells at once—submit whatever you have.
        """)

    return demo
170
+
171
+
172
# Build the interface at import time so that `demo` exists as a module-level
# name (not only when the file is executed as a script).
demo = build_ui()

if __name__ == "__main__":
    # Works locally and on Hugging Face Spaces
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio
2
+ numpy
3
+ soundfile
4
+ scipy