Files changed (4)
  1. main2.py +739 -0
  2. requirements.txt +17 -0
  3. server.py +186 -0
  4. yamnet_class_map.csv +522 -0
main2.py ADDED
@@ -0,0 +1,739 @@
1
+ import os
2
+ import tempfile
3
+ import numpy as np
4
+ import pandas as pd
5
+ import torch
6
+ import torchaudio
7
+ import librosa
8
+ import matplotlib.pyplot as plt
9
+ import csv
10
+ from typing import List, Dict, Tuple, Optional
11
+ from scipy.stats import kurtosis, skew
12
+ import concurrent.futures
13
+ import multiprocessing
14
+ from functools import partial
15
+ import time
16
+ import threading
17
+ from queue import Queue
18
+ from dotenv import load_dotenv
19
+ from groq import Groq
20
+
21
+ # Import required models
22
+ from pyannote.audio import Pipeline
23
+ import whisper
24
+ from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
25
+ from torch_vggish_yamnet import yamnet
26
+ from torch_vggish_yamnet.input_proc import WaveformToInput
27
+ import warnings
28
+ warnings.filterwarnings("ignore")
29
+
30
+ class UnifiedAudioAnalyzer:
31
+ """
32
+ Unified Audio Analysis System combining:
33
+ 1. Speaker Diarization + Transcription
34
+ 2. Audio Event Detection (YAMNet)
35
+ 3. Emotion Recognition + Paralinguistic Features
36
+
37
+ Enhanced with parallel processing for faster execution
38
+ """
39
+
40
+ def __init__(self, enable_parallel_processing=True, max_workers=None):
41
+ """Initialize all models and components"""
42
+ print("🔄 Initializing Unified Audio Analyzer...")
43
+
44
+ # Configure device
45
+ self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
46
+ print(f"Using device: {self.device}")
47
+
48
+ # Parallel processing settings
49
+ self.enable_parallel_processing = enable_parallel_processing
50
+ self.max_workers = max_workers or max(1, multiprocessing.cpu_count() - 1)
51
+ print(f"Parallel processing: {'Enabled' if enable_parallel_processing else 'Disabled'}")
52
+ if enable_parallel_processing:
53
+ print(f"Max workers: {self.max_workers}")
54
+
55
+ # Initialize models
56
+ self._load_diarization_models()
57
+ self._load_emotion_models()
58
+ self._load_event_detection_models()
59
+ self._load_class_names()
60
+
61
+ print("✅ All models loaded successfully!")
62
+
63
+ def _load_diarization_models(self):
64
+ """Load speaker diarization and transcription models"""
65
+ print("Loading speaker diarization and transcription models...")
66
+
67
+ # Load pyannote diarization pipeline
68
+ try:
69
+ self.diarization_pipeline = Pipeline.from_pretrained(
70
+ "pyannote/speaker-diarization-3.1"
71
+ # Uncomment and add your token: use_auth_token="YOUR_HUGGINGFACE_TOKEN"
72
+ )
73
+ if torch.cuda.is_available():
74
+ self.diarization_pipeline = self.diarization_pipeline.to(self.device)
75
+ except Exception as e:
76
+ print(f"Warning: Could not load diarization model: {e}")
77
+ self.diarization_pipeline = None
78
+
79
+ # Load Whisper transcription model
80
+ try:
81
+ self.whisper_model = whisper.load_model("base")
82
+ except Exception as e:
83
+ print(f"Warning: Could not load Whisper model: {e}")
84
+ self.whisper_model = None
85
+
86
+ def _load_emotion_models(self):
87
+ """Load emotion recognition models"""
88
+ print("Loading emotion recognition models...")
89
+
90
+ try:
91
+ self.emotion_model = Wav2Vec2ForSequenceClassification.from_pretrained(
92
+ "Dpngtm/wav2vec2-emotion-recognition"
93
+ )
94
+ self.emotion_processor = Wav2Vec2Processor.from_pretrained(
95
+ "Dpngtm/wav2vec2-emotion-recognition"
96
+ )
97
+ self.emotion_labels = ["angry", "calm", "disgust", "fearful", "happy", "neutral", "sad", "surprised"]
98
+ except Exception as e:
99
+ print(f"Warning: Could not load emotion model: {e}")
100
+ self.emotion_model = None
101
+
102
+ def _load_event_detection_models(self):
103
+ """Load YAMNet for audio event detection"""
104
+ print("Loading audio event detection models...")
105
+
106
+ try:
107
+ self.yamnet_model = yamnet.yamnet(pretrained=True)
108
+ self.yamnet_model.eval()
109
+ self.yamnet_converter = WaveformToInput()
110
+ except Exception as e:
111
+ print(f"Warning: Could not load YAMNet model: {e}")
112
+ self.yamnet_model = None
113
+
114
+ def _load_class_names(self):
115
+ """Load AudioSet class names for YAMNet from CSV"""
116
+ csv_path = "yamnet_class_map.csv"
117
+ self.audioset_classes = []
118
+ try:
119
+ with open(csv_path, "r") as f:
120
+ reader = csv.reader(f)
121
+ next(reader) # skip header
122
+ for row in reader:
123
+ self.audioset_classes.append(row[2]) # display_name
124
+ except Exception as e:
125
+ print(f"Warning: Could not load class names from {csv_path}: {e}")
126
+ # Fallback to common AudioSet classes
127
+ self.audioset_classes = [
128
+ "Speech", "Male speech, man speaking", "Female speech, woman speaking",
129
+ "Child speech, kid speaking", "Conversation", "Narration, monologue",
130
+ "Babbling", "Speech synthesizer", "Shout", "Bellow", "Whoop", "Yell",
131
+ "Children shouting", "Screaming", "Whispering", "Laughter", "Baby laughter",
132
+ "Giggle", "Snicker", "Belly laugh", "Chuckle, chortle", "Crying, sobbing",
133
+ "Baby cry, infant cry", "Whimper", "Wail, moan", "Sigh", "Singing",
134
+ "Choir", "Yodeling", "Chant", "Mantra", "Male singing", "Female singing",
135
+ "Child singing", "Synthetic singing", "Rapping", "Humming", "Music",
136
+ "Musical instrument", "Piano", "Guitar", "Drum", "Orchestra", "Pop music",
137
+ "Rock music", "Jazz", "Classical music", "Electronic music", "Animal",
138
+ "Dog", "Cat", "Bird", "Insect", "Vehicle", "Car", "Motorcycle", "Train",
139
+ "Aircraft", "Helicopter", "Wind", "Rain", "Thunder", "Water", "Fire",
140
+ "Applause", "Crowd", "Footsteps", "Door", "Bell", "Alarm", "Clock"
141
+ ]
142
+
143
+ def _transcribe_segment_parallel(self, segment_data):
144
+ """Helper function for parallel transcription of segments"""
145
+ segment, sample_rate, speaker, start_time, end_time, whisper_model = segment_data
146
+
147
+ try:
148
+ # Create temporary file for this segment
149
+ with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
150
+ temp_filename = temp_file.name
151
+ torchaudio.save(temp_filename, segment, sample_rate)
152
+
153
+ # Transcribe segment
154
+ try:
155
+ transcription_result = whisper_model.transcribe(
156
+ temp_filename,
157
+ language="en",
158
+ temperature=0,
159
+ no_speech_threshold=0.6
160
+ )
161
+ segment_text = transcription_result["text"].strip()
162
+
163
+ if segment_text:
164
+ result = {
165
+ "speaker": speaker,
166
+ "start": round(start_time, 2),
167
+ "end": round(end_time, 2),
168
+ "duration": round(end_time - start_time, 2),
169
+ "text": segment_text,
170
+ "confidence": transcription_result.get("language_probability", 0.0)
171
+ }
172
+ else:
173
+ result = None
174
+
175
+ except Exception as e:
176
+ print(f"⚠️ Error transcribing segment: {e}")
177
+ result = None
178
+
179
+ finally:
180
+ # Clean up temp file
181
+ try:
182
+ os.unlink(temp_filename)
183
+ except OSError:
184
+ pass
185
+
186
+ return result
187
+
188
+ except Exception as e:
189
+ print(f"⚠️ Error in parallel transcription: {e}")
190
+ return None
191
+
192
+ def transcribe_with_diarization(self, audio_file: str, min_segment_duration: float = 1.0) -> List[Dict]:
193
+ """Perform speaker diarization and transcription (aligned with main.py logic)"""
194
+ if self.diarization_pipeline is None or self.whisper_model is None:
195
+ print("❌ Diarization or transcription models not available")
196
+ return []
197
+
198
+ print("🎯 Performing speaker diarization and transcription...")
199
+
200
+ # Perform diarization
201
+ diarization_result = self.diarization_pipeline(audio_file, num_speakers=2)
202
+
203
+ # Load audio
204
+ waveform, sample_rate = torchaudio.load(audio_file)
205
+ if sample_rate != 16000:
206
+ waveform = torchaudio.functional.resample(waveform, sample_rate, 16000)
207
+ sample_rate = 16000
208
+
209
+ results = []
210
+ temp_files = []
211
+
212
+ try:
213
+ for turn, _, speaker in diarization_result.itertracks(yield_label=True):
214
+ if turn.end - turn.start < min_segment_duration:
215
+ continue
216
+
217
+ # Extract segment
218
+ start_sample = int(turn.start * sample_rate)
219
+ end_sample = int(turn.end * sample_rate)
220
+ segment = waveform[:, start_sample:end_sample]
221
+
222
+ # Create temporary file for transcription
223
+ with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
224
+ temp_filename = temp_file.name
225
+ temp_files.append(temp_filename)
226
+ torchaudio.save(temp_filename, segment, sample_rate)
227
+
228
+ # Transcribe
229
+ try:
230
+ transcription_result = self.whisper_model.transcribe(
231
+ temp_filename,
232
+ language="en",
233
+ temperature=0,
234
+ no_speech_threshold=0.6
235
+ )
236
+ segment_text = transcription_result["text"].strip()
237
+
238
+ if segment_text:
239
+ results.append({
240
+ "speaker": speaker,
241
+ "start": round(turn.start, 2),
242
+ "end": round(turn.end, 2),
243
+ "duration": round(turn.end - turn.start, 2),
244
+ "text": segment_text,
245
+ "confidence": transcription_result.get("language_probability", 0.0)
246
+ })
247
+ except Exception as e:
248
+ print(f"⚠️ Error transcribing segment: {e}")
249
+ continue
250
+
251
+ finally:
252
+ # Cleanup temp files
253
+ for temp_file in temp_files:
254
+ try:
255
+ os.unlink(temp_file)
256
+ except OSError:
257
+ pass
258
+
259
+ return results
260
+
261
+ def detect_audio_events(self, audio_file: str, top_k: int = 10) -> Dict:
262
+ """Detect audio events using YAMNet"""
263
+ if self.yamnet_model is None:
264
+ print("❌ YAMNet model not available")
265
+ return {}
266
+
267
+ print("🔊 Detecting audio events...")
268
+
269
+ try:
270
+ # Load and preprocess audio
271
+ waveform, sr = torchaudio.load(audio_file)
272
+ if sr != 16000:
273
+ waveform = torchaudio.functional.resample(waveform, sr, 16000)
274
+
275
+ # Process through YAMNet
276
+ inputs = self.yamnet_converter(waveform, 16000)
277
+
278
+ with torch.no_grad():
279
+ embeddings, logits = self.yamnet_model(inputs)
280
+ mean_logits = logits.mean(dim=0)
281
+ probs = torch.softmax(mean_logits, dim=-1)
282
+ top_probs, top_idx = torch.topk(probs, top_k)
283
+
284
+ # Format results
285
+ events = []
286
+ for i in range(top_k):
287
+ idx = top_idx[i].item()
288
+ prob = top_probs[i].item()
289
+ if idx < len(self.audioset_classes):
290
+ label = self.audioset_classes[idx]
291
+ else:
292
+ label = f"Unknown_Class_{idx}"
293
+
294
+ events.append({
295
+ "event": label,
296
+ "class_id": idx,
297
+ "probability": prob
298
+ })
299
+
300
+ return {
301
+ "top_events": events,
302
+ "total_classes": len(self.audioset_classes)
303
+ }
304
+
305
+ except Exception as e:
306
+ print(f"⚠️ Error in event detection: {e}")
307
+ return {}
308
+
309
+ def _extract_feature_chunk(self, audio_chunk, sr, feature_type):
310
+ """Helper function for parallel feature extraction"""
311
+ try:
312
+ if feature_type == "mfcc":
313
+ mfcc = librosa.feature.mfcc(y=audio_chunk, sr=sr, n_mfcc=13)
314
+ features = {}
315
+ for i in range(13):
316
+ features[f'mfcc_{i+1}_mean'] = float(np.mean(mfcc[i]))
317
+ features[f'mfcc_{i+1}_std'] = float(np.std(mfcc[i]))
318
+ return features
319
+
320
+ elif feature_type == "chroma":
321
+ chroma = librosa.feature.chroma_stft(y=audio_chunk, sr=sr)
322
+ features = {}
323
+ for i in range(12):
324
+ features[f'chroma_{i+1}_mean'] = float(np.mean(chroma[i]))
325
+ return features
326
+
327
+ elif feature_type == "spectral":
328
+ features = {}
329
+ features['spectral_centroid_mean'] = float(np.mean(librosa.feature.spectral_centroid(y=audio_chunk, sr=sr)[0]))
330
+ features['spectral_rolloff_mean'] = float(np.mean(librosa.feature.spectral_rolloff(y=audio_chunk, sr=sr)[0]))
331
+ return features
332
+
333
+ elif feature_type == "basic":
334
+ features = {}
335
+ features['rms_energy'] = float(np.mean(librosa.feature.rms(y=audio_chunk)[0]))
336
+ features['zero_crossing_rate'] = float(np.mean(librosa.feature.zero_crossing_rate(audio_chunk)[0]))
337
+ return features
338
+
339
+ except Exception as e:
340
+ print(f"⚠️ Error extracting {feature_type} features: {e}")
341
+ return {}
342
+
343
+ def extract_paralinguistic_features(self, audio_data, sr):
344
+ """Extract comprehensive paralinguistic features"""
345
+ print("🎵 Extracting paralinguistic features...")
346
+
347
+ features = {}
348
+
349
+ # Basic properties
350
+ features['duration'] = len(audio_data) / sr
351
+ features['sample_rate'] = sr
352
+
353
+ if self.enable_parallel_processing:
354
+ print("🚀 Using parallel feature extraction...")
355
+
356
+ # Prepare feature extraction tasks
357
+ feature_tasks = [
358
+ ("mfcc", audio_data, sr),
359
+ ("chroma", audio_data, sr),
360
+ ("spectral", audio_data, sr),
361
+ ("basic", audio_data, sr)
362
+ ]
363
+
364
+ # Execute feature extraction in parallel
365
+ with concurrent.futures.ThreadPoolExecutor(max_workers=min(4, self.max_workers)) as executor:
366
+ future_to_feature = {
367
+ executor.submit(self._extract_feature_chunk, audio_chunk, sr, feature_type): feature_type
368
+ for feature_type, audio_chunk, sr in feature_tasks
369
+ }
370
+
371
+ for future in concurrent.futures.as_completed(future_to_feature):
372
+ feature_result = future.result()
373
+ features.update(feature_result)
374
+ else:
375
+ # Sequential feature extraction (original logic)
376
+ # Energy features
377
+ features['rms_energy'] = float(np.mean(librosa.feature.rms(y=audio_data)[0]))
378
+ features['zero_crossing_rate'] = float(np.mean(librosa.feature.zero_crossing_rate(audio_data)[0]))
379
+
380
+ # MFCC features
381
+ mfcc = librosa.feature.mfcc(y=audio_data, sr=sr, n_mfcc=13)
382
+ for i in range(13):
383
+ features[f'mfcc_{i+1}_mean'] = float(np.mean(mfcc[i]))
384
+ features[f'mfcc_{i+1}_std'] = float(np.std(mfcc[i]))
385
+
386
+ # Spectral features
387
+ features['spectral_centroid_mean'] = float(np.mean(librosa.feature.spectral_centroid(y=audio_data, sr=sr)[0]))
388
+ features['spectral_rolloff_mean'] = float(np.mean(librosa.feature.spectral_rolloff(y=audio_data, sr=sr)[0]))
389
+
390
+ # Chroma features
391
+ chroma = librosa.feature.chroma_stft(y=audio_data, sr=sr)
392
+ for i in range(12):
393
+ features[f'chroma_{i+1}_mean'] = float(np.mean(chroma[i]))
394
+
395
+ # Pitch features (kept sequential due to complexity)
396
+ try:
397
+ pitches, magnitudes = librosa.piptrack(y=audio_data, sr=sr, threshold=0.1)
398
+ pitch_values = []
399
+ for t in range(pitches.shape[1]):
400
+ index = magnitudes[:, t].argmax()
401
+ pitch = pitches[index, t]
402
+ if pitch > 0:
403
+ pitch_values.append(pitch)
404
+
405
+ if pitch_values:
406
+ features['pitch_mean'] = float(np.mean(pitch_values))
407
+ features['pitch_std'] = float(np.std(pitch_values))
408
+ features['pitch_min'] = float(np.min(pitch_values))
409
+ features['pitch_max'] = float(np.max(pitch_values))
410
+ else:
411
+ features.update({'pitch_mean': 0.0, 'pitch_std': 0.0, 'pitch_min': 0.0, 'pitch_max': 0.0})
412
+ except Exception:
413
+ features.update({'pitch_mean': 0.0, 'pitch_std': 0.0, 'pitch_min': 0.0, 'pitch_max': 0.0})
414
+
415
+ # Tempo
416
+ try:
417
+ tempo, _ = librosa.beat.beat_track(y=audio_data, sr=sr)
418
+ if isinstance(tempo, np.ndarray):
419
+ features['tempo'] = float(tempo.item() if tempo.size == 1 else tempo[0])
420
+ else:
421
+ features['tempo'] = float(tempo)
422
+ except Exception:
423
+ features['tempo'] = 0.0
424
+
425
+ return features
426
+
427
+ def predict_emotion(self, audio_data, sr):
428
+ """Predict emotion using transformer model"""
429
+ if self.emotion_model is None:
430
+ return None
431
+
432
+ print("😊 Predicting emotions...")
433
+
434
+ try:
435
+ # Resample to 16kHz if needed
436
+ if sr != 16000:
437
+ audio_data = librosa.resample(audio_data, orig_sr=sr, target_sr=16000)
438
+
439
+ # Process through model
440
+ inputs = self.emotion_processor(audio_data, sampling_rate=16000, return_tensors="pt", padding=True)
441
+
442
+ with torch.no_grad():
443
+ outputs = self.emotion_model(**inputs)
444
+ predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
445
+
446
+ # Get emotion probabilities
447
+ emotion_probs = {}
448
+ for i, emotion in enumerate(self.emotion_labels):
449
+ emotion_probs[emotion] = predictions[0][i].item()
450
+
451
+ predicted_emotion = self.emotion_labels[predictions.argmax().item()]
452
+ confidence = predictions.max().item()
453
+
454
+ return {
455
+ 'predicted_emotion': predicted_emotion,
456
+ 'confidence': confidence,
457
+ 'all_emotions': emotion_probs
458
+ }
459
+
460
+ except Exception as e:
461
+ print(f"⚠️ Error in emotion prediction: {e}")
462
+ return None
463
+
464
+ def analyze_complete_audio(self, audio_file: str) -> Dict:
465
+ """Perform complete unified audio analysis with parallel processing"""
466
+ if not os.path.exists(audio_file):
467
+ print(f"❌ Audio file not found: {audio_file}")
468
+ return {}
469
+
470
+ print(f"\n🚀 Starting complete analysis of: {audio_file}")
471
+ print("="*60)
472
+
473
+ start_time = time.time()
474
+
475
+ # Load audio for paralinguistic analysis
476
+ try:
477
+ audio_data, sr = librosa.load(audio_file, sr=22050)
478
+ audio_data, _ = librosa.effects.trim(audio_data, top_db=20)
479
+ audio_data = librosa.util.normalize(audio_data)
480
+ except Exception as e:
481
+ print(f"❌ Error loading audio: {e}")
482
+ return {}
483
+
484
+ if self.enable_parallel_processing:
485
+ print("🚀 Running analysis components in parallel...")
486
+
487
+ # Create a queue for results
488
+ results_queue = Queue()
489
+
490
+ # Define analysis functions
491
+ def run_diarization():
492
+ result = self.transcribe_with_diarization(audio_file)
493
+ results_queue.put(('diarization', result))
494
+
495
+ def run_event_detection():
496
+ result = self.detect_audio_events(audio_file)
497
+ results_queue.put(('events', result))
498
+
499
+ def run_feature_extraction():
500
+ result = self.extract_paralinguistic_features(audio_data, sr)
501
+ results_queue.put(('features', result))
502
+
503
+ def run_emotion_prediction():
504
+ result = self.predict_emotion(audio_data, sr)
505
+ results_queue.put(('emotion', result))
506
+
507
+ # Start threads for parallel execution
508
+ threads = [
509
+ threading.Thread(target=run_diarization),
510
+ threading.Thread(target=run_event_detection),
511
+ threading.Thread(target=run_feature_extraction),
512
+ threading.Thread(target=run_emotion_prediction)
513
+ ]
514
+
515
+ # Start all threads
516
+ for thread in threads:
517
+ thread.start()
518
+
519
+ # Wait for all threads to complete
520
+ for thread in threads:
521
+ thread.join()
522
+
523
+ # Collect results
524
+ analysis_components = {}
525
+ while not results_queue.empty():
526
+ component, result = results_queue.get()
527
+ analysis_components[component] = result
528
+
529
+ # Assign results
530
+ diarization_results = analysis_components.get('diarization', [])
531
+ event_results = analysis_components.get('events', {})
532
+ paralinguistic_features = analysis_components.get('features', {})
533
+ emotion_results = analysis_components.get('emotion', None)
534
+
535
+ else:
536
+ # Sequential processing (original logic)
537
+ # 1. Speaker Diarization + Transcription
538
+ diarization_results = self.transcribe_with_diarization(audio_file)
539
+
540
+ # 2. Audio Event Detection
541
+ event_results = self.detect_audio_events(audio_file)
542
+
543
+ # 3. Paralinguistic Features
544
+ paralinguistic_features = self.extract_paralinguistic_features(audio_data, sr)
545
+
546
+ # 4. Emotion Recognition
547
+ emotion_results = self.predict_emotion(audio_data, sr)
548
+
549
+ processing_time = time.time() - start_time
550
+ print(f"⏱️ Total processing time: {processing_time:.2f} seconds")
551
+
552
+ # Combine all results
553
+ complete_analysis = {
554
+ 'file_info': {
555
+ 'filename': os.path.basename(audio_file),
556
+ 'filepath': audio_file,
557
+ 'duration': paralinguistic_features.get('duration', 0),
558
+ 'sample_rate': paralinguistic_features.get('sample_rate', 0),
559
+ 'processing_time': processing_time
560
+ },
561
+ 'diarization_transcription': diarization_results,
562
+ 'audio_events': event_results,
563
+ 'paralinguistic_features': paralinguistic_features,
564
+ 'emotion_analysis': emotion_results
565
+ }
566
+
567
+ return complete_analysis
568
+
569
+ def print_analysis_summary(self, analysis_results: Dict):
570
+ """Print formatted analysis summary"""
571
+ if not analysis_results:
572
+ print("❌ No analysis results to display")
573
+ return
574
+
575
+ file_info = analysis_results.get('file_info', {})
576
+ diarization = analysis_results.get('diarization_transcription', [])
577
+ events = analysis_results.get('audio_events', {})
578
+ emotion = analysis_results.get('emotion_analysis', {})
579
+
580
+ print(f"\n{'='*80}")
581
+ print("🎯 UNIFIED AUDIO ANALYSIS RESULTS")
582
+ print(f"{'='*80}")
583
+
584
+ # File Information
585
+ print(f"📁 File: {file_info.get('filename', 'Unknown')}")
586
+ print(f"⏱️ Duration: {file_info.get('duration', 0):.2f} seconds")
587
+ print(f"🔊 Sample Rate: {file_info.get('sample_rate', 0)} Hz")
588
+ print(f"⚡ Processing Time: {file_info.get('processing_time', 0):.2f} seconds")
589
+
590
+ # 1. Speaker Diarization Results
591
+ print(f"\n{'🎤 SPEAKER DIARIZATION & TRANSCRIPTION'}")
592
+ print("-" * 50)
593
+ if diarization:
594
+ speakers = set(seg['speaker'] for seg in diarization)
595
+ print(f"Speakers detected: {len(speakers)}")
596
+ print(f"Total segments: {len(diarization)}")
597
+
598
+ for i, segment in enumerate(diarization, 1):
599
+ print(f"{i}. {segment['speaker']} [{segment['start']:.1f}s-{segment['end']:.1f}s]: {segment['text'][:80]}{'...' if len(segment['text']) > 80 else ''}")
600
+ else:
601
+ print("No diarization results available")
602
+
603
+ # 2. Audio Event Detection
604
+ print(f"\n{'🔊 AUDIO EVENT DETECTION (Top 10)'}")
605
+ print("-" * 50)
606
+ top_events = events.get('top_events', [])
607
+ if top_events:
608
+ for i, event in enumerate(top_events[:10], 1):
609
+ print(f"{i:2d}. {event['event']:<30} | Probability: {event['probability']:.4f}")
610
+ else:
611
+ print("No audio events detected")
612
+
613
+ # 3. Emotion Analysis
614
+ print(f"\n{'😊 EMOTION ANALYSIS'}")
615
+ print("-" * 30)
616
+ if emotion:
617
+ print(f"Predicted Emotion: {emotion['predicted_emotion']} (Confidence: {emotion['confidence']:.3f})")
618
+ print("\nAll Emotion Probabilities:")
619
+ for emo, prob in emotion['all_emotions'].items():
620
+ print(f" {emo.capitalize():<12}: {prob:.3f}")
621
+ else:
622
+ print("No emotion analysis available")
623
+
624
+ # 4. Key Paralinguistic Features
625
+ features = analysis_results.get('paralinguistic_features', {})
626
+ if features:
627
+ print(f"\n{'🎵 KEY PARALINGUISTIC FEATURES'}")
628
+ print("-" * 40)
629
+ print(f"RMS Energy: {features.get('rms_energy', 0):.4f}")
630
+ print(f"Pitch Mean: {features.get('pitch_mean', 0):.2f} Hz")
631
+ print(f"Spectral Centroid: {features.get('spectral_centroid_mean', 0):.2f} Hz")
632
+ print(f"Tempo: {features.get('tempo', 0):.2f} BPM")
633
+ print(f"Zero Crossing Rate: {features.get('zero_crossing_rate', 0):.4f}")
634
+
635
+ def save_results_to_csv(self, analysis_results: Dict, output_prefix: str = "unified_analysis"):
636
+ """Save analysis results to CSV files"""
637
+ if not analysis_results:
638
+ print("❌ No results to save")
639
+ return
640
+
641
+ # Save diarization results
642
+ diarization = analysis_results.get('diarization_transcription', [])
643
+ if diarization:
644
+ df_diarization = pd.DataFrame(diarization)
645
+ diarization_file = f"{output_prefix}_diarization.csv"
646
+ df_diarization.to_csv(diarization_file, index=False)
647
+ print(f"💾 Diarization results saved to: {diarization_file}")
648
+
649
+ # Save audio events
650
+ events = analysis_results.get('audio_events', {}).get('top_events', [])
651
+ if events:
652
+ df_events = pd.DataFrame(events)
653
+ events_file = f"{output_prefix}_audio_events.csv"
654
+ df_events.to_csv(events_file, index=False)
655
+ print(f"💾 Audio events saved to: {events_file}")
656
+
657
+ # Save paralinguistic features
658
+ features = analysis_results.get('paralinguistic_features', {})
659
+ if features:
660
+ df_features = pd.DataFrame([features])
661
+ features_file = f"{output_prefix}_features.csv"
662
+ df_features.to_csv(features_file, index=False)
663
+ print(f"💾 Features saved to: {features_file}")
664
+
665
+ # Save emotion analysis
666
+ emotion = analysis_results.get('emotion_analysis', {})
667
+ if emotion:
668
+ df_emotion = pd.DataFrame([emotion])
669
+ emotion_file = f"{output_prefix}_emotion.csv"
670
+ df_emotion.to_csv(emotion_file, index=False)
671
+ print(f"💾 Emotion analysis saved to: {emotion_file}")
672
+
673
+ def summarize_audio_analysis_with_llm(analysis_results: dict) -> str:
674
+ """
675
+ Send all analysis results to a Groq LLM (gpt-oss-20b) and get a summary
676
+ describing relationships between diarization, events, emotion, and features.
677
+ Requires GROQ_API_KEY in environment.
678
+ """
679
+ # Prepare the prompt
680
+ prompt = (
681
+ "You are an expert audio scene interpreter. Given the structured audio analysis results, "
682
+ "summarize what is happening in plain, natural language, as if explaining the situation to someone. "
683
+ "Avoid technical terms, metrics, or probabilities. Instead, combine the speaker's words, background "
684
+ "sounds, emotions, and other paralinguistic features to infer the most likely real-world context. Keep it short and clear.\n\n"
685
+ "Sample input: Recording of a phone call from a person arriving at an airport (with background noise of airplanes, announcements, and crowd chatter). Sample output: The subway and other vehicle sounds suggest the person is on a highway, the airplane sounds indicate a nearby airport, and the announcements provide flight schedule information, meaning the person has reached the boarding area or the waiting hall.\n\n"
686
+ f"Audio Analysis Results:\n{analysis_results}\n\n"
687
+ "Plain Summary:"
688
+ )
689
+
690
+ # Load environment variables
691
+ load_dotenv()
692
+ api_key = os.getenv("GROQ_API_KEY")
693
+ if not api_key:
694
+ raise ValueError("GROQ_API_KEY environment variable not set.")
695
+
696
+ # Initialize Groq client
697
+ client = Groq(api_key=api_key)
698
+
699
+ # Make the API call
700
+ response = client.chat.completions.create(
701
+ model="openai/gpt-oss-20b",
702
+ messages=[
703
+ {"role": "system", "content": "You are an expert audio analyst."},
704
+ {"role": "user", "content": prompt},
705
+ ],
706
+ )
707
+
708
+ # Extract summary
709
+ summary = response.choices[0].message.content.strip()
710
+ return summary
711
+
712
+ def main():
713
+ """Main function demonstrating usage"""
714
+ # Initialize analyzer with parallel processing enabled
715
+ analyzer = UnifiedAudioAnalyzer(enable_parallel_processing=True, max_workers=None)
716
+
717
+ # Specify input audio file
718
+ audio_file = "dataset/flight/15.wav" # Update with your audio file path
719
+
720
+ if os.path.exists(audio_file):
721
+ # Perform complete analysis
722
+ results = analyzer.analyze_complete_audio(audio_file)
723
+
724
+ # Print summary
725
+ analyzer.print_analysis_summary(results)
726
+
727
+ # Save results to CSV files
728
+ # analyzer.save_results_to_csv(results, "my_audio_analysis")
729
+
730
+ print("\n✅ Analysis complete! Uncomment save_results_to_csv above to export CSV files.")
731
+ summary = summarize_audio_analysis_with_llm(results)
732
+ print("\n=== LLM Summary of Audio Analysis ===")
733
+ print(summary)
734
+ else:
735
+ print(f"❌ Audio file not found: {audio_file}")
736
+ print("Please update the audio_file path to point to your audio file.")
737
+
738
+ if __name__ == "__main__":
739
+ main()
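
For reference, a minimal sketch of the dictionary shape that analyze_complete_audio() returns (and that server.py later forwards to the frontend). Every value below is an illustrative placeholder, not real output:

    # Illustrative shape only; keys mirror analyze_complete_audio() in main2.py,
    # values are made up for demonstration.
    example_results = {
        "file_info": {
            "filename": "15.wav",
            "filepath": "dataset/flight/15.wav",
            "duration": 42.7,            # seconds
            "sample_rate": 22050,
            "processing_time": 18.3,
        },
        "diarization_transcription": [
            {"speaker": "SPEAKER_00", "start": 0.5, "end": 4.2, "duration": 3.7,
             "text": "Hello, has the flight been delayed?", "confidence": 0.0},
        ],
        "audio_events": {
            "top_events": [
                {"event": "Speech", "class_id": 0, "probability": 0.81},
            ],
            "total_classes": 521,
        },
        "paralinguistic_features": {
            "duration": 42.7, "rms_energy": 0.052, "pitch_mean": 182.4,
            # plus MFCC, chroma, spectral, and tempo statistics
        },
        "emotion_analysis": {
            "predicted_emotion": "neutral",
            "confidence": 0.63,
            "all_emotions": {"neutral": 0.63, "happy": 0.12},
        },
    }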
requirements.txt ADDED
@@ -0,0 +1,17 @@
1
+ fastapi==0.104.1
2
+ uvicorn[standard]==0.24.0
3
+ python-multipart==0.0.6
4
+ groq==0.4.1
5
+ python-dotenv==1.0.0
6
+ librosa==0.10.1
7
+ soundfile==0.12.1
8
+ torch==2.1.0
9
+ torchaudio==2.1.0
10
+ numpy==1.24.3
11
+ pandas==2.0.3
12
+ matplotlib==3.7.2
13
+ scipy==1.11.3
14
+ pyannote.audio==3.1.1
15
+ openai-whisper==20231117
16
+ transformers==4.35.2
17
+ torch-vggish-yamnet==0.1.0
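
The pinned stack above is heavy (PyTorch, pyannote, Whisper, Transformers), so it can help to verify the environment before launching the API. A small, hypothetical helper along these lines (check_env.py is not part of this change) would confirm the key imports and the Groq key:

    # check_env.py (hypothetical helper): confirm the heavyweight dependencies
    # import cleanly and that GROQ_API_KEY is available before starting server.py.
    import importlib
    import os

    from dotenv import load_dotenv

    load_dotenv()

    for name in ["torch", "torchaudio", "librosa", "soundfile", "whisper",
                 "pyannote.audio", "transformers", "groq", "fastapi"]:
        try:
            importlib.import_module(name)
            print(f"ok    {name}")
        except Exception as exc:
            print(f"FAIL  {name}: {exc}")

    print("GROQ_API_KEY set:", bool(os.getenv("GROQ_API_KEY")))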
server.py ADDED
@@ -0,0 +1,186 @@
1
+ import os
2
+ import tempfile
3
+ import asyncio
4
+ from fastapi import FastAPI, File, UploadFile, HTTPException, Depends
5
+ from fastapi.middleware.cors import CORSMiddleware
6
+ from fastapi.responses import JSONResponse
7
+ from pydantic import BaseModel
8
+ from typing import Optional, Dict, Any
9
+ import uvicorn
10
+ from groq import Groq
11
+ from dotenv import load_dotenv
12
+ import librosa
13
+ import soundfile as sf
14
+ from main2 import UnifiedAudioAnalyzer, summarize_audio_analysis_with_llm
15
+
16
+ # Load environment variables
17
+ load_dotenv()
18
+
19
+ app = FastAPI(title="Audio Analysis API", version="1.0.0")
20
+
21
+ # CORS middleware
22
+ app.add_middleware(
23
+ CORSMiddleware,
24
+ allow_origins=["http://localhost:9002", "http://localhost:3000"], # Frontend URLs
25
+ allow_credentials=True,
26
+ allow_methods=["*"],
27
+ allow_headers=["*"],
28
+ )
29
+
30
+ # Initialize the audio analyzer
31
+ analyzer = UnifiedAudioAnalyzer(enable_parallel_processing=True)
32
+
33
+ # Groq client for chat
34
+ groq_client = None
35
+ try:
36
+ groq_api_key = os.getenv("GROQ_API_KEY")
37
+ if groq_api_key:
38
+ groq_client = Groq(api_key=groq_api_key)
39
+ except Exception as e:
40
+ print(f"Warning: Could not initialize Groq client: {e}")
41
+
42
+ # Pydantic models
43
+ class ChatRequest(BaseModel):
44
+ question: str
45
+ analysis_data: Dict[str, Any]
46
+
47
+ class ChatResponse(BaseModel):
48
+ answer: str
49
+
50
+ class AnalysisResponse(BaseModel):
51
+ success: bool
52
+ data: Optional[Dict[str, Any]] = None
53
+ error: Optional[str] = None
54
+
55
+ def convert_audio_to_wav(audio_file_path: str) -> str:
56
+ """Convert audio file to WAV format if needed"""
57
+ try:
58
+ # Load audio with librosa (supports many formats)
59
+ audio_data, sample_rate = librosa.load(audio_file_path, sr=16000)
60
+
61
+ # Create temporary WAV file
62
+ temp_wav = tempfile.NamedTemporaryFile(suffix='.wav', delete=False)
63
+ temp_wav_path = temp_wav.name
64
+ temp_wav.close()
65
+
66
+ # Save as WAV
67
+ sf.write(temp_wav_path, audio_data, sample_rate)
68
+
69
+ return temp_wav_path
70
+ except Exception as e:
71
+ raise HTTPException(status_code=400, detail=f"Error converting audio to WAV: {str(e)}")
72
+
73
+ @app.get("/")
74
+ async def root():
75
+ return {"message": "Audio Analysis API is running"}
76
+
77
+ @app.post("/upload", response_model=AnalysisResponse)
78
+ async def upload_audio(file: UploadFile = File(...)):
79
+ """Upload and analyze audio file"""
80
+ try:
81
+ # Check file type
82
+ if not file.content_type.startswith("audio/"):
83
+ raise HTTPException(status_code=400, detail="File must be an audio file")
84
+
85
+ # Create temporary file
86
+ with tempfile.NamedTemporaryFile(delete=False, suffix=f".{file.filename.split('.')[-1]}") as temp_file:
87
+ content = await file.read()
88
+ temp_file.write(content)
89
+ temp_file_path = temp_file.name
90
+
91
+ try:
92
+ # Convert to WAV if needed
93
+ wav_file_path = convert_audio_to_wav(temp_file_path)
94
+
95
+ # Perform analysis
96
+ analysis_results = analyzer.analyze_complete_audio(wav_file_path)
97
+
98
+ if not analysis_results:
99
+ raise HTTPException(status_code=500, detail="Analysis failed")
100
+
101
+ # Generate LLM summary
102
+ try:
103
+ summary = summarize_audio_analysis_with_llm(analysis_results)
104
+ analysis_results['llm_summary'] = summary
105
+ except Exception as e:
106
+ print(f"Warning: LLM summary failed: {e}")
107
+ analysis_results['llm_summary'] = "Summary generation failed"
108
+
109
+ return AnalysisResponse(
110
+ success=True,
111
+ data=analysis_results
112
+ )
113
+
114
+ finally:
115
+ # Clean up temporary files
116
+ try:
117
+ os.unlink(temp_file_path)
118
+ if 'wav_file_path' in locals():
119
+ os.unlink(wav_file_path)
120
+ except OSError:
121
+ pass
122
+
123
+ except Exception as e:
124
+ return AnalysisResponse(
125
+ success=False,
126
+ error=str(e)
127
+ )
128
+
129
+ @app.post("/chat", response_model=ChatResponse)
130
+ async def chat_with_analysis(request: ChatRequest):
131
+ """Chat with AI about the analysis results"""
132
+ if not groq_client:
133
+ raise HTTPException(status_code=500, detail="Groq API not configured")
134
+
135
+ try:
136
+ # Prepare context from analysis data
137
+ context = f"""
138
+ Audio Analysis Summary:
139
+ - File: {request.analysis_data.get('file_info', {}).get('filename', 'Unknown')}
140
+ - Duration: {request.analysis_data.get('file_info', {}).get('duration', 0):.2f} seconds
141
+ - LLM Summary: {request.analysis_data.get('llm_summary', 'No summary available')}
142
+
143
+ Speaker Diarization:
144
+ {request.analysis_data.get('diarization_transcription', [])}
145
+
146
+ Audio Events:
147
+ {request.analysis_data.get('audio_events', {}).get('top_events', [])}
148
+
149
+ Emotion Analysis:
150
+ {request.analysis_data.get('emotion_analysis', {})}
151
+
152
+ Paralinguistic Features:
153
+ {request.analysis_data.get('paralinguistic_features', {})}
154
+ """
155
+
156
+ # Create chat completion
157
+ response = groq_client.chat.completions.create(
158
+ model="llama-3.1-8b-instant", # Using smaller model as requested
159
+ messages=[
160
+ {
161
+ "role": "system",
162
+ "content": "You are an expert audio analyst. Answer questions about the provided audio analysis data. Be helpful and provide insights based on the analysis results."
163
+ },
164
+ {
165
+ "role": "user",
166
+ "content": f"Context: {context}\n\nQuestion: {request.question}"
167
+ }
168
+ ],
169
+ temperature=0.7,
170
+ max_tokens=1000
171
+ )
172
+
173
+ answer = response.choices[0].message.content.strip()
174
+
175
+ return ChatResponse(answer=answer)
176
+
177
+ except Exception as e:
178
+ raise HTTPException(status_code=500, detail=f"Chat error: {str(e)}")
179
+
180
+ @app.get("/health")
181
+ async def health_check():
182
+ """Health check endpoint"""
183
+ return {"status": "healthy", "analyzer_loaded": analyzer is not None}
184
+
185
+ if __name__ == "__main__":
186
+ uvicorn.run(app, host="0.0.0.0", port=8000)
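
Once the server is running, the two endpoints can be exercised from Python. A minimal client sketch, assuming the server listens on http://localhost:8000 and using the requests package (not listed in requirements.txt):

    # client_example.py (hypothetical): exercise /upload and /chat.
    import requests

    BASE = "http://localhost:8000"

    with open("dataset/flight/15.wav", "rb") as f:
        upload = requests.post(
            f"{BASE}/upload",
            files={"file": ("15.wav", f, "audio/wav")},
        ).json()

    if upload.get("success"):
        analysis = upload["data"]
        print("LLM summary:", analysis.get("llm_summary"))

        chat = requests.post(
            f"{BASE}/chat",
            json={"question": "How many speakers were detected?",
                  "analysis_data": analysis},
        ).json()
        print("Chat answer:", chat["answer"])
    else:
        print("Upload failed:", upload.get("error"))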
yamnet_class_map.csv ADDED
@@ -0,0 +1,522 @@
1
+ index,mid,display_name
2
+ 0,/m/09x0r,Speech
3
+ 1,/m/0ytgt,"Child speech, kid speaking"
4
+ 2,/m/01h8n0,Conversation
5
+ 3,/m/02qldy,"Narration, monologue"
6
+ 4,/m/0261r1,Babbling
7
+ 5,/m/0brhx,Speech synthesizer
8
+ 6,/m/07p6fty,Shout
9
+ 7,/m/07q4ntr,Bellow
10
+ 8,/m/07rwj3x,Whoop
11
+ 9,/m/07sr1lc,Yell
12
+ 10,/t/dd00135,Children shouting
13
+ 11,/m/03qc9zr,Screaming
14
+ 12,/m/02rtxlg,Whispering
15
+ 13,/m/01j3sz,Laughter
16
+ 14,/t/dd00001,Baby laughter
17
+ 15,/m/07r660_,Giggle
18
+ 16,/m/07s04w4,Snicker
19
+ 17,/m/07sq110,Belly laugh
20
+ 18,/m/07rgt08,"Chuckle, chortle"
21
+ 19,/m/0463cq4,"Crying, sobbing"
22
+ 20,/t/dd00002,"Baby cry, infant cry"
23
+ 21,/m/07qz6j3,Whimper
24
+ 22,/m/07qw_06,"Wail, moan"
25
+ 23,/m/07plz5l,Sigh
26
+ 24,/m/015lz1,Singing
27
+ 25,/m/0l14jd,Choir
28
+ 26,/m/01swy6,Yodeling
29
+ 27,/m/02bk07,Chant
30
+ 28,/m/01c194,Mantra
31
+ 29,/t/dd00005,Child singing
32
+ 30,/t/dd00006,Synthetic singing
33
+ 31,/m/06bxc,Rapping
34
+ 32,/m/02fxyj,Humming
35
+ 33,/m/07s2xch,Groan
36
+ 34,/m/07r4k75,Grunt
37
+ 35,/m/01w250,Whistling
38
+ 36,/m/0lyf6,Breathing
39
+ 37,/m/07mzm6,Wheeze
40
+ 38,/m/01d3sd,Snoring
41
+ 39,/m/07s0dtb,Gasp
42
+ 40,/m/07pyy8b,Pant
43
+ 41,/m/07q0yl5,Snort
44
+ 42,/m/01b_21,Cough
45
+ 43,/m/0dl9sf8,Throat clearing
46
+ 44,/m/01hsr_,Sneeze
47
+ 45,/m/07ppn3j,Sniff
48
+ 46,/m/06h7j,Run
49
+ 47,/m/07qv_x_,Shuffle
50
+ 48,/m/07pbtc8,"Walk, footsteps"
51
+ 49,/m/03cczk,"Chewing, mastication"
52
+ 50,/m/07pdhp0,Biting
53
+ 51,/m/0939n_,Gargling
54
+ 52,/m/01g90h,Stomach rumble
55
+ 53,/m/03q5_w,"Burping, eructation"
56
+ 54,/m/02p3nc,Hiccup
57
+ 55,/m/02_nn,Fart
58
+ 56,/m/0k65p,Hands
59
+ 57,/m/025_jnm,Finger snapping
60
+ 58,/m/0l15bq,Clapping
61
+ 59,/m/01jg02,"Heart sounds, heartbeat"
62
+ 60,/m/01jg1z,Heart murmur
63
+ 61,/m/053hz1,Cheering
64
+ 62,/m/028ght,Applause
65
+ 63,/m/07rkbfh,Chatter
66
+ 64,/m/03qtwd,Crowd
67
+ 65,/m/07qfr4h,"Hubbub, speech noise, speech babble"
68
+ 66,/t/dd00013,Children playing
69
+ 67,/m/0jbk,Animal
70
+ 68,/m/068hy,"Domestic animals, pets"
71
+ 69,/m/0bt9lr,Dog
72
+ 70,/m/05tny_,Bark
73
+ 71,/m/07r_k2n,Yip
74
+ 72,/m/07qf0zm,Howl
75
+ 73,/m/07rc7d9,Bow-wow
76
+ 74,/m/0ghcn6,Growling
77
+ 75,/t/dd00136,Whimper (dog)
78
+ 76,/m/01yrx,Cat
79
+ 77,/m/02yds9,Purr
80
+ 78,/m/07qrkrw,Meow
81
+ 79,/m/07rjwbb,Hiss
82
+ 80,/m/07r81j2,Caterwaul
83
+ 81,/m/0ch8v,"Livestock, farm animals, working animals"
84
+ 82,/m/03k3r,Horse
85
+ 83,/m/07rv9rh,Clip-clop
86
+ 84,/m/07q5rw0,"Neigh, whinny"
87
+ 85,/m/01xq0k1,"Cattle, bovinae"
88
+ 86,/m/07rpkh9,Moo
89
+ 87,/m/0239kh,Cowbell
90
+ 88,/m/068zj,Pig
91
+ 89,/t/dd00018,Oink
92
+ 90,/m/03fwl,Goat
93
+ 91,/m/07q0h5t,Bleat
94
+ 92,/m/07bgp,Sheep
95
+ 93,/m/025rv6n,Fowl
96
+ 94,/m/09b5t,"Chicken, rooster"
97
+ 95,/m/07st89h,Cluck
98
+ 96,/m/07qn5dc,"Crowing, cock-a-doodle-doo"
99
+ 97,/m/01rd7k,Turkey
100
+ 98,/m/07svc2k,Gobble
101
+ 99,/m/09ddx,Duck
102
+ 100,/m/07qdb04,Quack
103
+ 101,/m/0dbvp,Goose
104
+ 102,/m/07qwf61,Honk
105
+ 103,/m/01280g,Wild animals
106
+ 104,/m/0cdnk,"Roaring cats (lions, tigers)"
107
+ 105,/m/04cvmfc,Roar
108
+ 106,/m/015p6,Bird
109
+ 107,/m/020bb7,"Bird vocalization, bird call, bird song"
110
+ 108,/m/07pggtn,"Chirp, tweet"
111
+ 109,/m/07sx8x_,Squawk
112
+ 110,/m/0h0rv,"Pigeon, dove"
113
+ 111,/m/07r_25d,Coo
114
+ 112,/m/04s8yn,Crow
115
+ 113,/m/07r5c2p,Caw
116
+ 114,/m/09d5_,Owl
117
+ 115,/m/07r_80w,Hoot
118
+ 116,/m/05_wcq,"Bird flight, flapping wings"
119
+ 117,/m/01z5f,"Canidae, dogs, wolves"
120
+ 118,/m/06hps,"Rodents, rats, mice"
121
+ 119,/m/04rmv,Mouse
122
+ 120,/m/07r4gkf,Patter
123
+ 121,/m/03vt0,Insect
124
+ 122,/m/09xqv,Cricket
125
+ 123,/m/09f96,Mosquito
126
+ 124,/m/0h2mp,"Fly, housefly"
127
+ 125,/m/07pjwq1,Buzz
128
+ 126,/m/01h3n,"Bee, wasp, etc."
129
+ 127,/m/09ld4,Frog
130
+ 128,/m/07st88b,Croak
131
+ 129,/m/078jl,Snake
132
+ 130,/m/07qn4z3,Rattle
133
+ 131,/m/032n05,Whale vocalization
134
+ 132,/m/04rlf,Music
135
+ 133,/m/04szw,Musical instrument
136
+ 134,/m/0fx80y,Plucked string instrument
137
+ 135,/m/0342h,Guitar
138
+ 136,/m/02sgy,Electric guitar
139
+ 137,/m/018vs,Bass guitar
140
+ 138,/m/042v_gx,Acoustic guitar
141
+ 139,/m/06w87,"Steel guitar, slide guitar"
142
+ 140,/m/01glhc,Tapping (guitar technique)
143
+ 141,/m/07s0s5r,Strum
144
+ 142,/m/018j2,Banjo
145
+ 143,/m/0jtg0,Sitar
146
+ 144,/m/04rzd,Mandolin
147
+ 145,/m/01bns_,Zither
148
+ 146,/m/07xzm,Ukulele
149
+ 147,/m/05148p4,Keyboard (musical)
150
+ 148,/m/05r5c,Piano
151
+ 149,/m/01s0ps,Electric piano
152
+ 150,/m/013y1f,Organ
153
+ 151,/m/03xq_f,Electronic organ
154
+ 152,/m/03gvt,Hammond organ
155
+ 153,/m/0l14qv,Synthesizer
156
+ 154,/m/01v1d8,Sampler
157
+ 155,/m/03q5t,Harpsichord
158
+ 156,/m/0l14md,Percussion
159
+ 157,/m/02hnl,Drum kit
160
+ 158,/m/0cfdd,Drum machine
161
+ 159,/m/026t6,Drum
162
+ 160,/m/06rvn,Snare drum
163
+ 161,/m/03t3fj,Rimshot
164
+ 162,/m/02k_mr,Drum roll
165
+ 163,/m/0bm02,Bass drum
166
+ 164,/m/011k_j,Timpani
167
+ 165,/m/01p970,Tabla
168
+ 166,/m/01qbl,Cymbal
169
+ 167,/m/03qtq,Hi-hat
170
+ 168,/m/01sm1g,Wood block
171
+ 169,/m/07brj,Tambourine
172
+ 170,/m/05r5wn,Rattle (instrument)
173
+ 171,/m/0xzly,Maraca
174
+ 172,/m/0mbct,Gong
175
+ 173,/m/016622,Tubular bells
176
+ 174,/m/0j45pbj,Mallet percussion
177
+ 175,/m/0dwsp,"Marimba, xylophone"
178
+ 176,/m/0dwtp,Glockenspiel
179
+ 177,/m/0dwt5,Vibraphone
180
+ 178,/m/0l156b,Steelpan
181
+ 179,/m/05pd6,Orchestra
182
+ 180,/m/01kcd,Brass instrument
183
+ 181,/m/0319l,French horn
184
+ 182,/m/07gql,Trumpet
185
+ 183,/m/07c6l,Trombone
186
+ 184,/m/0l14_3,Bowed string instrument
187
+ 185,/m/02qmj0d,String section
188
+ 186,/m/07y_7,"Violin, fiddle"
189
+ 187,/m/0d8_n,Pizzicato
190
+ 188,/m/01xqw,Cello
191
+ 189,/m/02fsn,Double bass
192
+ 190,/m/085jw,"Wind instrument, woodwind instrument"
193
+ 191,/m/0l14j_,Flute
194
+ 192,/m/06ncr,Saxophone
195
+ 193,/m/01wy6,Clarinet
196
+ 194,/m/03m5k,Harp
197
+ 195,/m/0395lw,Bell
198
+ 196,/m/03w41f,Church bell
199
+ 197,/m/027m70_,Jingle bell
200
+ 198,/m/0gy1t2s,Bicycle bell
201
+ 199,/m/07n_g,Tuning fork
202
+ 200,/m/0f8s22,Chime
203
+ 201,/m/026fgl,Wind chime
204
+ 202,/m/0150b9,Change ringing (campanology)
205
+ 203,/m/03qjg,Harmonica
206
+ 204,/m/0mkg,Accordion
207
+ 205,/m/0192l,Bagpipes
208
+ 206,/m/02bxd,Didgeridoo
209
+ 207,/m/0l14l2,Shofar
210
+ 208,/m/07kc_,Theremin
211
+ 209,/m/0l14t7,Singing bowl
212
+ 210,/m/01hgjl,Scratching (performance technique)
213
+ 211,/m/064t9,Pop music
214
+ 212,/m/0glt670,Hip hop music
215
+ 213,/m/02cz_7,Beatboxing
216
+ 214,/m/06by7,Rock music
217
+ 215,/m/03lty,Heavy metal
218
+ 216,/m/05r6t,Punk rock
219
+ 217,/m/0dls3,Grunge
220
+ 218,/m/0dl5d,Progressive rock
221
+ 219,/m/07sbbz2,Rock and roll
222
+ 220,/m/05w3f,Psychedelic rock
223
+ 221,/m/06j6l,Rhythm and blues
224
+ 222,/m/0gywn,Soul music
225
+ 223,/m/06cqb,Reggae
226
+ 224,/m/01lyv,Country
227
+ 225,/m/015y_n,Swing music
228
+ 226,/m/0gg8l,Bluegrass
229
+ 227,/m/02x8m,Funk
230
+ 228,/m/02w4v,Folk music
231
+ 229,/m/06j64v,Middle Eastern music
232
+ 230,/m/03_d0,Jazz
233
+ 231,/m/026z9,Disco
234
+ 232,/m/0ggq0m,Classical music
235
+ 233,/m/05lls,Opera
236
+ 234,/m/02lkt,Electronic music
237
+ 235,/m/03mb9,House music
238
+ 236,/m/07gxw,Techno
239
+ 237,/m/07s72n,Dubstep
240
+ 238,/m/0283d,Drum and bass
241
+ 239,/m/0m0jc,Electronica
242
+ 240,/m/08cyft,Electronic dance music
243
+ 241,/m/0fd3y,Ambient music
244
+ 242,/m/07lnk,Trance music
245
+ 243,/m/0g293,Music of Latin America
246
+ 244,/m/0ln16,Salsa music
247
+ 245,/m/0326g,Flamenco
248
+ 246,/m/0155w,Blues
249
+ 247,/m/05fw6t,Music for children
250
+ 248,/m/02v2lh,New-age music
251
+ 249,/m/0y4f8,Vocal music
252
+ 250,/m/0z9c,A capella
253
+ 251,/m/0164x2,Music of Africa
254
+ 252,/m/0145m,Afrobeat
255
+ 253,/m/02mscn,Christian music
256
+ 254,/m/016cjb,Gospel music
257
+ 255,/m/028sqc,Music of Asia
258
+ 256,/m/015vgc,Carnatic music
259
+ 257,/m/0dq0md,Music of Bollywood
260
+ 258,/m/06rqw,Ska
261
+ 259,/m/02p0sh1,Traditional music
262
+ 260,/m/05rwpb,Independent music
263
+ 261,/m/074ft,Song
264
+ 262,/m/025td0t,Background music
265
+ 263,/m/02cjck,Theme music
266
+ 264,/m/03r5q_,Jingle (music)
267
+ 265,/m/0l14gg,Soundtrack music
268
+ 266,/m/07pkxdp,Lullaby
269
+ 267,/m/01z7dr,Video game music
270
+ 268,/m/0140xf,Christmas music
271
+ 269,/m/0ggx5q,Dance music
272
+ 270,/m/04wptg,Wedding music
273
+ 271,/t/dd00031,Happy music
274
+ 272,/t/dd00033,Sad music
275
+ 273,/t/dd00034,Tender music
276
+ 274,/t/dd00035,Exciting music
277
+ 275,/t/dd00036,Angry music
278
+ 276,/t/dd00037,Scary music
279
+ 277,/m/03m9d0z,Wind
280
+ 278,/m/09t49,Rustling leaves
281
+ 279,/t/dd00092,Wind noise (microphone)
282
+ 280,/m/0jb2l,Thunderstorm
283
+ 281,/m/0ngt1,Thunder
284
+ 282,/m/0838f,Water
285
+ 283,/m/06mb1,Rain
286
+ 284,/m/07r10fb,Raindrop
287
+ 285,/t/dd00038,Rain on surface
288
+ 286,/m/0j6m2,Stream
289
+ 287,/m/0j2kx,Waterfall
290
+ 288,/m/05kq4,Ocean
291
+ 289,/m/034srq,"Waves, surf"
292
+ 290,/m/06wzb,Steam
293
+ 291,/m/07swgks,Gurgling
294
+ 292,/m/02_41,Fire
295
+ 293,/m/07pzfmf,Crackle
296
+ 294,/m/07yv9,Vehicle
297
+ 295,/m/019jd,"Boat, Water vehicle"
298
+ 296,/m/0hsrw,"Sailboat, sailing ship"
299
+ 297,/m/056ks2,"Rowboat, canoe, kayak"
300
+ 298,/m/02rlv9,"Motorboat, speedboat"
301
+ 299,/m/06q74,Ship
302
+ 300,/m/012f08,Motor vehicle (road)
303
+ 301,/m/0k4j,Car
304
+ 302,/m/0912c9,"Vehicle horn, car horn, honking"
305
+ 303,/m/07qv_d5,Toot
306
+ 304,/m/02mfyn,Car alarm
307
+ 305,/m/04gxbd,"Power windows, electric windows"
308
+ 306,/m/07rknqz,Skidding
309
+ 307,/m/0h9mv,Tire squeal
310
+ 308,/t/dd00134,Car passing by
311
+ 309,/m/0ltv,"Race car, auto racing"
312
+ 310,/m/07r04,Truck
313
+ 311,/m/0gvgw0,Air brake
314
+ 312,/m/05x_td,"Air horn, truck horn"
315
+ 313,/m/02rhddq,Reversing beeps
316
+ 314,/m/03cl9h,"Ice cream truck, ice cream van"
317
+ 315,/m/01bjv,Bus
318
+ 316,/m/03j1ly,Emergency vehicle
319
+ 317,/m/04qvtq,Police car (siren)
320
+ 318,/m/012n7d,Ambulance (siren)
321
+ 319,/m/012ndj,"Fire engine, fire truck (siren)"
322
+ 320,/m/04_sv,Motorcycle
323
+ 321,/m/0btp2,"Traffic noise, roadway noise"
324
+ 322,/m/06d_3,Rail transport
325
+ 323,/m/07jdr,Train
326
+ 324,/m/04zmvq,Train whistle
327
+ 325,/m/0284vy3,Train horn
328
+ 326,/m/01g50p,"Railroad car, train wagon"
329
+ 327,/t/dd00048,Train wheels squealing
330
+ 328,/m/0195fx,"Subway, metro, underground"
331
+ 329,/m/0k5j,Aircraft
332
+ 330,/m/014yck,Aircraft engine
333
+ 331,/m/04229,Jet engine
334
+ 332,/m/02l6bg,"Propeller, airscrew"
335
+ 333,/m/09ct_,Helicopter
336
+ 334,/m/0cmf2,"Fixed-wing aircraft, airplane"
337
+ 335,/m/0199g,Bicycle
338
+ 336,/m/06_fw,Skateboard
339
+ 337,/m/02mk9,Engine
340
+ 338,/t/dd00065,Light engine (high frequency)
341
+ 339,/m/08j51y,"Dental drill, dentist's drill"
342
+ 340,/m/01yg9g,Lawn mower
343
+ 341,/m/01j4z9,Chainsaw
344
+ 342,/t/dd00066,Medium engine (mid frequency)
345
+ 343,/t/dd00067,Heavy engine (low frequency)
346
+ 344,/m/01h82_,Engine knocking
347
+ 345,/t/dd00130,Engine starting
348
+ 346,/m/07pb8fc,Idling
349
+ 347,/m/07q2z82,"Accelerating, revving, vroom"
350
+ 348,/m/02dgv,Door
351
+ 349,/m/03wwcy,Doorbell
352
+ 350,/m/07r67yg,Ding-dong
353
+ 351,/m/02y_763,Sliding door
354
+ 352,/m/07rjzl8,Slam
355
+ 353,/m/07r4wb8,Knock
356
+ 354,/m/07qcpgn,Tap
357
+ 355,/m/07q6cd_,Squeak
358
+ 356,/m/0642b4,Cupboard open or close
359
+ 357,/m/0fqfqc,Drawer open or close
360
+ 358,/m/04brg2,"Dishes, pots, and pans"
361
+ 359,/m/023pjk,"Cutlery, silverware"
362
+ 360,/m/07pn_8q,Chopping (food)
363
+ 361,/m/0dxrf,Frying (food)
364
+ 362,/m/0fx9l,Microwave oven
365
+ 363,/m/02pjr4,Blender
366
+ 364,/m/02jz0l,"Water tap, faucet"
367
+ 365,/m/0130jx,Sink (filling or washing)
368
+ 366,/m/03dnzn,Bathtub (filling or washing)
369
+ 367,/m/03wvsk,Hair dryer
370
+ 368,/m/01jt3m,Toilet flush
371
+ 369,/m/012xff,Toothbrush
372
+ 370,/m/04fgwm,Electric toothbrush
373
+ 371,/m/0d31p,Vacuum cleaner
374
+ 372,/m/01s0vc,Zipper (clothing)
375
+ 373,/m/03v3yw,Keys jangling
376
+ 374,/m/0242l,Coin (dropping)
377
+ 375,/m/01lsmm,Scissors
378
+ 376,/m/02g901,"Electric shaver, electric razor"
379
+ 377,/m/05rj2,Shuffling cards
380
+ 378,/m/0316dw,Typing
381
+ 379,/m/0c2wf,Typewriter
382
+ 380,/m/01m2v,Computer keyboard
383
+ 381,/m/081rb,Writing
384
+ 382,/m/07pp_mv,Alarm
385
+ 383,/m/07cx4,Telephone
386
+ 384,/m/07pp8cl,Telephone bell ringing
387
+ 385,/m/01hnzm,Ringtone
388
+ 386,/m/02c8p,"Telephone dialing, DTMF"
389
+ 387,/m/015jpf,Dial tone
390
+ 388,/m/01z47d,Busy signal
391
+ 389,/m/046dlr,Alarm clock
392
+ 390,/m/03kmc9,Siren
393
+ 391,/m/0dgbq,Civil defense siren
394
+ 392,/m/030rvx,Buzzer
395
+ 393,/m/01y3hg,"Smoke detector, smoke alarm"
396
+ 394,/m/0c3f7m,Fire alarm
397
+ 395,/m/04fq5q,Foghorn
398
+ 396,/m/0l156k,Whistle
399
+ 397,/m/06hck5,Steam whistle
400
+ 398,/t/dd00077,Mechanisms
401
+ 399,/m/02bm9n,"Ratchet, pawl"
402
+ 400,/m/01x3z,Clock
403
+ 401,/m/07qjznt,Tick
404
+ 402,/m/07qjznl,Tick-tock
405
+ 403,/m/0l7xg,Gears
406
+ 404,/m/05zc1,Pulleys
407
+ 405,/m/0llzx,Sewing machine
408
+ 406,/m/02x984l,Mechanical fan
409
+ 407,/m/025wky1,Air conditioning
410
+ 408,/m/024dl,Cash register
411
+ 409,/m/01m4t,Printer
412
+ 410,/m/0dv5r,Camera
413
+ 411,/m/07bjf,Single-lens reflex camera
414
+ 412,/m/07k1x,Tools
415
+ 413,/m/03l9g,Hammer
416
+ 414,/m/03p19w,Jackhammer
417
+ 415,/m/01b82r,Sawing
418
+ 416,/m/02p01q,Filing (rasp)
419
+ 417,/m/023vsd,Sanding
420
+ 418,/m/0_ksk,Power tool
421
+ 419,/m/01d380,Drill
422
+ 420,/m/014zdl,Explosion
423
+ 421,/m/032s66,"Gunshot, gunfire"
424
+ 422,/m/04zjc,Machine gun
425
+ 423,/m/02z32qm,Fusillade
426
+ 424,/m/0_1c,Artillery fire
427
+ 425,/m/073cg4,Cap gun
428
+ 426,/m/0g6b5,Fireworks
429
+ 427,/g/122z_qxw,Firecracker
430
+ 428,/m/07qsvvw,"Burst, pop"
431
+ 429,/m/07pxg6y,Eruption
432
+ 430,/m/07qqyl4,Boom
433
+ 431,/m/083vt,Wood
434
+ 432,/m/07pczhz,Chop
435
+ 433,/m/07pl1bw,Splinter
436
+ 434,/m/07qs1cx,Crack
437
+ 435,/m/039jq,Glass
438
+ 436,/m/07q7njn,"Chink, clink"
439
+ 437,/m/07rn7sz,Shatter
440
+ 438,/m/04k94,Liquid
441
+ 439,/m/07rrlb6,"Splash, splatter"
442
+ 440,/m/07p6mqd,Slosh
443
+ 441,/m/07qlwh6,Squish
444
+ 442,/m/07r5v4s,Drip
445
+ 443,/m/07prgkl,Pour
446
+ 444,/m/07pqc89,"Trickle, dribble"
447
+ 445,/t/dd00088,Gush
448
+ 446,/m/07p7b8y,Fill (with liquid)
449
+ 447,/m/07qlf79,Spray
450
+ 448,/m/07ptzwd,Pump (liquid)
451
+ 449,/m/07ptfmf,Stir
452
+ 450,/m/0dv3j,Boiling
453
+ 451,/m/0790c,Sonar
454
+ 452,/m/0dl83,Arrow
455
+ 453,/m/07rqsjt,"Whoosh, swoosh, swish"
456
+ 454,/m/07qnq_y,"Thump, thud"
457
+ 455,/m/07rrh0c,Thunk
458
+ 456,/m/0b_fwt,Electronic tuner
459
+ 457,/m/02rr_,Effects unit
460
+ 458,/m/07m2kt,Chorus effect
461
+ 459,/m/018w8,Basketball bounce
462
+ 460,/m/07pws3f,Bang
463
+ 461,/m/07ryjzk,"Slap, smack"
464
+ 462,/m/07rdhzs,"Whack, thwack"
465
+ 463,/m/07pjjrj,"Smash, crash"
466
+ 464,/m/07pc8lb,Breaking
467
+ 465,/m/07pqn27,Bouncing
468
+ 466,/m/07rbp7_,Whip
469
+ 467,/m/07pyf11,Flap
470
+ 468,/m/07qb_dv,Scratch
471
+ 469,/m/07qv4k0,Scrape
472
+ 470,/m/07pdjhy,Rub
473
+ 471,/m/07s8j8t,Roll
474
+ 472,/m/07plct2,Crushing
475
+ 473,/t/dd00112,"Crumpling, crinkling"
476
+ 474,/m/07qcx4z,Tearing
477
+ 475,/m/02fs_r,"Beep, bleep"
478
+ 476,/m/07qwdck,Ping
479
+ 477,/m/07phxs1,Ding
480
+ 478,/m/07rv4dm,Clang
481
+ 479,/m/07s02z0,Squeal
482
+ 480,/m/07qh7jl,Creak
483
+ 481,/m/07qwyj0,Rustle
484
+ 482,/m/07s34ls,Whir
485
+ 483,/m/07qmpdm,Clatter
486
+ 484,/m/07p9k1k,Sizzle
487
+ 485,/m/07qc9xj,Clicking
488
+ 486,/m/07rwm0c,Clickety-clack
489
+ 487,/m/07phhsh,Rumble
490
+ 488,/m/07qyrcz,Plop
491
+ 489,/m/07qfgpx,"Jingle, tinkle"
492
+ 490,/m/07rcgpl,Hum
493
+ 491,/m/07p78v5,Zing
494
+ 492,/t/dd00121,Boing
495
+ 493,/m/07s12q4,Crunch
496
+ 494,/m/028v0c,Silence
497
+ 495,/m/01v_m0,Sine wave
498
+ 496,/m/0b9m1,Harmonic
499
+ 497,/m/0hdsk,Chirp tone
500
+ 498,/m/0c1dj,Sound effect
501
+ 499,/m/07pt_g0,Pulse
502
+ 500,/t/dd00125,"Inside, small room"
503
+ 501,/t/dd00126,"Inside, large room or hall"
504
+ 502,/t/dd00127,"Inside, public space"
505
+ 503,/t/dd00128,"Outside, urban or manmade"
506
+ 504,/t/dd00129,"Outside, rural or natural"
507
+ 505,/m/01b9nn,Reverberation
508
+ 506,/m/01jnbd,Echo
509
+ 507,/m/096m7z,Noise
510
+ 508,/m/06_y0by,Environmental noise
511
+ 509,/m/07rgkc5,Static
512
+ 510,/m/06xkwv,Mains hum
513
+ 511,/m/0g12c5,Distortion
514
+ 512,/m/08p9q4,Sidetone
515
+ 513,/m/07szfh9,Cacophony
516
+ 514,/m/0chx_,White noise
517
+ 515,/m/0cj0r,Pink noise
518
+ 516,/m/07p_0gm,Throbbing
519
+ 517,/m/01jwx6,Vibration
520
+ 518,/m/07c52,Television
521
+ 519,/m/06bz3,Radio
522
+ 520,/m/07hvw1,Field recording
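
The class map has three columns (index, mid, display_name); _load_class_names() in main2.py keeps only display_name, and detect_audio_events() indexes into that list with the YAMNet class id. A small standalone sketch of the same lookup, assuming the CSV sits in the working directory:

    # Minimal sketch mirroring _load_class_names() in main2.py.
    import csv

    with open("yamnet_class_map.csv", newline="") as f:
        reader = csv.reader(f)
        next(reader)  # skip the "index,mid,display_name" header row
        display_names = [row[2] for row in reader]

    print(len(display_names))    # 521 classes (indices 0-520)
    print(display_names[0])      # Speech
    print(display_names[302])    # Vehicle horn, car horn, honking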