readme updated
Files changed:
- README.md +78 -0
- src/data/harmonize.py +8 -8
- src/features/build_features.py +4 -4
- src/features/extractor.py +6 -6
README.md
ADDED

# VigilAudio: AI-Powered Audio Moderation Engine

**A production-ready audio emotion classification system built for content moderation.**

VigilAudio is the first phase of a multimodal moderation suite designed to detect distress, aggression, and safety risks in user-generated content. Unlike traditional moderators that look for keywords, VigilAudio listens to the *tone* of the voice, detecting anger, fear, or distress even when the words themselves are neutral.

## Key Features

* **State-of-the-Art Architecture:** Fine-tuned `facebook/wav2vec2-base-960h` Transformer model.
* **High Accuracy:** Achieved **82% accuracy** on a 7-class emotion dataset (Angry, Happy, Sad, Fearful, Disgusted, Neutral, Surprised).
* **Production Pipeline:** End-to-end data harmonization, stratified splitting, and efficient feature extraction.
* **Cloud-Native Training:** Optimized training scripts for Google Colab (T4 GPU), reducing training time from 50+ hours to under 20 minutes.

## Technology Stack

* **Language:** Python 3.10+
* **Environment:** `uv` (for fast dependency management)
* **ML Framework:** PyTorch, Hugging Face Transformers, Accelerate
* **Audio Processing:** Librosa, SoundFile
* **Data Ops:** Pandas, scikit-learn

## Installation

1. **Clone the repository:**
   ```bash
   git clone https://github.com/yourusername/vigilaudio.git
   cd vigilaudio
   ```

2. **Initialize the environment:**
   We use `uv` for lightning-fast setups.
   ```bash
   uv sync
   ```

## Execution Guide

### 1. Data Pipeline (Harmonization)

Turn raw, messy folders into a clean, stratified dataset.

```bash
uv run src/data/harmonize.py
```

* **Input:** Raw audio folders (`Emotions/Angry`, `Emotions/Happy`, ...)
* **Output:** `data/processed/metadata.csv` (unified labels + 80/10/10 splits)
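The 80/10/10 stratified split above can be sketched in two stages with scikit-learn: first carve off 20% of the data, then halve that remainder into validation and test. This is a minimal sketch with toy data; the column names (`emotion`, `split`) follow this README, not necessarily the exact `harmonize.py` code.

```python
import pandas as pd
from sklearn.model_selection import train_test_split

# Toy metadata: 100 clips, 5 emotions, 20 clips each
df = pd.DataFrame({
    "path": [f"clip_{i}.wav" for i in range(100)],
    "emotion": ["Angry", "Happy", "Sad", "Fearful", "Disgusted"] * 20,
})

# Stage 1: 80% train vs 20% temp, stratified so class ratios are preserved
train_df, temp_df = train_test_split(
    df, test_size=0.2, stratify=df["emotion"], random_state=42
)
# Stage 2: split temp in half -> 10% validation, 10% test
val_df, test_df = train_test_split(
    temp_df, test_size=0.5, stratify=temp_df["emotion"], random_state=42
)

# Tag each row with its split and recombine into one metadata table
train_df = train_df.assign(split="train")
val_df = val_df.assign(split="val")
test_df = test_df.assign(split="test")
final_df = pd.concat([train_df, val_df, test_df])
print(final_df["split"].value_counts())
```

Stratifying both stages keeps every emotion represented in the small validation and test slices, which matters when some classes are rare.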
### 2. Feature Extraction (Local Test)

Verify that your machine can process audio using the Wav2Vec2 processor.

```bash
uv run src/features/extractor.py
```

* **Output:** Prints the embedding shape `(768,)` for a sample file.
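The `(768,)` shape is wav2vec2-base's hidden size: the model emits one 768-dim vector per audio frame, and mean-pooling over time collapses them into a single utterance embedding. A minimal sketch (dummy audio stands in for a real file; the first run downloads the checkpoint):

```python
import numpy as np
import torch
from transformers import Wav2Vec2Processor, Wav2Vec2Model

processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h").eval()

# 1 second of dummy audio at 16 kHz (wav2vec2's expected sample rate)
speech = np.random.randn(16000).astype(np.float32)
inputs = processor(speech, sampling_rate=16000, return_tensors="pt")

with torch.no_grad():
    hidden = model(inputs.input_values).last_hidden_state  # (1, T, 768)

# Mean-pool across the time axis -> one fixed-size utterance embedding
embedding = hidden.mean(dim=1).squeeze(0).numpy()
print(embedding.shape)
```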
### 3. Model Training (The "Professional" Way)

Training a Transformer on a CPU is too slow. We use Google Colab.

1. Upload `train_colab.py` and your `Emotions` folder to Google Drive.
2. Open `VigilAudio_Fine_Tuning.ipynb` in Colab.
3. Set the runtime to **T4 GPU**.
4. Run the training script.

* **Result:** A fine-tuned model saved to `wav2vec2-finetuned/`.
* **Performance:** ~82% accuracy / 0.81 F1 score.
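Inference with the result would look roughly like the sketch below. To keep it self-contained, the 7-way classification head is freshly initialized on top of the base checkpoint; in practice you would point `from_pretrained` at your `wav2vec2-finetuned/` directory instead, and the label order here is an assumption.

```python
import numpy as np
import torch
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2ForSequenceClassification

LABELS = ["Angry", "Happy", "Sad", "Fearful", "Disgusted", "Neutral", "Surprised"]

# Swap "facebook/wav2vec2-base-960h" for "wav2vec2-finetuned/" to use real weights.
# (Loading the base checkpoint warns that the classifier head is newly initialized.)
model = Wav2Vec2ForSequenceClassification.from_pretrained(
    "facebook/wav2vec2-base-960h", num_labels=len(LABELS)
).eval()
extractor = Wav2Vec2FeatureExtractor.from_pretrained("facebook/wav2vec2-base-960h")

# Dummy 1-second clip at 16 kHz; replace with librosa.load(path, sr=16000)[0]
speech = np.random.randn(16000).astype(np.float32)
inputs = extractor(speech, sampling_rate=16000, return_tensors="pt")

with torch.no_grad():
    logits = model(**inputs).logits  # shape (1, 7): one score per emotion

print("Predicted:", LABELS[int(logits.argmax(-1))])
```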
## Dataset

The model was trained on a combined dataset of **12,798 audio recordings** across 7 emotions.

* **Source:** [Kaggle - Audio Emotions Dataset](https://www.kaggle.com/datasets/uldisvalainis/audio-emotions)
* **Composition:** An amalgam of the CREMA-D, TESS, RAVDESS, and SAVEE datasets.

## Results Summary

| Model | Architecture | Training Time | Accuracy |
|-------|--------------|---------------|----------|
| Baseline | Simple MLP (CPU) | ~3 hours | 54% |
| **VigilAudio** | **Fine-Tuned Wav2Vec2 (GPU)** | **17 mins** | **82%** |

## License

MIT
src/data/harmonize.py
CHANGED

```diff
@@ -6,7 +6,7 @@ from tqdm import tqdm
 import librosa
 
 def harmonize_data(raw_data_path, output_path):
-    print(f"
+    print(f"Scanning directory: {raw_data_path}")
 
     data = []
     # Folder names are our labels
@@ -20,7 +20,7 @@ def harmonize_data(raw_data_path, output_path):
         folder_path = Path(raw_data_path) / folder
         files = list(folder_path.glob("*.wav"))
 
-        print(f"
+        print(f"Processing {folder}: {len(files)} files")
 
         for file_path in tqdm(files, desc=f"Processing {folder}"):
             try:
@@ -33,16 +33,16 @@ def harmonize_data(raw_data_path, output_path):
                     "path": str(file_path.absolute())
                 })
             except Exception as e:
-                print(f"
+                print(f"Error processing {file_path}: {e}")
 
     df = pd.DataFrame(data)
 
     if df.empty:
-        print("
+        print("No data found! Please check the raw_data_path.")
         return
 
     # --- Stratified Splitting (80/10/10) ---
-    print("\
+    print("\nCreating stratified splits...")
 
     # First split: Train vs Temp (20%)
     train_df, temp_df = train_test_split(
@@ -66,9 +66,9 @@ def harmonize_data(raw_data_path, output_path):
     os.makedirs(os.path.dirname(output_path), exist_ok=True)
     final_df.to_csv(output_path, index=False)
 
-    print(f"\
-    print(f"
-    print(f"
+    print(f"\nHarmonization Complete!")
+    print(f"Total files: {len(final_df)}")
+    print(f"Metadata saved to: {output_path}")
     print("\nSplit Statistics:")
     print(final_df.groupby(['split', 'emotion']).size().unstack(fill_value=0))
```
src/features/build_features.py
CHANGED

```diff
@@ -13,7 +13,7 @@ def build_all_features(metadata_path, output_dir):
     df = pd.read_csv(metadata_path)
     extractor = AudioFeatureExtractor()
 
-    print(f"
+    print(f"Starting bulk extraction for {len(df)} files...")
 
     # 2. Loop with progress bar
     # We use a custom naming scheme: {split}_{original_filename}.npy
@@ -32,8 +32,8 @@ def build_all_features(metadata_path, output_dir):
         if embedding is not None:
             np.save(embedding_path, embedding)
 
-    print(f"\
-    print(f"
+    print(f"\nBulk Extraction Complete!")
+    print(f"Embeddings saved to: {output_dir.absolute()}")
 
 if __name__ == "__main__":
     METADATA = "data/processed/metadata.csv"
@@ -42,4 +42,4 @@ if __name__ == "__main__":
     if os.path.exists(METADATA):
         build_all_features(METADATA, OUTPUT)
     else:
-        print("
+        print("Metadata not found. Run harmonize.py first.")
```
src/features/extractor.py
CHANGED

```diff
@@ -12,8 +12,8 @@ class AudioFeatureExtractor:
         self.cache_dir = Path(cache_dir)
         self.cache_dir.mkdir(parents=True, exist_ok=True)
 
-        print(f"
-        print(f"
+        print(f"Loading model: {model_name}...")
+        print(f"Cache directory: {self.cache_dir.absolute()}")
 
         # Load processor and model with explicit cache_dir
         self.processor = Wav2Vec2Processor.from_pretrained(model_name, cache_dir=self.cache_dir)
@@ -24,7 +24,7 @@ class AudioFeatureExtractor:
         self.model.to(self.device)
         self.model.eval()
 
-        print(f"
+        print(f"Model loaded on {self.device}")
 
     def extract(self, audio_path):
         """
@@ -48,7 +48,7 @@ class AudioFeatureExtractor:
             return embeddings.cpu().numpy().flatten()
 
         except Exception as e:
-            print(f"
+            print(f"Error extracting features from {audio_path}: {e}")
             return None
 
 if __name__ == "__main__":
@@ -66,7 +66,7 @@ if __name__ == "__main__":
     if embedding is not None:
         print(f"\nSuccess!")
         print(f"File: {sample_path}")
-        print(f"Embedding shape: {embedding.shape}")
+        print(f"Embedding shape: {embedding.shape}")
         print(f"First 5 values: {embedding[:5]}")
     else:
-        print("
+        print("Metadata not found. Please run harmonization first.")
```