---
language: ko
license: mit
tags:
  - audio
  - emotion-detection
  - classification
metrics:
  - accuracy
model-index:
  - name: audio-emotion-model
    results:
      - task:
          type: audio-classification
          name: Audio Classification
        dataset:
          name: custom-dataset
          type: custom
        metrics:
          - type: accuracy
            value: 0.92
---

- Input: MFCC, 13 channels, length 100 → shape (B, 13, 100)
- Normalization: (X - mean) / (std + 1e-8)
- Labels: see `labels.json` (index ↔ label, 1:1)

## Usage

```python
import importlib.util
import json
import sys

import numpy as np
import torch
from huggingface_hub import hf_hub_download

# Download the weights, the model definition and the label map from the Hub.
repo = "HyukII/audio-emotion-model"
w = hf_hub_download(repo, "pytorch_model.pth")
m = hf_hub_download(repo, "model.py")
lab = hf_hub_download(repo, "labels.json")

# Use a context manager so the label file is closed deterministically.
with open(lab, encoding="utf-8") as f:
    labels = json.load(f)

# Import model.py from its downloaded path with the spec-based importlib API
# (`Loader.load_module()` is deprecated).
# NOTE: this executes Python code fetched from the repo; only do so for
# repositories you trust.
spec = importlib.util.spec_from_file_location("amodel", m)
amodel = importlib.util.module_from_spec(spec)
sys.modules["amodel"] = amodel  # register before executing, as load_module() did
spec.loader.exec_module(amodel)
Model = amodel.PyTorchAudioModel

# One output per entry in labels.json; eval() switches to inference mode.
model = Model(num_labels=len(labels)).eval()

# torch.load is pickle-based; weights_only=True restricts unpickling to
# tensors and plain containers, which is all a state dict needs.
state = torch.load(w, map_location="cpu", weights_only=True)
model.load_state_dict(state)

# x: tensor of shape (1, 13, 100)
# with torch.no_grad():
#     probs = torch.softmax(model(x), dim=1)