Audio Classification
Transformers
Safetensors
Japanese
animescore_ranknet
image-feature-extraction
audio
speech
preference
anime
custom_code
Instructions to use spellbrush/animescore with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use spellbrush/animescore with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("audio-classification", model="spellbrush/animescore", trust_remote_code=True)
```

```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("spellbrush/animescore", trust_remote_code=True, dtype="auto")
```

- Notebooks
- Google Colab
- Kaggle
| """Gradio demo for AnimeScore: audio in -> anime-likeness score out.""" | |
| import sys | |
| from pathlib import Path | |
| import gradio as gr | |
| import torch | |
| import torchaudio | |
| HERE = Path(__file__).resolve().parent | |
| sys.path.insert(0, str(HERE)) | |
| from modeling_animescore import AnimeScoreConfig, AnimeScoreRankNet | |
| from safetensors.torch import load_file | |
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
def _build_model() -> AnimeScoreRankNet:
    """Construct the AnimeScore model and load its weights from disk.

    Reads ``config.json`` and ``model.safetensors`` from the directory that
    contains this file, moves the model to ``DEVICE`` and puts it in eval
    mode before loading the checkpoint.

    Returns:
        The initialised ``AnimeScoreRankNet`` instance.

    Raises:
        RuntimeError: If the checkpoint is missing any key that does not
            start with ``ssl.``, or if it contains keys the model does not
            define.
    """
    cfg = AnimeScoreConfig.from_json_file(str(HERE / "config.json"))
    model = AnimeScoreRankNet(cfg).to(DEVICE).eval()
    sd = load_file(str(HERE / "model.safetensors"))

    # strict=False lets absent ``ssl.*`` entries through (presumably that
    # sub-module gets its weights elsewhere -- confirm); every other
    # mismatch is validated explicitly below.
    missing, unexpected = model.load_state_dict(sd, strict=False)

    missing_head = [key for key in missing if not key.startswith("ssl.")]
    if missing_head:
        # Report only the offending keys: listing the tolerated ``ssl.*``
        # entries as well would bury the ones that actually caused the error.
        raise RuntimeError(f"unexpected missing head keys: {missing_head}")
    if unexpected:
        raise RuntimeError(f"unexpected keys in safetensors: {unexpected}")
    return model
# Build the model once at import time so every request reuses the same instance.
MODEL = _build_model()
# Sample rate (read from the model config) that incoming audio is resampled to.
TARGET_SR = MODEL.config.target_sr
def _read_audio(path: str):
    """Decode an audio file into a float32 ``[channels, frames]`` tensor.

    soundfile (which bundles libsndfile) is tried first so the demo does not
    rely on torchaudio's optional torchcodec/ffmpeg backend. If that attempt
    raises for any reason -- including soundfile failing to import --
    ``torchaudio.load`` is used instead, covering the rare format libsndfile
    cannot decode.

    Args:
        path: Location of the audio file to read.

    Returns:
        A ``(waveform, sample_rate)`` tuple.
    """
    try:
        import soundfile

        samples, sample_rate = soundfile.read(path, dtype="float32", always_2d=True)
        # soundfile yields [frames, channels]; transpose to channels-first.
        waveform = torch.from_numpy(samples.T).contiguous()
    except Exception:
        decoded, sample_rate = torchaudio.load(path)
        waveform = decoded.to(torch.float32)
    return waveform, sample_rate
def _load_wav_to_tensor(path: str) -> torch.Tensor:
    """Load *path* as a single-channel waveform at ``TARGET_SR`` on ``DEVICE``.

    Args:
        path: Location of the audio file to read.

    Returns:
        The waveform tensor, downmixed and resampled as needed, moved to
        ``DEVICE``.
    """
    waveform, source_sr = _read_audio(path)

    has_multiple_channels = waveform.size(0) > 1
    if has_multiple_channels:
        # Downmix by averaging over channels while keeping the channel axis.
        waveform = waveform.mean(dim=0, keepdim=True)

    needs_resample = source_sr != TARGET_SR
    if needs_resample:
        waveform = torchaudio.functional.resample(waveform, source_sr, TARGET_SR)

    return waveform.to(DEVICE)
def predict(audio):
    """Score an audio file and return the result as display text.

    Args:
        audio: Path to the audio file, or ``None`` when no audio is supplied.

    Returns:
        The score formatted to four decimal places, or ``"—"`` when ``audio``
        is ``None``.
    """
    if audio is not None:
        waveform = _load_wav_to_tensor(audio)
        # Inference only: no gradients are needed.
        with torch.no_grad():
            raw_score = MODEL.score(waveform)
            value = raw_score.item()
        return f"{value:.4f}"
    return "—"
with gr.Blocks(title="AnimeScore") as demo:
    # Page heading and one-line explanation of the score.
    gr.Markdown("# AnimeScore\n\nScore a speech clip for anime-likeness. Higher = more anime-like.")

    # Input widget; type="filepath" hands the callback a path on disk.
    audio_in = gr.Audio(
        sources=["upload", "microphone"],
        type="filepath",
        label="Audio",
    )
    run = gr.Button("Score", variant="primary")
    # Read-only field that displays the formatted score.
    score_out = gr.Textbox(label="AnimeScore", interactive=False)

    # Score on an explicit button press...
    run.click(fn=predict, inputs=audio_in, outputs=score_out)
    # ...and also whenever the audio input itself changes.
    audio_in.change(fn=predict, inputs=audio_in, outputs=score_out)
if __name__ == "__main__":
    # Enable the request queue first, then start the server on whatever
    # object queue() hands back.
    queued_app = demo.queue()
    queued_app.launch()