Spaces:

FJFehr
/

virtual_keyboard

Running

App Files Files Community

FJFehr committed on 18 days ago

Commit

f7e26a4

1 Parent(s): 9de8877

First iteration of a MIDI model integration.

Browse files

Files changed (7) hide show

.gitignore +6 -0
README.md +12 -0
engines.py +69 -49
keyboard.html +1 -0
midi_model.py +376 -0
requirements.txt +5 -1
static/keyboard.js +25 -0

.gitignore CHANGED Viewed

@@ -26,6 +26,12 @@ share/python-wheels/
 .installed.cfg
 *.egg
 # Virtual environments
 .venv/
 venv/

 .installed.cfg
 *.egg
+# External model dependencies
+external/
+# Output files
+output/
 # Virtual environments
 .venv/
 venv/

README.md CHANGED Viewed

@@ -14,6 +14,9 @@ short_description: Browser-based MIDI keyboard with recording and synthesis
 A minimal, responsive browser-based MIDI keyboard. Play live, record performances, and export as MIDI files. 🎹
 ## 🗂️ Project Structure
@@ -22,6 +25,7 @@ A minimal, responsive browser-based MIDI keyboard. Play live, record performance
 ├── app.py                  # Gradio server & API endpoints
 ├── config.py               # Centralized configuration
 ├── engines.py              # MIDI processing engines
 ├── midi.py                 # MIDI file utilities
 ├── keyboard.html           # HTML structure
 ├── static/
@@ -43,6 +47,13 @@ uv run python app.py
 Open **http://127.0.0.1:7861**
 ## 🌐 Deploy to Hugging Face Spaces
 ```bash
@@ -55,6 +66,7 @@ git push hf main
 - **Frontend**: Tone.js v6+ (Web Audio API)
 - **Backend**: Gradio 6.x + Python 3.10+
 - **MIDI**: mido library
 ## 📝 License

 A minimal, responsive browser-based MIDI keyboard. Play live, record performances, and export as MIDI files. 🎹
+This build includes a **Godzilla** engine that can continue a short phrase using the
+Godzilla Piano Transformer.
 ## 🗂️ Project Structure
 ├── app.py                  # Gradio server & API endpoints
 ├── config.py               # Centralized configuration
 ├── engines.py              # MIDI processing engines
+├── midi_model.py           # Godzilla model integration
 ├── midi.py                 # MIDI file utilities
 ├── keyboard.html           # HTML structure
 ├── static/
 Open **http://127.0.0.1:7861**
+## 🎹 Godzilla Engine
+Select **Godzilla** in the engine dropdown to generate a short continuation from your
+recorded phrase. The model is downloaded on first use and cached locally.
+Note: the engine filters generated notes to your on-screen keyboard range.
 ## 🌐 Deploy to Hugging Face Spaces
 ```bash
 - **Frontend**: Tone.js v6+ (Web Audio API)
 - **Backend**: Gradio 6.x + Python 3.10+
 - **MIDI**: mido library
+- **Model**: Godzilla Piano Transformer (via Hugging Face)
 ## 📝 License

engines.py CHANGED Viewed

@@ -4,33 +4,13 @@ Virtual MIDI Keyboard - Engines
 MIDI processing engines that transform, analyze, or manipulate MIDI events.
 """
-from abc import ABC, abstractmethod
 from typing import List, Dict, Any
-# =============================================================================
-# BASE ENGINE CLASS
-# =============================================================================
-class MIDIEngine(ABC):
-    """Abstract base class for MIDI engines"""
-    def __init__(self, name: str):
-        self.name = name
-    @abstractmethod
-    def process(self, events: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
-        """
-        Process MIDI events and return transformed events.
-        Args:
-            events: List of MIDI event dictionaries
-        Returns:
-            List of processed MIDI event dictionaries
-        """
-        pass
 # =============================================================================
@@ -38,7 +18,7 @@ class MIDIEngine(ABC):
 # =============================================================================
-class ParrotEngine(MIDIEngine):
     """
     Parrot Engine - plays back MIDI exactly as recorded.
@@ -46,7 +26,7 @@ class ParrotEngine(MIDIEngine):
     """
     def __init__(self):
-        super().__init__("Parrot")
     def process(self, events: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """Return events unchanged"""
@@ -69,7 +49,7 @@ class ParrotEngine(MIDIEngine):
 # =============================================================================
-class ReverseParrotEngine(MIDIEngine):
     """
     Reverse Parrot Engine - plays back MIDI in reverse order.
@@ -78,7 +58,7 @@ class ReverseParrotEngine(MIDIEngine):
     """
     def __init__(self):
-        super().__init__("Reverse Parrot")
     def process(self, events: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """Reverse the sequence of note numbers while keeping timing and event types"""
@@ -88,43 +68,79 @@ class ReverseParrotEngine(MIDIEngine):
         # Separate note_on and note_off events
         note_on_events = [e for e in events if e.get("type") == "note_on"]
         note_off_events = [e for e in events if e.get("type") == "note_off"]
         # Extract note numbers from note_on events and reverse them
         on_notes = [e.get("note") for e in note_on_events]
         reversed_on_notes = list(reversed(on_notes))
         # Extract note numbers from note_off events and reverse them
         off_notes = [e.get("note") for e in note_off_events]
         reversed_off_notes = list(reversed(off_notes))
         # Reconstruct events with reversed notes but original structure
         result = []
         on_index = 0
         off_index = 0
         for event in events:
             if event.get("type") == "note_on":
-                result.append({
-                    "type": "note_on",
-                    "note": reversed_on_notes[on_index],
-                    "velocity": event.get("velocity"),
-                    "time": event.get("time"),
-                    "channel": event.get("channel", 0),
-                })
                 on_index += 1
             elif event.get("type") == "note_off":
-                result.append({
-                    "type": "note_off",
-                    "note": reversed_off_notes[off_index],
-                    "velocity": event.get("velocity"),
-                    "time": event.get("time"),
-                    "channel": event.get("channel", 0),
-                })
                 off_index += 1
         return result
 # =============================================================================
 # ENGINE REGISTRY
 # =============================================================================
@@ -133,7 +149,11 @@ class ReverseParrotEngine(MIDIEngine):
 class EngineRegistry:
     """Registry for managing available MIDI engines"""
-    _engines = {"parrot": ParrotEngine, "reverse_parrot": ReverseParrotEngine}
     @classmethod
     def register(cls, engine_id: str, engine_class: type):
@@ -141,7 +161,7 @@ class EngineRegistry:
         cls._engines[engine_id] = engine_class
     @classmethod
-    def get_engine(cls, engine_id: str) -> MIDIEngine:
         """Get an engine instance by ID"""
         if engine_id not in cls._engines:
             raise ValueError(f"Unknown engine: {engine_id}")

 MIDI processing engines that transform, analyze, or manipulate MIDI events.
 """
 from typing import List, Dict, Any
+from midi_model import (
+    count_out_of_range_events,
+    filter_events_to_keyboard_range,
+    get_model,
+)
 # =============================================================================
 # =============================================================================
+class ParrotEngine:
     """
     Parrot Engine - plays back MIDI exactly as recorded.
     """
     def __init__(self):
+        self.name = "Parrot"
     def process(self, events: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """Return events unchanged"""
 # =============================================================================
+class ReverseParrotEngine:
     """
     Reverse Parrot Engine - plays back MIDI in reverse order.
     """
     def __init__(self):
+        self.name = "Reverse Parrot"
     def process(self, events: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
         """Reverse the sequence of note numbers while keeping timing and event types"""
         # Separate note_on and note_off events
         note_on_events = [e for e in events if e.get("type") == "note_on"]
         note_off_events = [e for e in events if e.get("type") == "note_off"]
         # Extract note numbers from note_on events and reverse them
         on_notes = [e.get("note") for e in note_on_events]
         reversed_on_notes = list(reversed(on_notes))
         # Extract note numbers from note_off events and reverse them
         off_notes = [e.get("note") for e in note_off_events]
         reversed_off_notes = list(reversed(off_notes))
         # Reconstruct events with reversed notes but original structure
         result = []
         on_index = 0
         off_index = 0
         for event in events:
             if event.get("type") == "note_on":
+                result.append(
+                    {
+                        "type": "note_on",
+                        "note": reversed_on_notes[on_index],
+                        "velocity": event.get("velocity"),
+                        "time": event.get("time"),
+                        "channel": event.get("channel", 0),
+                    }
+                )
                 on_index += 1
             elif event.get("type") == "note_off":
+                result.append(
+                    {
+                        "type": "note_off",
+                        "note": reversed_off_notes[off_index],
+                        "velocity": event.get("velocity"),
+                        "time": event.get("time"),
+                        "channel": event.get("channel", 0),
+                    }
+                )
                 off_index += 1
         return result
+# =============================================================================
+# GODZILLA CONTINUATION ENGINE
+# =============================================================================
+class GodzillaContinuationEngine:
+    """
+    Continue a short MIDI phrase with the Godzilla Piano Transformer.
+    process() hands the recorded events to the "godzilla" model and returns
+    the events the model produced, restricted to the on-screen keyboard range.
+    The input events are not prepended to the returned list by this class.
+    """
+    def __init__(self, generate_tokens: int = 32) -> None:
+        self.name = "Godzilla"
+        # Number of tokens requested from the model on each process() call.
+        self.generate_tokens = generate_tokens
+    def process(self, events: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+        """Generate a continuation for ``events``.
+
+        Returns an empty list for empty input (the model is not invoked).
+        Otherwise returns the generated events with out-of-range notes removed;
+        the number of removed events is printed when it is non-zero.
+        """
+        if not events:
+            return []
+        model = get_model("godzilla")
+        new_events = model.generate_continuation(
+            events,
+            tokens=self.generate_tokens,
+            seed=None,  # no fixed seed is passed
+        )
+        # Count first so the drop can be reported, then filter.
+        out_of_range = count_out_of_range_events(new_events)
+        if out_of_range:
+            print(f"Godzilla: dropped {out_of_range} out-of-range events")
+        return filter_events_to_keyboard_range(new_events)
 # =============================================================================
 # ENGINE REGISTRY
 # =============================================================================
 class EngineRegistry:
     """Registry for managing available MIDI engines"""
+    _engines = {
+        "parrot": ParrotEngine,
+        "reverse_parrot": ReverseParrotEngine,
+        "godzilla_continue": GodzillaContinuationEngine,
+    }
     @classmethod
     def register(cls, engine_id: str, engine_class: type):
         cls._engines[engine_id] = engine_class
     @classmethod
+    def get_engine(cls, engine_id: str):
         """Get an engine instance by ID"""
         if engine_id not in cls._engines:
             raise ValueError(f"Unknown engine: {engine_id}")

keyboard.html CHANGED Viewed

@@ -35,6 +35,7 @@
           <select id="engineSelect">
             <option value="parrot">Parrot</option>
             <option value="reverse_parrot">Reverse Parrot</option>
           </select>
         </label>

           <select id="engineSelect">
             <option value="parrot">Parrot</option>
             <option value="reverse_parrot">Reverse Parrot</option>
+            <option value="godzilla_continue">Godzilla</option>
           </select>
         </label>

midi_model.py ADDED Viewed

	@@ -0,0 +1,376 @@

+#!/usr/bin/env python3
+from __future__ import annotations
+import subprocess
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Iterable, Optional
+from config import MIDI_DEFAULTS, KEYBOARD_BASE_MIDI, KEYBOARD_OCTAVES
+DEFAULT_REPO = "asigalov61/Godzilla-Piano-Transformer"
+DEFAULT_FILENAME = (
+    "Godzilla_Piano_Chords_Texturing_Transformer_Trained_Model_22708_steps_"
+    "0.7515_loss_0.7853_acc.pth"
+)
+_MODEL_CACHE: dict[str, object] = {}
+@dataclass(frozen=True)
+class MidiModel:
+    """Immutable base description of a MIDI generation model.
+
+    Subclasses override generate_continuation(); the base implementation
+    raises NotImplementedError.
+    """
+    # Identifier used to look the model up (see get_model()).
+    model_id: str
+    # Human-readable display name.
+    name: str
+    def generate_continuation(
+        self,
+        events: list[dict],
+        *,
+        tokens: int = 32,
+        seed: Optional[int] = None,
+    ) -> list[dict]:
+        """Return generated events continuing ``events``.
+
+        Args:
+            events: Recorded MIDI event dictionaries.
+            tokens: Number of model tokens to generate.
+            seed: Optional random seed; None leaves the RNG untouched.
+
+        Raises:
+            NotImplementedError: Always, in this base class.
+        """
+        raise NotImplementedError
+def ensure_tegridy_tools(base_dir: Path) -> tuple[Path, Path]:
+    """Make sure a tegridy-tools checkout exists under ``base_dir``.
+
+    If ``<base_dir>/tegridy-tools/tegridy-tools/X-Transformer`` does not exist,
+    the repository is shallow-cloned (``--depth 1``) into
+    ``<base_dir>/tegridy-tools``.
+
+    Returns:
+        ``(tools_dir, x_transformer_dir)``: the inner ``tegridy-tools``
+        directory and its ``X-Transformer`` subdirectory.
+
+    Raises:
+        RuntimeError: If the ``git`` executable cannot be found.
+        subprocess.CalledProcessError: If ``git clone`` exits non-zero
+            (raised by ``check_call`` and not caught here).
+    """
+    repo_dir = base_dir / "tegridy-tools"
+    tools_dir = repo_dir / "tegridy-tools"
+    x_transformer_dir = tools_dir / "X-Transformer"
+    if not x_transformer_dir.exists():
+        repo_url = "https://github.com/asigalov61/tegridy-tools"
+        # Create base_dir (the clone target's parent) if needed.
+        repo_dir.parent.mkdir(parents=True, exist_ok=True)
+        try:
+            subprocess.check_call(
+                [
+                    "git",
+                    "clone",
+                    "--depth",
+                    "1",
+                    repo_url,
+                    str(repo_dir),
+                ]
+            )
+        except FileNotFoundError as exc:
+            # FileNotFoundError here means the git binary itself is missing.
+            raise RuntimeError("git is required to clone tegridy-tools") from exc
+    return tools_dir, x_transformer_dir
+def add_sys_path(*paths: Path) -> None:
+    """Prepend each resolved path to ``sys.path`` unless it is already present."""
+    for path in paths:
+        path_str = str(path.resolve())
+        if path_str not in sys.path:
+            # Insert at the front so these directories take import precedence.
+            sys.path.insert(0, path_str)
+def build_model(seq_len: int, pad_idx: int):
+    """Construct the (untrained) autoregressive transformer.
+
+    ``x_transformer_2_3_1`` is imported lazily, so it must already be
+    importable; load_model_cached() adds the tegridy-tools X-Transformer
+    directory to ``sys.path`` before calling this function.
+
+    Args:
+        seq_len: Maximum sequence length of the model.
+        pad_idx: Padding token id; the vocabulary size is ``pad_idx + 1``.
+
+    Returns:
+        An ``AutoregressiveWrapper`` around the ``TransformerWrapper``.
+    """
+    from x_transformer_2_3_1 import AutoregressiveWrapper, Decoder, TransformerWrapper
+    model = TransformerWrapper(
+        num_tokens=pad_idx + 1,
+        max_seq_len=seq_len,
+        attn_layers=Decoder(
+            dim=2048,
+            depth=8,
+            heads=32,
+            rotary_pos_emb=True,
+            attn_flash=True,
+        ),
+    )
+    # The pad token is both ignored in the loss and used as the pad value.
+    return AutoregressiveWrapper(model, ignore_index=pad_idx, pad_value=pad_idx)
+def resolve_device(requested: str) -> str:
+    """Translate ``"auto"`` into ``"cuda"`` or ``"cpu"``; pass other values through.
+
+    ``"auto"`` becomes ``"cuda"`` when ``torch.cuda.is_available()`` is true,
+    otherwise ``"cpu"``. Any other string is returned unchanged and unvalidated.
+    """
+    import torch
+    if requested == "auto":
+        return "cuda" if torch.cuda.is_available() else "cpu"
+    return requested
+def load_checkpoint(model, checkpoint_path: Path, device: str) -> None:
+    """Load a state dict from ``checkpoint_path`` into ``model`` in place.
+
+    The file is read with ``torch.load`` mapped onto ``device`` and the result
+    is passed directly to ``model.load_state_dict``.
+    """
+    import torch
+    # NOTE(review): torch.load unpickles the file, and this checkpoint is
+    # downloaded from the Hub. Consider passing weights_only=True (the default
+    # only on newer torch releases) - confirm the checkpoint loads that way.
+    state = torch.load(checkpoint_path, map_location=device)
+    model.load_state_dict(state)
+def events_to_score_tokens(events: list[dict]) -> list[int]:
+    """Encode note events as a flat list of (time, duration, pitch) token triplets.
+
+    Events are sorted by their "time" value (treated as seconds) and each
+    note_on with velocity > 0 is paired with the next note_off, or
+    zero-velocity note_on, of the same note number. Notes that are never
+    released are dropped, and a second note_on for a still-active note
+    replaces the earlier start time.
+
+    Token layout per note, in order:
+        * time delta since the previous note start, in 32 ms steps, clamped to 0..127
+        * ``128 + duration`` in 32 ms steps, clamped to 1..127
+        * ``256 + pitch`` with pitch clamped to 0..127
+
+    Returns an empty list when there are no events or no completed notes.
+    """
+    if not events:
+        return []
+    # note number -> start time of the currently sounding note
+    active: dict[int, float] = {}
+    # completed notes as (start, duration, note)
+    notes: list[tuple[float, float, int]] = []
+    sorted_events = sorted(events, key=lambda e: e.get("time", 0.0))
+    for event in sorted_events:
+        ev_type = event.get("type")
+        note = int(event.get("note", 0))
+        velocity = int(event.get("velocity", 0))
+        time_sec = float(event.get("time", 0.0))
+        if ev_type == "note_on" and velocity > 0:
+            active[note] = time_sec
+        elif ev_type in {"note_off", "note_on"}:
+            # Reached by note_off, or by note_on with velocity <= 0.
+            if note in active:
+                start = active.pop(note)
+                duration = max(0.0, time_sec - start)
+                notes.append((start, duration, note))
+    if not notes:
+        return []
+    notes.sort(key=lambda n: n[0])
+    tokens: list[int] = []
+    # The first delta is measured from time zero, not from the first note.
+    prev_start_ms = 0.0
+    for start, duration, pitch in notes:
+        start_ms = round(start * 1000.0)
+        delta_ms = max(0.0, start_ms - prev_start_ms)
+        prev_start_ms = start_ms
+        time_tok = max(0, min(127, int(round(delta_ms / 32.0))))
+        # Duration has a floor of 1 step, so token 128 is never emitted.
+        dur_tok = max(1, min(127, int(round((duration * 1000.0) / 32.0))))
+        pitch_tok = max(0, min(127, int(pitch)))
+        tokens.extend([time_tok, 128 + dur_tok, 256 + pitch_tok])
+    return tokens
+def tokens_to_events(
+    tokens: Iterable[int],
+    *,
+    offset_ms: float = 0.0,
+    velocity: int | None = None,
+) -> list[dict]:
+    if velocity is None:
+        velocity = MIDI_DEFAULTS["velocity_default"]
+    events: list[dict] = []
+    time_ms = offset_ms
+    duration_ms = 1
+    pitch = 60
+    for tok in tokens:
+        if 0 <= tok < 128:
+            time_ms += tok * 32
+        elif 128 < tok < 256:
+            duration_ms = (tok - 128) * 32
+        elif 256 < tok < 384:
+            pitch = tok - 256
+            on_time = time_ms / 1000.0
+            off_time = (time_ms + duration_ms) / 1000.0
+            events.append(
+                {
+                    "type": "note_on",
+                    "note": pitch,
+                    "velocity": velocity,
+                    "time": on_time,
+                    "channel": 0,
+                }
+            )
+            events.append(
+                {
+                    "type": "note_off",
+                    "note": pitch,
+                    "velocity": 0,
+                    "time": off_time,
+                    "channel": 0,
+                }
+            )
+    return events
+def keyboard_note_range() -> tuple[int, int]:
+    """Return the inclusive ``(min_note, max_note)`` MIDI range of the keyboard.
+
+    The range starts at ``KEYBOARD_BASE_MIDI`` and spans
+    ``KEYBOARD_OCTAVES * 12`` consecutive notes.
+    """
+    min_note = KEYBOARD_BASE_MIDI
+    max_note = KEYBOARD_BASE_MIDI + (KEYBOARD_OCTAVES * 12) - 1
+    return min_note, max_note
+def count_out_of_range_events(events: list[dict]) -> int:
+    """Count note_on/note_off events whose note lies outside the keyboard range.
+
+    Events of other types are never counted. An event without a "note" key is
+    treated as being at ``min_note`` and therefore counts as in range.
+    """
+    min_note, max_note = keyboard_note_range()
+    return sum(
+        1
+        for event in events
+        if event.get("type") in {"note_on", "note_off"}
+        and int(event.get("note", min_note)) not in range(min_note, max_note + 1)
+    )
+def filter_events_to_keyboard_range(events: list[dict]) -> list[dict]:
+    """Return the events that fit the keyboard range, preserving order.
+
+    Non-note events are always kept. note_on/note_off events are kept only
+    when their note is within the inclusive keyboard range; a missing "note"
+    key defaults to ``min_note`` and is kept.
+    """
+    min_note, max_note = keyboard_note_range()
+    return [
+        event
+        for event in events
+        if event.get("type") not in {"note_on", "note_off"}
+        or min_note <= int(event.get("note", min_note)) <= max_note
+    ]
+def build_prime_tokens(score_tokens: list[int], seq_len: int) -> list[int]:
+    prime = [705, 384, 706]
+    if score_tokens:
+        max_score = max(0, seq_len - len(prime))
+        prime.extend(score_tokens[-max_score:])
+    else:
+        prime.extend([0, 129, 316])
+    return prime
+def load_model_cached(
+    *,
+    repo: str,
+    filename: str,
+    cache_dir: Path,
+    tegridy_dir: Path,
+    seq_len: int,
+    pad_idx: int,
+    device: str,
+) -> tuple[object, str, Path]:
+    """Download, build and load the model, reusing the last loaded instance.
+
+    The module-level ``_MODEL_CACHE`` holds a single entry. It is reused when
+    repo, filename, seq_len, pad_idx and the resolved device all match the
+    previous call; otherwise the model is loaded again and the entry replaced.
+
+    Args:
+        repo: Hugging Face model repository id.
+        filename: Checkpoint file name inside ``repo``.
+        cache_dir: Local directory the checkpoint is downloaded into (created
+            if missing).
+        tegridy_dir: Base directory for the tegridy-tools checkout.
+        seq_len: Maximum sequence length passed to build_model().
+        pad_idx: Padding token id passed to build_model().
+        device: Device string, or ``"auto"`` (see resolve_device()).
+
+    Returns:
+        ``(model, resolved_device, tools_dir)``.
+    """
+    from huggingface_hub import hf_hub_download
+    import torch
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    resolved_device = resolve_device(device)
+    cache_key = f"{repo}:{filename}:{seq_len}:{pad_idx}:{resolved_device}"
+    if _MODEL_CACHE.get("key") == cache_key:
+        return (
+            _MODEL_CACHE["model"],
+            _MODEL_CACHE["device"],
+            _MODEL_CACHE["tools_dir"],
+        )
+    checkpoint_path = Path(
+        hf_hub_download(
+            repo_id=repo,
+            filename=filename,
+            local_dir=str(cache_dir),
+            repo_type="model",
+        )
+    )
+    # The X-Transformer directory must be importable before build_model() runs.
+    tools_dir, x_transformer_dir = ensure_tegridy_tools(tegridy_dir)
+    add_sys_path(x_transformer_dir)
+    if resolved_device == "cuda":
+        # Allow TF32 matmul/cuDNN kernels on CUDA.
+        torch.set_float32_matmul_precision("high")
+        torch.backends.cuda.matmul.allow_tf32 = True
+        torch.backends.cudnn.allow_tf32 = True
+    model = build_model(seq_len, pad_idx)
+    load_checkpoint(model, checkpoint_path, resolved_device)
+    model.to(resolved_device)
+    model.eval()
+    # Overwrite the single cache entry with the freshly loaded model.
+    _MODEL_CACHE["key"] = cache_key
+    _MODEL_CACHE["model"] = model
+    _MODEL_CACHE["device"] = resolved_device
+    _MODEL_CACHE["tools_dir"] = tools_dir
+    _MODEL_CACHE["checkpoint_path"] = checkpoint_path
+    return model, resolved_device, tools_dir
+def generate_from_events(
+    events: list[dict],
+    *,
+    generate_tokens: int,
+    seed: int | None,
+    repo: str,
+    filename: str,
+    cache_dir: Path,
+    tegridy_dir: Path,
+    seq_len: int,
+    pad_idx: int,
+    device: str,
+) -> tuple[list[dict], list[int]]:
+    """Generate a continuation of ``events`` with the given model checkpoint.
+
+    The events are encoded to tokens, wrapped into a prime sequence, and
+    passed to ``model.generate``. Only the tokens after the prime are decoded
+    back into events, which are time-shifted to start at the latest "time"
+    found in the input.
+
+    Args:
+        events: Recorded MIDI event dictionaries ("time" treated as seconds).
+        generate_tokens: Number of tokens to request from the model.
+        seed: If not None, seeds torch (and all CUDA devices when on cuda).
+        repo, filename, cache_dir, tegridy_dir, seq_len, pad_idx, device:
+            Forwarded to load_model_cached().
+
+    Returns:
+        ``(new_events, new_tokens)``: the decoded events and the raw generated
+        token ids.
+    """
+    import torch
+    model, resolved_device, _ = load_model_cached(
+        repo=repo,
+        filename=filename,
+        cache_dir=cache_dir,
+        tegridy_dir=tegridy_dir,
+        seq_len=seq_len,
+        pad_idx=pad_idx,
+        device=device,
+    )
+    if seed is not None:
+        torch.manual_seed(seed)
+        if resolved_device == "cuda":
+            torch.cuda.manual_seed_all(seed)
+    score_tokens = events_to_score_tokens(events)
+    prime = build_prime_tokens(score_tokens, seq_len)
+    prime_tensor = torch.tensor(prime, dtype=torch.long, device=resolved_device)
+    out = model.generate(
+        prime_tensor,
+        generate_tokens,
+        return_prime=True,
+        eos_token=707,
+    )
+    tokens = out.detach().cpu().tolist()
+    # return_prime=True was requested, so strip the prime to keep new tokens only.
+    new_tokens = tokens[len(prime) :]
+    last_time_ms = 0.0
+    if events:
+        # Start the continuation at the latest timestamp in the input.
+        last_time_ms = max(float(e.get("time", 0.0)) for e in events) * 1000.0
+    new_events = tokens_to_events(new_tokens, offset_ms=last_time_ms)
+    return new_events, new_tokens
+def generate_godzilla_continuation(
+    events: list[dict],
+    *,
+    generate_tokens: int = 32,
+    seed: int | None = None,
+    device: str = "auto",
+) -> tuple[list[dict], list[int]]:
+    """Run generate_from_events() with the Godzilla defaults.
+
+    Uses ``DEFAULT_REPO`` / ``DEFAULT_FILENAME``, the relative directories
+    ``.cache/godzilla`` and ``external`` (resolved against the current working
+    directory), ``seq_len=1536`` and ``pad_idx=708``.
+
+    Returns:
+        ``(new_events, new_tokens)`` as returned by generate_from_events().
+    """
+    return generate_from_events(
+        events,
+        generate_tokens=generate_tokens,
+        seed=seed,
+        repo=DEFAULT_REPO,
+        filename=DEFAULT_FILENAME,
+        cache_dir=Path(".cache/godzilla"),
+        tegridy_dir=Path("external"),
+        seq_len=1536,
+        pad_idx=708,
+        device=device,
+    )
+class GodzillaMidiModel(MidiModel):
+    """MidiModel backed by the Godzilla Piano Transformer checkpoint."""
+    def __init__(self) -> None:
+        # Fixed identity; delegates to the dataclass-generated initializer.
+        super().__init__(model_id="godzilla", name="Godzilla")
+    def generate_continuation(
+        self,
+        events: list[dict],
+        *,
+        tokens: int = 32,
+        seed: Optional[int] = None,
+    ) -> list[dict]:
+        """Return generated events continuing ``events``.
+
+        Delegates to generate_godzilla_continuation() with ``device="auto"``
+        and discards the raw token list it also returns.
+        """
+        new_events, _ = generate_godzilla_continuation(
+            events,
+            generate_tokens=tokens,
+            seed=seed,
+            device="auto",
+        )
+        return new_events
+def get_model(model_id: str) -> MidiModel:
+    """Return a new model instance for ``model_id``.
+
+    Only ``"godzilla"`` is recognised. A fresh GodzillaMidiModel is created on
+    every call.
+
+    Raises:
+        ValueError: If ``model_id`` is not a known model.
+    """
+    if model_id == "godzilla":
+        return GodzillaMidiModel()
+    raise ValueError(f"Unknown MIDI model: {model_id}")

requirements.txt CHANGED Viewed

@@ -1,2 +1,6 @@
 gradio
-mido

 gradio
+mido
+torch
+huggingface_hub
+einops>=0.6
+einx

static/keyboard.js CHANGED Viewed

@@ -125,6 +125,23 @@ function buildInstruments(instrumentConfigs) {
 let instruments = {}; // Will be populated after config is fetched
 // =============================================================================
 // INITIALIZATION FROM SERVER CONFIG
 // =============================================================================
@@ -148,6 +165,9 @@ async function initializeFromConfig() {
     for (const [midiStr, key] of Object.entries(serverConfig.keyboard_shortcuts)) {
       window.keyMapFromServer[key.toLowerCase()] = parseInt(midiStr);
     }
     // Render keyboard after config is loaded
     buildKeyboard();
@@ -166,6 +186,11 @@ async function initializeFromConfig() {
     });
     window.keyboardShortcutsFromServer = keyShortcuts; // Use hardcoded as fallback
     window.keyMapFromServer = keyMap; // Use hardcoded as fallback
     buildKeyboard();
   }
 }

 let instruments = {}; // Will be populated after config is fetched
+// Rebuild the engine dropdown from a list of { id, name } entries.
+// Does nothing when `engineSelect` is falsy or `engines` is not an array.
+// Existing options are cleared first, and the first entry becomes the
+// selected engine (both `selectedEngine` and the dropdown value are updated).
+function populateEngineSelect(engines) {
+  if (!engineSelect || !Array.isArray(engines)) return;
+  engineSelect.innerHTML = '';
+  engines.forEach(engine => {
+    const option = document.createElement('option');
+    option.value = engine.id;
+    // Fall back to the id when no display name is provided.
+    option.textContent = engine.name || engine.id;
+    engineSelect.appendChild(option);
+  });
+  if (engines.length > 0) {
+    selectedEngine = engines[0].id;
+    engineSelect.value = selectedEngine;
+  }
+}
 // =============================================================================
 // INITIALIZATION FROM SERVER CONFIG
 // =============================================================================
     for (const [midiStr, key] of Object.entries(serverConfig.keyboard_shortcuts)) {
       window.keyMapFromServer[key.toLowerCase()] = parseInt(midiStr);
     }
+    // Populate engine dropdown from server config
+    populateEngineSelect(serverConfig.engines);
     // Render keyboard after config is loaded
     buildKeyboard();
     });
     window.keyboardShortcutsFromServer = keyShortcuts; // Use hardcoded as fallback
     window.keyMapFromServer = keyMap; // Use hardcoded as fallback
+    populateEngineSelect([
+      { id: 'parrot', name: 'Parrot' },
+      { id: 'reverse_parrot', name: 'Reverse Parrot' },
+      { id: 'godzilla_continue', name: 'Godzilla' }
+    ]);
     buildKeyboard();
   }
 }