Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +36 -35
- README.md +3 -0
- checkpoint/custom_checkpoint_0.pkl +3 -0
- checkpoint/pytorch_model.bin +3 -0
- configs/inference.yaml +65 -0
- configs/model/model.yaml +8 -0
- configs/model/t5_base.yaml +15 -0
- configs/model/t5_small.yaml +10 -0
- configs/model/t5_small_v4.yaml +7 -0
- configs/model/t5_small_v9.yaml +9 -0
- configs/model/whisper_base.yaml +6 -0
- inference.py +117 -0
- osuT5/__init__.py +0 -0
- osuT5/__pycache__/__init__.cpython-311.pyc +0 -0
- osuT5/__pycache__/__init__.cpython-39.pyc +0 -0
- osuT5/dataset/__init__.py +1 -0
- osuT5/dataset/__pycache__/__init__.cpython-311.pyc +0 -0
- osuT5/dataset/__pycache__/__init__.cpython-39.pyc +0 -0
- osuT5/dataset/__pycache__/data_utils.cpython-311.pyc +0 -0
- osuT5/dataset/__pycache__/data_utils.cpython-39.pyc +0 -0
- osuT5/dataset/__pycache__/ors_dataset.cpython-311.pyc +0 -0
- osuT5/dataset/__pycache__/ors_dataset.cpython-39.pyc +0 -0
- osuT5/dataset/__pycache__/osu_parser.cpython-311.pyc +0 -0
- osuT5/dataset/__pycache__/osu_parser.cpython-39.pyc +0 -0
- osuT5/dataset/data_utils.py +100 -0
- osuT5/dataset/osu_parser.py +184 -0
- osuT5/inference/__init__.py +4 -0
- osuT5/inference/__pycache__/__init__.cpython-311.pyc +0 -0
- osuT5/inference/__pycache__/__init__.cpython-39.pyc +0 -0
- osuT5/inference/__pycache__/diffusion_pipeline.cpython-311.pyc +0 -0
- osuT5/inference/__pycache__/path_approximator.cpython-311.pyc +0 -0
- osuT5/inference/__pycache__/path_approximator.cpython-39.pyc +0 -0
- osuT5/inference/__pycache__/pipeline.cpython-311.pyc +0 -0
- osuT5/inference/__pycache__/pipeline.cpython-39.pyc +0 -0
- osuT5/inference/__pycache__/postprocessor.cpython-311.pyc +0 -0
- osuT5/inference/__pycache__/postprocessor.cpython-39.pyc +0 -0
- osuT5/inference/__pycache__/preprocessor.cpython-311.pyc +0 -0
- osuT5/inference/__pycache__/preprocessor.cpython-39.pyc +0 -0
- osuT5/inference/__pycache__/slider_path.cpython-311.pyc +0 -0
- osuT5/inference/__pycache__/slider_path.cpython-39.pyc +0 -0
- osuT5/inference/diffusion_pipeline.py +214 -0
- osuT5/inference/path_approximator.py +253 -0
- osuT5/inference/pipeline.py +338 -0
- osuT5/inference/postprocessor.py +322 -0
- osuT5/inference/preprocessor.py +58 -0
- osuT5/inference/slider_path.py +230 -0
- osuT5/inference/template.osu +54 -0
- osuT5/model/__init__.py +1 -0
- osuT5/model/__pycache__/__init__.cpython-311.pyc +0 -0
- osuT5/model/__pycache__/__init__.cpython-39.pyc +0 -0
.gitattributes
CHANGED
|
@@ -1,35 +1,36 @@
|
|
| 1 |
-
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
osuT5/inference/vale.mp3 filter=lfs diff=lfs merge=lfs -text
|
README.md
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
---
|
checkpoint/custom_checkpoint_0.pkl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0494fdd396142b4a2919c0ab913502c9335746959156a08e82c2647235e07853
|
| 3 |
+
size 564880
|
checkpoint/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a12b6c312590efbdf5d7acaff6d8537e8ad1728737eebb43d0a43d5a4b3b5a3a
|
| 3 |
+
size 377860126
|
configs/inference.yaml
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model:
|
| 2 |
+
name: 'google/t5-v1_1-small'
|
| 3 |
+
spectrogram:
|
| 4 |
+
sample_rate: 16000
|
| 5 |
+
hop_length: 128
|
| 6 |
+
n_fft: 1024
|
| 7 |
+
n_mels: 388
|
| 8 |
+
do_style_embed: false
|
| 9 |
+
input_features: false
|
| 10 |
+
|
| 11 |
+
model_path: './checkpoint'
|
| 12 |
+
audio_path: '' # Path to input audio
|
| 13 |
+
total_duration_ms: 0 # Total duration of audio in milliseconds, 0 for full audio
|
| 14 |
+
output_path: '' # Path to output directory
|
| 15 |
+
bpm: 120 # Beats per minute of input audio
|
| 16 |
+
offset: 0 # Start of beat, in miliseconds, from the beginning of input audio
|
| 17 |
+
resnap_objects: false # Resnap objects beat timing ticks, requires accurate BPM and offset
|
| 18 |
+
slider_multiplier: 1.7 # Multiplier for slider velocity
|
| 19 |
+
title: '' # Song title
|
| 20 |
+
artist: '' # Song artist
|
| 21 |
+
beatmap_path: '' # Path to .osu file which will be remapped
|
| 22 |
+
other_beatmap_path: '' # Path to .osu file of other beatmap in the mapset to use as reference
|
| 23 |
+
beatmap_id: -1 # Beatmap ID to use as style
|
| 24 |
+
difficulty: -1 # Difficulty star rating to map
|
| 25 |
+
creator: '' # Beatmap creator
|
| 26 |
+
version: '' # Beatmap version
|
| 27 |
+
full_set: true # Generate full mapset
|
| 28 |
+
set_difficulties: 5 # Number of difficulties to generate.
|
| 29 |
+
|
| 30 |
+
# Diffusion settings
|
| 31 |
+
generate_positions: true # Use diffusion to generate object positions
|
| 32 |
+
diff_ckpt: './osudiffusion/DiT-B-0700000.pt' # Path to checkpoint for diffusion model
|
| 33 |
+
diff_refine_ckpt: '' # Path to checkpoint for refining diffusion model
|
| 34 |
+
|
| 35 |
+
diffusion:
|
| 36 |
+
style_id: 1451282 # Style ID to use for diffusion
|
| 37 |
+
num_sampling_steps: 100 # Number of sampling steps
|
| 38 |
+
cfg_scale: 1 # Scale of classifier-free guidance
|
| 39 |
+
num_classes: 52670 # Number of classes stored in the model
|
| 40 |
+
beatmap_idx: 'osudiffusion/beatmap_idx.pickle' # Path to beatmap index
|
| 41 |
+
use_amp: true # Use automatic mixed precision
|
| 42 |
+
refine_iters: 10 # Number of refinement iterations
|
| 43 |
+
seq_len: 128 # Sequence length
|
| 44 |
+
model: 'DiT-B' # Model architecture
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
data: # Data settings
|
| 48 |
+
src_seq_len: 640
|
| 49 |
+
tgt_seq_len: 480
|
| 50 |
+
sample_rate: ${model.spectrogram.sample_rate}
|
| 51 |
+
hop_length: ${model.spectrogram.hop_length}
|
| 52 |
+
sequence_stride: 1 # Fraction of audio sequence length to shift inference window
|
| 53 |
+
center_pad_decoder: false # Center pad decoder input
|
| 54 |
+
add_pre_tokens: true
|
| 55 |
+
special_token_len: 2
|
| 56 |
+
diff_token_index: 0
|
| 57 |
+
style_token_index: -1
|
| 58 |
+
max_pre_token_len: 4
|
| 59 |
+
add_gd_context: false # Prefix the decoder with tokens of another beatmap in the mapset
|
| 60 |
+
|
| 61 |
+
hydra:
|
| 62 |
+
job:
|
| 63 |
+
chdir: False
|
| 64 |
+
run:
|
| 65 |
+
dir: ./logs/${now:%Y-%m-%d}/${now:%H-%M-%S}
|
configs/model/model.yaml
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
input_features: false
|
| 2 |
+
do_style_embed: true
|
| 3 |
+
|
| 4 |
+
spectrogram:
|
| 5 |
+
sample_rate: 16000
|
| 6 |
+
hop_length: 128
|
| 7 |
+
n_fft: 1024
|
| 8 |
+
n_mels: 388
|
configs/model/t5_base.yaml
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defaults:
|
| 2 |
+
- model
|
| 3 |
+
- _self_
|
| 4 |
+
|
| 5 |
+
name: 'google/t5-v1_1-base'
|
| 6 |
+
overwrite:
|
| 7 |
+
dropout_rate: 0.0
|
| 8 |
+
|
| 9 |
+
spectrogram:
|
| 10 |
+
sample_rate: 16000
|
| 11 |
+
hop_length: 128
|
| 12 |
+
n_fft: 1024
|
| 13 |
+
n_mels: 388
|
| 14 |
+
do_style_embed: false
|
| 15 |
+
input_features: false
|
configs/model/t5_small.yaml
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defaults:
|
| 2 |
+
- model
|
| 3 |
+
- _self_
|
| 4 |
+
|
| 5 |
+
name: 'google/t5-v1_1-small'
|
| 6 |
+
overwrite:
|
| 7 |
+
dropout_rate: 0.0
|
| 8 |
+
|
| 9 |
+
spectrogram:
|
| 10 |
+
n_mels: 512
|
configs/model/t5_small_v4.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defaults:
|
| 2 |
+
- model
|
| 3 |
+
- _self_
|
| 4 |
+
|
| 5 |
+
name: 'google/t5-v1_1-small'
|
| 6 |
+
overwrite:
|
| 7 |
+
dropout_rate: 0.0
|
configs/model/t5_small_v9.yaml
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defaults:
|
| 2 |
+
- model
|
| 3 |
+
- _self_
|
| 4 |
+
|
| 5 |
+
do_style_embed: false
|
| 6 |
+
|
| 7 |
+
name: 'google/t5-v1_1-small'
|
| 8 |
+
overwrite:
|
| 9 |
+
dropout_rate: 0.0
|
configs/model/whisper_base.yaml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
defaults:
|
| 2 |
+
- model
|
| 3 |
+
- _self_
|
| 4 |
+
|
| 5 |
+
name: 'openai/whisper-base'
|
| 6 |
+
input_features: true
|
inference.py
ADDED
|
@@ -0,0 +1,117 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
|
| 3 |
+
import hydra
|
| 4 |
+
import torch
|
| 5 |
+
from omegaconf import DictConfig
|
| 6 |
+
from slider import Beatmap
|
| 7 |
+
|
| 8 |
+
from osudiffusion import DiT_models
|
| 9 |
+
from osuT5.inference import Preprocessor, Pipeline, Postprocessor, DiffisionPipeline
|
| 10 |
+
from osuT5.tokenizer import Tokenizer
|
| 11 |
+
from osuT5.utils import get_model
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def get_args_from_beatmap(args: DictConfig):
|
| 15 |
+
if args.beatmap_path is None or args.beatmap_path == "":
|
| 16 |
+
return
|
| 17 |
+
|
| 18 |
+
beatmap_path = Path(args.beatmap_path)
|
| 19 |
+
|
| 20 |
+
if not beatmap_path.is_file():
|
| 21 |
+
raise FileNotFoundError(f"Beatmap file {beatmap_path} not found.")
|
| 22 |
+
|
| 23 |
+
beatmap = Beatmap.from_path(beatmap_path)
|
| 24 |
+
args.audio_path = beatmap_path.parent / beatmap.audio_filename
|
| 25 |
+
args.output_path = beatmap_path.parent
|
| 26 |
+
args.bpm = beatmap.bpm_max()
|
| 27 |
+
args.offset = min(tp.offset.total_seconds() * 1000 for tp in beatmap.timing_points)
|
| 28 |
+
args.slider_multiplier = beatmap.slider_multiplier
|
| 29 |
+
args.title = beatmap.title
|
| 30 |
+
args.artist = beatmap.artist
|
| 31 |
+
args.beatmap_id = beatmap.beatmap_id if args.beatmap_id == -1 else args.beatmap_id
|
| 32 |
+
args.diffusion.style_id = beatmap.beatmap_id if args.diffusion.style_id == -1 else args.diffusion.style_id
|
| 33 |
+
args.difficulty = float(beatmap.stars()) if args.difficulty == -1 else args.difficulty
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def find_model(ckpt_path, args: DictConfig, device):
|
| 37 |
+
assert Path(ckpt_path).exists(), f"Could not find DiT checkpoint at {ckpt_path}"
|
| 38 |
+
checkpoint = torch.load(ckpt_path, map_location=lambda storage, loc: storage)
|
| 39 |
+
if "ema" in checkpoint: # supports checkpoints from train.py
|
| 40 |
+
checkpoint = checkpoint["ema"]
|
| 41 |
+
|
| 42 |
+
model = DiT_models[args.diffusion.model](
|
| 43 |
+
num_classes=args.diffusion.num_classes,
|
| 44 |
+
context_size=19 - 3 + 128,
|
| 45 |
+
).to(device)
|
| 46 |
+
model.load_state_dict(checkpoint)
|
| 47 |
+
model.eval() # important!
|
| 48 |
+
return model
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
@hydra.main(config_path="configs", config_name="inference", version_base="1.1")
|
| 52 |
+
def main(args: DictConfig):
|
| 53 |
+
get_args_from_beatmap(args)
|
| 54 |
+
|
| 55 |
+
torch.set_grad_enabled(False)
|
| 56 |
+
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
| 57 |
+
ckpt_path = Path(args.model_path)
|
| 58 |
+
model_state = torch.load(ckpt_path / "pytorch_model.bin", map_location=device)
|
| 59 |
+
tokenizer_state = torch.load(ckpt_path / "custom_checkpoint_0.pkl")
|
| 60 |
+
|
| 61 |
+
tokenizer = Tokenizer()
|
| 62 |
+
tokenizer.load_state_dict(tokenizer_state)
|
| 63 |
+
|
| 64 |
+
model = get_model(args, tokenizer)
|
| 65 |
+
model.load_state_dict(model_state)
|
| 66 |
+
model.eval()
|
| 67 |
+
model.to(device)
|
| 68 |
+
|
| 69 |
+
preprocessor = Preprocessor(args)
|
| 70 |
+
audio = preprocessor.load(args.audio_path)
|
| 71 |
+
sequences = preprocessor.segment(audio)
|
| 72 |
+
total_duration_ms = len(audio) / 16000 * 1000
|
| 73 |
+
args.total_duration_ms = total_duration_ms
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
generated_maps = []
|
| 80 |
+
generated_positions = []
|
| 81 |
+
diffs = []
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
if args.full_set:
|
| 85 |
+
for i in range(args.set_difficulties):
|
| 86 |
+
diffs.append(3 + i * (7 - 3) / (args.set_difficulties - 1))
|
| 87 |
+
|
| 88 |
+
print(diffs)
|
| 89 |
+
for diff in diffs:
|
| 90 |
+
print(f"Generating difficulty {diff}")
|
| 91 |
+
args.difficulty = diff
|
| 92 |
+
pipeline = Pipeline(args, tokenizer)
|
| 93 |
+
events = pipeline.generate(model, sequences)
|
| 94 |
+
generated_maps.append(events)
|
| 95 |
+
else:
|
| 96 |
+
pipeline = Pipeline(args, tokenizer)
|
| 97 |
+
events = pipeline.generate(model, sequences)
|
| 98 |
+
generated_maps.append(events)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
if args.generate_positions:
|
| 103 |
+
model = find_model(args.diff_ckpt, args, device)
|
| 104 |
+
refine_model = find_model(args.diff_refine_ckpt, args, device) if len(args.diff_refine_ckpt) > 0 else None
|
| 105 |
+
diffusion_pipeline = DiffisionPipeline(args.diffusion)
|
| 106 |
+
for events in generated_maps:
|
| 107 |
+
events = diffusion_pipeline.generate(model, events, refine_model)
|
| 108 |
+
generated_positions.append(events)
|
| 109 |
+
else:
|
| 110 |
+
generated_positions = generated_maps
|
| 111 |
+
|
| 112 |
+
postprocessor = Postprocessor(args)
|
| 113 |
+
postprocessor.generate(generated_positions)
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
if __name__ == "__main__":
|
| 117 |
+
main()
|
osuT5/__init__.py
ADDED
|
File without changes
|
osuT5/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (149 Bytes). View file
|
|
|
osuT5/__pycache__/__init__.cpython-39.pyc
ADDED
|
Binary file (131 Bytes). View file
|
|
|
osuT5/dataset/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .osu_parser import OsuParser
|
osuT5/dataset/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (277 Bytes). View file
|
|
|
osuT5/dataset/__pycache__/__init__.cpython-39.pyc
ADDED
|
Binary file (229 Bytes). View file
|
|
|
osuT5/dataset/__pycache__/data_utils.cpython-311.pyc
ADDED
|
Binary file (3.77 kB). View file
|
|
|
osuT5/dataset/__pycache__/data_utils.cpython-39.pyc
ADDED
|
Binary file (2.11 kB). View file
|
|
|
osuT5/dataset/__pycache__/ors_dataset.cpython-311.pyc
ADDED
|
Binary file (30 kB). View file
|
|
|
osuT5/dataset/__pycache__/ors_dataset.cpython-39.pyc
ADDED
|
Binary file (16.1 kB). View file
|
|
|
osuT5/dataset/__pycache__/osu_parser.cpython-311.pyc
ADDED
|
Binary file (12.3 kB). View file
|
|
|
osuT5/dataset/__pycache__/osu_parser.cpython-39.pyc
ADDED
|
Binary file (6.51 kB). View file
|
|
|
osuT5/dataset/data_utils.py
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pathlib import Path
|
| 2 |
+
from typing import Optional
|
| 3 |
+
|
| 4 |
+
import numpy as np
|
| 5 |
+
from pydub import AudioSegment
|
| 6 |
+
|
| 7 |
+
import numpy.typing as npt
|
| 8 |
+
|
| 9 |
+
from osuT5.tokenizer import Event, EventType
|
| 10 |
+
|
| 11 |
+
MILISECONDS_PER_SECOND = 1000
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def load_audio_file(file: Path, sample_rate: int) -> npt.NDArray:
|
| 15 |
+
"""Load an audio file as a numpy time-series array
|
| 16 |
+
|
| 17 |
+
The signals are resampled, converted to mono channel, and normalized.
|
| 18 |
+
|
| 19 |
+
Args:
|
| 20 |
+
file: Path to audio file.
|
| 21 |
+
sample_rate: Sample rate to resample the audio.
|
| 22 |
+
|
| 23 |
+
Returns:
|
| 24 |
+
samples: Audio time series.
|
| 25 |
+
"""
|
| 26 |
+
print(file)
|
| 27 |
+
audio = AudioSegment.from_file(file, format="mp3")
|
| 28 |
+
audio = audio.set_frame_rate(sample_rate)
|
| 29 |
+
audio = audio.set_channels(1)
|
| 30 |
+
samples = np.array(audio.get_array_of_samples()).astype(np.float32)
|
| 31 |
+
samples *= 1.0 / np.max(np.abs(samples))
|
| 32 |
+
return samples
|
| 33 |
+
|
| 34 |
+
|
| 35 |
+
def update_event_times(events: list[Event], event_times: list[float], end_time: Optional[float] = None):
|
| 36 |
+
non_timed_events = [
|
| 37 |
+
EventType.BEZIER_ANCHOR,
|
| 38 |
+
EventType.PERFECT_ANCHOR,
|
| 39 |
+
EventType.CATMULL_ANCHOR,
|
| 40 |
+
EventType.RED_ANCHOR,
|
| 41 |
+
]
|
| 42 |
+
timed_events = [
|
| 43 |
+
EventType.CIRCLE,
|
| 44 |
+
EventType.SPINNER,
|
| 45 |
+
EventType.SPINNER_END,
|
| 46 |
+
EventType.SLIDER_HEAD,
|
| 47 |
+
EventType.LAST_ANCHOR,
|
| 48 |
+
EventType.SLIDER_END,
|
| 49 |
+
]
|
| 50 |
+
|
| 51 |
+
start_index = len(event_times)
|
| 52 |
+
end_index = len(events)
|
| 53 |
+
ct = 0 if len(event_times) == 0 else event_times[-1]
|
| 54 |
+
for i in range(start_index, end_index):
|
| 55 |
+
event = events[i]
|
| 56 |
+
if event.type == EventType.TIME_SHIFT:
|
| 57 |
+
ct = event.value
|
| 58 |
+
event_times.append(ct)
|
| 59 |
+
|
| 60 |
+
# Interpolate time for control point events
|
| 61 |
+
# T-D-Start-D-CP-D-CP-T-D-LCP-T-D-End
|
| 62 |
+
# 1-1-1-----1-1--1-1--7-7--7--9-9-9--
|
| 63 |
+
# 1-1-1-----3-3--5-5--7-7--7--9-9-9--
|
| 64 |
+
ct = end_time if end_time is not None else event_times[-1]
|
| 65 |
+
interpolate = False
|
| 66 |
+
for i in range(end_index - 1, start_index - 1, -1):
|
| 67 |
+
event = events[i]
|
| 68 |
+
|
| 69 |
+
if event.type in timed_events:
|
| 70 |
+
interpolate = False
|
| 71 |
+
|
| 72 |
+
if event.type in non_timed_events:
|
| 73 |
+
interpolate = True
|
| 74 |
+
|
| 75 |
+
if not interpolate:
|
| 76 |
+
ct = event_times[i]
|
| 77 |
+
continue
|
| 78 |
+
|
| 79 |
+
if event.type not in non_timed_events:
|
| 80 |
+
event_times[i] = ct
|
| 81 |
+
continue
|
| 82 |
+
|
| 83 |
+
# Find the time of the first timed event and the number of control points between
|
| 84 |
+
j = i
|
| 85 |
+
count = 0
|
| 86 |
+
t = ct
|
| 87 |
+
while j >= 0:
|
| 88 |
+
event2 = events[j]
|
| 89 |
+
if event2.type == EventType.TIME_SHIFT:
|
| 90 |
+
t = event_times[j]
|
| 91 |
+
break
|
| 92 |
+
if event2.type in non_timed_events:
|
| 93 |
+
count += 1
|
| 94 |
+
j -= 1
|
| 95 |
+
if i < 0:
|
| 96 |
+
t = 0
|
| 97 |
+
|
| 98 |
+
# Interpolate the time
|
| 99 |
+
ct = (ct - t) / (count + 1) * count + t
|
| 100 |
+
event_times[i] = ct
|
osuT5/dataset/osu_parser.py
ADDED
|
@@ -0,0 +1,184 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from datetime import timedelta
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
import numpy.typing as npt
|
| 7 |
+
from slider import Beatmap, Circle, Slider, Spinner
|
| 8 |
+
from slider.curve import Linear, Catmull, Perfect, MultiBezier
|
| 9 |
+
|
| 10 |
+
from osuT5.tokenizer import Event, EventType, Tokenizer
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class OsuParser:
|
| 14 |
+
def __init__(self, tokenizer: Tokenizer) -> None:
|
| 15 |
+
dist_range = tokenizer.event_range[EventType.DISTANCE]
|
| 16 |
+
self.dist_min = dist_range.min_value
|
| 17 |
+
self.dist_max = dist_range.max_value
|
| 18 |
+
|
| 19 |
+
def parse(self, beatmap: Beatmap) -> list[Event]:
|
| 20 |
+
# noinspection PyUnresolvedReferences
|
| 21 |
+
"""Parse an .osu beatmap.
|
| 22 |
+
|
| 23 |
+
Each hit object is parsed into a list of Event objects, in order of its
|
| 24 |
+
appearance in the beatmap. In other words, in ascending order of time.
|
| 25 |
+
|
| 26 |
+
Args:
|
| 27 |
+
beatmap: Beatmap object parsed from an .osu file.
|
| 28 |
+
|
| 29 |
+
Returns:
|
| 30 |
+
events: List of Event object lists.
|
| 31 |
+
|
| 32 |
+
Example::
|
| 33 |
+
>>> beatmap = [
|
| 34 |
+
"64,80,11000,1,0",
|
| 35 |
+
"100,100,16000,2,0,B|200:200|250:200|250:200|300:150,2"
|
| 36 |
+
]
|
| 37 |
+
>>> events = parse(beatmap)
|
| 38 |
+
>>> print(events)
|
| 39 |
+
[
|
| 40 |
+
Event(EventType.TIME_SHIFT, 11000), Event(EventType.DISTANCE, 36), Event(EventType.CIRCLE),
|
| 41 |
+
Event(EventType.TIME_SHIFT, 16000), Event(EventType.DISTANCE, 42), Event(EventType.SLIDER_HEAD),
|
| 42 |
+
Event(EventType.TIME_SHIFT, 16500), Event(EventType.DISTANCE, 141), Event(EventType.BEZIER_ANCHOR),
|
| 43 |
+
Event(EventType.TIME_SHIFT, 17000), Event(EventType.DISTANCE, 50), Event(EventType.BEZIER_ANCHOR),
|
| 44 |
+
Event(EventType.TIME_SHIFT, 17500), Event(EventType.DISTANCE, 10), Event(EventType.BEZIER_ANCHOR),
|
| 45 |
+
Event(EventType.TIME_SHIFT, 18000), Event(EventType.DISTANCE, 64), Event(EventType.LAST _ANCHOR),
|
| 46 |
+
Event(EventType.TIME_SHIFT, 20000), Event(EventType.DISTANCE, 11), Event(EventType.SLIDER_END)
|
| 47 |
+
]
|
| 48 |
+
"""
|
| 49 |
+
hit_objects = beatmap.hit_objects(stacking=False)
|
| 50 |
+
last_pos = np.array((256, 192))
|
| 51 |
+
events = []
|
| 52 |
+
|
| 53 |
+
for hit_object in hit_objects:
|
| 54 |
+
if isinstance(hit_object, Circle):
|
| 55 |
+
last_pos = self._parse_circle(hit_object, events, last_pos)
|
| 56 |
+
elif isinstance(hit_object, Slider):
|
| 57 |
+
last_pos = self._parse_slider(hit_object, events, last_pos)
|
| 58 |
+
elif isinstance(hit_object, Spinner):
|
| 59 |
+
last_pos = self._parse_spinner(hit_object, events)
|
| 60 |
+
|
| 61 |
+
return events
|
| 62 |
+
|
| 63 |
+
def _clip_dist(self, dist: int) -> int:
|
| 64 |
+
"""Clip distance to valid range."""
|
| 65 |
+
return int(np.clip(dist, self.dist_min, self.dist_max))
|
| 66 |
+
|
| 67 |
+
def _parse_circle(self, circle: Circle, events: list[Event], last_pos: npt.NDArray) -> npt.NDArray:
|
| 68 |
+
"""Parse a circle hit object.
|
| 69 |
+
|
| 70 |
+
Args:
|
| 71 |
+
circle: Circle object.
|
| 72 |
+
events: List of events to add to.
|
| 73 |
+
last_pos: Last position of the hit objects.
|
| 74 |
+
|
| 75 |
+
Returns:
|
| 76 |
+
pos: Position of the circle.
|
| 77 |
+
"""
|
| 78 |
+
time = int(circle.time.total_seconds() * 1000)
|
| 79 |
+
pos = np.array(circle.position)
|
| 80 |
+
dist = self._clip_dist(np.linalg.norm(pos - last_pos))
|
| 81 |
+
|
| 82 |
+
events.append(Event(EventType.TIME_SHIFT, time))
|
| 83 |
+
events.append(Event(EventType.DISTANCE, dist))
|
| 84 |
+
if circle.new_combo:
|
| 85 |
+
events.append(Event(EventType.NEW_COMBO))
|
| 86 |
+
events.append(Event(EventType.CIRCLE))
|
| 87 |
+
|
| 88 |
+
return pos
|
| 89 |
+
|
| 90 |
+
def _parse_slider(self, slider: Slider, events: list[Event], last_pos: npt.NDArray) -> npt.NDArray:
|
| 91 |
+
"""Parse a slider hit object.
|
| 92 |
+
|
| 93 |
+
Args:
|
| 94 |
+
slider: Slider object.
|
| 95 |
+
events: List of events to add to.
|
| 96 |
+
last_pos: Last position of the hit objects.
|
| 97 |
+
|
| 98 |
+
Returns:
|
| 99 |
+
pos: Last position of the slider.
|
| 100 |
+
"""
|
| 101 |
+
# Ignore sliders which are too big
|
| 102 |
+
if len(slider.curve.points) >= 100:
|
| 103 |
+
return last_pos
|
| 104 |
+
|
| 105 |
+
time = int(slider.time.total_seconds() * 1000)
|
| 106 |
+
pos = np.array(slider.position)
|
| 107 |
+
dist = self._clip_dist(np.linalg.norm(pos - last_pos))
|
| 108 |
+
last_pos = pos
|
| 109 |
+
|
| 110 |
+
events.append(Event(EventType.TIME_SHIFT, time))
|
| 111 |
+
events.append(Event(EventType.DISTANCE, dist))
|
| 112 |
+
if slider.new_combo:
|
| 113 |
+
events.append(Event(EventType.NEW_COMBO))
|
| 114 |
+
events.append(Event(EventType.SLIDER_HEAD))
|
| 115 |
+
|
| 116 |
+
duration: timedelta = (slider.end_time - slider.time) / slider.repeat
|
| 117 |
+
control_point_count = len(slider.curve.points)
|
| 118 |
+
|
| 119 |
+
def append_control_points(event_type: EventType, last_pos: npt.NDArray = last_pos) -> npt.NDArray:
|
| 120 |
+
for i in range(1, control_point_count - 1):
|
| 121 |
+
last_pos = add_anchor_time_dist(i, last_pos)
|
| 122 |
+
events.append(Event(event_type))
|
| 123 |
+
|
| 124 |
+
return last_pos
|
| 125 |
+
|
| 126 |
+
def add_anchor_time_dist(i: int, last_pos: npt.NDArray) -> npt.NDArray:
|
| 127 |
+
time = int((slider.time + i / (control_point_count - 1) * duration).total_seconds() * 1000)
|
| 128 |
+
pos = np.array(slider.curve.points[i])
|
| 129 |
+
dist = self._clip_dist(np.linalg.norm(pos - last_pos))
|
| 130 |
+
last_pos = pos
|
| 131 |
+
|
| 132 |
+
events.append(Event(EventType.TIME_SHIFT, time))
|
| 133 |
+
events.append(Event(EventType.DISTANCE, dist))
|
| 134 |
+
|
| 135 |
+
return last_pos
|
| 136 |
+
|
| 137 |
+
if isinstance(slider.curve, Linear):
|
| 138 |
+
last_pos = append_control_points(EventType.RED_ANCHOR, last_pos)
|
| 139 |
+
elif isinstance(slider.curve, Catmull):
|
| 140 |
+
last_pos = append_control_points(EventType.CATMULL_ANCHOR, last_pos)
|
| 141 |
+
elif isinstance(slider.curve, Perfect):
|
| 142 |
+
last_pos = append_control_points(EventType.PERFECT_ANCHOR, last_pos)
|
| 143 |
+
elif isinstance(slider.curve, MultiBezier):
|
| 144 |
+
for i in range(1, control_point_count - 1):
|
| 145 |
+
if slider.curve.points[i] == slider.curve.points[i + 1]:
|
| 146 |
+
last_pos = add_anchor_time_dist(i, last_pos)
|
| 147 |
+
events.append(Event(EventType.RED_ANCHOR))
|
| 148 |
+
elif slider.curve.points[i] != slider.curve.points[i - 1]:
|
| 149 |
+
last_pos = add_anchor_time_dist(i, last_pos)
|
| 150 |
+
events.append(Event(EventType.BEZIER_ANCHOR))
|
| 151 |
+
|
| 152 |
+
last_pos = add_anchor_time_dist(control_point_count - 1, last_pos)
|
| 153 |
+
events.append(Event(EventType.LAST_ANCHOR))
|
| 154 |
+
|
| 155 |
+
time = int(slider.end_time.total_seconds() * 1000)
|
| 156 |
+
pos = np.array(slider.curve(1))
|
| 157 |
+
dist = self._clip_dist(np.linalg.norm(pos - last_pos))
|
| 158 |
+
last_pos = pos
|
| 159 |
+
|
| 160 |
+
events.append(Event(EventType.TIME_SHIFT, time))
|
| 161 |
+
events.append(Event(EventType.DISTANCE, dist))
|
| 162 |
+
events.append(Event(EventType.SLIDER_END))
|
| 163 |
+
|
| 164 |
+
return last_pos
|
| 165 |
+
|
| 166 |
+
def _parse_spinner(self, spinner: Spinner, events: list[Event]) -> npt.NDArray:
    """Append the events describing a spinner hit object.

    Args:
        spinner: Spinner object.
        events: List of events to add to.

    Returns:
        pos: Last position of the spinner (the playfield centre).
    """
    # A spinner is encoded as two (time shift, marker) pairs: start and end.
    for timestamp, marker in (
        (spinner.time, EventType.SPINNER),
        (spinner.end_time, EventType.SPINNER_END),
    ):
        millis = int(timestamp.total_seconds() * 1000)
        events.append(Event(EventType.TIME_SHIFT, millis))
        events.append(Event(marker))

    # Spinners always sit at the centre of the 512x384 playfield.
    return np.array((256, 192))
|
osuT5/inference/__init__.py
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .pipeline import *
|
| 2 |
+
from .preprocessor import *
|
| 3 |
+
from .postprocessor import *
|
| 4 |
+
from .diffusion_pipeline import *
|
osuT5/inference/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (301 Bytes). View file
|
|
|
osuT5/inference/__pycache__/__init__.cpython-39.pyc
ADDED
|
Binary file (245 Bytes). View file
|
|
|
osuT5/inference/__pycache__/diffusion_pipeline.cpython-311.pyc
ADDED
|
Binary file (11.5 kB). View file
|
|
|
osuT5/inference/__pycache__/path_approximator.cpython-311.pyc
ADDED
|
Binary file (11.5 kB). View file
|
|
|
osuT5/inference/__pycache__/path_approximator.cpython-39.pyc
ADDED
|
Binary file (5.02 kB). View file
|
|
|
osuT5/inference/__pycache__/pipeline.cpython-311.pyc
ADDED
|
Binary file (23.4 kB). View file
|
|
|
osuT5/inference/__pycache__/pipeline.cpython-39.pyc
ADDED
|
Binary file (11.8 kB). View file
|
|
|
osuT5/inference/__pycache__/postprocessor.cpython-311.pyc
ADDED
|
Binary file (17 kB). View file
|
|
|
osuT5/inference/__pycache__/postprocessor.cpython-39.pyc
ADDED
|
Binary file (8.08 kB). View file
|
|
|
osuT5/inference/__pycache__/preprocessor.cpython-311.pyc
ADDED
|
Binary file (3.37 kB). View file
|
|
|
osuT5/inference/__pycache__/preprocessor.cpython-39.pyc
ADDED
|
Binary file (2.23 kB). View file
|
|
|
osuT5/inference/__pycache__/slider_path.cpython-311.pyc
ADDED
|
Binary file (10.9 kB). View file
|
|
|
osuT5/inference/__pycache__/slider_path.cpython-39.pyc
ADDED
|
Binary file (5.16 kB). View file
|
|
|
osuT5/inference/diffusion_pipeline.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pickle
|
| 2 |
+
from pathlib import Path
|
| 3 |
+
|
| 4 |
+
import torch
|
| 5 |
+
from omegaconf import DictConfig
|
| 6 |
+
from tqdm import tqdm
|
| 7 |
+
|
| 8 |
+
from osudiffusion import timestep_embedding
|
| 9 |
+
from osudiffusion import repeat_type
|
| 10 |
+
from osudiffusion import create_diffusion
|
| 11 |
+
from osudiffusion import DiT
|
| 12 |
+
from osuT5.dataset.data_utils import update_event_times
|
| 13 |
+
from osuT5.tokenizer import Event, EventType
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def get_beatmap_idx(path) -> dict[int, int]:
|
| 17 |
+
p = Path(path)
|
| 18 |
+
with p.open("rb") as f:
|
| 19 |
+
beatmap_idx = pickle.load(f)
|
| 20 |
+
return beatmap_idx
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
class DiffisionPipeline(object):
    """Diffusion-based inference stage that turns DISTANCE events into POS_X/POS_Y events.

    NOTE(review): the class name keeps the original (misspelled) "Diffision"
    spelling because external callers import it under this name.
    """

    def __init__(self, args: DictConfig):
        """Model inference stage that generates positions for distance events."""
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.num_sampling_steps = args.num_sampling_steps
        # Classifier-free guidance scale forwarded to the model.
        self.cfg_scale = args.cfg_scale
        # Attention window half-width used for the banded attention mask.
        self.seq_len = args.seq_len
        self.num_classes = args.num_classes
        # Mapping of beatmap id -> class index, loaded from a pickle file.
        self.beatmap_idx = get_beatmap_idx(args.beatmap_idx)
        self.style_id = args.style_id
        self.refine_iters = args.refine_iters
        self.use_amp = args.use_amp

        if self.style_id in self.beatmap_idx:
            self.class_label = self.beatmap_idx[self.style_id]
        else:
            # Unknown style falls back to the "null" class used for CFG.
            print(f"Beatmap ID {self.style_id} not found in dataset, using default style.")
            self.class_label = self.num_classes

    def generate(self, model: DiT, events: list[Event], refine_model: DiT = None) -> list[Event]:
        """Generate position events for distance events in the Event list.

        Args:
            model: Trained model to use for inference.
            events: List of Event objects with distance events.
            refine_model: Optional model to refine the generated positions.

        Returns:
            events: List of Event objects with position events.
        """

        seq_o, seq_c, seq_len, seq_indices = self.events_to_sequence(events)

        seq_o = seq_o - seq_o[0]  # Normalize to relative time
        print(f"seq len {seq_len}")

        diffusion = create_diffusion(
            str(self.num_sampling_steps),
            noise_schedule="squaredcos_cap_v2",
        )

        # Create banded matrix attention mask for increased sequence length
        attn_mask = torch.full((seq_len, seq_len), True, dtype=torch.bool, device=self.device)
        for i in range(seq_len):
            # False = attend; each position sees +/- self.seq_len neighbours.
            attn_mask[max(0, i - self.seq_len): min(seq_len, i + self.seq_len), i] = False

        class_labels = [self.class_label]

        # Create sampling noise:
        n = len(class_labels)
        z = torch.randn(n, 2, seq_len, device=self.device)
        o = seq_o.repeat(n, 1).to(self.device)
        c = seq_c.repeat(n, 1, 1).to(self.device)
        y = torch.tensor(class_labels, device=self.device)

        # Setup classifier-free guidance: duplicate the batch, second half
        # conditioned on the null class (self.num_classes).
        z = torch.cat([z, z], 0)
        o = torch.cat([o, o], 0)
        c = torch.cat([c, c], 0)
        y_null = torch.tensor([self.num_classes] * n, device=self.device)
        y = torch.cat([y, y_null], 0)
        model_kwargs = dict(o=o, c=c, y=y, cfg_scale=self.cfg_scale, attn_mask=attn_mask)

        def to_positions(samples):
            # Samples are in normalized [0, 1]-ish space; scale back to the
            # 512x384 osu! playfield.
            samples, _ = samples.chunk(2, dim=0)  # Remove null class samples
            samples *= torch.tensor((512, 384), device=self.device).repeat(n, 1).unsqueeze(2)
            return samples.cpu()

        # Sample images:
        samples = diffusion.p_sample_loop(
            model.forward_with_cfg,
            z.shape,
            z,
            clip_denoised=True,
            model_kwargs=model_kwargs,
            progress=True,
            device=self.device,
        )

        if refine_model is not None:
            # Refine result with refine model
            # NOTE(review): this loop calls `model.forward_with_cfg`, not
            # `refine_model` — confirm whether refine_model was intended here.
            for _ in tqdm(range(self.refine_iters)):
                # t=0: repeatedly denoise at the final timestep.
                t = torch.tensor([0] * samples.shape[0], device=self.device)
                with torch.no_grad():
                    out = diffusion.p_sample(
                        model.forward_with_cfg,
                        samples,
                        t,
                        clip_denoised=True,
                        model_kwargs=model_kwargs,
                    )
                samples = out["sample"]

        positions = to_positions(samples)
        return self.events_with_pos(events, positions.squeeze(0), seq_indices)

    @staticmethod
    def events_to_sequence(events: list[Event]) -> tuple[torch.Tensor, torch.Tensor, int, dict[int, int]]:
        """Vectorize an Event list into the (o, c) tensors expected by osu-diffusion.

        Returns:
            seq_o: 1-D tensor of event times.
            seq_c: per-event feature tensor (distance embedding + one-hot type).
            seq_len: number of vectorized events.
            seq_indices: maps each original event index to its column in the sequence.
        """
        # Calculate the time of every event and interpolate time for control point events
        event_times = []
        update_event_times(events, event_times)

        # Calculate the number of repeats for each slider end event
        # Convert to vectorized form for osu-diffusion
        # Types that get a +1 index offset when preceded by NEW_COMBO.
        nc_types = [EventType.CIRCLE, EventType.SLIDER_HEAD]
        event_index = {
            EventType.CIRCLE: 0,
            EventType.SPINNER: 2,
            EventType.SPINNER_END: 3,
            EventType.SLIDER_HEAD: 4,
            EventType.BEZIER_ANCHOR: 6,
            EventType.PERFECT_ANCHOR: 7,
            EventType.CATMULL_ANCHOR: 8,
            EventType.RED_ANCHOR: 9,
            EventType.LAST_ANCHOR: 10,
            EventType.SLIDER_END: 11,
        }

        seq_indices = {}
        indices = []
        data_chunks = []
        distance = 0
        new_combo = False
        head_time = 0
        last_anchor_time = 0
        for i, event in enumerate(events):
            indices.append(i)
            if event.type == EventType.DISTANCE:
                distance = event.value
            elif event.type == EventType.NEW_COMBO:
                new_combo = True
            elif event.type in event_index:
                time = event_times[i]
                index = event_index[event.type]

                # Handle NC index offset
                if event.type in nc_types and new_combo:
                    index += 1
                    new_combo = False

                # Add slider end repeats index offset
                if event.type == EventType.SLIDER_END:
                    span_duration = last_anchor_time - head_time
                    total_duration = time - head_time
                    # Guard against zero/negative span to avoid division by zero.
                    repeats = max(int(round(total_duration / span_duration)), 1) if span_duration > 0 else 1
                    index += repeat_type(repeats)
                elif event.type == EventType.SLIDER_HEAD:
                    head_time = time
                elif event.type == EventType.LAST_ANCHOR:
                    last_anchor_time = time

                # Feature layout: [0]=time, [1]=distance, [2:]=one-hot type slot.
                features = torch.zeros(18)
                features[0] = time
                features[1] = distance
                features[index + 2] = 1
                data_chunks.append(features)

                # All accumulated raw-event indices map to this sequence column.
                for j in indices:
                    seq_indices[j] = len(data_chunks) - 1
                indices = []

        seq = torch.stack(data_chunks, 0)
        seq = torch.swapaxes(seq, 0, 1)
        seq_o = seq[0, :]
        seq_d = seq[1, :]
        # Expand scalar distance into a 128-dim sinusoidal embedding.
        seq_c = torch.concatenate(
            [
                timestep_embedding(seq_d, 128).T,
                seq[2:, :],
            ],
            0,
        )

        return seq_o, seq_c, seq.shape[1], seq_indices

    @staticmethod
    def events_with_pos(events: list[Event], sampled_seq: torch.Tensor, seq_indices: dict[int, int]) -> list[Event]:
        """Replace every DISTANCE event with sampled POS_X/POS_Y events.

        All other events are passed through unchanged.
        """
        new_events = []
        for i, event in enumerate(events):
            if event.type == EventType.DISTANCE:
                try:
                    index = seq_indices[i]
                    pos_x = sampled_seq[0, index].item()
                    pos_y = sampled_seq[1, index].item()
                    new_events.append(Event(EventType.POS_X, int(round(pos_x))))
                    new_events.append(Event(EventType.POS_Y, int(round(pos_y))))
                except KeyError:
                    # A DISTANCE event that never produced a sequence column is dropped.
                    print(f"Warning: Key {i} not found in seq_indices. Skipping event.")
            else:
                new_events.append(event)
        return new_events
|
osuT5/inference/path_approximator.py
ADDED
|
@@ -0,0 +1,253 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
|
| 3 |
+
# Flattening tolerances for the path approximators below.
BEZIER_TOLERANCE = 0.25
CATMULL_DETAIL = 50
CIRCULAR_ARC_TOLERANCE = 0.1


def length_squared(x) -> float:
    """Return the squared Euclidean length of vector *x*.

    Named function instead of a lambda assignment (PEP 8 E731); avoids the
    sqrt of a full norm when only comparisons are needed.
    """
    return np.inner(x, x)
|
| 9 |
+
|
| 10 |
+
|
| 11 |
+
def approximate_bezier(control_points: np.ndarray) -> np.ndarray:
    """Flatten a Bezier curve into a piecewise-linear point array.

    Delegates to ``approximate_b_spline`` with the default p=0, which treats
    the whole control-point list as a single Bezier segment.
    """
    return approximate_b_spline(control_points)
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def approximate_b_spline(control_points: np.ndarray, p: int = 0) -> np.ndarray:
    """Flatten a clamped B-spline (or single Bezier for p=0) into line points.

    Appears to be a port of osu!framework's PathApproximator — TODO confirm.

    Args:
        control_points: (N, 2) array of control points.
        p: B-spline degree; values outside (0, N-1) treat the whole input as
           one Bezier of degree N-1.

    Returns:
        (M, 2) array of points on the flattened path.
        NOTE(review): when the input is empty this returns a plain empty
        list, not an ndarray — callers should only rely on len().
    """
    output = []
    n = len(control_points) - 1

    if n < 0:
        return output

    # Stack of Bezier segments still to be flattened; buffers are recycled
    # via free_buffers to limit allocations.
    to_flatten = []
    free_buffers = []

    points = control_points.copy()

    if 0 < p < n:
        # B-spline case: use knot insertion (Boehm's algorithm) to extract
        # per-segment Bezier control points.
        for i in range(n - p):
            sub_bezier = np.empty((p + 1, 2))
            sub_bezier[0] = points[i]

            for j in range(p - 1):
                sub_bezier[j + 1] = points[i + 1]

                for k in range(1, p - j):
                    l = np.min((k, n - p - i))
                    points[i + k] = (l * points[i + k] + points[i + k + 1]) / (l + 1)

            sub_bezier[p] = points[i + 1]
            to_flatten.append(sub_bezier)

        to_flatten.append(points[(n - p) :])
        # Reverse so segments are popped in path order.
        to_flatten.reverse()
    else:
        # Degenerate degree: treat the input as one Bezier of degree n.
        p = n
        to_flatten.append(points)

    # Scratch buffers shared by subdivision and approximation.
    subdivision_buffer1 = np.empty([p + 1, 2])
    subdivision_buffer2 = np.empty([p * 2 + 1, 2])

    left_child = subdivision_buffer2

    # Adaptive subdivision: split segments in half until flat enough.
    while len(to_flatten) > 0:
        parent = to_flatten.pop()

        if bezier_is_flat_enough(parent):
            bezier_approximate(
                parent,
                output,
                subdivision_buffer1,
                subdivision_buffer2,
                p + 1,
            )

            free_buffers.append(parent)
            continue

        right_child = (
            free_buffers.pop() if len(free_buffers) > 0 else np.empty([p + 1, 2])
        )
        bezier_subdivide(parent, left_child, right_child, subdivision_buffer1, p + 1)

        # Reuse the parent buffer to hold the left half.
        for i in range(p + 1):
            parent[i] = left_child[i]

        to_flatten.append(right_child)
        to_flatten.append(parent)

    # Always include the exact final control point.
    output.append(control_points[n].copy())
    return np.vstack(output)
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def approximate_catmull(control_points: np.ndarray) -> list[np.ndarray]:
    """Sample a Catmull-Rom spline as a list of points.

    Each segment between consecutive control points is sampled at
    CATMULL_DETAIL subdivisions; interior sample points are emitted twice
    (once as a step end, once as the next step start).
    """
    points = []
    count = len(control_points)

    for seg in range(count - 1):
        # Neighbouring control points, with phantom points mirrored past the
        # ends of the path.
        p1 = control_points[seg] if seg == 0 else control_points[seg - 1]
        p2 = control_points[seg]
        p3 = control_points[seg + 1] if seg < count - 1 else p2 + p2 - p1
        p4 = control_points[seg + 2] if seg < count - 2 else p3 + p3 - p2

        for step in range(CATMULL_DETAIL):
            points.append(catmull_find_point(p1, p2, p3, p4, step / CATMULL_DETAIL))
            points.append(catmull_find_point(p1, p2, p3, p4, (step + 1) / CATMULL_DETAIL))

    return points
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def approximate_circular_arc(control_points: np.ndarray) -> list[np.ndarray]:
    """Approximate the circular arc through three points as a point list.

    Args:
        control_points: Three 2D points: start, pass-through, end.

    Returns:
        List of points on the arc from start to end, or an empty list when
        the points are coincident/collinear and no unique circle exists.
    """
    a = control_points[0]
    b = control_points[1]
    c = control_points[2]

    # Squared side lengths of triangle abc (each opposite its named vertex).
    aSq = length_squared(b - c)
    bSq = length_squared(a - c)
    cSq = length_squared(a - b)

    if np.isclose(aSq, 0) or np.isclose(bSq, 0) or np.isclose(cSq, 0):
        return []

    # Barycentric weights of the circumcentre.
    s = aSq * (bSq + cSq - aSq)
    t = bSq * (aSq + cSq - bSq)
    u = cSq * (aSq + bSq - cSq)

    weight_sum = s + t + u  # renamed from `sum` to stop shadowing the builtin

    if np.isclose(weight_sum, 0):
        return []

    centre = (s * a + t * b + u * c) / weight_sum
    dA = a - centre
    dC = c - centre

    r = np.linalg.norm(dA)

    theta_start = np.arctan2(dA[1], dA[0])
    theta_end = np.arctan2(dC[1], dC[0])

    while theta_end < theta_start:
        theta_end += 2 * np.pi

    direction = 1
    # Fixed: original had a duplicated assignment `theta_range = theta_range = ...`.
    theta_range = theta_end - theta_start

    # Orientation check: if b lies on the clockwise side of chord a->c,
    # sweep the arc the other way around.
    ortho_ato_c = c - a
    ortho_ato_c = np.array([ortho_ato_c[1], -ortho_ato_c[0]])
    if np.dot(ortho_ato_c, b - a) < 0:
        direction = -direction
        theta_range = 2 * np.pi - theta_range

    # Sample count needed so sagitta error stays within CIRCULAR_ARC_TOLERANCE;
    # tiny circles are approximated by their endpoints (also keeps arccos in domain).
    amount_points = (
        2
        if 2 * r <= CIRCULAR_ARC_TOLERANCE
        else int(
            max(
                2,
                np.ceil(theta_range / (2 * np.arccos(1 - CIRCULAR_ARC_TOLERANCE / r))),
            ),
        )
    )

    output = []

    for i in range(amount_points):
        fract = i / (amount_points - 1)
        theta = theta_start + direction * fract * theta_range
        o = np.array([np.cos(theta), np.sin(theta)]) * r
        output.append(centre + o)

    return output
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def approximate_linear(control_points: np.ndarray) -> list[np.ndarray]:
    """Return the control points themselves as a path of independent copies."""
    return [point.copy() for point in control_points]
|
| 170 |
+
|
| 171 |
+
|
| 172 |
+
def bezier_is_flat_enough(control_points: np.ndarray) -> bool:
    """True when every interior second difference is within the flatness bound."""
    threshold = BEZIER_TOLERANCE * BEZIER_TOLERANCE * 4
    return all(
        length_squared(
            control_points[idx - 1] - 2 * control_points[idx] + control_points[idx + 1]
        ) <= threshold
        for idx in range(1, len(control_points) - 1)
    )
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
def bezier_subdivide(
    control_points: np.ndarray,
    left: np.ndarray,
    right: np.ndarray,
    subdivision_buffer: np.ndarray,
    count: int,
) -> None:
    """Split a Bezier curve at t=0.5 using de Casteljau's algorithm.

    The two halves are written into *left* and *right* (first *count* rows);
    *subdivision_buffer* is scratch space of at least *count* rows.
    """
    scratch = subdivision_buffer
    scratch[:count] = control_points[:count]

    for level in range(count):
        # Outermost scratch points become the next control point of each half.
        left[level] = scratch[0].copy()
        right[count - level - 1] = scratch[count - level - 1]

        # Collapse the polygon by averaging adjacent points.
        for j in range(count - level - 1):
            scratch[j] = (scratch[j] + scratch[j + 1]) / 2
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
def bezier_approximate(
    control_points: np.ndarray,
    output: list[np.ndarray],
    subdivision_buffer1: np.ndarray,
    subdivision_buffer2: np.ndarray,
    count: int,
) -> None:
    """Emit a flat-enough Bezier segment into *output* as line points."""
    halves = subdivision_buffer2   # will hold both halves laid end-to-end
    upper = subdivision_buffer1    # right half; also reused as subdivide scratch

    bezier_subdivide(control_points, halves, upper, subdivision_buffer1, count)

    # Pack the right half directly after the left one in `halves`.
    for offset in range(count - 1):
        halves[count + offset] = upper[offset + 1]

    output.append(control_points[0].copy())

    # Interior points: weighted average smooths the join between halves.
    for i in range(1, count - 1):
        index = 2 * i
        midpoint = 0.25 * (halves[index - 1] + 2 * halves[index] + halves[index + 1])
        output.append(midpoint.copy())
|
| 222 |
+
|
| 223 |
+
|
| 224 |
+
def catmull_find_point(
    vec1: np.ndarray,
    vec2: np.ndarray,
    vec3: np.ndarray,
    vec4: np.ndarray,
    t: float,
) -> np.ndarray:
    """Evaluate a Catmull-Rom spline segment at parameter *t* in [0, 1].

    vec2 and vec3 are the segment endpoints; vec1 and vec4 are their
    neighbours, which shape the tangents at the endpoints.
    """
    t2 = t * t
    t3 = t * t2

    def component(axis: int) -> float:
        # Standard Catmull-Rom basis expanded in powers of t.
        return 0.5 * (
            2 * vec2[axis]
            + (-vec1[axis] + vec3[axis]) * t
            + (2 * vec1[axis] - 5 * vec2[axis] + 4 * vec3[axis] - vec4[axis]) * t2
            + (-vec1[axis] + 3 * vec2[axis] - 3 * vec3[axis] + vec4[axis]) * t3
        )

    return np.array([component(0), component(1)])
|
osuT5/inference/pipeline.py
ADDED
|
@@ -0,0 +1,338 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
import torch.nn.functional as F
|
| 7 |
+
from slider import Beatmap
|
| 8 |
+
from tqdm import tqdm
|
| 9 |
+
|
| 10 |
+
from omegaconf import DictConfig
|
| 11 |
+
|
| 12 |
+
from osuT5.dataset import OsuParser
|
| 13 |
+
from osuT5.dataset.data_utils import update_event_times
|
| 14 |
+
from osuT5.tokenizer import Event, EventType, Tokenizer
|
| 15 |
+
from osuT5.model import OsuT
|
| 16 |
+
|
| 17 |
+
MILISECONDS_PER_SECOND = 1000
|
| 18 |
+
MILISECONDS_PER_STEP = 10
|
| 19 |
+
|
| 20 |
+
def top_k_sampling(logits, k):
    """Sample one token id per row, drawn from the k highest-scoring logits."""
    values, indices = torch.topk(logits, k)
    # Renormalise over the kept candidates only, then draw one of them.
    choice = torch.multinomial(F.softmax(values, dim=-1), 1)
    return indices.gather(-1, choice)
|
| 26 |
+
|
| 27 |
+
def preprocess_event(event, frame_time):
    """Rebase a TIME_SHIFT event onto the current frame, in step units.

    Events of any other type are returned unchanged.
    """
    if event.type != EventType.TIME_SHIFT:
        return event
    steps = int((event.value - frame_time) / MILISECONDS_PER_STEP)
    return Event(type=event.type, value=steps)
|
| 31 |
+
|
| 32 |
+
class Pipeline(object):
|
| 33 |
+
def __init__(self, args: DictConfig, tokenizer: Tokenizer):
    """Model inference stage that processes sequences."""
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.tokenizer = tokenizer
    # Decoder (target) sequence length budget.
    self.tgt_seq_len = args.data.tgt_seq_len
    # Number of spectrogram frames per model input window.
    self.frame_seq_len = args.data.src_seq_len - 1
    self.frame_size = args.model.spectrogram.hop_length
    self.sample_rate = args.model.spectrogram.sample_rate
    # Audio samples covered by one input window.
    self.samples_per_sequence = self.frame_seq_len * self.frame_size
    # Hop between consecutive windows, as a fraction of the window length.
    self.sequence_stride = int(self.samples_per_sequence * args.data.sequence_stride)
    self.miliseconds_per_sequence = self.samples_per_sequence * MILISECONDS_PER_SECOND / self.sample_rate
    self.miliseconds_per_stride = self.sequence_stride * MILISECONDS_PER_SECOND / self.sample_rate
    self.beatmap_id = args.beatmap_id
    self.difficulty = args.difficulty
    self.center_pad_decoder = args.data.center_pad_decoder
    # Layout of the special (style/difficulty) token prefix.
    self.special_token_len = args.data.special_token_len
    self.diff_token_index = args.data.diff_token_index
    self.style_token_index = args.data.style_token_index
    self.max_pre_token_len = args.data.max_pre_token_len
    self.add_pre_tokens = args.data.add_pre_tokens
    # When True, a reference ("guest difficulty") beatmap provides extra context.
    self.add_gd_context = args.data.add_gd_context
    self.bpm = args.bpm
    self.offset = args.offset
    self.total_duration_ms = args.total_duration_ms

    print(f"Configuration: {args}")

    if self.add_gd_context:
        other_beatmap_path = Path(args.other_beatmap_path)

        if not other_beatmap_path.is_file():
            raise FileNotFoundError(f"Beatmap file {other_beatmap_path} not found.")

        # Parse the reference beatmap once and cache its events for reuse
        # during generation.
        other_beatmap = Beatmap.from_path(other_beatmap_path)
        self.other_beatmap_id = other_beatmap.beatmap_id
        self.other_difficulty = float(other_beatmap.stars())
        parser = OsuParser(tokenizer)
        self.other_events = parser.parse(other_beatmap)
        self.other_events, self.other_event_times = self._prepare_events(self.other_events)
|
| 72 |
+
|
| 73 |
+
def _calculate_time_shifts(self, bpm: float, duration_ms: float, tick_rate: int, offset: float = 0) -> list[float]:
|
| 74 |
+
"""Calculate EventType.TIME_SHIFT events based on song's BPM and tick rate."""
|
| 75 |
+
events = []
|
| 76 |
+
ms_per_beat = 60000 / bpm # 60000 ms per minute
|
| 77 |
+
ms_per_tick = ms_per_beat / tick_rate
|
| 78 |
+
num_ticks = int(duration_ms // ms_per_tick)
|
| 79 |
+
|
| 80 |
+
for i in range(num_ticks):
|
| 81 |
+
events.append(float(int(i * ms_per_tick + offset)) )
|
| 82 |
+
|
| 83 |
+
return events
|
| 84 |
+
|
| 85 |
+
def generate_events(self, model, frames, tokens, encoder_outputs, beatmap_idx, total_steps):
    """Autoregressively extend *tokens* for up to *total_steps* decode steps.

    Stops early when every sequence in the batch has emitted EOS. Returns the
    extended token tensor; *encoder_outputs* is reused across steps so the
    encoder only runs once.
    """
    temperature = 0.9
    k = 10  # top-k sampling

    for _ in range(total_steps):
        out = model.forward(
            frames=frames,
            decoder_input_ids=tokens,
            decoder_attention_mask=tokens.ne(self.tokenizer.pad_id),
            encoder_outputs=encoder_outputs,
            beatmap_idx=beatmap_idx,
        )
        # Cache encoder state so subsequent steps skip the encoder pass.
        encoder_outputs = (out.encoder_last_hidden_state, out.encoder_hidden_states, out.encoder_attentions)
        logits = out.logits
        # Only the logits of the final position matter for the next token.
        logits = logits[:, -1, :] / temperature
        logits = self._filter(logits, 0.9)
        probabilities = F.softmax(logits, dim=-1)
        # NOTE(review): softmax is applied here AND inside top_k_sampling,
        # so the distribution is softmaxed twice — confirm this is intended.
        next_tokens = top_k_sampling(probabilities, k)

        tokens = torch.cat([tokens, next_tokens], dim=-1)

        eos_in_sentence = next_tokens == self.tokenizer.eos_id
        if eos_in_sentence.all():
            break

    return tokens
|
| 111 |
+
|
| 112 |
+
def generate(self, model: OsuT, sequences: torch.Tensor, top_k: int = 50) -> list[Event]:
    """
    Generate a list of Event object lists and their timestamps given source sequences.

    Args:
        model: Trained model to use for inference.
        sequences: A list of batched source sequences.
        top_k: Number of top tokens to use for top-k sampling.
            NOTE(review): this parameter is currently unused; the inner loop
            hard-codes top_k=60 — confirm intended.

    Returns:
        events: List of Event object lists.
        event_times: Corresponding event times of Event object lists in milliseconds.
    """
    events = []
    event_times = []
    temperature = 0.95

    # Resolve style/difficulty conditioning tokens; unknown ids fall back to
    # the tokenizer's "unknown" tokens (6666 is a placeholder class index).
    idx_dict = self.tokenizer.beatmap_idx
    beatmap_idx = torch.tensor([idx_dict.get(self.beatmap_id, 6666)], dtype=torch.long, device=self.device)
    style_token = self.tokenizer.encode_style(self.beatmap_id) if self.beatmap_id in idx_dict else self.tokenizer.style_unk
    diff_token = self.tokenizer.encode_diff(self.difficulty) if self.difficulty != -1 else self.tokenizer.diff_unk

    special_tokens = torch.empty((1, self.special_token_len), dtype=torch.long, device=self.device)
    special_tokens[:, self.diff_token_index] = diff_token
    special_tokens[:, self.style_token_index] = style_token

    if self.add_gd_context:
        # Conditioning tokens for the reference ("guest difficulty") beatmap.
        other_style_token = self.tokenizer.encode_style(self.other_beatmap_id) if self.other_beatmap_id in idx_dict else self.tokenizer.style_unk
        other_special_tokens = torch.empty((1, self.special_token_len), dtype=torch.long, device=self.device)
        other_special_tokens[:, self.diff_token_index] = self.tokenizer.encode_diff(self.other_difficulty)
        other_special_tokens[:, self.style_token_index] = other_style_token
    else:
        other_special_tokens = torch.empty((1, 0), dtype=torch.long, device=self.device)

    for sequence_index, frames in enumerate(tqdm(sequences)):
        # Get tokens of previous frame
        frame_time = sequence_index * self.miliseconds_per_stride
        prev_events = self._get_events_time_range(
            events, event_times, frame_time - self.miliseconds_per_sequence, frame_time) if self.add_pre_tokens else []
        post_events = self._get_events_time_range(
            events, event_times, frame_time, frame_time + self.miliseconds_per_sequence)

        prev_tokens = self._encode(prev_events, frame_time)
        post_tokens = self._encode(post_events, frame_time)
        post_token_length = post_tokens.shape[1]

        # Truncate pre-context from the left when it exceeds the budget.
        if 0 <= self.max_pre_token_len < prev_tokens.shape[1]:
            prev_tokens = prev_tokens[:, -self.max_pre_token_len:]

        # Get prefix tokens
        prefix = torch.cat([special_tokens, prev_tokens], dim=-1)
        if self.center_pad_decoder:
            # Left-pad so generation always starts at the window midpoint.
            prefix = F.pad(prefix, (self.tgt_seq_len // 2 - prefix.shape[1], 0), value=self.tokenizer.pad_id)
        prefix_length = prefix.shape[1]


        # Retry generation up to max_retries times when decoding fails or
        # produces an implausible result (see the NEW_COMBO check below).
        max_retries = 5
        attempt = 0
        result = []

        while attempt < max_retries and not result:
            attempt += 1
            try:
                # Reset tokens
                tokens = torch.tensor([[self.tokenizer.sos_id]], dtype=torch.long, device=self.device)
                tokens = torch.cat([prefix, tokens, post_tokens], dim=-1)

                # Ensure frames are properly reset for each retry
                retry_frames = frames.clone().to(self.device).unsqueeze(0)
                encoder_outputs = None

                while tokens.shape[-1] < self.tgt_seq_len:
                    out = model.forward(
                        frames=retry_frames,
                        decoder_input_ids=tokens,
                        decoder_attention_mask=tokens.ne(self.tokenizer.pad_id),
                        encoder_outputs=encoder_outputs,
                        #beatmap_idx=beatmap_idx,
                    )
                    # Cache encoder state so only the decoder reruns per step.
                    encoder_outputs = (out.encoder_last_hidden_state, out.encoder_hidden_states, out.encoder_attentions)

                    logits = out.logits[:, -1, :]
                    logits = logits / temperature
                    # Nucleus + top-k filtering before sampling.
                    logits = self._filter(logits, top_p=0.9, top_k=60)
                    probabilities = F.softmax(logits, dim=-1)
                    next_tokens = torch.multinomial(probabilities, 1)

                    tokens = torch.cat([tokens, next_tokens], dim=-1)

                    eos_in_sentence = next_tokens == self.tokenizer.eos_id
                    if eos_in_sentence.all():
                        break

                # Strip prefix, SOS and post-context; keep only new tokens.
                predicted_tokens = tokens[:, prefix_length + 1 + post_token_length:]
                result = self._decode(predicted_tokens[0], frame_time)

                # if no new combo in result, retry;
                if len(result) > 10 and not any(event.type == EventType.NEW_COMBO for event in result):
                    #print("No new combo in result; retrying...")
                    result = []


            except Exception as e:
                # NOTE(review): broad catch deliberately converts any decode
                # failure into a retry; consider logging `e` for diagnosis.
                #print(f"Attempt {attempt} encountered an error: {e}")
                result = []  # Ensure result is empty to trigger retry

        events += result

        self._update_event_times(events, event_times, frame_time)

    return events
|
| 223 |
+
|
| 224 |
+
def _prepare_events(self, events: list[Event]) -> tuple[list[Event], list[float]]:
    """Pre-process raw events for inference.

    Builds a parallel list of absolute timestamps (each event inherits the
    value of the most recent TIME_SHIFT) and strips TIME_SHIFT events that
    immediately precede slider anchor events, mutating ``events`` in place.
    """
    # Every event gets the timestamp of the latest TIME_SHIFT seen so far.
    current_time = 0
    event_times = []
    for event in events:
        if event.type == EventType.TIME_SHIFT:
            current_time = event.value
        event_times.append(current_time)

    # Walk backwards: a TIME_SHIFT directly before an anchor is redundant,
    # so drop it (and its timestamp) from both lists.
    pending_removal = False
    for idx in reversed(range(len(events))):
        if events[idx].type == EventType.TIME_SHIFT and pending_removal:
            pending_removal = False
            del events[idx]
            del event_times[idx]
            continue
        elif events[idx].type in (EventType.BEZIER_ANCHOR, EventType.PERFECT_ANCHOR,
                                  EventType.CATMULL_ANCHOR, EventType.RED_ANCHOR):
            pending_removal = True

    return events, event_times
|
| 249 |
+
|
| 250 |
+
def _get_events_time_range(self, events: list[Event], event_times: list[float], start_time: float, end_time: float):
|
| 251 |
+
# Look from the end of the list
|
| 252 |
+
s = 0
|
| 253 |
+
for i in range(len(event_times) - 1, -1, -1):
|
| 254 |
+
if event_times[i] < start_time:
|
| 255 |
+
s = i + 1
|
| 256 |
+
break
|
| 257 |
+
e = 0
|
| 258 |
+
for i in range(len(event_times) - 1, -1, -1):
|
| 259 |
+
if event_times[i] < end_time:
|
| 260 |
+
e = i + 1
|
| 261 |
+
break
|
| 262 |
+
return events[s:e]
|
| 263 |
+
|
| 264 |
+
def _update_event_times(self, events: list[Event], event_times: list[float], frame_time: float):
    """Refresh the parallel timestamp list after new events were appended.

    Delegates to the shared ``update_event_times`` helper with the end time of
    the current window (frame_time + one sequence length). The exact semantics
    live in that helper, which is defined outside this file — presumably it
    extends ``event_times`` in place for the newly appended events; verify there.
    """
    update_event_times(events, event_times, frame_time + self.miliseconds_per_sequence)
|
| 266 |
+
|
| 267 |
+
|
| 268 |
+
def _encode(self, events: list[Event], frame_time: float) -> torch.Tensor:
    """Tokenize events into a (1, len(events)) long tensor on ``self.device``.

    TIME_SHIFT values are converted from absolute milliseconds to integer
    steps relative to ``frame_time`` before tokenization; other events are
    encoded as-is. On any failure an empty (1, 0) tensor is returned so the
    caller proceeds without context (deliberate best-effort behaviour).
    """
    try:
        tokens = torch.empty((1, len(events)), dtype=torch.long)
        for i, event in enumerate(events):
            if event.type == EventType.TIME_SHIFT:
                # Relative step index; can be negative for events before frame_time.
                event = Event(type=event.type, value=int((event.value - frame_time) / MILISECONDS_PER_STEP))
            tokens[0, i] = self.tokenizer.encode(event)
        return tokens.to(self.device)
    except Exception as e:
        # NOTE(review): broad catch discards *all* events when a single one
        # fails to encode — consider logging `e` and the offending event.
        return torch.empty((1, 0), dtype=torch.long, device=self.device)
|
| 281 |
+
def _decode(self, tokens: torch.Tensor, frame_time: float) -> list[Event]:
    """Converts a list of tokens into Event objects and converts to absolute time values.

    Args:
        tokens: 1-D tensor of token ids.
        frame_time: Start time (ms) of the current source sequence.

    Returns:
        events: List of Event objects with TIME_SHIFT values in absolute milliseconds.
    """
    events = []
    for token in tokens:
        # EOS terminates the decoded sequence.
        if token == self.tokenizer.eos_id:
            break

        try:
            event = self.tokenizer.decode(token.item())
        except Exception:
            # Fix: was a bare `except:`, which also swallowed KeyboardInterrupt
            # and SystemExit. Unknown/invalid token ids are simply skipped.
            continue

        if event.type == EventType.TIME_SHIFT:
            # Convert relative step count back to absolute milliseconds.
            event.value = frame_time + event.value * MILISECONDS_PER_STEP

        events.append(event)

    return events
|
| 307 |
+
|
| 308 |
+
def _filter(self, logits: torch.Tensor, top_p: float = 0.75, top_k: int = 1, filter_value: float = -float("Inf")) -> torch.Tensor:
    """Filter a distribution of logits using nucleus (top-p) and/or top-k filtering."""
    # Stage 1: keep only the k highest-scoring tokens (skipped for k == 0).
    if top_k > 0:
        logits = top_k_logits(logits, top_k)

    # Stage 2: nucleus filtering — keep the smallest prefix of the sorted
    # distribution whose cumulative probability exceeds top_p.
    if 0.0 < top_p < 1.0:
        desc_logits, desc_indices = torch.sort(logits, descending=True)
        cum_probs = torch.cumsum(F.softmax(desc_logits, dim=-1), dim=-1)

        drop_sorted = cum_probs > top_p
        # Shift right so the first token that crosses the threshold is kept.
        drop_sorted[..., 1:] = drop_sorted[..., :-1].clone()
        drop_sorted[..., 0] = 0

        # Map the sorted-order mask back to vocabulary order (batch-safe).
        drop_mask = drop_sorted.scatter(1, desc_indices, drop_sorted)
        logits[drop_mask] = filter_value

    return logits
|
| 325 |
+
def top_k_logits(logits: torch.Tensor, k: int) -> torch.Tensor:
    """
    Keep only the top-k tokens with highest probabilities.

    Args:
        logits: Logits distribution of shape (batch size, vocabulary size).
        k: Number of top tokens to keep. Non-positive k is a no-op; k larger
           than the vocabulary is clamped (previously this raised in topk).

    Returns:
        A new tensor with non-top-k elements set to negative infinity; the
        input tensor is not modified.
    """
    if k <= 0:
        # Nothing to filter; previously values[:, -1] would have raised.
        return logits
    # Clamp: torch.topk raises RuntimeError when k exceeds the last dim size.
    k = min(k, logits.size(-1))
    values, indices = torch.topk(logits, k)
    # Threshold is the k-th largest logit per row, broadcast over the vocab.
    min_values = values[:, -1].unsqueeze(-1).expand_as(logits)
    return torch.where(logits < min_values, torch.full_like(logits, float("-Inf")), logits)
|
osuT5/inference/postprocessor.py
ADDED
|
@@ -0,0 +1,322 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
import dataclasses
|
| 4 |
+
import os
|
| 5 |
+
import pathlib
|
| 6 |
+
import uuid
|
| 7 |
+
from string import Template
|
| 8 |
+
import zipfile
|
| 9 |
+
import numpy as np
|
| 10 |
+
from omegaconf import DictConfig
|
| 11 |
+
import time as t
|
| 12 |
+
from osuT5.inference.slider_path import SliderPath
|
| 13 |
+
from osuT5.tokenizer import Event, EventType
|
| 14 |
+
|
| 15 |
+
OSU_FILE_EXTENSION = ".osu"
|
| 16 |
+
OSU_TEMPLATE_PATH = os.path.join(os.path.dirname(__file__), "template.osu")
|
| 17 |
+
STEPS_PER_MILLISECOND = 0.1
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
@dataclasses.dataclass
class BeatmapConfig:
    """Values substituted into the .osu template's placeholders (template.osu)."""

    # General
    audio_filename: str = ""  # name of the audio file bundled in the .osz

    # Metadata
    title: str = ""
    title_unicode: str = ""
    artist: str = ""
    artist_unicode: str = ""
    creator: str = ""  # mapper name shown in the editor
    version: str = ""  # difficulty name

    # Difficulty settings (standard osu! ranges are 0-10)
    hp_drain_rate: float = 5
    circle_size: float = 4
    overall_difficulty: float = 8
    approach_rate: float = 9
    slider_multiplier: float = 1.8  # base slider velocity (hundreds of osu!pixels per beat)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def calculate_coordinates(last_pos, dist, num_samples, playfield_size):
    """Sample candidate positions exactly ``dist`` away from ``last_pos``.

    ``num_samples`` angles are taken evenly over the full circle; points that
    fall outside the playfield rectangle are discarded. If no candidate
    survives, the nearest corner — (0, 0) or the far corner — is returned as
    the sole fallback.
    """
    theta = np.linspace(0, 2 * np.pi, num_samples)

    # Points on the circle of radius `dist` centred at last_pos.
    xs = last_pos[0] + dist * np.cos(theta)
    ys = last_pos[1] + dist * np.sin(theta)

    # Keep only candidates inside the playfield rectangle.
    in_bounds = [
        (px, py)
        for px, py in zip(xs, ys)
        if 0 <= px <= playfield_size[0] and 0 <= py <= playfield_size[1]
    ]
    if in_bounds:
        return in_bounds

    # Fallback: pick the corner on the same side of the field as last_pos.
    if last_pos[0] + last_pos[1] > (playfield_size[0] + playfield_size[1]) / 2:
        return [playfield_size]
    return [(0, 0)]
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def position_to_progress(slider_path: SliderPath, pos: np.ndarray) -> np.ndarray:
    """Find the progress t in [0, 1] along ``slider_path`` closest to ``pos``.

    Gradient descent on the distance to ``pos``, starting from the tail
    (t = 1), using a backward finite difference with step ``eps``. Note the
    difference is deliberately not divided by eps, which makes the effective
    step tiny — presumably intentional damping; confirm if tuning.
    Stops after 100 iterations, on a zero gradient, or when t leaves [0, 1].
    """
    eps = 1e-4
    lr = 1
    # NOTE: this local `t` shadows the module-level `import time as t` alias.
    t = 1
    for i in range(100):
        grad = np.linalg.norm(slider_path.position_at(t) - pos) - np.linalg.norm(
            slider_path.position_at(t - eps) - pos,
        )
        t -= lr * grad

        if grad == 0 or t < 0 or t > 1:
            break

    return np.clip(t, 0, 1)
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
def quantize_to_beat(time, bpm, offset, tick_rate=0.5):
    """Quantize a time (ms) to the nearest beat-subdivision tick.

    Args:
        time: Timestamp in milliseconds.
        bpm: Beats per minute of the (single) timing section.
        offset: Timing-section start in milliseconds; the grid is anchored here.
        tick_rate: Beat fraction of the snapping grid. The previous hard-coded
            1/2-beat value is now the default, so existing callers are unchanged.

    Returns:
        The snapped timestamp in milliseconds (float).
    """
    beats_per_second = bpm / 60.0
    milliseconds_per_beat = 1000 / beats_per_second
    tick = milliseconds_per_beat * tick_rate
    # round() uses banker's rounding for exact .5 midpoints; acceptable here.
    return round((time - offset) / tick) * tick + offset
|
| 92 |
+
|
| 93 |
+
def quantize_to_beat_again(time, bpm, offset, tick_rate=0.25):
    """Quantize a time (ms) to the nearest beat-subdivision tick (1/4 beat by default).

    NOTE(review): duplicate of ``quantize_to_beat`` except for the default
    tick rate — kept for caller compatibility; now that both accept a
    ``tick_rate`` parameter the two could be merged.

    Args:
        time: Timestamp in milliseconds.
        bpm: Beats per minute of the (single) timing section.
        offset: Timing-section start in milliseconds; the grid is anchored here.
        tick_rate: Beat fraction of the snapping grid (default 1/4 beat).

    Returns:
        The snapped timestamp in milliseconds (float).
    """
    beats_per_second = bpm / 60.0
    milliseconds_per_beat = 1000 / beats_per_second
    tick = milliseconds_per_beat * tick_rate
    return round((time - offset) / tick) * tick + offset
|
| 107 |
+
|
| 108 |
+
def move_to_next_tick(time, bpm):
    """Advance a time (ms) by one 1/4-beat tick for the given BPM."""
    tick_rate = 0.25
    beats_per_second = bpm / 60.0
    milliseconds_per_beat = 1000 / beats_per_second
    return time + milliseconds_per_beat * tick_rate
|
| 116 |
+
|
| 117 |
+
def move_to_prev_tick(time, bpm):
    """Move a time (ms) back by one 1/4-beat tick for the given BPM.

    (Docstring fix: previously said "next tick", copy-pasted from
    ``move_to_next_tick``; this function subtracts a tick.)
    """
    tick_rate = 0.25
    beats_per_minute = bpm
    beats_per_second = beats_per_minute / 60.0
    milliseconds_per_beat = 1000 / beats_per_second
    quantized_time = time - milliseconds_per_beat * tick_rate
    return quantized_time
|
| 125 |
+
|
| 126 |
+
def adjust_hit_objects(hit_objects, bpm, offset):
    """Resnap TIME_SHIFT events onto the beat grid (BPM + offset).

    Each TIME_SHIFT is quantized with ``quantize_to_beat``. If the quantized
    time collides with the previous adjusted time, the event is pushed to the
    next tick — unless the previous event ends a slider body (LAST_ANCHOR or
    SLIDER_END), where a shared timestamp is legitimate.

    Bug fix: the previous guard compared against
    ``(EventType.LAST_ANCHOR or EventType.SLIDER_END)``, which evaluates to
    just ``EventType.LAST_ANCHOR``, so SLIDER_END was never exempted.
    Also removed the unused ``to_be_adjusted`` list and merged the two
    identical append branches.
    """
    adjusted_hit_objects = []
    adjusted_times = []
    for hit_object in hit_objects:
        if hit_object.type != EventType.TIME_SHIFT:
            adjusted_hit_objects.append(hit_object)
            continue

        time = quantize_to_beat(hit_object.value, bpm, offset)
        # Resolve a collision with the previous snapped time by moving one
        # tick forward, except right after a slider tail.
        if (
            adjusted_times
            and int(time) == adjusted_times[-1]
            and adjusted_hit_objects[-1].type not in (EventType.LAST_ANCHOR, EventType.SLIDER_END)
        ):
            time = move_to_next_tick(time, bpm)

        adjusted_hit_objects.append(Event(EventType.TIME_SHIFT, time))
        adjusted_times.append(int(time))

    return adjusted_hit_objects
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
class Postprocessor(object):
    def __init__(self, args: DictConfig):
        """Postprocessing stage that converts a list of Event objects to a beatmap file."""
        # .osu curve-type letters -> SliderPath path_type names.
        self.curve_type_shorthand = {
            "B": "Bezier",
            "P": "PerfectCurve",
            "C": "Catmull",
        }

        self.output_path = args.output_path
        self.audio_path = args.audio_path
        # Audio file name without its extension; used for map title and .osz name.
        self.audio_filename = pathlib.Path(args.audio_path).name.split(".")[0]
        self.beatmap_config = BeatmapConfig(
            title=str(f"{self.audio_filename} ({args.title})"),
            artist=str(args.artist),
            title_unicode=str(args.title),
            artist_unicode=str(args.artist),
            audio_filename=pathlib.Path(args.audio_path).name,
            slider_multiplier=float(args.slider_multiplier),
            creator=str(args.creator),
            version=str(args.version),
        )
        self.offset = args.offset  # timing-section start in ms
        self.beat_length = 60000 / args.bpm  # ms per beat
        self.slider_multiplier = self.beatmap_config.slider_multiplier
        self.bpm = args.bpm
        self.resnap_objects = args.resnap_objects  # whether to snap times to the grid

    def generate(self, generated_positions: list[Event]):
        """Generate a beatmap file.

        Args:
            generated_positions: One list of Event objects per difficulty to render.

        Returns:
            None. An .osz archive containing one .osu file per difficulty
            (plus the audio file) is written to ``self.output_path``.
        """
        processed_events = []

        for events in generated_positions:
            # adjust hit objects to align with the beat grid
            if self.resnap_objects:
                events = adjust_hit_objects(events, self.bpm, self.offset)

            hit_object_strings = []
            # Decoder state carried across events while assembling one object.
            time = 0
            dist = 0
            x = 256  # playfield centre as the initial cursor position
            y = 192
            has_pos = False  # True once an explicit POS_X/POS_Y was seen
            new_combo = 0  # 4 == new-combo bit in the .osu type bitfield
            ho_info = []  # pending hit-object fields (slider head / spinner)
            anchor_info = []  # pending slider control points as (curve, x, y)

            # Uninherited timing point establishing BPM at the offset.
            timing_point_strings = [
                f"{self.offset},{self.beat_length},4,2,0,100,1,0"
            ]

            for event in events:
                hit_type = event.type

                # --- state-only events: update cursor/flags and continue ---
                if hit_type == EventType.TIME_SHIFT:
                    time = event.value
                    continue
                elif hit_type == EventType.DISTANCE:
                    # Find a point which is dist away from the last point but
                    # still within the playfield; pick one of the candidates
                    # at random.
                    dist = event.value
                    coordinates = calculate_coordinates((x, y), dist, 500, (512, 384))
                    pos = coordinates[np.random.randint(len(coordinates))]
                    x, y = pos
                    continue
                elif hit_type == EventType.POS_X:
                    x = event.value
                    has_pos = True
                    continue
                elif hit_type == EventType.POS_Y:
                    y = event.value
                    has_pos = True
                    continue
                elif hit_type == EventType.NEW_COMBO:
                    new_combo = 4
                    continue

                # --- object events: emit .osu hit-object lines ---
                if hit_type == EventType.CIRCLE:
                    # type 1 == circle, OR'd with the new-combo bit.
                    hit_object_strings.append(f"{int(round(x))},{int(round(y))},{int(round(time))},{1 | new_combo},0")
                    ho_info = []

                elif hit_type == EventType.SPINNER:
                    ho_info = [time, new_combo]

                elif hit_type == EventType.SPINNER_END and len(ho_info) == 2:
                    # Spinners are centred; type 8 == spinner; last field is end time.
                    hit_object_strings.append(
                        f"{256},{192},{int(round(ho_info[0]))},{8 | ho_info[1]},0,{int(round(time))}"
                    )
                    ho_info = []

                elif hit_type == EventType.SLIDER_HEAD:
                    ho_info = [x, y, time, new_combo]
                    anchor_info = []

                elif hit_type == EventType.BEZIER_ANCHOR:
                    anchor_info.append(('B', x, y))

                elif hit_type == EventType.PERFECT_ANCHOR:
                    anchor_info.append(('P', x, y))

                elif hit_type == EventType.CATMULL_ANCHOR:
                    anchor_info.append(('C', x, y))

                elif hit_type == EventType.RED_ANCHOR:
                    # A red (sharp) anchor is encoded as a doubled Bezier point.
                    anchor_info.append(('B', x, y))
                    anchor_info.append(('B', x, y))

                elif hit_type == EventType.LAST_ANCHOR:
                    # Records the end time of the first span (ho_info[4]).
                    ho_info.append(time)
                    anchor_info.append(('B', x, y))

                elif hit_type == EventType.SLIDER_END and len(ho_info) == 5 and len(anchor_info) > 0:
                    curve_type = anchor_info[0][0]
                    span_duration = ho_info[4] - ho_info[2]  # one traversal, ms
                    total_duration = time - ho_info[2]  # all repeats, ms

                    if total_duration == 0 or span_duration == 0:
                        continue

                    slides = max(int(round(total_duration / span_duration)), 1)
                    control_points = "|".join(f"{int(round(cp[1]))}:{int(round(cp[2]))}" for cp in anchor_info)
                    slider_path = SliderPath(self.curve_type_shorthand[curve_type], np.array([(ho_info[0], ho_info[1])] + [(cp[1], cp[2]) for cp in anchor_info], dtype=float))
                    length = slider_path.get_distance()

                    # Trim the path length to where the slider visually ends:
                    # either the explicit end position, or full length minus
                    # the last DISTANCE value.
                    req_length = length * position_to_progress(
                        slider_path,
                        np.array((x, y)),
                    ) if has_pos else length - dist

                    if req_length < 1e-4:
                        continue

                    # type 2 == slider.
                    hit_object_strings.append(
                        f"{int(round(ho_info[0]))},{int(round(ho_info[1]))},{int(round(ho_info[2]))},{2 | ho_info[3]},0,{curve_type}|{control_points},{slides},{req_length}"
                    )

                    # Inherited timing point whose (negative) beat length sets
                    # the slider velocity so the span fits span_duration.
                    sv = span_duration / req_length / self.beat_length * self.slider_multiplier * -10000
                    timing_point_strings.append(
                        f"{int(round(ho_info[2]))},{sv},4,2,0,100,0,0"
                    )

                # Combo flag applies to the next emitted object only.
                new_combo = 0

            # Write .osu file by filling the template placeholders.
            with open(OSU_TEMPLATE_PATH, "r") as tf:
                template = Template(tf.read())
                hit_objects = {"hit_objects": "\n".join(hit_object_strings)}
                timing_points = {"timing_points": "\n".join(timing_point_strings)}
                beatmap_config = dataclasses.asdict(self.beatmap_config)
                result = template.safe_substitute({**beatmap_config, **hit_objects, **timing_points})
                processed_events.append(result)

        # Bundle every rendered difficulty plus the audio into one .osz
        # (timestamped so repeated runs do not overwrite each other).
        osz_path = os.path.join(self.output_path, f"{self.audio_filename}_{t.time()}.osz")
        with zipfile.ZipFile(osz_path, "w") as z:
            for i, event in enumerate(processed_events):
                osu_path = os.path.join(self.output_path, f"{i}{OSU_FILE_EXTENSION}")
                with open(osu_path, "w") as osu_file:
                    osu_file.write(event)
                z.write(osu_path, os.path.basename(osu_path))
            z.write(self.audio_path, os.path.basename(self.audio_path))
            print(f"Mapset saved {osz_path}")
            # NOTE(review): redundant — the `with` block already closes the archive.
            z.close()
|
osuT5/inference/preprocessor.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from __future__ import annotations
|
| 2 |
+
|
| 3 |
+
from pathlib import Path
|
| 4 |
+
|
| 5 |
+
import torch
|
| 6 |
+
import numpy as np
|
| 7 |
+
import numpy.typing as npt
|
| 8 |
+
from omegaconf import DictConfig
|
| 9 |
+
|
| 10 |
+
from osuT5.dataset.data_utils import load_audio_file
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
class Preprocessor(object):
    def __init__(self, args: DictConfig):
        """Preprocess audio data into sequences."""
        # One frame is reserved elsewhere (hence src_seq_len - 1) — presumably
        # for a special token in the encoder input; confirm against the model.
        self.frame_seq_len = args.data.src_seq_len - 1
        self.frame_size = args.data.hop_length  # audio samples per frame
        self.sample_rate = args.data.sample_rate
        self.samples_per_sequence = self.frame_seq_len * self.frame_size
        # Stride between consecutive windows, as a fraction of a sequence.
        self.sequence_stride = int(self.samples_per_sequence * args.data.sequence_stride)

    def load(self, path: Path) -> npt.ArrayLike:
        """Load an audio file as audio frames. Convert stereo to mono, normalize.

        Args:
            path: Path to audio file.

        Returns:
            samples: Audio time-series.
        """
        return load_audio_file(path, self.sample_rate)

    def segment(self, samples: npt.ArrayLike) -> torch.Tensor:
        """Segment audio samples into sequences. Sequences are flattened frames.

        Args:
            samples: Audio time-series.

        Returns:
            sequences: A list of sequences of shape (batch size, samples per sequence).
        """
        # Zero-pad so that (len - samples_per_sequence) is a whole number of
        # strides, i.e. the final window is completely covered.
        samples = np.pad(
            samples,
            [0, self.sequence_stride - (len(samples) - self.samples_per_sequence) % self.sequence_stride],
        )
        sequences = self.window(samples, self.samples_per_sequence, self.sequence_stride)
        sequences = torch.from_numpy(sequences).to(torch.float32)
        return sequences

    @staticmethod
    def window(a, w, o, copy=False):
        """Sliding windows of width ``w`` with stride ``o`` over 1-D array ``a``.

        Implemented with ``as_strided``: the result is a zero-copy view into
        ``a`` unless ``copy=True``. Callers must not mutate the view, and the
        backing array must stay alive while the view is used.
        """
        sh = (a.size - w + 1, w)
        st = a.strides * 2
        # All windows at stride 1, then subsample every o-th window.
        view = np.lib.stride_tricks.as_strided(a, strides=st, shape=sh)[0::o]
        if copy:
            return view.copy()
        else:
            return view
|
osuT5/inference/slider_path.py
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import logging
|
| 2 |
+
|
| 3 |
+
import numpy as np
|
| 4 |
+
from numpy.linalg import norm
|
| 5 |
+
|
| 6 |
+
import osuT5.inference.path_approximator as path_approximator
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def binary_search(array, target):
    """C#-style binary search over a sorted sequence.

    Returns the index of a matching element (not necessarily the first when
    duplicates exist). When absent, returns the bitwise complement of the
    insertion point — always negative, recoverable with ``~result``.
    """
    low, high = 0, len(array)
    while low < high:
        mid = (low + high) // 2
        probe = array[mid]
        if probe == target:
            return mid
        if probe < target:
            # A one-element window can no longer shrink from the left.
            if low == mid:
                break
            low = mid
        else:
            high = mid
    return ~high
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
class SliderPath:
    """Flattened point path of an osu! slider.

    Approximates the control-point curve as a polyline (``calculated_path``)
    with running arc lengths (``cumulative_length``), optionally trimmed or
    extended to ``expected_distance``. Ported from osu!'s SliderPath.
    """

    __slots__ = (
        "control_points",
        "path_type",
        "expected_distance",
        "calculated_path",
        "cumulative_length",
        "is_initialised",
    )

    def __init__(
        self,
        path_type: str,
        control_points: np.array,
        expected_distance: float | None = None,
    ) -> None:
        # path_type: "Linear", "PerfectCurve", "Catmull" or anything else (Bezier).
        self.control_points = control_points
        self.path_type = path_type
        self.expected_distance = expected_distance

        self.calculated_path = None
        self.cumulative_length = None

        self.is_initialised = None

        # Eagerly computed; ensure_initialised() is a no-op afterwards.
        self.ensure_initialised()

    def get_control_points(self) -> np.array:
        self.ensure_initialised()
        return self.control_points

    def get_distance(self) -> float:
        """Total arc length of the (possibly trimmed) path."""
        self.ensure_initialised()
        return 0 if len(self.cumulative_length) == 0 else self.cumulative_length[-1]

    def get_path_to_progress(self, path, p0, p1) -> None:
        """Fill ``path`` (cleared first) with the vertices between progress p0 and p1."""
        self.ensure_initialised()

        d0 = self.progress_to_distance(p0)
        d1 = self.progress_to_distance(p1)

        path.clear()

        # Skip vertices before d0, then interpolate the exact start point.
        i = 0
        while i < len(self.calculated_path) and self.cumulative_length[i] < d0:
            i += 1

        path.append(self.interpolate_vertices(i, d0))

        # Copy whole vertices up to d1, then interpolate the exact end point.
        while i < len(self.calculated_path) and self.cumulative_length[i] < d1:
            path.append(self.calculated_path[i])
            i += 1

        path.append(self.interpolate_vertices(i, d1))

    def position_at(self, progress) -> np.array:
        """Position on the path at ``progress`` in [0, 1] (clamped)."""
        self.ensure_initialised()

        d = self.progress_to_distance(progress)
        return self.interpolate_vertices(self.index_of_distance(d), d)

    def ensure_initialised(self) -> None:
        if self.is_initialised:
            return
        self.is_initialised = True

        self.control_points = [] if self.control_points is None else self.control_points
        self.calculated_path = []
        self.cumulative_length = []

        self.calculate_path()
        self.calculate_cumulative_length()

    def calculate_subpath(self, sub_control_points) -> list:
        """Approximate one control-point segment as a list of vertices."""
        if self.path_type == "Linear":
            return path_approximator.approximate_linear(sub_control_points)
        elif self.path_type == "PerfectCurve":
            # Perfect curves need exactly 3 points; otherwise fall back to Bezier.
            if len(self.get_control_points()) != 3 or len(sub_control_points) != 3:
                return path_approximator.approximate_bezier(sub_control_points)

            subpath = path_approximator.approximate_circular_arc(sub_control_points)

            # Degenerate arc (e.g. collinear points): fall back to Bezier.
            if len(subpath) == 0:
                return path_approximator.approximate_bezier(sub_control_points)

            return subpath
        elif self.path_type == "Catmull":
            return path_approximator.approximate_catmull(sub_control_points)
        else:
            return path_approximator.approximate_bezier(sub_control_points)

    def calculate_path(self) -> None:
        """Flatten the control points into ``calculated_path`` vertices."""
        self.calculated_path.clear()

        start = 0
        end = 0

        for i in range(len(self.get_control_points())):
            end += 1

            # A segment ends at the last control point or at a doubled point
            # (red anchor). Short-circuit protects the i + 1 access.
            if (
                i == len(self.get_control_points()) - 1
                or (
                    self.get_control_points()[i] == self.get_control_points()[i + 1]
                ).all()
            ):
                cp_span = self.get_control_points()[start:end]

                for t in self.calculate_subpath(cp_span):
                    # Skip consecutive duplicate vertices.
                    if (
                        len(self.calculated_path) == 0
                        or (self.calculated_path[-1] != t).any()
                    ):
                        self.calculated_path.append(t)

                start = end

    def calculate_cumulative_length(self) -> None:
        """Compute running arc lengths; trim/extend to expected_distance if set."""
        length = 0

        self.cumulative_length.clear()
        self.cumulative_length.append(length)

        for i in range(len(self.calculated_path) - 1):
            diff = self.calculated_path[i + 1] - self.calculated_path[i]
            d = norm(diff)

            # Path longer than expected: shorten the current segment so the
            # total hits expected_distance, drop the rest, and stop.
            if (
                self.expected_distance is not None
                and self.expected_distance - length < d
            ):
                self.calculated_path[i + 1] = (
                    self.calculated_path[i]
                    + diff * (self.expected_distance - length) / d
                )
                del self.calculated_path[i + 2 : len(self.calculated_path) - 2 - i]

                length = self.expected_distance
                self.cumulative_length.append(length)
                break

            length += d
            self.cumulative_length.append(length)

        # Path shorter than expected: stretch the final segment outward.
        if (
            self.expected_distance is not None
            and length < self.expected_distance
            and len(self.calculated_path) > 1
        ):
            diff = self.calculated_path[-1] - self.calculated_path[-2]
            d = norm(diff)

            if d <= 0:
                return

            self.calculated_path[-1] += (
                diff * (self.expected_distance - self.cumulative_length[-1]) / d
            )
            self.cumulative_length[-1] = self.expected_distance

    def index_of_distance(self, d) -> int:
        """Index of the first vertex at or beyond arc length ``d``."""
        i = binary_search(self.cumulative_length, d)
        if i < 0:
            # Negative means "not found"; ~i recovers the insertion point.
            i = ~i

        return i

    def progress_to_distance(self, progress) -> float:
        """Map progress in [0, 1] (clamped) to an arc length."""
        return np.clip(progress, 0, 1) * self.get_distance()

    def interpolate_vertices(self, i, d) -> np.array:
        """Linearly interpolate the position at arc length ``d`` around vertex ``i``."""
        if len(self.calculated_path) == 0:
            return np.zeros([2])

        # Clamp to the path's endpoints.
        if i <= 0:
            return self.calculated_path[0]
        if i >= len(self.calculated_path):
            return self.calculated_path[-1]

        p0 = self.calculated_path[i - 1]
        p1 = self.calculated_path[i]

        d0 = self.cumulative_length[i - 1]
        d1 = self.cumulative_length[i]

        # Zero-length segment: avoid dividing by ~0.
        if np.isclose(d0, d1):
            return p0

        w = (d - d0) / (d1 - d0)
        return p0 + (p1 - p0) * w
|
| 216 |
+
|
| 217 |
+
|
| 218 |
+
if __name__ == "__main__":
|
| 219 |
+
path = SliderPath(
|
| 220 |
+
"Bezier",
|
| 221 |
+
100 * np.array([[0, 0], [1, 1], [1, -1], [2, 0], [2, 0], [3, -1], [2, -2]]),
|
| 222 |
+
)
|
| 223 |
+
p = np.vstack(path.calculated_path)
|
| 224 |
+
logging.info(p.shape)
|
| 225 |
+
|
| 226 |
+
import matplotlib.pyplot as plt
|
| 227 |
+
|
| 228 |
+
plt.axis("equal")
|
| 229 |
+
plt.plot(p[:, 0], p[:, 1], color="green")
|
| 230 |
+
plt.show()
|
osuT5/inference/template.osu
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
osu file format v14
|
| 2 |
+
|
| 3 |
+
[General]
|
| 4 |
+
AudioFilename: $audio_filename
|
| 5 |
+
AudioLeadIn: 0
|
| 6 |
+
PreviewTime: -1
|
| 7 |
+
Countdown: 0
|
| 8 |
+
SampleSet: Soft
|
| 9 |
+
StackLeniency: 0.7
|
| 10 |
+
Mode: 0
|
| 11 |
+
LetterboxInBreaks: 0
|
| 12 |
+
WidescreenStoryboard: 1
|
| 13 |
+
|
| 14 |
+
[Editor]
|
| 15 |
+
DistanceSpacing: 1.0
|
| 16 |
+
BeatDivisor: 4
|
| 17 |
+
GridSize: 8
|
| 18 |
+
TimelineZoom: 1
|
| 19 |
+
|
| 20 |
+
[Metadata]
|
| 21 |
+
Title:$title
|
| 22 |
+
TitleUnicode:$title_unicode
|
| 23 |
+
Artist:$artist
|
| 24 |
+
ArtistUnicode:$artist_unicode
|
| 25 |
+
Creator:$creator
|
| 26 |
+
Version:$version
|
| 27 |
+
Source:
|
| 28 |
+
Tags:
|
| 29 |
+
|
| 30 |
+
[Difficulty]
|
| 31 |
+
HPDrainRate:$hp_drain_rate
|
| 32 |
+
CircleSize:$circle_size
|
| 33 |
+
OverallDifficulty:$overall_difficulty
|
| 34 |
+
ApproachRate:$approach_rate
|
| 35 |
+
SliderMultiplier:$slider_multiplier
|
| 36 |
+
SliderTickRate:1
|
| 37 |
+
|
| 38 |
+
[Events]
|
| 39 |
+
//Background and Video events
|
| 40 |
+
//Break Periods
|
| 41 |
+
//Storyboard Layer 0 (Background)
|
| 42 |
+
//Storyboard Layer 1 (Fail)
|
| 43 |
+
//Storyboard Layer 2 (Pass)
|
| 44 |
+
//Storyboard Layer 3 (Foreground)
|
| 45 |
+
//Storyboard Layer 4 (Overlay)
|
| 46 |
+
//Storyboard Sound Samples
|
| 47 |
+
|
| 48 |
+
[TimingPoints]
|
| 49 |
+
$timing_points
|
| 50 |
+
|
| 51 |
+
[Colours]
|
| 52 |
+
|
| 53 |
+
[HitObjects]
|
| 54 |
+
$hit_objects
|
osuT5/model/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
from .osu_t import OsuT
|
osuT5/model/__pycache__/__init__.cpython-311.pyc
ADDED
|
Binary file (203 Bytes). View file
|
|
|
osuT5/model/__pycache__/__init__.cpython-39.pyc
ADDED
|
Binary file (171 Bytes). View file
|
|
|