| """ |
| Parser for maidata.txt files (MaichartConverter format). |
| |
| Parses: |
| 1. Header key-value pairs (&key=value) |
| 2. Note sequence data (&inote_N= blocks) |
| |
| The note format is measure-based: |
| {beat_div}pos1,pos2,pos3,... |
| |
| Where each position can be: |
| - A number 1-8 (button position) |
| - Empty (rest) |
| - Number/Number (simultaneous tap) |
| - Number + modifiers (h=hold, b=break, x=firework, and slide connectors >, <, V, v, -, ^) |
| - Touch region (C, B1-B8, E1-E8, A1-A8, D1-D8) |
| - End marker: "E" |
| """ |
|
|
| from __future__ import annotations |
|
|
| import re |
| from pathlib import Path |
| from typing import Optional |
|
|
| from .models import Cabinet, Chart, Difficulty, Song, TouchNote |
|
|
| |
|
|
| |
# Beat-division marker such as "{8}"; group 1 is the division number.
_RE_BEAT_DIV = re.compile(r"\{(\d+)\}")

# Inline BPM marker such as "(200)" or "(187.5)"; group 1 is the BPM text.
_RE_BPM_CHANGE = re.compile(r"\((\d+(?:\.\d+)?)\)")

# Hold suffix such as "h[4:1]"; groups 1-2 are the two bracketed integers.
_RE_HOLD = re.compile(r"h\[(\d+):(\d+)\]")

# Slide segment with a duration such as ">6[4:1]": group 1 is the connector
# character, group 2 the number after it, groups 3-4 the bracketed integers.
_RE_SLIDE_DUR = re.compile(r"([><Vv\-\^])(\d+)\[(\d+):(\d+)\]")

# Slide without a duration bracket such as "3-6": the numbers on either side
# of a single connector character.
_RE_SIMPLE_SLIDE = re.compile(r"(\d+)[><Vv\-\^](\d+)")

# Break marker: a trailing "b", optionally written as "$b".
_RE_BREAK = re.compile(r"b$|\$b$")

# Firework marker: a trailing "x".
_RE_FIREWORK = re.compile(r"x$")

# Whole-token touch region: "C" (optionally followed by "f" or "h"), or one
# of A/B/D/E followed by a digit 1-8.
_RE_TOUCH = re.compile(r"^(C[fh]?|[ABDE][1-8])$")

# Whole-token simultaneous tap: two to four numbers separated by "/".
_RE_SIMUL = re.compile(r"^(\d+)/(\d+)(?:/(\d+))?(?:/(\d+))?$")

# Whole-token run of digits (a single position).
_RE_POS = re.compile(r"^\d+$")
|
|
|
|
def parse_header_line(line: str) -> tuple[str, str]:
    """Split one ``&key=value`` header line into a ``(key, value)`` pair.

    Surrounding whitespace and a single leading ``&`` are dropped, the text
    is cut at the first ``=`` and both halves are whitespace-trimmed. When no
    ``=`` is present the whole text is the key and the value is ``""``.
    """
    text = line.strip()
    body = text[1:] if text.startswith("&") else text
    # partition() yields (body, "", "") when "=" is absent, which gives the
    # same (key, "") result as the no-"=" case.
    name, _, payload = body.partition("=")
    return name.strip(), payload.strip()
|
|
|
|
def _extract_bpm_changes(text: str) -> list[tuple[int, float]]:
    """Collect every inline BPM marker such as ``(200)`` found in *text*.

    Returns one ``(offset, bpm)`` tuple per marker, in order of appearance.
    ``offset`` is the character index in *text* where the marker starts and
    ``bpm`` is the number inside the parentheses converted to ``float``.
    """
    return [
        (hit.start(), float(hit.group(1)))
        for hit in _RE_BPM_CHANGE.finditer(text)
    ]
|
|
|
|
def _parse_single_note(token: str, current_beat_div: int) -> TouchNote:
    """
    Parse a single note token (one comma-separated segment).

    Classification is tried in this order and the first match wins: rest,
    end marker, touch note; then, after stripping the break / firework /
    hold / star modifiers, slide with duration, simple slide, bare ``V``
    slide, simultaneous tap, single position, "any digits in the token",
    and finally rest.

    Args:
        token: Raw token text. Inline BPM markers such as ``(200)`` are
            removed before classification.
        current_beat_div: Beat division stored on the returned note.

    Returns:
        A TouchNote whose ``raw`` is the unmodified token and whose flags,
        positions and durations describe the parsed content.

    Examples:
        ""        → rest
        "E"       → end marker
        "1"       → tap at position 1
        "1/8"     → simultaneous tap at 1 and 8
        "1b"      → break at position 1
        "1x"      → firework at position 1
        "1h[4:1]" → hold at position 1, duration 4:1
        "3>6[4:1]"→ slide from 3 to 6, duration 4:1
        "3-6"     → simple slide 3→6
        "C"       → touch center
        "B7/B6"   → touch simultaneous
        "Ch[2:1]" → touch hold
    """
    note = TouchNote(beat_div=current_beat_div, raw=token)

    # An empty segment between two commas is a rest.
    if not token:
        note.is_rest = True
        return note

    t = token.strip()

    if t == "E":
        note.is_end = True
        return note

    # Drop inline BPM markers; a token that held nothing else is a rest.
    t = _RE_BPM_CHANGE.sub("", t).strip()
    if not t:
        note.is_rest = True
        return note

    # --- Touch notes -----------------------------------------------------
    # A token is a touch note when every "/"-separated part, with bracket
    # groups and surrounding modifier characters removed, is a touch region.
    modifier_chars = "hbfxqpb$Vv*^-><"
    touch_parts = re.split(r"[/]", t)
    cleaned_parts = [
        re.sub(r"\[.*?\]", "", part).strip(modifier_chars)
        for part in touch_parts
    ]
    all_touch = all(_RE_TOUCH.match(clean) for clean in cleaned_parts)

    if all_touch and any(
        re.match(r"^[A-E]", part.strip(modifier_chars))
        for part in touch_parts
    ):
        note.is_touch = True
        # Every cleaned part matched _RE_TOUCH, so none of them is empty.
        note.touch_regions.extend(cleaned_parts)

        hold_m = _RE_HOLD.search(t)
        if hold_m:
            note.is_hold = True
            note.hold_duration = (int(hold_m.group(1)), int(hold_m.group(2)))
        return note

    # --- Button notes: peel modifiers off the token one by one -----------
    # Break: trailing "b" or "$b".
    if _RE_BREAK.search(t):
        note.is_break = True
        t = _RE_BREAK.sub("", t).rstrip("$")

    # Firework: trailing "x".
    if t.endswith("x"):
        note.is_firework = True
        t = t[:-1]

    # Hold: "h[a:b]" anywhere in the token; removed once recorded.
    hold_m = _RE_HOLD.search(t)
    if hold_m:
        note.is_hold = True
        note.hold_duration = (int(hold_m.group(1)), int(hold_m.group(2)))
        t = _RE_HOLD.sub("", t)

    # Star: trailing "*".
    if t.endswith("*"):
        note.is_star = True
        t = t[:-1]

    # Slide carrying a bracketed duration, e.g. "3>6[4:1]".
    slide_m = _RE_SLIDE_DUR.search(t)
    if slide_m:
        note.is_slide = True
        note.slide_path = _extract_slide_path(t)
        # The hold suffix has already been stripped from ``t``, so searching
        # it again can never find one; use the flag set above so that a hold
        # duration is not overwritten by the slide duration.
        if not note.is_hold:
            note.hold_duration = (int(slide_m.group(3)), int(slide_m.group(4)))
        # NOTE(review): the first four digit runs of the whole token are
        # taken, so bracketed duration numbers can land in ``positions`` —
        # confirm this is intended by the consumers of ``positions``.
        note.positions = [int(p) for p in re.findall(r"(\d+)", t)[:4]]
        return note

    # Slide without a duration, e.g. "3-6".
    if _RE_SIMPLE_SLIDE.search(t):
        note.is_slide = True
        note.positions = [int(p) for p in re.findall(r"(\d+)", t)[:4]]
        return note

    # Bare "V" optionally followed by digits is flagged as a slide with no
    # positions recorded.
    if re.match(r"^V\d*$", t):
        note.is_slide = True
        return note

    # Simultaneous taps, e.g. "1/8" (two to four numbers).
    simul_m = _RE_SIMUL.match(t)
    if simul_m:
        note.is_simultaneous = True
        note.positions = [int(g) for g in simul_m.groups() if g is not None]
        return note

    # Single position.
    if _RE_POS.match(t):
        note.positions = [int(t)]
        return note

    # Fallback: keep whatever numbers the token contains (at most four).
    num_match = re.findall(r"(\d+)", t)
    if num_match:
        note.positions = [int(p) for p in num_match[:4]]
        return note

    # Nothing recognisable is left: treat as a rest.
    note.is_rest = True
    return note
|
|
|
|
| def _extract_slide_path(token: str) -> list[int]: |
| """Extract all position numbers from a slide pattern like '3>8-6v3[8:9]' → [3,8,6,3].""" |
| nums = re.findall(r"(?<![a-zA-Z\[:])(\d+)(?![a-zA-Z\]])", token) |
| return [int(n) for n in nums] |
|
|
|
|
def _parse_note_sequence(raw_text: str) -> list[TouchNote]:
    """
    Parse the full note sequence from an &inote_N block.

    Handles:
    - Beat division changes: {4}, {8}, {16}, etc. A change stays in effect
      for every following token (on this and later lines) until the next
      change, whether it appears at the start of a line or inside a token.
    - Inline BPM changes: (200)
    - Measure-by-measure comma-separated notes
    - End marker: E (parsing stops after it)

    Args:
        raw_text: The text collected for one &inote_N block.

    Returns:
        The parsed notes in file order, including rests and, when present,
        the end marker as the last element.
    """
    notes: list[TouchNote] = []
    current_div = 4  # division used until the first {n} marker is seen

    for line in raw_text.strip().split("\n"):
        line = line.strip()
        if not line:
            continue

        # Beat division at the very start of the line, e.g. "{8}1,2,3".
        bd_match = _RE_BEAT_DIV.match(line)
        if bd_match:
            current_div = int(bd_match.group(1))
            line = line[bd_match.end():]

        # BPM marker at the start of the line, optionally followed by a
        # beat division, e.g. "(150){4}1,2".
        bpm_match = _RE_BPM_CHANGE.match(line)
        if bpm_match:
            line = line[bpm_match.end():]
            bd_after_bpm = _RE_BEAT_DIV.match(line)
            if bd_after_bpm:
                current_div = int(bd_after_bpm.group(1))
                line = line[bd_after_bpm.end():]

        # The line held only markers and no notes.
        if not line:
            continue

        for token in line.split(","):
            # A division marker inside a token updates the running division
            # so that the tokens after it use it too, not just this one.
            bd_inline = _RE_BEAT_DIV.search(token)
            if bd_inline:
                current_div = int(bd_inline.group(1))
                token = _RE_BEAT_DIV.sub("", token).strip()

            note = _parse_single_note(token, current_div)
            notes.append(note)

            if note.is_end:
                return notes

    return notes
|
|
|
|
def parse_level_value(level_str: str) -> tuple[float, bool, bool]:
    """
    Turn a level string into ``(numeric_value, is_plus, is_ura)``.

    ``is_plus`` is true when the string contains ``+`` and ``is_ura`` when it
    contains ``?``. Both characters are removed before the remainder is
    converted with ``float``; an empty remainder gives ``0.0`` and a
    remainder that is not a number gives ``-1.0``.

    Examples:
        "12.4"  → (12.4, False, False)
        "13+"   → (13.0, True, False)
        "14.6?" → (14.6, False, True)
        "7+"    → (7.0, True, False)
        "耐"    → (-1.0, False, False)  # non-numeric
        ""      → (0.0, False, False)
    """
    has_plus = "+" in level_str
    has_question = "?" in level_str

    # Delete both marker characters in one pass, then trim.
    number_text = level_str.translate({ord("+"): None, ord("?"): None}).strip()
    if not number_text:
        return 0.0, has_plus, has_question

    try:
        number = float(number_text)
    except ValueError:
        number = -1.0
    return number, has_plus, has_question
|
|
|
|
def _make_chart(idx: int, level: str) -> Chart:
    """Build a Chart for difficulty slot *idx* from its raw level string."""
    level_val, is_plus, is_ura = parse_level_value(level)
    return Chart(
        difficulty_index=idx,
        difficulty=Difficulty.from_index(idx),
        level=level,
        level_value=level_val,
        is_plus=is_plus,
        is_ura=is_ura,
    )


def parse_maidata(content: str, song_id: str = "",
                  maidata_path: str = "", audio_path: str = "") -> Song:
    """
    Parse a complete maidata.txt content string into a Song object.

    Lines starting with ``&`` are headers. ``&inote_N=`` opens a note block
    whose text runs until the next header; ``&lv_N`` / ``&des_N`` describe
    difficulty slot N; the remaining recognised keys fill Song metadata.
    Unrecognised keys and values that fail numeric conversion are ignored.

    A chart is created for slot N when ``&lv_N`` parses to a value greater
    than zero, or when the slot has a non-empty level string and note data.
    Charter names from ``&des_N`` are attached to charts regardless of the
    order in which the headers appear.

    Args:
        content: The full text content of maidata.txt
        song_id: Folder name / song identifier
        maidata_path: Relative path to maidata.txt
        audio_path: Relative path to track.mp3

    Returns:
        Song object with all parsed data
    """
    song = Song(song_id=song_id, maidata_path=maidata_path, audio_path=audio_path)

    # Drop a leading byte-order mark so the first "&" header is recognised.
    if content.startswith("\ufeff"):
        content = content[1:]

    raw_notes: dict[int, str] = {}
    charters: dict[int, str] = {}
    current_note_idx: Optional[int] = None
    current_note_lines: list[str] = []

    for line in content.split("\n"):
        line_stripped = line.strip()
        if not line_stripped:
            continue

        if not line_stripped.startswith("&"):
            # Continuation of the note block opened by the last &inote_N;
            # stray text outside a note block is ignored.
            if current_note_idx is not None:
                current_note_lines.append(line_stripped)
            continue

        key, value = parse_header_line(line_stripped)

        # Any header closes the note block that was being collected.
        if current_note_idx is not None:
            raw_notes[current_note_idx] = "\n".join(current_note_lines)
        current_note_idx = None
        current_note_lines = []

        if key.startswith("inote_"):
            idx_str = key.replace("inote_", "")
            try:
                current_note_idx = int(idx_str)
            except ValueError:
                current_note_idx = None
            else:
                # Only buffer the inline value when a block was actually
                # opened, so text from a malformed header cannot leak into
                # the next note block.
                if value:
                    current_note_lines.append(value)
        elif key.startswith("lv_"):
            idx_str = key.replace("lv_", "")
            if not idx_str.isdigit():
                continue
            idx = int(idx_str)
            if not value:
                continue
            song.levels[idx] = value
            level_val, _, _ = parse_level_value(value)
            # Non-numeric or zero levels get a chart later only if the slot
            # also has note data.
            if level_val > 0:
                song.charts[idx] = _make_chart(idx, value)
        elif key.startswith("des_"):
            idx_str = key.replace("des_", "")
            try:
                idx = int(idx_str)
            except ValueError:
                continue
            song.charters[idx] = value
            charters[idx] = value
            if idx in song.charts:
                song.charts[idx].charter = value
        elif key == "title":
            song.title = value
            song.title_clean = _clean_title(value)
            song.tags = _extract_tags(value)
            song.is_full = "[FULL]" in value or "_FULLTOUCH" in value
            song.is_fulltouch = "_FULLTOUCH" in value
        elif key == "artist":
            song.artist = value
        elif key == "artistid":
            try:
                song.artist_id = int(value)
            except ValueError:
                pass  # keep the Song default when the id is not an integer
        elif key == "wholebpm":
            try:
                song.bpm = float(value)
            except ValueError:
                pass  # keep the Song default when the BPM is not a number
        elif key == "first":
            try:
                song.first = float(value)
            except ValueError:
                pass  # keep the Song default when the value is not a number
        elif key == "genre":
            song.genre = value
            song.is_utage = "宴会場" in value
        elif key == "genreid":
            try:
                song.genre_id = int(value)
            except ValueError:
                pass  # keep the Song default when the id is not an integer
        elif key == "cabinet":
            try:
                song.cabinet = Cabinet(value.upper())
            except ValueError:
                song.cabinet = Cabinet.UNKNOWN
        elif key == "version":
            song.version = value
        elif key == "shortid":
            try:
                song.short_id = int(value)
            except ValueError:
                pass  # keep the Song default when the id is not an integer
        elif key == "des":
            song.description = value
        elif key == "ChartConverter":
            song.converter = value
        elif key == "ChartConvertTool":
            song.converter_tool = value
        elif key == "ChartConvertToolVersion":
            song.converter_version = value

    # Flush the note block that was still open at end of file.
    if current_note_idx is not None:
        raw_notes[current_note_idx] = "\n".join(current_note_lines)

    # Parse every note block and attach it to its chart.
    for idx, raw in raw_notes.items():
        notes = _parse_note_sequence(raw)

        if idx in song.charts:
            chart = song.charts[idx]
        elif idx in song.levels and song.levels[idx]:
            # The level did not parse to a positive number, but the slot has
            # note data: create the chart anyway.
            chart = _make_chart(idx, song.levels[idx])
            song.charts[idx] = chart
        else:
            continue

        chart.notes = notes
        chart.compute_stats()

    # Attach charter names last so that &des_N given before &lv_N, and
    # charts created in the loop above, still receive their charter.
    for idx, charter in charters.items():
        if idx in song.charts:
            song.charts[idx].charter = charter

    return song
|
|
|
|
| def _clean_title(title: str) -> str: |
| """Remove bracket tags like [SD], [DX], [宴] from title.""" |
| return re.sub(r"\[.*?\]", "", title).strip() |
|
|
|
|
| def _extract_tags(title: str) -> list[str]: |
| """Extract bracket tags from title, e.g. [SD], [DX], [宴].""" |
| return re.findall(r"\[(.*?)\]", title) |
|
|
|
|
def parse_maidata_file(filepath: str | Path) -> Song:
    """
    Read a maidata.txt file from disk and parse it.

    The file is decoded as UTF-8. The name of the containing folder becomes
    the song id, the file's own name is recorded as the maidata path, and
    the audio path is always recorded as "track.mp3".

    Args:
        filepath: Path to the maidata.txt file

    Returns:
        Parsed Song object
    """
    source = Path(filepath)
    return parse_maidata(
        source.read_text(encoding="utf-8"),
        song_id=source.parent.name,
        maidata_path=source.name,
        audio_path="track.mp3",
    )
|
|