""" Parser for maidata.txt files (MaichartConverter format). Parses: 1. Header key-value pairs (&key=value) 2. Note sequence data (&inote_N= blocks) The note format is measure-based: {beat_div}pos1,pos2,pos3,... Where each position can be: - A number 1-8 (button position) - Empty (rest) - Number/Number (simultaneous tap) - Number + modifiers (h=hold, b=break, x=firework, >=/, <, -, V, v, ^ # Examples: 3>6[4:1], 6<3[8:11], 3-6[4:1], 3v6[16:3], 5>2[8:11], 3>8-6v3[8:9] _RE_SLIDE_DUR = re.compile(r"([>6, 4-1 _RE_SIMPLE_SLIDE = re.compile(r"(\d+)[> tuple[str, str]: """Parse a single &key=value header line. Returns (key, value).""" line = line.strip() if line.startswith("&"): line = line[1:] # strip & if "=" in line: key, value = line.split("=", 1) return key.strip(), value.strip() return line.strip(), "" def _extract_bpm_changes(text: str) -> list[tuple[int, float]]: """Extract inline BPM changes from a measure line. Returns list of (position_in_measure, bpm_value). """ changes = [] for m in _RE_BPM_CHANGE.finditer(text): changes.append((m.start(), float(m.group(1)))) return changes def _parse_single_note(token: str, current_beat_div: int) -> TouchNote: """ Parse a single note token (one comma-separated segment). Examples: "" → rest "E" → end marker "1" → tap at position 1 "1/8" → simultaneous tap at 1 and 8 "1b" → break at position 1 "1x" → firework at position 1 "1h[4:1]" → hold at position 1, duration 4:1 "3>6[4:1]"→ slide from 3 to 6, duration 4:1 "3-6" → simple slide 3→6 "C" → touch center "B7/B6" → touch simultaneous "Ch[2:1]" → touch hold """ note = TouchNote(beat_div=current_beat_div, raw=token) # Empty = rest if not token: note.is_rest = True return note # End marker if token.strip() == "E": note.is_end = True return note # Work on the cleaned token (remove beat division, BPM) t = token.strip() # Remove inline BPM changes for parsing: (200), (150.5) t = _RE_BPM_CHANGE.sub("", t).strip() if not t: note.is_rest = True return note # --- Check for touch notes --- # Touch notes can have modifiers like Ch[2:1], C, B7/B6 # They use letters A-E + number, or just C touch_parts = re.split(r"[/]", t) all_touch = all( _RE_TOUCH.match(re.sub(r"\[.*?\]", "", p).strip("hbfxqpb$Vv*^-><")) for p in touch_parts ) if all_touch and any( re.match(r"^[A-E]", p.strip("hbfxqpb$Vv*^-><")) for p in touch_parts ): note.is_touch = True # Extract touch regions (strip modifiers) for p in touch_parts: clean = re.sub(r"\[.*?\]", "", p).strip("hbfxqpb$Vv*^-><") if clean: note.touch_regions.append(clean) # Parse hold/touch-hold hold_m = _RE_HOLD.search(t) if hold_m: note.is_hold = True note.hold_duration = (int(hold_m.group(1)), int(hold_m.group(2))) return note # --- Parse button note --- # Break suffix if _RE_BREAK.search(t): note.is_break = True t = _RE_BREAK.sub("", t).rstrip("$") # Firework suffix if t.endswith("x"): note.is_firework = True t = t[:-1] # Hold hold_m = _RE_HOLD.search(t) if hold_m: note.is_hold = True note.hold_duration = (int(hold_m.group(1)), int(hold_m.group(2))) t = _RE_HOLD.sub("", t) # Star/EX note (* suffix) if t.endswith("*"): note.is_star = True t = t[:-1] # Slide patterns # Pattern with duration: 3>6[4:1], 3-6[8:11], 3v6[16:3], 3>8-6v3[8:9] slide_m = _RE_SLIDE_DUR.search(t) if slide_m: note.is_slide = True note.slide_path = _extract_slide_path(t) hold_m2 = _RE_HOLD.search(t) if not hold_m2: # Duration is on slide: [beat:subdiv] note.hold_duration = (int(slide_m.group(3)), int(slide_m.group(4))) # Extract positions from slide pos_match = re.findall(r"(\d+)", t) note.positions = [int(p) for p in pos_match[:4]] return note # Simple slide: 3>6, 4-1, 8<5 simple_slide = _RE_SIMPLE_SLIDE.search(t) if simple_slide: note.is_slide = True pos_match = re.findall(r"(\d+)", t) note.positions = [int(p) for p in pos_match[:4]] return note # Single V (slide continuation / endpoint) if re.match(r"^V\d*$", t): note.is_slide = True return note # Simultaneous tap: 1/8, 2/6/7 simul_m = _RE_SIMUL.match(t) if simul_m: note.is_simultaneous = True note.positions = [int(g) for g in simul_m.groups() if g is not None] return note # Simple tap: 1-8 if _RE_POS.match(t): note.positions = [int(t)] return note # Special modifiers: 'q', 'p', 'w', 'z', '$' etc. — treat as tap if has number num_match = re.findall(r"(\d+)", t) if num_match: note.positions = [int(p) for p in num_match[:4]] return note # Unknown / rest note.is_rest = True return note def _extract_slide_path(token: str) -> list[int]: """Extract all position numbers from a slide pattern like '3>8-6v3[8:9]' → [3,8,6,3].""" nums = re.findall(r"(? list[TouchNote]: """ Parse the full note sequence from an &inote_N block. Handles: - Beat division changes: {4}, {8}, {16}, etc. - Inline BPM changes: (200) - Measure-by-measure comma-separated notes - End marker: E """ notes: list[TouchNote] = [] current_div = 4 # default beat division # Normalize: join continuation lines, split by newline text = raw_text.strip() for line in text.split("\n"): line = line.strip() if not line: continue # Check for beat division change at start of line bd_match = _RE_BEAT_DIV.match(line) if bd_match: current_div = int(bd_match.group(1)) line = line[bd_match.end():] # remove {beat_div} prefix # If the line is empty after removing beat div, it's just a beat division # announcement; subsequent lines use this division if not line: continue # The line may start with an inline BPM change like (173){1}, bpm_match = _RE_BPM_CHANGE.match(line) if bpm_match: line = line[bpm_match.end():] # Check for beat div inside the BPM line bd2 = _RE_BEAT_DIV.match(line) if line else None if bd2: current_div = int(bd2.group(1)) line = line[bd2.end():] if not line: continue # Split by commas for individual notes tokens = line.split(",") for token in tokens: # Check for inline beat division change bd_inline = _RE_BEAT_DIV.search(token) if token else None local_div = current_div if bd_inline: local_div = int(bd_inline.group(1)) token = _RE_BEAT_DIV.sub("", token).strip() note = _parse_single_note(token, local_div) notes.append(note) if note.is_end: return notes # stop at E marker return notes def parse_level_value(level_str: str) -> tuple[float, bool, bool]: """ Parse a level string. Returns (numeric_value, is_plus, is_ura). Examples: "12.4" → (12.4, False, False) "13+" → (13.0, True, False) "14.6?" → (14.6, False, True) "7+" → (7.0, True, False) "耐" → (-1.0, False, False) # non-numeric "" → (0.0, False, False) """ is_plus = "+" in level_str is_ura = "?" in level_str # Strip non-numeric except . + ? clean = level_str.replace("+", "").replace("?", "").strip() try: val = float(clean) if clean else 0.0 except ValueError: val = -1.0 return val, is_plus, is_ura def parse_maidata(content: str, song_id: str = "", maidata_path: str = "", audio_path: str = "") -> Song: """ Parse a complete maidata.txt content string into a Song object. Args: content: The full text content of maidata.txt song_id: Folder name / song identifier maidata_path: Relative path to maidata.txt audio_path: Relative path to track.mp3 Returns: Song object with all parsed data """ song = Song(song_id=song_id, maidata_path=maidata_path, audio_path=audio_path) # Strip BOM if present if content.startswith("\ufeff"): content = content[1:] lines = content.split("\n") raw_notes: dict[int, str] = {} # difficulty_index → raw note text current_note_idx: Optional[int] = None current_note_lines: list[str] = [] for line in lines: line_stripped = line.strip() if not line_stripped: continue if line_stripped.startswith("&"): key, value = parse_header_line(line_stripped) # If we were collecting note data, flush it if current_note_idx is not None: raw_notes[current_note_idx] = "\n".join(current_note_lines) current_note_idx = None current_note_lines = [] if key.startswith("inote_"): # Note data block idx_str = key.replace("inote_", "") try: current_note_idx = int(idx_str) except ValueError: current_note_idx = None if value: current_note_lines.append(value) elif key.startswith("lv_"): idx_str = key.replace("lv_", "") if not idx_str.isdigit(): continue idx = int(idx_str) if not value: continue # skip empty levels (no chart) song.levels[idx] = value level_val, is_plus, is_ura = parse_level_value(value) if level_val > 0: # Create chart placeholder (notes parsed later) diff = Difficulty.from_index(idx) chart = Chart( difficulty_index=idx, difficulty=diff, level=value, level_value=level_val, is_plus=is_plus, is_ura=is_ura, ) song.charts[idx] = chart elif key.startswith("des_"): idx_str = key.replace("des_", "") try: idx = int(idx_str) except ValueError: continue song.charters[idx] = value if idx in song.charts: song.charts[idx].charter = value elif key == "title": song.title = value song.title_clean = _clean_title(value) song.tags = _extract_tags(value) song.is_full = "[FULL]" in value or "_FULLTOUCH" in value song.is_fulltouch = "_FULLTOUCH" in value elif key == "artist": song.artist = value elif key == "artistid": try: song.artist_id = int(value) except ValueError: pass elif key == "wholebpm": try: song.bpm = float(value) except ValueError: pass elif key == "first": try: song.first = float(value) except ValueError: pass elif key == "genre": song.genre = value song.is_utage = "宴会場" in value elif key == "genreid": try: song.genre_id = int(value) except ValueError: pass elif key == "cabinet": try: song.cabinet = Cabinet(value.upper()) except ValueError: song.cabinet = Cabinet.UNKNOWN elif key == "version": song.version = value elif key == "shortid": try: song.short_id = int(value) except ValueError: pass elif key == "des": song.description = value elif key == "ChartConverter": song.converter = value elif key == "ChartConvertTool": song.converter_tool = value elif key == "ChartConvertToolVersion": song.converter_version = value elif current_note_idx is not None: # Collect note data lines current_note_lines.append(line_stripped) # Flush last note block if current_note_idx is not None: raw_notes[current_note_idx] = "\n".join(current_note_lines) # ── Parse all collected note sequences ── for idx, raw in raw_notes.items(): notes = _parse_note_sequence(raw) if idx in song.charts: song.charts[idx].notes = notes song.charts[idx].compute_stats() elif idx in song.levels and song.levels[idx]: # Chart has notes + level but was skipped due to level_val being non-numeric (e.g. "耐") level_val, is_plus, is_ura = parse_level_value(song.levels[idx]) diff = Difficulty.from_index(idx) chart = Chart( difficulty_index=idx, difficulty=diff, level=song.levels[idx], level_value=level_val, is_plus=is_plus, is_ura=is_ura, ) chart.notes = notes chart.compute_stats() song.charts[idx] = chart return song def _clean_title(title: str) -> str: """Remove bracket tags like [SD], [DX], [宴] from title.""" return re.sub(r"\[.*?\]", "", title).strip() def _extract_tags(title: str) -> list[str]: """Extract bracket tags from title, e.g. [SD], [DX], [宴].""" return re.findall(r"\[(.*?)\]", title) def parse_maidata_file(filepath: str | Path) -> Song: """ Parse a maidata.txt file from disk. Args: filepath: Path to the maidata.txt file Returns: Parsed Song object """ filepath = Path(filepath) song_id = filepath.parent.name content = filepath.read_text(encoding="utf-8") # Build relative paths maidata_rel = filepath.name audio_rel = "track.mp3" return parse_maidata( content, song_id=song_id, maidata_path=maidata_rel, audio_path=audio_rel, )