# Score_To_MML / core / music_parser.py
# Coconuttttt's picture
# Initial deployment: Score to MML converter
# daa0bdd
"""
core/music_parser.py
Parser that converts OMR results into a list of NoteEvent objects.
Supported formats:
- "mock_events": MockOMRAdapter output (list of dicts)
- "musicxml": MusicXML string (score-partwise format, parsed with stdlib xml.etree)
MusicXML parsing coverage:
- Single notes and chords (<chord>)
- Rests (<rest>) -> converted to a NoteEvent with pitch=0 (keeps the offset flow intact)
- <backup>/<forward> handling (keeps offsets correct across multiple voices)
- Tracking of <divisions> changes
- <key><fifths> key-signature parsing -> semitone adjustment of notes
- <sound tempo=""> tempo parsing
- voice/staff information is preserved
- Namespaces are stripped automatically
- Tie (<tie>) handling: durations of tied notes with the same pitch are summed
Not supported:
- score-timewise format
- grace notes (skipped)
- slurs (articulation)
- repeat expansion (D.S. / D.C. / Coda / Segno)
"""
from __future__ import annotations

import math
import xml.etree.ElementTree as ET
from dataclasses import replace as dc_replace
from typing import List

from .models import NoteEvent
class ParseError(Exception):
    """Parsing failure; the message states which file or stage failed."""
# ---------------------------------------------------------------------------
# Public interface
# ---------------------------------------------------------------------------
def parse_omr_result(omr_result: dict) -> tuple[List[NoteEvent], dict]:
    """
    Convert an OMR result dict into ``(NoteEvent list, metadata dict)``.

    The ``"format"`` entry selects the parser:
        - "mock_events": ``omr_result["data"]`` goes to ``_parse_mock_events``;
          the tempo is reported as 0.
        - "musicxml": ``omr_result["data"]`` goes to ``_parse_musicxml``.

    Metadata keys:
        - "tempo": int (BPM; 0 means no tempo was found)

    Raises:
        ParseError: unknown format, missing "data" payload, or parse failure.
    """
    fmt = omr_result.get("format")
    if fmt not in ("mock_events", "musicxml"):
        raise ParseError(f"์ง€์›ํ•˜์ง€ ์•Š๋Š” OMR ๊ฒฐ๊ณผ ํ˜•์‹: '{fmt}'")

    # A missing payload is a parse failure, so report it through ParseError
    # instead of letting a bare KeyError escape the documented contract.
    try:
        payload = omr_result["data"]
    except KeyError as e:
        raise ParseError(
            f"OMR result has no 'data' field (format '{fmt}')"
        ) from e

    if fmt == "mock_events":
        return _parse_mock_events(payload), {"tempo": 0}
    return _parse_musicxml(payload)
def parse_musicxml_file(xml_path: str) -> tuple[List[NoteEvent], dict]:
    """
    Parse a MusicXML file directly from its path (useful for tests and direct use).

    The file is read as UTF-8 text; undecodable bytes are replaced instead of
    raising (``errors="replace"``).

    Returns:
        (notes, metadata) -- same shape as ``parse_omr_result``.

    Raises:
        ParseError: the file could not be read, or its content failed to parse.
    """
    try:
        with open(xml_path, "r", encoding="utf-8", errors="replace") as f:
            xml_string = f.read()
    except OSError as e:
        # Chain the OS error so errno and the original traceback stay attached.
        raise ParseError(f"MusicXML ํŒŒ์ผ ์ฝ๊ธฐ ์‹คํŒจ ({xml_path}): {e}") from e
    return _parse_musicxml(xml_string, source_hint=xml_path)
# ---------------------------------------------------------------------------
# Internal implementation
# ---------------------------------------------------------------------------
def _parse_mock_events(raw_notes: list) -> List[NoteEvent]:
    """
    Build NoteEvent objects from a list of raw note mappings.

    Each item must provide "pitch", "start" and "duration"; "staff" and
    "voice" default to 1 and "part_hint" to None.

    Returns:
        The events sorted by (start, staff, voice).

    Raises:
        ParseError: an item is missing a required key, holds a value that
            cannot be converted to a number, or is not a mapping at all.
    """
    events = []
    for raw in raw_notes:
        try:
            event = NoteEvent(
                pitch=int(raw["pitch"]),
                start=float(raw["start"]),
                duration=float(raw["duration"]),
                staff=int(raw.get("staff", 1)),
                voice=int(raw.get("voice", 1)),
                part_hint=raw.get("part_hint"),
            )
        except (KeyError, ValueError, TypeError, AttributeError) as e:
            # TypeError covers values such as None (int(None)); AttributeError
            # covers items that are not dict-like (no .get).
            raise ParseError(f"์Œํ‘œ ๋ฐ์ดํ„ฐ ํŒŒ์‹ฑ ์˜ค๋ฅ˜: {raw!r} โ€” {e}") from e
        events.append(event)
    events.sort(key=lambda n: (n.start, n.staff, n.voice))
    return events
# Semitone offset of each natural step above C: C=0, D=2, E=4, F=5, G=7, A=9, B=11
_STEP_SEMITONE = {"C": 0, "D": 2, "E": 4, "F": 5, "G": 7, "A": 9, "B": 11}
# Order in which a key signature adds its sharps / flats.
_KEY_SHARPS = ["F", "C", "G", "D", "A", "E", "B"]  # 1 sharp = F#, 2 sharps = F# C#, ...
_KEY_FLATS = ["B", "E", "A", "D", "G", "C", "F"]  # 1 flat = Bb, 2 flats = Bb Eb, ...


def _get_key_alters(fifths: int) -> dict[str, int]:
    """
    Map a key-signature ``fifths`` value to a ``{step name: alter}`` dict.

    Positive values take steps from the sharp order with alter +1, negative
    values take steps from the flat order with alter -1, and 0 gives an empty
    dict. At most seven steps are altered.

    Examples:
        fifths=2  (D major) -> {"F": 1, "C": 1}
        fifths=-1 (F major) -> {"B": -1}
    """
    if fifths > 0:
        order, alter = _KEY_SHARPS, 1
    elif fifths < 0:
        order, alter = _KEY_FLATS, -1
    else:
        return {}
    count = min(abs(fifths), 7)
    return dict.fromkeys(order[:count], alter)
def _parse_tempo(root: ET.Element) -> int:
    """
    Return the first usable ``<sound tempo="N"/>`` value found under *root*.

    The attribute is parsed as a float and truncated to an int. ``<sound>``
    elements without a tempo, or with one that cannot be converted (not a
    number, NaN, infinity), are skipped and the search continues.

    Returns:
        The tempo as an int, or 0 when no usable tempo exists.
    """
    for elem in root.iter("sound"):
        tempo_str = elem.get("tempo")
        if not tempo_str:
            continue
        try:
            return int(float(tempo_str))
        except (ValueError, TypeError, OverflowError):
            # ValueError: non-numeric text or NaN; OverflowError: infinity
            # (e.g. "inf", "1e999"). Either way, try the next <sound>.
            continue
    return 0
def _parse_musicxml(xml_string: str, source_hint: str = "") -> tuple[List[NoteEvent], dict]:
    """
    Convert a MusicXML string into ``(NoteEvent list, metadata)``.

    Walks every <part>/<measure> of a score-partwise document, tracking the
    current position inside the measure, and turns each <note> into a
    NoteEvent via ``_parse_note``. Tied notes are merged into the first note
    of the tie.

    Args:
        xml_string: MusicXML document as an XML string.
        source_hint: file name/path to include in error messages (optional).

    Returns:
        (events, metadata) -- events are sorted by (start, staff, voice);
        metadata contains "tempo".

    Raises:
        ParseError: the XML is not well-formed, or the root element is not
            score-partwise.
    """
    src = f" ({source_hint})" if source_hint else ""
    # NOTE(review): the stdlib xml.etree documentation warns that it is not
    # hardened against maliciously constructed XML -- confirm whether
    # xml_string can come from untrusted sources.
    try:
        root = ET.fromstring(xml_string)
    except ET.ParseError as e:
        raise ParseError(f"MusicXML XML ๊ตฌ๋ฌธ ์˜ค๋ฅ˜{src}: {e}")

    # Strip namespaces so documents with an xmlns are handled identically.
    for elem in root.iter():
        if "}" in elem.tag:
            elem.tag = elem.tag.split("}")[1]

    root_tag = root.tag
    if root_tag != "score-partwise":
        raise ParseError(
            f"์ง€์›ํ•˜์ง€ ์•Š๋Š” MusicXML ๋ฃจํŠธ ์š”์†Œ{src}: '{root_tag}'\n"
            f" score-partwise ํ˜•์‹๋งŒ ์ง€์›ํ•ฉ๋‹ˆ๋‹ค. "
            f"score-timewise๋Š” MuseScore/Audiveris์—์„œ ๋ณ€ํ™˜ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค."
        )

    # Extract the tempo.
    tempo = _parse_tempo(root)

    events: List[NoteEvent] = []
    # Tie tracking: key=(part_id, pitch, voice, staff) -> index into `events`
    # of the note that opened the tie.
    tie_pending: dict[tuple, int] = {}

    for part_idx, part_elem in enumerate(root.findall("part")):
        part_id = part_elem.get("id", f"P{part_idx + 1}")
        divisions = 1  # <divisions>: XML duration units per quarter note
        measure_start = 0.0  # start beat of the current measure
        key_alters: dict[str, int] = {}  # key-signature adjustment (step name -> alter)

        for measure_elem in part_elem.findall("measure"):
            current_beat = 0.0  # current position inside the measure
            prev_note_beat = 0.0  # start of the previous non-chord note (for chords)
            max_beat = 0.0  # furthest position reached in the measure (kept after <backup>)

            for child in measure_elem:
                tag = child.tag

                # attributes: update divisions and key signature.
                # Unparseable values are ignored and the previous value stays.
                if tag == "attributes":
                    div_elem = child.find("divisions")
                    if div_elem is not None and div_elem.text:
                        try:
                            divisions = int(div_elem.text)
                        except ValueError:
                            pass
                    key_elem = child.find("key")
                    if key_elem is not None:
                        fifths_elem = key_elem.find("fifths")
                        if fifths_elem is not None and fifths_elem.text:
                            try:
                                key_alters = _get_key_alters(int(fifths_elem.text))
                            except ValueError:
                                pass

                elif tag == "note":
                    note_event = _parse_note(
                        child, divisions, part_idx,
                        measure_start, current_beat, prev_note_beat,
                        part_id, source_hint, key_alters,
                    )
                    if note_event is not None:
                        # Tie handling (rests, i.e. pitch 0, are excluded).
                        if note_event.pitch != 0:
                            tie_stop = any(
                                t.get("type") == "stop"
                                for t in child.findall("tie")
                            )
                            tie_start = any(
                                t.get("type") == "start"
                                for t in child.findall("tie")
                            )
                            tie_key = (
                                part_id,
                                note_event.pitch,
                                note_event.voice,
                                note_event.staff,
                            )
                            if tie_stop and tie_key in tie_pending:
                                # Add this duration to the note that opened the tie.
                                idx = tie_pending.pop(tie_key)
                                old = events[idx]
                                events[idx] = dc_replace(
                                    old, duration=old.duration + note_event.duration
                                )
                                # The tie continues: keep pointing at the same first note.
                                if tie_start:
                                    tie_pending[tie_key] = idx
                                # No new event is appended for the tied-to note.
                            else:
                                events.append(note_event)
                                if tie_start:
                                    tie_pending[tie_key] = len(events) - 1
                        else:
                            events.append(note_event)

                    # Advance the position only for non-chord notes; chord
                    # members share the start of the preceding note.
                    is_chord = child.find("chord") is not None
                    dur_beats = _get_duration_beats(child, divisions)
                    if not is_chord:
                        prev_note_beat = current_beat
                        current_beat += dur_beats
                        max_beat = max(max_beat, current_beat)

                elif tag == "backup":
                    # Move back inside the measure, never before its start.
                    dur_beats = _get_duration_beats(child, divisions)
                    current_beat = max(0.0, current_beat - dur_beats)

                elif tag == "forward":
                    dur_beats = _get_duration_beats(child, divisions)
                    current_beat += dur_beats
                    max_beat = max(max_beat, current_beat)

            # Even with <backup>, the measure length is the furthest position reached.
            measure_start += max_beat

    # Empty result is returned explicitly (sorting it would be a no-op anyway).
    if not events:
        return [], {"tempo": tempo}

    events.sort(key=lambda n: (n.start, n.staff, n.voice))
    return events, {"tempo": tempo}
def _parse_note(
    note_elem: ET.Element,
    divisions: int,
    part_idx: int,
    measure_start: float,
    current_beat: float,
    prev_note_beat: float,
    part_id: str,
    source_hint: str,
    key_alters: dict[str, int],
) -> NoteEvent | None:
    """
    Convert a single <note> element into a NoteEvent.

    Args:
        note_elem: the <note> element.
        divisions: XML duration units per quarter note.
        part_idx: zero-based index of the enclosing part; used to make staff
            numbers unique across parts.
        measure_start: absolute start beat of the enclosing measure.
        current_beat: current position inside the measure.
        prev_note_beat: start position of the previous non-chord note; a
            <chord> note starts there instead of at ``current_beat``.
        part_id: id of the enclosing part (used in error messages).
        source_hint: file name/path for error messages (may be empty).
        key_alters: ``{step name: alter}`` derived from the key signature,
            applied to notes that carry no <alter> element.

    Returns:
        A NoteEvent (pitch 0 for rests), or None when the note has no usable
        duration (e.g. grace notes) or has neither <rest> nor <pitch>.

    Raises:
        ParseError: <voice>, <staff>, <octave> or <alter> holds text that is
            not a valid number.
    """

    def _number(parent: ET.Element, tag: str, convert):
        """Return convert(text) of child *tag*, or None if absent or empty."""
        child = parent.find(tag)
        if child is None or not child.text:
            return None
        try:
            return convert(child.text)
        except (ValueError, OverflowError) as e:
            src = f" ({source_hint})" if source_hint else ""
            raise ParseError(
                f"Invalid <{tag}> value {child.text!r} in part '{part_id}'{src}"
            ) from e

    is_chord = note_elem.find("chord") is not None
    is_rest = note_elem.find("rest") is not None

    dur_beats = _get_duration_beats(note_elem, divisions)
    if dur_beats == 0.0:
        # Grace note or zero duration -- skip.
        return None

    # Chord members share the start of the note they are stacked on.
    note_beat = prev_note_beat if is_chord else current_beat
    abs_start = measure_start + note_beat

    voice = _number(note_elem, "voice", int)
    if voice is None:
        voice = 1

    staff_raw = _number(note_elem, "staff", int)
    if staff_raw is None:
        # Without <staff> (e.g. Audiveris output) use the voice as a stand-in,
        # so that voices still end up separated.
        staff_raw = voice

    # Make staff numbers globally unique so they do not collide across parts,
    # e.g. P1/staff1=1, P1/staff2=2, P2/staff1=11, P2/staff2=12.
    staff = part_idx * 10 + staff_raw

    if is_rest:
        return NoteEvent(
            pitch=0,
            start=abs_start,
            duration=dur_beats,
            staff=staff,
            voice=voice,
        )

    pitch_elem = note_elem.find("pitch")
    if pitch_elem is None:
        return None

    step_elem = pitch_elem.find("step")
    step = step_elem.text.strip().upper() if step_elem is not None and step_elem.text else "C"

    octave = _number(pitch_elem, "octave", int)
    if octave is None:
        octave = 4

    # An explicit <alter> wins; otherwise fall back to the key signature.
    # NOTE(review): standard MusicXML writes <alter> for every altered pitch,
    # so this fallback may alter notes that were meant to be natural --
    # confirm it is intended for the OMR sources in use.
    alter = _number(pitch_elem, "alter", lambda text: int(float(text)))
    if alter is None:
        alter = key_alters.get(step, 0)

    semitone = _STEP_SEMITONE.get(step, 0)  # unknown step names count as C
    pitch = (octave + 1) * 12 + semitone + alter
    pitch = max(0, min(127, pitch))  # clamp to 0..127

    return NoteEvent(
        pitch=pitch,
        start=abs_start,
        duration=dur_beats,
        staff=staff,
        voice=voice,
    )
def _get_duration_beats(elem: ET.Element, divisions: int) -> float:
    """
    Convert the <duration> child of *elem* into beats (quarter-note units).

    The duration text is divided by ``divisions``; a ``divisions`` below 1 is
    treated as 1. Decimal duration values (e.g. "1.5") are accepted.

    Returns:
        The duration in beats, or 0.0 when <duration> is missing, empty,
        not a number, or not finite.
    """
    dur_elem = elem.find("duration")
    if dur_elem is None or not dur_elem.text:
        return 0.0
    try:
        units = float(dur_elem.text)
    except ValueError:
        return 0.0
    # float() accepts "nan"/"inf"; treat them as unparseable so that no
    # non-finite value leaks into the beat positions.
    if not math.isfinite(units):
        return 0.0
    return units / max(1, divisions)