Coconuttttt committed on
Commit
daa0bdd
·
1 Parent(s): b4d884a

Initial deployment: Score to MML converter

Browse files

- Gradio web UI with queue (max 10, concurrency 1)
- Audiveris OMR built from source (Java 25)
- PDF max 5 pages limit
- JVM heap capped at 1500m for HF Spaces 2GB RAM
- Preprocessing optional (default: none)
- Upscale limited to PIL 2x/3x (no waifu2x)

.dockerignore ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ .git/
4
+ .gradio/
5
+ models/
6
+ dataset/
7
+ backup/
8
+ dist/
9
+ build/
10
+ *.pt
11
+ *.png
12
+ *.jpg
13
+ *.jpeg
14
+ *.musicxml
15
+ *.mml
16
+ *.txt
17
+ *.zip
18
+ *.spec
19
+ *.stackdump
20
+ resume_state.json
21
+ app_local/
22
+ app_web/
23
+ scripts/
24
+ tests/
25
+ sample_data/
26
+ artifacts/
27
+ python/
28
+ run_*.py
29
+ compare_*.py
30
+ convert_*.py
31
+ debug_*.py
32
+ discord_*.py
33
+ test_*.py
Dockerfile ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ==============================================================================
2
+ # Stage 1: Build Audiveris from source (needs JDK 25 + Gradle)
3
+ # ==============================================================================
4
+ FROM eclipse-temurin:25-jdk AS audiveris-builder
5
+
6
+ RUN apt-get update && apt-get install -y --no-install-recommends git && \
7
+ rm -rf /var/lib/apt/lists/*
8
+
9
+ # Clone Audiveris source
10
+ RUN git clone --depth 1 https://github.com/Audiveris/audiveris.git /audiveris
11
+
12
+ WORKDIR /audiveris
13
+
14
+ # Build Audiveris (installDist creates bin/ + lib/ with all JARs)
15
+ RUN chmod +x gradlew && \
16
+ ./gradlew :app:installDist -x test -x javadoc --no-daemon
17
+
18
+ # ==============================================================================
19
+ # Stage 2: Runtime image (JRE 25 + distro-provided Python 3 + Tesseract)
20
+ # ==============================================================================
21
+ FROM eclipse-temurin:25-jre
22
+
23
+ # Install Python, Tesseract, and system deps
24
+ RUN apt-get update && apt-get install -y --no-install-recommends \
25
+ python3 \
26
+ python3-pip \
27
+ python3-venv \
28
+ tesseract-ocr \
29
+ tesseract-ocr-eng \
30
+ libgl1 \
31
+ libglib2.0-0 \
32
+ && rm -rf /var/lib/apt/lists/*
33
+
34
+ # Copy built Audiveris
35
+ COPY --from=audiveris-builder /audiveris/app/build/install/app /opt/audiveris
36
+
37
+ # Make Audiveris script executable
38
+ RUN chmod +x /opt/audiveris/bin/Audiveris
39
+
40
+ # Set up Python virtual env (HF Spaces best practice)
41
+ RUN python3 -m venv /opt/venv
42
+ ENV PATH="/opt/venv/bin:$PATH"
43
+
44
+ # Install Python dependencies
45
+ COPY requirements-server.txt /tmp/requirements.txt
46
+ RUN pip install --no-cache-dir -r /tmp/requirements.txt
47
+
48
+ # Copy application code
49
+ WORKDIR /app
50
+ COPY core/ ./core/
51
+ COPY app_gradio.py .
52
+ COPY convert_3part.py .
53
+
54
+ # Environment variables
55
+ ENV AUDIVERIS_BIN=/opt/audiveris/bin/Audiveris
56
+ ENV AUDIVERIS_MAX_HEAP=1500m
57
+ ENV JAVA_TOOL_OPTIONS="-Djava.awt.headless=true"
58
+ ENV MAX_PDF_PAGES=5
59
+ ENV GRADIO_SERVER_NAME=0.0.0.0
60
+ ENV GRADIO_SERVER_PORT=7860
61
+
62
+ EXPOSE 7860
63
+
64
+ CMD ["python", "app_gradio.py", "--port", "7860"]
README.md CHANGED
@@ -1,11 +1,23 @@
1
  ---
2
- title: Score To MML
3
- emoji: ๐Ÿ”ฅ
4
- colorFrom: indigo
5
  colorTo: purple
6
  sdk: docker
7
- pinned: false
8
- short_description: Convert Image of Score to MML
9
  ---
10
 
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Score to MML Converter
3
+ emoji: 🎵
4
+ colorFrom: blue
5
  colorTo: purple
6
  sdk: docker
7
+ app_port: 7860
 
8
  ---
9
 
10
+ # 악보 → MML 변환기
11
+
12
+ PDF ๋˜๋Š” ์ด๋ฏธ์ง€ ์•…๋ณด๋ฅผ ์—…๋กœ๋“œํ•˜๋ฉด ๋งˆ๋น„๋…ธ๊ธฐ MML๋กœ ๋ณ€ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
13
+
14
+ ## ๊ธฐ๋Šฅ
15
+ - PDF / PNG / JPG ์•…๋ณด ์ž…๋ ฅ
16
+ - Audiveris OMR ์—”์ง„์œผ๋กœ ์Œํ‘œ ์ธ์‹
17
+ - MusicXML โ†’ MML ์ž๋™ ๋ณ€ํ™˜
18
+ - MML / MXL / XML ๋‹ค์šด๋กœ๋“œ
19
+
20
+ ## ์ œํ•œ์‚ฌํ•ญ
21
+ - PDF ์ตœ๋Œ€ 5ํŽ˜์ด์ง€
22
+ - ๋™์‹œ ์ฒ˜๋ฆฌ: 1๊ฑด (ํ ๋Œ€๊ธฐ ์ตœ๋Œ€ 10๋ช…)
23
+ - ๋ณต์žกํ•œ ์•…๋ณด๋Š” ์ธ์‹ ์ •ํ™•๋„๊ฐ€ ๋–จ์–ด์งˆ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค
app_gradio.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ app_gradio.py
3
+
4
+ Gradio ์›น UI โ€” ์•…๋ณด(PDF/PNG/JPG) ์—…๋กœ๋“œ โ†’ MML ๋ณ€ํ™˜ ๊ฒฐ๊ณผ ๋ฐ˜ํ™˜.
5
+
6
+ ์‹คํ–‰:
7
+ python app_gradio.py
8
+
9
+ ์™ธ๋ถ€ ์ ‘์†:
10
+ python app_gradio.py --share
11
+
12
+ Hugging Face Spaces ๋ฐฐํฌ:
13
+ Dockerfile์—์„œ ENTRYPOINT๋กœ ์‹คํ–‰๋จ
14
+ """
15
+
16
+ import argparse
17
+ import datetime
18
+ import os
19
+ import tempfile
20
+ from pathlib import Path
21
+
22
+ import gradio as gr
23
+
24
+ from core.convert_pipeline import run_score_pipeline
25
+
26
+
27
+ SUPPORTED_EXTENSIONS = [".pdf", ".png", ".jpg", ".jpeg"]
28
+ MAX_PDF_PAGES = int(os.environ.get("MAX_PDF_PAGES", "5"))
29
+
30
+ _PREPROCESS_MAP = {"์—†์Œ": "none", "Otsu": "otsu", "Adaptive": "adaptive", "๋Œ€๋น„๊ฐ•ํ™”": "contrast"}
31
+ _UPSCALE_MAP = {
32
+ "์—†์Œ": "none",
33
+ "PIL 2ร—": "pil_2", "PIL 3ร—": "pil_3",
34
+ }
35
+
36
+
37
+ def _check_pdf_pages(file_path: str) -> str | None:
38
+ """PDF ํŽ˜์ด์ง€ ์ˆ˜ ์ œํ•œ ๊ฒ€์‚ฌ. ์ดˆ๊ณผ ์‹œ ์—๋Ÿฌ ๋ฉ”์‹œ์ง€ ๋ฐ˜ํ™˜."""
39
+ if Path(file_path).suffix.lower() != ".pdf":
40
+ return None
41
+ try:
42
+ import fitz
43
+ doc = fitz.open(file_path)
44
+ count = doc.page_count
45
+ doc.close()
46
+ if count > MAX_PDF_PAGES:
47
+ return f"PDF๊ฐ€ {count}ํŽ˜์ด์ง€์ž…๋‹ˆ๋‹ค. ์ตœ๋Œ€ {MAX_PDF_PAGES}ํŽ˜์ด์ง€๊นŒ์ง€ ์ง€์›ํ•ฉ๋‹ˆ๋‹ค."
48
+ except Exception:
49
+ pass
50
+ return None
51
+
52
+
53
def convert(file_path: str, preprocess: str, dpi: int, upscale: str, progress=gr.Progress()) -> tuple[str, str | None, list, list, str]:
    """Run the score pipeline on an uploaded file and package the results.

    Args:
        file_path: Path of the uploaded file, or ``None`` when nothing was uploaded.
        preprocess: Preprocess radio label; unknown labels map to ``"none"``.
        dpi: DPI value forwarded to the pipeline.
        upscale: Upscale radio label; unknown labels map to ``"none"``.
        progress: Gradio progress tracker used to report pipeline progress.

    Returns:
        ``(mml_text, mml_file_path, mxl_paths, xml_paths, message)``. On any
        failure the first four items are ``""``, ``None``, ``[]``, ``[]`` and
        the last item carries the error text.
    """

    def _failure(message):
        # Uniform "nothing produced" result; fresh lists on every call.
        return "", None, [], [], message

    if file_path is None:
        return _failure("ํŒŒ์ผ์„ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”.")

    source = Path(file_path)
    ext = source.suffix.lower()
    if ext not in SUPPORTED_EXTENSIONS:
        return _failure(f"์ง€์›ํ•˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹์ž…๋‹ˆ๋‹ค: {ext}\n์ง€์› ํ˜•์‹: PDF, PNG, JPG")

    page_err = _check_pdf_pages(file_path)
    if page_err:
        return _failure(page_err)

    def _report(frac, desc):
        # Bridge the pipeline's (fraction, description) callback to Gradio.
        return progress(frac, desc=desc)

    try:
        combined, mxl_paths, xml_paths, warnings = run_score_pipeline(
            input_path=file_path,
            preprocess_mode=_PREPROCESS_MAP.get(preprocess, "none"),
            dpi=dpi,
            upscale_mode=_UPSCALE_MAP.get(upscale, "none"),
            on_progress=_report,
            correct_xml=False,
        )
    except Exception as e:
        return _failure(f"๋ณ€ํ™˜ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ:\n{e}")

    # Write the MML next to other temp files as <input stem>_mml_<timestamp>.mml
    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    mml_path = Path(tempfile.gettempdir()) / f"{source.stem}_mml_{timestamp}.mml"
    mml_path.write_text(combined, encoding="utf-8")

    if warnings:
        warnings_text = "\n".join(f"[WARN] {w}" for w in warnings)
    else:
        warnings_text = "๊ฒฝ๊ณ  ์—†์Œ"
    return combined, str(mml_path), mxl_paths, xml_paths, warnings_text
86
+
87
+
88
def build_ui() -> gr.Blocks:
    """Build the Gradio Blocks UI and wire the convert button to ``convert``.

    The selectable labels are taken from ``_PREPROCESS_MAP`` / ``_UPSCALE_MAP``
    and the accepted file types from ``SUPPORTED_EXTENSIONS``, so the widgets
    cannot drift from the values ``convert`` actually understands.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    with gr.Blocks(title="์•…๋ณด โ†’ MML ๋ณ€ํ™˜๊ธฐ") as demo:
        gr.Markdown("# ์•…๋ณด โ†’ MML ๋ณ€ํ™˜๊ธฐ")
        gr.Markdown("PDF ๋˜๋Š” ์ด๋ฏธ์ง€ ์•…๋ณด๋ฅผ ์—…๋กœ๋“œํ•˜๋ฉด ๋งˆ๋น„๋…ธ๊ธฐ MML๋กœ ๋ณ€ํ™˜ํ•ด๋“œ๋ฆฝ๋‹ˆ๋‹ค.")

        with gr.Row():
            # Left column: input file and conversion options.
            with gr.Column():
                file_input = gr.File(
                    label="์•…๋ณด ํŒŒ์ผ (PDF, PNG, JPG)",
                    file_types=list(SUPPORTED_EXTENSIONS),
                )
                preprocess_radio = gr.Radio(
                    choices=list(_PREPROCESS_MAP),
                    value="์—†์Œ",
                    label="์ „์ฒ˜๋ฆฌ (์—†์Œ=์›๋ณธ, Otsu=๊นจ๋—ํ•œ์Šค์บ”, Adaptive=์กฐ๋ช…๋ถˆ๊ท ์ผ, ๋Œ€๋น„๊ฐ•ํ™”=ํ๋ฆฐ์Šค์บ”)",
                )
                dpi = gr.Radio(
                    choices=[150, 300, 450, 600],
                    value=300,
                    label="DPI (PDF๋งŒ ํ•ด๋‹น, 300 ๊ถŒ์žฅ)",
                )
                upscale_radio = gr.Radio(
                    choices=list(_UPSCALE_MAP),
                    value="์—†์Œ",
                    label="์—…์Šค์ผ€์ผ (PIL ๋ฆฌ์‚ฌ์ด์ฆˆ โ€” ์ €ํ•ด์ƒ๋„ ์Šค์บ”์— ํšจ๊ณผ์ )",
                )
                convert_btn = gr.Button("๋ณ€ํ™˜ ์‹œ์ž‘", variant="primary")

            # Right column: MML text, downloadable files and warnings.
            with gr.Column():
                mml_output = gr.Textbox(
                    label="MML ๊ฒฐ๊ณผ (์œ„: ์ „์ฒดํ™”์Œ NํŒŒํŠธ / ๊ตฌ๋ถ„์„  / ์•„๋ž˜: 3ํŒŒํŠธ)",
                    lines=25,
                    show_copy_button=True,
                    placeholder="๋ณ€ํ™˜ ๊ฒฐ๊ณผ๊ฐ€ ์—ฌ๊ธฐ์— ํ‘œ์‹œ๋ฉ๋‹ˆ๋‹ค.",
                )
                mml_file_output = gr.File(
                    label="MML ๋‹ค์šด๋กœ๋“œ",
                    file_count="single",
                )
                mxl_output = gr.File(
                    label="MXL ๋‹ค์šด๋กœ๋“œ (์••์ถ• MusicXML)",
                    file_count="multiple",
                )
                xml_output = gr.File(
                    label="XML ๋‹ค์šด๋กœ๋“œ (Soundslice ๋“ฑ ํ˜ธํ™˜)",
                    file_count="multiple",
                )
                warnings_output = gr.Textbox(
                    label="๊ฒฝ๊ณ ",
                    lines=4,
                    placeholder="๊ฒฝ๊ณ  ๋ฉ”์‹œ์ง€",
                )

        # Output order must match the 5-tuple returned by convert().
        convert_btn.click(
            fn=convert,
            inputs=[file_input, preprocess_radio, dpi, upscale_radio],
            outputs=[mml_output, mml_file_output, mxl_output, xml_output, warnings_output],
        )

    return demo
148
+
149
+
150
if __name__ == "__main__":
    # Command-line options: optional public share link and the listening port.
    cli = argparse.ArgumentParser()
    cli.add_argument("--share", action="store_true", help="์™ธ๋ถ€ ๊ณต์œ  ๋งํฌ ์ƒ์„ฑ")
    cli.add_argument("--port", type=int, default=7860, help="ํฌํŠธ ๋ฒˆํ˜ธ (๊ธฐ๋ณธ: 7860)")
    opts = cli.parse_args()

    launch_options = {
        "share": opts.share,
        "server_name": "0.0.0.0",  # listen on all interfaces
        "server_port": opts.port,
    }

    demo = build_ui()
    # One request is processed at a time; at most 10 may wait in the queue.
    demo.queue(max_size=10, default_concurrency_limit=1)
    demo.launch(**launch_options)
convert_3part.py ADDED
@@ -0,0 +1,858 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ convert_3part.py
4
+ ๋งˆ๋น„๋…ธ๊ธฐ 1์ธ ์•…๋ณด์šฉ MusicXML โ†’ 3ํŒŒํŠธ MML ๋ณ€ํ™˜๊ธฐ
5
+
6
+ ์ ˆ๋Œ€ ๊ทœ์น™:
7
+ 1. ์ ˆ๋Œ€ ์‹œ๊ฐ„์ถ• ๊ธฐ์ค€
8
+ 2. <duration>/<divisions> ๋กœ ์‹ค์ œ ๊ธธ์ด ๊ณ„์‚ฐ
9
+ 3. <chord> note = ์ง์ „ non-chord note์™€ ๊ฐ™์€ ์‹œ์ž‘์ 
10
+ 4. <backup>/<forward> ๋ฐ˜์˜
11
+ 5. tie(start/stop) ๋™์ผ pitch ๋ณ‘ํ•ฉ
12
+ 6. <harmony> ๋ฌด์‹œ
13
+ 7. tempo: <sound tempo> ์šฐ์„ , ์—†์œผ๋ฉด metronome, ๊ธฐ๋ณธ๊ฐ’ 120
14
+ 8. ๋™์‹œ 3์Œ ์ดํ•˜ ๊ทธ๋Œ€๋กœ
15
+ 9. ๋™์‹œ 4์Œ ์ด์ƒ โ†’ ์ตœ์ƒ์„ฑ+์ตœํ•˜์„ฑ+ํ™”์„ฑ ๋Œ€ํ‘œ 1์Œ์œผ๋กœ ์ถ•์•ฝ
16
+ 10. ์ถœ๋ ฅ Part 1=Melody, Part 2=Chord1/Sub, Part 3=Chord2/Bass ๊ณ ์ •
17
+ 11. ๋‚ด๋ถ€ ๊ณ„์‚ฐ์€ Fraction ๊ณ ์ •๋ฐ€, ์ตœ์ข… ์ถœ๋ ฅ ์ง์ „์—๋งŒ MML ํ† ํฐ์œผ๋กœ ๋ณ€ํ™˜
18
+ 12. pitch/start/duration/tempo ๋ณด์กด ์ตœ์šฐ์„ ; slur/layout/harmony text ๋ฌด์‹œ
19
+
20
+ Usage:
21
+ python convert_3part.py file1.mxl [file2.mxl ...] -o output.txt [--append]
22
+ """
23
+ from __future__ import annotations
24
+
25
+ import argparse
26
+ import sys
27
+ import zipfile
28
+ import xml.etree.ElementTree as ET
29
+ from fractions import Fraction
30
+ from functools import lru_cache
31
+ from typing import List, Tuple, Optional, Dict
32
+
33
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
34
+ # ์ƒ์ˆ˜
35
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
36
QUARTER_TICKS = 12  # ticks in one quarter note
WHOLE_TICKS = 48    # ticks in one whole note

# Natural note letter -> semitone offset from C.
STEP_TO_SEMI: Dict[str, int] = dict(zip('CDEFGAB', (0, 2, 4, 5, 7, 9, 11)))

# Semitone offset from C -> MML note name ('+' marks a sharp).
SEMI_TO_NOTE: Dict[int, str] = dict(enumerate(
    ['c', 'c+', 'd', 'd+', 'e', 'f', 'f+', 'g', 'g+', 'a', 'a+', 'b']
))

# (tick, MML length token) pairs, longest first, consumed by the duration DP.
MML_DUR_TABLE: List[Tuple[int, str]] = [
    (48, '1'),
    (36, '2.'),
    (24, '2'),
    (18, '4.'),
    (16, '3'),
    (12, '4'),
    (9, '8.'),
    (8, '6'),
    (6, '8'),
    (4, '12'),
    (3, '16'),
    (2, '24'),
    (1, '48'),
]

# Reverse lookup: tick count -> MML length token.
MML_TICK_TO_TOKEN: Dict[int, str] = {tick: token for tick, token in MML_DUR_TABLE}
55
+
56
+
57
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
58
+ # MXL / XML ์—ด๊ธฐ
59
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
60
def open_mxl_or_xml(path: str) -> str:
    """Return the MusicXML text stored in an ``.mxl`` archive or a plain XML file.

    For ``.mxl`` input the score member is taken from the first ``rootfile``
    entry of ``META-INF/container.xml``. When the container is missing,
    unreadable, or names a member that is not in the archive, the first
    ``.xml`` / ``.musicxml`` member outside ``META-INF/`` is used instead.
    Any other path is read directly as UTF-8 text.

    Args:
        path: Path to an ``.mxl`` archive or an uncompressed MusicXML file.

    Returns:
        The MusicXML document as a string (undecodable bytes are replaced).

    Raises:
        FileNotFoundError: If no MusicXML member can be located in the archive.
    """
    if not path.lower().endswith('.mxl'):
        with open(path, encoding='utf-8', errors='replace') as f:
            return f.read()

    with zipfile.ZipFile(path) as zf:
        members = zf.namelist()
        xml_name = ''
        try:
            croot = ET.fromstring(zf.read('META-INF/container.xml'))
            for elem in croot.iter():
                # Compare local names so a namespaced container also matches.
                if elem.tag.rsplit('}', 1)[-1] == 'rootfile':
                    xml_name = elem.attrib.get('full-path', '')
                    break
        except Exception:
            # Best-effort: any problem with the container falls back to a scan.
            xml_name = ''

        if xml_name not in members:
            # Never pick META-INF/container.xml itself as the score.
            xml_name = next(
                (n for n in members
                 if n.lower().endswith(('.xml', '.musicxml'))
                 and not n.upper().startswith('META-INF/')),
                ''
            )
        if not xml_name:
            raise FileNotFoundError(f'MXL ์•ˆ์—์„œ MusicXML์„ ์ฐพ์ง€ ๋ชปํ•จ: {path}')
        return zf.read(xml_name).decode('utf-8', errors='replace')
88
+
89
+
90
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
91
+ # MusicXML ํŒŒ์‹ฑ
92
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
93
def strip_ns(root: ET.Element) -> None:
    """Remove the ``{namespace}`` prefix from every tag in the tree, in place."""
    for node in root.iter():
        tag = node.tag
        if '}' not in tag:
            continue
        node.tag = tag.split('}')[1]
98
+
99
+
100
def parse_tempo(root: ET.Element) -> int:
    """Return the score tempo in BPM (rule 7).

    The first usable ``tempo`` attribute on a ``sound`` element wins; if none
    is usable, the first usable ``metronome/per-minute`` text is taken;
    otherwise 120. A value is usable when it parses as a finite number whose
    integer part is positive, so ``0``, negative, ``inf`` and non-numeric
    values are skipped rather than returned.

    Args:
        root: Root element of a namespace-stripped MusicXML tree.

    Returns:
        Tempo as a positive integer (fractional part truncated).
    """

    def _as_bpm(text: Optional[str]) -> Optional[int]:
        # Parse one candidate; None means "ignore it and keep looking".
        if not text:
            return None
        try:
            bpm = int(float(text))
        except (ValueError, OverflowError):  # non-numeric, nan, or inf
            return None
        return bpm if bpm > 0 else None

    for sound in root.iter('sound'):
        bpm = _as_bpm(sound.attrib.get('tempo'))
        if bpm is not None:
            return bpm
    for metro in root.iter('metronome'):
        bpm = _as_bpm(metro.findtext('per-minute'))
        if bpm is not None:
            return bpm
    return 120
117
+
118
+
119
def frac_tick(raw_dur: int, divisions: int) -> Fraction:
    """Convert a raw XML duration into exact ticks (rule 2).

    ``divisions`` is the number of raw units per quarter note; a value of
    zero or less is treated as 1.
    """
    safe_divisions = divisions if divisions > 0 else 1
    return Fraction(raw_dur * QUARTER_TICKS, safe_divisions)
124
+
125
+
126
+ # <type> ์š”์†Œ ๊ธฐ๋ฐ˜ duration ํด๋ฐฑ ํ…Œ์ด๋ธ” (divisions ์˜ค๋ฅ˜ ์‹œ ์‚ฌ์šฉ)
127
+ _TYPE_TICKS: dict = {
128
+ 'breve': Fraction(96), 'whole': Fraction(48), 'half': Fraction(24),
129
+ 'quarter': Fraction(12), 'eighth': Fraction(6), '16th': Fraction(3),
130
+ '32nd': Fraction(3, 2), '64th': Fraction(3, 4),
131
+ }
132
+
133
+
134
+ def _dur_from_type(note_elem: ET.Element) -> Optional[Fraction]:
135
+ """<type> + <dot> ์š”์†Œ๋กœ duration(ticks) ๊ณ„์‚ฐ. ์—†์œผ๋ฉด None."""
136
+ t = note_elem.findtext('type')
137
+ if t not in _TYPE_TICKS:
138
+ return None
139
+ ticks = _TYPE_TICKS[t]
140
+ for _ in note_elem.findall('dot'):
141
+ ticks = ticks * Fraction(3, 2)
142
+ return ticks
143
+
144
+
145
def parse_part(part_elem: ET.Element) -> Tuple[List[dict], Fraction]:
    """Parse one ``part`` element onto an absolute time axis (rules 1-6).

    Returns:
        ``(events, total_ticks)``. Each event is a dict with the keys
        ``start`` (Fraction), ``dur`` (Fraction), ``midi`` (int),
        ``tie`` (frozenset of tie ``type`` values) and ``staff`` (int).
    """
    events: List[dict] = []
    part_offset = Fraction(0)  # absolute tick at which the current measure begins
    divisions = 1
    divisions_valid = False  # True once a divisions value > 0 has been read

    def ticks_for(raw_dur: int, note_elem: Optional[ET.Element] = None) -> Fraction:
        """Use divisions when valid, else fall back to the note's <type>."""
        if divisions_valid:
            return frac_tick(raw_dur, divisions)
        if note_elem is not None:
            from_type = _dur_from_type(note_elem)
            if from_type is not None:
                return from_type
        return frac_tick(raw_dur, max(divisions, 1))

    for measure in part_elem.findall('measure'):
        # Pick up a divisions change before timing anything in this measure.
        attributes = measure.find('attributes')
        if attributes is not None:
            div_text = attributes.findtext('divisions')
            if div_text:
                divisions = int(div_text)
                if divisions > 0:
                    divisions_valid = True

        # Implicit measures seen before any valid divisions are treated as
        # Audiveris artifacts and skipped.
        if measure.get('implicit') == 'yes' and not divisions_valid:
            continue

        cursor = Fraction(0)       # position inside the measure, in ticks
        measure_len = Fraction(0)  # furthest position reached in the measure
        chord_anchor: Optional[Fraction] = None  # start of the last non-chord note

        for child in measure:
            tag = child.tag

            if tag == 'harmony':
                # Rule 6: harmony is ignored.
                continue

            if tag == 'backup':
                # Rule 4: move the cursor backwards.
                cursor -= ticks_for(int(child.findtext('duration', '0')))
                continue

            if tag == 'forward':
                # Rule 4: move the cursor forwards.
                cursor += ticks_for(int(child.findtext('duration', '0')))
                measure_len = max(measure_len, cursor)
                continue

            if tag != 'note':
                continue

            dur = ticks_for(int(child.findtext('duration', '0')), child)
            is_chord = child.find('chord') is not None
            is_rest = child.find('rest') is not None

            if child.find('grace') is not None:
                # Grace notes are dropped; a non-chord grace still advances
                # the cursor in case it carries a duration.
                if not is_chord:
                    cursor += dur
                    measure_len = max(measure_len, cursor)
                continue

            # Rule 3: a chord note starts where the previous non-chord note did.
            if is_chord:
                note_start = cursor if chord_anchor is None else chord_anchor
            else:
                note_start = cursor
                chord_anchor = cursor

            tie_types = frozenset(t.attrib.get('type') for t in child.findall('tie'))

            pitch = None if is_rest else child.find('pitch')
            if pitch is not None:
                step = pitch.findtext('step', 'C')
                octave = int(pitch.findtext('octave', '4'))
                alter = int(float(pitch.findtext('alter', '0')))
                midi = (octave + 1) * 12 + STEP_TO_SEMI.get(step, 0) + alter
                abs_start = part_offset + note_start
                staff = int(child.findtext('staff', '1'))
                events.append({
                    'start': abs_start,
                    'dur': dur,
                    'midi': midi,
                    'tie': tie_types,
                    'staff': staff,
                })

            # Rule 3: chord notes do not advance the cursor.
            if is_chord:
                measure_len = max(measure_len, note_start + dur)
            else:
                cursor += dur
                measure_len = max(measure_len, cursor)

        part_offset += measure_len

    return events, part_offset
253
+
254
+
255
def parse_xml_string(xml_str: str) -> Tuple[List[dict], Fraction, int]:
    """Parse a MusicXML document into ``(events of all parts, total ticks, tempo)``.

    Events from every ``part`` are pooled into one list; the total length is
    that of the longest part.
    """
    if isinstance(xml_str, str):
        payload = xml_str.encode('utf-8', errors='replace')
    else:
        payload = xml_str
    root = ET.fromstring(payload)
    strip_ns(root)
    tempo = parse_tempo(root)

    pooled: List[dict] = []
    longest = Fraction(0)
    for part in root.findall('part'):
        part_events, part_len = parse_part(part)
        pooled += part_events
        if part_len > longest:
            longest = part_len

    return pooled, longest, tempo
271
+
272
+
273
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
274
+ # ํƒ€์ด ๋ณ‘ํ•ฉ (๊ทœ์น™ 5)
275
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
276
def merge_ties(events: List[dict]) -> List[dict]:
    """Fuse tie start -> stop chains of the same pitch into single notes (rule 5).

    Events are processed in ``(start, midi)`` order. A note carrying a tie
    ``stop`` is folded into the open note of the same pitch only when it
    begins exactly where that note ends. Input dicts are not modified.
    """
    merged: List[dict] = []
    open_ties: Dict[int, int] = {}  # midi -> index in `merged` of the open note

    ordered = sorted(events, key=lambda ev: (ev['start'], ev['midi']))
    for ev in ordered:
        pitch = ev['midi']
        flags = ev['tie']

        if 'stop' in flags and pitch in open_ties:
            head = merged[open_ties[pitch]]
            # Only a gap-free continuation is merged.
            if head['start'] + head['dur'] == ev['start']:
                head['dur'] = head['dur'] + ev['dur']
                if 'start' not in flags:
                    open_ties.pop(pitch)
                continue  # absorbed into `head`; nothing new to emit

        # Emit as a separate note (shallow copy keeps the input untouched).
        merged.append(dict(ev))
        if 'start' in flags:
            open_ties[pitch] = len(merged) - 1

    return merged
302
+
303
+
304
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
305
+ # ๋™์‹œ์Œ ์ถ•์•ฝ (๊ทœ์น™ 8~9)
306
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
307
+ def _best_middle(pitches: List[int], top: int, bot: int) -> Optional[int]:
308
+ """
309
+ top/bot ์ œ์™ธ ํ›„๋ณด์—์„œ ํ™”์„ฑ ์ •์ฒด์„ฑ์„ ๊ฐ€์žฅ ์‚ด๋ฆฌ๋Š” 1์Œ ๋ฐ˜ํ™˜.
310
+ ์ œ๊ฑฐ ์šฐ์„ ์ˆœ์œ„: ์˜ฅํƒ€๋ธŒ ์ค‘๋ณต > ์™„์ „5๋„ ์ค‘๋ณต > ํ†ต๊ณผ์Œ
311
+ """
312
+ candidates = [p for p in pitches if p != top and p != bot]
313
+ if not candidates:
314
+ return None
315
+
316
+ top_cls = top % 12
317
+ bot_cls = bot % 12
318
+
319
+ # 1๋‹จ๊ณ„: top/bot๊ณผ ์˜ฅํƒ€๋ธŒ ์ค‘๋ณต ์ œ๊ฑฐ
320
+ f1 = [p for p in candidates if p % 12 not in (top_cls, bot_cls)]
321
+ if f1:
322
+ candidates = f1
323
+
324
+ # 2๋‹จ๊ณ„: top์˜ ์™„์ „5๋„(7๋ฐ˜์Œ ์•„๋ž˜) ์Œ ์ œ๊ฑฐ
325
+ p5_cls = (top_cls - 7) % 12
326
+ f2 = [p for p in candidates if p % 12 != p5_cls]
327
+ if f2:
328
+ candidates = f2
329
+
330
+ # 3๋‹จ๊ณ„: ํ™”์Œ ๋‚ด ์šฐ์„  ์Œ์ • (๋‹จ3/์žฅ3/๋‹จ7 ๋“ฑ) ๊ธฐ์ค€ ์„ ํƒ
331
+ PREFERRED_INTERVALS = {3: 0, 4: 1, 10: 2, 9: 3, 7: 4, 8: 5, 5: 6, 2: 7}
332
+
333
+ def harmony_score(p: int):
334
+ interval = (top - p) % 12
335
+ return (PREFERRED_INTERVALS.get(interval, 10), -p)
336
+
337
+ return min(candidates, key=harmony_score)
338
+
339
+
340
def reduce_simultaneous(events: List[dict]) -> List[dict]:
    """Thin groups of four or more notes sharing a start down to three (rules 8-9).

    Groups of up to three notes pass through unchanged (highest pitch first).
    Larger groups keep the highest note, the inner note picked by
    ``_best_middle`` (if any) and the lowest note. Output is ordered by start.
    """
    onsets: Dict[Fraction, List[dict]] = {}
    for ev in events:
        onsets.setdefault(ev['start'], []).append(ev)

    reduced: List[dict] = []
    for onset in sorted(onsets):
        chord = sorted(onsets[onset], key=lambda ev: -ev['midi'])
        if len(chord) <= 3:
            reduced += chord
            continue

        highest, lowest = chord[0], chord[-1]
        middle_pitch = _best_middle(
            [ev['midi'] for ev in chord], highest['midi'], lowest['midi']
        )

        kept = [highest]
        if middle_pitch is not None:
            # Look only among the inner notes for the chosen pitch.
            middle = next(
                (ev for ev in chord[1:-1] if ev['midi'] == middle_pitch), None
            )
            if middle is not None:
                kept.append(middle)
        kept.append(lowest)
        reduced += kept

    return reduced
370
+
371
+
372
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
373
+ # 3ํŠธ๋ž™ ๋ฐฐ์ • (๊ทœ์น™ 10)
374
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
375
def assign_3_tracks(
    events: List[dict],
    total_dur: Fraction
) -> Tuple[List[dict], List[dict], List[dict]]:
    """Distribute events over Melody (0) / Chord1 (1) / Bass (2) (rule 10).

    Notes sharing a start are handled together, highest pitch first:
    three or more go top -> 0, second -> 1, lowest -> 2 unconditionally;
    two go to tracks 0 and 2, each falling back to track 1 when its own
    track is busy and being dropped when track 1 is busy too; a single
    note takes the first free track, or is forced onto the track whose
    last note is closest in pitch when all are busy.
    ``total_dur`` is accepted but not used here.
    """
    lanes: List[List[dict]] = [[], [], []]
    busy_until = [Fraction(-1)] * 3  # tick at which each lane becomes free

    def place(lane: int, ev: dict) -> None:
        lanes[lane].append(ev)
        busy_until[lane] = ev['start'] + ev['dur']

    onsets: Dict[Fraction, List[dict]] = {}
    for ev in events:
        onsets.setdefault(ev['start'], []).append(ev)

    for onset in sorted(onsets):
        chord = sorted(onsets[onset], key=lambda ev: -ev['midi'])
        size = len(chord)

        if size >= 3:
            place(0, chord[0])
            place(1, chord[1])
            place(2, chord[-1])

        elif size == 2:
            for lane, ev in ((0, chord[0]), (2, chord[1])):
                if busy_until[lane] <= ev['start']:
                    place(lane, ev)
                elif busy_until[1] <= ev['start']:
                    place(1, ev)
                # otherwise every option overlaps: the note is dropped

        else:
            ev = chord[0]
            free = next(
                (lane for lane in range(3) if busy_until[lane] <= ev['start']),
                None,
            )
            if free is not None:
                place(free, ev)
            else:
                # All lanes busy: force onto the lane nearest in pitch
                # (an empty lane counts as pitch 60).
                nearest = min(
                    range(3),
                    key=lambda i: abs(
                        (lanes[i][-1]['midi'] if lanes[i] else 60) - ev['midi']
                    )
                )
                lanes[nearest].append(ev)
                busy_until[nearest] = max(busy_until[nearest], ev['start'] + ev['dur'])

    for lane in lanes:
        lane.sort(key=lambda e: (e['start'], -e['midi']))

    return lanes[0], lanes[1], lanes[2]
439
+
440
+
441
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
442
+ # ํ‹ฑ โ†’ MML ํ† ํฐ ๋ถ„ํ•ด (๊ทœ์น™ 11)
443
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
444
@lru_cache(maxsize=4096)
def _decompose_dp(rem: int) -> Optional[tuple]:
    """Split ``rem`` ticks into the fewest table durations.

    Returns a tuple of tick values from ``MML_DUR_TABLE`` that sum to
    ``rem`` (``()`` for 0), or ``None`` if no combination fits. Among
    equally short answers the one found first in table order is kept.
    """
    if rem == 0:
        return ()

    shortest: Optional[tuple] = None
    for tick in (entry[0] for entry in MML_DUR_TABLE):
        if tick > rem:
            continue
        remainder = _decompose_dp(rem - tick)
        if remainder is None:
            continue
        # Strict comparison keeps the earliest of equally short candidates.
        if shortest is None or len(remainder) + 1 < len(shortest):
            shortest = (tick,) + remainder
    return shortest
458
+
459
+
460
def decompose_duration(ticks: Fraction) -> List[Tuple[int, str]]:
    """Turn a tick duration into ``[(tick, MML length token), ...]`` (rule 11).

    The duration is rounded to a whole number of ticks and clamped at zero
    first; a result of zero yields an empty list.

    Raises:
        ValueError: If the rounded tick count cannot be decomposed.
    """
    whole = max(0, round(float(ticks)))
    if whole == 0:
        return []

    parts = _decompose_dp(whole)
    if parts is None:
        raise ValueError(f'๋ถ„ํ•ด ๋ถˆ๊ฐ€: {ticks} โ†’ {whole} ticks')

    tokens: List[Tuple[int, str]] = []
    for tick in parts:
        tokens.append((tick, MML_TICK_TO_TOKEN[tick]))
    return tokens
470
+
471
+
472
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
473
+ # MML ๋นŒ๋“œ
474
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
475
def _midi_to_oct_note(midi: int) -> Tuple[int, str]:
    """Split a MIDI number into ``(octave, MML note name)``; MIDI 60 is octave 4."""
    octave_index, semitone = divmod(midi, 12)
    return octave_index - 1, SEMI_TO_NOTE[semitone]
477
+
478
+
479
def build_track_mml(events: List[dict], total_dur: Fraction, optimize: bool = True) -> str:
    """Render one track's events as an MML body (without ``MML@`` and ``;``).

    Gaps before events and after the last one (up to ``total_dur``) become
    rests; an event that starts before the current time is skipped.

    With ``optimize=True``:
      - an octave change of exactly +/-1 is written as ``>`` / ``<``
        instead of ``oN``;
      - a run of three or more consecutive notes/rests with the same length
        sets ``lN`` once and omits the per-note length suffix.
    """
    # Phase 1: flat token list.
    # ('oct', int) | ('note', name, length) | ('rest', length) | ('tie',)
    tokens: List[tuple] = []
    clock = Fraction(0)
    octave_now: Optional[int] = None

    for ev in events:
        onset = ev['start']
        if onset > clock:
            tokens.extend(('rest', tok) for _, tok in decompose_duration(onset - clock))
            clock = onset
        if onset < clock:
            continue  # overlaps what was already emitted

        octave, name = _midi_to_oct_note(ev['midi'])
        if octave != octave_now:
            tokens.append(('oct', octave))
            octave_now = octave

        segments = decompose_duration(ev['dur'])
        last_index = len(segments) - 1
        for index, (_, tok) in enumerate(segments):
            tokens.append(('note', name, tok))
            if index < last_index:
                tokens.append(('tie',))
        clock = onset + ev['dur']

    if clock < total_dur:
        tokens.extend(('rest', tok) for _, tok in decompose_duration(total_dur - clock))

    # Phase 2: for each token, length of the same-duration run starting there.
    def length_of(token: tuple) -> Optional[str]:
        if token[0] == 'note':
            return token[2]
        if token[0] == 'rest':
            return token[1]
        return None  # 'oct' and 'tie' break a run

    count = len(tokens)
    run_len = [1] * count
    if optimize:
        for i in reversed(range(count - 1)):
            here = length_of(tokens[i])
            if here is not None and here == length_of(tokens[i + 1]):
                run_len[i] = run_len[i + 1] + 1

    # Phase 3: text output.
    pieces: List[str] = []
    octave_now = None
    default_len: Optional[str] = None

    for i, token in enumerate(tokens):
        kind = token[0]

        if kind == 'tie':
            pieces.append('&')
            continue

        if kind == 'oct':
            target = token[1]
            step = None if octave_now is None else target - octave_now
            if optimize and step == 1:
                pieces.append('>')
            elif optimize and step == -1:
                pieces.append('<')
            else:
                pieces.append(f'o{target}')
            octave_now = target
            continue

        if kind == 'rest':
            symbol, suffix = 'r', token[1]
        elif kind == 'note':
            symbol, suffix = token[1], token[2]
        else:
            continue

        if optimize and run_len[i] >= 3 and suffix != default_len:
            pieces.append(f'l{suffix}')
            default_len = suffix
        pieces.append(symbol if suffix == default_len else symbol + suffix)

    return ''.join(pieces)
563
+
564
+
565
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
566
+ # 변환 파이프라인
567
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
568
def convert_files(file_paths: List[str], optimize: bool = True) -> Tuple[str, str, str, int]:
    """Concatenate several score files (in page order) and convert to 3-part MML.

    Each file is parsed, its events are shifted by the accumulated duration
    of the preceding files, and the combined event list is passed through
    ``merge_ties``, ``reduce_simultaneous`` and ``assign_3_tracks``.

    Returns:
        ``(melody_mml, chord1_mml, bass_mml, tempo)``. ``tempo`` is the value
        reported for the last file parsed (120 when ``file_paths`` is empty).
    """
    combined: List[dict] = []
    elapsed = Fraction(0)
    tempo = 120

    for path in file_paths:
        page_events, page_dur, tempo = parse_xml_string(open_mxl_or_xml(path))
        # Shift this page so it starts where the previous one ended.
        for ev in page_events:
            ev['start'] += elapsed
        combined.extend(page_events)
        elapsed += page_dur

    combined = reduce_simultaneous(merge_ties(combined))
    melody, chord1, bass = assign_3_tracks(combined, elapsed)

    melody_mml = build_track_mml(melody, elapsed, optimize=optimize)
    chord1_mml = build_track_mml(chord1, elapsed, optimize=optimize)
    bass_mml = build_track_mml(bass, elapsed, optimize=optimize)
    return melody_mml, chord1_mml, bass_mml, tempo
598
+
599
+
600
def format_output(melody: str, chord1: str, bass: str, tempo: int) -> str:
    """Format the three MML parts as plain text.

    Each part is written as a ``Part N`` heading followed by its own
    ``MML@...;`` line, with a blank line between parts. ``tempo`` is accepted
    for signature compatibility but is not written to the output.
    """
    sections = [
        f'Part {number}\nMML@{body};'
        for number, body in enumerate((melody, chord1, bass), start=1)
    ]
    return '\n\n'.join(sections)
616
+
617
+
618
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
619
+ # 가야금 모드
620
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
621
+
622
def _clamp_midi_to_range(midi: int, lo: int, hi: int) -> int:
    """Move a MIDI number toward ``lo..hi`` in whole-octave (12 semitone) steps.

    The value is first lowered by octaves until it is not above ``hi``, then
    raised by octaves until it is not below ``lo``. If the range spans less
    than an octave the result of the second step may end up above ``hi``.
    """
    if midi > hi:
        # Smallest number of octaves that brings midi down to <= hi.
        midi -= 12 * ((midi - hi + 11) // 12)
    if midi < lo:
        # Smallest number of octaves that brings midi up to >= lo.
        midi += 12 * ((lo - midi + 11) // 12)
    return midi
629
+
630
+
631
def convert_files_gayageum(file_paths: List[str], optimize: bool = True) -> Tuple[str, str, str, int]:
    """Convert score files to 3-part MML using the gayageum register mapping.

    * Staff 1 (right hand): MIDI is raised by 48 and then octave-shifted
      into 72..107 (o5-o8).
    * Staff 2 (left hand): MIDI is octave-shifted into 24..59 (o1-o4).

    Events on any other staff are not carried over. The remapped events are
    then reduced and split into three tracks.

    Returns:
        ``(melody_mml, chord1_mml, bass_mml, tempo)``; ``tempo`` is the value
        reported for the last file parsed (120 when no file is given).
    """
    combined: List[dict] = []
    elapsed = Fraction(0)
    tempo = 120

    for path in file_paths:
        page_events, page_dur, tempo = parse_xml_string(open_mxl_or_xml(path))
        for ev in page_events:
            ev['start'] += elapsed
        combined.extend(page_events)
        elapsed += page_dur

    by_staff = split_by_staff(merge_ties(combined))

    # (staff number, semitone shift, lowest MIDI, highest MIDI)
    register_plan = (
        (1, 48, 72, 107),
        (2, 0, 24, 59),
    )
    remapped: List[dict] = []
    for staff, shift, low, high in register_plan:
        for ev in by_staff.get(staff, []):
            # Work on a copy so the source event keeps its original pitch.
            clone = dict(ev)
            clone['midi'] = _clamp_midi_to_range(ev['midi'] + shift, low, high)
            remapped.append(clone)

    remapped = reduce_simultaneous(remapped)
    melody, chord1, bass = assign_3_tracks(remapped, elapsed)

    melody_mml = build_track_mml(melody, elapsed, optimize=optimize)
    chord1_mml = build_track_mml(chord1, elapsed, optimize=optimize)
    bass_mml = build_track_mml(bass, elapsed, optimize=optimize)
    return melody_mml, chord1_mml, bass_mml, tempo
677
+
678
+
679
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
680
+ # staff ๊ธฐ๋ฐ˜ ๋ถ„๋ฆฌ
681
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
682
+
683
def split_by_staff(events: List[dict]) -> Dict[int, List[dict]]:
    """Group events by their ``'staff'`` value (defaulting to 1 when absent).

    Returns a dict ``{staff_num: [events]}``; events keep their input order
    within each staff and the same dict objects are reused, not copied.
    """
    buckets: Dict[int, List[dict]] = {}
    for ev in events:
        staff = ev.get('staff', 1)
        if staff not in buckets:
            buckets[staff] = []
        buckets[staff].append(ev)
    return buckets
690
+
691
+
692
def convert_files_by_staff(file_paths: List[str], optimize: bool = True) -> Tuple[Dict[int, List[str]], Fraction, int]:
    """Convert score files to MML, keeping each staff separate.

    The files are concatenated in order (each page shifted by the accumulated
    duration of the previous ones), ties are merged, and the events are
    grouped by staff. Within a staff, simultaneous notes are spread over as
    many tracks as ``assign_n_tracks`` returns, and every track is rendered
    on its own.

    Returns:
        ``(staff_mmls, total_dur, tempo)`` where ``staff_mmls`` maps each
        staff number to the *list* of MML strings for that staff's tracks,
        ``total_dur`` is the combined duration, and ``tempo`` is the value
        reported for the last file parsed (120 when no file is given).
    """
    all_events: List[dict] = []
    total_dur = Fraction(0)
    tempo = 120

    for path in file_paths:
        xml_str = open_mxl_or_xml(path)
        evs, dur, bpm = parse_xml_string(xml_str)
        # Shift this page so it starts where the previous one ended.
        for e in evs:
            e['start'] += total_dur
        all_events.extend(evs)
        total_dur += dur
        tempo = bpm

    all_events = merge_ties(all_events)
    staff_events = split_by_staff(all_events)

    # One staff can produce several tracks, so the value is a list of strings.
    staff_mmls: Dict[int, List[str]] = {}
    for staff_num, evs in sorted(staff_events.items()):
        tracks = assign_n_tracks(evs, total_dur)
        staff_mmls[staff_num] = [
            build_track_mml(t, total_dur, optimize=optimize) for t in tracks
        ]

    return staff_mmls, total_dur, tempo
722
+
723
+
724
def format_output_by_staff(staff_mmls: Dict[int, list], total_dur: Fraction, tempo: int) -> str:
    """Format per-staff track MML as consecutively numbered parts.

    Staffs are visited in sorted order. Staff 1 and staff 2 get fixed hand
    labels; any other staff is labelled ``Staff N``. Every track produces a
    heading line, an ``MML@...;`` line and a blank line. ``total_dur`` and
    ``tempo`` are accepted but not written to the output.
    """
    hand_labels = {1: '์˜ค๋ฅธ์†', 2: '์™ผ์†'}
    out = []
    part_counter = 0
    for staff_num, track_mmls in sorted(staff_mmls.items()):
        if staff_num in hand_labels:
            hand = hand_labels[staff_num]
        else:
            hand = f'Staff {staff_num}'
        for track_no, mml in enumerate(track_mmls, 1):
            part_counter += 1
            out.extend((
                f'Part {part_counter} [{hand} Track {track_no}]',
                f'MML@{mml};',
                '',
            ))
    return '\n'.join(out)
739
+
740
+
741
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
742
+ # --all-notes ๋ชจ๋“œ: ๋™์‹œ์Œ ์ „๋ถ€ ํฌํ•จ, ํŒŒํŠธ ์ˆ˜ = ์ตœ๋Œ€ ๋™์‹œ์Œ ์ˆ˜
743
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
744
+
745
def assign_n_tracks(events: List[dict], total_dur: Fraction) -> List[List[dict]]:
    """Distribute notes over as many tracks as needed so that none is lost.

    Notes are visited by ascending start time and, within one start time,
    from the highest pitch down. Each note goes to the lowest-numbered track
    that is silent at that moment and has not already received a note from
    the same chord; a new track is opened when no such track exists.

    Unlike a split based only on the size of each same-start chord, this also
    separates notes that *overlap* without starting together (for example a
    held note under a moving line). Such notes would otherwise share a track
    and the later one would be skipped when the track is rendered. For input
    without such overlaps the result is the same as ranking each chord by
    pitch: track 0 gets the highest note, track 1 the next, and so on.

    Args:
        events: Note dicts read for ``'start'``, ``'midi'`` and ``'dur'``
            (a missing ``'dur'`` is treated as 0).
        total_dur: Unused; kept for signature compatibility.

    Returns:
        A list of tracks, each a list of event dicts in start order.
        ``[[]]`` when ``events`` is empty.
    """
    if not events:
        return [[]]

    from collections import defaultdict

    start_groups: dict = defaultdict(list)
    for e in events:
        start_groups[e['start']].append(e)

    tracks: List[List[dict]] = []
    busy_until: list = []  # busy_until[i] = end time of the last note on track i

    for start, group in sorted(start_groups.items()):
        used_now = set()  # tracks already taken by this chord
        for ev in sorted(group, key=lambda e: e['midi'], reverse=True):
            for idx, free_at in enumerate(busy_until):
                if idx not in used_now and free_at <= start:
                    break
            else:
                idx = len(tracks)
                tracks.append([])
                busy_until.append(start)
            tracks[idx].append(ev)
            busy_until[idx] = start + ev.get('dur', 0)
            used_now.add(idx)

    return tracks
772
+
773
+
774
def format_output_n(tracks: List[List[dict]], total_dur: Fraction, tempo: int, optimize: bool = True) -> str:
    """Render N tracks to MML and format them as ``Part 1`` .. ``Part N``.

    Each track contributes a heading line, an ``MML@...;`` line and a blank
    line. ``tempo`` is accepted but not written to the output.
    """
    chunks = []
    for number, track in enumerate(tracks, start=1):
        body = build_track_mml(track, total_dur, optimize=optimize)
        chunks += [f'Part {number}', f'MML@{body};', '']
    return '\n'.join(chunks)
786
+
787
+
788
def convert_files_all_notes(file_paths: List[str]) -> Tuple[List[List[dict]], Fraction, int]:
    """Concatenate score files and split every note into N tracks.

    Files are joined in order, each page shifted by the accumulated duration
    of the previous ones; ties are merged and the events are handed to
    ``assign_n_tracks``.

    Returns:
        ``(tracks, total_dur, tempo)``; ``tempo`` is the value reported for
        the last file parsed (120 when no file is given).
    """
    combined: List[dict] = []
    elapsed = Fraction(0)
    tempo = 120

    for path in file_paths:
        page_events, page_dur, tempo = parse_xml_string(open_mxl_or_xml(path))
        for ev in page_events:
            ev['start'] += elapsed
        combined.extend(page_events)
        elapsed += page_dur

    tracks = assign_n_tracks(merge_ties(combined), elapsed)
    return tracks, elapsed, tempo
810
+
811
+
812
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
813
+ # CLI
814
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
815
def main():
    """Command-line entry point.

    Converts the given MXL/XML files in one of three modes (gayageum,
    all-notes, or the default 3-part mode) and either prints the result or
    writes it to ``--output``; with ``--append`` the result is added to the
    end of the file after a separator line.
    """
    parser = argparse.ArgumentParser(description='MusicXML โ†’ 3ํŒŒํŠธ MML ๋ณ€ํ™˜๊ธฐ')
    parser.add_argument('files', nargs='+', help='์ž…๋ ฅ MXL/XML ํŒŒ์ผ (ํŽ˜์ด์ง€ ์ˆœ์„œ)')
    parser.add_argument('-o', '--output', help='์ถœ๋ ฅ ํŒŒ์ผ ๊ฒฝ๋กœ')
    parser.add_argument(
        '--append', action='store_true',
        help='๊ธฐ์กด ํŒŒ์ผ ํ•˜๋‹จ์— ๊ตฌ๋ถ„์„ ๊ณผ ํ•จ๊ป˜ ์ถ”๊ฐ€',
    )
    parser.add_argument(
        '--all-notes', action='store_true',
        help='๋™์‹œ์Œ ์ „๋ถ€ ํฌํ•จ, ํŒŒํŠธ ์ˆ˜=์ตœ๋Œ€ ๋™์‹œ์Œ ์ˆ˜ ๋ชจ๋“œ',
    )
    parser.add_argument(
        '--gayageum', action='store_true',
        help='๊ฐ€์•ผ๊ธˆ ๋ชจ๋“œ: ์˜ค๋ฅธ์†โ†’๋†ํ˜„(o5~o8), ์™ผ์†โ†’ํ‰์Œ(o1~o4), 3ํŒŒํŠธ ์ถœ๋ ฅ',
    )
    args = parser.parse_args()

    print(f'๋ณ€ํ™˜ ์ค‘: {args.files}', file=sys.stderr)

    # --gayageum takes precedence over --all-notes when both are given.
    if args.gayageum:
        melody, chord1, bass, tempo = convert_files_gayageum(args.files)
        result_text = format_output(melody, chord1, bass, tempo)
        print('๊ฐ€์•ผ๊ธˆ ๋ชจ๋“œ: ์˜ค๋ฅธ์†โ†’๋†ํ˜„(o5~o8), ์™ผ์†โ†’ํ‰์Œ(o1~o4)', file=sys.stderr)
    elif args.all_notes:
        tracks, total_dur, tempo = convert_files_all_notes(args.files)
        result_text = format_output_n(tracks, total_dur, tempo)
        print(f'ํŒŒํŠธ ์ˆ˜: {len(tracks)}', file=sys.stderr)
    else:
        melody, chord1, bass, tempo = convert_files(args.files)
        result_text = format_output(melody, chord1, bass, tempo)

    if not args.output:
        print(result_text)
        return

    open_mode = 'a' if args.append else 'w'
    with open(args.output, open_mode, encoding='utf-8') as f:
        if args.append:
            # Separator between the existing content and the new result.
            f.write('\n' + '-' * 40 + '\n')
        f.write(result_text)
        f.write('\n')
    print(f'์ €์žฅ: {args.output}', file=sys.stderr)
856
+
857
+ if __name__ == '__main__':
858
+ main()
core/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # core package
core/convert_pipeline.py ADDED
@@ -0,0 +1,413 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ core/convert_pipeline.py
3
+
4
+ ์•…๋ณด(PDF/PNG/JPG) โ†’ MML ๋ณ€ํ™˜ ํŒŒ์ดํ”„๋ผ์ธ.
5
+
6
+ run_score_pipeline() ํ•˜๋‚˜๋ฅผ ํ˜ธ์ถœํ•˜๋ฉด:
7
+ 1. PDF โ†’ 300DPI PNG ๋ถ„๋ฆฌ (fitz)
8
+ 2. ์„ ํƒ์  ์ „์ฒ˜๋ฆฌ (Grayscale + Otsu ์ด์ง„ํ™”, cv2)
9
+ 3. Audiveris OMR โ†’ MXL ํŒŒ์ผ
10
+ 4. convert_3part.convert_files_all_notes() โ†’ ์ „์ฒด ํ™”์Œ(NํŒŒํŠธ)
11
+ 5. convert_3part.convert_files() โ†’ 3ํŒŒํŠธ
12
+ 6. ๊ฒฐ๊ณผ ํ…์ŠคํŠธ ๋ฐ˜ํ™˜ (NํŒŒํŠธ ์œ„ / ๊ตฌ๋ถ„์„  / 3ํŒŒํŠธ ์•„๋ž˜)
13
+
14
+ ํ™˜๊ฒฝ๋ณ€์ˆ˜:
15
+ AUDIVERIS_BIN=<๊ฒฝ๋กœ> Audiveris ์‹คํ–‰ ํŒŒ์ผ
16
+ AUDIVERIS_JAR=<๊ฒฝ๋กœ> Audiveris.jar (java -jar ๋ฐฉ์‹)
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import os
22
+ import shutil
23
+ import subprocess
24
+ import sys
25
+ import tempfile
26
+ from pathlib import Path
27
+ from typing import Optional
28
+
29
+ SEP = "\n" + "-" * 40 + "\n"
30
+
31
+ WAIFU2X_DEFAULT = r"C:\tools\waifu2x-ncnn-vulkan\waifu2x-ncnn-vulkan-20250915-windows\waifu2x-ncnn-vulkan.exe"
32
+
33
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
34
+ # Audiveris ๊ฒฝ๋กœ ํ™•์ธ
35
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
36
+
37
def _get_audiveris_cmd(image_path: str, output_dir: str) -> list[str]:
    """Build the Audiveris batch-export command line for one image.

    ``AUDIVERIS_BIN`` (a launcher executable) is preferred. Otherwise
    ``AUDIVERIS_JAR`` is run through ``java -jar`` with the heap limited to
    ``AUDIVERIS_MAX_HEAP`` (default ``1500m``).

    Raises:
        RuntimeError: If neither environment variable is set to a non-empty
            value.
    """
    batch_args = ["-batch", "-export", "-output", output_dir, "--", image_path]

    launcher = os.environ.get("AUDIVERIS_BIN", "")
    if launcher:
        return [launcher] + batch_args

    jar_file = os.environ.get("AUDIVERIS_JAR", "")
    if jar_file:
        heap_limit = os.environ.get("AUDIVERIS_MAX_HEAP", "1500m")
        return ["java", f"-Xmx{heap_limit}", "-jar", jar_file] + batch_args

    raise RuntimeError(
        "Audiveris ํ™˜๊ฒฝ๋ณ€์ˆ˜๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.\n"
        " set AUDIVERIS_BIN=C:\\path\\to\\Audiveris.exe"
    )
49
+
50
+
51
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
52
+ # PDF โ†’ PNG
53
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
54
+
55
def _pdf_to_pngs(pdf_path: str, out_dir: str, dpi: int = 300) -> list[str]:
    """Render every page of a PDF to ``page_NN.png`` files inside ``out_dir``.

    Args:
        pdf_path: PDF file to render.
        out_dir: Existing directory that receives the PNG files.
        dpi: Render resolution; pages are scaled by ``dpi / 72``.

    Returns:
        The PNG paths in page order.
    """
    import fitz

    zoom = dpi / 72
    mat = fitz.Matrix(zoom, zoom)
    paths: list[str] = []

    doc = fitz.open(pdf_path)
    try:
        for i, page in enumerate(doc):
            pix = page.get_pixmap(matrix=mat)
            p = Path(out_dir) / f"page_{i+1:02d}.png"
            pix.save(str(p))
            paths.append(str(p))
    finally:
        # Release the document (and its file handle) even if rendering fails.
        doc.close()
    return paths
66
+
67
+
68
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
69
+ # PNG ๋ณต์‚ฌ + DPI ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ ์„ค์ •
70
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
71
+
72
def _copy_with_scale(src: str, dst: str, scale: float = 1.0) -> None:
    """Re-save an image at ``dst``, optionally resized by ``scale``.

    With ``scale == 1.0`` the pixel dimensions are kept. Any mode other than
    RGB or L (for example RGBA or palette) is converted to RGB before saving;
    the original author's note says Audiveris may not handle those modes.
    """
    from PIL import Image

    picture = Image.open(src)
    if scale != 1.0:
        target_size = (int(picture.width * scale), int(picture.height * scale))
        picture = picture.resize(target_size, Image.LANCZOS)
    needs_rgb = picture.mode not in ("RGB", "L")
    if needs_rgb:
        picture = picture.convert("RGB")
    picture.save(dst)
84
+
85
+
86
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
87
+ # waifu2x-ncnn-vulkan ์—…์Šค์ผ€์ผ
88
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
89
+
90
def _upscale_waifu2x(src: str, dst: str, scale: int = 2, noise: int = 1) -> None:
    """Upscale and denoise an image by running waifu2x-ncnn-vulkan.

    The executable is taken from ``WAIFU2X_BIN`` or, failing that, from the
    module default path. The original note states that ``scale`` must be a
    power of two (2/4/8/16/32) and ``noise`` is -1 (off) or 0..3.

    Raises:
        RuntimeError: If the executable does not exist, the process exits
            with a non-zero code, or the output file is missing or empty.
        subprocess.TimeoutExpired: If the tool runs longer than 300 seconds.
    """
    exe = os.environ.get("WAIFU2X_BIN", WAIFU2X_DEFAULT)
    if not Path(exe).exists():
        raise RuntimeError(
            f"waifu2x-ncnn-vulkan ์‹คํ–‰ ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {exe}\n"
            "ํ™˜๊ฒฝ๋ณ€์ˆ˜ WAIFU2X_BIN ๋˜๋Š” ๊ธฐ๋ณธ ๊ฒฝ๋กœ๋ฅผ ํ™•์ธํ•˜์„ธ์š”."
        )

    command = [exe, "-i", src, "-o", dst, "-s", str(scale), "-n", str(noise), "-f", "png"]
    proc = subprocess.run(command, capture_output=True, timeout=300)

    produced = Path(dst)
    succeeded = (
        proc.returncode == 0
        and produced.exists()
        and produced.stat().st_size != 0
    )
    if succeeded:
        return

    # Keep only the tail of stderr so the error message stays short.
    stderr = proc.stderr.decode("utf-8", errors="ignore")[-300:]
    raise RuntimeError(f"waifu2x ์‹คํŒจ (exit {proc.returncode}): {stderr}")
106
+
107
+
108
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
109
+ # ์ „์ฒ˜๋ฆฌ (Grayscale + Otsu ์ด์ง„ํ™”)
110
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
111
+
112
def _deskew(gray_img):
    """Estimate the skew of a grayscale page and rotate it to compensate.

    The minimum-area rectangle around all dark pixels (value < 128) gives the
    skew angle. Images with fewer than 10 dark pixels are returned unchanged.

    Args:
        gray_img: 2-D grayscale ndarray.

    Returns:
        The rotated ndarray (same size, border pixels replicated), or the
        input object itself when there is too little ink to measure.
    """
    import numpy as np

    # (row, col) coordinates of every dark pixel.
    coords = np.column_stack(np.where(gray_img < 128))
    if len(coords) < 10:
        return gray_img

    import cv2

    angle = cv2.minAreaRect(coords)[-1]
    # The rectangle angle is only defined modulo 90 degrees, and its reported
    # range depends on the OpenCV version ([-90, 0) on older builds, (0, 90]
    # on newer ones). Fold it into [-45, 45] so a nearly straight page is
    # never rotated by roughly a quarter turn.
    # NOTE(review): the rotation direction is kept exactly as the original
    # code had it for the [-90, 0) convention — confirm on a skewed sample.
    if angle < -45:
        angle += 90
    elif angle > 45:
        angle -= 90

    h, w = gray_img.shape
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    return cv2.warpAffine(gray_img, M, (w, h), flags=cv2.INTER_LINEAR,
                          borderMode=cv2.BORDER_REPLICATE)
127
+
128
+
129
def _preprocess(src: str, dst: str, mode: str = "otsu") -> None:
    """Write a cleaned-up copy of ``src`` to ``dst``.

    Modes:
        ``"otsu"``: grayscale, 3x3 Gaussian blur, deskew, global Otsu
            threshold.
        ``"adaptive"``: grayscale, 3x3 Gaussian blur, deskew, Gaussian
            adaptive threshold (block size 15, constant 10).
        ``"contrast"``: contrast x2.0 and sharpness x1.5 via Pillow; modes
            other than RGB/L are converted to RGB.

    Raises:
        ValueError: If ``mode`` is not one of the three modes above. This is
            checked before anything is written, so a bad mode can no longer
            leave ``dst`` missing.
        RuntimeError: If OpenCV cannot read ``src`` or cannot write ``dst``.
    """
    if mode not in ("otsu", "adaptive", "contrast"):
        raise ValueError(f"unknown preprocess mode: {mode!r}")

    if mode == "contrast":
        from PIL import Image, ImageEnhance

        with Image.open(src) as opened:
            img = ImageEnhance.Contrast(opened).enhance(2.0)
            img = ImageEnhance.Sharpness(img).enhance(1.5)
            if img.mode not in ("RGB", "L"):
                img = img.convert("RGB")
            img.save(dst)
        return

    import cv2

    img = cv2.imread(src, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise RuntimeError(f"cannot read image: {src}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    gray = cv2.GaussianBlur(gray, (3, 3), 0)
    gray = _deskew(gray)

    if mode == "otsu":
        _, binary = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    else:  # "adaptive"
        binary = cv2.adaptiveThreshold(
            gray, 255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY, 15, 10,
        )

    if not cv2.imwrite(dst, binary):
        raise RuntimeError(f"cannot write image: {dst}")
161
+
162
+
163
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
164
+ # PNG โ†’ MXL (Audiveris)
165
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
166
+
167
+ _AUDIVERIS_MAX_PX = 4800 # ์ด ๊ฐ’ ์ดˆ๊ณผ ์‹œ ๋น„์œจ ์œ ์ง€ํ•˜๋ฉฐ ์ถ•์†Œ
168
+
169
+
170
def _normalize_png(src: str, dst: str) -> None:
    """Save ``src`` as an RGB PNG no larger than the Audiveris size cap.

    The image is converted to RGB and, if its longer side exceeds
    ``_AUDIVERIS_MAX_PX``, shrunk with LANCZOS resampling while keeping the
    aspect ratio.
    """
    from PIL import Image

    # NOTE(review): this disables Pillow's decompression-bomb limit for the
    # whole process, not just this call. The original comment justifies it by
    # large upscaler output that is shrunk right away — confirm this is
    # acceptable wherever untrusted uploads are opened.
    Image.MAX_IMAGE_PIXELS = None

    picture = Image.open(src).convert("RGB")
    longest_side = max(picture.width, picture.height)
    if longest_side > _AUDIVERIS_MAX_PX:
        shrink = _AUDIVERIS_MAX_PX / longest_side
        capped_size = (int(picture.width * shrink), int(picture.height * shrink))
        picture = picture.resize(capped_size, Image.LANCZOS)
    picture.save(dst, format="PNG")
184
+
185
+
186
def _run_audiveris(png_path: str, mxl_save_dir: str, on_line=None) -> tuple[Optional[str], str]:
    """Run Audiveris on one page image and collect the resulting MXL file.

    The image is first normalised into a sibling ``<stem>_norm.png``, then
    Audiveris exports into a temporary directory. The first ``*.mxl`` found
    is copied to ``mxl_save_dir/<stem>.mxl`` and, when the archive contains
    an XML member other than the container file, that member is extracted to
    ``mxl_save_dir/<stem>.xml``.

    Args:
        png_path: Page image to recognise.
        mxl_save_dir: Existing directory that receives the MXL/XML files.
        on_line: Optional callback invoked with each non-empty line of
            Audiveris output (stdout and stderr combined).

    Returns:
        ``(mxl_path, "")`` on success, or ``(None, message)`` when no MXL
        file was produced; the message holds the exit code and the last ten
        output lines.
    """
    import zipfile

    source = Path(png_path)
    # Derive the name from the path parts rather than str.replace(".png", ...),
    # which returns the path unchanged (so the normalised image overwrites the
    # input) when the suffix is not exactly ".png", and which also rewrites a
    # ".png" occurring inside a directory name.
    norm_path = str(source.with_name(f"{source.stem}_norm.png"))
    _normalize_png(png_path, norm_path)

    with tempfile.TemporaryDirectory() as tmp:
        cmd = _get_audiveris_cmd(norm_path, tmp)
        stdout_lines: list[str] = []

        # The context manager closes the pipe and reaps the child on exit.
        with subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
        ) as proc:
            try:
                for raw in proc.stdout:
                    line = raw.decode("utf-8", errors="ignore").rstrip()
                    if line:
                        stdout_lines.append(line)
                        if on_line:
                            on_line(line)
            except BaseException:
                # Do not leave the child running if the callback fails or the
                # caller is interrupted.
                proc.kill()
                raise

        mxl_files = list(Path(tmp).rglob("*.mxl"))
        if not mxl_files:
            stdout_tail = "\n".join(stdout_lines[-10:])
            err = f"exit code {proc.returncode} / stdout: {stdout_tail}"
            return None, err

        dest = Path(mxl_save_dir) / f"{source.stem}.mxl"
        shutil.copy2(mxl_files[0], dest)

        # Also unpack the score XML next to the MXL archive.
        xml_dest = Path(mxl_save_dir) / f"{source.stem}.xml"
        with zipfile.ZipFile(dest) as zf:
            xml_members = [n for n in zf.namelist()
                           if n.lower().endswith(".xml") and "container" not in n.lower()]
            if xml_members:
                xml_dest.write_bytes(zf.read(xml_members[0]))

        return str(dest), ""
231
+
232
+
233
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
234
+ # ๋ฉ”์ธ ํŒŒ์ดํ”„๋ผ์ธ
235
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
236
+
237
def run_score_pipeline(
    input_path: str,
    preprocess_mode: str = "none",
    dpi: int = 300,
    upscale_mode: str = "none",
    save_dir: str = "",
    on_progress=None,
    correct_xml: bool = False,
) -> tuple[str, list[str], list[str], list[str]]:
    """Convert a score file (PDF or image) to MML text.

    Steps: split the input into page PNGs, optionally upscale and preprocess
    them, run Audiveris on every page, optionally run the XML corrector, then
    convert the results to all-notes (N-part) MML and to 3-part MML.

    Args:
        input_path: Input file; a ``.pdf`` is rendered page by page, anything
            else is treated as a single image.
        preprocess_mode: ``"none"`` or a mode accepted by ``_preprocess``.
        dpi: PDF render resolution.
        upscale_mode: ``"none"``, ``"pil_<N>"`` or ``"waifu2x_<N>"`` where
            ``<N>`` is the integer scale factor.
        save_dir: Directory for page PNGs and MXL/XML files. When empty, a
            fresh temporary directory is created.
        on_progress: Optional ``callback(fraction, description)``.
        correct_xml: When true, try to run ``core.xml_corrector`` on each MXL
            and convert the corrected XML instead.

    Returns:
        ``(combined_text, mxl_paths, xml_paths, warnings)``:
        the N-part text, a separator and the 3-part text; the MXL files
        produced by Audiveris; the XML files extracted next to them; and
        human-readable warnings collected along the way.

    Raises:
        RuntimeError: If Audiveris produced no MXL for any page.

    On failure, the working directory is removed only when this function
    created it. A caller-supplied ``save_dir`` is never deleted, since it may
    hold unrelated files. On success the working directory is kept because
    the returned paths point into it.
    """
    import re

    # Make the project root importable so convert_3part can be found.
    project_root = Path(__file__).parent.parent
    if str(project_root) not in sys.path:
        sys.path.insert(0, str(project_root))
    from convert_3part import (
        convert_files_all_notes, format_output_n,
        convert_files, format_output,
    )

    warnings: list[str] = []
    ext = Path(input_path).suffix.lower()

    owns_work_dir = not save_dir
    work_dir = save_dir if save_dir else tempfile.mkdtemp(prefix="sml_pipe_")
    pages_dir = Path(work_dir) / "pages"
    mxl_dir = Path(work_dir) / "mxl"

    def _prog(frac: float, desc: str) -> None:
        if on_progress:
            on_progress(frac, desc)

    try:
        pages_dir.mkdir(parents=True, exist_ok=True)
        mxl_dir.mkdir(parents=True, exist_ok=True)

        # -- 1. Input -> list of page PNGs ---------------------------------
        _prog(0.0, "ํŽ˜์ด์ง€ ๋ถ„๋ฆฌ ์ค‘...")
        if ext == ".pdf":
            raw_pngs = _pdf_to_pngs(input_path, str(pages_dir), dpi=dpi)
        else:
            dest = str(pages_dir / "page_01.png")
            _copy_with_scale(input_path, dest, scale=1.0)
            raw_pngs = [dest]
        total = len(raw_pngs)
        # Pages before upscaling; the corrector compares against these.
        orig_pngs = list(raw_pngs)

        # -- 1-b. Upscale ---------------------------------------------------
        if upscale_mode.startswith("pil_"):
            scale = int(upscale_mode.split("_")[1])
            scaled_pngs = []
            for i, src in enumerate(raw_pngs):
                _prog(0.05 + 0.10 * i / total, f"์—…์Šค์ผ€์ผ ์ค‘... ({i+1}/{total})")
                dst = src.replace(".png", f"_x{scale}.png")
                _copy_with_scale(src, dst, scale=float(scale))
                scaled_pngs.append(dst)
            raw_pngs = scaled_pngs
        elif upscale_mode.startswith("waifu2x_"):
            scale = int(upscale_mode.split("_")[1])
            scaled_pngs = []
            for i, src in enumerate(raw_pngs):
                _prog(0.05 + 0.15 * i / total, f"waifu2x ์—…์Šค์ผ€์ผ ์ค‘... ({i+1}/{total})")
                dst = src.replace(".png", "_w2x.png")
                _upscale_waifu2x(src, dst, scale=scale, noise=1)
                scaled_pngs.append(dst)
            raw_pngs = scaled_pngs

        # -- 2. Optional preprocessing -------------------------------------
        if preprocess_mode != "none":
            processed_pngs = []
            for i, src in enumerate(raw_pngs):
                _prog(0.20 + 0.05 * i / total, f"์ „์ฒ˜๋ฆฌ ์ค‘... ({i+1}/{total})")
                dst = src.replace(".png", "_pre.png").replace(".jpg", "_pre.jpg").replace(".jpeg", "_pre.jpeg")
                _preprocess(src, dst, mode=preprocess_mode)
                processed_pngs.append(dst)
        else:
            processed_pngs = raw_pngs

        # -- 3. Audiveris -> MXL -------------------------------------------
        mxl_paths = []
        for i, png in enumerate(processed_pngs):
            _prog(0.25 + 0.60 * i / total, f"Audiveris OMR ์ค‘... ({i+1}/{total}ํŽ˜์ด์ง€)")

            def _auv_line(line: str, _i=i, _total=total) -> None:
                # Forward only the informative Audiveris log lines as the
                # progress description.
                if any(k in line for k in ("INFO", "WARN", "Sheet", "Page", "System", "Measure", "Staff")):
                    short = line.split("]")[-1].strip()[:80]
                    _prog(0.25 + 0.60 * _i / _total, f"[{_i+1}/{_total}] {short}")

            mxl_path, err = _run_audiveris(png, str(mxl_dir), on_line=_auv_line)
            if mxl_path:
                mxl_paths.append(mxl_path)
            else:
                warnings.append(f"MXL ์ƒ์„ฑ ์‹คํŒจ (์Šคํ‚ต): {Path(png).name} โ€” {err}")

        if not mxl_paths:
            warn_detail = "\n".join(warnings)
            raise RuntimeError(f"๋ณ€ํ™˜๋œ ํŽ˜์ด์ง€๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค. ๋ชจ๋“  ํŽ˜์ด์ง€์—์„œ Audiveris ์‹คํŒจ.\n{warn_detail}")

        # -- 3-b. Optional automatic XML correction ------------------------
        mxl_paths_for_convert = mxl_paths

        if correct_xml and mxl_paths:
            try:
                from core.xml_corrector import XMLCorrector
            except ImportError as _ie:
                warnings.append(f"[๊ต์ • ์Šคํ‚ต] xml_corrector ์˜์กด์„ฑ ์—†์Œ: {_ie}")
            else:
                _prog(0.85, "XML ์ž๋™ ๊ต์ • ์ค‘...")
                corrector = XMLCorrector()
                corrected: list[str] = []

                for i, mxl_p in enumerate(mxl_paths):
                    _prog(
                        0.85 + 0.03 * i / len(mxl_paths),
                        f"XML ๊ต์ • ์ค‘... ({i+1}/{len(mxl_paths)})",
                    )
                    stem = Path(mxl_p).stem
                    # Strip the upscale/preprocess suffixes to recover the
                    # stem of the original page PNG.
                    _m = re.match(r"(page_\d+)", stem)
                    base_stem = _m.group(1) if _m else stem
                    raw_png = next(
                        (p for p in orig_pngs if Path(p).stem == base_stem),
                        None,
                    )
                    if raw_png is None:
                        warnings.append(f"[๊ต์ • ์Šคํ‚ต] {stem}: ์›๋ณธ PNG ์—†์Œ")
                        continue
                    out_xml = str(mxl_dir / f"{stem}_corrected.xml")
                    try:
                        res = corrector.correct(mxl_p, raw_png, out_xml)
                        corrected.append(out_xml)
                        warnings += [f"[๊ต์ •] {w}" for w in res.warnings]
                        warnings.append(
                            f"[๊ต์ •] {stem}: {res.measures_corrected}๊ฐœ ์ˆ˜์ •, "
                            f"{res.measures_fallback}๊ฐœ fallback"
                        )
                    except Exception as _e:
                        # Best effort: a failed page is reported, not fatal.
                        warnings.append(f"[๊ต์ • ์‹คํŒจ] {stem}: {_e}")

                if corrected:
                    mxl_paths_for_convert = corrected

        # -- 4. MXL -> MML (all notes, N parts) ----------------------------
        _prog(0.88, "MML ๋ณ€ํ™˜ ์ค‘ (์ „์ฒด ํ™”์Œ)...")
        tracks, total_dur, tempo = convert_files_all_notes(mxl_paths_for_convert)
        all_notes_text = format_output_n(tracks, total_dur, tempo)

        # -- 5. MXL -> MML (3 parts) ---------------------------------------
        _prog(0.94, "MML ๋ณ€ํ™˜ ์ค‘ (3ํŒŒํŠธ)...")
        melody, chord1, bass, tempo3 = convert_files(mxl_paths_for_convert)
        three_part_text = format_output(melody, chord1, bass, tempo3)

        # -- 6. Combine ----------------------------------------------------
        _prog(0.99, "๊ฒฐ๊ณผ ์ •๋ฆฌ ์ค‘...")
        combined = all_notes_text.rstrip() + SEP + three_part_text.rstrip() + "\n"

        # XML files extracted next to each MXL, when present.
        xml_candidates = [Path(m).with_suffix(".xml") for m in mxl_paths]
        xml_paths = [str(p) for p in xml_candidates if p.exists()]

    except Exception:
        # Only clean up a directory this function created; never delete a
        # caller-supplied save_dir.
        if owns_work_dir:
            shutil.rmtree(work_dir, ignore_errors=True)
        raise

    return combined, mxl_paths, xml_paths, warnings
core/measure_verifier.py ADDED
@@ -0,0 +1,916 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
measure_verifier.py — CV-based per-measure note verification and pitch correction.
Standalone implementation: no dependency on other core modules beyond cv2, numpy and xml.etree.
"""
5
+
6
+ import os
7
+ import re
8
+ import xml.etree.ElementTree as ET
9
+ from dataclasses import dataclass, field
10
+ from typing import Optional
11
+
12
+ import cv2
13
+ import numpy as np
14
+
15
# ─────────────────────────────────────────────────────────
# YOLOv8 notehead model (loaded automatically once training has produced weights)
# ─────────────────────────────────────────────────────────
18
# Process-wide cache for the YOLO notehead detector; filled on first use.
_YOLO_MODEL = None
# Weights file location, resolved relative to this source file.
_YOLO_MODEL_PATH = os.path.join(
    os.path.dirname(__file__), '..', 'models', 'notehead_yolo', 'weights', 'best.pt'
)


def _load_yolo_model():
    """Return the cached YOLO model, constructing it on the first call.

    ``ultralytics`` is imported inside the function, so importing this module
    does not require that package until a model is actually requested.
    """
    global _YOLO_MODEL
    if _YOLO_MODEL is not None:
        return _YOLO_MODEL
    from ultralytics import YOLO
    _YOLO_MODEL = YOLO(_YOLO_MODEL_PATH)
    return _YOLO_MODEL
30
+
31
+
32
# ─────────────────────────────────────────────────────────
# Data Classes
# ─────────────────────────────────────────────────────────
35
+
36
@dataclass
class NoteInfo:
    """One note or rest at a time point."""

    # Pitch name such as "C4" or "G5"; "rest" for a rest, "?" when unknown.
    pitch: str
    # Length in quarter notes (a quarter note is 1.0).
    duration: float
    # True when the note is marked as a chord member.
    is_chord: bool = False
41
+
42
+
43
@dataclass
class TimePoint:
    """All notes that start at the same beat offset inside a measure."""

    # list[NoteInfo] sounding at this offset.
    notes: list
    # Beat offset within the measure, in quarter notes.
    quarter_length: float
47
+
48
+
49
@dataclass
class MeasureInfo:
    """One measure: its number and its time points."""

    number: int
    # list[TimePoint] in this measure.
    timepoints: list
    # Total number of beats in the measure (a quarter note is 1).
    total_beats: float = 0.0
54
+
55
+
56
@dataclass
class NoteHead:
    """A detected notehead: centre position, box size and fill state."""

    # Centre coordinates in pixels.
    x: int
    y: int
    # Bounding-box width and height in pixels.
    w: int
    h: int
    # True when the head was classified as filled rather than hollow.
    filled: bool
63
+
64
+
65
@dataclass
class Cluster:
    """A group of noteheads that share roughly the same x position."""

    # Mean x coordinate of the member noteheads.
    x_center: float
    # list[NoteHead] belonging to this cluster.
    noteheads: list
69
+
70
+
71
@dataclass
class VerifyResult:
    """Outcome of comparing the image-side count with the XML-side count."""

    # Either "OK" or "FLAG".
    status: str
    # Number of clusters found in the image.
    cv_count: int
    # Number of time points taken from the XML.
    xml_count: int
    # Absolute difference between the two counts.
    diff: int
77
+
78
+
79
@dataclass
class MeasureResult:
    """Verification outcome for a single measure."""

    number: int
    verify: VerifyResult
    # list[TimePoint] with corrected pitches; None when the measure was flagged.
    corrected_timepoints: Optional[list] = None
    # Clusters detected in the image for this measure.
    cv_clusters: list = field(default_factory=list)
85
+
86
+
87
@dataclass
class VerificationReport:
    """Per-measure results plus OK / FLAG tallies."""

    # list[MeasureResult].
    measures: list
    # Number of measures whose status is "OK".
    ok_count: int = 0
    # Number of measures whose status is "FLAG".
    flag_count: int = 0
92
+
93
+
94
# ─────────────────────────────────────────────────────────
# 0. Internal CV utilities
# ─────────────────────────────────────────────────────────
97
+
98
def _to_binary_inv(arr: np.ndarray) -> np.ndarray:
    """Binarize an image with inverted polarity.

    A 3-dimensional input is converted from BGR to grayscale first; any other
    input is thresholded as-is. Pixels brighter than 180 become 0 and all
    remaining pixels become 255, so dark ink ends up as foreground.
    """
    gray = cv2.cvtColor(arr, cv2.COLOR_BGR2GRAY) if arr.ndim == 3 else arr
    return cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY_INV)[1]
106
+
107
+
108
def _remove_staff_lines(binary: np.ndarray) -> np.ndarray:
    """Remove staff lines with a horizontal morphological opening.

    The opening uses a 1-pixel-high kernel that is at least 30 px wide (one
    fifth of the image width when that is larger); whatever survives the
    opening is subtracted from the input.
    """
    span = max(30, binary.shape[1] // 5)
    line_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (span, 1))
    staff_only = cv2.morphologyEx(binary, cv2.MORPH_OPEN, line_kernel)
    return cv2.subtract(binary, staff_only)
114
+
115
+
116
def _detect_staff_lines_y(binary: np.ndarray) -> list:
    """Locate staff-line rows from the horizontal projection.

    A row counts as a line row when its summed intensity is at least 30% of a
    fully white row. Consecutive line rows are merged into one line whose y is
    the midpoint of the run.

    Returns:
        The y coordinates of the first five lines (top to bottom), or an empty
        list when fewer than five lines were found.
    """
    height, width = binary.shape
    row_sums = binary.sum(axis=1)
    cutoff = width * 255 * 0.3  # 30% of the row width

    centers = []
    run_start = None
    for row in range(height):
        on_line = row_sums[row] >= cutoff
        if on_line and run_start is None:
            run_start = row
        elif not on_line and run_start is not None:
            centers.append((run_start + row) // 2)
            run_start = None
    if run_start is not None:
        # The run touches the bottom edge of the image.
        centers.append((run_start + height) // 2)

    # With several systems on the page only the first five lines are used.
    return centers[:5] if len(centers) >= 5 else []
144
+
145
+
146
def _detect_staff_spacing(arr: np.ndarray) -> float:
    """Return the staff-line spacing in pixels.

    The input is binarized here, staff lines are located from the horizontal
    projection, and the median distance between neighbouring lines is
    returned. When detection fails the fallback is the image height / 8.
    """
    line_ys = _detect_staff_lines_y(_to_binary_inv(arr))
    if len(line_ys) < 2:
        return arr.shape[0] / 8.0
    gaps = [below - above for above, below in zip(line_ys, line_ys[1:])]
    return float(np.median(gaps))
157
+
158
+
159
def _detect_systems(inv: np.ndarray) -> list:
    """Find the vertical extent of each system from its staff lines.

    Rows whose ink covers at least 30% of the width are treated as staff-line
    rows; runs of such rows are reduced to their centre y ("bands"). Bands
    closer than 10 staff spacings are grouped into one system.

    When fewer than five bands are found, a fallback for noisy images is used:
    rows with at least 10% ink are "active", and a system is a stretch of
    active rows not interrupted by a long blank gap.

    Returns:
        list of (y1, y2) tuples, top to bottom.
    """
    height, width = inv.shape
    row_ink = inv.sum(axis=1).astype(float)

    # Only lines that cross most of the page qualify as staff lines.
    staff_rows = row_ink >= width * 255 * 0.30

    # Collapse each run of adjacent staff rows into one band centre.
    band_centers = []
    run_start = None
    for row in range(height):
        if staff_rows[row]:
            if run_start is None:
                run_start = row
        elif run_start is not None:
            band_centers.append((run_start + row) // 2)
            run_start = None
    if run_start is not None:
        band_centers.append((run_start + height) // 2)

    if len(band_centers) < 5:
        # Staff-line detection failed: retry with a lower threshold.
        busy_rows = row_ink >= width * 255 * 0.10
        max_gap = max(80, height // 15)
        min_height = max(80, height // 20)
        found = []
        top = None
        blank_run = 0
        for row in range(height):
            if busy_rows[row]:
                if top is None:
                    top = row
                blank_run = 0
            elif top is not None:
                blank_run += 1
                if blank_run > max_gap:
                    bottom = row - blank_run  # last active row of the stretch
                    if bottom - top >= min_height:
                        found.append((top, bottom))
                    top = None
                    blank_run = 0
        if top is not None and height - 1 - top >= min_height:
            found.append((top, height - 1))
        return found

    # Staff spacing: median gap over (at most) the first nine band gaps.
    gaps = [
        band_centers[i + 1] - band_centers[i]
        for i in range(min(9, len(band_centers) - 1))
    ]
    staff_spacing = float(np.median(gaps))

    # Bands within 10 spacings of each other belong to the same system. The
    # original author notes that the treble-to-bass gap of a piano grand staff
    # is about 8 spacings while inter-system gaps are usually 15 or more.
    join_limit = staff_spacing * 10
    grouped = [[band_centers[0]]]
    for center in band_centers[1:]:
        if center - grouped[-1][-1] <= join_limit:
            grouped[-1].append(center)
        else:
            grouped.append([center])

    # Pad each group a little; a lone band is not a staff system.
    pad = max(5, int(staff_spacing * 0.5))
    return [
        (max(0, g[0] - pad), min(height - 1, g[-1] + pad))
        for g in grouped
        if len(g) >= 2
    ]
246
+
247
+
248
def _detect_barlines(inv: np.ndarray, y1: int, y2: int) -> list:
    """Find barline x positions inside the rows ``y1:y2`` of ``inv``.

    A column is a barline candidate when its ink fills at least 70% of the
    strip height. Runs of candidate columns narrower than 20 px are reduced
    to their midpoint. The left 8% and the right 5 px of the strip are
    ignored, and barlines closer to the previously kept one than
    ``max(50, width // 10)`` are dropped.

    Returns:
        Sorted list of barline x coordinates; empty when the strip is smaller
        than 10 px in either dimension.
    """
    strip = inv[y1:y2, :]
    if strip.shape[0] < 10 or strip.shape[1] < 10:
        return []

    strip_h, strip_w = strip.shape
    col_ink = strip.sum(axis=0).astype(float)

    # The original author's reasoning for 70%: separate treble and bass stems
    # fill roughly 30% each, so together they stay below the cut-off.
    cutoff = strip_h * 255 * 0.70

    # Skip the left margin (clef / bracket area) and the last few columns.
    x_begin = int(strip_w * 0.08)
    x_end = strip_w - 5

    candidates = []
    peak_from = None
    for x in range(x_begin, x_end):
        if col_ink[x] >= cutoff:
            if peak_from is None:
                peak_from = x
        elif peak_from is not None:
            # A real barline is thin.
            if x - peak_from < 20:
                candidates.append((peak_from + x) // 2)
            peak_from = None
    if peak_from is not None and (strip_w - peak_from) < 20:
        candidates.append((peak_from + x_end) // 2)

    # Drop barlines that sit too close to the previously kept one.
    min_gap = max(50, strip_w // 10)
    kept = []
    for x in candidates:
        if not kept or x - kept[-1] >= min_gap:
            kept.append(x)
    return kept
295
+
296
+
297
def _detect_noteheads_cv(arr: np.ndarray, left_skip_px: int = 0) -> list:
    """Detect noteheads with connected-component (CC) analysis.

    The image is binarized, staff lines are removed, and every connected
    component is tested against size limits derived from the staff spacing:

    * a component whose area, width, height and aspect ratio all fall within
      the notehead limits is reported as one notehead at its centroid;
    * a wider component of limited height (NOTE(review): presumably heads
      joined by a beam) is split at the peaks of its column projection, and
      every peak of plausible width is reported as a filled notehead.

    Args:
        arr: BGR or grayscale image that has not been binarized yet.
        left_skip_px: Candidates whose centre x lies left of this column are
            discarded.

    Returns:
        list[NoteHead] in the coordinates of ``arr``, sorted by coarse row bin
        (1.5 staff spacings) and then by x.
    """
    binary = _to_binary_inv(arr)

    # Measure spacing on the raw image: _detect_staff_spacing() binarizes its
    # argument itself, so passing the already-inverted mask would flip the
    # polarity a second time and measure paper instead of ink.
    spacing = max(_detect_staff_spacing(arr), 4.0)

    min_w = max(4, int(spacing * 0.7))
    max_w = max(min_w + 1, int(spacing * 1.6))
    min_h = max(3, int(spacing * 0.7))
    max_h = max(min_h + 1, int(spacing * 1.1))
    min_area = max(10, int(spacing ** 2 * 0.4))
    max_area = max(min_area + 1, int(spacing ** 2 * 3.0))
    beam_max_h = int(spacing * 2.2)

    no_lines = _remove_staff_lines(binary)
    n, labels, stats, centroids = cv2.connectedComponentsWithStats(no_lines, connectivity=8)

    noteheads = []
    for i in range(1, n):  # label 0 is the background
        bx, by, bw, bh, area = (int(v) for v in stats[i])
        cx = int(centroids[i][0])

        # Case 1: the component itself looks like a single notehead.
        if (min_area <= area <= max_area and
                min_w <= bw <= max_w and
                min_h <= bh <= max_h and
                0.5 <= bw / bh <= 1.8):
            if cx >= left_skip_px:
                cy = int(centroids[i][1])
                fill_ratio = area / (bw * bh)
                noteheads.append(NoteHead(cx, cy, bw, bh, bool(fill_ratio > 0.42)))
            continue

        # Case 2: a wide, low component; split it at column-projection peaks.
        if area > min_area and bw > max_w and min_h <= bh <= beam_max_h:
            comp_mask = (labels[by:by + bh, bx:bx + bw] == i).astype(np.uint8)
            col_proj = comp_mask.sum(axis=0).astype(float)
            max_proj = col_proj.max()
            if max_proj > 0:
                peak_thresh = max_proj * 0.65
                in_pk = False
                pk_start = 0
                end = len(col_proj)
                # Iterate one past the end so a peak touching the right edge
                # is closed as well.
                for xi in range(end + 1):
                    above = xi < end and col_proj[xi] >= peak_thresh
                    if above and not in_pk:
                        pk_start = xi
                        in_pk = True
                    elif not above and in_pk:
                        pk_w = xi - pk_start
                        if min_w <= pk_w <= max_w + 6:
                            px_cx = bx + (pk_start + xi) // 2
                            if px_cx >= left_skip_px:
                                row_proj = comp_mask[:, pk_start:xi].sum(axis=1)
                                if row_proj.sum() > 0:
                                    px_cy = by + int(np.argmax(row_proj))
                                else:
                                    px_cy = by + bh // 2
                                ph = min(bh, int(spacing * 1.1))
                                noteheads.append(NoteHead(px_cx, px_cy, pk_w, ph, True))
                        in_pk = False

    row_bin = max(1, int(spacing * 1.5))
    noteheads.sort(key=lambda nh: (nh.y // row_bin, nh.x))
    return noteheads
361
+
362
+
363
def _detect_noteheads(arr: np.ndarray, left_skip_px: int = 0) -> list:
    """Notehead detection entry point for a single measure crop.

    This always delegates to the connected-component detector. Whole-page
    YOLO detection is provided by _detect_noteheads_full_page() instead.
    """
    detected = _detect_noteheads_cv(arr, left_skip_px)
    return detected
369
+
370
+
371
def _detect_noteheads_full_page(img: np.ndarray) -> list:
    """Run the YOLO model once over the whole page.

    Returns:
        list[NoteHead] in page coordinates, sorted by coarse row bin and then
        by x. An empty list is returned when the weights file does not exist
        or when loading / inference raises, so the caller can fall back to
        the connected-component detector.
    """
    if not os.path.exists(_YOLO_MODEL_PATH):
        return []
    try:
        detection = _load_yolo_model()(img, imgsz=1280, conf=0.25, verbose=False)[0]
    except Exception:
        # Best effort: any model failure means "no YOLO results".
        return []

    ink = _to_binary_inv(img)
    spacing = max(_detect_staff_spacing(img), 4.0)

    found = []
    for box in detection.boxes:
        left, top, right, bottom = box.xyxy[0].tolist()
        # Share of ink inside the box decides filled vs. hollow.
        patch = ink[max(0, int(top)):max(1, int(bottom)),
                    max(0, int(left)):max(1, int(right))]
        fill = patch.sum() / (255 * patch.size) if patch.size > 0 else 0.5
        found.append(NoteHead(
            int((left + right) / 2),
            int((top + bottom) / 2),
            int(right - left),
            int(bottom - top),
            fill > 0.42,
        ))

    band = max(1, int(spacing * 1.5))
    found.sort(key=lambda nh: (nh.y // band, nh.x))
    return found
401
+
402
+
403
# ─────────────────────────────────────────────────────────
# 1. MusicXML parsing
# ─────────────────────────────────────────────────────────
406
+
407
def _parse_pitch(note_el) -> str:
    """Build a pitch string such as "F#5" from a MusicXML <note> element.

    Returns 'rest' when the note has a <rest> child or no <pitch> child.
    Missing step / octave default to 'C' / '4'. An <alter> of 1 adds '#',
    -1 adds 'b'; any other or unparsable value adds nothing.
    """
    pitch_el = None if note_el.find('rest') is not None else note_el.find('pitch')
    if pitch_el is None:
        return 'rest'

    try:
        alter = int(float(pitch_el.findtext('alter', '0')))
    except ValueError:
        alter = 0
    accidental = {1: '#', -1: 'b'}.get(alter, '')

    return pitch_el.findtext('step', 'C') + accidental + pitch_el.findtext('octave', '4')
423
+
424
+
425
def parse_xml_measures(xml_str: str) -> list:
    """Parse a MusicXML string into a list[MeasureInfo] for the first part.

    Notes are grouped per measure by beat offset (in quarter notes). Chord
    notes share the offset of the preceding non-chord note; grace notes get
    duration 0 and do not advance time; <backup> and <forward> move the
    cursor.

    A measure whose reached length exceeds 1.5x the current time-signature
    length is assumed to be several visual measures merged into one (e.g. an
    8-beat measure becomes two 4-beat measures) and is split at multiples of
    the time-signature length. Time points beyond the last split boundary are
    kept in the final sub-measure rather than dropped.

    Output measures are numbered sequentially from 1, in ascending order of
    the XML measure numbers. Only the first part is read: counting every part
    would overstate the XML side when compared with one system row.

    Returns:
        list[MeasureInfo]; empty when the XML cannot be parsed or has no
        <part>.
    """
    from collections import defaultdict

    # Strip xmlns declarations so plain tag names work in find()/findall().
    xml_clean = re.sub(r'\s+xmlns[^"]*"[^"]*"', '', xml_str)

    try:
        root = ET.fromstring(xml_clean)
    except ET.ParseError:
        return []

    parts = root.findall('.//part')
    if not parts:
        return []
    first_part = parts[0]

    # measure number -> beat offset -> list[NoteInfo]
    measure_beats: dict = defaultdict(lambda: defaultdict(list))
    measure_numbers: set = set()
    measure_max_beats: dict = {}  # measure number -> furthest beat reached

    divisions = 1
    beats_per_measure = 4.0

    for measure_el in first_part.findall('measure'):
        try:
            mn = int(measure_el.get('number', '0'))
        except ValueError:
            mn = 0
        measure_numbers.add(mn)

        attrs = measure_el.find('attributes')
        if attrs is not None:
            div_el = attrs.find('divisions')
            if div_el is not None and div_el.text:
                try:
                    divisions = int(div_el.text)
                except ValueError:
                    # Unparsable value: keep the divisions already in effect,
                    # matching how the other numeric fields are handled.
                    pass
            time_el = attrs.find('time')
            if time_el is not None:
                try:
                    beats = float(time_el.findtext('beats', '4'))
                    beat_type = float(time_el.findtext('beat-type', '4'))
                    beats_per_measure = beats * (4.0 / beat_type)
                except (ValueError, ZeroDivisionError):
                    pass

        current_beat = 0.0
        last_beat = 0.0  # onset of the most recent non-chord, non-grace note
        max_beat_this = 0.0

        for child in measure_el:
            tag = child.tag
            if tag == 'note':
                is_chord = child.find('chord') is not None
                is_grace = child.find('grace') is not None
                dur_el = child.find('duration')
                duration = 0.0
                if dur_el is not None and dur_el.text and not is_grace:
                    try:
                        duration = int(dur_el.text) / divisions
                    except (ValueError, ZeroDivisionError):
                        pass
                note_beat = last_beat if is_chord else current_beat
                measure_beats[mn][round(note_beat, 6)].append(
                    NoteInfo(pitch=_parse_pitch(child), duration=duration, is_chord=is_chord)
                )
                if not is_chord and not is_grace:
                    last_beat = current_beat
                    current_beat += duration
                    max_beat_this = max(max_beat_this, current_beat)
            elif tag == 'backup':
                dur_el = child.find('duration')
                if dur_el is not None and dur_el.text:
                    try:
                        current_beat -= int(dur_el.text) / divisions
                        current_beat = max(0.0, current_beat)
                    except (ValueError, ZeroDivisionError):
                        pass
            elif tag == 'forward':
                dur_el = child.find('duration')
                if dur_el is not None and dur_el.text:
                    try:
                        current_beat += int(dur_el.text) / divisions
                        max_beat_this = max(max_beat_this, current_beat)
                    except (ValueError, ZeroDivisionError):
                        pass

        measure_max_beats[mn] = max(measure_max_beats.get(mn, 0.0), max_beat_this)

    measures = []
    seq = 1
    for mn in sorted(measure_numbers):
        beat_map = measure_beats[mn]
        timepoints_all = [
            TimePoint(notes=beat_map[b], quarter_length=b)
            for b in sorted(beat_map)
        ]
        total_beats = measure_max_beats.get(mn, beats_per_measure)

        if beats_per_measure > 0 and total_beats > beats_per_measure * 1.5:
            # Merged measure: cut into sub-measures of one time-signature
            # length each.
            n_sub = max(2, round(total_beats / beats_per_measure))
            for sub_i in range(n_sub):
                lo = sub_i * beats_per_measure
                hi = lo + beats_per_measure
                # The last slice is open-ended so rounding n_sub down never
                # discards trailing time points.
                is_last = sub_i == n_sub - 1
                sub_tps = [
                    TimePoint(notes=tp.notes, quarter_length=tp.quarter_length - lo)
                    for tp in timepoints_all
                    if lo <= tp.quarter_length and (is_last or tp.quarter_length < hi)
                ]
                measures.append(
                    MeasureInfo(number=seq, timepoints=sub_tps, total_beats=beats_per_measure)
                )
                seq += 1
        else:
            measures.append(
                MeasureInfo(number=seq, timepoints=timepoints_all, total_beats=total_beats)
            )
            seq += 1

    return measures
561
+
562
+
563
# ─────────────────────────────────────────────────────────
# 2. Notehead clustering
# ─────────────────────────────────────────────────────────
566
+
567
def cluster_noteheads(noteheads: list, staff_spacing_px: float) -> list:
    """Group noteheads into time points by x position.

    Noteheads are sorted by x; a head joins the current group when it lies
    within ``max(staff_spacing_px * 0.8, 8.0)`` pixels of the previous head,
    otherwise it starts a new group.

    Returns:
        list[Cluster] ordered by x; each cluster's ``x_center`` is the mean x
        of its members.
    """
    if not noteheads:
        return []

    max_gap = max(staff_spacing_px * 0.8, 8.0)
    ordered = sorted(noteheads, key=lambda nh: nh.x)

    buckets = [[ordered[0]]]
    for head in ordered[1:]:
        if head.x - buckets[-1][-1].x <= max_gap:
            buckets[-1].append(head)
        else:
            buckets.append([head])

    return [
        Cluster(x_center=sum(h.x for h in bucket) / len(bucket), noteheads=list(bucket))
        for bucket in buckets
    ]
595
+
596
+
597
# ─────────────────────────────────────────────────────────
# 3. Per-measure verification
# ─────────────────────────────────────────────────────────
600
+
601
def verify_measure(cv_clusters: list, xml_timepoints: list) -> VerifyResult:
    """Compare the number of CV clusters with the number of XML time points.

    The measure is "OK" when the counts differ by at most one, otherwise
    "FLAG". Callers are expected to have removed rest-only time points from
    ``xml_timepoints`` beforehand.
    """
    n_cv, n_xml = len(cv_clusters), len(xml_timepoints)
    gap = abs(n_cv - n_xml)
    return VerifyResult(
        status="FLAG" if gap > 1 else "OK",
        cv_count=n_cv,
        xml_count=n_xml,
        diff=gap,
    )
611
+
612
+
613
# ─────────────────────────────────────────────────────────
# 4. Pitch estimation
# ─────────────────────────────────────────────────────────
616
+
617
# Natural note letters in ascending order within one octave.
_DIATONIC_STEPS = ['C', 'D', 'E', 'F', 'G', 'A', 'B']
# Semitone offset of each natural note above C.
_DIATONIC_SEMITONES = {'C': 0, 'D': 2, 'E': 4, 'F': 5, 'G': 7, 'A': 9, 'B': 11}

# Diatonic "total index" (octave * 7 + step index) of E4, used as step 0 for
# the treble clef. E has index 2 in CDEFGAB and sits in octave 4.
_E4_TOTAL_INDEX = 4 * 7 + 2


def _diatonic_to_pitch(steps_from_ref: int, ref_total_index: int) -> str:
    """Convert a diatonic step offset from a reference note to a pitch name.

    Args:
        steps_from_ref: Number of diatonic steps above (positive) or below
            (negative) the reference.
        ref_total_index: Reference note encoded as ``octave * 7 + step_index``
            where step_index indexes CDEFGAB.

    Returns:
        Natural pitch name such as "E4".
    """
    # divmod() with a positive divisor always returns a remainder in [0, 7),
    # so no correction is needed for notes below the reference.
    octave, step_idx = divmod(ref_total_index + steps_from_ref, 7)
    return f"{_DIATONIC_STEPS[step_idx]}{octave}"
637
+
638
+
639
def estimate_pitch_from_y(y_px: float, staff_bands: list, clef: str = 'treble') -> str:
    """Estimate a natural pitch name from a notehead's y coordinate.

    Args:
        y_px: Notehead y in the same coordinate system as ``staff_bands``.
        staff_bands: y coordinates of the five staff lines, top to bottom.
        clef: 'treble' (bottom line E4) or 'bass' (bottom line G2).

    Returns:
        A pitch string such as "C4", or "?" when fewer than five lines are
        given, the line spacing is not positive, or the clef is unknown.
    """
    if len(staff_bands) < 5:
        return '?'

    bottom_line_y = staff_bands[4]
    line_gap = (bottom_line_y - staff_bands[0]) / 4.0
    if line_gap <= 0:
        return '?'

    if clef == 'treble':
        bottom_line_index = _E4_TOTAL_INDEX
    elif clef == 'bass':
        bottom_line_index = 2 * 7 + 4  # G2
    else:
        return '?'

    # Image y grows downward, so a positive difference means above the bottom
    # line, i.e. a higher pitch. Each half line gap is one diatonic step.
    steps_up = round((bottom_line_y - y_px) / (line_gap / 2.0))
    return _diatonic_to_pitch(steps_up, bottom_line_index)
668
+
669
+
670
# ─────────────────────────────────────────────────────────
# 5. Pitch correction
# ─────────────────────────────────────────────────────────
673
+
674
def _pitch_to_sort_key(pitch: str) -> int:
    """Map a pitch string to an integer that orders pitches low to high.

    The key is ``octave * 12 + semitone offset + accidental``. 'rest', '?'
    and anything that cannot be parsed map to -999.
    """
    if pitch in ('rest', '?'):
        return -999
    try:
        letter, tail = pitch[0], pitch[1:]
        if '#' in tail:
            shift, tail = 1, tail.replace('#', '')
        elif 'b' in tail:
            shift, tail = -1, tail.replace('b', '')
        else:
            shift = 0
        return int(tail) * 12 + _DIATONIC_SEMITONES.get(letter, 0) + shift
    except (ValueError, IndexError):
        return -999
692
+
693
+
694
def correct_measure_pitches(
    cv_clusters: list,
    xml_timepoints: list,
    staff_bands: list,
    clef: str = 'treble',
) -> list:
    """Re-estimate note pitches of a measure from notehead y coordinates.

    Clusters and time points are paired in order. Within a pair, noteheads
    are sorted top to bottom and XML notes high to low, then matched by
    position: each matched non-rest note gets the pitch estimated from its
    notehead, rests stay rests, and notes without a notehead are kept as-is.

    The two inputs are meant to have the same length. When there are fewer
    clusters than time points, the unmatched trailing time points are carried
    over unchanged so the result always has ``len(xml_timepoints)`` entries.

    Returns:
        list[TimePoint] with corrected pitches.
    """
    corrected = []
    for cluster, tp in zip(cv_clusters, xml_timepoints):
        # Smaller y is higher on the page, i.e. a higher pitch.
        sorted_nhs = sorted(cluster.noteheads, key=lambda nh: nh.y)
        # Highest pitch first; rests sort last.
        sorted_notes = sorted(
            tp.notes,
            key=lambda ni: _pitch_to_sort_key(ni.pitch),
            reverse=True,
        )

        new_notes = []
        for idx, note in enumerate(sorted_notes):
            if note.pitch == 'rest':
                new_notes.append(
                    NoteInfo(pitch='rest', duration=note.duration, is_chord=note.is_chord)
                )
            elif idx < len(sorted_nhs):
                est = estimate_pitch_from_y(sorted_nhs[idx].y, staff_bands, clef)
                new_notes.append(
                    NoteInfo(pitch=est, duration=note.duration, is_chord=note.is_chord)
                )
            else:
                new_notes.append(note)

        corrected.append(TimePoint(notes=new_notes, quarter_length=tp.quarter_length))

    # zip() stops at the shorter input; keep XML time points that had no
    # cluster to pair with instead of silently losing them.
    for tp in xml_timepoints[len(cv_clusters):]:
        corrected.append(TimePoint(notes=list(tp.notes), quarter_length=tp.quarter_length))

    return corrected
729
+
730
+
731
# ─────────────────────────────────────────────────────────
# 6. Full pipeline
# ─────────────────────────────────────────────────────────
734
+
735
def run_verification(png_path: str, xml_str: str) -> VerificationReport:
    """Verify (and where possible correct) each measure of a page.

    Pipeline:
        1. Load the image and binarize it.
        2. Detect the y range of every system.
        3. Detect barline x positions per system and cut measure crops.
        4. Parse the MusicXML.
        5. Per measure: detect noteheads, cluster them, compare counts with
           the XML, and re-estimate pitches when the measure is "OK" and five
           staff lines were found in the crop.
        6. Return a VerificationReport.

    Args:
        png_path: Path of the original page image.
        xml_str: MusicXML document as a string.

    Raises:
        FileNotFoundError: When the image cannot be read.
    """
    img = cv2.imread(png_path)
    if img is None:
        raise FileNotFoundError(f"이미지를 열 수 없음: {png_path}")

    inv = _to_binary_inv(img)
    w_img = inv.shape[1]
    min_measure_width = max(20, w_img // 25)

    # When the YOLO model is available, run it once on the whole page rather
    # than once per crop. An empty result selects the CC fallback.
    page_noteheads = _detect_noteheads_full_page(img)
    use_yolo = len(page_noteheads) > 0

    systems = _detect_systems(inv)
    if not systems:
        return VerificationReport(measures=[], ok_count=0, flag_count=0)

    # Drop systems that sit too close below the previously kept one (the
    # original author's intent: other parts of the same row in multi-part
    # scores such as two pianos).
    if len(systems) > 1:
        heights = sorted(y2 - y1 for y1, y2 in systems)
        ref_h = heights[len(heights) // 2]  # median height
        # A median much larger than the minimum suggests merged systems;
        # use the smallest height as the reference in that case.
        if ref_h > heights[0] * 1.5:
            ref_h = heights[0]

        deduped = [systems[0]]
        for ny1, ny2 in systems[1:]:
            if ny1 - deduped[-1][1] >= ref_h * 0.7:
                deduped.append((ny1, ny2))
        systems = deduped

    all_xml_measures = parse_xml_measures(xml_str)
    measure_map = {m.number: m for m in all_xml_measures}

    results: list = []
    n_systems = len(systems)
    n_xml = len(all_xml_measures)

    # Spread the XML measures over the systems as evenly as possible, with at
    # least one per system; the last system absorbs any surplus or shortfall.
    xml_per_sys: list = []
    remaining_xml = n_xml
    for si in range(n_systems):
        n_this = max(1, round(remaining_xml / (n_systems - si)))
        xml_per_sys.append(n_this)
        remaining_xml -= n_this
    if xml_per_sys:
        xml_per_sys[-1] += n_xml - sum(xml_per_sys)
        xml_per_sys[-1] = max(1, xml_per_sys[-1])

    measure_idx = 1

    for si, (y1, y2) in enumerate(systems):
        barline_xs = _detect_barlines(inv, y1, y2)
        n_xml_this = xml_per_sys[si]

        if len(barline_xs) < 2:
            # Not enough barlines: treat the whole row as one measure.
            measure_boundaries = [(0, w_img)]
        else:
            # Remove barlines that are too close to the previous one.
            filtered_bl = [barline_xs[0]]
            for bl in barline_xs[1:]:
                if bl - filtered_bl[-1] >= min_measure_width:
                    filtered_bl.append(bl)

            left_edge_threshold = w_img * 0.15
            if filtered_bl[0] < left_edge_threshold:
                # First barline lies in the left 15%: treated as the system
                # edge / clef area and skipped.
                # NOTE(review): opening at filtered_bl[1] also skips the span
                # between the first two barlines; confirm this is intended.
                open_x = filtered_bl[1] if len(filtered_bl) >= 2 else filtered_bl[0]
                close_x = filtered_bl[-1]
                internal = filtered_bl[2:-1] if len(filtered_bl) >= 3 else []
            else:
                # Every barline is a real one; the row starts at the left edge.
                open_x = 0
                close_x = filtered_bl[-1]
                internal = filtered_bl[:-1]

            n_internal_needed = n_xml_this - 1

            if n_internal_needed <= 0 or not internal:
                selected = []
            elif n_internal_needed >= len(internal):
                selected = internal
            else:
                # More barlines than needed: choose those closest to where the
                # XML measure lengths predict the boundaries to be.
                total_w = close_x - open_x
                sys_measures = all_xml_measures[measure_idx - 1: measure_idx - 1 + n_xml_this]
                durations = [max(m.total_beats, 1.0) for m in sys_measures]
                total_dur = sum(durations)
                if total_dur > 0:
                    cumul = [sum(durations[:k + 1]) for k in range(len(durations) - 1)]
                    expected = [open_x + total_w * c / total_dur for c in cumul]
                else:
                    expected = [open_x + total_w * (k + 1) / (n_internal_needed + 1)
                                for k in range(n_internal_needed)]

                # Track chosen barlines by list position, not by object id.
                taken = set()
                selected = []
                for exp_x in expected:
                    free = [k for k in range(len(internal)) if k not in taken]
                    if not free:
                        continue
                    k_best = min(free, key=lambda k: abs(internal[k] - exp_x))
                    taken.add(k_best)
                    selected.append(internal[k_best])
                selected.sort()

            final_bl = [open_x] + selected + [close_x]
            measure_boundaries = [(final_bl[i], final_bl[i + 1])
                                  for i in range(len(final_bl) - 1)]

        for x1, x2 in measure_boundaries:
            measure_crop = img[y1:y2, x1:x2]

            # Crops that are too small are skipped without consuming an XML
            # measure.
            if measure_crop.shape[0] < 10 or measure_crop.shape[1] < 10:
                continue

            xml_measure = measure_map.get(measure_idx)
            if xml_measure is None:
                measure_idx += 1
                continue

            # Ignore time points that contain only rests.
            xml_tps = [
                tp for tp in xml_measure.timepoints
                if any(n.pitch != 'rest' for n in tp.notes)
            ]

            # Staff geometry inside this crop.
            crop_binary = _to_binary_inv(measure_crop)
            staff_ys = _detect_staff_lines_y(crop_binary)
            staff_spacing = _detect_staff_spacing(measure_crop)

            if use_yolo:
                # Keep page-level detections that fall inside this crop and
                # shift them to crop coordinates.
                noteheads = [
                    NoteHead(nh.x - x1, nh.y - y1, nh.w, nh.h, nh.filled)
                    for nh in page_noteheads
                    if x1 <= nh.x < x2 and y1 <= nh.y < y2
                ]
            else:
                noteheads = _detect_noteheads(measure_crop)
            clusters = cluster_noteheads(noteheads, staff_spacing)
            verify = verify_measure(clusters, xml_tps)

            corrected = None
            if verify.status == "OK" and len(staff_ys) >= 5:
                corrected = correct_measure_pitches(clusters, xml_tps, staff_ys)

            results.append(MeasureResult(
                number=measure_idx,
                verify=verify,
                corrected_timepoints=corrected,
                cv_clusters=clusters,
            ))
            measure_idx += 1

    ok_count = sum(1 for r in results if r.verify.status == "OK")
    flag_count = sum(1 for r in results if r.verify.status == "FLAG")
    return VerificationReport(measures=results, ok_count=ok_count, flag_count=flag_count)
core/mml_converter.py ADDED
@@ -0,0 +1,283 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ core/mml_converter.py
3
+
4
+ NoteEvent ํŒŒํŠธ ๋ฆฌ์ŠคํŠธ๋ฅผ ๋งˆ๋น„๋…ธ๊ธฐ MML ๋ฌธ์ž์—ด๋กœ ๋ณ€ํ™˜.
5
+
6
+ ๋งˆ๋น„๋…ธ๊ธฐ MML ํ˜•์‹:
7
+ MML@<๋ฉœ๋กœ๋””>,<ํ™”์Œ1>,<ํ™”์Œ2>;
8
+ ์Œํ‘œ: o<์˜ฅํƒ€๋ธŒ><์Œ์ด๋ฆ„>[<๊ธธ์ด>] ์˜ˆ) o4c4 = C4 4๋ถ„์Œํ‘œ, l4 ์„ ์–ธ ํ›„ c
9
+ ์‰ผํ‘œ: r[<๊ธธ์ด>] ์˜ˆ) r4 = 4๋ถ„์Œํ‘œ ์‰ผํ‘œ
10
+ ๊ธฐ๋ณธ๊ธธ์ด: l<N> ์˜ˆ) l8 โ†’ ์ดํ›„ ๊ธธ์ด ์ƒ๋žต ์‹œ 8๋ถ„์Œํ‘œ
11
+ ์˜ฅํƒ€๋ธŒ ์ƒ์Šน: > ํ•˜๊ฐ•: <
12
+ ํƒ€์ด: c4&c8 (๊ฐ™์€ ์Œ ์—ฐ๊ฒฐ)
13
+ ํ…œํฌ: t<BPM> ์˜ˆ) t120
14
+
15
+ ์ง€์› ๊ธธ์ด (4๋ถ„์Œํ‘œ = duration 1.0):
16
+ 1, 1., 2, 2., 4, 4., 8, 8., 16, 16., 32, 32., 64
17
+
18
+ ํ‘œํ˜„ ๋ถˆ๊ฐ€ ๊ธธ์ด: & ํƒ€์ด๋กœ ๊ทธ๋ฆฌ๋”” ๋ถ„ํ•ด (์˜ˆ: 1.25๋ฐ• โ†’ 4&16)
19
+ ์ž”์—ฌ ์˜ค์ฐจ > 0.01๋ฐ•์ด๋ฉด ๊ทผ์‚ฌ ๊ฒฝ๊ณ  ๋ฐœ์ƒ.
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ from collections import Counter
25
+
26
+ from .models import NoteEvent
27
+
28
# Pitch class (MIDI number modulo 12) -> MML note name, without the octave.
# MML spells a sharp with a trailing "+" and a flat with a trailing "-".
_PITCH_TO_NOTE_SHARP = dict(enumerate(
    ("c", "c+", "d", "d+", "e", "f", "f+", "g", "g+", "a", "a+", "b")
))
_PITCH_TO_NOTE_FLAT = dict(enumerate(
    ("c", "d-", "d", "e-", "e", "f", "g-", "g", "a-", "a", "b-", "b")
))
39
+
40
# (duration_in_beats, mml_length_token) pairs, with a quarter note = 1.0 beat.
# The list MUST stay sorted by duration in descending order: the greedy
# decomposition in _duration_to_lengths takes the first entry that fits.
#
# A dotted length lasts 1.5x its base length ("4." = 1.5 beats, not 0.75).
# The previous table paired every dotted token with half of its real value,
# so emitted dotted notes played twice as long as the converter assumed.
_DURATION_MAP: list[tuple[float, str]] = [
    (6.0, "1."),      # dotted whole note
    (4.0, "1"),       # whole note
    (3.0, "2."),      # dotted half note
    (2.0, "2"),       # half note
    (1.5, "4."),      # dotted quarter note
    (1.0, "4"),       # quarter note
    (0.75, "8."),     # dotted eighth note
    (0.5, "8"),       # eighth note
    (0.375, "16."),   # dotted sixteenth note
    (0.25, "16"),     # sixteenth note
    (0.1875, "32."),  # dotted thirty-second note
    (0.125, "32"),    # thirty-second note
    (0.0625, "64"),   # sixty-fourth note
]


def _mml_duration(lengths: list[str]) -> float:
    """Return the total number of beats played by a list of MML length tokens.

    Tokens that are not present in ``_DURATION_MAP`` contribute 0.0.
    """
    beats_by_token = {token: beats for beats, token in _DURATION_MAP}
    return sum(beats_by_token.get(token, 0.0) for token in lengths)


def _duration_to_lengths(duration: float) -> tuple[list[str], bool]:
    """Decompose a duration in beats into MML length tokens to be tied with ``&``.

    The decomposition is greedy: the largest table entry that fits into the
    remaining duration (with a 0.001-beat tolerance) is taken repeatedly.

    Args:
        duration: Length in quarter-note beats.

    Returns:
        ``(lengths, is_approximate)``. ``lengths`` always holds at least one
        token. ``is_approximate`` is True when more than 0.01 beat is left
        over after the decomposition, or when the duration is too short for
        any table entry and the nearest entry is substituted.

    Examples:
        1.0  -> (["4"], False)
        1.5  -> (["4."], False)
        1.25 -> (["4", "16"], False)    # 1.0 + 0.25
    """
    lengths: list[str] = []
    remaining = duration

    while remaining > 0.001:
        # First (i.e. largest) entry that still fits, or None when nothing does.
        fit = next(
            (entry for entry in _DURATION_MAP if entry[0] <= remaining + 0.001),
            None,
        )
        if fit is None:
            break
        lengths.append(fit[1])
        remaining -= fit[0]

    if not lengths:
        # Shorter than the smallest representable length: use the nearest one.
        nearest = min(_DURATION_MAP, key=lambda entry: abs(entry[0] - duration))
        return [nearest[1]], True

    return lengths, remaining > 0.01
100
+
101
+
102
def _midi_to_octave_note(pitch: int, prefer_sharps: bool = True) -> tuple[int, str]:
    """Split a MIDI note number into ``(octave, note_name)``.

    The octave is ``pitch // 12 - 1``, so MIDI 60 maps to octave 4 ("o4c").
    ``prefer_sharps`` selects the sharp ("c+") or flat ("d-") spelling table.
    """
    octave_index, semitone = divmod(pitch, 12)
    names = _PITCH_TO_NOTE_SHARP if prefer_sharps else _PITCH_TO_NOTE_FLAT
    return octave_index - 1, names[semitone]
107
+
108
+
109
def convert_part_to_mml(
    notes: list[NoteEvent],
    prefer_sharps: bool = True,
) -> tuple[str, list[str]]:
    """Convert the NoteEvents of one (non-overlapping) part into an MML body.

    The returned body has no ``MML@`` prefix and no trailing ``;``.

    Size optimisations applied to the output:
      - ``l<N>``: the most frequent length is declared once as the default
        and then omitted from individual notes/rests.
      - ``>`` / ``<``: a one-octave move is written as a single character
        instead of ``o<N>``.
      - ``&``: durations that need several lengths are emitted as tied tokens.

    Args:
        notes: Events of a single lane; ``pitch == 0`` marks a rest.
        prefer_sharps: True spells accidentals as sharps, False as flats.

    Returns:
        ``(body, warnings)``. An empty ``notes`` list yields ``("r1", [])``.
        ``warnings`` holds one message per rest/note whose duration could
        only be approximated.
    """
    if not notes:
        return "r1", []

    sorted_notes = sorted(notes, key=lambda n: n.start)
    warnings: list[str] = []

    # Gaps shorter than (just under) a 64th note, the smallest length in the
    # duration table, are dropped instead of being written as rests.
    _MIN_GAP = 0.0625 - 0.001

    # ------------------------------------------------------------------
    # Pass 1: count how often each leading length token occurs (gaps
    # included) so the most common one can become the `l` default.
    # ------------------------------------------------------------------
    length_counter: Counter = Counter()
    mml_pos_scan = 0.0
    for note in sorted_notes:
        gap = note.start - mml_pos_scan
        if gap >= _MIN_GAP:
            gap_lengths, _ = _duration_to_lengths(gap)
            length_counter[gap_lengths[0]] += 1
            mml_pos_scan += _mml_duration(gap_lengths)
        dur_lengths, _ = _duration_to_lengths(note.duration)
        length_counter[dur_lengths[0]] += 1
        mml_pos_scan += _mml_duration(dur_lengths)

    default_len = length_counter.most_common(1)[0][0] if length_counter else "4"

    # ------------------------------------------------------------------
    # Pass 2: token generation.
    #
    # mml_pos is the position the emitted MML has actually reached (sum of
    # the possibly approximated lengths). Gaps are measured against it, so
    # an approximation error is absorbed by the next gap, not accumulated.
    # ------------------------------------------------------------------
    tokens: list[str] = [f"l{default_len}"]
    # NOTE(review): -1 means "no octave emitted yet", but it is also the
    # octave computed for MIDI pitches 1-11 — confirm such pitches never occur.
    current_octave = -1
    mml_pos = 0.0

    def emit(name: str, lengths: list[str]) -> None:
        """Append `name` once per length; tokens after the first get an `&` tie."""
        for i, lstr in enumerate(lengths):
            tok = name if lstr == default_len else f"{name}{lstr}"
            tokens.append(f"&{tok}" if i > 0 else tok)

    for note in sorted_notes:
        gap = note.start - mml_pos
        if gap >= _MIN_GAP:
            lengths, approx = _duration_to_lengths(gap)
            if approx:
                warnings.append(f"쉼표 길이 근사: {gap:.3f}박 → r{lengths[0]}")
            emit("r", lengths)
            mml_pos += _mml_duration(lengths)
        # Smaller (or negative) gaps are skipped without emitting anything.

        lengths, approx = _duration_to_lengths(note.duration)

        if note.pitch == 0:
            # Explicit rest event.
            if approx:
                warnings.append(
                    f"쉼표 길이 근사: {note.duration:.3f}박 → r{lengths[0]}"
                )
            emit("r", lengths)
        else:
            octave, note_name = _midi_to_octave_note(note.pitch, prefer_sharps)
            if approx:
                warnings.append(
                    f"음표 길이 근사: pitch={note.pitch}, "
                    f"{note.duration:.3f}박 → {note_name}{lengths[0]}"
                )

            # Octave change: ">" / "<" for a one-octave step, otherwise (or
            # for the very first note) an absolute "o<N>".
            if octave != current_octave:
                if current_octave != -1 and octave == current_octave + 1:
                    tokens.append(">")
                elif current_octave != -1 and octave == current_octave - 1:
                    tokens.append("<")
                else:
                    tokens.append(f"o{octave}")
                current_octave = octave

            emit(note_name, lengths)

        mml_pos += _mml_duration(lengths)

    return "".join(tokens), warnings
210
+
211
+
212
def _split_to_lanes(notes: list[NoteEvent]) -> list[list[NoteEvent]]:
    """Distribute time-overlapping notes of one part over monophonic lanes.

    Notes are visited in ``start`` order and each goes into the first lane
    whose last note has already ended (``lane end <= note start``, with a
    0.001-beat tolerance). When no lane is free a new lane is opened.

    Returns:
        The lanes in creation order; notes within one lane do not overlap.
        An empty input yields an empty list.
    """
    lane_notes: list[list[NoteEvent]] = []
    lane_free_at: list[float] = []  # end time of the last note in each lane

    for event in sorted(notes, key=lambda n: n.start):
        finish = event.start + event.duration
        for slot, free_at in enumerate(lane_free_at):
            if event.start >= free_at - 0.001:
                lane_notes[slot].append(event)
                lane_free_at[slot] = finish
                break
        else:
            # Every existing lane is still busy at this start time.
            lane_notes.append([event])
            lane_free_at.append(finish)

    return lane_notes
239
+
240
+
241
def convert_parts_to_mml(
    parts: list[list[NoteEvent]],
    prefer_sharps: bool = True,
    tempo: int = 0,
) -> tuple[str, list[str], list[str]]:
    """Convert every part into MML, splitting overlapping notes into lanes.

    Each part is passed through ``_split_to_lanes`` and every resulting lane
    is converted on its own by ``convert_part_to_mml``, so one input part may
    produce several MML parts.

    Args:
        parts: One NoteEvent list per part.
        prefer_sharps: True spells accidentals with "+", False with "-".
        tempo: BPM; the ``t<BPM>`` command is omitted when this is not > 0.

    Returns:
        ``(combined_mml, part_mmls, all_warnings)``:
          - combined_mml: ``"MML@[t<BPM>]<body1>,\\n<body2>,\\n...;"`` with
            no limit on the number of bodies.
          - part_mmls: one standalone ``"MML@[t<BPM>]<body>;"`` per body.
          - all_warnings: lane warnings prefixed with ``"Part N"`` for the
            first lane of a part, ``"Part N-K"`` for the K-th lane.
        When no body is produced at all, a single ``"r1"`` body is used.
    """
    bodies: list[str] = []
    all_warnings: list[str] = []

    for part_no, part_notes in enumerate(parts, start=1):
        for lane_no, lane in enumerate(_split_to_lanes(part_notes), start=1):
            body, lane_warnings = convert_part_to_mml(lane, prefer_sharps)
            bodies.append(body)
            if lane_no == 1:
                label = f"Part {part_no}"
            else:
                label = f"Part {part_no}-{lane_no}"
            all_warnings.extend(f"{label}: {w}" for w in lane_warnings)

    if not bodies:
        bodies.append("r1")

    header = f"MML@t{tempo}" if tempo > 0 else "MML@"
    part_mmls = [f"{header}{body};" for body in bodies]
    combined_mml = header + ",\n".join(bodies) + ";"

    return combined_mml, part_mmls, all_warnings
core/models.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ core/models.py
3
+
4
+ ๋‚ด๋ถ€ ๋ฐ์ดํ„ฐ ๊ตฌ์กฐ ์ •์˜.
5
+ ๋ชจ๋“  core ๋ชจ๋“ˆ ๊ฐ„์— ๊ณต์œ ๋˜๋Š” ๋ฐ์ดํ„ฐ ํด๋ž˜์Šค.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ from dataclasses import dataclass, field
11
+ from typing import Optional
12
+
13
+
14
@dataclass
class NoteEvent:
    """A single note or rest event extracted from a score.

    ``pitch == 0`` is treated as a rest. ``start`` and ``duration`` are
    expressed in quarter-note beats (1.0 = quarter, 0.5 = eighth,
    2.0 = half note).
    """

    # MIDI note number (0 = rest, 60 = C4).
    pitch: int
    # Start time, in quarter-note beats.
    start: float
    # Length, in quarter-note beats.
    duration: float
    # Staff number (1 = upper, 2 = lower).
    staff: int = 1
    # Voice number.
    voice: int = 1
    # Hint for part assignment (None = unspecified).
    part_hint: Optional[int] = None
28
+
29
+
30
@dataclass
class ConvertOptions:
    """Options for the conversion pipeline."""

    # Number of output parts (0 = auto-detect from the number of
    # simultaneously sounding notes).
    part_count: int = 0
    # When True, warnings are treated as errors.
    strict_mode: bool = False
    # True = spell accidentals as sharps (#), False = as flats (b).
    prefer_sharps: bool = True
    # When True, MockOMRAdapter is used.
    mock_mode: bool = True
    # Default tempo in BPM.
    default_tempo: int = 120
    # PDF rendering resolution.
    #   150 dpi: fast processing, the default.
    #   300 dpi: recommended when really running Audiveris (better recognition).
    pdf_dpi: int = 150
    # Whether OpenCV preprocessing is used (applies to audiveris mode only).
    preprocess_enabled: bool = True
    # Whether GaussianBlur noise removal is used.
    blur_enabled: bool = True
    # Whether binarization is used (off by default: Audiveris' own
    # binarization is trusted).
    binarize_enabled: bool = False
    # Binarization method: "otsu" | "adaptive" (when binarize_enabled=True).
    binarize_method: str = "otsu"
    # Skew correction (experimental, off by default).
    deskew_enabled: bool = False
    # Directory for intermediate artifacts (empty string = do not save).
    debug_dir: str = ""
    # OMR engine: "" | "audiveris" | "homr" | "oemer" | "clarity".
    # An empty string selects automatically according to mock_mode.
    engine: str = ""
    # PDF page numbers to process (1-based); an empty list means all pages.
    # Example: [1, 3] processes only pages 1 and 3.
    pdf_pages: list = field(default_factory=list)
54
+
55
+
56
@dataclass
class ConvertResult:
    """Outcome of a conversion.

    ``mml`` holds the complete MML string ("MML@[t<BPM>]p1,p2,p3;").
    ``part1``/``part2``/``part3`` hold the per-part strings kept for
    inspection and debugging.
    """

    success: bool
    # Final MML string, "MML@[t<BPM>]p1,p2,p3;".
    mml: str = ""
    # Per-part strings (for internal inspection / debugging).
    part1: str = ""
    part2: str = ""
    part3: str = ""
    warnings: list[str] = field(default_factory=list)
    debug_info: dict = field(default_factory=dict)

    def parts(self) -> list[str]:
        """Return the three part strings as a list."""
        return [self.part1, self.part2, self.part3]

    def format_output(self) -> str:
        """Format the result for console/file output, one section per part.

        Each part string is printed verbatim; an empty part is shown as
        "MML@r1;". Warnings, if any, follow in a trailing section.
        NOTE(review): parts are printed as stored, so they are presumably
        expected to already carry the "MML@...;" wrapper — confirm with the
        code that fills part1..part3.
        """
        out: list[str] = []
        for number, part in enumerate((self.part1, self.part2, self.part3), start=1):
            out += [f"Part {number}", part or "MML@r1;", ""]
        if self.warnings:
            out.append("--- Warnings ---")
            out.extend(f" [WARN] {w}" for w in self.warnings)
        return "\n".join(out).strip()
88
+
89
+
90
@dataclass
class WarningMessage:
    """A structured warning, for use where a plain string is not enough."""

    code: str
    message: str
    context: Optional[str] = None
96
+
97
+
98
+ # ---------------------------------------------------------------------------
99
+ # ๋ฉ€ํ‹ฐ ์—”์ง„ ๋น„๊ต์šฉ ๋ฐ์ดํ„ฐ ๊ตฌ์กฐ
100
+ # ---------------------------------------------------------------------------
101
+
102
@dataclass
class EngineRunResult:
    """Result of running a single OMR engine.

    No final verdict is derived from automatic scores alone:
    ``heuristic_summary`` is only an indicator, and the final quality
    judgement must be made by the user.
    """

    engine_name: str
    success: bool
    # Stage that failed: "init" | "preprocess" | "omr" | "parse" | "convert".
    stage: str = ""
    warnings: list[str] = field(default_factory=list)
    error_message: str = ""

    # Output file paths (when the artifact was saved).
    output_xml_path: str = ""          # MusicXML produced by the engine
    output_mml_path: str = ""          # saved MML text file
    output_notes_json_path: str = ""   # saved notes.json
    output_notes_txt_path: str = ""    # saved notes.txt
    output_debug_path: str = ""        # saved debug.json

    # Quantitative metrics (for reference only).
    note_count: int = 0
    chord_count: int = 0               # number of simultaneously sounding note groups
    part_note_counts: list[int] = field(default_factory=list)
    warning_count: int = 0

    # Detailed data.
    debug_info: dict = field(default_factory=dict)
    heuristic_summary: dict = field(default_factory=dict)
    mml_parts: list[str] = field(default_factory=list)
    # Note list in a form that is convenient for human review.
    notes_dump: list[dict] = field(default_factory=list)
134
+
135
+
136
@dataclass
class ComparisonReport:
    """Aggregate of a comparison run across several OMR engines.

    Important: ``user_review_priority`` is meant to always be True. The
    final engine choice is made by the user after listening to the results;
    ``suggested_engine`` is a reference only, not an automatic decision.
    """

    input_file: str
    timestamp: str = ""
    runs: list[EngineRunResult] = field(default_factory=list)
    # Always True: the final verdict is left to the user's own check.
    user_review_priority: bool = True
    comparison_summary: str = ""
    notes_for_manual_review: list[str] = field(default_factory=list)
    # Automatic suggestion for reference (not a final verdict).
    suggested_engine: str = ""
core/music_parser.py ADDED
@@ -0,0 +1,376 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ core/music_parser.py
3
+
4
+ OMR ๊ฒฐ๊ณผ๋ฅผ NoteEvent ๋ฆฌ์ŠคํŠธ๋กœ ๋ณ€ํ™˜ํ•˜๋Š” ํŒŒ์„œ.
5
+
6
+ ์ง€์› ํ˜•์‹:
7
+ - "mock_events": MockOMRAdapter ์ถœ๋ ฅ (dict ๋ฆฌ์ŠคํŠธ)
8
+ - "musicxml": MusicXML ๋ฌธ์ž์—ด (score-partwise ํ˜•์‹, stdlib xml.etree ํŒŒ์‹ฑ)
9
+
10
+ MusicXML ํŒŒ์‹ฑ ์ฒ˜๋ฆฌ ๋ฒ”์œ„:
11
+ - ๋‹จ์Œ/ํ™”์Œ(<chord>) ์ฒ˜๋ฆฌ
12
+ - ์‰ผํ‘œ(<rest>) โ†’ pitch=0 NoteEvent๋กœ ๋ณ€ํ™˜ (offset ํ๋ฆ„ ์œ ์ง€)
13
+ - <backup>/<forward> ์ฒ˜๋ฆฌ (๋‹ค์„ฑ๋ถ€ offset ์œ ์ง€)
14
+ - <divisions> ๋ณ€๊ฒฝ ์ถ”์ 
15
+ - <key><fifths> ์กฐํ‘œ ํŒŒ์‹ฑ โ†’ ์Œํ‘œ ๋ฐ˜์Œ ๋ณด์ •
16
+ - <sound tempo=""> ํ…œํฌ ํŒŒ์‹ฑ
17
+ - voice/staff ์ •๋ณด ๋ณด์กด
18
+ - namespace ์ž๋™ ์ œ๊ฑฐ
19
+ - ํƒ€์ด(<tie>) ์ฒ˜๋ฆฌ: ๊ฐ™์€ ํ”ผ์น˜ ์Œํ‘œ duration ํ•ฉ์‚ฐ
20
+
21
+ ๋ฏธ์ง€์›:
22
+ - score-timewise ํ˜•์‹
23
+ - grace note (skip)
24
+ - ์Šฌ๋Ÿฌ(articulation)
25
+ - ๋ฐ˜๋ณต๊ธฐํ˜ธ ํŽผ์น˜๊ธฐ (D.S. / D.C. / Coda / Segno)
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import xml.etree.ElementTree as ET
31
+ from dataclasses import replace as dc_replace
32
+ from typing import List
33
+
34
+ from .models import NoteEvent
35
+
36
+
37
class ParseError(Exception):
    """Parsing failure; the message states which file/stage failed."""
40
+
41
+
42
+ # ---------------------------------------------------------------------------
43
+ # ๊ณต๊ฐœ ์ธํ„ฐํŽ˜์ด์Šค
44
+ # ---------------------------------------------------------------------------
45
+
46
def parse_omr_result(omr_result: dict) -> tuple[List[NoteEvent], dict]:
    """Convert an OMR result dict into ``(note events, metadata)``.

    ``omr_result["format"]`` selects the parser and ``omr_result["data"]``
    is handed to it:
      - "mock_events": list of note dicts; the metadata is ``{"tempo": 0}``.
      - "musicxml": MusicXML string; the metadata comes from the XML parser.

    Metadata keys:
        "tempo": int BPM (0 when no tempo was found).

    Raises:
        ParseError: for an unknown format, or when the selected parser fails.
    """
    fmt = omr_result.get("format")

    if fmt == "mock_events":
        return _parse_mock_events(omr_result["data"]), {"tempo": 0}
    if fmt == "musicxml":
        return _parse_musicxml(omr_result["data"])

    raise ParseError(f"지원하지 않는 OMR 결과 형식: '{fmt}'")
65
+
66
+
67
def parse_musicxml_file(xml_path: str) -> tuple[List[NoteEvent], dict]:
    """Parse a MusicXML file given by path; handy for tests and direct use.

    The file is read as UTF-8 with undecodable bytes replaced, then handed
    to ``_parse_musicxml`` with the path as the error-message hint.

    Returns:
        ``(notes, metadata)`` in the same form as ``parse_omr_result``.

    Raises:
        ParseError: when the file cannot be read (the original ``OSError``
            is attached as ``__cause__``) or its contents fail to parse.
    """
    try:
        with open(xml_path, "r", encoding="utf-8", errors="replace") as f:
            xml_string = f.read()
    except OSError as e:
        # Chain the cause so the underlying I/O error stays visible.
        raise ParseError(f"MusicXML 파일 읽기 실패 ({xml_path}): {e}") from e
    return _parse_musicxml(xml_string, source_hint=xml_path)
83
+
84
+
85
+ # ---------------------------------------------------------------------------
86
+ # ๋‚ด๋ถ€ ๊ตฌํ˜„
87
+ # ---------------------------------------------------------------------------
88
+
89
def _parse_mock_events(raw_notes: list) -> List[NoteEvent]:
    """Build NoteEvents from mock-adapter note dicts.

    Each entry must provide "pitch", "start" and "duration"; "staff" and
    "voice" default to 1 and "part_hint" to None.

    Returns:
        The events sorted by ``(start, staff, voice)``.

    Raises:
        ParseError: when an entry lacks a required key or holds a value that
            cannot be converted. A ``None`` value or a non-mapping entry
            raises ``TypeError`` during conversion, so that is wrapped too.
    """
    events = []
    for raw in raw_notes:
        try:
            event = NoteEvent(
                pitch=int(raw["pitch"]),
                start=float(raw["start"]),
                duration=float(raw["duration"]),
                staff=int(raw.get("staff", 1)),
                voice=int(raw.get("voice", 1)),
                part_hint=raw.get("part_hint"),
            )
        except (KeyError, ValueError, TypeError) as e:
            raise ParseError(f"음표 데이터 파싱 오류: {raw!r} — {e}") from e
        events.append(event)

    events.sort(key=lambda n: (n.start, n.staff, n.voice))
    return events
107
+
108
+
109
# Semitone offset of each natural step within an octave:
# C=0, D=2, E=4, F=5, G=7, A=9, B=11.
_STEP_SEMITONE = dict(zip("CDEFGAB", (0, 2, 4, 5, 7, 9, 11)))

# Order in which key signatures add accidentals.
_KEY_SHARPS = list("FCGDAEB")   # 1 sharp = F#, 2 sharps = F# C#, ...
_KEY_FLATS = list("BEADGCF")    # 1 flat = Bb, 2 flats = Bb Eb, ...
115
+
116
+
117
def _get_key_alters(fifths: int) -> dict[str, int]:
    """Map a key-signature ``fifths`` value to ``{step: alter}``.

    Positive values sharpen the first ``fifths`` steps of the sharp order,
    negative values flatten the first ``-fifths`` steps of the flat order;
    at most seven steps are altered and 0 yields an empty dict.

    Examples:
        fifths=2  (D major) -> {"F": 1, "C": 1}
        fifths=-1 (F major) -> {"B": -1}
    """
    if fifths == 0:
        return {}
    if fifths > 0:
        order, alter = _KEY_SHARPS, 1
    else:
        order, alter = _KEY_FLATS, -1
    count = min(abs(fifths), 7)
    return {step: alter for step in order[:count]}
132
+
133
+
134
def _parse_tempo(root: ET.Element) -> int:
    """Return the first usable ``<sound tempo="N"/>`` value below ``root``.

    ``<sound>`` elements are visited in document order. Elements without a
    ``tempo`` attribute, or whose value cannot be turned into an integer
    (non-numeric text, ``nan``, ``inf``), are skipped.

    Returns:
        The tempo truncated to an int, or 0 when none is found.
    """
    for elem in root.iter("sound"):
        tempo_str = elem.get("tempo")
        if not tempo_str:
            continue
        try:
            return int(float(tempo_str))
        except (ValueError, TypeError, OverflowError):
            # int(float("inf")) raises OverflowError, int(float("nan")) and
            # non-numeric text raise ValueError: try the next element.
            continue
    return 0
147
+
148
+
149
+ def _parse_musicxml(xml_string: str, source_hint: str = "") -> tuple[List[NoteEvent], dict]:
150
+ """
151
+ MusicXML ๋ฌธ์ž์—ด์„ (NoteEvent ๋ฆฌ์ŠคํŠธ, ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ)๋กœ ๋ณ€ํ™˜.
152
+
153
+ Args:
154
+ xml_string: MusicXML XML ๋ฌธ์ž์—ด
155
+ source_hint: ์˜ค๋ฅ˜ ๋ฉ”์‹œ์ง€์— ํฌํ•จํ•  ํŒŒ์ผ๋ช…/๊ฒฝ๋กœ (์„ ํƒ)
156
+
157
+ Returns:
158
+ (events, metadata) โ€” metadata์— "tempo" ํฌํ•จ
159
+ """
160
+ src = f" ({source_hint})" if source_hint else ""
161
+
162
+ try:
163
+ root = ET.fromstring(xml_string)
164
+ except ET.ParseError as e:
165
+ raise ParseError(f"MusicXML XML ๊ตฌ๋ฌธ ์˜ค๋ฅ˜{src}: {e}")
166
+
167
+ # namespace ์ œ๊ฑฐ (xmlns๊ฐ€ ์žˆ์–ด๋„ ๋™์ผํ•˜๊ฒŒ ์ฒ˜๋ฆฌ)
168
+ for elem in root.iter():
169
+ if "}" in elem.tag:
170
+ elem.tag = elem.tag.split("}")[1]
171
+
172
+ root_tag = root.tag
173
+ if root_tag != "score-partwise":
174
+ raise ParseError(
175
+ f"์ง€์›ํ•˜์ง€ ์•Š๋Š” MusicXML ๋ฃจํŠธ ์š”์†Œ{src}: '{root_tag}'\n"
176
+ f" score-partwise ํ˜•์‹๋งŒ ์ง€์›ํ•ฉ๋‹ˆ๋‹ค. "
177
+ f"score-timewise๋Š” MuseScore/Audiveris์—์„œ ๋ณ€ํ™˜ ๊ฐ€๋Šฅํ•ฉ๋‹ˆ๋‹ค."
178
+ )
179
+
180
+ # ํ…œํฌ ์ถ”์ถœ
181
+ tempo = _parse_tempo(root)
182
+
183
+ events: List[NoteEvent] = []
184
+ # ํƒ€์ด ์ถ”์ : key=(part_id, pitch, voice, staff) โ†’ events ๋ฆฌ์ŠคํŠธ ์ธ๋ฑ์Šค
185
+ tie_pending: dict[tuple, int] = {}
186
+
187
+ for part_idx, part_elem in enumerate(root.findall("part")):
188
+ part_id = part_elem.get("id", f"P{part_idx + 1}")
189
+ divisions = 1 # <divisions>: 4๋ถ„์Œํ‘œ๋‹น XML duration ๋‹จ์œ„
190
+ measure_start = 0.0 # ํ˜„์žฌ ๋งˆ๋””์˜ ์‹œ์ž‘ beat
191
+ key_alters: dict[str, int] = {} # ์กฐํ‘œ ๋ฐ˜์Œ ๋ณด์ • (์Œ์ด๋ฆ„ โ†’ alter)
192
+
193
+ for measure_elem in part_elem.findall("measure"):
194
+ current_beat = 0.0 # ๋งˆ๋”” ๋‚ด ํ˜„์žฌ ์œ„์น˜
195
+ prev_note_beat = 0.0 # ์ง์ „ ๋น„-chord ์Œํ‘œ์˜ ์‹œ์ž‘ ์œ„์น˜ (chord ์ฒ˜๋ฆฌ์šฉ)
196
+ max_beat = 0.0 # ๋งˆ๋”” ๋‚ด ๋„๋‹ฌํ•œ ์ตœ๋Œ€ ์œ„์น˜ (backup ํ›„์—๋„ ์œ ์ง€)
197
+
198
+ for child in measure_elem:
199
+ tag = child.tag
200
+
201
+ # attributes: divisions, key ์—…๋ฐ์ดํŠธ
202
+ if tag == "attributes":
203
+ div_elem = child.find("divisions")
204
+ if div_elem is not None and div_elem.text:
205
+ try:
206
+ divisions = int(div_elem.text)
207
+ except ValueError:
208
+ pass
209
+
210
+ key_elem = child.find("key")
211
+ if key_elem is not None:
212
+ fifths_elem = key_elem.find("fifths")
213
+ if fifths_elem is not None and fifths_elem.text:
214
+ try:
215
+ key_alters = _get_key_alters(int(fifths_elem.text))
216
+ except ValueError:
217
+ pass
218
+
219
+ elif tag == "note":
220
+ note_event = _parse_note(
221
+ child, divisions, part_idx,
222
+ measure_start, current_beat, prev_note_beat,
223
+ part_id, source_hint, key_alters,
224
+ )
225
+ if note_event is not None:
226
+ # ํƒ€์ด ์ฒ˜๋ฆฌ (์‰ผํ‘œ ์ œ์™ธ)
227
+ if note_event.pitch != 0:
228
+ tie_stop = any(
229
+ t.get("type") == "stop"
230
+ for t in child.findall("tie")
231
+ )
232
+ tie_start = any(
233
+ t.get("type") == "start"
234
+ for t in child.findall("tie")
235
+ )
236
+ tie_key = (
237
+ part_id,
238
+ note_event.pitch,
239
+ note_event.voice,
240
+ note_event.staff,
241
+ )
242
+ if tie_stop and tie_key in tie_pending:
243
+ # ์ด์ „ ํƒ€์ด ์Œํ‘œ์— duration ํ•ฉ์‚ฐ
244
+ idx = tie_pending.pop(tie_key)
245
+ old = events[idx]
246
+ events[idx] = dc_replace(
247
+ old, duration=old.duration + note_event.duration
248
+ )
249
+ if tie_start:
250
+ tie_pending[tie_key] = idx
251
+ # ์ƒˆ ์ด๋ฒคํŠธ๋Š” ์ถ”๊ฐ€ํ•˜์ง€ ์•Š์Œ
252
+ else:
253
+ events.append(note_event)
254
+ if tie_start:
255
+ tie_pending[tie_key] = len(events) - 1
256
+ else:
257
+ events.append(note_event)
258
+
259
+ # chord๊ฐ€ ์•„๋‹Œ ๊ฒฝ์šฐ์—๋งŒ ์œ„์น˜ ์ „์ง„
260
+ is_chord = child.find("chord") is not None
261
+ dur_beats = _get_duration_beats(child, divisions)
262
+ if not is_chord:
263
+ prev_note_beat = current_beat
264
+ current_beat += dur_beats
265
+ max_beat = max(max_beat, current_beat)
266
+
267
+ elif tag == "backup":
268
+ dur_beats = _get_duration_beats(child, divisions)
269
+ current_beat = max(0.0, current_beat - dur_beats)
270
+
271
+ elif tag == "forward":
272
+ dur_beats = _get_duration_beats(child, divisions)
273
+ current_beat += dur_beats
274
+ max_beat = max(max_beat, current_beat)
275
+
276
+ # backup์ด ์žˆ์–ด๋„ ๋งˆ๋”” ๊ธธ์ด๋Š” ์ตœ๋Œ€ ๋„๋‹ฌ ์œ„์น˜ ๊ธฐ์ค€
277
+ measure_start += max_beat
278
+
279
+ if not events:
280
+ return [], {"tempo": tempo}
281
+
282
+ events.sort(key=lambda n: (n.start, n.staff, n.voice))
283
+ return events, {"tempo": tempo}
284
+
285
+
286
def _parse_note(
    note_elem: ET.Element,
    divisions: int,
    part_idx: int,
    measure_start: float,
    current_beat: float,
    prev_note_beat: float,
    part_id: str,
    source_hint: str,
    key_alters: dict[str, int],
) -> NoteEvent | None:
    """Convert one ``<note>`` element into a NoteEvent.

    Returns None for notes without a (non-zero) duration, such as grace
    notes, and for non-rest notes that have no ``<pitch>`` child.

    A ``<chord>`` note starts at ``prev_note_beat``; any other note starts
    at ``current_beat``. Rests are returned with ``pitch=0``. The MIDI
    pitch is clamped to 0..127.

    ``key_alters`` supplies the alter for a step when the note carries no
    ``<alter>`` element; an explicit ``<alter>`` always wins.
    NOTE(review): MusicXML normally encodes the sounding alteration in
    ``<alter>`` itself — confirm that applying the key signature to notes
    without ``<alter>`` is intended for the OMR engines in use.

    ``part_id`` and ``source_hint`` are accepted but not used here.
    """
    dur_beats = _get_duration_beats(note_elem, divisions)
    if dur_beats == 0.0:
        # Grace note or zero duration: skip.
        return None

    in_chord = note_elem.find("chord") is not None
    abs_start = measure_start + (prev_note_beat if in_chord else current_beat)

    voice_text = note_elem.findtext("voice")
    voice = int(voice_text) if voice_text else 1

    # Without <staff>, the voice number stands in for the staff, so voices
    # end up in separate parts.
    staff_text = note_elem.findtext("staff")
    staff_raw = int(staff_text) if staff_text else voice
    # Make staff numbers unique across parts:
    # P1/staff1=1, P1/staff2=2, P2/staff1=11, P2/staff2=12.
    staff = part_idx * 10 + staff_raw

    if note_elem.find("rest") is not None:
        return NoteEvent(
            pitch=0,
            start=abs_start,
            duration=dur_beats,
            staff=staff,
            voice=voice,
        )

    pitch_elem = note_elem.find("pitch")
    if pitch_elem is None:
        return None

    step = (pitch_elem.findtext("step") or "C").strip().upper()
    octave_text = pitch_elem.findtext("octave")
    octave = int(octave_text) if octave_text else 4

    alter_text = pitch_elem.findtext("alter")
    if alter_text:
        alter = int(float(alter_text))
    else:
        alter = key_alters.get(step, 0)

    midi = (octave + 1) * 12 + _STEP_SEMITONE.get(step, 0) + alter

    return NoteEvent(
        pitch=max(0, min(127, midi)),
        start=abs_start,
        duration=dur_beats,
        staff=staff,
        voice=voice,
    )
366
+
367
+
368
def _get_duration_beats(elem: ET.Element, divisions: int) -> float:
    """Return the ``<duration>`` child of ``elem`` in quarter-note beats.

    The integer duration is divided by ``divisions`` (treated as at least 1).
    A missing, empty or non-integer ``<duration>`` yields 0.0.
    """
    raw = elem.findtext("duration")
    if not raw:
        return 0.0
    try:
        return int(raw) / max(1, divisions)
    except (ValueError, ZeroDivisionError):
        return 0.0
core/mxl_mml_converter.py ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ core/mxl_mml_converter.py
3
+
4
+ Uses the ChatGPT conversion code as-is (no logic changes).
5
+ Only the pipeline interface (convert_xml_pages_to_mml_parts) is added as a wrapper.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import xml.etree.ElementTree as ET
10
+ from typing import List, Tuple
11
+
12
+ # ---------------------------------------------------------------------------
13
+ # ChatGPT code - kept exactly as generated
14
+ # ---------------------------------------------------------------------------
15
+
16
# Semitone offset of each natural step letter above C.
STEP_TO_SEMI = {'C':0,'D':2,'E':4,'F':5,'G':7,'A':9,'B':11}
# MML note name for each pitch class (0-11); '+' marks a sharp.
SEMI_TO_NOTE = {
    0:'c',1:'c+',2:'d',3:'d+',4:'e',5:'f',6:'f+',7:'g',8:'g+',9:'a',10:'a+',11:'b'
}
# quarter=12 ticks, whole=48
# Maps a tick count to its MML length suffix: a plain number n is 48/n ticks,
# a trailing '.' is a dotted value, and 15 is spelled as the tie '4&16'.
DUR_TOKEN = {
    48:'1', 36:'2.', 24:'2', 18:'4.', 16:'3', 15:'4&16', 12:'4', 9:'8.', 8:'6',
    6:'8', 4:'12', 3:'16', 2:'24', 1:'48'
}
# Components that can be emitted as single token (excluding composites encoded above)
# Listed largest first; 15 is left out because its token is a composite.
SINGLE_DURS = [48,36,24,18,16,12,9,8,6,4,3,2,1]
27
+
28
+
29
def parse_events(root):
    """Flatten the first <part> of a MusicXML tree into timed note events.

    All times are normalized to 48 ticks per whole note (12 per quarter).
    Inside a measure the cursor is tracked in raw MusicXML divisions and
    converted only when a value is stored.

    Args:
        root: Root element of a MusicXML document whose tags carry no
            namespace prefix.

    Returns:
        A tuple ``(events, total_ticks, divisions, measure_bounds)``:
        ``events`` is a list of dicts with keys ``start``, ``dur``, ``midi``,
        ``voice``, ``measure`` and ``tie``; ``total_ticks`` is the normalized
        end time of the last measure; ``divisions`` is the last divisions
        value seen; ``measure_bounds`` is a list of
        ``(measure_number, start_tick, end_tick)``.

    Raises:
        ValueError: If there is no ``<part>`` element or ``<divisions>`` is
            not positive. Malformed numeric text (duration, octave, alter)
            and unknown step letters also propagate as exceptions.
    """
    part = root.find('part')
    if part is None:
        # Report a readable error instead of AttributeError on None.
        raise ValueError('MusicXML document has no <part> element')

    def raw_ticks(elem):
        # A missing or empty <duration> (e.g. on grace notes) counts as zero.
        text = (elem.findtext('duration') or '').strip()
        return int(text) if text else 0

    events = []
    abs_time = 0  # normalized cumulative time, 48 ticks per whole note
    divisions = 1
    measure_bounds = []

    for measure_idx, measure in enumerate(part.findall('measure'), start=1):
        # Read this measure's divisions before touching any note.
        attr = measure.find('attributes')
        if attr is not None:
            d = attr.findtext('divisions')
            if d is not None:
                divisions = int(d)
        if divisions <= 0:
            raise ValueError(f'invalid <divisions> value: {divisions}')

        # Measure numbers may be non-numeric or absent; fall back to the
        # 1-based position so a single odd label does not reject the page.
        try:
            measure_no = int(measure.attrib['number'])
        except (KeyError, ValueError):
            measure_no = measure_idx

        scale_den = divisions * 4

        def norm(raw, _den=scale_den):
            # raw divisions -> normalized ticks (floor division)
            return raw * 48 // _den

        measure_start = abs_time
        cursor_raw = 0        # position inside the measure, in raw divisions
        max_cursor_raw = 0    # furthest point reached by any voice
        last_start_raw = None  # onset of the most recent non-chord note

        for child in measure:
            if child.tag == 'backup':
                cursor_raw -= raw_ticks(child)
            elif child.tag == 'forward':
                cursor_raw += raw_ticks(child)
                max_cursor_raw = max(max_cursor_raw, cursor_raw)
            elif child.tag == 'note':
                dur_raw = raw_ticks(child)
                is_chord = child.find('chord') is not None
                if is_chord and last_start_raw is not None:
                    start_raw = last_start_raw
                else:
                    # Also covers a <chord/> with no preceding note.
                    start_raw = cursor_raw

                pitch = child.find('pitch')
                # Notes without <pitch> (e.g. unpitched percussion) produce no
                # event but still advance the cursor below.
                if child.find('rest') is None and pitch is not None:
                    step = (pitch.findtext('step') or '').strip().upper()
                    # <alter> may be written as a decimal such as "1.0".
                    alter = int(float(pitch.findtext('alter') or 0))
                    midi = (int(pitch.findtext('octave')) + 1) * 12 + STEP_TO_SEMI[step] + alter
                    events.append({
                        'start': measure_start + norm(start_raw),
                        'dur': norm(dur_raw),
                        'midi': midi,
                        'voice': child.findtext('voice', '1'),
                        'measure': measure_no,
                        'tie': tuple(t.attrib.get('type') for t in child.findall('tie')),
                    })

                if is_chord:
                    # Chord members share the onset and do not move the cursor.
                    max_cursor_raw = max(max_cursor_raw, start_raw + dur_raw)
                else:
                    last_start_raw = start_raw
                    cursor_raw += dur_raw
                    max_cursor_raw = max(max_cursor_raw, cursor_raw)

        abs_time = measure_start + norm(max_cursor_raw)
        measure_bounds.append((measure_no, measure_start, abs_time))
    return events, abs_time, divisions, measure_bounds
85
+
86
+
87
def merge_ties(events):
    """Collapse tied notes into single events.

    Events are visited in (start, midi, voice) order. A note carrying a tie
    ``stop`` is folded into the open note with the same (midi, voice) when it
    begins exactly where that note ends. Every other note is copied, minus its
    ``tie`` field, and gains ``measure_start`` / ``measure_end``.

    Args:
        events: Event dicts with at least the keys ``start``, ``dur``,
            ``midi``, ``voice``, ``measure`` and ``tie``.

    Returns:
        A new list of event dicts; the input dicts are not modified.
    """
    merged = []
    open_ties = {}  # (midi, voice) -> index in `merged` of the note still tied forward
    ordered = sorted(events, key=lambda ev: (ev['start'], ev['midi'], ev['voice']))

    for ev in ordered:
        tie_key = (ev['midi'], ev['voice'])
        tie_types = set(ev['tie'])

        if 'stop' in tie_types and tie_key in open_ties:
            head = merged[open_ties[tie_key]]
            if head['start'] + head['dur'] == ev['start']:
                # Contiguous continuation: extend the head note in place.
                head['dur'] += ev['dur']
                head['measure_end'] = ev['measure']
                if 'start' not in tie_types:
                    del open_ties[tie_key]
                continue

        note = {name: value for name, value in ev.items() if name != 'tie'}
        note['measure_start'] = ev['measure']
        note['measure_end'] = ev['measure']
        merged.append(note)
        if 'start' in tie_types:
            open_ties[tie_key] = len(merged) - 1

    return merged
109
+
110
+
111
def assign_tracks(events):
    """Distribute events over monophonic tracks.

    Events are taken in (start, descending pitch, duration) order. Each one
    goes to a track that has already finished sounding; among those, a track
    ending exactly at the onset on the same pitch wins, then the smallest
    pitch distance, then the latest end time, then the lowest index. A new
    track is opened when none is free.

    Args:
        events: Event dicts with keys ``start``, ``dur`` and ``midi``.

    Returns:
        A list of ``{'events': [...]}`` dicts ordered from highest to lowest
        average pitch, each event list sorted by (start, descending pitch).
    """
    lanes = []
    by_onset = sorted(events, key=lambda ev: (ev['start'], -ev['midi'], ev['dur']))

    for ev in by_onset:
        onset = ev['start']
        pitch = ev['midi']

        def rank(pos):
            lane = lanes[pos]
            continues = lane.get('last_pitch') == pitch and lane['end'] == onset
            leap = abs(lane.get('last_pitch', pitch) - pitch)
            return (-1 if continues else 0, leap, -lane['end'], pos)

        candidates = [pos for pos, lane in enumerate(lanes) if lane['end'] <= onset]
        if candidates:
            chosen = min(candidates, key=rank)
        else:
            chosen = len(lanes)
            lanes.append({'end': 0, 'last_pitch': None, 'events': []})

        lane = lanes[chosen]
        lane['events'].append(ev)
        lane['end'] = onset + ev['dur']
        lane['last_pitch'] = pitch

    def mean_pitch(lane):
        notes = lane['events']
        return sum(n['midi'] for n in notes) / len(notes)

    # Stable sort: highest average pitch first, creation order on ties.
    ranked = sorted(lanes, key=lambda lane: -mean_pitch(lane))
    return [
        {'events': sorted(lane['events'], key=lambda n: (n['start'], -n['midi']))}
        for lane in ranked
    ]
134
+
135
+
136
def decompose_duration(total, durations=None):
    """Split a tick count into the fewest single-token durations.

    Uses an iterative table instead of recursion, so long rests cannot hit
    the interpreter's recursion limit. Among equally short answers, the
    candidate listed earliest in ``durations`` is preferred at every step.

    Args:
        total: Non-negative integer number of ticks to decompose.
        durations: Allowed component lengths in preference order. Defaults
            to the module-level ``SINGLE_DURS``.

    Returns:
        A list of component lengths that sum to ``total`` (empty for 0).

    Raises:
        ValueError: If ``total`` is negative or cannot be built from
            ``durations``.
    """
    if durations is None:
        durations = SINGLE_DURS
    if total < 0:
        raise ValueError(f'cannot decompose {total}')

    unreachable = total + 1  # larger than any achievable component count
    count = [0] + [unreachable] * total  # fewest components for each remainder
    first = [0] * (total + 1)            # first component of that best answer

    for rem in range(1, total + 1):
        for d in durations:
            # Strict '<' keeps the earliest listed duration on ties.
            if 0 < d <= rem and count[rem - d] + 1 < count[rem]:
                count[rem] = count[rem - d] + 1
                first[rem] = d

    if count[total] >= unreachable:
        raise ValueError(f'cannot decompose {total}')

    res = []
    rem = total
    while rem:
        res.append(first[rem])
        rem -= first[rem]
    return res
155
+
156
+
157
def midi_to_oct_note(midi):
    """Return ``(octave, note_name)`` for a MIDI note number.

    The octave is ``midi // 12 - 1`` and the name is looked up in
    ``SEMI_TO_NOTE`` by pitch class.
    """
    octave_index, pitch_class = divmod(midi, 12)
    return octave_index - 1, SEMI_TO_NOTE[pitch_class]
159
+
160
+
161
def emit_event(pieces, cur_oct, midi, dur):
    """Append the MML tokens for one note and return the octave now in effect.

    An ``o<N>`` token is written first when the note's octave differs from
    ``cur_oct``. A duration that needs several components is written as the
    same note repeated, joined by ``&``.

    Args:
        pieces: Token list that is extended in place.
        cur_oct: Octave currently in effect, or None if none was emitted yet.
        midi: MIDI note number.
        dur: Duration in ticks.

    Returns:
        The octave in effect after this note.
    """
    octave, note = midi_to_oct_note(midi)
    if octave != cur_oct:
        pieces.append(f'o{octave}')
        cur_oct = octave

    for pos, d in enumerate(decompose_duration(dur)):
        if pos:
            # Tie every component after the first to its predecessor.
            pieces.append('&')
        pieces.append(note + DUR_TOKEN[d])
    return cur_oct
172
+
173
+
174
def emit_rest(pieces, dur):
    """Append one ``r<length>`` token per component of a rest of ``dur`` ticks."""
    pieces.extend('r' + DUR_TOKEN[d] for d in decompose_duration(dur))
177
+
178
+
179
def build_track_mml(track_events, total_dur):
    """Render one track's events as an MML body string.

    Gaps before an event and the remainder up to ``total_dur`` are filled
    with rests.

    Args:
        track_events: Events ordered by start time, each with ``start``,
            ``dur`` and ``midi``.
        total_dur: Length in ticks the track is padded to.

    Returns:
        The concatenated MML tokens, without the ``MML@`` / ``;`` wrapper.

    Raises:
        ValueError: If an event starts before the previous one has ended.
    """
    tokens = []
    octave = None
    clock = 0

    for ev in track_events:
        onset = ev['start']
        if onset < clock:
            raise ValueError('overlap')
        gap = onset - clock
        if gap > 0:
            emit_rest(tokens, gap)
        octave = emit_event(tokens, octave, ev['midi'], ev['dur'])
        clock = onset + ev['dur']

    remaining = total_dur - clock
    if remaining > 0:
        emit_rest(tokens, remaining)
    return ''.join(tokens)
194
+
195
+
196
+ # ---------------------------------------------------------------------------
197
+ # Pipeline interface (wrapper)
198
+ # ---------------------------------------------------------------------------
199
+
200
def convert_xml_pages_to_mml_parts(
    xml_strings: List[str],
) -> Tuple[List[str], List[str]]:
    """Convert the MusicXML of several pages into a list of MML parts.

    Pages are parsed one by one and laid end to end on a single timeline.
    A page that fails to parse is skipped and reported in the warnings.

    Args:
        xml_strings: One MusicXML document per page, as ``str`` or bytes.

    Returns:
        ``(part_mmls, warnings)``. ``part_mmls`` holds one ``MML@...;``
        string per track, or ``['MML@r1;']`` when no note was found.
    """
    warnings: List[str] = []
    all_events = []
    total_offset = 0

    for page_idx, xml_str in enumerate(xml_strings, start=1):
        try:
            payload = xml_str.encode('utf-8') if isinstance(xml_str, str) else xml_str
            root = ET.fromstring(payload)
            # Strip namespaces so plain tag names match.
            for node in root.iter():
                if '}' in node.tag:
                    node.tag = node.tag.split('}')[1]
            page_events, page_dur, _divisions, _bounds = parse_events(root)
        except Exception as e:
            warnings.append(f'ํŽ˜์ด์ง€ {page_idx} ํŒŒ์‹ฑ ์˜ค๋ฅ˜: {e}')
            continue

        shifted = merge_ties(page_events)
        for ev in shifted:
            # Move the page onto the shared timeline.
            ev['start'] += total_offset
        all_events.extend(shifted)
        total_offset += page_dur

    if not all_events:
        return ['MML@r1;'], warnings

    mml_parts = []
    for track in assign_tracks(all_events):
        body = build_track_mml(track['events'], total_offset)
        mml_parts.append(f"MML@{body};")
    return mml_parts, warnings
core/omr_adapter.py ADDED
@@ -0,0 +1,788 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ core/omr_adapter.py
3
+
4
+ OMR engine abstraction layer.
5
+
6
+ The concrete OMR implementation is swapped only inside this module.
7
+ Other core modules use only the OMRAdapter interface.
8
+
9
+ Implementations:
10
+ - MockOMRAdapter: for tests/development. Returns fixed data without reading the image.
11
+ - AudiverisOMRAdapter: calls the Audiveris CLI via subprocess. Returns MusicXML.
12
+ - HOMROMRAdapter: calls the HOMR (pip install homr) CLI via subprocess.
13
+ - OeMeROMRAdapter: calls the OeMeR/oemer (pip install oemer) CLI via subprocess.
14
+ - ClarityOMRAdapter: calls the Clarity-OMR (github: clquwu/Clarity-OMR) CLI.
15
+ PDF -> MusicXML. Selects GPU/CPU automatically.
16
+
17
+ Environment variables:
18
+ AUDIVERIS_BIN=<path> Audiveris executable (direct execution)
19
+ AUDIVERIS_JAR=<path> Audiveris.jar (java -jar execution)
20
+ HOMR_CMD=<path> homr executable or command (default: python -m homr)
21
+ OEMER_CMD=<path> oemer executable or command (default: oemer)
22
+ CLARITY_OMR_DIR=<path> Clarity-OMR repository root (the folder containing omr.py)
23
+ CLARITY_DEVICE=<value> "cuda" | "cpu" | "auto" (default: "auto")
24
+ with auto, the device is chosen via torch.cuda.is_available()
25
+ CLARITY_PYTHON=<path> python interpreter used to run Clarity-OMR
26
+ (default: sys.executable of the current process)
27
+
28
+ Note:
29
+ HOMR/OeMeR/Clarity are not checked at import time.
30
+ The environment is inspected only at execution (run()) time.
31
+
32
+ Audiveris output layout:
33
+ <output_dir>/<image_basename>/<image_basename>.mxl (or .xml)
34
+ .mxl is a ZIP-format MusicXML archive
35
+ """
36
+
37
+ from __future__ import annotations
38
+
39
+ import abc
40
+ import os
41
+ import shutil
42
+ import subprocess
43
+ import sys
44
+ import tempfile
45
+ import zipfile
46
+ import xml.etree.ElementTree as ET
47
+ from pathlib import Path
48
+ from typing import Optional
49
+
50
+
51
class OMRAdapter(abc.ABC):
    """Abstract interface every OMR engine adapter implements."""

    @abc.abstractmethod
    def run(self, image_path: str, debug_dir: str = "") -> dict:
        """Analyze a score image and return the OMR result.

        Args:
            image_path: Path of the input file.
            debug_dir: Optional directory for intermediate output. The
                concrete adapters in this module all accept it, so it is
                part of the interface; an empty string disables it.

        Returns:
            dict with the keys:
                - "format": "mock_events" | "musicxml"
                - "data": list[dict] for mock_events, XML string for musicxml
                - "source_path": path of the original image
                - "metadata": additional information
        """
        ...

    @abc.abstractmethod
    def is_available(self) -> bool:
        """Report whether this adapter can run (checked at call time)."""
        ...
72
+
73
+
74
class MockOMRAdapter(OMRAdapter):
    """Mock OMR adapter for tests and development.

    Performs no image analysis; always returns a C major scale with a
    harmony voice and a bass line.
    """

    def run(self, image_path: str, debug_dir: str = "") -> dict:
        """Return the fixed note data; ``debug_dir`` is ignored."""

        def note(pitch, start, duration, staff, voice):
            return {
                "pitch": pitch,
                "start": start,
                "duration": duration,
                "staff": staff,
                "voice": voice,
            }

        melody = (60, 62, 64, 65, 67, 69, 71)   # staff 1 (upper), voice 1
        harmony = (64, 65, 67, 67, 69, 71, 72)  # staff 1, voice 2
        bass = (48, 53, 55, 48)                 # staff 2 (lower), voice 1

        mock_notes = []
        # Melody: seven quarter-length notes, then a two-beat final note.
        mock_notes += [note(p, float(i), 1.0, 1, 1) for i, p in enumerate(melody)]
        mock_notes.append(note(72, 7.0, 2.0, 1, 1))
        # Harmony follows the same rhythm in voice 2.
        mock_notes += [note(p, float(i), 1.0, 1, 2) for i, p in enumerate(harmony)]
        mock_notes.append(note(72, 7.0, 2.0, 1, 2))
        # Bass: two-beat notes.
        mock_notes += [note(p, 2.0 * i, 2.0, 2, 1) for i, p in enumerate(bass)]

        metadata = {
            "key_signature": "C",
            "time_signature": "4/4",
            "tempo": 120,
            "note_count": len(mock_notes),
        }
        return {
            "format": "mock_events",
            "data": mock_notes,
            "source_path": image_path,
            "metadata": metadata,
        }

    def is_available(self) -> bool:
        """The mock has no external requirements."""
        return True
121
+
122
+
123
class AudiverisOMRAdapter(OMRAdapter):
    """Real OMR adapter backed by Audiveris.

    Invokes the Audiveris CLI through ``subprocess`` and returns the exported
    MusicXML. The temporary output directory is removed when ``run()``
    finishes.
    """

    def __init__(self, jar_path: str = "", bin_path: str = ""):
        # bin_path: path (or command name) of the audiveris launcher
        # jar_path: path to Audiveris.jar (run through `java -jar`)
        self.jar_path = jar_path
        self.bin_path = bin_path

    def _build_command(self, image_path: str, output_dir: str) -> list:
        """Build the argv list; the launcher is preferred over `java -jar`."""
        cli_args = ["-batch", "-export", "-output", output_dir, "--", image_path]
        if self.bin_path:
            return [self.bin_path, *cli_args]
        max_heap = os.environ.get("AUDIVERIS_MAX_HEAP", "1500m")
        return ["java", f"-Xmx{max_heap}", "-jar", self.jar_path, *cli_args]

    def run(self, image_path: str, debug_dir: str = "") -> dict:
        """Run Audiveris on ``image_path`` and return the MusicXML result.

        Args:
            image_path: Input file handed to Audiveris.
            debug_dir: If set, the exported file is copied there; the
                directory is created when missing.

        Returns:
            Result dict with "format", "data", "source_path", "metadata".
            NOTE: metadata["musicxml_path"] points into the temporary
            directory, which no longer exists once this method returns.

        Raises:
            RuntimeError: If Audiveris cannot be started, times out, exits
                non-zero, or produces no MusicXML file.
        """
        tmp_dir = tempfile.mkdtemp(prefix="score_to_mml_aud_")
        try:
            result = self._run_in_dir(image_path, tmp_dir)
            if debug_dir:
                xml_path = result["metadata"].get("musicxml_path", "")
                if xml_path and Path(xml_path).exists():
                    debug_root = Path(debug_dir)
                    # A missing debug directory should not fail the run.
                    debug_root.mkdir(parents=True, exist_ok=True)
                    shutil.copy2(xml_path, str(debug_root / Path(xml_path).name))
            return result
        finally:
            shutil.rmtree(tmp_dir, ignore_errors=True)

    def _run_in_dir(self, image_path: str, tmp_dir: str) -> dict:
        """Execute Audiveris with ``tmp_dir`` as output directory and read the export."""
        cmd = self._build_command(image_path, tmp_dir)

        try:
            proc = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                timeout=180,
            )
        except FileNotFoundError as e:
            raise RuntimeError(
                f"Audiveris ์‹คํ–‰ ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: '{cmd[0]}'\n"
                f" - AUDIVERIS_BIN ๋˜๋Š” AUDIVERIS_JAR ํ™˜๊ฒฝ๋ณ€์ˆ˜๋ฅผ ํ™•์ธํ•˜์„ธ์š”.\n"
                f" - java -jar ๋ฐฉ์‹์ด๋ผ๋ฉด java๊ฐ€ PATH์— ์žˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”.\n"
                f" ์›๋ณธ ์˜ค๋ฅ˜: {e}"
            ) from e
        except subprocess.TimeoutExpired as e:
            raise RuntimeError(
                f"Audiveris ์‹คํ–‰ ํƒ€์ž„์•„์›ƒ (180์ดˆ ์ดˆ๊ณผ)\n"
                f" ์ž…๋ ฅ ํŒŒ์ผ: {image_path}\n"
                f" ์ด๋ฏธ์ง€๊ฐ€ ๋„ˆ๋ฌด ํฌ๊ฑฐ๋‚˜ ์‹œ์Šคํ…œ์ด ๋А๋ฆฐ ๊ฒฝ์šฐ ๋ฐœ์ƒํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค."
            ) from e

        if proc.returncode != 0:
            stdout_tail = proc.stdout[-800:] if proc.stdout else "(์—†์Œ)"
            stderr_tail = proc.stderr[-800:] if proc.stderr else "(์—†์Œ)"
            raise RuntimeError(
                f"Audiveris ์‹คํ–‰ ์‹คํŒจ (exit code {proc.returncode})\n"
                f" ๋ช…๋ น: {' '.join(cmd)}\n"
                f" ์ž…๋ ฅ: {image_path}\n"
                f" STDOUT (๋งˆ์ง€๋ง‰ 800์ž):\n{stdout_tail}\n"
                f" STDERR (๋งˆ์ง€๋ง‰ 800์ž):\n{stderr_tail}"
            )

        xml_path = _find_musicxml_output(tmp_dir, image_path)
        if xml_path is None:
            stdout_tail = proc.stdout[-400:] if proc.stdout else "(์—†์Œ)"
            raise RuntimeError(
                f"Audiveris ์ถœ๋ ฅ์—์„œ MusicXML ํŒŒ์ผ(.mxl/.xml)์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.\n"
                f" ์ถœ๋ ฅ ๋””๋ ‰ํ† ๋ฆฌ: {tmp_dir}\n"
                f" Audiveris๊ฐ€ ์ •์ƒ ์ข…๋ฃŒ(exit 0)ํ–ˆ์ง€๋งŒ ์ถœ๋ ฅ ํŒŒ์ผ์ด ์—†๋Š” ๊ฒฝ์šฐ:\n"
                f" - ์ด๋ฏธ์ง€ ํ’ˆ์งˆ์ด ๋„ˆ๋ฌด ๋‚ฎ๊ฑฐ๋‚˜\n"
                f" - Audiveris ์„ค์ •์—์„œ -export ์˜ต์…˜์ด ํ•„์š”ํ•˜๊ฑฐ๋‚˜\n"
                f" - ์•…๋ณด๋ฅผ ์ธ์‹ํ•˜์ง€ ๋ชปํ•œ ๊ฒฝ์šฐ์ผ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.\n"
                f" STDOUT:\n{stdout_tail}"
            )

        xml_string = _read_musicxml(xml_path)

        return {
            "format": "musicxml",
            "data": xml_string,
            "source_path": image_path,
            "metadata": {
                "musicxml_path": xml_path,
                "audiveris_stdout_tail": proc.stdout[-300:] if proc.stdout else "",
            },
        }

    def is_available(self) -> bool:
        """Check that the launcher, or the jar together with `java`, can be run."""
        if self.bin_path:
            # Accept a launcher named by path as well as one found on PATH.
            return Path(self.bin_path).exists() or shutil.which(self.bin_path) is not None
        if not self.jar_path or not Path(self.jar_path).exists():
            return False
        # The jar route also needs a working java.
        try:
            result = subprocess.run(
                ["java", "-version"],
                capture_output=True,
                timeout=5,
            )
            return result.returncode == 0
        except (FileNotFoundError, subprocess.TimeoutExpired):
            return False
240
+
241
+
242
class ClarityOMRAdapter(OMRAdapter):
    """OMR adapter backed by Clarity-OMR.

    Takes a PDF and produces MusicXML. Any other input (PNG/JPG, ...) is
    first converted to a temporary PDF with Pillow. With device "auto",
    CUDA is used when torch reports it as available.

    Installation:
        git clone https://github.com/clquwu/Clarity-OMR
        cd Clarity-OMR
        pip install torch torchvision  # GPU: add the --index-url option
        pip install -r requirements.txt

    Environment variables:
        CLARITY_OMR_DIR: Clarity-OMR repo root (required; folder containing omr.py)
        CLARITY_DEVICE: "cuda" | "cpu" | "auto" (default: "auto")
        CLARITY_PYTHON: python executable (default: sys.executable of this process)
    """

    def __init__(self, omr_dir: str = "", device: str = "", python_path: str = ""):
        # Explicit arguments win over the environment variables.
        self.omr_dir = omr_dir or os.environ.get("CLARITY_OMR_DIR", "")
        self.device = device or os.environ.get("CLARITY_DEVICE", "auto")
        self.python_path = python_path or os.environ.get("CLARITY_PYTHON", "")

    def _resolve_device(self) -> str:
        """Decide which device ("cuda" or "cpu") to pass to Clarity-OMR."""
        if self.device in ("cuda", "cpu"):
            return self.device
        # auto: ask torch in the current process whether CUDA is usable.
        try:
            import torch
            return "cuda" if torch.cuda.is_available() else "cpu"
        except ImportError:
            return "cpu"

    def _get_python(self) -> str:
        """Return the python interpreter used to run Clarity-OMR."""
        return self.python_path or sys.executable

    def _image_to_pdf(self, image_path: str, tmp_dir: str) -> str:
        """Convert an image to a temporary PDF inside ``tmp_dir`` (via Pillow).

        Needed because Clarity-OMR is invoked with PDF input only.

        Raises:
            RuntimeError: If Pillow is not installed.
        """
        try:
            from PIL import Image
        except ImportError as e:
            raise RuntimeError(
                "์ด๋ฏธ์ง€๋ฅผ PDF๋กœ ๋ณ€ํ™˜ํ•˜๋ ค๋ฉด Pillow๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค.\n"
                " ์„ค์น˜: pip install Pillow\n"
                " ๋˜๋Š” ์ฒ˜์Œ๋ถ€ํ„ฐ PDF ํŒŒ์ผ์„ ์ž…๋ ฅ์œผ๋กœ ์‚ฌ์šฉํ•˜์„ธ์š”."
            ) from e
        pdf_path = str(Path(tmp_dir) / (Path(image_path).stem + "_clarity_input.pdf"))
        # Context manager closes the source file handle once the PDF is written.
        with Image.open(image_path) as src:
            src.convert("RGB").save(pdf_path, "PDF", resolution=300)
        return pdf_path

    def run(self, image_path: str, debug_dir: str = "") -> dict:
        """Run Clarity-OMR on ``image_path`` and return the MusicXML result.

        Args:
            image_path: PDF or image file to recognize.
            debug_dir: If set, the MusicXML file is copied there; the
                directory is created when missing.

        Returns:
            Result dict with "format", "data", "source_path", "metadata".
            NOTE: metadata["musicxml_path"] points into the temporary
            directory, which no longer exists once this method returns.

        Raises:
            RuntimeError: If CLARITY_OMR_DIR is unset, the process cannot be
                started, it times out, or no MusicXML file is produced.
        """
        if not self.omr_dir:
            raise RuntimeError(
                "CLARITY_OMR_DIR ํ™˜๊ฒฝ๋ณ€์ˆ˜๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.\n"
                " Clarity-OMR ๋ ˆํฌ์ง€ํ† ๋ฆฌ ๋ฃจํŠธ ๊ฒฝ๋กœ๋ฅผ ์„ค์ •ํ•˜์„ธ์š”.\n"
                " ์˜ˆ) set CLARITY_OMR_DIR=C:\\path\\to\\Clarity-OMR"
            )

        tmp_dir = tempfile.mkdtemp(prefix="score_to_mml_clarity_")
        try:
            # Convert anything that is not already a PDF.
            if image_path.lower().endswith(".pdf"):
                pdf_path = image_path
            else:
                pdf_path = self._image_to_pdf(image_path, tmp_dir)

            stem = Path(pdf_path).stem
            output_path = str(Path(tmp_dir) / (stem + ".musicxml"))
            device = self._resolve_device()
            python = self._get_python()
            omr_script = str(Path(self.omr_dir) / "omr.py")

            cmd = [python, omr_script, pdf_path, "-o", output_path, "--device", device]

            try:
                proc = subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    timeout=600,
                    cwd=self.omr_dir,
                )
            except FileNotFoundError as e:
                raise RuntimeError(
                    f"Clarity-OMR ์‹คํ–‰ ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.\n"
                    f" python: {python}\n"
                    f" omr.py: {omr_script}\n"
                    f" CLARITY_OMR_DIR ๊ฒฝ๋กœ๊ฐ€ ์˜ฌ๋ฐ”๋ฅธ์ง€ ํ™•์ธํ•˜์„ธ์š”.\n"
                    f" ์›๋ณธ ์˜ค๋ฅ˜: {e}"
                ) from e
            except subprocess.TimeoutExpired as e:
                raise RuntimeError(
                    f"Clarity-OMR ์‹คํ–‰ ํƒ€์ž„์•„์›ƒ (600์ดˆ ์ดˆ๊ณผ)\n"
                    f" ์ž…๋ ฅ ํŒŒ์ผ: {pdf_path}\n"
                    f" ์ฒซ ์‹คํ–‰ ์‹œ HuggingFace์—์„œ ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ๊ฐ€ ํ•„์š”ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค."
                ) from e

            # Prefer the path given with -o; otherwise search all of tmp_dir.
            xml_path: Optional[str] = None
            if Path(output_path).exists():
                xml_path = output_path
            else:
                xml_path = _find_any_musicxml(tmp_dir, stem)

            if xml_path is None:
                stdout_tail = proc.stdout[-400:] if proc.stdout else "(์—†์Œ)"
                stderr_tail = proc.stderr[-400:] if proc.stderr else "(์—†์Œ)"
                raise RuntimeError(
                    f"Clarity-OMR ์ถœ๋ ฅ์—์„œ MusicXML ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.\n"
                    f" ์ถœ๋ ฅ ๋””๋ ‰ํ† ๋ฆฌ: {tmp_dir}\n"
                    f" returncode: {proc.returncode}\n"
                    f" device: {device}\n"
                    f" STDOUT:\n{stdout_tail}\n"
                    f" STDERR:\n{stderr_tail}"
                )

            if debug_dir:
                debug_root = Path(debug_dir)
                # A missing debug directory should not fail the run.
                debug_root.mkdir(parents=True, exist_ok=True)
                shutil.copy2(xml_path, str(debug_root / Path(xml_path).name))

            xml_string = _read_musicxml(xml_path)
            return {
                "format": "musicxml",
                "data": xml_string,
                "source_path": image_path,
                "metadata": {
                    "musicxml_path": xml_path,
                    "device": device,
                    "clarity_stdout_tail": proc.stdout[-300:] if proc.stdout else "",
                    "clarity_stderr_tail": proc.stderr[-300:] if proc.stderr else "",
                    "clarity_returncode": proc.returncode,
                },
            }
        finally:
            shutil.rmtree(tmp_dir, ignore_errors=True)

    def is_available(self) -> bool:
        """Return True when the configured directory contains omr.py."""
        if not self.omr_dir:
            return False
        return (Path(self.omr_dir) / "omr.py").exists()
389
+
390
+
391
+ # ---------------------------------------------------------------------------
392
+ # Internal utilities
393
+ # ---------------------------------------------------------------------------
394
+
395
def _find_any_musicxml(output_dir: str, stem: str = "") -> Optional[str]:
    """Locate a MusicXML file under ``output_dir``.

    Generic output lookup used for HOMR/OeMeR and similar engines.

    Search order:
        1. Exact names built from ``stem`` directly in ``output_dir``
           (stem.musicxml, stem.mxl, stem.xml)
        2. Recursive search (*.musicxml, then *.mxl, then *.xml), taking the
           first match in sorted order

    Returns:
        The path as a string, or None when nothing matches.
    """
    root = Path(output_dir)
    suffixes = (".musicxml", ".mxl", ".xml")

    if stem:
        exact = next(
            (root / (stem + suffix) for suffix in suffixes if (root / (stem + suffix)).exists()),
            None,
        )
        if exact is not None:
            return str(exact)

    for suffix in suffixes:
        hits = sorted(root.rglob("*" + suffix))
        if hits:
            return str(hits[0])
    return None
417
+
418
+
419
def _find_musicxml_output(output_dir: str, image_path: str) -> Optional[str]:
    """Locate the MusicXML file Audiveris wrote under ``output_dir``.

    Search order:
        1. <output_dir>/<image_stem>/<image_stem>.(mxl|xml) - expected location
        2. <output_dir>/**/*.mxl, then *.xml - recursive fallback, first match
           in sorted order

    Returns:
        The path as a string, or None when nothing matches.
    """
    root = Path(output_dir)
    name = Path(image_path).stem
    suffixes = (".mxl", ".xml")

    expected = [root / name / (name + suffix) for suffix in suffixes]
    for path in expected:
        if path.exists():
            return str(path)

    for suffix in suffixes:
        hits = sorted(root.rglob("*" + suffix))
        if hits:
            return str(hits[0])
    return None
441
+
442
+
443
def _read_musicxml(xml_path: str) -> str:
    """Return the MusicXML text stored in a .mxl (ZIP) or plain XML file.

    Paths ending in ".mxl" (case-insensitive) are unpacked; anything else is
    read as UTF-8 text with undecodable bytes replaced.
    """
    is_archive = xml_path.lower().endswith(".mxl")
    if not is_archive:
        return Path(xml_path).read_text(encoding="utf-8", errors="replace")
    return _extract_xml_from_mxl(xml_path)
451
+
452
+
453
def _extract_xml_from_mxl(mxl_path: str) -> str:
    """Extract the MusicXML text from an MXL (ZIP-format MusicXML) file.

    The rootfile named in META-INF/container.xml is used first. If the
    container is missing, unreadable, or names no existing entry, the first
    ``.xml`` / ``.musicxml`` entry outside META-INF is returned instead.

    Args:
        mxl_path: Path of the .mxl archive.

    Returns:
        The score document decoded as UTF-8 (undecodable bytes replaced).

    Raises:
        RuntimeError: If the archive contains no MusicXML entry.
    """
    with zipfile.ZipFile(mxl_path, "r") as zf:
        names = zf.namelist()

        # META-INF/container.xml -> rootfile full-path
        try:
            # Parse the raw bytes so the XML declaration decides the encoding;
            # a container that is not valid UTF-8 must not abort the fallback.
            container_root = ET.fromstring(zf.read("META-INF/container.xml"))
        except (KeyError, ET.ParseError, ValueError, LookupError):
            container_root = None

        if container_root is not None:
            for elem in container_root.iter():
                local = elem.tag.split("}")[-1]
                if local == "rootfile":
                    rootfile_path = elem.get("full-path")
                    if rootfile_path and rootfile_path in names:
                        return zf.read(rootfile_path).decode("utf-8", errors="replace")

        # fallback: first score-like entry in the archive
        xml_entries = [
            n for n in names
            if n.lower().endswith((".xml", ".musicxml")) and not n.startswith("META-INF")
        ]
        if xml_entries:
            return zf.read(xml_entries[0]).decode("utf-8", errors="replace")

    raise RuntimeError(f"MXL ํŒŒ์ผ์—์„œ MusicXML์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {mxl_path}")
483
+
484
+
485
class HOMROMRAdapter(OMRAdapter):
    """OMR adapter backed by HOMR.

    Installation: pip install homr
    Reference: https://github.com/liebharc/homr

    Environment variables:
        HOMR_CMD: homr executable (default: auto-detected, then python -m homr)
            e.g. HOMR_CMD=homr or HOMR_CMD=C:\\path\\to\\homr.exe

    The input is copied into a temporary directory and homr is run there;
    the resulting MusicXML file is then searched for in that directory.
    """

    # Developer-machine install location from the original code, kept as a
    # late lookup candidate so existing setups keep working.
    _LEGACY_WINDOWS_EXE = Path(
        r"C:\Users\Park\AppData\Local\Programs\Python\Python310\Scripts\homr.exe"
    )

    def __init__(self, cmd: str = ""):
        self.cmd = cmd or os.environ.get("HOMR_CMD", "")

    def _locate_executable(self) -> Optional[str]:
        """Return the path of an installed homr launcher, or None.

        Candidates, in order: ``Scripts/homr.exe`` next to the current
        interpreter, the legacy hard-coded install, a ``homr`` script next to
        the interpreter, and finally ``homr`` on PATH.
        """
        interp_dir = Path(sys.executable).parent
        candidates = (
            interp_dir / "Scripts" / "homr.exe",
            self._LEGACY_WINDOWS_EXE,
            interp_dir / "homr",
        )
        for candidate in candidates:
            if candidate.is_file():
                return str(candidate)
        return shutil.which("homr")

    def _build_command(self, image_path: str) -> list[str]:
        """Build the argv list: explicit command, detected launcher, or `python -m homr`."""
        if self.cmd:
            return [self.cmd, image_path]
        launcher = self._locate_executable()
        if launcher:
            return [launcher, image_path]
        return [sys.executable, "-m", "homr", image_path]

    def run(self, image_path: str, debug_dir: str = "") -> dict:
        """Run HOMR on a copy of ``image_path`` and return the MusicXML result.

        Args:
            image_path: Input image file.
            debug_dir: If set, the MusicXML file is copied there; the
                directory is created when missing.

        Returns:
            Result dict with "format", "data", "source_path", "metadata".
            NOTE: metadata["musicxml_path"] points into the temporary
            directory, which no longer exists once this method returns.

        Raises:
            RuntimeError: If HOMR cannot be started, times out, or produces
                no MusicXML file.
        """
        tmp_dir = tempfile.mkdtemp(prefix="score_to_mml_homr_")
        try:
            # Copy the input into tmp_dir in case the output is written next
            # to the input file.
            img_filename = Path(image_path).name
            tmp_input = str(Path(tmp_dir) / img_filename)
            shutil.copy2(image_path, tmp_input)

            cmd = self._build_command(tmp_input)
            # PYTHONUTF8=1: avoids encoding errors from the musicxml package
            # on Korean Windows (CP949) systems.
            run_env = {**os.environ, "PYTHONUTF8": "1"}
            try:
                proc = subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    timeout=300,
                    cwd=tmp_dir,
                    env=run_env,
                )
            except FileNotFoundError as e:
                raise RuntimeError(
                    f"HOMR ์‹คํ–‰ ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: '{cmd[0]}'\n"
                    f" ์„ค์น˜: pip install homr\n"
                    f" ๋˜๋Š” HOMR_CMD ํ™˜๊ฒฝ๋ณ€์ˆ˜๋ฅผ ์„ค์ •ํ•˜์„ธ์š”.\n"
                    f" ์›๋ณธ ์˜ค๋ฅ˜: {e}"
                ) from e
            except subprocess.TimeoutExpired as e:
                raise RuntimeError(
                    f"HOMR ์‹คํ–‰ ํƒ€์ž„์•„์›ƒ (300์ดˆ ์ดˆ๊ณผ)\n"
                    f" ์ž…๋ ฅ ํŒŒ์ผ: {image_path}"
                ) from e

            xml_path = _find_any_musicxml(tmp_dir, Path(image_path).stem)
            if xml_path is None:
                stdout_tail = proc.stdout[-400:] if proc.stdout else "(์—†์Œ)"
                stderr_tail = proc.stderr[-400:] if proc.stderr else "(์—†์Œ)"
                raise RuntimeError(
                    f"HOMR ์ถœ๋ ฅ์—์„œ MusicXML ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.\n"
                    f" ์ถœ๋ ฅ ๋””๋ ‰ํ† ๋ฆฌ: {tmp_dir}\n"
                    f" returncode: {proc.returncode}\n"
                    f" STDOUT:\n{stdout_tail}\n"
                    f" STDERR:\n{stderr_tail}"
                )

            if debug_dir:
                debug_root = Path(debug_dir)
                # A missing debug directory should not fail the run.
                debug_root.mkdir(parents=True, exist_ok=True)
                shutil.copy2(xml_path, str(debug_root / Path(xml_path).name))

            xml_string = _read_musicxml(xml_path)
            return {
                "format": "musicxml",
                "data": xml_string,
                "source_path": image_path,
                "metadata": {
                    "musicxml_path": xml_path,
                    "homr_stdout_tail": proc.stdout[-300:] if proc.stdout else "",
                    "homr_stderr_tail": proc.stderr[-300:] if proc.stderr else "",
                    "homr_returncode": proc.returncode,
                },
            }
        finally:
            shutil.rmtree(tmp_dir, ignore_errors=True)

    def is_available(self) -> bool:
        """Report whether the command `_build_command()` would use can run."""
        if self.cmd:
            p = Path(self.cmd)
            if p.is_absolute():
                return p.exists()
            return shutil.which(self.cmd) is not None
        if self._locate_executable() is not None:
            return True
        # Mirror the `python -m homr` fallback: usable when the module can
        # be found by the current interpreter.
        import importlib.util
        try:
            return importlib.util.find_spec("homr") is not None
        except (ImportError, ValueError):
            return False
595
+
596
+
597
class OeMeROMRAdapter(OMRAdapter):
    """
    OMR adapter backed by OeMeR (oemer).

    Install: pip install oemer
    Reference: https://github.com/BreezeWhite/oemer

    Environment variables:
        OEMER_CMD: command used to launch oemer (default: oemer)
            e.g. OEMER_CMD=oemer or OEMER_CMD=C:\\path\\to\\oemer.exe

    Output: <stem>.musicxml is produced in the same directory as the input
    image, which is why run() operates on a copy inside a temporary directory.
    """

    def __init__(self, cmd: str = ""):
        # Explicit argument first, then the OEMER_CMD variable, then plain "oemer".
        self.cmd = cmd or os.environ.get("OEMER_CMD", "oemer")

    def _build_command(self, image_path: str) -> list[str]:
        """Return the argv list: the oemer command followed by the image path."""
        return [self.cmd, image_path]

    def run(self, image_path: str, debug_dir: str = "") -> dict:
        """
        Run oemer on a copy of ``image_path`` and return the MusicXML result.

        Args:
            image_path: path of the score image to recognise.
            debug_dir: when non-empty, the produced MusicXML file is also
                copied into this directory.

        Returns:
            dict with keys "format" ("musicxml"), "data" (the XML text),
            "source_path" (the original ``image_path``) and "metadata"
            (MusicXML path inside the temp dir, stdout/stderr tails, return code).

        Raises:
            RuntimeError: the executable is missing, the run exceeds 600 s,
                or no MusicXML file is found afterwards.
        """
        tmp_dir = tempfile.mkdtemp(prefix="score_to_mml_oemer_")
        try:
            # Copy the input into tmp_dir (the output is created next to the input).
            img_filename = Path(image_path).name
            tmp_input = str(Path(tmp_dir) / img_filename)
            shutil.copy2(image_path, tmp_input)

            cmd = self._build_command(tmp_input)
            try:
                proc = subprocess.run(
                    cmd,
                    capture_output=True,
                    text=True,
                    timeout=600,  # up to 10 minutes, including the model download on first run
                    cwd=tmp_dir,
                )
            except FileNotFoundError as e:
                raise RuntimeError(
                    f"OeMeR ์‹คํ–‰ ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: '{cmd[0]}'\n"
                    f" ์„ค์น˜: pip install oemer\n"
                    f" ๋˜๋Š” OEMER_CMD ํ™˜๊ฒฝ๋ณ€์ˆ˜๋ฅผ ์„ค์ •ํ•˜์„ธ์š”.\n"
                    f" ์›๋ณธ ์˜ค๋ฅ˜: {e}"
                )
            except subprocess.TimeoutExpired:
                raise RuntimeError(
                    f"OeMeR ์‹คํ–‰ ํƒ€์ž„์•„์›ƒ (600์ดˆ ์ดˆ๊ณผ)\n"
                    f" ์ž…๋ ฅ ํŒŒ์ผ: {image_path}\n"
                    f" ์ฒซ ์‹คํ–‰ ์‹œ ๋”ฅ๋Ÿฌ๋‹ ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ๊ฐ€ ํ•„์š”ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค.\n"
                    f" `oemer <image>` ๋ฅผ ์ง์ ‘ ์‹คํ–‰ํ•ด์„œ ๋ชจ๋ธ์„ ๋จผ์ € ๋‹ค์šด๋กœ๋“œํ•˜์„ธ์š”."
                )

            # The process return code is not checked here; success is decided
            # solely by whether a MusicXML file can be found in tmp_dir.
            xml_path = _find_any_musicxml(tmp_dir, Path(image_path).stem)
            if xml_path is None:
                stdout_tail = proc.stdout[-400:] if proc.stdout else "(์—†์Œ)"
                stderr_tail = proc.stderr[-400:] if proc.stderr else "(์—†์Œ)"
                raise RuntimeError(
                    f"OeMeR ์ถœ๋ ฅ์—์„œ MusicXML ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.\n"
                    f" ์ถœ๋ ฅ ๋””๋ ‰ํ† ๋ฆฌ: {tmp_dir}\n"
                    f" returncode: {proc.returncode}\n"
                    f" STDOUT:\n{stdout_tail}\n"
                    f" STDERR:\n{stderr_tail}"
                )

            # Keep a copy of the raw MusicXML for debugging when requested.
            if debug_dir:
                dest = Path(debug_dir) / Path(xml_path).name
                shutil.copy2(xml_path, str(dest))

            xml_string = _read_musicxml(xml_path)
            return {
                "format": "musicxml",
                "data": xml_string,
                "source_path": image_path,
                "metadata": {
                    # NOTE: this path lies inside tmp_dir, which is removed in the
                    # finally clause below, so it no longer exists once run() returns.
                    "musicxml_path": xml_path,
                    "oemer_stdout_tail": proc.stdout[-300:] if proc.stdout else "",
                    "oemer_stderr_tail": proc.stderr[-300:] if proc.stderr else "",
                    "oemer_returncode": proc.returncode,
                },
            }
        finally:
            # Always remove the working directory, on success and on failure.
            shutil.rmtree(tmp_dir, ignore_errors=True)

    def is_available(self) -> bool:
        """Return True when ``self.cmd`` resolves via PATH or exists as a file path."""
        if self.cmd:
            return shutil.which(self.cmd) is not None or Path(self.cmd).exists()
        return False
684
+
685
+
686
+ # ---------------------------------------------------------------------------
687
+ # ํŒฉํ† ๋ฆฌ
688
+ # ---------------------------------------------------------------------------
689
+
690
def get_adapter(mock_mode: bool = True) -> OMRAdapter:
    """
    Return the OMR adapter selected by ``mock_mode``.

    mock_mode=True  -> MockOMRAdapter (no environment variables needed)
    mock_mode=False -> AudiverisOMRAdapter, configured from the
                       AUDIVERIS_BIN and/or AUDIVERIS_JAR environment variables

    Raises:
        RuntimeError: neither variable is set, or the configured Audiveris
            adapter reports that it is not available.
    """
    if mock_mode:
        return MockOMRAdapter()

    bin_path = os.getenv("AUDIVERIS_BIN", "")
    jar_path = os.getenv("AUDIVERIS_JAR", "")

    if not (bin_path or jar_path):
        raise RuntimeError(
            "Audiveris ํ™˜๊ฒฝ๋ณ€์ˆ˜๊ฐ€ ์„ค์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.\n"
            "๋‹ค์Œ ์ค‘ ํ•˜๋‚˜๋ฅผ ์„ค์ •ํ•˜๊ณ  ๋‹ค์‹œ ์‹คํ–‰ํ•˜์„ธ์š”:\n\n"
            " [์ง์ ‘ ์‹คํ–‰ ๋ฐฉ์‹]\n"
            " set AUDIVERIS_BIN=C:\\path\\to\\Audiveris.exe (Windows)\n"
            " export AUDIVERIS_BIN=/path/to/audiveris (Linux/Mac)\n\n"
            " [JAR ๋ฐฉ์‹ (java ํ•„์š”)]\n"
            " set AUDIVERIS_JAR=C:\\path\\to\\Audiveris.jar (Windows)\n"
            " export AUDIVERIS_JAR=/path/to/Audiveris.jar (Linux/Mac)"
        )

    adapter = AudiverisOMRAdapter(jar_path=jar_path, bin_path=bin_path)
    if adapter.is_available():
        return adapter

    # Not runnable: point the user at whichever variable was actually used
    # (the binary path takes precedence when both are set).
    if not bin_path:
        hint = (
            f"AUDIVERIS_JAR='{jar_path}' ํŒŒ์ผ์ด ์กด์žฌํ•˜๋Š”์ง€,\n"
            f" java๊ฐ€ PATH์— ์žˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”. (java -version ์œผ๋กœ ํ…Œ์ŠคํŠธ)"
        )
    else:
        hint = f"AUDIVERIS_BIN='{bin_path}' ํŒŒ์ผ์ด ์กด์žฌํ•˜๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”."
    raise RuntimeError(f"Audiveris๋ฅผ ์‹คํ–‰ํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.\n {hint}")
729
+
730
+
731
def get_engine_adapter(engine_name: str) -> OMRAdapter:
    """
    Return the adapter registered under ``engine_name``.

    Used when several engines are run side by side (compare_omr_engines()).
    The name is matched case-insensitively, ignoring surrounding whitespace.

    Supported engines:
        "mock"      -> MockOMRAdapter
        "audiveris" -> AudiverisOMRAdapter (needs AUDIVERIS_BIN or AUDIVERIS_JAR)
        "homr"      -> HOMROMRAdapter (pip install homr, or HOMR_CMD)
        "oemer"     -> OeMeROMRAdapter (pip install oemer, or OEMER_CMD)
        "clarity"   -> ClarityOMRAdapter (needs CLARITY_OMR_DIR)

    Raises:
        RuntimeError: the engine is unknown or not available; the caller is
            expected to handle it.
    """
    name = engine_name.lower().strip()

    # Message raised when the corresponding engine exists but cannot be run.
    unavailable_messages = {
        "homr": (
            "HOMR๋ฅผ ์‹คํ–‰ํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.\n"
            " ์„ค์น˜: pip install homr\n"
            " ๋˜๋Š” HOMR_CMD ํ™˜๊ฒฝ๋ณ€์ˆ˜๋ฅผ ์„ค์ •ํ•˜์„ธ์š”."
        ),
        "oemer": (
            "OeMeR๋ฅผ ์‹คํ–‰ํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.\n"
            " ์„ค์น˜: pip install oemer\n"
            " ๋˜๋Š” OEMER_CMD ํ™˜๊ฒฝ๋ณ€์ˆ˜๋ฅผ ์„ค์ •ํ•˜์„ธ์š”."
        ),
        "clarity": (
            "Clarity-OMR๋ฅผ ์‹คํ–‰ํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.\n"
            " 1. git clone https://github.com/clquwu/Clarity-OMR\n"
            " 2. pip install -r Clarity-OMR/requirements.txt\n"
            " 3. set CLARITY_OMR_DIR=C:\\path\\to\\Clarity-OMR\n"
            " GPU ์‚ฌ์šฉ: pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121"
        ),
    }

    if name == "mock":
        return MockOMRAdapter()
    if name == "audiveris":
        return get_adapter(mock_mode=False)
    if name not in unavailable_messages:
        raise RuntimeError(
            f"์•Œ ์ˆ˜ ์—†๋Š” ์—”์ง„ ์ด๋ฆ„: '{engine_name}'\n"
            f" ์ง€์› ์—”์ง„: mock, audiveris, homr, oemer, clarity"
        )

    # Only the adapter class that was asked for is instantiated.
    if name == "homr":
        adapter = HOMROMRAdapter()
    elif name == "oemer":
        adapter = OeMeROMRAdapter()
    else:
        adapter = ClarityOMRAdapter()

    if adapter.is_available():
        return adapter
    raise RuntimeError(unavailable_messages[name])
core/part_splitter.py ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
core/part_splitter.py

Distribute a list of NoteEvents across N parts.

Original design note: greedy distribution by pitch range
  - Part 1: highest pitch group (main melody)
  - Part 2: middle pitch group (harmony / accompaniment)
  - Part 3: lowest pitch group (bass)

split_parts() currently prefers part_hint, then staff/voice grouping; the
pitch-range strategy lives in _split_by_pitch_range().

Replaceability: the algorithm can be swapped for a more elaborate one
(staff/voice based, register analysis, etc.) as long as the signature of
split_parts() is kept.
"""
14
+
15
+ from __future__ import annotations
16
+
17
+ from .models import NoteEvent
18
+
19
+
20
def _count_max_simultaneous(notes: list[NoteEvent]) -> int:
    """Return the peak number of notes sounding at once (used to auto-detect part_count); at least 1."""
    if not notes:
        return 1

    # One (+1) boundary at each note start and one (-1) at each note end.
    # Sorting the tuples puts an end before a start at the same instant, so
    # back-to-back notes are not counted as overlapping.
    boundaries = sorted(
        (moment, step)
        for ev in notes
        for moment, step in ((ev.start, 1), (ev.start + ev.duration, -1))
    )

    active = 0
    peak = 0
    for _, step in boundaries:
        active += step
        if active > peak:
            peak = active
    return peak if peak > 1 else 1
34
+
35
+
36
def split_parts(
    notes: list[NoteEvent],
    part_count: int = 0,
) -> list[list[NoteEvent]]:
    """
    Distribute NoteEvents across ``part_count`` parts.

    Strategy, in priority order:
      1. If any note carries a ``part_hint``, split by hint.
      2. If every note shares one staff and one voice (monophonic),
         put everything into Part 1, sorted by start time.
      3. Otherwise split by (staff, voice) combination.

    Args:
        notes: all NoteEvents of the score.
        part_count: number of parts to produce. A value <= 0 (the default)
            auto-detects it from the maximum number of simultaneous notes.

    Returns:
        A list of per-part NoteEvent lists. Its length equals the effective
        part_count; parts without notes are empty lists.
    """
    if part_count <= 0:
        part_count = _count_max_simultaneous(notes)

    if not notes:
        return [[] for _ in range(part_count)]

    # Explicit hints take precedence over any structural grouping.
    if any(n.part_hint is not None for n in notes):
        return _split_by_hint(notes, part_count)

    staffs = {n.staff for n in notes}
    voices = {n.voice for n in notes}

    # A single staff with a single voice is a monophonic line: all in Part 1.
    if len(staffs) == 1 and len(voices) == 1:
        parts: list[list[NoteEvent]] = [[] for _ in range(part_count)]
        parts[0].extend(sorted(notes, key=lambda n: n.start))
        return parts

    # Several staffs or voices: group by (staff, voice).
    # A pitch-range "fallback" return used to follow this statement but could
    # never execute; it has been removed. _split_by_pitch_range() stays
    # available as an alternative strategy.
    return _split_by_staff_and_voice(notes, part_count)
75
+
76
+
77
def _split_by_hint(notes: list[NoteEvent], part_count: int) -> list[list[NoteEvent]]:
    """Place each note in the part named by its 1-based ``part_hint``; missing or out-of-range hints go to Part 1."""
    buckets: list[list[NoteEvent]] = [[] for _ in range(part_count)]
    for ev in notes:
        hint = ev.part_hint
        usable = hint is not None and 1 <= hint <= part_count
        target = hint - 1 if usable else 0
        buckets[target].append(ev)
    return buckets
87
+
88
+
89
def _split_by_staff_and_voice(notes: list[NoteEvent], part_count: int) -> list[list[NoteEvent]]:
    """
    Split notes into parts by their (staff, voice) combination.

    Combinations are ranked by average pitch of their pitched notes (pitch > 0),
    highest first, so the highest group becomes Part 1. When there are more
    combinations than parts, the surplus groups are merged into the last part.
    Each part is returned sorted by start time.
    """
    grouped: dict[tuple[int, int], list[NoteEvent]] = {}
    for ev in notes:
        combo = (ev.staff, ev.voice)
        if combo not in grouped:
            grouped[combo] = []
        grouped[combo].append(ev)

    def mean_pitch(group: list[NoteEvent]) -> float:
        # Rests (pitch 0) do not take part in the average.
        values = [ev.pitch for ev in group if ev.pitch > 0]
        if not values:
            return 0.0
        return sum(values) / len(values)

    ranked = sorted(grouped.values(), key=mean_pitch, reverse=True)

    last = part_count - 1
    buckets: list[list[NoteEvent]] = [[] for _ in range(part_count)]
    for rank, group in enumerate(ranked):
        buckets[rank if rank < last else last].extend(group)

    # Merging groups can interleave start times, so re-sort every part.
    return [sorted(bucket, key=lambda ev: ev.start) for bucket in buckets]
117
+
118
+
119
def _split_by_pitch_range(notes: list[NoteEvent], part_count: int) -> list[list[NoteEvent]]:
    """
    Greedy split by pitch range.

    The span between the lowest and highest pitch is cut into ``part_count``
    bands; higher bands map to lower part indices (Part 1 is the highest).
    Each part is returned sorted by start time.
    """
    sounding = [ev for ev in notes if ev.pitch > 0]
    silent = [ev for ev in notes if ev.pitch == 0]
    last = part_count - 1
    buckets: list[list[NoteEvent]] = [[] for _ in range(part_count)]

    if not sounding:
        # NOTE(review): with no pitched notes the rests land in the LAST part,
        # whereas the normal path below puts them in Part 1 - confirm intent.
        buckets[last].extend(silent)
        return buckets

    pitch_values = [ev.pitch for ev in sounding]
    min_pitch = min(pitch_values)
    max_pitch = max(pitch_values)
    # Guard against a zero span when every note has the same pitch.
    pitch_span = (max_pitch - min_pitch) or 1

    for ev in sounding:
        ratio = (ev.pitch - min_pitch) / pitch_span
        # High pitch -> low index. The 0.01 keeps ratio == 1.0 inside the top band.
        slot = last - int(ratio * (part_count - 0.01))
        buckets[max(0, min(slot, last))].append(ev)

    # Rests go to Part 1.
    buckets[0].extend(silent)

    for bucket in buckets:
        bucket.sort(key=lambda ev: ev.start)
    return buckets
core/pdf_converter.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
core/pdf_converter.py

Convert a PDF file into one image per page.

Library used: pymupdf (fitz)
  - Install: pip install pymupdf
  - No external binary (Poppler etc.) required; works out of the box on Windows

Role of this module:
  - Take a PDF file and render every page to a PNG image
  - Return the list of rendered image paths
  - Everything afterwards is delegated unchanged to the existing image pipeline

Side effects:
  - Creates page_001.png, page_002.png, ... in output_dir
  - Cleaning up the temporary directory is the caller's job (pipeline.py)
"""
19
+
20
+ from __future__ import annotations
21
+
22
+ from pathlib import Path
23
+
24
+
25
class PDFConvertError(Exception):
    """Raised when a PDF cannot be opened, inspected or rendered."""
28
+
29
+
30
def convert_pdf_to_images(
    pdf_path: str,
    output_dir: str,
    dpi: int = 150,
) -> list[str]:
    """
    Render every page of a PDF file to a PNG image.

    Args:
        pdf_path: path of the input PDF file.
        output_dir: directory that receives the images (created if missing).
        dpi: rendering resolution (default 150).

    Returns:
        Image file paths in page order (page_001.png, page_002.png, ...).

    Raises:
        PDFConvertError: pymupdf is not installed, the file does not exist,
            the PDF cannot be opened, it has no pages, or a page fails to render.
    """
    try:
        import fitz  # pymupdf
    except ImportError as e:
        raise PDFConvertError(
            "pymupdf๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์„ค์น˜ ๋ช…๋ น: pip install pymupdf"
        ) from e

    pdf = Path(pdf_path)
    if not pdf.exists():
        raise PDFConvertError(f"PDF ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {pdf_path}")

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    try:
        doc = fitz.open(str(pdf))
    except Exception as e:
        raise PDFConvertError(f"PDF ์—ด๊ธฐ ์‹คํŒจ: {e}") from e

    # From here on the document is always closed, whichever way we leave.
    try:
        if doc.page_count == 0:
            raise PDFConvertError(f"PDF์— ํŽ˜์ด์ง€๊ฐ€ ์—†์Šต๋‹ˆ๋‹ค: {pdf_path}")

        # fitz renders at 72 dpi by default, so scale by dpi / 72.
        matrix = fitz.Matrix(dpi / 72, dpi / 72)
        image_paths: list[str] = []

        for page_idx in range(doc.page_count):
            try:
                page = doc[page_idx]
                pixmap = page.get_pixmap(matrix=matrix)
                img_path = out_dir / f"page_{page_idx + 1:03d}.png"
                pixmap.save(str(img_path))
            except Exception as e:
                raise PDFConvertError(
                    f"ํŽ˜์ด์ง€ ๋ Œ๋”๋ง ์‹คํŒจ (page {page_idx + 1}): {e}"
                ) from e
            image_paths.append(str(img_path))

        return image_paths
    finally:
        doc.close()
89
+
90
+
91
def get_pdf_page_count(pdf_path: str) -> int:
    """
    Return the number of pages in a PDF file.

    Raises:
        PDFConvertError: pymupdf is not installed, or the PDF cannot be
            opened or inspected.
    """
    try:
        import fitz
    except ImportError as e:
        raise PDFConvertError(
            "pymupdf๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์„ค์น˜ ๋ช…๋ น: pip install pymupdf"
        ) from e

    try:
        doc = fitz.open(pdf_path)
        try:
            return doc.page_count
        finally:
            # Close the document even if reading the page count fails.
            doc.close()
    except Exception as e:
        raise PDFConvertError(f"PDF ํŽ˜์ด์ง€ ์ˆ˜ ํ™•์ธ ์‹คํŒจ: {e}") from e
core/pipeline.py ADDED
@@ -0,0 +1,1117 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
core/pipeline.py

Single entry point of the whole conversion pipeline.

Both the CLI and the Web API must call convert_score_to_mml() only.
Side effects (saving files, console output) are not performed by that function.

Pipeline flow - image input:
    input path
    -> preprocess.preprocess_image()        file validation
    -> [preprocess.apply_preprocessing()]   OpenCV preprocessing (audiveris mode + when enabled)
    -> omr_adapter.run()
    -> music_parser.parse_omr_result()
    -> part_splitter.split_parts()
    -> mml_converter.convert_parts_to_mml()
    -> ConvertResult

Pipeline flow - PDF input:
    input path
    -> pdf_converter.convert_pdf_to_images()   one PNG per page
    -> for every page:
         [preprocess.apply_preprocessing()]    OpenCV preprocessing (audiveris mode + when enabled)
         -> omr_adapter.run()
         -> music_parser.parse_omr_result()
         -> append NoteEvents on the time axis (accumulating the page offset)
    -> merge all NoteEvents
    -> part_splitter.split_parts()
    -> mml_converter.convert_parts_to_mml()
    -> ConvertResult (temporary directory cleaned up)
"""
32
+
33
+ from __future__ import annotations
34
+
35
+ import datetime
36
+ import json
37
+ import shutil
38
+ import tempfile
39
+ from pathlib import Path
40
+ from typing import Optional
41
+
42
+ from .models import ConvertOptions, ConvertResult, NoteEvent, EngineRunResult, ComparisonReport
43
+ from .preprocess import preprocess_image, apply_preprocessing, PreprocessError
44
+ from .omr_adapter import get_adapter, get_engine_adapter, OMRAdapter
45
+ from .music_parser import parse_omr_result, ParseError
46
+ from .part_splitter import split_parts
47
+ from .mml_converter import convert_parts_to_mml
48
+
49
+
50
def _get_adapter_for_options(options: ConvertOptions) -> OMRAdapter:
    """Pick the adapter for ``options``: a named engine when ``options.engine`` is set, otherwise by ``mock_mode``."""
    if not options.engine:
        return get_adapter(mock_mode=options.mock_mode)
    return get_engine_adapter(options.engine)
55
+
56
+
57
def convert_score_to_mml(
    input_path: str,
    options: Optional[ConvertOptions] = None,
) -> ConvertResult:
    """
    Convert an image or PDF score to MML; main entry point of the pipeline.

    Args:
        input_path: path of the input file (PNG, JPG, PDF, ...). Files whose
            extension is ".pdf" (case-insensitive) take the PDF route, everything
            else is treated as a single image.
        options: conversion options; defaults are used when None.

    Returns:
        ConvertResult with the success flag, the per-part MML, the warning list
        and debug information.
    """
    if options is None:
        options = ConvertOptions()

    debug_info: dict = {"input_path": input_path, "options": vars(options)}

    ext = Path(input_path).suffix.lower()
    if ext != ".pdf":
        return _process_image(input_path, options, debug_info)

    # Clarity-OMR consumes the PDF directly, so no page splitting is needed.
    # The engine name is normalised the same way get_engine_adapter() does
    # (lower-case, stripped); otherwise e.g. "Clarity" would resolve to the
    # Clarity adapter but still be sent down the page-splitting route.
    engine = str(options.engine or "").lower().strip()
    if engine == "clarity":
        return _process_pdf_direct(input_path, options, debug_info)
    return _process_pdf(input_path, options, debug_info)
84
+
85
+
86
+ # ---------------------------------------------------------------------------
87
+ # ๋‚ด๋ถ€ ํ—ฌํผ
88
+ # ---------------------------------------------------------------------------
89
+
90
def _process_image(
    input_path: str,
    options: ConvertOptions,
    debug_info: dict,
) -> ConvertResult:
    """
    Conversion pipeline for a single image file.

    Steps: optional debug-dir setup, file validation, optional preprocessing,
    OMR + parsing, then part splitting / MML generation via _build_result().

    Args:
        input_path: path of the image to convert.
        options: conversion options (debug_dir, mock_mode, preprocess_enabled, ...).
        debug_info: dict that is filled in place with diagnostic data and
            attached to the returned ConvertResult.

    Returns:
        ConvertResult; ``success`` is False when validation, adapter
        initialisation, OMR or parsing fails.
    """
    warnings: list[str] = []
    debug_info["input_type"] = "image"

    # Prepare the per-run debug folder (only when options.debug_dir is set).
    run_debug_dir = _prepare_run_debug_dir(input_path, options.debug_dir)
    if run_debug_dir:
        debug_info["debug_output_dir"] = run_debug_dir
        # NOTE(review): this copy runs before the file is validated below, so a
        # missing input presumably raises here instead of yielding a failed
        # ConvertResult - confirm whether that is intended.
        shutil.copy2(input_path, str(Path(run_debug_dir) / ("00_original" + Path(input_path).suffix)))

    # 1. File validation
    try:
        image_info = preprocess_image(input_path)
        debug_info["image_info"] = image_info
    except PreprocessError as e:
        return ConvertResult(success=False, warnings=[str(e)], debug_info=debug_info)

    omr_input = image_info["preprocessed_path"]
    preprocess_tmpdir = None

    try:
        # 2. OpenCV preprocessing (only outside mock mode and when enabled)
        if not options.mock_mode and options.preprocess_enabled:
            preprocess_tmpdir = tempfile.mkdtemp(prefix="score_to_mml_pre_")
            omr_input, pre_info, pre_warnings = _run_preprocess_step(
                omr_input, preprocess_tmpdir, Path(input_path).stem, options,
                debug_dir=run_debug_dir,
            )
            # Copy the final preprocessed image into the debug folder.
            if run_debug_dir and Path(omr_input).exists():
                shutil.copy2(omr_input, str(Path(run_debug_dir) / "01_preprocessed_final.png"))
            debug_info["preprocess_info"] = pre_info
            warnings.extend(pre_warnings)
        else:
            debug_info["preprocess_info"] = {
                "applied": False,
                "reason": "mock mode" if options.mock_mode else "preprocess_enabled=False",
            }

        # 3-4. OMR + parsing
        try:
            adapter = _get_adapter_for_options(options)
        except RuntimeError as e:
            # Unlike the failure path below, no debug log is written here.
            return ConvertResult(success=False, warnings=[f"OMR ์–ด๋Œ‘ํ„ฐ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}"], debug_info=debug_info)
        notes, omr_warnings, omr_debug, _ = _run_omr_and_parse(
            omr_input, adapter, debug_dir=run_debug_dir
        )
        debug_info.update(omr_debug)
        warnings.extend(omr_warnings)

        # notes is None signals that OMR or parsing failed.
        if notes is None:
            _save_debug_log(run_debug_dir, debug_info, warnings, result=None)
            return ConvertResult(success=False, warnings=warnings, debug_info=debug_info)

        result = _build_result(notes, options, warnings, debug_info)
        _save_debug_log(run_debug_dir, debug_info, warnings, result=result)
        return result

    finally:
        # Remove the preprocessing scratch directory if one was created.
        if preprocess_tmpdir:
            shutil.rmtree(preprocess_tmpdir, ignore_errors=True)
156
+
157
+
158
def _process_pdf_direct(
    input_path: str,
    options: ConvertOptions,
    debug_info: dict,
) -> ConvertResult:
    """
    Hand the PDF to the OMR engine as-is, without splitting it into pages.

    Used for engines such as Clarity-OMR that accept a whole PDF:
    PDF -> adapter.run(pdf) -> MusicXML -> NoteEvent -> MML
    """
    debug_info["input_type"] = "pdf_direct"

    try:
        adapter = _get_adapter_for_options(options)
    except RuntimeError as e:
        return ConvertResult(success=False, warnings=[f"OMR ์–ด๋Œ‘ํ„ฐ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}"], debug_info=debug_info)

    notes, omr_warnings, omr_debug, _raw_xml = _run_omr_and_parse(input_path, adapter)
    debug_info.update(omr_debug)
    warnings: list[str] = list(omr_warnings)

    # notes is None signals that OMR or parsing failed.
    if notes is not None:
        return _build_result(notes, options, warnings, debug_info)
    return ConvertResult(success=False, warnings=warnings, debug_info=debug_info)
185
+
186
+
187
def _process_pdf(
    input_path: str,
    options: ConvertOptions,
    debug_info: dict,
) -> ConvertResult:
    """
    Handle PDF input.

    PDF -> one image per page -> OMR + parsing per page -> merged NoteEvents
    -> _build_result(). Page images live in a temporary directory that is
    removed before returning.

    Args:
        input_path: path of the PDF file.
        options: conversion options (pdf_dpi, mock_mode, preprocess_enabled, ...).
        debug_info: dict filled in place with diagnostic data (per-page entries
            under "page_<n>", "processed_pages", "failed_page", "note_count").

    Returns:
        ConvertResult; ``success`` is False when PDF rendering fails, the OMR
        adapter cannot be initialised, or any single page fails.
    """
    from .pdf_converter import convert_pdf_to_images, PDFConvertError

    warnings: list[str] = []
    debug_info["input_type"] = "pdf"

    # PDF -> page images
    tmp_dir = tempfile.mkdtemp(prefix="score_to_mml_pdf_")
    try:
        try:
            page_image_paths = convert_pdf_to_images(
                input_path, tmp_dir, dpi=options.pdf_dpi
            )
        except PDFConvertError as e:
            return ConvertResult(
                success=False,
                warnings=[f"PDF ๋ณ€ํ™˜ ์‹คํŒจ: {e}"],
                debug_info=debug_info,
            )

        debug_info["pdf_page_count"] = len(page_image_paths)

        # Report an unavailable engine as a failed result, exactly like the
        # image and direct-PDF paths do, instead of letting RuntimeError escape.
        try:
            adapter = _get_adapter_for_options(options)
        except RuntimeError as e:
            return ConvertResult(
                success=False,
                warnings=[f"OMR ์–ด๋Œ‘ํ„ฐ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}"],
                debug_info=debug_info,
            )

        all_notes: list[NoteEvent] = []
        processed_pages: list[int] = []
        page_offset: float = 0.0

        for page_num, page_path in enumerate(page_image_paths, start=1):
            omr_page_input = page_path
            page_pre_info: dict = {"applied": False, "reason": "mock mode" if options.mock_mode else "preprocess_enabled=False"}

            if not options.mock_mode and options.preprocess_enabled:
                stem = Path(page_path).stem
                omr_page_input, page_pre_info, page_pre_warns = _run_preprocess_step(
                    page_path, tmp_dir, stem, options
                )
                for w in page_pre_warns:
                    warnings.append(f"[page {page_num}] {w}")

            page_notes, page_warnings, page_debug, _ = _run_omr_and_parse(
                omr_page_input, adapter
            )
            page_debug["preprocess_info"] = page_pre_info
            # NOTE(review): per-page data (including "tempo") is nested under
            # "page_<n>", so debug_info has no top-level "tempo" on this path -
            # confirm whether _build_result() should receive one.
            debug_info[f"page_{page_num}"] = page_debug

            for w in page_warnings:
                warnings.append(f"[page {page_num}] {w}")

            # Policy: if any page fails OMR/parsing, the whole conversion fails.
            # Reason: MML built from partial results may differ from the original
            # piece and would be misleading.
            if page_notes is None:
                debug_info["processed_pages"] = processed_pages
                debug_info["failed_page"] = page_num
                debug_info["note_count"] = len(all_notes)
                return ConvertResult(
                    success=False,
                    warnings=warnings + [
                        f"ํŽ˜์ด์ง€ {page_num}/{len(page_image_paths)} ์ฒ˜๋ฆฌ ์‹คํŒจ๋กœ ์ „์ฒด ์ค‘๋‹จ๋จ. "
                        f"(์‹คํŒจ ์ด์œ ๋Š” ์œ„ warnings ์ฐธ์กฐ)"
                    ],
                    debug_info=debug_info,
                )

            # Append this page on the time axis.
            shifted = _shift_notes(page_notes, page_offset)
            all_notes.extend(shifted)

            if shifted:
                page_offset = max(n.start + n.duration for n in shifted)
            else:
                page_offset += 4.0  # page without notes: leave a gap of 4 beats

            processed_pages.append(page_num)

        debug_info["processed_pages"] = processed_pages
        debug_info["failed_page"] = None
        debug_info["note_count"] = len(all_notes)

        return _build_result(all_notes, options, warnings, debug_info)

    finally:
        # Remove the temporary page images.
        shutil.rmtree(tmp_dir, ignore_errors=True)
279
+
280
+
281
def _run_preprocess_step(
    input_path: str,
    output_dir: str,
    stem: str,
    options: ConvertOptions,
    debug_dir: str = "",
) -> tuple:
    """
    Run the OpenCV preprocessing and return (omr_input_path, pre_info, warnings).

    The result is written to ``<output_dir>/<stem>_pre.png``. When preprocessing
    fails, the original path is returned together with a warning, so the
    pipeline keeps going.
    """
    target = str(Path(output_dir).joinpath(f"{stem}_pre.png"))
    try:
        pre_info = apply_preprocessing(input_path, target, options, debug_dir=debug_dir)
    except PreprocessError as e:
        # Fall back to the untouched input and report why.
        return (
            input_path,
            {"applied": False, "reason": str(e)},
            [f"์ „์ฒ˜๋ฆฌ ์‹คํŒจ, ์›๋ณธ ์ด๋ฏธ์ง€ ์‚ฌ์šฉ: {e}"],
        )
    return target, pre_info, []
301
+
302
+
303
def _run_omr_and_parse(
    image_path: str,
    adapter: OMRAdapter,
    debug_dir: str = "",
) -> tuple[Optional[list[NoteEvent]], list[str], dict, Optional[str]]:
    """
    Run OMR on one input and parse the result into NoteEvents.

    Returns:
        (notes, warnings, debug_info, raw_xml)
        notes is None on failure (OMR error or parse error).
        raw_xml is the XML string when the OMR format is "musicxml", else None.
    """
    debug: dict = {}

    try:
        omr_result = adapter.run(image_path, debug_dir=debug_dir)
        debug["omr_format"] = omr_result.get("format")
        debug["omr_metadata"] = omr_result.get("metadata", {})
    except (RuntimeError, NotImplementedError) as e:
        return None, [f"OMR ์‹คํŒจ: {e}"], debug, None

    is_musicxml = omr_result.get("format") == "musicxml"
    raw_xml: Optional[str] = omr_result.get("data") if is_musicxml else None

    try:
        notes, parse_meta = parse_omr_result(omr_result)
        debug["note_count"] = len(notes)
        debug["tempo"] = parse_meta.get("tempo", 0)
    except ParseError as e:
        return None, [f"ํŒŒ์‹ฑ ์‹คํŒจ: {e}"], debug, None

    # An empty result is not a failure, but it is worth a warning.
    warnings: list[str] = (
        [] if notes else ["์Œํ‘œ๋ฅผ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ์ž…๋ ฅ ์ด๋ฏธ์ง€๋ฅผ ํ™•์ธํ•˜์„ธ์š”."]
    )
    return notes, warnings, debug, raw_xml
341
+
342
+
343
def _assign_chord_parts(notes: list[NoteEvent], max_parts: int = 3) -> list[NoteEvent]:
    """
    Assign parts to notes that start together, ranked by pitch.

    Notes sharing a start time (bucketed to 0.01 beat) are ordered by pitch,
    highest first:
      - rank 1 (highest) -> part_hint=1
      - rank 2           -> part_hint=2
      - rank 3           -> part_hint=3
      - beyond max_parts -> dropped

    Staff and voice are ignored, so parts follow "what sounds together"
    rather than left/right hand. Rests (pitch 0) are passed through unchanged
    after the pitched notes of their group.
    """
    from dataclasses import replace as with_changes

    # Bucket by start time at a resolution of 0.01 beat.
    by_onset: dict[int, list[NoteEvent]] = {}
    for ev in notes:
        onset = round(ev.start * 100)
        if onset not in by_onset:
            by_onset[onset] = []
        by_onset[onset].append(ev)

    assigned: list[NoteEvent] = []
    for chord in by_onset.values():
        pitched = [ev for ev in chord if ev.pitch != 0]
        pitched.sort(key=lambda ev: ev.pitch, reverse=True)  # highest first
        assigned.extend(
            with_changes(ev, part_hint=rank)
            for rank, ev in enumerate(pitched[:max_parts], start=1)
        )
        assigned.extend(ev for ev in chord if ev.pitch == 0)

    return assigned
377
+
378
+
379
def _shift_notes(notes: list[NoteEvent], offset: float) -> list[NoteEvent]:
    """
    Return a new list whose notes start ``offset`` later.

    The input objects are never modified: a zero offset yields a shallow copy
    of the list, any other offset yields copies with an adjusted ``start``.
    """
    from dataclasses import replace

    if offset != 0.0:
        return [replace(ev, start=ev.start + offset) for ev in notes]
    return list(notes)
388
+
389
+
390
def _build_result(
    notes: list[NoteEvent],
    options: ConvertOptions,
    warnings: list[str],
    debug_info: dict,
) -> ConvertResult:
    """
    Split notes into parts, convert them to MML and wrap it all in a ConvertResult.

    ``warnings`` and ``debug_info`` are extended in place. In strict mode any
    warning turns the result into a failure. At least three part strings are
    always available; missing ones are filled with a whole-note rest.
    """
    hinted_notes = _assign_chord_parts(notes, max_parts=3)
    parts = split_parts(hinted_notes, part_count=options.part_count)
    debug_info["part_note_counts"] = [len(part) for part in parts]

    mml, part_mmls, mml_warnings = convert_parts_to_mml(
        parts,
        prefer_sharps=options.prefer_sharps,
        tempo=debug_info.get("tempo", 0),
    )
    warnings.extend(mml_warnings)

    if options.strict_mode and warnings:
        return ConvertResult(success=False, warnings=warnings, debug_info=debug_info)

    # Pad up to three parts with a rest-only MML string.
    part_mmls.extend(["MML@r1;"] * (3 - len(part_mmls)))
    first, second, third = part_mmls[0], part_mmls[1], part_mmls[2]

    return ConvertResult(
        success=True,
        mml=mml,
        part1=first,
        part2=second,
        part3=third,
        warnings=warnings,
        debug_info=debug_info,
    )
422
+
423
+
424
+ # ---------------------------------------------------------------------------
425
+ # ๋ฉ€ํ‹ฐ ์ด๋ฏธ์ง€ โ†’ MML ํŒŒ์ดํ”„๋ผ์ธ (mxl_mml_converter ๊ฒฝ๋กœ)
426
+ # ---------------------------------------------------------------------------
427
+
428
def convert_images_to_mml_parts(
    image_paths: list[str],
    options: Optional[ConvertOptions] = None,
    progress_callback=None,
    mxl_save_dir: str = "",
) -> tuple[list[str], list[str]]:
    """
    Convert several score images (PNG etc.), in page order, into MML parts.

    Each image is run through the OMR adapter, the resulting MusicXML strings
    are collected, and the whole set is converted at once with
    mxl_mml_converter.convert_xml_pages_to_mml_parts().

    Args:
        image_paths: Input image paths in page order.
        options: Conversion options. When None, a non-mock, no-preprocess
            "audiveris" configuration is used.
        progress_callback: Optional callable invoked as
            (page_idx, total, message).
        mxl_save_dir: When non-empty, each page's XML text is also written to
            "<image stem>.mxl" inside this directory (created on demand).

    Returns:
        (mml_parts, warnings). On adapter or OMR failure ``mml_parts`` is the
        single placeholder ["MML@r1;"] and the failure text is the last
        warning.
    """
    from .mxl_mml_converter import convert_xml_pages_to_mml_parts

    if options is None:
        options = ConvertOptions(mock_mode=False, preprocess_enabled=False, engine="audiveris")

    warnings: list[str] = []
    page_xml: list[str] = []
    total = len(image_paths)
    placeholder = ["MML@r1;"]

    try:
        adapter = _get_adapter_for_options(options)
    except RuntimeError as e:
        return placeholder, [f"OMR ์–ด๋Œ‘ํ„ฐ ์ดˆ๊ธฐํ™” ์‹คํŒจ: {e}"]

    # A scratch directory is only needed when real preprocessing will run.
    wants_preprocess = (not options.mock_mode) and options.preprocess_enabled
    pre_tmp = tempfile.mkdtemp(prefix="score_to_mml_pre_") if wants_preprocess else None

    try:
        for i, image_path in enumerate(image_paths, start=1):
            if progress_callback:
                progress_callback(i, total, f"ํŽ˜์ด์ง€ {i}/{total} OMR ์ฒ˜๋ฆฌ ์ค‘: {Path(image_path).name}")

            omr_input = image_path
            if pre_tmp:
                # Preprocessing is best effort: on failure the original image is used.
                try:
                    omr_input, _, pre_warns = _run_preprocess_step(
                        image_path, pre_tmp, Path(image_path).stem, options
                    )
                    for w in pre_warns:
                        warnings.append(f"[ํŽ˜์ด์ง€ {i}] {w}")
                except Exception as e:
                    warnings.append(f"[ํŽ˜์ด์ง€ {i}] ์ „์ฒ˜๋ฆฌ ์‹คํŒจ, ์›๋ณธ ์‚ฌ์šฉ: {e}")

            try:
                omr_result = adapter.run(omr_input)
            except (RuntimeError, NotImplementedError) as e:
                return placeholder, warnings + [f"ํŽ˜์ด์ง€ {i} OMR ์‹คํŒจ: {e}"]

            if omr_result.get("format") != "musicxml":
                return placeholder, warnings + [
                    f"ํŽ˜์ด์ง€ {i}: musicxml ํ˜•์‹์ด ์•„๋‹Œ ๊ฒฐ๊ณผ ({omr_result.get('format')})"
                ]

            xml_data = omr_result["data"]
            page_xml.append(xml_data)

            if mxl_save_dir:
                target_dir = Path(mxl_save_dir)
                target_dir.mkdir(parents=True, exist_ok=True)
                target_file = target_dir / f"{Path(image_path).stem}.mxl"
                target_file.write_text(xml_data, encoding="utf-8")

        if progress_callback:
            progress_callback(total, total, "MML ๋ณ€ํ™˜ ์ค‘...")

        mml_parts, conv_warnings = convert_xml_pages_to_mml_parts(page_xml)
        warnings.extend(conv_warnings)
        return mml_parts, warnings

    finally:
        if pre_tmp:
            shutil.rmtree(pre_tmp, ignore_errors=True)
512
+
513
+
514
+ # ---------------------------------------------------------------------------
515
+ # ๋ฉ€ํ‹ฐ ์—”์ง„ ๋น„๊ต ํŒŒ์ดํ”„๋ผ์ธ
516
+ # ---------------------------------------------------------------------------
517
+
518
def compare_omr_engines(
    input_path: Union[str, list[str]],
    engine_names: list,
    options: Optional[ConvertOptions] = None,
    save_dir: str = "",
) -> ComparisonReport:
    """
    Run the same input through several OMR engines, one after another, and
    return a comparison report.

    Args:
        input_path: Input score path (image, PDF, or a list of image paths).
        engine_names: Engines to run, e.g. ["audiveris", "homr", "oemer"].
        options: Conversion options. Defaults are used when None.
        save_dir: Output directory. When given, every engine writes its
            artifacts into its own sub-folder and the report is saved here.

    Returns:
        ComparisonReport holding one EngineRunResult per engine.

    Important:
        comparison_summary and the heuristic summaries are reference metrics
        only; the final quality judgement must be made by the user.
    """
    if options is None:
        options = ConvertOptions()

    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    input_label = " + ".join(input_path) if isinstance(input_path, list) else input_path
    report = ComparisonReport(
        input_file=input_label,
        timestamp=ts,
        user_review_priority=True,
    )

    # Prepare the output directory.
    if save_dir:
        Path(save_dir).mkdir(parents=True, exist_ok=True)

    for engine_name in engine_names:
        engine_save_dir = ""
        if save_dir:
            engine_save_dir = str(Path(save_dir) / engine_name)
            Path(engine_save_dir).mkdir(parents=True, exist_ok=True)

        run_result = _run_single_engine(
            input_path=input_path,
            engine_name=engine_name,
            options=options,
            save_dir=engine_save_dir,
        )
        report.runs.append(run_result)

    # The suggestion must be set BEFORE the summary is rendered:
    # _build_comparison_summary() reads report.suggested_engine, so building the
    # summary first always produced a text without the recommendation section.
    report.suggested_engine = _suggest_engine(report)  # reference only
    report.notes_for_manual_review = _collect_manual_review_notes(report)
    report.comparison_summary = _build_comparison_summary(report)

    # Persist the report.
    if save_dir:
        _save_comparison_report(save_dir, report)

    return report
579
+
580
+
581
def _run_single_engine(
    input_path: Union[str, list[str]],
    engine_name: str,
    options: ConvertOptions,
    save_dir: str,
) -> EngineRunResult:
    """
    Run the whole pipeline with one engine and return an EngineRunResult.

    When ``input_path`` is a list, the images are processed in order and their
    notes are concatenated on one timeline. Failures are reported through the
    returned EngineRunResult (``stage`` / ``error_message``); exceptions are
    not propagated to the caller.
    """
    result = EngineRunResult(engine_name=engine_name, success=False)

    # Adapter initialisation.
    try:
        adapter = get_engine_adapter(engine_name)
    except RuntimeError as e:
        result.stage = "init"
        result.error_message = str(e)
        return result

    # Preprocess + OMR + parse + MML conversion.
    warnings: list[str] = []
    notes: Optional[list[NoteEvent]] = None
    # Bound before the try block so the generic handler at the bottom can
    # always attach it, even if the failure happens while classifying the input.
    debug_info: dict = {"input_path": input_path, "engine": engine_name}

    is_mock = (engine_name == "mock")

    preprocess_tmpdir = None
    try:
        # Decide image_paths from the input type.
        if isinstance(input_path, list):
            image_paths = input_path
            debug_info = {"input_path": image_paths, "engine": engine_name, "input_type": "multi_image"}
        elif Path(input_path).suffix.lower() == ".pdf":
            debug_info = {"input_path": input_path, "engine": engine_name}
            # NOTE(review): the temp directory created by _convert_pdf_for_engine
            # is not removed anywhere in this function.
            image_paths, pdf_warn = _convert_pdf_for_engine(input_path, options)
            warnings.extend(pdf_warn)
            debug_info["input_type"] = "pdf"
            debug_info["pdf_page_count"] = len(image_paths)
            if not image_paths:
                result.stage = "preprocess"
                result.error_message = "PDF๋ฅผ ์ด๋ฏธ์ง€๋กœ ๋ณ€ํ™˜ํ•˜์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค."
                result.warnings = warnings
                return result
            # Apply the page filter (only when pdf_pages is given).
            if options.pdf_pages:
                total = len(image_paths)
                page_specs = options.pdf_pages
                # All-negative specs mean "exclude these pages" (--skip-pages only).
                if all(p < 0 for p in page_specs):
                    skip = {-p for p in page_specs}
                    filtered = [img for i, img in enumerate(image_paths, 1) if i not in skip]
                else:
                    filtered = []
                    for p in page_specs:
                        if 1 <= p <= total:
                            filtered.append(image_paths[p - 1])
                        else:
                            warnings.append(f"ํŽ˜์ด์ง€ {p}๋Š” ๋ฒ”์œ„๋ฅผ ๋ฒ—์–ด๋‚ฉ๋‹ˆ๋‹ค (์ „์ฒด {total}ํŽ˜์ด์ง€). ๊ฑด๋„ˆ๋œ€.")
                if not filtered:
                    result.stage = "preprocess"
                    result.error_message = f"์ง€์ •ํ•œ ํŽ˜์ด์ง€๊ฐ€ ๋ชจ๋‘ ๋ฒ”์œ„๋ฅผ ๋ฒ—์–ด๋‚ฉ๋‹ˆ๋‹ค (์ „์ฒด {total}ํŽ˜์ด์ง€)."
                    result.warnings = warnings
                    return result
                debug_info["pdf_selected_pages"] = options.pdf_pages
                image_paths = filtered
        else:
            image_paths = [input_path]
            debug_info = {"input_path": input_path, "engine": engine_name, "input_type": "image"}

        all_notes: list[NoteEvent] = []
        all_xml_strings: list[str] = []  # per-page XML, collected for musicxml output
        page_offset = 0.0

        for page_idx, img_path in enumerate(image_paths):
            page_num = page_idx + 1
            omr_input = img_path

            # Preprocess (only for non-mock engines with preprocessing enabled).
            if not is_mock and options.preprocess_enabled:
                preprocess_tmpdir = preprocess_tmpdir or tempfile.mkdtemp(prefix="score_to_mml_cmp_pre_")
                stem = Path(img_path).stem
                omr_input, pre_info, pre_warns = _run_preprocess_step(
                    img_path, preprocess_tmpdir, stem, options,
                    debug_dir=save_dir,
                )
                for w in pre_warns:
                    warnings.append(f"[page {page_num}] {w}")
                debug_info[f"page_{page_num}_preprocess"] = pre_info
            else:
                debug_info[f"page_{page_num}_preprocess"] = {
                    "applied": False,
                    "reason": "mock mode" if is_mock else "preprocess_enabled=False",
                }

            # OMR + parse.
            page_notes, omr_warns, omr_debug, raw_xml = _run_omr_and_parse(
                omr_input, adapter, debug_dir=save_dir
            )
            for w in omr_warns:
                warnings.append(f"[page {page_num}] {w}")
            debug_info[f"page_{page_num}_omr"] = omr_debug

            if page_notes is None:
                result.stage = "omr"
                result.error_message = f"ํŽ˜์ด์ง€ {page_num} OMR/ํŒŒ์‹ฑ ์‹คํŒจ: " + "; ".join(omr_warns)
                result.warnings = warnings
                result.debug_info = debug_info
                return result

            if raw_xml:
                all_xml_strings.append(raw_xml)

            # Append this page after the end of everything seen so far.
            shifted = _shift_notes(page_notes, page_offset)
            all_notes.extend(shifted)
            if shifted:
                page_offset = max(n.start + n.duration for n in shifted)
            else:
                page_offset += 4.0

        notes = all_notes
        debug_info["note_count"] = len(notes)

        # MML conversion: tick-based integer converter when every page produced
        # XML, otherwise the older float-based converter as a fallback.
        try:
            if all_xml_strings and len(all_xml_strings) == len(image_paths):
                from .mxl_mml_converter import convert_xml_pages_to_mml_parts
                mml_list, mml_warns = convert_xml_pages_to_mml_parts(all_xml_strings)
                warnings.extend(mml_warns)
                # part_note_counts is an approximation based on the NoteEvents.
                parts_approx = split_parts(notes, part_count=len(mml_list))
                part_note_counts = [len(p) for p in parts_approx]
                debug_info["part_note_counts"] = part_note_counts
                debug_info["mml_converter"] = "mxl_mml_converter (tick-based)"
            else:
                parts = split_parts(notes, part_count=options.part_count)
                _combined_mml, mml_list, mml_warns = convert_parts_to_mml(parts, prefer_sharps=options.prefer_sharps)
                warnings.extend(mml_warns)
                part_note_counts = [len(p) for p in parts]
                debug_info["part_note_counts"] = part_note_counts
                debug_info["mml_converter"] = "mml_converter (float-based, fallback)"
            while len(mml_list) < 3:
                mml_list.append("MML@r1;")
        except Exception as e:
            result.stage = "convert"
            result.error_message = f"MML ๋ณ€ํ™˜ ์‹คํŒจ: {e}"
            result.warnings = warnings
            result.debug_info = debug_info
            return result

        # Fill in the EngineRunResult.
        notes_dump = _build_notes_dump(notes)
        chord_count = _count_chords(notes)
        heuristic = _compute_heuristics(notes, mml_list, warnings, chord_count)

        result.success = True
        result.warnings = warnings
        result.warning_count = len(warnings)
        result.note_count = len(notes)
        result.chord_count = chord_count
        # Use the counts computed in whichever conversion branch ran; the old
        # code read `parts`, which does not exist in the tick-based branch.
        result.part_note_counts = part_note_counts
        result.mml_parts = mml_list
        result.notes_dump = notes_dump
        result.debug_info = debug_info
        result.heuristic_summary = heuristic

        # Save artifacts.
        if save_dir:
            _save_engine_artifacts(save_dir, result, notes, mml_list)

        return result

    except Exception as e:
        # An unexpected error is always a failed run, even if it surfaced after
        # success had already been flagged.
        result.success = False
        result.stage = result.stage or "unknown"
        result.error_message = f"์˜ˆ์ƒ์น˜ ๋ชปํ•œ ์˜ค๋ฅ˜: {type(e).__name__}: {e}"
        result.warnings = warnings
        result.debug_info = debug_info
        return result

    finally:
        if preprocess_tmpdir:
            shutil.rmtree(preprocess_tmpdir, ignore_errors=True)
763
+
764
+
765
def _convert_pdf_for_engine(
    input_path: str,
    options: ConvertOptions,
) -> tuple:
    """
    Render a PDF into page images inside a fresh temporary directory.

    Returns:
        (pages, warnings). On success ``pages`` is whatever
        convert_pdf_to_images returns and ``warnings`` is empty; the temporary
        directory is left in place for the caller to manage. On
        PDFConvertError the result is ([], [message]).

    The temporary directory is removed again when conversion fails, because
    the caller never learns its path in that case and could not clean it up.
    """
    from .pdf_converter import convert_pdf_to_images, PDFConvertError

    tmp_dir = tempfile.mkdtemp(prefix="score_to_mml_cmp_pdf_")
    try:
        pages = convert_pdf_to_images(input_path, tmp_dir, dpi=options.pdf_dpi)
    except PDFConvertError as e:
        shutil.rmtree(tmp_dir, ignore_errors=True)
        return [], [f"PDF ๋ณ€ํ™˜ ์‹คํŒจ: {e}"]
    except Exception:
        # Still propagated as before, just without leaving the directory behind.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        raise
    return pages, []
777
+
778
+
779
+ # ---------------------------------------------------------------------------
780
+ # Note dump / ํœด๋ฆฌ์Šคํ‹ฑ ํ—ฌํผ
781
+ # ---------------------------------------------------------------------------
782
+
783
# Pitch-class spellings, indexed by (MIDI number % 12).
_NOTE_NAMES_SHARP = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]
_NOTE_NAMES_FLAT = ["C", "Db", "D", "Eb", "E", "F", "Gb", "G", "Ab", "A", "Bb", "B"]


def _midi_to_pitch_name(pitch: int, prefer_sharps: bool = True) -> str:
    """
    Convert a MIDI note number to a name such as "C4" (MIDI 60).

    Zero or negative values are reported as "rest". ``prefer_sharps`` selects
    the sharp or the flat spelling of the black keys.
    """
    if pitch <= 0:
        return "rest"
    octave_index, pitch_class = divmod(pitch, 12)
    spelling = _NOTE_NAMES_SHARP if prefer_sharps else _NOTE_NAMES_FLAT
    return spelling[pitch_class] + str(octave_index - 1)
794
+
795
+
796
def _build_notes_dump(notes: list[NoteEvent], prefer_sharps: bool = True) -> list[dict]:
    """
    Turn a list of NoteEvents into plain dicts that are easy to review.

    Each dict carries index (1-based), start, duration, pitch, pitch_name,
    is_rest, is_chord_note, staff, voice and part_hint. An event counts as a
    chord note when at least one other event shares its start position
    (compared after rounding to 6 decimals).
    """
    if not notes:
        return []

    from collections import defaultdict

    # Positions of the events sharing each (rounded) start time.
    by_start: dict[float, list[int]] = defaultdict(list)
    for position, event in enumerate(notes):
        by_start[round(event.start, 6)].append(position)

    chord_members: set[int] = set()
    for members in by_start.values():
        if len(members) > 1:
            chord_members.update(members)

    return [
        {
            "index": position + 1,
            "start": event.start,
            "duration": event.duration,
            "pitch": event.pitch,
            "pitch_name": _midi_to_pitch_name(event.pitch, prefer_sharps),
            "is_rest": (event.pitch == 0),
            "is_chord_note": (position in chord_members),
            "staff": event.staff,
            "voice": event.voice,
            "part_hint": event.part_hint,
        }
        for position, event in enumerate(notes)
    ]
828
+
829
+
830
def _count_chords(notes: list[NoteEvent]) -> int:
    """Return how many start positions (rounded to 6 decimals) hold more than one event."""
    events_per_start: dict[float, int] = {}
    for event in notes:
        start_key = round(event.start, 6)
        events_per_start[start_key] = events_per_start.get(start_key, 0) + 1
    return sum(1 for count in events_per_start.values() if count > 1)
837
+
838
+
839
def _compute_heuristics(
    notes: list[NoteEvent],
    mml_list: list[str],
    warnings: list[str],
    chord_count: int,
) -> dict:
    """
    Compute a few lightweight reference metrics for one engine run.

    Important: these numbers are not used for the final quality verdict; the
    user's own listening / inspection takes priority.

    Returns:
        A dict with note / chord / rest / warning counts, the number of
        consecutive pitched notes more than 12 semitones apart
        ("pitch_jump_over_octave") and ``mml_empty``. With no notes at all a
        reduced dict is returned (zero counts, ``mml_empty`` True).
    """
    disclaimer = "์ง€ํ‘œ๋Š” ์ฐธ๊ณ ์šฉ. ์ตœ์ข… ํŒ์ •์€ ์‚ฌ์šฉ์ž ์ง์ ‘ ํ™•์ธ."

    if not notes:
        return {
            "note_count": 0,
            "chord_count": 0,
            "mml_empty": True,
            "warning_count": len(warnings),
            "note": disclaimer,
        }

    # Rests are skipped, so a leap is measured between neighbouring pitched notes.
    sounding = [event.pitch for event in notes if event.pitch > 0]
    wide_leaps = sum(
        1 for previous, current in zip(sounding, sounding[1:])
        if abs(current - previous) > 12
    )

    blank_markers = ("MML@;", "")
    every_part_blank = True
    for part in mml_list:
        if part.strip() not in blank_markers:
            every_part_blank = False
            break

    rest_total = len([event for event in notes if event.pitch == 0])

    return {
        "note_count": len(notes),
        "chord_count": chord_count,
        "rest_count": rest_total,
        "pitch_jump_over_octave": wide_leaps,
        "warning_count": len(warnings),
        "mml_empty": every_part_blank,
        "note": disclaimer,
    }
877
+
878
+
879
+ # ---------------------------------------------------------------------------
880
+ # ์‚ฐ์ถœ๋ฌผ ์ €์žฅ ํ—ฌํผ
881
+ # ---------------------------------------------------------------------------
882
+
883
def _save_engine_artifacts(
    engine_save_dir: str,
    result: EngineRunResult,
    notes: list[NoteEvent],
    mml_list: list[str],
) -> None:
    """
    Write the artifacts of one engine run into ``engine_save_dir``.

    Files written:
        result.txt  - MML text of every part, followed by the warnings
        notes.json  - note dump (machine-readable)
        notes.txt   - note dump (human-readable table)
        debug.json  - debug_info + heuristic_summary

    Every file is written best effort: a failure is logged and the remaining
    files are still attempted. The path of each file that was written is
    stored on ``result`` (output_mml_path, output_notes_json_path,
    output_notes_txt_path, output_debug_path). The directory itself is not
    created here.
    """
    import logging

    log = logging.getLogger(__name__)
    d = Path(engine_save_dir)

    # result.txt
    try:
        lines = []
        for i, mml in enumerate(mml_list, start=1):
            lines.append(f"Part {i}")
            lines.append(mml if mml else "MML@;")
            lines.append("")
        if result.warnings:
            lines.append("--- Warnings ---")
            for w in result.warnings:
                lines.append(f" [WARN] {w}")
        mml_path = d / "result.txt"
        mml_path.write_text("\n".join(lines).strip(), encoding="utf-8")
        result.output_mml_path = str(mml_path)
    except Exception:
        log.warning("Could not write result.txt in %s", d, exc_info=True)

    # notes.json
    try:
        notes_dump = result.notes_dump or _build_notes_dump(notes)
        notes_json_path = d / "notes.json"
        notes_json_path.write_text(
            json.dumps(notes_dump, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )
        result.output_notes_json_path = str(notes_json_path)
    except Exception:
        log.warning("Could not write notes.json in %s", d, exc_info=True)

    # notes.txt (human-readable)
    try:
        # Same fallback as notes.json, so both files always describe the same
        # notes (previously this table was empty whenever result.notes_dump was).
        dump_rows = result.notes_dump or _build_notes_dump(notes)
        notes_txt_path = d / "notes.txt"
        lines = [
            "# Note Dump (์‚ฌ๋žŒ์ด ์ฝ๊ธฐ ์‰ฌ์šด ํ˜•์‹)",
            "# ์ฃผ์˜: ์ž๋™ ์ง€ํ‘œ๋Š” ์ฐธ๊ณ ์šฉ. ์ตœ์ข… ํ’ˆ์งˆ์€ ์‚ฌ์šฉ์ž๊ฐ€ ์ง์ ‘ ํ™•์ธํ•˜์„ธ์š”.",
            f"# ์—”์ง„: {result.engine_name}",
            f"# ์ด ๋…ธํŠธ ์ˆ˜: {result.note_count} ํ™”์Œ ๊ทธ๋ฃน: {result.chord_count}",
            "#",
            "# idx start dur pitch name rest chord staff voice",
            "#" + "-" * 65,
        ]
        for n in dump_rows:
            chord_mark = "C" if n.get("is_chord_note") else " "
            rest_mark = "R" if n.get("is_rest") else " "
            lines.append(
                f" {n['index']:04d} "
                f"{n['start']:7.3f} "
                f"{n['duration']:5.3f} "
                f"{n['pitch']:5d} "
                f"{n['pitch_name']:<7s} "
                f"{rest_mark} "
                f"{chord_mark} "
                f"{n['staff']} "
                f"{n['voice']}"
            )
        notes_txt_path.write_text("\n".join(lines), encoding="utf-8")
        result.output_notes_txt_path = str(notes_txt_path)
    except Exception:
        log.warning("Could not write notes.txt in %s", d, exc_info=True)

    # debug.json
    try:
        debug_data = {
            "engine_name": result.engine_name,
            "success": result.success,
            "stage": result.stage,
            "error_message": result.error_message,
            "warnings": result.warnings,
            "heuristic_summary": result.heuristic_summary,
            "debug_info": result.debug_info,
        }
        debug_path = d / "debug.json"
        debug_path.write_text(
            json.dumps(debug_data, ensure_ascii=False, indent=2, default=str),
            encoding="utf-8",
        )
        result.output_debug_path = str(debug_path)
    except Exception:
        log.warning("Could not write debug.json in %s", d, exc_info=True)
979
+
980
+
981
def _build_comparison_summary(report: ComparisonReport) -> str:
    """
    Render a human-readable, multi-line comparison summary.

    The text has a fixed header, one line per engine run (counts for a
    successful run, stage and the first 80 characters of the error for a failed
    one) and, only when ``report.suggested_engine`` is set, a recommendation
    section.
    """
    rule = "=" * 60
    out = [
        rule,
        "OMR ์—”์ง„ ๋น„๊ต ๊ฒฐ๊ณผ",
        rule,
        f"์ž…๋ ฅ ํŒŒ์ผ: {report.input_file}",
        f"์‹คํ–‰ ์‹œ๊ฐ„: {report.timestamp}",
        "",
        "[์ค‘์š”] ์•„๋ž˜ ์ง€ํ‘œ๋Š” ์ฐธ๊ณ ์šฉ์ž…๋‹ˆ๋‹ค.",
        " ์ตœ์ข… ํ’ˆ์งˆ ํ‰๊ฐ€๋Š” ์‚ฌ์šฉ์ž๊ฐ€ ์ง์ ‘ ๊ฒฐ๊ณผ๋ฅผ ํ™•์ธํ•˜์„ธ์š”.",
        "",
        "์—”์ง„๋ณ„ ๊ฒฐ๊ณผ:",
    ]

    for run in report.runs:
        if not run.success:
            status = "์‹คํŒจ"
            detail = f"๋‹จ๊ณ„: {run.stage} ์˜ค๋ฅ˜: {run.error_message[:80]}"
        else:
            status = "์„ฑ๊ณต"
            detail = f"๋…ธํŠธ {run.note_count}๊ฐœ ํ™”์Œ {run.chord_count}๊ฐœ ๊ฒฝ๊ณ  {run.warning_count}๊ฐœ"
        out.append(f" {run.engine_name:<12s}: {status} {detail}")

    if report.suggested_engine:
        out.extend([
            "",
            f"[์ž๋™ ์ฐธ๊ณ  ์ถ”์ฒœ] {report.suggested_engine}",
            " * ์ด ์ถ”์ฒœ์€ ๋…ธํŠธ ์ˆ˜ ๊ธฐ์ค€์ด๋ฉฐ ์ตœ์ข… ํŒ์ •์ด ์•„๋‹™๋‹ˆ๋‹ค.",
            " ๋ฐ˜๋“œ์‹œ ๊ฐ ์—”์ง„ result.txt์™€ notes.txt๋ฅผ ์ง์ ‘ ํ™•์ธํ•˜์„ธ์š”.",
        ])

    out.append(rule)
    return "\n".join(out)
1013
+
1014
+
1015
def _collect_manual_review_notes(report: ComparisonReport) -> list:
    """
    Collect the items a person should look at when reviewing the report.

    The list starts with five fixed checklist lines. One caution line is then
    added per failed run (stage and the first 60 characters of the error), and
    one per successful run whose heuristic summary counts more than five pitch
    jumps over an octave.
    """
    checklist = [
        "์•„๋ž˜ ํ•ญ๋ชฉ์„ ์ง์ ‘ ํ™•์ธํ•˜์„ธ์š”:",
        " 1. ๊ฐ ์—”์ง„์˜ result.txt (MML)๋ฅผ ์žฌ์ƒํ•ด์„œ ์Œ์•…์  ํ’ˆ์งˆ์„ ๋“ค์–ด๋ณด์„ธ์š”.",
        " 2. notes.txt์—์„œ ์ด์ƒํ•œ ํ”ผ์น˜(๊ทน๋‹จ์  ์ ํ”„, ์˜ˆ์ƒ์น˜ ๋ชปํ•œ ์Œ)๋ฅผ ํ™•์ธํ•˜์„ธ์š”.",
        " 3. ํ™”์Œ ๋…ธํŠธ๊ฐ€ ์˜๋„ํ•œ ๋Œ€๋กœ ์ถ”์ถœ๋˜์—ˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”.",
        " 4. ์‰ผํ‘œ ์œ„์น˜์™€ ๊ธธ์ด๊ฐ€ ๋งž๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”.",
    ]
    for run in report.runs:
        if not run.success:
            checklist.append(f" [์ฃผ์˜] {run.engine_name}: ์‹คํŒจ ({run.stage} ๋‹จ๊ณ„) โ€” {run.error_message[:60]}")
            continue
        if run.heuristic_summary.get("pitch_jump_over_octave", 0) > 5:
            cnt = run.heuristic_summary["pitch_jump_over_octave"]
            checklist.append(f" [์ฃผ์˜] {run.engine_name}: 1์˜ฅํƒ€๋ธŒ ์ดˆ๊ณผ ํ”ผ์น˜ ์ ํ”„ {cnt}ํšŒ (์Œ์ดํƒˆ ์˜์‹ฌ)")
    return checklist
1031
+
1032
+
1033
def _suggest_engine(report: ComparisonReport) -> str:
    """
    Name the successful run with the most notes (first one wins a tie).

    This is a simple reference hint, not a verdict. Failed runs and runs
    without any note are ignored; "" is returned when nothing qualifies.
    """
    winner = None
    for run in report.runs:
        if not (run.success and run.note_count > 0):
            continue
        if winner is None or run.note_count > winner.note_count:
            winner = run
    return "" if winner is None else winner.engine_name
1043
+
1044
+
1045
def _save_comparison_report(save_dir: str, report: ComparisonReport) -> None:
    """
    Store comparison_report.json and comparison_summary.txt in ``save_dir``.

    Both files are written best effort and independently of each other: any
    error while producing one of them is ignored.
    """
    target = Path(save_dir)

    # comparison_report.json: the whole report dataclass, serialised.
    try:
        from dataclasses import asdict
        payload = json.dumps(asdict(report), ensure_ascii=False, indent=2, default=str)
        json_file = target / "comparison_report.json"
        json_file.write_text(payload, encoding="utf-8")
    except Exception:
        pass

    # comparison_summary.txt: the pre-rendered summary text.
    try:
        summary_file = target / "comparison_summary.txt"
        summary_file.write_text(report.comparison_summary, encoding="utf-8")
    except Exception:
        pass
1067
+
1068
+
1069
+ # ---------------------------------------------------------------------------
1070
+ # ๋””๋ฒ„๊ทธ ์ €์žฅ ํ—ฌํผ
1071
+ # ---------------------------------------------------------------------------
1072
+
1073
def _prepare_run_debug_dir(input_path: str, base_debug_dir: str) -> str:
    """
    Create a per-run sub-folder below ``base_debug_dir`` and return its path.

    The folder is named "{input_stem}_{YYYYMMDD_HHMMSS}". An empty
    ``base_debug_dir`` disables debug output: nothing is created and "" is
    returned.
    """
    if not base_debug_dir:
        return ""
    stamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    folder_name = f"{Path(input_path).stem}_{stamp}"
    run_dir = Path(base_debug_dir) / folder_name
    run_dir.mkdir(parents=True, exist_ok=True)
    return str(run_dir)
1086
+
1087
+
1088
def _save_debug_log(
    run_debug_dir: str,
    debug_info: dict,
    warnings: list,
    result: Optional[ConvertResult],
) -> None:
    """
    Write debug_log.txt (and result.txt when a result is given) into ``run_debug_dir``.

    Nothing happens when ``run_debug_dir`` is empty. Both files are written
    best effort; errors are ignored.
    """
    if not run_debug_dir:
        return
    target = Path(run_debug_dir)

    # debug_log.txt: one "key: <json value>" line per entry, then the warnings.
    try:
        log_lines = ["=== debug_info ==="]
        log_lines.extend(
            f"{k}: {json.dumps(v, ensure_ascii=False, default=str)}"
            for k, v in debug_info.items()
        )
        if warnings:
            log_lines.append("\n=== warnings ===")
            log_lines.extend(f" [WARN] {w}" for w in warnings)
        (target / "debug_log.txt").write_text("\n".join(log_lines), encoding="utf-8")
    except Exception:
        pass

    # result.txt: the formatted MML result.
    if result is None:
        return
    try:
        (target / "result.txt").write_text(result.format_output(), encoding="utf-8")
    except Exception:
        pass
core/preprocess.py ADDED
@@ -0,0 +1,269 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
core/preprocess.py

Image preprocessing layer.

Responsibilities:
- File validation (path, extension, readability)
- OpenCV-based image quality improvement (intended to raise Audiveris OMR accuracy)

Preprocessing pipeline (apply_preprocessing):
1. Load and validate the image (cv2.imread)
2. Grayscale conversion
3. Light noise removal (GaussianBlur 3x3; only when options.blur_enabled)
4. Binarization (Otsu or Adaptive Threshold; only when options.binarize_enabled)
5. [Optional] Deskew - tilt correction (experimental, when deskew_enabled=True)

Notes:
- apply_preprocessing requires opencv-python (pip install opencv-python)
- Without opencv a PreprocessError is raised; the pipeline handles the fallback
- In mock mode the pipeline does not call this function

Limitations:
- Deskew only works within +/-10 degrees; heavily tilted images cannot be corrected
- Handwriting, shadows and low-resolution images may get worse after binarization
- Pencil-written scores may be better served by Adaptive Threshold
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ from pathlib import Path
31
+ from typing import TYPE_CHECKING
32
+
33
+ if TYPE_CHECKING:
34
+ from .models import ConvertOptions
35
+
36
# Lower-case file suffixes (dot included) that validate_image_path() accepts.
SUPPORTED_EXTENSIONS = {".png", ".jpg", ".jpeg", ".tiff", ".tif", ".bmp", ".pdf"}


class PreprocessError(Exception):
    """Error raised by the preprocessing stage (file validation, image load/save)."""
42
+
43
+
44
+ # ---------------------------------------------------------------------------
45
+ # ํŒŒ์ผ ๊ฒ€์ฆ (opencv ๋ถˆํ•„์š”, ํ•ญ์ƒ ๋™์ž‘)
46
+ # ---------------------------------------------------------------------------
47
+
48
def validate_image_path(input_path: str) -> Path:
    """
    Validate an input file path and return it as a Path.

    Raises:
        PreprocessError: the path does not exist, is not a regular file, or
            its (case-insensitive) suffix is not in SUPPORTED_EXTENSIONS.
    """
    candidate = Path(input_path)

    if not candidate.exists():
        raise PreprocessError(f"ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {input_path}")
    if not candidate.is_file():
        raise PreprocessError(f"ํŒŒ์ผ์ด ์•„๋‹™๋‹ˆ๋‹ค: {input_path}")

    ext = candidate.suffix.lower()
    if ext in SUPPORTED_EXTENSIONS:
        return candidate

    raise PreprocessError(
        f"์ง€์›ํ•˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹: {ext}. "
        f"์ง€์› ํ˜•์‹: {', '.join(sorted(SUPPORTED_EXTENSIONS))}"
    )
69
+
70
+
71
def get_file_info(path: Path) -> dict:
    """Return basic facts about a file: name, lower-cased suffix, size in bytes, resolved path."""
    size = path.stat().st_size
    info = {}
    info["filename"] = path.name
    info["extension"] = path.suffix.lower()
    info["size_bytes"] = size
    info["absolute_path"] = str(path.resolve())
    return info
80
+
81
+
82
def preprocess_image(input_path: str) -> dict:
    """
    Validate the file only; no image processing is performed.

    The returned dict is get_file_info() extended with ``preprocessed_path``
    (the resolved original path) and an empty ``preprocessing_applied`` list.
    Raises whatever validate_image_path() raises for an invalid file.
    """
    checked = validate_image_path(input_path)
    details = get_file_info(checked)
    details.update(
        preprocessed_path=str(checked.resolve()),
        preprocessing_applied=[],
    )
    return details
94
+
95
+
96
+ # ---------------------------------------------------------------------------
97
+ # OpenCV ์ „์ฒ˜๋ฆฌ (opencv-python ํ•„์š”)
98
+ # ---------------------------------------------------------------------------
99
+
100
def apply_preprocessing(
    input_path: str,
    output_path: str,
    options: ConvertOptions,
    debug_dir: str = "",
) -> dict:
    """
    Apply OpenCV-based preprocessing to an image and save it to ``output_path``.

    Steps: grayscale (always), then 3x3 Gaussian blur, binarization and deskew,
    each only when the matching option flag is set.

    Args:
        input_path: Path of the original image.
        output_path: Where the processed image is written (.png recommended).
        options: ConvertOptions; blur_enabled, binarize_enabled,
            binarize_method and deskew_enabled are read.
        debug_dir: When non-empty, the image after each applied step is also
            written into this directory (step_01_..., step_02_..., ...). The
            return value of those writes is not checked.

    Returns:
        dict with
            - applied: True
            - steps: names of the steps that were applied
            - input_path / output_path
            - input_size: (w, h) of the loaded image

    Raises:
        PreprocessError: opencv is not installed, the image cannot be read, or
            the result cannot be written.
    """
    try:
        import cv2
    except ImportError as err:
        # Keep the original ImportError as the cause for easier diagnosis.
        raise PreprocessError(
            "opencv-python์ด ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.\n"
            "์„ค์น˜ ๋ช…๋ น: pip install opencv-python"
        ) from err

    img = cv2.imread(input_path, cv2.IMREAD_COLOR)
    if img is None:
        raise PreprocessError(
            f"์ด๋ฏธ์ง€๋ฅผ ์ฝ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {input_path}\n"
            f"ํŒŒ์ผ์ด ์†์ƒ๋˜์—ˆ๊ฑฐ๋‚˜ ์ง€์›ํ•˜์ง€ ์•Š๋Š” ํ˜•์‹์ผ ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค."
        )

    h, w = img.shape[:2]
    applied: list[str] = []

    def _save_step(name: str, img_data) -> None:
        # Debug snapshot of an intermediate image; a no-op without debug_dir.
        if debug_dir:
            cv2.imwrite(str(Path(debug_dir) / name), img_data)

    # 1. Grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    applied.append("grayscale")
    _save_step("step_01_grayscale.png", gray)

    # 2. Noise removal (light Gaussian blur, optional)
    if options.blur_enabled:
        denoised = cv2.GaussianBlur(gray, (3, 3), 0)
        applied.append("gaussian_blur_3x3")
        _save_step("step_02_gaussian_blur.png", denoised)
    else:
        denoised = gray

    # 3. Binarization (optional)
    processed = denoised
    if options.binarize_enabled:
        processed = _binarize(denoised, options.binarize_method)
        applied.append(f"binarize:{options.binarize_method}")
        _save_step(f"step_03_binarized_{options.binarize_method}.png", processed)

    # 4. Deskew (optional)
    if options.deskew_enabled:
        processed, angle = _deskew(processed)
        # _deskew reports 0.0 whenever it left the image untouched.
        if abs(angle) > 0.01:
            applied.append(f"deskew:{angle:.2f}deg")
            _save_step(f"step_04_deskew_{angle:.2f}deg.png", processed)
        else:
            applied.append("deskew:skipped(angle<0.5)")

    # Save
    ok = cv2.imwrite(output_path, processed)
    if not ok:
        raise PreprocessError(
            f"์ „์ฒ˜๋ฆฌ ์ด๋ฏธ์ง€ ์ €์žฅ ์‹คํŒจ: {output_path}\n"
            f"์ถœ๋ ฅ ๋””๋ ‰ํ† ๋ฆฌ๊ฐ€ ์กด์žฌํ•˜๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”."
        )

    return {
        "applied": True,
        "steps": applied,
        "input_path": input_path,
        "output_path": output_path,
        "input_size": (w, h),
    }
191
+
192
+
193
def _binarize(gray_img, method: str):
    """
    Binarize a grayscale image.

    method:
        "adaptive" : local Gaussian adaptive threshold (blockSize=15, C=8);
                     meant for uneven lighting / pencil-written scores.
        anything else (default "otsu") : global Otsu threshold; meant for
                     scans with clear contrast.

    Raises:
        PreprocessError: opencv is not installed.
    """
    try:
        import cv2
    except ImportError:
        raise PreprocessError("opencv-python์ด ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค.")

    if method != "adaptive":
        # Otsu picks the threshold itself; the 0 passed here is ignored.
        _threshold, otsu_image = cv2.threshold(
            gray_img, 0, 255,
            cv2.THRESH_BINARY + cv2.THRESH_OTSU,
        )
        return otsu_image

    return cv2.adaptiveThreshold(
        gray_img, 255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        blockSize=15, C=8,
    )
219
+
220
+
221
def _deskew(binary_img):
    """
    Correct the tilt of a binarized image.

    Returns:
        (corrected_img, angle_degrees). ``angle_degrees`` is 0.0 and the input
        image is returned unchanged whenever no correction was applied.

    Limitations:
        - Only tilts between 0.5 and 10 degrees are corrected; outside that
          range the original is returned.
        - Only a tilt of the whole page is handled, not of individual staves.
        - With fewer than 200 dark pixels the angle cannot be estimated and the
          original is returned.
        - Without opencv / numpy the original is returned.
    """
    try:
        import cv2
        import numpy as np
    except ImportError:
        return binary_img, 0.0

    # Coordinates of the dark pixels (binarized image: staff lines / notes = 0).
    dark_coords = np.column_stack(np.where(binary_img < 128))
    if len(dark_coords) < 200:
        return binary_img, 0.0

    # (row, col) -> (x=col, y=row), then fit the minimum-area rectangle.
    points = dark_coords[:, ::-1].astype(np.float32)
    rect = cv2.minAreaRect(points)
    angle = rect[2]

    # Fold the angle into [-45, 45]. Older OpenCV builds report it in (-90, 0],
    # newer ones in (0, 90]; only the first case was handled before, so a page
    # tilted by -1 degree but reported as 89 was rejected as "too large".
    if angle < -45:
        angle = 90.0 + angle
    elif angle > 45:
        angle = angle - 90.0

    # Too large to be a scan tilt (e.g. a photo taken sideways).
    if abs(angle) > 10.0:
        return binary_img, 0.0
    # Too small to matter.
    if abs(angle) < 0.5:
        return binary_img, 0.0

    h, w = binary_img.shape
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, angle, 1.0)
    corrected = cv2.warpAffine(
        binary_img, M, (w, h),
        flags=cv2.INTER_LINEAR,
        borderMode=cv2.BORDER_REPLICATE,
    )
    return corrected, angle
core/score_primitives.py ADDED
@@ -0,0 +1,193 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
score_primitives.py - CV-based detection of score elements.
Currently only notehead detection is implemented.
"""
5
+
6
+ import cv2
7
+ import numpy as np
8
+ from dataclasses import dataclass
9
+ from typing import List
10
+
11
+
12
@dataclass
class NoteHead:
    """One notehead candidate produced by the connected-component detector."""

    # Centre of the component (pixels).
    x: int
    y: int
    # Bounding-box size (pixels).
    w: int
    h: int
    # True = solid head (quarter / eighth note),
    # False = hollow head (half / whole note).
    filled: bool
19
+
20
+
21
def _remove_staff_lines(binary: np.ndarray) -> np.ndarray:
    """Return *binary* with long horizontal runs (staff lines) subtracted.

    A morphological opening with a 1-pixel-high horizontal kernel keeps only
    the runs that are at least as wide as the kernel; that result is then
    subtracted from the input. The detectors in this module pass an inverted
    binary image (THRESH_BINARY_INV), i.e. ink is 255 on a 0 background.
    """
    # A run spanning at least 1/5 of the image width (never less than 30 px)
    # is treated as a staff line.
    line_len = max(30, binary.shape[1] // 5)
    line_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (line_len, 1))
    staff_only = cv2.morphologyEx(binary, cv2.MORPH_OPEN, line_kernel)
    return cv2.subtract(binary, staff_only)
28
+
29
+
30
def detect_noteheads(img_path: str) -> List[NoteHead]:
    """
    Detect noteheads in a score image file.

    Args:
        img_path: path to an image that cv2.imread can open (PNG/JPG).

    Returns:
        The detected noteheads, ordered by row band and then by x, exactly as
        produced by detect_noteheads_from_array().

    Raises:
        FileNotFoundError: if cv2.imread cannot load the file.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise FileNotFoundError(f"์ด๋ฏธ์ง€๋ฅผ ์—ด ์ˆ˜ ์—†์Œ: {img_path}")

    # The detection pipeline used to be duplicated here line for line;
    # delegate so the two entry points cannot drift apart.
    return detect_noteheads_from_array(img)
91
+
92
+
93
def detect_noteheads_from_array(arr: np.ndarray) -> List[NoteHead]:
    """
    Detect noteheads in an in-memory score image.

    Pipeline: grayscale -> inverted fixed threshold (180) -> staff-line
    removal -> 8-connected components -> size / aspect-ratio filtering.

    Args:
        arr: BGR (3-D) or grayscale (2-D) image array.

    Returns:
        Noteheads sorted top-to-bottom by row band, then left-to-right.
    """
    img = cv2.cvtColor(arr, cv2.COLOR_BGR2GRAY) if arr.ndim == 3 else arr.copy()
    w_img = img.shape[1]

    _, binary = cv2.threshold(img, 180, 255, cv2.THRESH_BINARY_INV)
    no_lines = _remove_staff_lines(binary)

    n, _labels, stats, centroids = cv2.connectedComponentsWithStats(
        no_lines, connectivity=8
    )

    # Size limits were tuned on a 527 px wide reference image
    # (notehead ~ w 8-70, h 6-55, area 60-4000) and scale with image width.
    scale = w_img / 527.0
    min_area = int(60 * scale ** 2)
    max_area = int(4000 * scale ** 2)
    min_w = int(8 * scale)
    max_w = int(70 * scale)
    min_h = int(6 * scale)
    max_h = int(55 * scale)

    noteheads: List[NoteHead] = []
    for i in range(1, n):  # label 0 is the background component
        _bx, _by, bw, bh, area = stats[i]

        if area < min_area or area > max_area:
            continue
        if bw < min_w or bw > max_w:
            continue
        if bh < min_h or bh > max_h:
            continue

        # Noteheads are roughly elliptical.
        aspect = bw / bh
        if aspect < 0.45 or aspect > 2.2:
            continue

        # Solid vs hollow: share of the bounding box covered by ink.
        fill_ratio = area / (bw * bh)

        # Cast to builtin types so NoteHead really holds int/bool as
        # annotated instead of numpy scalars.
        noteheads.append(NoteHead(
            int(centroids[i][0]),
            int(centroids[i][1]),
            int(bw),
            int(bh),
            bool(fill_ratio > 0.42),
        ))

    # int(30 * scale) is 0 for images narrower than 18 px, which made the
    # sort key divide by zero; clamp the band height to at least one pixel.
    row_bin = max(1, int(30 * scale))
    noteheads.sort(key=lambda nh: (nh.y // row_bin, nh.x))
    return noteheads
140
+
141
+
142
def detect_staff_spacing(arr: np.ndarray) -> float:
    """
    Estimate the staff-line spacing (pixels) from a horizontal projection.

    Args:
        arr: BGR (3-D) or grayscale (2-D) image array.

    Returns:
        Median distance between the centres of the first (at most five)
        detected staff lines, or image height / 8 when fewer than two lines
        are found.
    """
    gray = cv2.cvtColor(arr, cv2.COLOR_BGR2GRAY) if arr.ndim == 3 else arr.copy()
    height, width = gray.shape
    _, ink = cv2.threshold(gray, 180, 255, cv2.THRESH_BINARY_INV)

    # A row counts as part of a staff line when at least 30% of it is ink.
    row_sum = ink.sum(axis=1).astype(float)
    is_line_row = row_sum >= width * 255 * 0.3

    # Locate runs of consecutive line rows via the edges of the padded mask;
    # `ends` are exclusive, so a run touching the bottom ends at `height`.
    padded = np.concatenate(([False], is_line_row, [False])).astype(np.int8)
    edges = np.diff(padded)
    starts = np.flatnonzero(edges == 1)
    ends = np.flatnonzero(edges == -1)
    centers = (starts + ends) // 2

    if len(centers) < 2:
        return float(height) / 8.0

    # Only the first four gaps (i.e. the first staff) are considered.
    return float(np.median(np.diff(centers)[:4]))
180
+
181
+
182
def save_overlay(img_path: str, noteheads: List[NoteHead], out_path: str) -> None:
    """
    Draw the detected noteheads on the source image and write the result.

    Each notehead gets a 1 px rectangle of its bounding-box size centred on
    (x, y) plus a filled dot at the centre: red for filled heads, blue for
    open heads.

    Args:
        img_path: image the noteheads were detected in.
        noteheads: detections to draw.
        out_path: destination passed to cv2.imwrite.

    Raises:
        FileNotFoundError: if cv2.imread cannot load img_path (previously
            this surfaced as an opaque OpenCV error from the first draw call).
    """
    img = cv2.imread(img_path)
    if img is None:
        # Same failure mode and message as detect_noteheads().
        raise FileNotFoundError(f"์ด๋ฏธ์ง€๋ฅผ ์—ด ์ˆ˜ ์—†์Œ: {img_path}")

    for nh in noteheads:
        color = (0, 0, 255) if nh.filled else (255, 0, 0)  # BGR: red / blue
        cv2.rectangle(img,
                      (nh.x - nh.w // 2, nh.y - nh.h // 2),
                      (nh.x + nh.w // 2, nh.y + nh.h // 2),
                      color, 1)
        cv2.circle(img, (nh.x, nh.y), 2, color, -1)
    cv2.imwrite(out_path, img)
    print(f"overlay ์ €์žฅ: {out_path}")
core/xml_corrector.py ADDED
@@ -0,0 +1,1681 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""
core/xml_corrector.py

Automatic MXL/XML correction pipeline.
Detects and corrects Audiveris OMR errors using Verovio rendering,
OpenCV measure segmentation and SSIM comparison.

Dependencies:
    pip install verovio scikit-image lxml cairosvg

Flow:
    MXL -> Verovio render -> measure segmentation
    original PNG -> measure segmentation
    SSIM comparison -> extract suspicious measures
    generate XML edit candidates -> re-render -> re-compare SSIM -> accept
"""
17
+
18
+ from __future__ import annotations
19
+
20
+ import ctypes
21
+ import os
22
+ import tempfile
23
+ import zipfile
24
+ from copy import deepcopy
25
+ from dataclasses import dataclass, field
26
+ from pathlib import Path
27
+ from typing import Iterator, Optional
28
+
29
+ import cv2
30
+ import numpy as np
31
+
32
+ # โ”€โ”€ Cairo DLL ์„ ํ–‰ ๋กœ๋“œ (cairosvg ์˜์กด) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
33
+ _CAIRO_DIR = r"C:\Program Files\Unity Hub\resources\app.asar.unpacked\node_modules\canvas\build\Release"
34
+ _CAIRO_DLLS = [
35
+ "libgcc_s_seh-1.dll", "libwinpthread-1.dll", "libglib-2.0-0.dll",
36
+ "libpixman-1-0.dll", "libpng16-16.dll", "libfreetype-6.dll",
37
+ "libfontconfig-1.dll", "libcairo-2.dll",
38
+ ]
39
+ if os.path.isdir(_CAIRO_DIR):
40
+ for _dll in _CAIRO_DLLS:
41
+ _p = os.path.join(_CAIRO_DIR, _dll)
42
+ if os.path.exists(_p):
43
+ try:
44
+ ctypes.CDLL(_p)
45
+ except OSError:
46
+ pass
47
+
48
+ # โ”€โ”€ lazy import โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
49
+ try:
50
+ import cairosvg as _cairosvg
51
+ _CAIRO_OK = True
52
+ except Exception:
53
+ _CAIRO_OK = False
54
+
55
+ try:
56
+ import verovio as _verovio_mod
57
+ _VEROVIO_OK = True
58
+ except Exception:
59
+ _VEROVIO_OK = False
60
+
61
+ try:
62
+ from lxml import etree as _etree
63
+ _LXML_OK = True
64
+ except Exception:
65
+ import xml.etree.ElementTree as _etree # type: ignore
66
+ _LXML_OK = False
67
+
68
+ try:
69
+ from skimage.metrics import structural_similarity as _ssim
70
+ _SSIM_OK = True
71
+ except Exception:
72
+ _SSIM_OK = False
73
+
74
+
75
+ # โ”€โ”€ ๋ฐ์ดํ„ฐํด๋ž˜์Šค โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
76
+
77
@dataclass
class SystemRegion:
    """Pixel rectangle of one system; used as image[y1:y2, x1:x2]."""

    y1: int  # top row (inclusive)
    y2: int  # bottom row (exclusive when used as a slice bound)
    x1: int  # left column (inclusive)
    x2: int  # right column (exclusive when used as a slice bound)
83
+
84
+
85
@dataclass
class CandidateEdit:
    """One proposed edit to a note, addressed by measure number and note index."""

    measure_number: int
    note_index: int
    # Kind of edit; the accepted values are defined by the code that consumes
    # this record (not visible in this part of the file).
    edit_type: str
    # Optional free-text description of the edit.
    description: str = ""
91
+
92
+
93
@dataclass
class CorrectionResult:
    """Summary of one correction run: an XML path plus counters and warnings."""

    xml_path: str
    # Counters start at zero; they are presumably incremented by the
    # correction pipeline - confirm against the code that fills them in.
    measures_checked: int = 0
    measures_corrected: int = 0
    measures_fallback: int = 0
    # A fresh list per instance (default_factory), never shared.
    warnings: list[str] = field(default_factory=list)
100
+
101
+
102
@dataclass
class StaffBand:
    """Vertical extent of a staff together with its line spacing."""

    y1: int
    y2: int
    # Distance between adjacent staff lines, in pixels.
    spacing_px: float
107
+
108
+
109
+ # โ”€โ”€ ํ—ฌํผ ํ•จ์ˆ˜ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
110
+
111
def _check_deps() -> None:
    """Raise ImportError naming every optional dependency that failed to import.

    The availability flags are set by the guarded imports at module load.
    lxml is not checked here because the module falls back to the standard
    library ElementTree when it is missing.
    """
    requirements = (
        (_VEROVIO_OK, "verovio"),
        (_CAIRO_OK, "cairosvg (๋˜๋Š” Cairo ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ)"),
        (_SSIM_OK, "scikit-image"),
    )
    missing = [label for available, label in requirements if not available]
    if missing:
        raise ImportError(f"XML ๊ต์ •์— ํ•„์š”ํ•œ ํŒจํ‚ค์ง€ ์—†์Œ: {', '.join(missing)}")
121
+
122
+
123
+ def _extract_xml_from_mxl(mxl_path: str) -> str:
124
+ """MXL(zip) ๋˜๋Š” XML ํŒŒ์ผ์—์„œ MusicXML ๋ฌธ์ž์—ด ๋ฐ˜ํ™˜."""
125
+ path = Path(mxl_path)
126
+ if path.suffix.lower() == ".mxl":
127
+ with zipfile.ZipFile(mxl_path) as zf:
128
+ names = zf.namelist()
129
+ xml_name = next(
130
+ (n for n in names
131
+ if n.lower().endswith((".xml", ".musicxml"))
132
+ and "container" not in n.lower()),
133
+ None,
134
+ )
135
+ if xml_name is None:
136
+ raise RuntimeError(f"MXL ๋‚ด๋ถ€์— XML ์—†์Œ: {mxl_path}")
137
+ return zf.read(xml_name).decode("utf-8", errors="replace")
138
+ else:
139
+ return path.read_text(encoding="utf-8", errors="replace")
140
+
141
+
142
+ def _strip_ns(root) -> None:
143
+ """lxml Element์˜ ๋„ค์ž„์ŠคํŽ˜์ด์Šค ์ œ๊ฑฐ (in-place)."""
144
+ for elem in root.iter():
145
+ if hasattr(elem, "tag") and isinstance(elem.tag, str) and "{" in elem.tag:
146
+ elem.tag = elem.tag.split("}", 1)[1]
147
+ # attrib ๋„ค์ž„์ŠคํŽ˜์ด์Šค๋„ ์ œ๊ฑฐ
148
+ new_attrib = {}
149
+ for k, v in elem.attrib.items():
150
+ if "{" in k:
151
+ k = k.split("}", 1)[1]
152
+ new_attrib[k] = v
153
+ elem.attrib.clear()
154
+ elem.attrib.update(new_attrib)
155
+
156
+
157
+ # โ”€โ”€ ๋””๋ฒ„๊ทธ ์ด๋ฏธ์ง€ ํ—ฌํผ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
158
+
159
def _save_debug_pair(
    debug_dir: str, sys_idx: int, m_idx: int,
    xml_num: int, score: float,
    r_crop: np.ndarray, o_crop: np.ndarray,
    proc_r: "np.ndarray | None" = None,
    proc_o: "np.ndarray | None" = None,
    best_cand: "np.ndarray | None" = None,
) -> None:
    """Write side-by-side debug thumbnails for one measure into *debug_dir*.

    Files written (all thumbnails are 200x150 px):
        raw_<name>.png   r_crop | o_crop
        proc_<name>.png  proc_r | proc_o   (only when both are given)
        cand_<name>.png  best_cand         (only when given)
    where <name> encodes the 1-based system and measure indices, the XML
    measure number and the score with three decimals.
    """
    thumb_h, thumb_w = 150, 200

    def _thumb(tile: np.ndarray) -> np.ndarray:
        # Missing or empty crops become a flat light-grey placeholder.
        if tile is None or tile.size == 0:
            return np.ones((thumb_h, thumb_w, 3), np.uint8) * 200
        scaled = cv2.resize(tile, (thumb_w, thumb_h))
        # Grayscale tiles are promoted to BGR so they can be stacked.
        return cv2.cvtColor(scaled, cv2.COLOR_GRAY2BGR) if scaled.ndim == 2 else scaled

    out_dir = Path(debug_dir)
    name = f"r{sys_idx+1}_m{m_idx+1}_xml{xml_num}_score{score:.3f}"

    raw_pair = np.hstack([_thumb(r_crop), _thumb(o_crop)])
    cv2.imwrite(str(out_dir / f"raw_{name}.png"), raw_pair)

    if not (proc_r is None or proc_o is None):
        proc_pair = np.hstack([_thumb(proc_r), _thumb(proc_o)])
        cv2.imwrite(str(out_dir / f"proc_{name}.png"), proc_pair)

    if best_cand is not None:
        cv2.imwrite(str(out_dir / f"cand_{name}.png"), _thumb(best_cand))
185
+
186
+
187
+ # pitch shift ํ—ฌํผ
188
+ _STEP_SEMITONES = {"C": 0, "D": 2, "E": 4, "F": 5, "G": 7, "A": 9, "B": 11}
189
+ _SEMITONE_TO_STEP = {
190
+ 0: ("C", 0), 1: ("C", 1), 2: ("D", 0), 3: ("D", 1), 4: ("E", 0),
191
+ 5: ("F", 0), 6: ("F", 1), 7: ("G", 0), 8: ("G", 1), 9: ("A", 0),
192
+ 10: ("A", 1), 11: ("B", 0),
193
+ }
194
+ _TYPE_ORDER = ["64th", "32nd", "16th", "eighth", "quarter", "half", "whole"]
195
+
196
+
197
+ def _shift_pitch(step: str, alter: int, octave: int, delta: int):
198
+ semi = _STEP_SEMITONES.get(step, 0) + alter + octave * 12
199
+ new_semi = semi + delta
200
+ new_oct, rem = divmod(new_semi, 12)
201
+ ns, na = _SEMITONE_TO_STEP[rem]
202
+ return ns, na, new_oct
203
+
204
+
205
+ # โ”€โ”€ MXLRenderer โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
206
+
207
class MXLRenderer:
    """Render MXL/XML scores to BGR numpy images with Verovio + cairosvg."""

    # Padding (px) added above and below every system bounding box.
    _SYSTEM_MARGIN_PX = 12

    def __init__(
        self,
        scale: int = 40,
        seg_fill_ratio: float = 0.25,
        seg_bar_ratio: float = 0.60,
        seg_gap_factor: float = 5.0,
    ):
        """
        Args:
            scale: Verovio "scale" option.
            seg_fill_ratio: staff_fill_ratio passed to ImageSegmenter.
            seg_bar_ratio: barline_fill_ratio passed to ImageSegmenter.
            seg_gap_factor: system_gap_factor passed to ImageSegmenter.

        Raises:
            ImportError: if verovio, cairosvg or scikit-image is unavailable.
        """
        _check_deps()
        # Kept so that per-candidate toolkits can reuse the same scale without
        # reading it back through getOptions().
        self._scale = scale
        self._tk = self._new_toolkit()
        self._loaded: Optional[str] = None
        # ImageSegmenter parameters (used when splitting systems into measures).
        self._seg_fill = seg_fill_ratio
        self._seg_bar = seg_bar_ratio
        self._seg_gap = seg_gap_factor

    # -- construction helpers ------------------------------------------------

    def _new_toolkit(self):
        """Create a Verovio toolkit configured with this renderer's options."""
        tk = _verovio_mod.toolkit()
        tk.setOptions({
            "scale": self._scale,
            "adjustPageHeight": True,
            "adjustPageWidth": True,
            "svgBoundingBoxes": True,
        })
        return tk

    def _new_segmenter(self) -> "ImageSegmenter":
        """Create an ImageSegmenter with the parameters given to __init__."""
        return ImageSegmenter(
            staff_fill_ratio=self._seg_fill,
            barline_fill_ratio=self._seg_bar,
            system_gap_factor=self._seg_gap,
        )

    # -- loading -------------------------------------------------------------

    def load(self, mxl_path: str) -> None:
        """Load an MXL/XML file; a repeated call with the same path is a no-op.

        Raises:
            RuntimeError: if Verovio rejects the data.
        """
        if self._loaded == mxl_path:
            return
        xml_str = _extract_xml_from_mxl(mxl_path)
        ok = self._tk.loadData(xml_str)
        if not ok:
            raise RuntimeError(f"Verovio loadData ์‹คํŒจ: {mxl_path}")
        self._loaded = mxl_path

    def load_xml_string(self, xml_str: str) -> None:
        """Load an XML string directly (for re-rendering candidates).

        Resets the cached path so that the next load() really reloads.
        """
        self._tk.loadData(xml_str)
        self._loaded = None

    def get_page_count(self) -> int:
        """Return the page count reported by the main toolkit."""
        return self._tk.getPageCount()

    # -- rendering -----------------------------------------------------------

    @staticmethod
    def _png_bytes_to_bgr(png_bytes: bytes) -> np.ndarray:
        """Decode PNG bytes into a BGR array.

        Grayscale input is expanded to BGR; BGRA input is alpha-composited
        onto a white background.

        Raises:
            RuntimeError: if the bytes cannot be decoded.
        """
        arr = np.frombuffer(png_bytes, dtype=np.uint8)
        img = cv2.imdecode(arr, cv2.IMREAD_UNCHANGED)
        if img is None:
            raise RuntimeError("PNG ๋””์ฝ”๋“œ ์‹คํŒจ")
        if img.ndim == 2:
            img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
        elif img.shape[2] == 4:
            alpha = img[:, :, 3:4].astype(np.float32) / 255.0
            bgr = img[:, :, :3].astype(np.float32)
            white = np.full_like(bgr, 255.0)
            img = (bgr * alpha + white * (1.0 - alpha)).astype(np.uint8)
        return img

    @classmethod
    def _render(cls, tk, page_num: int):
        """Render one page of *tk*; return (svg_text, bgr_image)."""
        svg = tk.renderToSVG(page_num)
        png_bytes = _cairosvg.svg2png(bytestring=svg.encode())
        # _png_bytes_to_bgr raises on failure, so the image is never None.
        return svg, cls._png_bytes_to_bgr(png_bytes)

    def render_page(self, page_num: int = 1) -> np.ndarray:
        """Render a page of the loaded score as a BGR numpy array."""
        _svg, img = self._render(self._tk, page_num)
        return img

    # -- SVG analysis --------------------------------------------------------

    @classmethod
    def _parse_system_entries(
        cls, svg: str, img_h: int
    ) -> list[tuple[int, int, list[int]]]:
        """
        Extract, per system, its pixel y-range and its measure numbers.

        SVG user units are converted to pixels with the viewBox height of the
        inner <svg class="definition-scale"> element. Systems without
        measures or without a usable bounding-box rect are skipped.

        Returns:
            [(y1_px, y2_px, [xml_number, ...]), ...] in document order.
        """
        # lxml refuses str input that carries an encoding declaration.
        root = _etree.fromstring(svg.encode()) if _LXML_OK else _etree.fromstring(svg)

        root_tag = root.tag
        svg_ns = root_tag.split("}")[0].lstrip("{") if "}" in root_tag else ""
        prefix = f"{{{svg_ns}}}" if svg_ns else ""
        g_tag, rect_tag, svg_tag = prefix + "g", prefix + "rect", prefix + "svg"

        # viewBox height of the definition-scale element -> y scale factor.
        vb_h = float(img_h)
        for inner in root.iter(svg_tag):
            if "definition-scale" in inner.get("class", ""):
                vb = (inner.get("viewBox") or "").split()
                if len(vb) >= 4:
                    try:
                        vb_h = float(vb[3])
                    except ValueError:
                        pass
                break
        scale_y = img_h / vb_h if vb_h > 0 else 1.0

        margin = cls._SYSTEM_MARGIN_PX
        entries: list[tuple[int, int, list[int]]] = []
        unnumbered = 0  # running substitute for measures without a data-n

        for sys_elem in root.iter(g_tag):
            if sys_elem.get("class", "") != "system":
                continue

            nums: list[int] = []
            for child in sys_elem.iter(g_tag):
                if child.get("class", "") != "measure":
                    continue
                try:
                    n = int(child.get("data-n") or 0)
                except ValueError:
                    n = 0
                if n == 0:
                    unnumbered += 1
                    n = unnumbered
                nums.append(n)
            if not nums:
                continue

            # y-range from the first rect of the system's first direct
            # bounding-box child.
            y1_px = y2_px = None
            for child in sys_elem:
                child_cls = child.get("class", "") if hasattr(child, "get") else ""
                if "bounding-box" not in child_cls:
                    continue
                for r in child.iter(rect_tag):
                    try:
                        y_sv = float(r.get("y", 0))
                        h_sv = float(r.get("height", 0))
                        if h_sv > 0:
                            y1_px = max(0, int(y_sv * scale_y) - margin)
                            y2_px = min(img_h, int((y_sv + h_sv) * scale_y) + margin)
                    except (ValueError, TypeError):
                        pass
                    break
                break

            if y1_px is None:
                continue  # no bounding box: skip this system
            entries.append((y1_px, y2_px, nums))

        return entries

    def _crop_systems(
        self, img: np.ndarray, entries: list[tuple[int, int, list[int]]]
    ) -> list[list[dict]]:
        """Split every system y-range into measures by barline detection.

        Measure numbers and crops are paired in order; surplus items on either
        side are dropped, and systems that yield no pair are omitted.
        """
        segmenter = self._new_segmenter()
        img_w = img.shape[1]
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
        inv = cv2.bitwise_not(binary)

        result: list[list[dict]] = []
        for y1_px, y2_px, xml_nums in entries:
            region = SystemRegion(y1_px, y2_px, 0, img_w)
            xs = segmenter._detect_barlines(inv, region)
            crops = segmenter._crop_measures(img, region, xs)
            paired = [{"xml_number": num, "crop": crop}
                      for num, crop in zip(xml_nums, crops)]
            if paired:
                result.append(paired)
        return result

    # -- public measure extraction -------------------------------------------

    def render_candidate_measures(
        self, xml_str: str, page_num: int = 1
    ) -> Optional[list[list[dict]]]:
        """
        Render a candidate XML with a fresh toolkit instance (crash isolation).

        Returns:
            Same structure as get_measure_data_by_system(), or None when the
            data cannot be loaded, no system is found, or anything fails.
        """
        try:
            tk = self._new_toolkit()
            if not tk.loadData(xml_str):
                return None
            svg, img = self._render(tk, page_num)
            entries = self._parse_system_entries(svg, img.shape[0])
            if not entries:
                return None
            return self._crop_systems(img, entries) or None
        except Exception:
            # Deliberate catch-all: a broken candidate must never abort the
            # caller's search over candidates.
            return None

    def get_measure_data_by_system(self, page_num: int = 1) -> list[list[dict]]:
        """
        Return the measures of a page of the loaded score, grouped by system.

        System y-ranges and measure numbers come from the rendered SVG; the
        measures are cut inside each range by ImageSegmenter barline
        detection. If the SVG yields no system, the whole image is segmented
        by ImageSegmenter instead.

        Returns:
            list[system] of list[{'xml_number': int, 'crop': ndarray}]
        """
        svg, img = self._render(self._tk, page_num)
        entries = self._parse_system_entries(svg, img.shape[0])
        if entries:
            return self._crop_systems(img, entries)

        # Fallback: image-only segmentation.
        # NOTE(review): numbering restarts at 1 in every system, as in the
        # original code - confirm whether a running measure number is wanted.
        result: list[list[dict]] = []
        for crops in self._new_segmenter().get_measures_by_system(img):
            numbered = [{"xml_number": idx, "crop": crop}
                        for idx, crop in enumerate(crops, start=1)]
            if numbered:
                result.append(numbered)
        return result
517
+
518
+
519
+ # โ”€โ”€ ImageSegmenter โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
520
+
521
+ class ImageSegmenter:
522
+ """
523
+ ์•…๋ณด PNG์—์„œ OpenCV ๊ธฐ๋ฐ˜์œผ๋กœ ๋งˆ๋””๋ณ„ crop ์ด๋ฏธ์ง€๋ฅผ ์ถ”์ถœ.
524
+ ๋ณดํ‘œ์„  ๊ฒ€์ถœ โ†’ ์‹œ์Šคํ…œ ๋ถ„๋ฆฌ โ†’ ๋งˆ๋””์„  ๊ฒ€์ถœ โ†’ ๋งˆ๋”” crop.
525
+ """
526
+
527
+ def __init__(
528
+ self,
529
+ staff_fill_ratio: float = 0.25,
530
+ barline_fill_ratio: float = 0.05,
531
+ system_gap_factor: float = 5.0,
532
+ margin_px: int = 10,
533
+ ):
534
+ self.staff_fill_ratio = staff_fill_ratio
535
+ self.barline_fill_ratio = barline_fill_ratio
536
+ self.system_gap_factor = system_gap_factor
537
+ self.margin_px = margin_px
538
+
539
def get_all_measures(self, png_path: str) -> list[np.ndarray]:
    """Return every measure crop of the image file, in page order.

    Raises:
        RuntimeError: if the file content cannot be decoded as an image.
    """
    # Read the bytes with numpy and decode in memory: per the original note,
    # cv2.imread does not handle Korean (non-ASCII) paths on Windows.
    raw = np.fromfile(png_path, dtype=np.uint8)
    page = cv2.imdecode(raw, cv2.IMREAD_COLOR)
    if page is not None:
        return self._segment(page)
    raise RuntimeError(f"์ด๋ฏธ์ง€ ์ฝ๊ธฐ ์‹คํŒจ: {png_path}")
547
+
548
def get_all_measures_from_array(self, img: np.ndarray) -> list[np.ndarray]:
    """Return every measure crop of an in-memory image (e.g. a rendered page)."""
    measure_crops = self._segment(img)
    return measure_crops
551
+
552
def get_measures_by_system(
    self, source, n_systems: Optional[int] = None
) -> list[list[np.ndarray]]:
    """
    Return the measure crops grouped by system.

    Args:
        source: file path (str/Path) or a BGR ndarray.
        n_systems: when given, forces the number of systems that
            _detect_systems splits the page into.

    Returns:
        list[system] of list[crop ndarray]; systems without crops are omitted.

    Raises:
        RuntimeError: if a path is given and it cannot be decoded as an image.
    """
    if isinstance(source, np.ndarray):
        page = source
    else:
        raw = np.fromfile(str(source), dtype=np.uint8)
        page = cv2.imdecode(raw, cv2.IMREAD_COLOR)
        if page is None:
            raise RuntimeError(f"์ด๋ฏธ์ง€ ์ฝ๊ธฐ ์‹คํŒจ: {source}")

    # Ink mask: pixels darker than or equal to 200 become 255.
    grayscale = cv2.cvtColor(page, cv2.COLOR_BGR2GRAY)
    ink = cv2.bitwise_not(cv2.threshold(grayscale, 200, 255, cv2.THRESH_BINARY)[1])

    per_system = (
        self._crop_measures(page, region, self._detect_barlines(ink, region))
        for region in self._detect_systems(ink, n_systems=n_systems)
    )
    return [crops for crops in per_system if crops]
580
+
581
+ def _segment(self, img: np.ndarray) -> list[np.ndarray]:
582
+ gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
583
+ _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
584
+ inv = cv2.bitwise_not(binary) # ์ž‰ํฌ=255
585
+
586
+ systems = self._detect_systems(inv)
587
+ if not systems:
588
+ return []
589
+
590
+ all_crops: list[np.ndarray] = []
591
+ for sys in systems:
592
+ xs = self._detect_barlines(inv, sys)
593
+ crops = self._crop_measures(img, sys, xs)
594
+ all_crops.extend(crops)
595
+ return all_crops
596
+
597
def _detect_systems(
    self, inv: np.ndarray, n_systems: Optional[int] = None
) -> list[SystemRegion]:
    """Locate the vertical extent of each system in an inverted page.

    Rows whose ink count exceeds ``width * staff_fill_ratio`` are treated as
    staff-line rows; rows at most 5 px apart are merged into one line run.
    Runs are then split into systems either at the ``n_systems - 1`` largest
    gaps (when ``n_systems`` is given and >= 1) or at every gap larger than
    ``median_gap * system_gap_factor``.

    Args:
        inv: 2-D mask in which ink is 255.
        n_systems: Optional number of systems to force.

    Returns:
        One full-width ``SystemRegion`` per system, padded by ``margin_px``
        and clamped to the image. Empty when no staff row is found.
    """
    height, width = inv.shape
    ink_per_row = np.sum(inv // 255, axis=1)  # ink pixels per row
    fill_floor = int(width * self.staff_fill_ratio)
    staff_rows = np.where(ink_per_row > fill_floor)[0]
    if len(staff_rows) == 0:
        return []

    # Merge consecutive staff rows into (first_row, last_row) runs.
    runs: list[tuple[int, int]] = []
    run_first = run_last = staff_rows[0]
    for row in staff_rows[1:]:
        if row - run_last > 5:
            runs.append((run_first, run_last))
            run_first = row
        run_last = row
    runs.append((run_first, run_last))

    def region(first_run: int, last_run: int) -> SystemRegion:
        # Padded, clamped region spanning runs[first_run] .. runs[last_run].
        top = max(0, runs[first_run][0] - self.margin_px)
        bottom = min(height, runs[last_run][1] + self.margin_px)
        return SystemRegion(top, bottom, 0, width)

    if len(runs) < 2:
        return [region(0, 0)]

    centers = [(first + last) // 2 for first, last in runs]
    gaps = [below - above for above, below in zip(centers, centers[1:])]

    if n_systems is not None and n_systems >= 1:
        # Forced count: cut at the (n_systems - 1) widest gaps.
        n_cuts = n_systems - 1
        if n_cuts <= 0:
            return [region(0, -1)]  # single system
        widest_first = sorted(
            range(len(gaps)), key=lambda i: gaps[i], reverse=True
        )
        cut_after = sorted(widest_first[:n_cuts])
    else:
        # Automatic: cut wherever the gap exceeds factor * median gap.
        cutoff = float(np.median(gaps)) * self.system_gap_factor
        cut_after = [i for i, gap in enumerate(gaps) if gap > cutoff]

    regions: list[SystemRegion] = []
    first = 0
    for i in cut_after:
        regions.append(region(first, i))
        first = i + 1
    regions.append(region(first, -1))
    return regions
664
+
665
def _detect_barlines(self, inv: np.ndarray, sys: SystemRegion) -> list[int]:
    """Return sorted page x-coordinates of barlines inside one system.

    A column counts as a barline column when its ink count within the system
    exceeds ``system_height * barline_fill_ratio``. Columns at most 4 px
    apart form one barline, reported at the mean x of the run. Barlines
    within ``max(2% of system width, 5)`` px of either edge are dropped.
    """
    system_height = sys.y2 - sys.y1
    if system_height <= 0:
        return []

    column_ink = np.sum(inv[sys.y1:sys.y2, sys.x1:sys.x2] // 255, axis=0)
    fill_floor = int(system_height * self.barline_fill_ratio)
    dense_cols = np.where(column_ink > fill_floor)[0]
    if len(dense_cols) == 0:
        return []

    # Split wherever neighbouring dense columns are more than 4 px apart;
    # each piece becomes one barline at its mean x (page coordinates).
    run_breaks = np.where(np.diff(dense_cols) > 4)[0] + 1
    centers = [
        int(np.mean(run)) + sys.x1 for run in np.split(dense_cols, run_breaks)
    ]

    # Discard detections hugging the system edges.
    edge = max(int((sys.x2 - sys.x1) * 0.02), 5)
    left_limit, right_limit = sys.x1 + edge, sys.x2 - edge
    return sorted(x for x in centers if left_limit < x < right_limit)
692
+
693
def _crop_measures(
    self, img: np.ndarray, sys: SystemRegion, barline_xs: list[int]
) -> list[np.ndarray]:
    """Slice one system into measure crops at the given barline positions.

    Without barlines the whole system is returned as a single crop. Slices
    narrower than 20 px and empty slices are skipped.
    """
    if not barline_xs:
        whole = img[sys.y1:sys.y2, sys.x1:sys.x2]
        return [whole] if whole.size > 0 else []

    edges = [sys.x1, *barline_xs, sys.x2]
    pieces = []
    for left, right in zip(edges, edges[1:]):
        if right - left < 20:
            continue
        piece = img[sys.y1:sys.y2, left:right]
        if piece.size > 0:
            pieces.append(piece)
    return pieces
710
+
711
+
712
+ # โ”€โ”€ CandidateGenerator โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
713
+
714
+ class CandidateGenerator:
715
+ """
716
+ MusicXML์˜ ํŠน์ • ๋งˆ๋””์— ์ œํ•œ๋œ ํŽธ์ง‘ ํ›„๋ณด๋ฅผ ์ƒ์„ฑ.
717
+ lxml ๊ธฐ๋ฐ˜ XML ํŒŒ์‹ฑ + ์Œ์•… ์ด๋ก  ์ œ์•ฝ ๊ฒ€์ฆ.
718
+ """
719
+
720
+ MAX_CANDIDATES = 20
721
+
722
def __init__(self, xml_path: str):
    """Parse ``xml_path`` and keep its tree and root element for editing.

    When lxml is not available (``_LXML_OK`` is false) the root is passed
    through ``_strip_ns`` after parsing.
    """
    self._tree = _etree.parse(xml_path)
    self._root = self._tree.getroot()
    if not _LXML_OK:
        _strip_ns(self._root)
    self._divisions_cache: dict[int, int] = {}
731
+
732
+ # โ”€โ”€ ํผ๋ธ”๋ฆญ API โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
733
+
734
def generate_candidates(
    self, measure_number: int
) -> Iterator[tuple[CandidateEdit, str]]:
    """Yield single-note edit candidates for one measure.

    For every note, in order: pitch +1 / -1 semitone (non-rests), note to
    rest (non-rests that are neither a chord member nor a chord head), then
    the duration edits from ``_duration_candidates``. At most
    ``MAX_CANDIDATES`` candidates are produced in total.

    Yields:
        ``(CandidateEdit, xml_string)`` pairs.
    """
    measure = self._find_measure(self._root, measure_number)
    if measure is None:
        return
    notes = self._get_notes(measure)

    def edits_for(idx, note):
        # All candidates for a single note, computed lazily.
        if note.find("rest") is None:
            # Pitch +/- 1 semitone.
            for delta, etype in ((+1, "pitch_up1"), (-1, "pitch_down1")):
                shifted_xml = self._apply_pitch_shift(measure_number, idx, delta)
                if shifted_xml:
                    edit = CandidateEdit(
                        measure_number, idx, etype,
                        f"note[{idx}] pitch {'+' if delta>0 else ''}{delta}")
                    yield edit, shifted_xml

            # Note -> rest. Chord members and chord heads (the next note
            # carries <chord>) are excluded: the result breaks the MusicXML
            # structure and crashes Verovio.
            in_chord = note.find("chord") is not None
            heads_chord = (
                idx + 1 < len(notes)
                and notes[idx + 1].find("chord") is not None
            )
            if not (in_chord or heads_chord):
                rest_xml = self._apply_to_rest(measure_number, idx)
                if rest_xml:
                    yield CandidateEdit(measure_number, idx, "to_rest",
                                        f"note[{idx}] โ†’ rest"), rest_xml

        # Duration changes.
        yield from self._duration_candidates(measure_number, idx, note)

    emitted = 0
    for idx, note in enumerate(notes):
        if emitted >= self.MAX_CANDIDATES:
            break
        for candidate in edits_for(idx, note):
            yield candidate
            emitted += 1
            if emitted >= self.MAX_CANDIDATES:
                return
787
+
788
def apply_edit(self, edit: CandidateEdit) -> None:
    """Apply an accepted edit permanently to the internal root.

    Edit types other than the pitch, to-rest and duration kinds handled
    below are ignored.
    """
    kind = edit.edit_type
    target = (self._root, edit.measure_number, edit.note_index)
    if kind == "to_rest":
        self._apply_to_rest_inplace(*target)
    elif kind in ("pitch_up1", "pitch_down1"):
        semitones = +1 if kind == "pitch_up1" else -1
        self._apply_pitch_shift_inplace(*target, semitones)
    elif kind in ("dur_x2", "dur_half", "dot_add", "dot_remove"):
        self._apply_duration_inplace(*target, kind)
803
+
804
def to_xml_string(self) -> str:
    """Return the current XML tree serialized as a string."""
    options = {"encoding": "unicode"}
    if _LXML_OK:
        # Only the lxml path passes xml_declaration explicitly.
        options["xml_declaration"] = False
    return _etree.tostring(self._root, **options)
812
+
813
def save(self, out_path: str) -> None:
    """Write the current XML to ``out_path`` as UTF-8 text."""
    Path(out_path).write_text(self.to_xml_string(), encoding="utf-8")
817
+
818
+ # โ”€โ”€ ๋‚ด๋ถ€ ํ—ฌํผ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
819
+
820
+ def _find_measure(self, root, number: int):
821
+ """์ฃผ์–ด์ง„ ๋ฒˆํ˜ธ์˜ ๋งˆ๋”” ์ค‘ ์Œํ‘œ(non-rest note)๊ฐ€ ๊ฐ€์žฅ ๋งŽ์€ ํŒŒํŠธ์˜ ๋งˆ๋”” ๋ฐ˜ํ™˜.
822
+ ๋‹ค์„ฑ๋ถ€ ์•…๋ณด์—์„œ ์ฒซ ํŒŒํŠธ๊ฐ€ ์‰ผํ‘œ๋งŒ ์žˆ์„ ๋•Œ ํ›„๋ณด๊ฐ€ ์ƒ์„ฑ๋˜์ง€ ์•Š๋Š” ๋ฌธ์ œ ๋ฐฉ์ง€."""
823
+ candidates = []
824
+ for m in root.iter("measure"):
825
+ try:
826
+ if int(m.get("number", 0)) == number:
827
+ note_count = sum(
828
+ 1 for n in m.iter("note")
829
+ if n.find("rest") is None
830
+ )
831
+ candidates.append((note_count, m))
832
+ except (ValueError, TypeError):
833
+ pass
834
+ if candidates:
835
+ candidates.sort(key=lambda x: x[0], reverse=True)
836
+ return candidates[0][1]
837
+ # fallback: ์ˆœ์„œ ๊ธฐ๋ฐ˜
838
+ measures = list(root.iter("measure"))
839
+ if 1 <= number <= len(measures):
840
+ return measures[number - 1]
841
+ return None
842
+
843
+ def _get_notes(self, measure) -> list:
844
+ return list(measure.iter("note"))
845
+
846
+ def _get_divisions(self, measure_number: int) -> int:
847
+ if measure_number in self._divisions_cache:
848
+ return self._divisions_cache[measure_number]
849
+ for m in self._root.iter("measure"):
850
+ try:
851
+ n = int(m.get("number", 0))
852
+ except (ValueError, TypeError):
853
+ n = 0
854
+ div_el = m.find(".//divisions")
855
+ if div_el is not None and div_el.text:
856
+ try:
857
+ d = int(div_el.text)
858
+ self._divisions_cache[n] = d
859
+ except ValueError:
860
+ pass
861
+ return self._divisions_cache.get(measure_number, 1)
862
+
863
+ def _expected_duration(self, measure_number: int) -> Optional[int]:
864
+ """๋ฐ•์žํ‘œ ๊ธฐ๋ฐ˜ ์˜ˆ์ƒ ๋งˆ๋”” ์ด duration (divisions ๋‹จ์œ„)."""
865
+ divisions = self._get_divisions(measure_number)
866
+ # ๋ฐ•์žํ‘œ ํƒ์ƒ‰
867
+ beats, beat_type = 4, 4
868
+ for m in self._root.iter("measure"):
869
+ try:
870
+ n = int(m.get("number", 0))
871
+ except (ValueError, TypeError):
872
+ n = 0
873
+ ts = m.find(".//time")
874
+ if ts is not None:
875
+ b = ts.find("beats")
876
+ bt = ts.find("beat-type")
877
+ if b is not None and bt is not None:
878
+ try:
879
+ beats = int(b.text)
880
+ beat_type = int(bt.text)
881
+ except (ValueError, TypeError):
882
+ pass
883
+ if n >= measure_number:
884
+ break
885
+ # 4๋ถ„์Œํ‘œ 1๋ฐ• = divisions ๋‹จ์œ„
886
+ quarter_dur = divisions
887
+ beat_dur = quarter_dur * 4 // beat_type
888
+ return beats * beat_dur
889
+
890
+ def _measure_actual_duration(self, measure) -> int:
891
+ total = 0
892
+ for note in measure.iter("note"):
893
+ if note.find("chord") is not None:
894
+ continue
895
+ dur_el = note.find("duration")
896
+ if dur_el is not None and dur_el.text:
897
+ try:
898
+ total += int(dur_el.text)
899
+ except ValueError:
900
+ pass
901
+ return total
902
+
903
+ def _validate_measure(self, root, measure_number: int) -> bool:
904
+ """์ˆ˜์ • ํ›„ ๋งˆ๋”” ๊ธธ์ด๊ฐ€ ๋ฐ•์žํ‘œ์™€ ๋งž๋Š”์ง€ ๊ฒ€์ฆ."""
905
+ m = self._find_measure(root, measure_number)
906
+ if m is None:
907
+ return False
908
+ expected = self._expected_duration(measure_number)
909
+ if expected is None:
910
+ return True
911
+ actual = self._measure_actual_duration(m)
912
+ return actual == expected
913
+
914
+ # โ”€โ”€ ํŽธ์ง‘ ์ƒ์„ฑ (deepcopy โ†’ xml string) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
915
+
916
def _apply_pitch_shift(
    self, measure_number: int, note_idx: int, delta: int
) -> Optional[str]:
    """Return the XML of a copy of the score with one note shifted.

    The internal root is left untouched. Returns ``None`` when the in-place
    shift reports failure.
    """
    candidate_root = deepcopy(self._root)
    shifted = self._apply_pitch_shift_inplace(
        candidate_root, measure_number, note_idx, delta
    )
    if not shifted:
        return None
    return _etree.tostring(candidate_root, encoding="unicode")
925
+
926
def _apply_pitch_shift_inplace(self, root, measure_number, note_idx, delta) -> bool:
    """Shift the pitch of one note in ``root`` by ``delta`` semitones.

    Returns:
        ``True`` when the note was modified. ``False`` when the measure or
        note cannot be resolved, the note has no ``<pitch>`` with ``<step>``
        and ``<octave>``, or the existing alter/octave text is not numeric.
    """
    m = self._find_measure(root, measure_number)
    if m is None:
        return False
    notes = self._get_notes(m)
    if note_idx >= len(notes):
        return False
    pitch = notes[note_idx].find("pitch")
    if pitch is None:
        return False
    step_el = pitch.find("step")
    alter_el = pitch.find("alter")
    octave_el = pitch.find("octave")
    if step_el is None or octave_el is None:
        return False

    try:
        alter = (
            int(float(alter_el.text))
            if alter_el is not None and alter_el.text
            else 0
        )
        octave = int(octave_el.text or "4")
    except (ValueError, TypeError):
        # Malformed pitch data: report "no candidate" instead of raising.
        return False

    new_step, new_alter, new_octave = _shift_pitch(
        step_el.text or "C", alter, octave, delta
    )
    step_el.text = new_step
    octave_el.text = str(new_octave)
    if alter_el is not None:
        alter_el.text = str(new_alter)
    elif new_alter != 0:
        # <pitch> children are ordered step, alter, octave, so the new
        # <alter> goes right after <step> rather than at the end.
        created = pitch.makeelement("alter", {})
        created.text = str(new_alter)
        pitch.insert(list(pitch).index(step_el) + 1, created)
    return True
954
+
955
def _apply_to_rest(self, measure_number: int, note_idx: int) -> Optional[str]:
    """Return the XML of a copy of the score with one note turned into a rest.

    The conversion is delegated to ``_apply_to_rest_inplace`` on a deep copy,
    so the candidate always matches what ``apply_edit`` would later apply.
    Returns ``None`` when the measure or note index cannot be resolved.
    """
    candidate_root = deepcopy(self._root)
    measure = self._find_measure(candidate_root, measure_number)
    if measure is None or note_idx >= len(self._get_notes(measure)):
        return None
    self._apply_to_rest_inplace(candidate_root, measure_number, note_idx)
    return _etree.tostring(candidate_root, encoding="unicode")
978
+
979
+ def _apply_to_rest_inplace(self, root, measure_number, note_idx) -> None:
980
+ m = self._find_measure(root, measure_number)
981
+ if m is None:
982
+ return
983
+ notes = self._get_notes(m)
984
+ if note_idx >= len(notes):
985
+ return
986
+ note = notes[note_idx]
987
+ pitch = note.find("pitch")
988
+ if pitch is not None:
989
+ note.remove(pitch)
990
+ if note.find("rest") is None:
991
+ rest_el = _etree.SubElement(note, "rest")
992
+ note.remove(rest_el)
993
+ note.insert(0, rest_el)
994
+ for tie in note.findall("tie"):
995
+ note.remove(tie)
996
+
997
def _duration_candidates(
    self, measure_number: int, note_idx: int, note
) -> list[tuple[CandidateEdit, str]]:
    """Build the duration-edit candidates for one note.

    Tried in this order: ``dur_x2`` (next entry in ``_TYPE_ORDER``),
    ``dur_half`` (previous entry), then ``dot_add`` for an undotted note or
    ``dot_remove`` for a dotted one. Only candidates for which
    ``_make_duration_candidate`` returns XML are kept.

    Returns an empty list when the note lacks ``<type>`` or ``<duration>``
    or its duration is not an integer.

    NOTE(review): dur_x2/dur_half drop the dot but scale the raw (possibly
    dotted) duration, so type and duration may disagree for dotted notes -
    confirm this is intended.
    """
    type_el = note.find("type")
    dur_el = note.find("duration")
    if type_el is None or dur_el is None:
        return []
    cur_type = type_el.text or "quarter"
    try:
        cur_dur = int(dur_el.text)
    except (ValueError, TypeError):
        return []

    cur_idx = _TYPE_ORDER.index(cur_type) if cur_type in _TYPE_ORDER else -1
    is_dotted = note.find("dot") is not None

    # Each plan: (edit_type, new_dur, new_type, dot kwargs, description).
    plans = []
    if 0 <= cur_idx < len(_TYPE_ORDER) - 1:
        # dur_x2: one step up.
        plans.append((
            "dur_x2", cur_dur * 2, _TYPE_ORDER[cur_idx + 1],
            {"remove_dot": True},
            f"note[{note_idx}] {cur_type}โ†’{_TYPE_ORDER[cur_idx+1]}",
        ))
    if cur_idx > 0 and cur_dur // 2 > 0:
        # dur_half: one step down.
        plans.append((
            "dur_half", cur_dur // 2, _TYPE_ORDER[cur_idx - 1],
            {"remove_dot": True},
            f"note[{note_idx}] {cur_type}โ†’{_TYPE_ORDER[cur_idx-1]}",
        ))
    if not is_dotted:
        # dot_add: only when the note has no dot yet.
        plans.append((
            "dot_add", int(cur_dur * 1.5), cur_type,
            {"add_dot": True},
            f"note[{note_idx}] +dot",
        ))
    else:
        # dot_remove: only when the note is dotted.
        plans.append((
            "dot_remove", int(cur_dur / 1.5), cur_type,
            {"remove_dot": True},
            f"note[{note_idx}] -dot",
        ))

    results = []
    for edit_type, new_dur, new_type, dot_args, description in plans:
        cand = self._make_duration_candidate(
            measure_number, note_idx, edit_type, new_dur, new_type, **dot_args
        )
        if cand:
            results.append((
                CandidateEdit(measure_number, note_idx, edit_type, description),
                cand,
            ))
    return results
1067
+
1068
def _make_duration_candidate(
    self,
    measure_number: int,
    note_idx: int,
    edit_type: str,
    new_dur: int,
    new_type: str,
    add_dot: bool = False,
    remove_dot: bool = False,
) -> Optional[str]:
    """Return the XML of a copy of the score with one duration edit applied.

    The edit is applied to a deep copy of the root. Returns ``None`` when
    the edited measure does not pass ``_validate_measure``.
    """
    candidate_root = deepcopy(self._root)
    self._apply_duration_inplace(
        candidate_root,
        measure_number,
        note_idx,
        edit_type,
        new_dur=new_dur,
        new_type=new_type,
        add_dot=add_dot,
        remove_dot=remove_dot,
    )
    if self._validate_measure(candidate_root, measure_number):
        return _etree.tostring(candidate_root, encoding="unicode")
    return None
1089
+
1090
+ def _apply_duration_inplace(
1091
+ self, root, measure_number, note_idx, edit_type,
1092
+ new_dur=None, new_type=None, add_dot=False, remove_dot=False
1093
+ ) -> None:
1094
+ m = self._find_measure(root, measure_number)
1095
+ if m is None:
1096
+ return
1097
+ notes = self._get_notes(m)
1098
+ if note_idx >= len(notes):
1099
+ return
1100
+ note = notes[note_idx]
1101
+ dur_el = note.find("duration")
1102
+ type_el = note.find("type")
1103
+ dot_el = note.find("dot")
1104
+
1105
+ if dur_el is None or type_el is None:
1106
+ return
1107
+
1108
+ cur_type = type_el.text or "quarter"
1109
+ try:
1110
+ cur_dur = int(dur_el.text)
1111
+ except (ValueError, TypeError):
1112
+ return
1113
+
1114
+ cur_idx = _TYPE_ORDER.index(cur_type) if cur_type in _TYPE_ORDER else -1
1115
+
1116
+ if edit_type == "dur_x2" and new_dur is None:
1117
+ if cur_idx + 1 < len(_TYPE_ORDER):
1118
+ new_dur = cur_dur * 2
1119
+ new_type = _TYPE_ORDER[cur_idx + 1]
1120
+ else:
1121
+ return
1122
+ elif edit_type == "dur_half" and new_dur is None:
1123
+ if cur_idx > 0 and cur_dur // 2 > 0:
1124
+ new_dur = cur_dur // 2
1125
+ new_type = _TYPE_ORDER[cur_idx - 1]
1126
+ else:
1127
+ return
1128
+ elif edit_type == "dot_add" and new_dur is None:
1129
+ new_dur = int(cur_dur * 1.5)
1130
+ new_type = cur_type
1131
+ add_dot = True
1132
+ elif edit_type == "dot_remove" and new_dur is None:
1133
+ new_dur = int(cur_dur / 1.5)
1134
+ new_type = cur_type
1135
+ remove_dot = True
1136
+
1137
+ if new_dur is not None:
1138
+ dur_el.text = str(new_dur)
1139
+ if new_type is not None:
1140
+ type_el.text = new_type
1141
+ if add_dot and dot_el is None:
1142
+ _etree.SubElement(note, "dot")
1143
+ if remove_dot and dot_el is not None:
1144
+ note.remove(dot_el)
1145
+
1146
+
1147
+ # โ”€โ”€ XMLCorrector โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
1148
+
1149
+ class XMLCorrector:
1150
+ """
1151
+ ๋‹จ์ผ MXL ํŒŒ์ผ์— ๋Œ€ํ•œ ์ „์ฒด ๊ต์ • ๋ฃจํ”„.
1152
+ """
1153
+
1154
+ SSIM_SUSPECT = 0.75
1155
+ SSIM_IMPROVE = 0.02
1156
+ MIN_CROP_SIZE = 32 # SSIM ๊ณ„์‚ฐ ์ตœ์†Œ ํฌ๊ธฐ
1157
+
1158
def __init__(
    self,
    ssim_threshold: float = SSIM_SUSPECT,
    ssim_min_improvement: float = SSIM_IMPROVE,
    render_scale: int = 70,
):
    """Check dependencies and build the renderer and the scan segmenter.

    Args:
        ssim_threshold: Stored as ``self.ssim_threshold``.
        ssim_min_improvement: Stored as ``self.ssim_min_improvement``.
        render_scale: Passed to ``MXLRenderer`` as ``scale``.
    """
    _check_deps()
    self.ssim_threshold, self.ssim_min_improvement = (
        ssim_threshold,
        ssim_min_improvement,
    )
    self._renderer = MXLRenderer(scale=render_scale)

    # Segmenter for the original scanned image:
    #   barline_fill_ratio=0.85 - detect only the real barlines of a piano
    #       grand staff (clefs and braces excluded)
    #   system_gap_factor=9.0   - tell the treble/bass gap inside a grand
    #       staff apart from the gap between systems
    segmenter_options = {
        "barline_fill_ratio": 0.85,
        "system_gap_factor": 9.0,
    }
    self._segmenter = ImageSegmenter(**segmenter_options)
1175
+
1176
def correct(
    self,
    mxl_path: str,
    orig_png_path: str,
    out_xml_path: str,
    debug_dir: Optional[str] = None,
) -> CorrectionResult:
    """Run the full correction loop for a single MXL file.

    Rendered measures are aligned with measures cut from the original scan.
    For each aligned pair scoring below ``ssim_threshold``, every candidate
    edit of that measure is rendered and scored, and the best sufficiently
    improving one is applied. Up to three rounds are run, stopping early
    when a round applies no edit.

    Args:
        mxl_path: MXL produced by Audiveris.
        orig_png_path: Original score PNG (before preprocessing).
        out_xml_path: Where the corrected XML is written.
        debug_dir: If given, per-measure comparison images are saved there.

    Returns:
        A ``CorrectionResult`` with counters and diagnostic messages. When
        rendering or segmentation fails, the uncorrected XML extracted from
        the MXL is written instead.
    """
    result = CorrectionResult(xml_path=out_xml_path)

    def write_uncorrected() -> None:
        # Fallback output: the XML exactly as extracted from the MXL.
        Path(out_xml_path).write_text(
            _extract_xml_from_mxl(mxl_path), encoding="utf-8"
        )

    # -- 1. Rendering: measures per system from SVG bounding boxes --
    self._renderer.load(mxl_path)
    try:
        rendered_by_sys = self._renderer.get_measure_data_by_system(1)
    except Exception as e:
        result.warnings.append(f"๋ Œ๋”๋ง ๋งˆ๋”” ์ถ”์ถœ ์‹คํŒจ: {e}")
        write_uncorrected()
        return result

    # -- 2. Original PNG: measures per system via OpenCV --
    # No n_systems: the number of systems in the scan is detected
    # automatically from the gap factor.
    try:
        orig_by_sys = self._segmenter.get_measures_by_system(orig_png_path)
    except Exception as e:
        result.warnings.append(f"์›๋ณธ PNG ๋งˆ๋”” ์ถ”์ถœ ์‹คํŒจ: {e}")
        write_uncorrected()
        return result

    if not rendered_by_sys or not orig_by_sys:
        result.warnings.append("๋งˆ๋”” ๋ถ„ํ•  ์‹คํŒจ โ€” ๊ต์ • ์ƒ๋žต")
        write_uncorrected()
        return result

    if debug_dir:
        Path(debug_dir).mkdir(parents=True, exist_ok=True)

    # -- 3. Load the XML --
    # CandidateGenerator parses the file in its constructor, so the temp
    # file is only needed until then. Removing it in ``finally`` keeps it
    # from leaking when parsing or anything later raises.
    xml_text = _extract_xml_from_mxl(mxl_path)
    tf = tempfile.NamedTemporaryFile(
        suffix=".xml", delete=False, mode="w", encoding="utf-8"
    )
    try:
        with tf:
            tf.write(xml_text)
        generator = CandidateGenerator(tf.name)
    finally:
        try:
            os.unlink(tf.name)
        except OSError:
            pass

    # -- 4. Flatten: match xml_number order 1:1 with scan reading order --
    # The rendered and scanned layouts may differ; matching still works as
    # long as each side keeps its own measure order.
    #   rendered_flat: sorted by ascending xml_number
    #   orig_flat    : system order, then measure order
    def _flatten_rendered(by_sys):
        flat = []
        for sys_list in by_sys:
            flat.extend(sys_list)
        flat.sort(key=lambda m: m["xml_number"])
        return flat

    orig_flat: list[np.ndarray] = [
        crop for sys_list in orig_by_sys for crop in sys_list
    ]

    if not orig_flat:
        result.warnings.append("๋น„๊ตํ•  ๋งˆ๋”” ์—†์Œ")
        generator.save(out_xml_path)
        return result

    result.warnings.append(
        f"orig ๋งˆ๋”” ์ˆ˜: {len(orig_flat)}"
    )

    # -- 5. Alignment-based round loop --
    # NOTE(review): self.ssim_min_improvement is stored but not consulted
    # here; acceptance uses the literal 0.005 / 0.015 thresholds below.
    MAX_ROUNDS = 3
    for _round in range(MAX_ROUNDS):
        rendered_by_sys = self._renderer.get_measure_data_by_system(1)
        rendered_flat = _flatten_rendered(rendered_by_sys)

        result.warnings.append(
            f"[round {_round+1}] rendered={len(rendered_flat)}, "
            f"orig={len(orig_flat)}"
        )

        pairs = self._align_flat(rendered_flat, orig_flat)
        if not pairs:
            break

        edits_this_round = 0

        for r_idx, o_idx in pairs:
            r_entry = rendered_flat[r_idx]
            o_crop = orig_flat[o_idx]
            xml_number = r_entry["xml_number"]
            r_crop = r_entry["crop"]

            split_log: list[str] = []
            base_score, proc_r, proc_o = self._measure_score(
                r_crop, o_crop, warnings_out=split_log)

            result.warnings.append(
                f" r{r_idx+1}โ†”o{o_idx+1} xml#{xml_number} "
                f"r={r_crop.shape[:2]} o={o_crop.shape[:2]} "
                f"score={base_score:.3f}"
                + (f" [{'; '.join(s.strip() for s in split_log)}]" if split_log else "")
            )
            if debug_dir:
                _save_debug_pair(debug_dir, _round, r_idx,
                                 xml_number, base_score,
                                 r_crop, o_crop, proc_r, proc_o)

            if base_score >= self.ssim_threshold:
                continue

            result.measures_checked += 1
            best_score = base_score
            best_edit: Optional[CandidateEdit] = None
            best_cand_crop: Optional[np.ndarray] = None

            cand_scores: list[str] = []
            for edit, cand_xml_str in generator.generate_candidates(xml_number):
                cand_by_sys = self._renderer.render_candidate_measures(
                    cand_xml_str, 1)
                if cand_by_sys is None:
                    result.warnings.append(
                        f" xml#{xml_number} ํ›„๋ณด ๋ Œ๋” ์‹คํŒจ"
                    )
                    continue
                cand_flat = _flatten_rendered(cand_by_sys)
                if r_idx >= len(cand_flat):
                    result.warnings.append(
                        f" xml#{xml_number} ํ›„๋ณด r_idx={r_idx} "
                        f"๋ฒ”์œ„ ์ดˆ๊ณผ (cand_flat={len(cand_flat)})"
                    )
                    continue
                cand_crop = cand_flat[r_idx]["crop"]
                score, _, _ = self._measure_score(cand_crop, o_crop)
                cand_scores.append(f"{edit.edit_type}={score:.3f}")

                # Accept only gains that are meaningful both in absolute
                # terms and relative to the remaining headroom.
                improvement = score - base_score
                relative = improvement / max(1e-6, 1.0 - base_score)
                if improvement >= 0.005 and relative >= 0.015:
                    if score > best_score:
                        best_score = score
                        best_edit = edit
                        best_cand_crop = cand_crop
            result.warnings.append(
                f" xml#{xml_number} ํ›„๋ณด์ ์ˆ˜: "
                + (", ".join(cand_scores) if cand_scores else "ํ›„๋ณด์—†์Œ")
            )

            if best_edit is not None:
                generator.apply_edit(best_edit)
                result.measures_corrected += 1
                edits_this_round += 1
                current_xml_str = generator.to_xml_string()
                self._renderer.load_xml_string(current_xml_str)
                # Remaining measures of this round are judged against the
                # refreshed rendering.
                rendered_by_sys = self._renderer.get_measure_data_by_system(1)
                rendered_flat = _flatten_rendered(rendered_by_sys)
                if debug_dir and best_cand_crop is not None:
                    _save_debug_pair(debug_dir, _round, r_idx,
                                     xml_number, best_score,
                                     best_cand_crop, o_crop,
                                     best_cand=best_cand_crop)
            else:
                result.warnings.append(
                    f" xml#{xml_number}: score={base_score:.3f}, "
                    f"๊ต์ • ํ›„๋ณด ์—†์Œ"
                )
                result.measures_fallback += 1

        if edits_this_round == 0:
            break  # no further improvement: stop early

    # -- 6. Save the corrected XML --
    generator.save(out_xml_path)
    return result
1365
+
1366
+ # โ”€โ”€ ๋น„๊ต ์—”์ง„ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
1367
+
1368
@staticmethod
def _prepare_compare_image(img: np.ndarray) -> np.ndarray:
    """Reduce an image to a cleaned binary ink mask (ink = 255).

    The image is converted to grayscale when it has three dimensions,
    blurred with a 3x3 Gaussian, thresholded at 200 and inverted.
    Connected components (8-connectivity) smaller than 20 px are removed.

    Small components are dropped with a single lookup-table index instead
    of building one full-image mask per label; the output is unchanged.
    """
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.ndim == 3 else img
    gray = cv2.GaussianBlur(gray, (3, 3), 0)
    _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
    inv = cv2.bitwise_not(binary)  # ink = 255

    _, labels, stats, _ = cv2.connectedComponentsWithStats(
        inv, connectivity=8
    )
    # keep[label] is True for components that survive the size filter.
    keep = stats[:, cv2.CC_STAT_AREA] >= 20
    keep[0] = False  # label 0 is the background
    cleaned = np.zeros_like(inv)
    cleaned[keep[labels]] = 255
    return cleaned
1383
+
1384
def _score_pair(self, proc_a: np.ndarray, proc_b: np.ndarray) -> float:
    """Composite similarity in [0, 1] of two preprocessed binary images.

    Both images are resized to a common size (at least ``MIN_CROP_SIZE`` per
    side). The score is a weighted sum of SSIM (0.35 + 0.05), correlation of
    the column projections (0.25), correlation of the row projections
    (0.25) and the IoU of columns more than half filled (0.10).

    Returns 0.0 if any step raises.
    """
    def projection_score(img_a: np.ndarray, img_b: np.ndarray, axis: int) -> float:
        # Correlation of the max-normalised projections, mapped to [0, 1].
        profile_a = img_a.sum(axis=axis)
        profile_b = img_b.sum(axis=axis)
        profile_a /= (profile_a.max() + 1e-6)
        profile_b /= (profile_b.max() + 1e-6)
        corr = float(np.nan_to_num(np.corrcoef(profile_a, profile_b)[0, 1]))
        return (corr + 1.0) / 2.0

    try:
        h = max(self.MIN_CROP_SIZE, min(proc_a.shape[0], proc_b.shape[0]))
        w = max(self.MIN_CROP_SIZE, min(proc_a.shape[1], proc_b.shape[1]))
        a = cv2.resize(proc_a, (w, h)).astype(np.float32) / 255.0
        b = cv2.resize(proc_b, (w, h)).astype(np.float32) / 255.0

        # A. SSIM on the 8-bit versions.
        masked_ssim = float(
            _ssim((a * 255).astype(np.uint8), (b * 255).astype(np.uint8))
        )

        # B. Column projection (horizontal placement of note clusters).
        x_score = projection_score(a, b, 0)

        # C. Row projection (pitch range).
        y_score = projection_score(a, b, 1)

        # D. Barline alignment: IoU of columns more than half filled.
        def bar_columns(img: np.ndarray) -> set:
            return set(np.where(img.sum(axis=0) / (h + 1e-6) > 0.5)[0])

        bars_a = bar_columns(a)
        bars_b = bar_columns(b)
        either = bars_a | bars_b
        bar_score = len(bars_a & bars_b) / len(either) if either else 1.0

        return (0.35 * masked_ssim
                + 0.25 * x_score
                + 0.25 * y_score
                + 0.10 * bar_score
                + 0.05 * masked_ssim)  # weights sum to 1.0
    except Exception:
        return 0.0
1425
+
1426
+ def _split_staff_bands(
1427
+ self, proc: np.ndarray
1428
+ ) -> "tuple[np.ndarray, np.ndarray] | None":
1429
+ """binary (๊ฒ€=์Œํ‘œ) ์ด๋ฏธ์ง€์—์„œ ์ƒยทํ•˜ ๋ณดํ‘œ ์‚ฌ์ด ์ˆ˜ํ‰ ๊ณต๋ฐฑ์„ ์ฐพ์•„ ๋ถ„๋ฆฌ.
1430
+ ํ”ผ์•„๋…ธ ๋Œ€๋ณด ์ „์šฉ ํœด๋ฆฌ์Šคํ‹ฑ. ๋ฐ˜ํ™˜: (upper_proc, lower_proc) ๋˜๋Š” None."""
1431
+ h = proc.shape[0]
1432
+ lo, hi = int(h * 0.3), int(h * 0.7)
1433
+ row_fill = proc[lo:hi].sum(axis=1) / (proc.shape[1] * 255.0 + 1e-6)
1434
+ gap_mask = (row_fill < 0.04).tolist()
1435
+
1436
+ # run-length โ†’ ๊ฐ€์žฅ ๊ธด ์—ฐ์† True ๊ตฌ๊ฐ„
1437
+ best_start, best_len = 0, 0
1438
+ cur_start, cur_len = 0, 0
1439
+ for k, val in enumerate(gap_mask):
1440
+ if val:
1441
+ if cur_len == 0:
1442
+ cur_start = k
1443
+ cur_len += 1
1444
+ if cur_len > best_len:
1445
+ best_len = cur_len
1446
+ best_start = cur_start
1447
+ else:
1448
+ cur_len = 0
1449
+
1450
+ if best_len < 3: # 3ํ–‰ ์ด์ƒ ๊ณต๋ฐฑ์ด์–ด์•ผ ๋ณดํ‘œ ๊ตฌ๋ถ„
1451
+ return None
1452
+ split_y = lo + best_start + best_len // 2
1453
+ return proc[:split_y], proc[split_y:]
1454
+
1455
+ @staticmethod
1456
+ def _detect_staff_lines(
1457
+ proc: np.ndarray,
1458
+ diag: "list[str] | None" = None,
1459
+ ) -> "list[StaffBand]":
1460
+ """binary (๊ฒ€=์Œํ‘œ) crop์—์„œ 5์„  ๊ทธ๋ฃน์„ ๊ฒ€์ถœ.
1461
+ ๋ฐ˜ํ™˜: list of StaffBand(y1, y2, spacing_px). ๊ฒ€์ถœ ์‹คํŒจ ์‹œ [].
1462
+ diag: ๋””๋ฒ„๊ทธ ๋ฌธ์ž์—ด ์ˆ˜์ง‘์šฉ (None ์ด๋ฉด ์ƒ๋žต)."""
1463
+ from scipy.signal import find_peaks
1464
+ h, w = proc.shape[:2]
1465
+ row_sums = proc.sum(axis=1).astype(np.float32)
1466
+ threshold = w * 255 * 0.15 # ๋„ˆ๋น„์˜ 15% ์ด์ƒ ์ฑ„์›Œ์ง„ ํ–‰
1467
+ peaks, _ = find_peaks(row_sums, height=threshold, distance=2)
1468
+ if diag is not None:
1469
+ diag.append(f"peaks={len(peaks)} thr={threshold:.0f} img={h}x{w}")
1470
+ if len(peaks) < 5:
1471
+ return []
1472
+
1473
+ gaps = np.diff(peaks)
1474
+ median_gap = float(np.median(gaps))
1475
+ if diag is not None:
1476
+ diag.append(f"median_gap={median_gap:.1f}")
1477
+ if median_gap < 1.0:
1478
+ return []
1479
+
1480
+ bands: list[StaffBand] = []
1481
+ i = 0
1482
+ while i <= len(peaks) - 5:
1483
+ group = peaks[i:i + 5]
1484
+ g = np.diff(group)
1485
+ if g.max() < median_gap * 1.5 and g.min() > median_gap * 0.5:
1486
+ sp = float(np.mean(g))
1487
+ y1 = max(0, int(group[0] - sp * 0.8))
1488
+ y2 = min(h, int(group[-1] + sp * 0.8))
1489
+ bands.append(StaffBand(y1=y1, y2=y2, spacing_px=sp))
1490
+ i += 5
1491
+ else:
1492
+ i += 1
1493
+ if diag is not None:
1494
+ diag.append(f"bands={len(bands)}")
1495
+ return bands
1496
+
1497
@staticmethod
def _detect_staff_lines_from_raw(
    img: np.ndarray,
    diag: "list[str] | None" = None,
) -> "list[StaffBand]":
    """Detect five-line staves directly from a raw image.

    Used for the original scan: no connected-component filter is applied,
    which avoids losing broken staff-line fragments. The image is
    binarized (3x3 Gaussian blur, threshold 200, inverted) and only rows
    filled to at least 60% of the width are peak candidates, so note-head
    peaks are ignored.

    Args:
        img: Raw image; converted to grayscale when it has three dimensions.
        diag: Optional list that receives debug strings.

    Returns:
        ``StaffBand(y1, y2, spacing_px)`` entries, or ``[]`` when fewer than
        five peaks are found or the median gap is below 1.
    """
    from scipy.signal import find_peaks

    def note(message: str) -> None:
        if diag is not None:
            diag.append(message)

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) if img.ndim == 3 else img
    gray = cv2.GaussianBlur(gray, (3, 3), 0)
    _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
    proc = cv2.bitwise_not(binary)  # ink = foreground
    h, w = proc.shape[:2]
    row_sums = proc.sum(axis=1).astype(np.float32)

    # High threshold (60%): only nearly full rows such as staff lines.
    threshold = w * 255 * 0.60
    peaks, _ = find_peaks(row_sums, height=threshold, distance=2)
    note(
        f"raw_peaks={len(peaks)} thr={threshold:.0f} img={h}x{w}"
        f" max_row={row_sums.max():.0f}")
    if len(peaks) < 5:
        return []

    median_gap = float(np.median(np.diff(peaks)))
    note(f"median_gap={median_gap:.1f}")
    if median_gap < 1.0:
        return []

    bands: list[StaffBand] = []
    start = 0
    last_start = len(peaks) - 5
    while start <= last_start:
        window = peaks[start:start + 5]
        steps = np.diff(window)
        evenly_spaced = (
            steps.max() < median_gap * 1.5 and steps.min() > median_gap * 0.5
        )
        if not evenly_spaced:
            start += 1
            continue
        spacing = float(np.mean(steps))
        top = max(0, int(window[0] - spacing * 0.8))
        bottom = min(h, int(window[-1] + spacing * 0.8))
        bands.append(StaffBand(y1=top, y2=bottom, spacing_px=spacing))
        start += 5  # these five peaks are consumed
    note(f"bands={len(bands)}")
    return bands
1542
+
1543
+ @staticmethod
1544
+ def _normalize_to_staff_scale(
1545
+ proc_r: np.ndarray,
1546
+ proc_o: np.ndarray,
1547
+ bands_r: "list[StaffBand]",
1548
+ bands_o: "list[StaffBand]",
1549
+ ) -> "tuple[np.ndarray, float]":
1550
+ """๋ Œ๋” crop์„ ์›๋ณธ์˜ 5์„  ๊ฐ„๊ฒฉ์— ๋งž๊ฒŒ upscale.
1551
+ ๋ฐ˜ํ™˜: (proc_r_scaled, scale_factor)."""
1552
+ if not bands_r or not bands_o:
1553
+ return proc_r, 1.0
1554
+ sp_r = float(np.median([b.spacing_px for b in bands_r]))
1555
+ sp_o = float(np.median([b.spacing_px for b in bands_o]))
1556
+ if sp_r < 1.0:
1557
+ return proc_r, 1.0
1558
+ scale = min(sp_o / sp_r, 5.0)
1559
+ if scale < 1.0:
1560
+ # ์›๋ณธ spacing์ด ๋ Œ๋”๋ณด๋‹ค ์ž‘์Œ โ€” ๋‹ค์šด์Šค์ผ€์ผ ๋ถˆํ•„์š”, fallback
1561
+ return proc_r, 1.0
1562
+ if abs(scale - 1.0) < 0.1:
1563
+ return proc_r, scale
1564
+ new_h = int(proc_r.shape[0] * scale)
1565
+ new_w = int(proc_r.shape[1] * scale)
1566
+ scaled = cv2.resize(proc_r, (new_w, new_h),
1567
+ interpolation=cv2.INTER_LINEAR)
1568
+ return scaled, scale
1569
+
1570
def _measure_score(
    self, r_crop: np.ndarray, o_crop: np.ndarray,
    warnings_out: "list[str] | None" = None,
) -> "tuple[float, np.ndarray, np.ndarray]":
    """Compute the final similarity score for a rendered/original measure pair.

    Args:
        r_crop: Rendered measure crop.
        o_crop: Original (scanned) measure crop.
        warnings_out: Optional list that receives diagnostic messages.

    Returns:
        ``(score, proc_r, proc_o)`` where ``proc_r`` / ``proc_o`` are the
        preprocessed images as returned by ``_prepare_compare_image``
        (the unscaled render is returned, not the normalized one).
    """
    proc_r = self._prepare_compare_image(r_crop)
    proc_o = self._prepare_compare_image(o_crop)

    diag_r: list[str] = []
    diag_o: list[str] = []
    bands_r = self._detect_staff_lines(proc_r, diag_r)
    # The original side is detected on the raw crop, not the preprocessed one.
    bands_o = self._detect_staff_lines_from_raw(o_crop, diag_o)

    # Resolution normalization: upscale the render to the original's
    # staff-line spacing.
    proc_r_norm, scale = self._normalize_to_staff_scale(
        proc_r, proc_o, bands_r, bands_o)
    verbose = warnings_out is not None
    if verbose and abs(scale - 1.0) > 0.1:
        warnings_out.append(
            f" scale={scale:.2f}x "
            f"(render {proc_r.shape[1]}px โ†’ {proc_r_norm.shape[1]}px)")

    # Per-staff comparison: only when detection succeeded on both sides,
    # and only over the number of staves both sides have in common.
    staff_scores = []
    if bands_r and bands_o:
        rescaled_r = [
            StaffBand(int(band.y1 * scale), int(band.y2 * scale),
                      band.spacing_px * scale)
            for band in bands_r]
        for band_r, band_o in zip(rescaled_r, bands_o):
            strip_r = proc_r_norm[band_r.y1:band_r.y2]
            strip_o = proc_o[band_o.y1:band_o.y2]
            if strip_r.size <= 0 or strip_o.size <= 0:
                continue  # nothing to compare for this staff
            staff_scores.append(self._score_pair(strip_r, strip_o))

    if staff_scores:
        if verbose:
            warnings_out.append(
                f" per-staff scores({len(staff_scores)}): "
                f"{[f'{val:.3f}' for val in staff_scores]}")
        return float(np.mean(staff_scores)), proc_r, proc_o

    # Fallback: the previous whole-image comparison.
    if verbose:
        warnings_out.append(
            f" staff detection ์‹คํŒจ โ†’ fallback"
            f" [r:{' '.join(diag_r)} | o:{' '.join(diag_o)}]")
    return self._score_pair(proc_r_norm, proc_o), proc_r, proc_o
1620
+
1621
+ def _align_flat(
1622
+ self,
1623
+ rendered_flat: "list[dict]",
1624
+ orig_flat: "list[np.ndarray]",
1625
+ ) -> "list[tuple[int, int]]":
1626
+ """DP sequence alignment์œผ๋กœ ์ตœ์  (r_idx, o_idx) ์Œ ๋ฐ˜ํ™˜.
1627
+ 30๋งˆ๋”” ์ดˆ๊ณผ ์‹œ ๋‹จ์ˆœ 1:1 fallback."""
1628
+ n, m = len(rendered_flat), len(orig_flat)
1629
+ if n > 30 or m > 30:
1630
+ return list(zip(range(min(n, m)), range(min(n, m))))
1631
+
1632
+ S = np.zeros((n, m), dtype=np.float32)
1633
+ for i in range(n):
1634
+ for j in range(m):
1635
+ score, _, _ = self._measure_score(
1636
+ rendered_flat[i]["crop"], orig_flat[j])
1637
+ S[i][j] = score
1638
+
1639
+ GAP = -0.1
1640
+ dp = np.full((n + 1, m + 1), -np.inf, dtype=np.float32)
1641
+ dp[0, 0] = 0.0
1642
+ for i in range(n + 1):
1643
+ for j in range(m + 1):
1644
+ if i == 0 and j == 0:
1645
+ continue
1646
+ best = -np.inf
1647
+ if i > 0 and j > 0:
1648
+ best = max(best, dp[i-1, j-1] + S[i-1, j-1])
1649
+ if i > 0:
1650
+ best = max(best, dp[i-1, j] + GAP)
1651
+ if j > 0:
1652
+ best = max(best, dp[i, j-1] + GAP)
1653
+ dp[i, j] = best
1654
+
1655
+ # ์—ญ์ถ”์  (float equality โ†’ abs ๋น„๊ต)
1656
+ pairs: list[tuple[int, int]] = []
1657
+ i, j = n, m
1658
+ while i > 0 and j > 0:
1659
+ if abs(dp[i, j] - (dp[i-1, j-1] + S[i-1, j-1])) < 1e-5:
1660
+ pairs.append((i-1, j-1))
1661
+ i -= 1; j -= 1
1662
+ elif i > 0 and dp[i-1, j] >= dp[i, j-1]:
1663
+ i -= 1
1664
+ else:
1665
+ j -= 1
1666
+ pairs.reverse()
1667
+ return pairs
1668
+
1669
+ def _ssim(self, img_a: np.ndarray, img_b: np.ndarray) -> float:
1670
+ """๋‘ ์ด๋ฏธ์ง€์˜ SSIM ์ ์ˆ˜ ๊ณ„์‚ฐ. ์‹คํŒจ ์‹œ 0.0 ๋ฐ˜ํ™˜. (ํ•˜์œ„ ํ˜ธํ™˜์šฉ)"""
1671
+ try:
1672
+ h = max(self.MIN_CROP_SIZE, min(img_a.shape[0], img_b.shape[0]))
1673
+ w = max(self.MIN_CROP_SIZE, min(img_a.shape[1], img_b.shape[1]))
1674
+ a = cv2.resize(img_a, (w, h))
1675
+ b = cv2.resize(img_b, (w, h))
1676
+ a_g = cv2.cvtColor(a, cv2.COLOR_BGR2GRAY)
1677
+ b_g = cv2.cvtColor(b, cv2.COLOR_BGR2GRAY)
1678
+ score, _ = _ssim(a_g, b_g, full=True)
1679
+ return float(score)
1680
+ except Exception:
1681
+ return 0.0
requirements-server.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Server dependencies for Hugging Face Spaces deployment
2
+ gradio>=4.0.0
3
+ pymupdf>=1.23.0
4
+ opencv-python-headless>=4.9.0