Spaces:

AkJeond
/

smarteye-backend

Sleeping

App Files Files Community

KwanHak committed on Nov 3, 2025

Commit

82c1146

1 Parent(s): be63ac6

sync: Smart_Demo 브랜치의 Backend 코드 병합 & 이미지 로드를 위한 MultiFileLoader 컴포넌트 구현

Browse files

- 동료가 개발한 최신 Backend 반영
- 기존 Backend는 Backend_backup_before_sync에 백업
- 이미지 로드를 위한 MultiFileLoader 컴포넌트 구현

Files changed (37) hide show

.env.docker +11 -0
app/crud.py +4 -0
app/main.py +20 -19
app/routers/__init__.py +13 -10
app/routers/analysis.py +264 -0
app/routers/downloads.py +95 -0
app/routers/pages.py +243 -0
app/routers/projects.py +113 -0
app/schemas.py +59 -0
app/services/__init__.py +72 -0
app/services/analysis_service.py +1057 -0
app/services/batch_analysis.py +450 -0
app/services/download_service.py +257 -0
app/services/formatter.py +403 -0
app/services/formatter_rules.py +200 -0
app/services/formatter_utils.py +306 -0
app/services/mock_models.py +417 -0
app/services/pdf_processor.py +250 -0
app/services/sorter.py +1605 -0
app/services/sorter_strategies.py +788 -0
app/services/sorter_구버전.py +1316 -0
app/services/text_version_service.py +179 -0
docker-compose.yml +57 -0
docs/Backend API 문서/00_개요.md +413 -0
docs/Backend API 문서/01_프로젝트_API.md +608 -0
docs/Backend API 문서/02_페이지_API.md +772 -0
docs/Backend API 문서/03_분석_API.md +682 -0
docs/Backend API 문서/04_다운로드_API.md +477 -0
docs/Backend API 문서/05_데이터_모델.md +678 -0
docs/Backend API 문서/06_에러_처리.md +556 -0
docs/Backend API 문서/07_예제_코드.md +0 -0
docs/백엔드 환경 설정/백엔드 DB 준비방법.txt +61 -0
docs/백엔드 환경 설정/백엔드 테스트 준비방법.txt +202 -0
requirements.txt +29 -3
scripts/reset_db.sh +54 -0
scripts/seed_test_data.sql +2 -3
scripts/test_pdf_upload.py +0 -0

.env.docker ADDED Viewed

	@@ -0,0 +1,11 @@

+# Docker MySQL 환경 변수
+# docker-compose.yml에서 사용됨
+# MySQL 설정
+MYSQL_ROOT_PASSWORD=1q2w3e4r
+MYSQL_DATABASE=smarteyessen_db
+MYSQL_PORT=3308
+# 선택적: 추가 사용자 (보안 강화)
+# MYSQL_USER=smarteye_user
+# MYSQL_PASSWORD=smarteye_secure_password_here

app/crud.py CHANGED Viewed

@@ -664,6 +664,10 @@ def get_formatting_rule_by_class(db: Session, doc_type_id: int, class_name: str)
 def get_formatting_rules_by_doc_type(db: Session, doc_type_id: int) -> List[models.FormattingRule]:
     return db.query(models.FormattingRule).filter(models.FormattingRule.doc_type_id == doc_type_id).all()
 def create_formatting_rule(db: Session, rule: schemas.FormattingRuleCreate) -> models.FormattingRule:
     db_rule = models.FormattingRule(**rule.model_dump())
     db.add(db_rule)

 def get_formatting_rules_by_doc_type(db: Session, doc_type_id: int) -> List[models.FormattingRule]:
     return db.query(models.FormattingRule).filter(models.FormattingRule.doc_type_id == doc_type_id).all()
+def get_all_formatting_rules(db: Session) -> List[models.FormattingRule]:
+    """Return every FormattingRule row; no filter is applied."""
+    all_rules = db.query(models.FormattingRule).all()
+    return all_rules
 def create_formatting_rule(db: Session, rule: schemas.FormattingRuleCreate) -> models.FormattingRule:
     db_rule = models.FormattingRule(**rule.model_dump())
     db.add(db_rule)

app/main.py CHANGED Viewed

@@ -11,16 +11,18 @@ FastAPI 메인 애플리케이션 및 라우터 설정
 - API 문서화
 """
-from fastapi import FastAPI, Depends, HTTPException, status
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
-from sqlalchemy.orm import Session
 from sqlalchemy import text
-import os
-from dotenv import load_dotenv
 from .database import engine, get_db, init_db, test_connection
 from . import models
 # 환경 변수 로드
 load_dotenv()
@@ -38,20 +40,20 @@ app = FastAPI(
     ### 주요 기능
     * 📄 **다중 페이지 문서 처리**: Worksheet 및 Document 유형 지원
     * 🤖 **AI 레이아웃 분석**: DocLayout-YOLO 기반 레이아웃 감지
-    * 🔍 **OCR 텍스트 추출**: PaddleOCR 기반 텍스트 인식
     * ✏️ **텍스트 편집 및 버전 관리**: TinyMCE 편집기 지원
-    * 🖼️ **AI 설명 생성**: GPT-4o-mini 기반 figure/table 설명
     * 📊 **문제 기반 정렬**: Worksheet 전용 문제 번호 기반 정렬
     * 📐 **좌표 기반 정렬**: Document 전용 좌표 기반 정렬
-    * 📥 **통합 문서 다운로드**: DOCX/PDF/TXT 형식 지원
     ### 기술 스택
     * **Backend**: FastAPI + SQLAlchemy
     * **Database**: MySQL 8.0
-    * **AI Models**: DocLayout-YOLO, PaddleOCR, GPT-4o-mini
     * **Document**: python-docx
     """,
-    version="1.0.0",
     docs_url="/docs",
     redoc_url="/redoc",
     openapi_url="/openapi.json",
@@ -125,7 +127,7 @@ async def root():
     """
     return {
         "message": "Welcome to SmartEyeSsen API",
-        "version": "1.0.0",
         "status": "running",
         "docs": "/docs",
         "redoc": "/redoc"
@@ -140,7 +142,7 @@ async def health_check(db: Session = Depends(get_db)):
     서버 및 데이터베이스 상태 확인
     """
     try:
-        # 간단한 쿼리로 DB 연결 확인 (SQLAlchemy 2.0 호환)
         db.execute(text("SELECT 1"))
         db_status = "connected"
     except Exception as e:
@@ -181,14 +183,13 @@ async def general_exception_handler(request, exc):
     )
-# ============================================================================
-# 라우터 등록 (Phase 2에서 추가 예정)
-# ============================================================================
-# from .routers import users, projects, pages, layout_elements
-# app.include_router(users.router, prefix="/api/v1/users", tags=["Users"])
-# app.include_router(projects.router, prefix="/api/v1/projects", tags=["Projects"])
-# app.include_router(pages.router, prefix="/api/v1/pages", tags=["Pages"])
-# app.include_router(layout_elements.router, prefix="/api/v1/elements", tags=["Layout Elements"])
 # ============================================================================

 - API 문서화
 """
+import os
+from dotenv import load_dotenv
+from fastapi import Depends, FastAPI, HTTPException, status
 from fastapi.middleware.cors import CORSMiddleware
 from fastapi.responses import JSONResponse
 from sqlalchemy import text
+from sqlalchemy.orm import Session
 from .database import engine, get_db, init_db, test_connection
 from . import models
+from .routers import analysis, downloads, pages, projects
 # 환경 변수 로드
 load_dotenv()
     ### 주요 기능
     * 📄 **다중 페이지 문서 처리**: Worksheet 및 Document 유형 지원
     * 🤖 **AI 레이아웃 분석**: DocLayout-YOLO 기반 레이아웃 감지
+    * 🔍 **OCR 텍스트 추출**: Tesseract OCR 기반 텍스트 인식
     * ✏️ **텍스트 편집 및 버전 관리**: TinyMCE 편집기 지원
+    * 🖼️ **AI 설명 생성**: GPT-4-turbo 기반 figure/table/flowchart 설명
     * 📊 **문제 기반 정렬**: Worksheet 전용 문제 번호 기반 정렬
     * 📐 **좌표 기반 정렬**: Document 전용 좌표 기반 정렬
+    * 📥 **통합 문서 다운로드**: DOCX 형식 지원
     ### 기술 스택
     * **Backend**: FastAPI + SQLAlchemy
     * **Database**: MySQL 8.0
+    * **AI Models**: DocLayout-YOLO, Tesseract OCR, GPT-4-turbo
     * **Document**: python-docx
     """,
+    version="1.0.1",
     docs_url="/docs",
     redoc_url="/redoc",
     openapi_url="/openapi.json",
     """
     return {
         "message": "Welcome to SmartEyeSsen API",
+        "version": "1.0.1",
         "status": "running",
         "docs": "/docs",
         "redoc": "/redoc"
     서버 및 데이터베이스 상태 확인
     """
     try:
+        # 간단한 쿼리로 DB 연결 확인
         db.execute(text("SELECT 1"))
         db_status = "connected"
     except Exception as e:
     )
+# =========================================================================
+# 라우터 등록
+# =========================================================================
+app.include_router(projects.router)
+app.include_router(pages.router)
+app.include_router(analysis.router)
+app.include_router(downloads.router)
 # ============================================================================

app/routers/__init__.py CHANGED Viewed

@@ -3,15 +3,18 @@ SmartEyeSsen Backend - Routers Package
 =======================================
 API 라우터 패키지 초기화
-Phase 2에서 개별 라우터 모듈이 추가될 예정:
-- users.py: 사용자 관리 API
-- projects.py: 프로젝트 관리 API
-- pages.py: 페이지 관리 API
-- layout_elements.py: 레이아웃 요소 API
-- text_contents.py: 텍스트 내용 API
-- ai_descriptions.py: AI 설명 API
-- formatting_rules.py: 서식 규칙 API
-- combined_results.py: 통합 결과 API
 """
-__version__ = "1.0.0"

 =======================================
 API 라우터 패키지 초기화
+제공 라우터:
+- projects.py: 프로젝트 CRUD
+- pages.py: 페이지 업로드 및 조회
+- analysis.py: 배치 분석 트리거
+- downloads.py: 통합 텍스트/문서 다운로드 API
 """
+from . import analysis, downloads, pages, projects
+__all__ = [
+    "analysis",
+    "downloads",
+    "pages",
+    "projects",
+]

app/routers/analysis.py ADDED Viewed

	@@ -0,0 +1,264 @@

+from __future__ import annotations
+import uuid
+from typing import Any, Dict, Optional
+from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, status
+from loguru import logger
+from pydantic import BaseModel
+from sqlalchemy.orm import Session
+from ..database import get_db, SessionLocal
+from ..models import Page, Project
+from ..services.batch_analysis import (
+    analyze_project_batch_async,
+    _get_analysis_service,
+    _process_single_page_async,
+)
+from ..services.formatter import TextFormatter
+# Router for the analysis endpoints; every route below is served under /api.
+router = APIRouter(
+    prefix="/api",
+    tags=["Analysis"],
+)
+# ============================================================================
+# Async job state store
+# ============================================================================
+# NOTE: for production, Redis or Celery is the recommended replacement.
+# The current implementation is an in-memory store meant for development/testing.
+# Keys are job IDs (uuid4 strings); values are the job records built in
+# analyze_page_async. Nothing in this module removes entries.
+async_jobs: Dict[str, Dict[str, Any]] = {}
+# Request body of POST /api/projects/{project_id}/analyze.
+class ProjectAnalysisRequest(BaseModel):
+    # Forwarded unchanged to analyze_project_batch_async.
+    use_ai_descriptions: bool = True
+    # Optional key forwarded to the batch service — presumably the OpenAI API key; confirm in batch_analysis.
+    api_key: Optional[str] = None
+class PageAnalysisRequest(BaseModel):
+    """Request body for the asynchronous single-page analysis endpoint."""
+    # Forwarded unchanged to the background task.
+    use_ai_descriptions: bool = True
+    # Optional API key forwarded to the background task.
+    api_key: Optional[str] = None
+@router.post(
+    "/projects/{project_id}/analyze",
+    status_code=status.HTTP_202_ACCEPTED,
+)
+async def analyze_project(
+    project_id: int,
+    payload: ProjectAnalysisRequest,
+    db: Session = Depends(get_db),
+):
+    """
+    Run the batch analysis for a whole project.
+    - Verifies the project exists, then delegates to ``analyze_project_batch_async``
+    - The batch call is awaited, so the response is only sent once it has finished
+      (the declared status code is still 202)
+    - The batch service is expected to take the project's pending pages through
+      layout analysis, OCR, sorting and formatting (not visible in this module)
+    Raises:
+        HTTPException: 404 when no project with ``project_id`` exists
+    """
+    found_project_id = (
+        db.query(Project.project_id)
+        .filter(Project.project_id == project_id)
+        .scalar()
+    )
+    if not found_project_id:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="프로젝트를 찾을 수 없습니다.")
+    return await analyze_project_batch_async(
+        db=db,
+        project_id=project_id,
+        use_ai_descriptions=payload.use_ai_descriptions,
+        api_key=payload.api_key,
+    )
+# ============================================================================
+# 비동기 분석 엔드포인트
+# ============================================================================
+@router.post(
+    "/pages/{page_id}/analyze/async",
+    status_code=status.HTTP_202_ACCEPTED,
+)
+def analyze_page_async(
+    page_id: int,
+    payload: PageAnalysisRequest,
+    background_tasks: BackgroundTasks,
+    db: Session = Depends(get_db),
+):
+    """
+    Start the analysis of a single page as a background task.
+    - Stores a new job record in the in-memory ``async_jobs`` dict
+    - Registers ``_run_async_page_analysis`` with ``background_tasks``
+    - Returns the job ID immediately; poll GET /api/analysis/jobs/{job_id} for state
+    Args:
+        page_id: ID of the page to analyse
+        payload: Analysis options (AI description flag, API key)
+        background_tasks: FastAPI background task manager
+        db: Database session
+    Returns:
+        The job ID and the URL to query its status
+    Raises:
+        HTTPException: 404 when the page does not exist
+    """
+    target_page = db.query(Page).filter(Page.page_id == page_id).first()
+    if not target_page:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=f"페이지 ID {page_id}를 찾을 수 없습니다."
+        )
+    job_id = str(uuid.uuid4())
+    initial_status = "pending"
+    # Register the record before scheduling the task: the task updates it by key.
+    job_record: Dict[str, Any] = {
+        "job_id": job_id,
+        "status": initial_status,
+        "page_id": page_id,
+        "page_number": target_page.page_number,
+        "project_id": target_page.project_id,
+        "result": None,
+        "error": None,
+        "progress": "작업 대기 중...",
+    }
+    async_jobs[job_id] = job_record
+    logger.info(f"비동기 페이지 분석 작업 생성: job_id={job_id}, page_id={page_id}")
+    task_kwargs = {
+        "job_id": job_id,
+        "page_id": page_id,
+        "use_ai_descriptions": payload.use_ai_descriptions,
+        "api_key": payload.api_key,
+    }
+    background_tasks.add_task(_run_async_page_analysis, **task_kwargs)
+    return {
+        "job_id": job_id,
+        "status": initial_status,
+        "message": "페이지 분석 작업이 시작되었습니다.",
+        "page_id": page_id,
+        "status_check_url": f"/api/analysis/jobs/{job_id}",
+    }
+@router.get("/analysis/jobs/{job_id}")
+def get_analysis_job_status(job_id: str):
+    """
+    Look up the state of an asynchronous analysis job.
+    Args:
+        job_id: Job ID returned by the analyze_page_async endpoint
+    Returns:
+        The job record stored in ``async_jobs`` (status is one of pending,
+        processing, completed, failed)
+    Raises:
+        HTTPException: 404 when the job ID is unknown
+    """
+    job_record = async_jobs.get(job_id)
+    if job_record is not None:
+        return job_record
+    raise HTTPException(
+        status_code=status.HTTP_404_NOT_FOUND,
+        detail=f"작업 ID {job_id}를 찾을 수 없습니다."
+    )
+# ============================================================================
+# 백그라운드 작업 실행 함수
+# ============================================================================
+async def _run_async_page_analysis(
+    job_id: str,
+    page_id: int,
+    use_ai_descriptions: bool,
+    api_key: Optional[str],
+) -> None:
+    """
+    Analyse one page in the background and record progress in ``async_jobs``.
+    - Opens its own DB session via ``SessionLocal`` and always closes it
+    - Reuses ``_process_single_page_async`` from batch_analysis for the actual work
+    - On failure marks the job as ``failed`` and tries to set the page status to ``error``
+    Args:
+        job_id: Key of the job record in ``async_jobs``
+        page_id: ID of the page to analyse
+        use_ai_descriptions: Forwarded to ``_process_single_page_async``
+        api_key: Forwarded to ``_process_single_page_async`` (optional)
+    """
+    db = SessionLocal()
+    try:
+        async_jobs[job_id]["status"] = "processing"
+        async_jobs[job_id]["progress"] = "페이지 분석 중..."
+        logger.info(f"비동기 페이지 분석 시작: job_id={job_id}, page_id={page_id}")
+        # Load the page and its project in this task's own session.
+        page = db.query(Page).filter(Page.page_id == page_id).first()
+        if not page:
+            raise ValueError(f"페이지 ID {page_id}를 찾을 수 없습니다.")
+        project = db.query(Project).filter(Project.project_id == page.project_id).first()
+        if not project:
+            raise ValueError(f"프로젝트 ID {page.project_id}를 찾을 수 없습니다.")
+        analysis_service = _get_analysis_service()
+        formatter = TextFormatter(
+            doc_type_id=project.doc_type_id,
+            db=db,
+            use_db_rules=True,
+        )
+        async_jobs[job_id]["progress"] = "레이아웃 분석 및 OCR 수행 중..."
+        page_result = await _process_single_page_async(
+            db=db,
+            project=project,
+            page=page,
+            formatter=formatter,
+            analysis_service=analysis_service,
+            use_ai_descriptions=use_ai_descriptions,
+            api_key=api_key,
+        )
+        # Any status other than "completed" is handled by the failure path below.
+        if page_result["status"] != "completed":
+            raise RuntimeError(page_result.get("message", "알 수 없는 오류"))
+        async_jobs[job_id]["status"] = "completed"
+        async_jobs[job_id]["progress"] = "분석 완료"
+        async_jobs[job_id]["result"] = {
+            "page_id": page_id,
+            "page_number": page_result["page_number"],
+            "layout_count": page_result["layout_count"],
+            "ocr_count": page_result["ocr_count"],
+            "ai_description_count": page_result.get("ai_description_count", 0),
+            "processing_time": page_result["processing_time"],
+            "message": "페이지 분석이 성공적으로 완료되었습니다.",
+        }
+        logger.info(
+            f"비동기 페이지 분석 완료: job_id={job_id}, page_id={page_id}, "
+            f"time={page_result['processing_time']:.2f}s"
+        )
+    except Exception as error:
+        # Record the failure first so the job never stays in "processing",
+        # whatever happens while logging or updating the page.
+        async_jobs[job_id]["status"] = "failed"
+        async_jobs[job_id]["progress"] = "분석 실패"
+        async_jobs[job_id]["error"] = str(error)
+        # loguru takes the traceback via opt(exception=True) and formats "{}"
+        # placeholders itself, so braces inside the error text are harmless.
+        logger.opt(exception=True).error(
+            "비동기 페이지 분석 실패: job_id={}, page_id={}, error={}",
+            job_id,
+            page_id,
+            error,
+        )
+        try:
+            # A failed flush/commit leaves the session unusable until rolled back.
+            db.rollback()
+            page = db.query(Page).filter(Page.page_id == page_id).first()
+            if page:
+                page.analysis_status = "error"
+                db.commit()
+        except Exception as db_error:
+            logger.error(f"페이지 상태 업데이트 실패: {db_error}")
+            db.rollback()
+    finally:
+        db.close()

app/routers/downloads.py ADDED Viewed

	@@ -0,0 +1,95 @@

+from __future__ import annotations
+from fastapi import APIRouter, Depends, HTTPException, status
+from fastapi.responses import StreamingResponse
+from loguru import logger
+from sqlalchemy.orm import Session
+from .. import schemas
+from ..database import get_db
+from ..models import Project
+from ..services.download_service import (
+    generate_combined_text,
+    generate_word_document,
+)
+# Router for the download endpoints; routes below are served under /api/projects.
+router = APIRouter(
+    prefix="/api/projects",
+    tags=["Downloads"],
+)
+@router.get(
+    "/{project_id}/combined-text",
+    response_model=schemas.CombinedTextResponse,
+    status_code=status.HTTP_200_OK,
+    summary="프로젝트 통합 텍스트 조회",
+)
+def get_combined_text(
+    project_id: int,
+    db: Session = Depends(get_db),
+) -> schemas.CombinedTextResponse:
+    """
+    Return the combined text of a project.
+    Calls ``generate_combined_text`` with ``use_cache=True`` and validates the
+    result against ``CombinedTextResponse``.
+    Raises:
+        HTTPException: 404 when the project does not exist or a ``ValueError``
+            is raised while building the response; 500 for any other failure
+    """
+    project_exists = (
+        db.query(Project.project_id)
+        .filter(Project.project_id == project_id)
+        .scalar()
+    )
+    if not project_exists:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="프로젝트를 찾을 수 없습니다.")
+    try:
+        combined_data = generate_combined_text(db, project_id, use_cache=True)
+        return schemas.CombinedTextResponse.model_validate(combined_data)
+    except ValueError as value_error:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(value_error)) from value_error
+    except Exception as error:  # pylint: disable=broad-except
+        # loguru uses "{}" placeholders and opt(exception=True) for the traceback;
+        # "%s" and exc_info are stdlib-logging conventions it does not interpret.
+        logger.opt(exception=True).error("통합 텍스트 생성 실패: project_id={} / error={}", project_id, error)
+        raise HTTPException(status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, detail="통합 텍스트 생성 중 오류가 발생했습니다.") from error
+@router.post(
+    "/{project_id}/download",
+    response_class=StreamingResponse,
+    status_code=status.HTTP_200_OK,
+    summary="프로젝트 Word 문서 다운로드",
+)
+def download_document(
+    project_id: int,
+    db: Session = Depends(get_db),
+) -> StreamingResponse:
+    """
+    Build the project's combined text as a Word (.docx) file and stream it back.
+    Calls ``generate_word_document`` with ``use_cache=True`` and sends the returned
+    stream as an attachment under the returned filename.
+    Raises:
+        HTTPException: 404 when the project does not exist or the service raises
+            ``ValueError``; 501 when it raises ``ImportError``; 500 otherwise
+    """
+    project_exists = (
+        db.query(Project.project_id)
+        .filter(Project.project_id == project_id)
+        .scalar()
+    )
+    if not project_exists:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="프로젝트를 찾을 수 없습니다.")
+    try:
+        filename, file_stream = generate_word_document(db, project_id, use_cache=True)
+        # NOTE(review): header values must be latin-1 encodable — confirm that
+        # generate_word_document never returns a non-ASCII filename.
+        headers = {"Content-Disposition": f'attachment; filename="{filename}"'}
+        return StreamingResponse(
+            file_stream,
+            media_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document",
+            headers=headers,
+        )
+    except ImportError as import_error:
+        raise HTTPException(
+            status_code=status.HTTP_501_NOT_IMPLEMENTED,
+            detail=str(import_error),
+        ) from import_error
+    except ValueError as value_error:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=str(value_error)) from value_error
+    except Exception as error:  # pylint: disable=broad-except
+        # loguru uses "{}" placeholders and opt(exception=True) for the traceback;
+        # "%s" and exc_info are stdlib-logging conventions it does not interpret.
+        logger.opt(exception=True).error("Word 문서 생성 실패: project_id={} / error={}", project_id, error)
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="Word 문서 생성 중 오류가 발생했습니다.",
+        ) from error

app/routers/pages.py ADDED Viewed

	@@ -0,0 +1,243 @@

+from __future__ import annotations
+import asyncio
+import os
+from pathlib import Path
+from typing import List, Optional, Union
+from uuid import uuid4
+import cv2
+from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile, status
+from loguru import logger
+from sqlalchemy.orm import Session
+from .. import crud, schemas
+from ..database import get_db
+from ..models import AnalysisStatusEnum, Page
+from ..services.pdf_processor import pdf_processor
+from ..services.text_version_service import (
+    get_current_page_text,
+    save_user_edited_version,
+)
+# Router for the page endpoints; routes below are served under /api/pages.
+router = APIRouter(
+    prefix="/api/pages",
+    tags=["Pages"],
+)
+# Absolute directory for uploaded page images; taken from the UPLOAD_DIR
+# environment variable (default "uploads") and created at import time.
+UPLOAD_DIR = Path(os.getenv("UPLOAD_DIR", "uploads")).resolve()
+UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
+def _page_to_response(page: Page) -> schemas.PageResponse:
+    """Convert a Page ORM object into its PageResponse schema."""
+    response = schemas.PageResponse.model_validate(page)
+    return response
+@router.post(
+    "/upload",
+    response_model=Union[schemas.PageResponse, schemas.MultiPageCreateResponse],
+    status_code=status.HTTP_201_CREATED,
+)
+async def upload_page(
+    project_id: int = Form(..., description="프로젝트 ID"),
+    page_number: Optional[int] = Form(None, ge=1, description="페이지 번호 (이미지 업로드 시 필수, PDF는 자동)"),
+    file: UploadFile = File(..., description="페이지 이미지 또는 PDF 파일"),
+    db: Session = Depends(get_db),
+) -> Union[schemas.PageResponse, schemas.MultiPageCreateResponse]:
+    """
+    Upload a page image or a PDF.
+    - PDF (content type ``application/pdf``): ``page_number`` is not used; the PDF is
+      converted by ``pdf_processor`` and one page row is created per converted page,
+      numbered after the highest page number already present in the project
+    - Anything else is handled as a single image: ``page_number`` is required, the
+      file is written to ``UPLOAD_DIR`` and its dimensions are read with OpenCV
+    Raises:
+        HTTPException: 400 when PDF conversion raises ``ValueError``, when
+            ``page_number`` is missing for an image, or when the image cannot be
+            read; 500 for any other failure in the PDF branch
+    """
+    if file.content_type == "application/pdf":
+        logger.info(f"PDF 업로드 시작 - ProjectID: {project_id}")
+        pdf_bytes = await file.read()
+        existing_pages = crud.get_pages_by_project(db, project_id)
+        # Continue after the highest existing number, not the row count, so a
+        # project whose numbering has gaps does not get duplicate page numbers.
+        start_page_number = max((p.page_number or 0 for p in existing_pages), default=0) + 1
+        logger.info(f"기존 페이지 수: {len(existing_pages)}, 시작 페이지: {start_page_number}")
+        try:
+            converted_pages = pdf_processor.convert_pdf_to_images(
+                pdf_bytes=pdf_bytes,
+                project_id=project_id,
+                start_page_number=start_page_number,
+            )
+            logger.info(f"PDF 변환 완료 - {len(converted_pages)}개 페이지")
+            # crud.create_page is a synchronous call on the shared Session, so the
+            # pages are saved one by one in page order and stop at the first error.
+            created_pages = []
+            for page_info in converted_pages:
+                page_create = schemas.PageCreate(
+                    project_id=project_id,
+                    page_number=page_info['page_number'],
+                    image_path=page_info['image_path'],
+                    image_width=page_info['width'],
+                    image_height=page_info['height'],
+                )
+                created_pages.append(crud.create_page(db, page_create))
+                logger.debug(f"페이지 {page_info['page_number']} 저장 완료")
+            logger.info(f"PDF 업로드 완료 - ProjectID: {project_id}, {len(created_pages)}개 페이지 생성")
+            return schemas.MultiPageCreateResponse(
+                project_id=project_id,
+                total_created=len(created_pages),
+                source_type="pdf",
+                pages=[_page_to_response(p) for p in created_pages],
+            )
+        except ValueError as ve:
+            logger.error(f"PDF 처리 실패: {str(ve)}")
+            raise HTTPException(
+                status_code=status.HTTP_400_BAD_REQUEST,
+                detail=f"PDF 처리 실패: {str(ve)}"
+            ) from ve
+        except Exception as exc:
+            logger.error(f"PDF 업로드 중 오류: {str(exc)}")
+            raise HTTPException(
+                status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+                detail=f"PDF 업로드 실패: {str(exc)}"
+            ) from exc
+    # Single image upload (every non-PDF content type ends up here).
+    if page_number is None:
+        raise HTTPException(
+            status_code=status.HTTP_400_BAD_REQUEST,
+            detail="이미지 업로드 시 page_number는 필수입니다."
+        )
+    logger.info(f"이미지 업로드 - ProjectID: {project_id}, PageNumber: {page_number}")
+    # UploadFile.filename may be None; fall back to the default suffix then.
+    suffix = Path(file.filename or "").suffix or ".png"
+    filename = f"project_{project_id}_page_{page_number}_{uuid4().hex}{suffix}"
+    file_path = UPLOAD_DIR / filename
+    content = await file.read()
+    file_path.write_bytes(content)
+    try:
+        image = cv2.imread(str(file_path))
+        if image is None:
+            raise ValueError("이미지를 읽을 수 없습니다.")
+        height, width = image.shape[:2]
+    except Exception as exc:  # pylint: disable=broad-except
+        file_path.unlink(missing_ok=True)
+        raise HTTPException(status_code=status.HTTP_400_BAD_REQUEST, detail=f"이미지 처리 실패: {exc}") from exc
+    # Store a path relative to the working directory when possible.
+    try:
+        stored_path = str(file_path.relative_to(Path.cwd()))
+    except ValueError:
+        stored_path = str(file_path)
+    page_create = schemas.PageCreate(
+        project_id=project_id,
+        page_number=page_number,
+        image_path=stored_path,
+        image_width=width,
+        image_height=height,
+    )
+    try:
+        page = crud.create_page(db, page_create)
+    except Exception:
+        # Do not leave an orphaned upload on disk when the DB insert fails.
+        file_path.unlink(missing_ok=True)
+        raise
+    logger.info(f"이미지 업로드 완료 - PageID: {page.page_id}")
+    return _page_to_response(page)
+@router.get(
+    "/{page_id}",
+    response_model=schemas.PageResponse,
+)
+def get_page_detail(
+    page_id: int,
+    db: Session = Depends(get_db),
+) -> schemas.PageResponse:
+    """Return a single page by ID, or respond 404 when it does not exist."""
+    found_page = crud.get_page(db, page_id)
+    if found_page:
+        return _page_to_response(found_page)
+    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="페이지를 찾을 수 없습니다.")
+@router.get(
+    "/project/{project_id}",
+    response_model=List[schemas.PageResponse],
+)
+def list_project_pages(
+    project_id: int,
+    db: Session = Depends(get_db),
+    include_error: bool = False,
+) -> List[schemas.PageResponse]:
+    """
+    List the pages of a project.
+    Pages whose analysis status is ERROR are left out unless ``include_error`` is true.
+    """
+    project_pages = crud.get_pages_by_project(db, project_id)
+    if include_error:
+        return [_page_to_response(entry) for entry in project_pages]
+    return [
+        _page_to_response(entry)
+        for entry in project_pages
+        if entry.analysis_status != AnalysisStatusEnum.ERROR
+    ]
+@router.get(
+    "/{page_id}/text",
+    response_model=schemas.PageTextResponse,
+    summary="현재 페이지 텍스트 조회",
+)
+def get_page_text(
+    page_id: int,
+    db: Session = Depends(get_db),
+) -> schemas.PageTextResponse:
+    """
+    Return the page's current text version as provided by ``get_current_page_text``.
+    Responds 404 when no version is found.
+    """
+    current_version = get_current_page_text(db, page_id)
+    if current_version:
+        return schemas.PageTextResponse.model_validate(current_version)
+    raise HTTPException(
+        status_code=status.HTTP_404_NOT_FOUND,
+        detail="해당 페이지의 텍스트 버전을 찾을 수 없습니다.",
+    )
+@router.post(
+    "/{page_id}/text",
+    response_model=schemas.PageTextResponse,
+    summary="사용자 수정 텍스트 저장",
+)
+def save_page_text(
+    page_id: int,
+    payload: schemas.PageTextUpdate,
+    db: Session = Depends(get_db),
+) -> schemas.PageTextResponse:
+    """
+    Save the user's edited text as a new text version of the page.
+    Delegates to ``save_user_edited_version`` and returns the version it reports.
+    Raises:
+        HTTPException: 404 when a ``ValueError`` is raised; 500 for any other failure
+    """
+    try:
+        version_data = save_user_edited_version(
+            db,
+            page_id,
+            payload.content,
+            user_id=payload.user_id,
+        )
+        return schemas.PageTextResponse.model_validate(version_data)
+    except ValueError as value_error:
+        raise HTTPException(
+            status_code=status.HTTP_404_NOT_FOUND,
+            detail=str(value_error),
+        ) from value_error
+    except Exception as error:  # pylint: disable=broad-except
+        # loguru uses "{}" placeholders and opt(exception=True) for the traceback;
+        # "%s" and exc_info are stdlib-logging conventions it does not interpret.
+        logger.opt(exception=True).error("페이지 텍스트 저장 실패: page_id={} / error={}", page_id, error)
+        raise HTTPException(
+            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
+            detail="페이지 텍스트 저장 중 오류가 발생했습니다.",
+        ) from error

app/routers/projects.py ADDED Viewed

	@@ -0,0 +1,113 @@

+from __future__ import annotations
+from typing import List, Optional
+from fastapi import APIRouter, Depends, HTTPException, Query, Response, status
+from sqlalchemy.orm import Session
+from .. import crud, schemas
+from ..database import get_db
+from ..models import Page, Project
+# Router for the project CRUD endpoints; routes below are served under /api/projects.
+router = APIRouter(
+    prefix="/api/projects",
+    tags=["Projects"],
+)
+class ProjectCreateRequest(schemas.ProjectCreate):
+    """Project creation request schema (ProjectCreate plus user_id)."""
+    # Owner of the new project; passed separately to crud.create_project.
+    user_id: int
+def _project_to_response(project: Project) -> schemas.ProjectResponse:
+    """Convert a Project ORM object into its ProjectResponse schema."""
+    response = schemas.ProjectResponse.model_validate(project)
+    return response
+def _page_to_response(page: Page) -> schemas.PageResponse:
+    """Convert a Page ORM object into its PageResponse schema."""
+    response = schemas.PageResponse.model_validate(page)
+    return response
+@router.post(
+    "",
+    response_model=schemas.ProjectResponse,
+    status_code=status.HTTP_201_CREATED,
+)
+def create_project_endpoint(
+    payload: ProjectCreateRequest,
+    db: Session = Depends(get_db),
+) -> schemas.ProjectResponse:
+    """Create a project for ``payload.user_id`` and return the created record."""
+    # crud.create_project takes user_id as its own argument, so rebuild a plain
+    # ProjectCreate from the remaining request fields.
+    project_fields = schemas.ProjectCreate(
+        project_name=payload.project_name,
+        doc_type_id=payload.doc_type_id,
+        analysis_mode=payload.analysis_mode,
+    )
+    created_project = crud.create_project(
+        db=db,
+        project=project_fields,
+        user_id=payload.user_id,
+    )
+    return _project_to_response(created_project)
+@router.get("", response_model=List[schemas.ProjectResponse])
+def list_projects(
+    db: Session = Depends(get_db),
+    user_id: Optional[int] = Query(default=None, description="특정 사용자 ID로 필터링"),
+    skip: int = Query(default=0, ge=0),
+    limit: int = Query(default=100, ge=1, le=1000),
+) -> List[schemas.ProjectResponse]:
+    """
+    List projects, newest first (ordered by ``created_at`` descending).
+    Optionally restricted to one user; paginated with ``skip`` and ``limit``.
+    """
+    projects_query = db.query(Project).order_by(Project.created_at.desc())
+    if user_id is not None:
+        projects_query = projects_query.filter(Project.user_id == user_id)
+    selected = projects_query.offset(skip).limit(limit).all()
+    return list(map(_project_to_response, selected))
+@router.get(
+    "/{project_id}",
+    response_model=schemas.ProjectWithPagesResponse,
+)
+def get_project_detail(
+    project_id: int,
+    db: Session = Depends(get_db),
+) -> schemas.ProjectWithPagesResponse:
+    """Return a project together with its pages, or respond 404 when it does not exist."""
+    found_project = crud.get_project_with_pages(db, project_id)
+    if not found_project:
+        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="프로젝트를 찾을 수 없습니다.")
+    # Reuse the plain project fields and attach the converted pages.
+    base_fields = _project_to_response(found_project).model_dump()
+    return schemas.ProjectWithPagesResponse(
+        **base_fields,
+        pages=list(map(_page_to_response, found_project.pages)),
+    )
+@router.patch(
+    "/{project_id}",
+    response_model=schemas.ProjectResponse,
+)
+def update_project_endpoint(
+    project_id: int,
+    payload: schemas.ProjectUpdate,
+    db: Session = Depends(get_db),
+) -> schemas.ProjectResponse:
+    """Apply ``payload`` to the project, or respond 404 when the update returns nothing."""
+    updated_project = crud.update_project(db, project_id, payload)
+    if updated_project:
+        return _project_to_response(updated_project)
+    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="프로젝트를 찾을 수 없습니다.")
+@router.delete(
+    "/{project_id}",
+    status_code=status.HTTP_204_NO_CONTENT,
+    response_class=Response,
+)
+def delete_project_endpoint(
+    project_id: int,
+    db: Session = Depends(get_db),
+) -> Response:
+    """Delete a project; respond 204 on success and 404 when the delete reports failure."""
+    deleted = crud.delete_project(db, project_id)
+    if deleted:
+        return Response(status_code=status.HTTP_204_NO_CONTENT)
+    raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail="프로젝트를 찾을 수 없습니다.")

app/schemas.py CHANGED Viewed

@@ -209,6 +209,37 @@ class PageWithElementsResponse(PageResponse):
     layout_elements: List["LayoutElementResponse"] = []
 # ============================================================================
 # 5. LayoutElement Schemas
 # ============================================================================
@@ -475,6 +506,34 @@ class CombinedResultResponse(CombinedResultBase):
     model_config = ConfigDict(from_attributes=True)
 # ============================================================================
 # 복합 응답 스키마 (관계 포함)
 # ============================================================================

     layout_elements: List["LayoutElementResponse"] = []
+# ============================================================================
+# 페이지 추가 응답/요청 스키마
+# ============================================================================
+class MultiPageCreateResponse(BaseModel):
+    """Response for multi-page creation (e.g. when a PDF is uploaded)."""
+    # ID of the project the pages belong to
+    project_id: int = Field(..., description="프로젝트 ID")
+    # Number of pages created; validated to be >= 0
+    total_created: int = Field(..., ge=0, description="생성된 페이지 수")
+    # Source type; the description names "pdf" or "image" (not enforced here)
+    source_type: str = Field(..., description="소스 타입 (pdf 또는 image)")
+    # The created pages; empty by default
+    pages: List[PageResponse] = Field(default=[], description="생성된 페이지 목록")
+    # Allow building this model from attribute-bearing objects
+    model_config = ConfigDict(from_attributes=True)
+class PageTextResponse(BaseModel):
+    """Response for a page-text lookup."""
+    page_id: int
+    version_id: int
+    version_type: str
+    # NOTE(review): presumably marks the version currently in effect for the page — confirm
+    is_current: bool
+    content: str
+    created_at: datetime
+    # Allow building this model from attribute-bearing objects
+    model_config = ConfigDict(from_attributes=True)
+class PageTextUpdate(BaseModel):
+    """Request body for updating a page's text."""
+    # Full text content to store (required)
+    content: str = Field(..., description="저장할 전체 텍스트 내용")
+    # ID of the user who made the edit; optional
+    user_id: Optional[int] = Field(None, description="수정한 사용자 ID")
 # ============================================================================
 # 5. LayoutElement Schemas
 # ============================================================================
     model_config = ConfigDict(from_attributes=True)
+class CombinedTextStats(BaseModel):
+    """Statistics for the combined text."""
+    total_pages: int
+    total_words: int
+    total_characters: int
+class CombinedTextResponse(BaseModel):
+    """Response carrying the combined text."""
+    project_id: int
+    # Optional; defaults to None when not provided
+    project_name: Optional[str] = None
+    combined_text: str
+    stats: CombinedTextStats
+    generated_at: datetime
+class DownloadResponse(BaseModel):
+    """Metadata response for a document download."""
+    # Response message; defaults to a success notice for Word document generation
+    message: str = Field(
+        "Word 문서가 성공적으로 생성되었습니다.", description="응답 메시지"
+    )
+    project_id: int
+    filename: str
+    # Downloadable URL; optional, defaults to None
+    download_url: Optional[str] = Field(
+        None, description="다운로드 가능한 URL (선택 사항)"
+    )
 # ============================================================================
 # 복합 응답 스키마 (관계 포함)
 # ============================================================================

app/services/__init__.py ADDED Viewed

	@@ -0,0 +1,72 @@

+"""
+SmartEyeSsen Backend - Services Module
+=======================================
+비즈니스 로직 서비스 모듈
+주요 서비스:
+- formatter_rules: 포맷팅 규칙 정의 (코드 기반)
+- formatter: 텍스트 포맷팅 처리
+- sorter: 레이아웃 정렬 알고리즘
+- analysis_service: 페이지 분석 파이프라인
+- batch_analysis: 다중 페이지 일괄 분석
+- download_service: 문서 생성 및 다운로드
+"""
+from .formatter_rules import (
+    RuleConfig,
+    QUESTION_BASED_RULES,
+    READING_ORDER_RULES,
+    get_rules_for_document_type,
+    fetch_db_rules,
+    override_rules_with_db,
+    get_rule_for_class
+)
+from .formatter import (
+    TextFormatter
+)
+from .sorter import (
+    sort_layout_elements,
+    save_sorting_results_to_db
+)
+from .analysis_service import analyze_page
+from .batch_analysis import analyze_project_batch, analyze_project_batch_async
+from .text_version_service import (
+    create_text_version,
+    get_current_page_text,
+    save_user_edited_version,
+)
+from .download_service import generate_document
+__all__ = [
+    # Formatter rules
+    "RuleConfig",
+    "QUESTION_BASED_RULES",
+    "READING_ORDER_RULES",
+    "get_rules_for_document_type",
+    "fetch_db_rules",
+    "override_rules_with_db",
+    "get_rule_for_class",
+    # Formatter
+    "TextFormatter",
+    # Sorter
+    "sort_layout_elements",
+    "save_sorting_results_to_db",
+    # Analysis
+    "analyze_page",
+    "analyze_project_batch",
+    "analyze_project_batch_async",
+    # Text Versions
+    "create_text_version",
+    "get_current_page_text",
+    "save_user_edited_version",
+    # Download
+    "generate_document",
+]

app/services/analysis_service.py ADDED Viewed

	@@ -0,0 +1,1057 @@

+# -*- coding: utf-8 -*-
+"""
+SmartEyeSsen Analysis Service (v1.1 - Duplicate Detection Filter Added)
+========================================================================
+학습지 분석 서비스 - 레이아웃 분석, OCR, AI 설명 생성을 담당합니다.
+Refactored from api_server.py WorksheetAnalyzer class.
+주요 변경사항 (DB 통합 버전):
+- analyze_layout: DocLayout-YOLO 결과를 layout_elements 테이블에 저장 후 ORM 객체 반환
+- perform_ocr: text_contents 테이블에 OCR 결과 upsert
+- call_openai_api / call_openai_api_async: ai_descriptions 테이블에 설명 텍스트 upsert
+- 중복 탐지 필터링(IoU 기반) 로직 유지
+"""
+import asyncio
+import base64
+import colorsys
+import io
+import platform
+import random
+from typing import Dict, List, Optional
+import cv2
+import numpy as np
+import openai
+import pytesseract
+import torch
+from PIL import Image
+from huggingface_hub import hf_hub_download
+from loguru import logger
+from openai import AsyncOpenAI
+from sqlalchemy.orm import Session
+from .. import models
+# --- 신규: 이미지 설명을 위한 프롬프트 템플릿 추가 ---
+figure_prompt = """
+당신은 초등학생을 위한 학습 도우미입니다.
+다음 그림을 초등학생이 쉽게 이해할 수 있도록 설명해주세요.
+[설명 규칙]
+1. 쉬운 말 사용: 어려운 용어 대신 일상 언어로 설명
+2. 중요한 것부터: 가장 눈에 띄는 것부터 차례대로 설명
+3. 위치 표현: "왼쪽에", "오른쪽에", "가운데" 같은 말로 위치 알려주기
+4. 구체적으로: 크기, 모양, 색깔을 쉽게 표현
+[출력 형식]
+이것은 무엇인가요: [한 문장으로 쉽게 설명]
+어떻게 생겼나요:
+- 전체 모습: [그림의 전체 모양]
+- 중요한 부분: [가장 중요한 것들]
+- 세부 내용: [자세한 설명]
+이 그림이 말하고 싶은 것: [핵심 내용을 한 문장으로]
+[예시]
+이것은 무엇인가요: 우리나라 인구가 어떻게 늘어났는지 보여주는 선 그래프예요.
+어떻게 생겼나요:
+- 전체 모습: 아래쪽에는 연도가, 왼쪽에는 인구수가 적혀 있어요.
+- 중요한 부분: 2000년부터 2025년까지 오른쪽 위로 올라가는 선이 그려져 있어요.
+- 세부 내용: 처음에는 빠르게 올라가다가 나중에는 천천히 올라가요.
+이 그림이 말하고 싶은 것: 우리나라 인구는 계속 늘어났지만, 요즘은 천천히 늘어나고 있어요.
+"""
+table_prompt = """
+당신은 초등학생을 위한 학습 도우미입니다.
+다음 표를 초등학생이 쉽게 이해할 수 있도록 설명해주세요.
+[설명 규칙]
+1. 쉬운 말 사용: 어려운 용어 대신 일상 언어로 설명
+2. 표의 모양: 몇 줄, 몇 칸인지 먼저 알려주기
+3. 제목 설명: 각 칸의 제목이 무엇인지 차례대로 말하기
+4. 내용 읽기: 왼쪽에서 오른쪽으로, 위에서 아래로 읽기
+[출력 형식]
+이것은 무엇인가요: [표의 내용을 한 문장으로]
+표의 모양:
+- 크기: [몇 줄, 몇 칸]
+- 제목: [각 칸의 제목]
+표에 적힌 내용:
+첫 번째 줄: [내용]
+두 번째 줄: [내용]
+세 번째 줄: [내용]
+중요한 내용: [표에서 가장 중요한 것]
+[예시]
+이것은 무엇인가요: 2024년에 회사가 번 돈을 분기별로 정리한 표예요.
+표의 모양:
+- 크기: 5줄, 4칸
+- 제목: 구분, 1분기, 2분기, 3분기
+표에 적힌 내용:
+첫 번째 줄 (매출액): 100억원, 120억원, 150억원
+두 번째 줄 (성장률): 10퍼센트, 20퍼센트, 25퍼센트
+세 번째 줄 (영업이익): 10억원, 15억원, 20억원
+네 번째 줄 (순이익): 8억원, 12억원, 18억원
+중요한 내용: 회사가 번 돈이 계속 늘어나고 있고, 3분기에 가장 많이 늘었어요.
+"""
+flowchart_prompt = """
+당신은 초등학생을 위한 학습 도우미입니다.
+다음 순서도를 초등학생이 쉽게 이해할 수 있도록 설명해주세요.
+[설명 규칙]
+1. 쉬운 말 사용: 어려운 용어 대신 일상 언어로 설명
+2. 단계별로 천천히: "첫 번째로", "그 다음에", "마지막으로" 같은 표현 사용
+3. 선택 상황: "만약 ~라면 어떻게 할까?" 형식으로 질문하듯 설명
+4. 구체적 예시: 가능하면 실생활 예시 추가
+[출력 형식]
+이것은 무엇인가요: [한 문장으로 쉽게 설명]
+어떻게 진행되나요:
+1. 처음에는 [시작 단계를 쉽게 설명]
+2. 그 다음에는 [다음 단계를 쉽게 설명]
+3. 여기서 선택해요: [선택 상황이 있다면]
+   - [조건]이면 → [결과]
+   - [조건]이 아니면 → [다른 결과]
+4. 마지막에는 [마무리 단계]
+핵심 내용: [전체 흐름을 한 문장으로 요약]
+[예시]
+이것은 무엇인가요: 우리가 웹사이트에 로그인하는 과정을 보여주는 그림이에요.
+어떻게 진행되나요:
+1. 처음에는 로그인 화면에서 아이디와 비밀번호를 입력해요.
+2. 그 다음에는 로그인 버튼을 눌러요.
+3. 여기서 선택해요: 입력한 정보가 맞는지 확인해요.
+   - 정보가 맞으면 → 메인 페이지로 들어가요. 끝!
+   - 정보가 틀리면 → 다시 입력하라는 메시지가 나와요.
+4. 만약 3번 틀리면 잠시 동안 로그인할 수 없어요.
+핵심 내용: 아이디와 비밀번호가 맞아야 웹사이트에 들어갈 수 있어요.
+"""
+# --- 신규: IoU 계산 함수 추가 ---
+def calculate_iou(box1, box2):
+    """두 바운딩 박스 간의 IoU(Intersection over Union) 계산"""
+    # box 형식: [x1, y1, x2, y2]
+    x1_inter = max(box1[0], box2[0])
+    y1_inter = max(box1[1], box2[1])
+    x2_inter = min(box1[2], box2[2])
+    y2_inter = min(box1[3], box2[3])
+    inter_area = max(0, x2_inter - x1_inter) * max(0, y2_inter - y1_inter)
+    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
+    box2_area = (box2[2] - box2[0]) * (box2[3] - box2[1])
+    union_area = box1_area + box2_area - inter_area
+    if union_area == 0:
+        return 0.0
+    return inter_area / union_area
+# --- 신규: 중복 제거 후처리 함수 추가 ---
+def filter_duplicate_detections(boxes, classes, confs, class_names, iou_threshold=0.7):
+    """
+    모든 클래스 쌍에 대해 IoU 기반으로 중복 탐지를 필터링. (자동 방식)
+    신뢰도가 낮은 쪽을 제거.
+    """
+    num_detections = len(boxes)
+    suppressed = [False] * num_detections  # 제거할 요소 표시
+    indices = list(range(num_detections))
+    # 신뢰도 높은 순으로 정렬 (높은 것을 남기기 위함)
+    indices.sort(key=lambda i: confs[i], reverse=True)
+    for i in range(num_detections):
+        idx1 = indices[i]
+        if suppressed[idx1]:
+            continue
+        box1 = boxes[idx1]
+        # cls_id1 = int(classes[idx1]) # 클래스 정보는 제거 로직에 불필요
+        # cls_name1 = class_names.get(cls_id1, f"unknown_{cls_id1}") # 클래스 정보는 제거 로직에 불필요
+        for j in range(i + 1, num_detections):
+            idx2 = indices[j]
+            if suppressed[idx2]:
+                continue
+            box2 = boxes[idx2]
+            # cls_id2 = int(classes[idx2]) # 클래스 정보는 제거 로직에 불필요
+            # cls_name2 = class_names.get(cls_id2, f"unknown_{cls_id2}") # 클래스 정보는 제거 로직에 불필요
+            # --- 👇 수정된 부분 시작 👇 ---
+            # 특정 클래스 쌍 확인 조건 제거: 모든 쌍에 대해 IoU 계산
+            # if (cls_name1, cls_name2) in problematic_pairs: # 이 조건 제거
+            iou = calculate_iou(box1, box2)
+            if iou > iou_threshold:
+                # 신뢰도 낮은 쪽(idx2)을 제거 대상으로 표시
+                suppressed[idx2] = True
+                # 로그 메시지에서 클래스 이름 제거 (선택 사항)
+                logger.debug(
+                    f"중복 탐지 제거: Box {idx2}(conf={confs[idx2]:.2f}) - "
+                    f"Box {idx1}(conf={confs[idx1]:.2f})와 IoU={iou:.2f} > {iou_threshold}"
+                )
+            # --- 👆 수정된 부분 끝 👆 ---
+    # 제거되지 않은 요소들의 인덱스 반환
+    final_indices = [i for i, s in enumerate(suppressed) if not s]
+    logger.info(
+        f"자동 중복 탐지 필터링: {num_detections}개 → {len(final_indices)}개 요소 (IoU > {iou_threshold})"
+    )  # 로그 메시지 수정
+    return final_indices
+# On Windows, point pytesseract at the default Tesseract install path (unchanged from before)
+if platform.system() == "Windows":
+    pytesseract.pytesseract.tesseract_cmd = (
+        r"C:\Program Files\Tesseract-OCR\tesseract.exe"
+    )
+# Device selection (unchanged from before): first CUDA device when available, otherwise CPU
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
+class AnalysisService:
+    """학습지 분석 서비스 - 상태 없는 함수형 디자인"""
+    def __init__(self, model_choice: str = "SmartEyeSsen", auto_load: bool = False):
+        """
+        분석 서비스 초기화
+        Args:
+            model_choice: 사용할 모델 선택 (기본값: "SmartEyeSsen")
+            auto_load: True이면 초기화 시 자동으로 모델 로드 (기본값: False, 하위 호환성 유지)
+        """
+        self.model = None
+        self.device = device
+        self.model_choice = model_choice
+        self._model_loaded = False
+        # 자동 로드 옵션이 활성화된 경우 즉시 모델 로드
+        if auto_load:
+            self._ensure_model_loaded()
+    def download_model(self, model_choice="SmartEyeSsen"):
+        """모델 다운로드 (기존과 동일)"""
+        models = {
+            "doclaynet_docsynth": {
+                "repo_id": "juliozhao/DocLayout-YOLO-DocLayNet-Docsynth300K_pretrained",
+                "filename": "doclayout_yolo_doclaynet_imgsz1120_docsynth_pretrain.pt",
+            },
+            "docstructbench": {
+                "repo_id": "juliozhao/DocLayout-YOLO-DocStructBench",
+                "filename": "doclayout_yolo_docstructbench_imgsz1024.pt",
+            },
+            "docsynth300k": {
+                "repo_id": "juliozhao/DocLayout-YOLO-DocSynth300K-pretrain",
+                "filename": "doclayout_yolo_docsynth300k_imgsz1600.pt",
+            },
+            "SmartEyeSsen": {"repo_id": "AkJeond/SmartEye", "filename": "best.pt"},
+        }
+        selected_model = models.get(model_choice, models["SmartEyeSsen"])
+        try:
+            logger.info(f"모델 다운로드 중: {selected_model['repo_id']}")
+            filepath = hf_hub_download(
+                repo_id=selected_model["repo_id"], filename=selected_model["filename"]
+            )
+            logger.info(f"모델 다운로드 완료: {filepath}")
+            return filepath
+        except Exception as e:
+            logger.error(f"모델 다운로드 실패: {e}")
+            raise
+    def load_model(self, model_path):
+        """모델 로드 (기존과 동일)"""
+        try:
+            try:
+                from doclayout_yolo import YOLOv10
+            except ImportError:
+                logger.error("DocLayout-YOLO가 설치되지 않았습니다.")
+                return False
+            logger.info("모델 로드 중...")
+            self.model = YOLOv10(model_path, task="predict")
+            self.model.to(self.device)
+            if hasattr(self.model, "training"):
+                self.model.training = False
+            logger.info("모델 로드 완료!")
+            return True
+        except Exception as e:
+            logger.error(f"모델 로드 실패: {e}")
+            return False
+    def _ensure_model_loaded(self):
+        """
+        Lazy Loading: 모델이 로드되지 않았으면 자동으로 로드
+        (다중 페이지 처리 시 모델을 한 번만 로드하도록 최적화)
+        """
+        if self._model_loaded and self.model is not None:
+            return  # 이미 로드됨
+        logger.info(f"모델 자동 로드 시작 (선택: {self.model_choice})...")
+        model_path = self.download_model(self.model_choice)
+        if not self.load_model(model_path):
+            raise RuntimeError(f"모델 로드 실패: {self.model_choice}")
+        self._model_loaded = True
+        logger.info("모델 자동 로드 완료!")
+    def analyze_layout(
+        self,
+        image: np.ndarray,
+        *,
+        page_id: int,
+        db: Session,
+        model_choice: Optional[str] = None,
+    ) -> List[models.LayoutElement]:
+        """
+        레이아웃 분석 + 중복 탐지 필터링 후 결과를 DB에 저장한다.
+        Args:
+            image: 분석할 이미지 (numpy array)
+            page_id: 결과를 저장할 pages.page_id
+            db: SQLAlchemy Session
+            model_choice: 사용할 모델 (미지정 시 인스턴스 기본값 사용)
+        Returns:
+            DB에 저장된 LayoutElement ORM 객체 리스트
+        """
+        active_model = model_choice or self.model_choice
+        try:
+            # 모델 선택이 변경되었으면 재로드
+            if active_model != self.model_choice:
+                logger.warning(f"모델 변경 감지: {self.model_choice} -> {active_model}")
+                self.model_choice = active_model
+                self._model_loaded = False
+            # Lazy Loading: 모델이 없으면 자동 로드
+            self._ensure_model_loaded()
+            logger.info("레이아웃 분석 시작...")
+            temp_path = "temp_image.jpg"
+            cv2.imwrite(temp_path, image)
+            if active_model == "SmartEyeSsen":
+                imgsz, conf = 1024, 0.25
+            elif active_model == "docsynth300k":
+                imgsz, conf = 1600, 0.15
+            else:
+                imgsz, conf = 1024, 0.25
+            results = self.model.predict(
+                temp_path, imgsz=imgsz, conf=conf, iou=0.45, device=self.device
+            )
+            boxes = results[0].boxes.xyxy.cpu().numpy()  # [x1, y1, x2, y2]
+            classes = results[0].boxes.cls.cpu().numpy()
+            confs = results[0].boxes.conf.cpu().numpy()
+            class_names = self.model.names  # 클래스 ID → 이름
+            detection_records: List[Dict[str, float]] = []
+            if not boxes.size:
+                logger.warning("레이아웃 분석 결과, 감지된 요소가 없습니다.")
+                return self._create_elements_from_layout(
+                    detections=detection_records, page_id=page_id, db=db
+                )
+            final_indices = filter_duplicate_detections(
+                boxes, classes, confs, class_names, iou_threshold=0.7
+            )
+            for i in final_indices:
+                box = boxes[i]
+                cls_id = int(classes[i])
+                conf_val = float(confs[i])
+                x1, y1, x2, y2 = map(int, box)
+                cls_name = (
+                    class_names.get(cls_id, f"unknown_{cls_id}")
+                    if isinstance(class_names, dict)
+                    else None
+                )
+                if cls_name is None:
+                    try:
+                        cls_name = class_names[cls_id]
+                    except (IndexError, KeyError):
+                        cls_name = f"unknown_{cls_id}"
+                width = x2 - x1
+                height = y2 - y1
+                area = width * height
+                if area < 100:
+                    continue
+                detection_records.append(
+                    {
+                        "class_name": cls_name,
+                        "confidence": conf_val,
+                        "bbox_x": x1,
+                        "bbox_y": y1,
+                        "bbox_width": width,
+                        "bbox_height": height,
+                    }
+                )
+            elements = self._create_elements_from_layout(
+                detections=detection_records, page_id=page_id, db=db
+            )
+            logger.info(f"레이아웃 분석 완료: 최종 {len(elements)}개 요소 저장")
+            return elements
+        except Exception as e:
+            logger.error(f"레이아웃 분석 실패: {e}", exc_info=True)
+            return []
+    def _create_elements_from_layout(
+        self, *, detections: List[Dict[str, float]], page_id: int, db: Session
+    ) -> List[models.LayoutElement]:
+        """
+        감지 결과를 layout_elements 테이블에 저장하고 ORM 객체 리스트를 반환한다.
+        """
+        logger.debug(f"페이지 {page_id} 기존 레이아웃 요소 정리")
+        existing_elements = (
+            db.query(models.LayoutElement)
+            .filter(models.LayoutElement.page_id == page_id)
+            .all()
+        )
+        for element in existing_elements:
+            db.delete(element)
+        db.flush()  # CASCADE 관계 정리
+        if not detections:
+            db.commit()
+            return []
+        created_elements: List[models.LayoutElement] = []
+        for record in detections:
+            element = models.LayoutElement(
+                page_id=page_id,
+                class_name=record["class_name"],
+                confidence=record["confidence"],
+                bbox_x=int(record["bbox_x"]),
+                bbox_y=int(record["bbox_y"]),
+                bbox_width=int(record["bbox_width"]),
+                bbox_height=int(record["bbox_height"]),
+            )
+            db.add(element)
+            created_elements.append(element)
+        db.flush()
+        db.commit()
+        for element in created_elements:
+            db.refresh(element)
+        return created_elements
+    def _upsert_text_content(
+        self,
+        *,
+        db: Session,
+        element_id: int,
+        ocr_text: str,
+        ocr_engine: str,
+        language: str,
+        ocr_confidence: Optional[float] = None,
+    ) -> models.TextContent:
+        """
+        텍스트 콘텐츠를 생성하거나 업데이트한다.
+        """
+        existing = (
+            db.query(models.TextContent)
+            .filter(models.TextContent.element_id == element_id)
+            .one_or_none()
+        )
+        if existing:
+            existing.ocr_text = ocr_text
+            existing.ocr_engine = ocr_engine
+            existing.language = language
+            existing.ocr_confidence = ocr_confidence
+            db.flush()
+            return existing
+        content = models.TextContent(
+            element_id=element_id,
+            ocr_text=ocr_text,
+            ocr_engine=ocr_engine,
+            ocr_confidence=ocr_confidence,
+            language=language,
+        )
+        db.add(content)
+        db.flush()
+        return content
+    def _upsert_ai_descriptions(
+        self,
+        *,
+        db: Session,
+        descriptions: Dict[int, str],
+        model_name: str,
+        prompt: Optional[str],
+    ) -> List[models.AIDescription]:
+        """
+        AI 설명을 생성하거나 갱신한다.
+        """
+        saved_records: List[models.AIDescription] = []
+        for element_id, description in descriptions.items():
+            existing = (
+                db.query(models.AIDescription)
+                .filter(models.AIDescription.element_id == element_id)
+                .one_or_none()
+            )
+            if existing:
+                existing.description = description
+                existing.ai_model = model_name
+                existing.prompt_used = prompt
+                db.flush()
+                saved_records.append(existing)
+                continue
+            record = models.AIDescription(
+                element_id=element_id,
+                description=description,
+                ai_model=model_name,
+                prompt_used=prompt,
+            )
+            db.add(record)
+            saved_records.append(record)
+        db.flush()
+        db.commit()
+        for record in saved_records:
+            db.refresh(record)
+        return saved_records
+    def perform_ocr(
+        self,
+        image: np.ndarray,
+        layout_elements: List[models.LayoutElement],
+        *,
+        db: Session,
+        language: str = "kor",
+    ) -> List[models.TextContent]:
+        """OCR 처리 (영역별 전처리 추가) 및 text_contents 테이블 저장"""
+        target_classes = [
+            "plain text",
+            "unit",
+            "question type",
+            "question text",
+            "question number",
+            "title",
+            "figure_caption",
+            "table caption",
+            "table footnote",
+            "isolate_formula",
+            "formula_caption",
+            "list",
+            "choices",
+            "page",
+            "second_question_number",
+        ]
+        ocr_results: List[models.TextContent] = []
+        custom_config = r"--oem 3 --psm 6"
+        logger.info(
+            f"OCR 처리 시작... 총 {len(layout_elements)}개 레이아웃 요소 중 OCR 대상 필터링"
+        )
+        logger.info(f"OCR 대상 클래스 목록: {target_classes}")
+        detected_classes = {elem.class_name for elem in layout_elements}  # Set으로 변경
+        logger.info(f"감지된 모든 클래스: {detected_classes}")
+        target_count = 0
+        for element in layout_elements:
+            cls_name = element.class_name  # Pydantic 모델은 이미 lower() 불필요
+            logger.debug(
+                f"레이아웃 ID {element.element_id}: 클래스 '{cls_name}' 확인 중..."
+            )  # DEBUG 레벨로 변경
+            if cls_name not in target_classes:
+                logger.debug(f"  → OCR 대상 아님")
+                continue
+            target_count += 1
+            logger.debug(
+                f"  → OCR 대상 {target_count}: ID {element.element_id} - 클래스 '{cls_name}'"
+            )
+            # 1. 영역 이미지 잘라내기 (기존 코드)
+            x1, y1 = element.bbox_x, element.bbox_y
+            x2, y2 = x1 + element.bbox_width, y1 + element.bbox_height
+            # 이미지 경계 내로 좌표 조정
+            x1, y1 = max(0, x1), max(0, y1)
+            x2, y2 = min(image.shape[1], x2), min(image.shape[0], y2)
+            if y2 <= y1 or x2 <= x1:  # 크기가 0이거나 음수인 경우 건너뛰기
+                logger.warning(
+                    f"  → 유효하지 않은 BBox 크기: ID {element.element_id}, 건너뜀"
+                )
+                continue
+            cropped_img = image[y1:y2, x1:x2]
+            try:
+                # --- 👇 영역별 전처리 단계 시작 👇 ---
+                # 2. 그레이스케일 변환: 색상 정보 제거
+                gray_img = cv2.cvtColor(cropped_img, cv2.COLOR_BGR2GRAY)
+                # 3. 이진화 (Otsu's Binarization): 텍스트/배경 명확화
+                # Otsu 방식은 임계값을 자동으로 결정해 줍니다.
+                # 필요에 따라 cv2.adaptiveThreshold 등 다른 방식 사용 가능
+                _, binary_img = cv2.threshold(
+                    gray_img, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU
+                )
+                # 4. (선택적) 노이즈 제거: Median 필터 적용 (작은 점 제거에 효과적)
+                # 커널 크기(예: 3)는 실험을 통해 조정
+                denoised_img = cv2.medianBlur(binary_img, 3)
+                # --- 👆 영역별 전처리 단계 끝 👆 ---
+                # 5. 전처리된 이미지로 OCR 수행
+                # Pillow 이미지로 변환 (Tesseract는 Pillow 이미지 입력 선호)
+                pil_img = Image.fromarray(cropped_img)
+                text = pytesseract.image_to_string(
+                    pil_img, lang="kor", config=custom_config
+                ).strip()
+                if len(text) > 1:  # 빈 문자열이 아닌 경우만
+                    db_text = self._upsert_text_content(
+                        db=db,
+                        element_id=element.element_id,
+                        ocr_text=text,
+                        ocr_engine="Tesseract",
+                        language=language,
+                    )
+                    ocr_results.append(db_text)
+                    logger.info(
+                        f"✅ OCR 성공: ID {element.element_id} ({cls_name}) - '{text[:50].replace(chr(10), ' ')}...' ({len(text)}자)"
+                    )  # 개행문자 제거
+                else:
+                    logger.warning(
+                        f"⚠️ OCR 결과 없음: ID {element.element_id} ({cls_name})"
+                    )
+            except Exception as e:
+                logger.error(
+                    f"OCR 실패: ID {element.element_id} - {e}", exc_info=True
+                )  # 상세 에러
+        db.commit()
+        for content in ocr_results:
+            db.refresh(content)
+        logger.info(f"OCR 처리 완료: {len(ocr_results)}개 텍스트 블록 저장")
+        return ocr_results
+    def call_openai_api(
+        self,
+        image: np.ndarray,
+        layout_elements: List[models.LayoutElement],
+        *,
+        api_key: Optional[str],
+        db: Session,
+        model_name: str = "gpt-4-turbo",
+    ) -> Dict[int, str]:
+        """OpenAI API 호출 및 ai_descriptions 테이블 저장"""
+        if not api_key:
+            logger.warning("API 키가 없어 AI 설명 생성을 건너뜁니다.")
+            return {}
+        target_classes = ["figure", "table", "flowchart"]
+        ai_descriptions: Dict[int, str] = {}
+        try:
+            client = openai.OpenAI(api_key=api_key)
+            logger.info("OpenAI API 처리 시작...")
+        except Exception as e:
+            logger.error(f"OpenAI 클라이언트 초기화 실패: {e}")
+            return {}
+        prompts = {
+            "figure": figure_prompt,
+            "table": table_prompt,
+            "flowchart": flowchart_prompt,
+        }
+        system_prompt = (
+            "당신은 시각 장애 아동 학습 AI 비서입니다. "
+            "시각 자료 내용을 한국어로 간결하고 명확하게 설명하세요. "
+            "음성 변환 시 이해하기 쉽도록 직접적인 문장을 사용하세요."
+        )
+        for element in layout_elements:
+            cls_name = element.class_name
+            if cls_name not in target_classes:
+                continue
+            x1, y1 = element.bbox_x, element.bbox_y
+            x2, y2 = x1 + element.bbox_width, y1 + element.bbox_height
+            if y2 <= y1 or x2 <= x1:
+                continue
+            cropped_img = image[y1:y2, x1:x2]
+            pil_img = Image.fromarray(cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB))
+            buffered = io.BytesIO()
+            pil_img.save(buffered, format="PNG")
+            img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
+            prompt = prompts.get(cls_name, f"이 {cls_name} 내용 설명")
+            try:
+                response = client.chat.completions.create(
+                    model=model_name,
+                    messages=[
+                        {"role": "system", "content": system_prompt},
+                        {
+                            "role": "user",
+                            "content": [
+                                {"type": "text", "text": prompt},
+                                {
+                                    "type": "image_url",
+                                    "image_url": {
+                                        "url": f"data:image/png;base64,{img_base64}"
+                                    },
+                                },
+                            ],
+                        },
+                    ],
+                    temperature=0.2,
+                    max_tokens=600,
+                )
+                description = response.choices[0].message.content.strip()
+                ai_descriptions[element.element_id] = description
+                logger.info(f"API 응답 완료: ID {element.element_id} - {cls_name}")
+            except Exception as e:
+                logger.error(
+                    f"API 요청 실패: ID {element.element_id} - {e}", exc_info=True
+                )
+        saved = self._upsert_ai_descriptions(
+            db=db, descriptions=ai_descriptions, model_name=model_name, prompt=None
+        )
+        logger.info(f"OpenAI API 처리 완료: {len(saved)}개 설명 생성 및 저장")
+        return ai_descriptions
+    async def call_openai_api_async(
+        self,
+        image: np.ndarray,
+        layout_elements: List[models.LayoutElement],
+        api_key: str,
+        *,
+        db: Optional[Session] = None,
+        model_name: str = "gpt-4-turbo",
+        max_concurrent_requests: int = 5,
+    ) -> Dict[int, str]:
+        """
+        OpenAI API 비동기 병렬 호출 (성능 최적화 버전)
+        Args:
+            image: 원본 이미지 (BGR 포맷)
+            layout_elements: 레이아웃 요소 리스트
+            api_key: OpenAI API 키
+            db: SQLAlchemy Session (선택, 제공 시 DB에 설명 저장)
+            model_name: 사용할 OpenAI 모델 이름
+            max_concurrent_requests: 최대 동시 요청 수 (기본값: 5)
+        Returns:
+            Dict[int, str]: {element_id: AI 설명} 딕셔너리
+        주요 개선사항:
+        - 비동기 병렬 처리로 처리 시간 70% 단축
+        - asyncio.Semaphore로 Rate Limit 대응
+        - 지수 백오프 재시도 로직 (exponential backoff)
+        """
+        if not api_key:
+            logger.warning("API 키가 없어 AI 설명 생성을 건너뜁니다.")
+            return {}
+        # 1. 대상 클래스 필터링 (figure, table, flowchart만 처리)
+        target_classes = ["figure", "table", "flowchart"]
+        target_elements = [
+            elem for elem in layout_elements if elem.class_name in target_classes
+        ]
+        if not target_elements:
+            logger.info("AI 설명 대상 요소가 없습니다.")
+            return {}
+        logger.info(
+            f"OpenAI API 비동기 처리 시작... (총 {len(target_elements)}개 요소)"
+        )
+        # 2. AsyncOpenAI 클라이언트 초기화
+        try:
+            async_client = AsyncOpenAI(api_key=api_key)
+        except Exception as e:
+            logger.error(f"AsyncOpenAI 클라이언트 초기화 실패: {e}")
+            return {}
+        # 3. Semaphore로 동시 요청 수 제한 (Rate Limit 대응)
+        semaphore = asyncio.Semaphore(max_concurrent_requests)
+        # 4. 모든 비동기 태스크 생성
+        tasks = [
+            self._process_single_element_async(
+                async_client=async_client,
+                image=image,
+                element=elem,
+                semaphore=semaphore,
+                model_name=model_name,
+            )
+            for elem in target_elements
+        ]
+        # 5. 병렬 실행 (asyncio.gather)
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+        # 6. 결과 매핑 및 예외 처리
+        ai_descriptions = {}
+        success_count = 0
+        error_count = 0
+        for element, result in zip(target_elements, results):
+            if isinstance(result, Exception):
+                logger.error(f"API 실패: Element {element.element_id} - {result}")
+                error_count += 1
+            elif result:  # 성공 시 (빈 문자열이 아닌 경우)
+                ai_descriptions[element.element_id] = result
+                success_count += 1
+                logger.info(
+                    f"✅ API 성공: Element {element.element_id} ({element.class_name})"
+                )
+        logger.info(
+            f"OpenAI API 비동기 처리 완료: "
+            f"성공 {success_count}건, 실패 {error_count}건 / 총 {len(target_elements)}건"
+        )
+        if db and ai_descriptions:
+            saved = self._upsert_ai_descriptions(
+                db=db, descriptions=ai_descriptions, model_name=model_name, prompt=None
+            )
+            logger.info(f"AI 설명 {len(saved)}건 저장 완료 (비동기)")
+        return ai_descriptions
+    async def _process_single_element_async(
+        self,
+        async_client: AsyncOpenAI,
+        image: np.ndarray,
+        element: models.LayoutElement,
+        semaphore: asyncio.Semaphore,
+        model_name: str,
+    ) -> str:
+        """
+        단일 element에 대한 비동기 AI 설명 생성 (지수 백오프 재시도 포함)
+        Args:
+            async_client: AsyncOpenAI 클라이언트
+            image: 원본 이미지
+            element: 처리할 레이아웃 요소
+            semaphore: 동시 요청 수 제한용 Semaphore
+            model_name: 사용할 OpenAI 모델 이름
+        Returns:
+            str: AI 생성 설명 텍스트
+        재시도 로직:
+        - 최대 3회 재시도
+        - 대기 시간: 1초 → 2초 → 4초 (지수 백오프)
+        """
+        # 1. 이미지 크롭 및 검증
+        x1, y1 = element.bbox_x, element.bbox_y
+        x2, y2 = x1 + element.bbox_width, y1 + element.bbox_height
+        # 크기 검증
+        if y2 <= y1 or x2 <= x1:
+            logger.warning(f"유효하지 않은 BBox 크기: Element {element.element_id}")
+            return ""
+        # 이미지 크롭
+        cropped_img = image[y1:y2, x1:x2]
+        # 2. PIL 이미지 변환 및 Base64 인코딩
+        pil_img = Image.fromarray(cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB))
+        buffered = io.BytesIO()
+        pil_img.save(buffered, format="PNG")
+        img_base64 = base64.b64encode(buffered.getvalue()).decode("utf-8")
+        # 3. 프롬프트 선택
+        prompts = {
+            "figure": figure_prompt,
+            "table": table_prompt,
+            "flowchart": flowchart_prompt,
+        }
+        prompt = prompts.get(element.class_name, f"이 {element.class_name} 내용 설명")
+        system_prompt = (
+            "당신은 시각 장애 아동 학습 AI 비서입니다. "
+            "시각 자료 내용을 한국어로 간결, 명확하게 설명하세요. "
+            "음성 변환 가능하게 직접적이고 이해하기 쉽게 작성하세요."
+        )
+        # 4. 지수 백오프 재시도 로직
+        max_retries = 3
+        base_delay = 1.0  # 초 단위
+        async with semaphore:  # Rate Limit 제어
+            for attempt in range(max_retries):
+                try:
+                    # API 호출
+                    response = await async_client.chat.completions.create(
+                        model=model_name,
+                        messages=[
+                            {"role": "system", "content": system_prompt},
+                            {
+                                "role": "user",
+                                "content": [
+                                    {"type": "text", "text": prompt},
+                                    {
+                                        "type": "image_url",
+                                        "image_url": {
+                                            "url": f"data:image/png;base64,{img_base64}"
+                                        },
+                                    },
+                                ],
+                            },
+                        ],
+                        temperature=0.2,
+                        max_tokens=600,
+                    )
+                    # 성공 시 결과 반환
+                    description = response.choices[0].message.content.strip()
+                    logger.debug(
+                        f"API 응답 완료 (시도 {attempt + 1}/{max_retries}): "
+                        f"Element {element.element_id}"
+                    )
+                    return description
+                except openai.RateLimitError as e:
+                    # Rate Limit 오류: 지수 백오프 대기 후 재시도
+                    if attempt < max_retries - 1:
+                        delay = base_delay * (2**attempt)  # 1초 → 2초 → 4초
+                        logger.warning(
+                            f"⚠️ Rate Limit 오류 (Element {element.element_id}): "
+                            f"{delay}초 대기 후 재시도 ({attempt + 1}/{max_retries})"
+                        )
+                        await asyncio.sleep(delay)
+                    else:
+                        logger.error(
+                            f"❌ Rate Limit 오류 최종 실패 (Element {element.element_id}): {e}"
+                        )
+                        raise  # 최종 실패 시 예외 전파
+                except openai.APIError as e:
+                    # API 일반 오류: 지수 백오프 대기 후 재시도
+                    if attempt < max_retries - 1:
+                        delay = base_delay * (2**attempt)
+                        logger.warning(
+                            f"⚠️ API 오류 (Element {element.element_id}): "
+                            f"{delay}초 대기 후 재시도 ({attempt + 1}/{max_retries}) - {e}"
+                        )
+                        await asyncio.sleep(delay)
+                    else:
+                        logger.error(
+                            f"❌ API 오류 최종 실패 (Element {element.element_id}): {e}"
+                        )
+                        raise
+                except Exception as e:
+                    # 기타 예외: 즉시 실패
+                    logger.error(
+                        f"❌ 예상치 못한 오류 (Element {element.element_id}): {e}",
+                        exc_info=True,
+                    )
+                    raise
+        # 모든 재시도 실패 시 빈 문자열 반환 (unreachable, but for type safety)
+        return ""
+    def visualize_results(
+        self, image: np.ndarray, layout_elements: List[models.LayoutElement]
+    ) -> np.ndarray:
+        """결과 시각화 (기존과 동일)"""
+        img_result = image.copy()
+        overlay = image.copy()
+        random.seed(42)
+        unique_classes = list({elem.class_name for elem in layout_elements})
+        class_colors = {}
+        for i, cls_name in enumerate(unique_classes):
+            h, s, v = i / max(1, len(unique_classes)), 0.8, 0.9
+            r, g, b = colorsys.hsv_to_rgb(h, s, v)
+            class_colors[cls_name] = (int(b * 255), int(g * 255), int(r * 255))
+        for element in layout_elements:
+            x1, y1 = element.bbox_x, element.bbox_y
+            x2, y2 = x1 + element.bbox_width, y1 + element.bbox_height
+            cls_name, color = element.class_name, class_colors[element.class_name]
+            cv2.rectangle(overlay, (x1, y1), (x2, y2), color, -1)
+            cv2.rectangle(img_result, (x1, y1), (x2, y2), color, 2)
+            label = f"{cls_name} ({element.confidence:.2f})"
+            labelSize, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+            y1_label = max(y1, labelSize[1] + 10)
+            cv2.rectangle(
+                img_result,
+                (x1, y1_label - labelSize[1] - 10),
+                (x1 + labelSize[0], y1_label),
+                color,
+                -1,
+            )
+            cv2.putText(
+                img_result,
+                label,
+                (x1, y1_label - 5),
+                cv2.FONT_HERSHEY_SIMPLEX,
+                0.5,
+                (255, 255, 255),
+                1,
+            )
+        img_result = cv2.addWeighted(overlay, 0.2, img_result, 0.8, 0)
+        return cv2.cvtColor(img_result, cv2.COLOR_BGR2RGB)
+def analyze_page(
+    *,
+    page_id: int,
+    image: np.ndarray,
+    db: Session,
+    api_key: Optional[str] = None,
+    model_choice: Optional[str] = None,
+) -> Dict[str, object]:
+    """단일 페이지에 대한 전체 분석 파이프라인을 실행한다."""
+    service = AnalysisService(
+        model_choice=model_choice or "SmartEyeSsen", auto_load=False
+    )
+    layout_elements = service.analyze_layout(
+        image=image, page_id=page_id, db=db, model_choice=model_choice
+    )
+    text_contents = service.perform_ocr(
+        image=image, layout_elements=layout_elements, db=db
+    )
+    ai_descriptions: Dict[int, str] = {}
+    if api_key:
+        ai_descriptions = service.call_openai_api(
+            image=image, layout_elements=layout_elements, api_key=api_key, db=db
+        )
+    return {
+        "layout_elements": layout_elements,
+        "text_contents": text_contents,
+        "ai_descriptions": ai_descriptions,
+    }

app/services/batch_analysis.py ADDED Viewed

	@@ -0,0 +1,450 @@

+"""
+Project Batch Analysis Service
+=============================
+실제 데이터베이스(DB) 기반으로 프로젝트 내 페이지들을 순차적으로 분석하고
+정렬(Question Grouping) 및 포맷팅(Text Version 생성)까지 수행합니다.
+파이프라인 (페이지 단위)
+1. 이미지 로드
+2. AnalysisService로 레이아웃 → OCR → (선택) AI 설명 생성
+3. sorter.py를 이용한 정렬 후 question_groups / question_elements 저장
+4. TextFormatter로 자동 포맷팅 → text_versions에 최신 버전 기록
+결과는 페이지별 요약 정보와 함께 프로젝트 상태를 갱신합니다.
+"""
+from __future__ import annotations
+import asyncio
+import os
+import time
+from datetime import datetime
+from functools import lru_cache
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+import cv2
+import numpy as np
+from loguru import logger
+from sqlalchemy.orm import Session, selectinload
+from ..models import LayoutElement, Page, Project
+from .analysis_service import AnalysisService
+from .formatter import TextFormatter
+from .mock_models import MockElement
+from .sorter import save_sorting_results_to_db, sort_layout_elements
+from .text_version_service import create_text_version
+# -----------------------------------------------------------------------------
+# Internal constants & helpers
+# -----------------------------------------------------------------------------
+# "uploads" directory located two levels above this module's directory.
+UPLOADS_ROOT = (Path(__file__).resolve().parents[2] / "uploads").resolve()
+# Default number of concurrent OpenAI requests; read once at import time from
+# OPENAI_MAX_CONCURRENCY (falls back to 5). A non-integer value raises ValueError
+# on import.
+DEFAULT_AI_CONCURRENCY = int(os.getenv("OPENAI_MAX_CONCURRENCY", "5"))
+@lru_cache(maxsize=1)
+def _get_analysis_service(model_choice: str = "SmartEyeSsen") -> AnalysisService:
+    """
+    모델 로딩 비용을 줄이기 위해 AnalysisService 인스턴스를 캐시합니다.
+    """
+    logger.debug("AnalysisService 인스턴스 요청 (model_choice=%s)", model_choice)
+    return AnalysisService(model_choice=model_choice, auto_load=False)
+def _resolve_image_path(image_path: str) -> Path:
+    """
+    Page.image_path 값을 절대 경로로 변환합니다.
+    """
+    raw_path = Path(image_path)
+    candidates = []
+    if raw_path.is_absolute():
+        candidates.append(raw_path)
+    else:
+        candidates.append((UPLOADS_ROOT / raw_path).resolve())
+        candidates.append((Path.cwd() / "uploads" / raw_path).resolve())
+        candidates.append((Path.cwd() / raw_path).resolve())
+    for candidate in candidates:
+        if candidate.exists():
+            return candidate
+    raise FileNotFoundError(
+        "이미지 파일을 찾을 수 없습니다. "
+        f"확인된 경로: {[str(path) for path in candidates]}"
+    )
+def _load_page_image(page: Page) -> np.ndarray:
+    """
+    페이지 객체에서 이미지를 로드하고, 해상도 정보를 갱신합니다.
+    """
+    resolved_path = _resolve_image_path(page.image_path)
+    image = cv2.imread(str(resolved_path))
+    if image is None:
+        raise ValueError(f"이미지 파일을 읽을 수 없습니다: {resolved_path}")
+    height, width = image.shape[:2]
+    if page.image_width != width or page.image_height != height:
+        page.image_width = width
+        page.image_height = height
+    return image
+def _layout_to_mock(elements: List[LayoutElement]) -> List[MockElement]:
+    """
+    SQLAlchemy LayoutElement 객체를 sorter에서 사용하는 MockElement로 변환합니다.
+    """
+    mock_elements: List[MockElement] = []
+    for element in elements:
+        mock = MockElement(
+            element_id=element.element_id,
+            class_name=element.class_name,
+            confidence=float(element.confidence or 0.0),
+            bbox_x=int(element.bbox_x),
+            bbox_y=int(element.bbox_y),
+            bbox_width=int(element.bbox_width),
+            bbox_height=int(element.bbox_height),
+            page_id=element.page_id,
+        )
+        mock_elements.append(mock)
+    return mock_elements
+def _sync_layout_runtime_fields(
+    layout_elements: List[LayoutElement],
+    mock_elements: List[MockElement],
+) -> List[LayoutElement]:
+    """
+    sorter가 계산한 order_in_question, group_id 등을 실제 LayoutElement에 반영합니다.
+    """
+    element_map: Dict[int, LayoutElement] = {
+        elem.element_id: elem for elem in layout_elements
+    }
+    synced_elements: List[LayoutElement] = []
+    for mock in mock_elements:
+        target = element_map.get(mock.element_id)
+        if not target:
+            logger.warning(
+                "정렬 결과에 존재하지만 DB에 없는 element_id=%s", mock.element_id
+            )
+            continue
+        setattr(target, "order_in_question", getattr(mock, "order_in_question", None))
+        setattr(target, "group_id", getattr(mock, "group_id", None))
+        setattr(target, "order_in_group", getattr(mock, "order_in_group", None))
+        setattr(target, "y_position", getattr(mock, "y_position", target.bbox_y))
+        setattr(target, "x_position", getattr(mock, "x_position", target.bbox_x))
+        setattr(
+            target,
+            "area",
+            getattr(mock, "area", target.bbox_width * target.bbox_height),
+        )
+        synced_elements.append(target)
+    return synced_elements
+def _update_page_status(
+    page: Page,
+    *,
+    status: str,
+    processing_time: float,
+) -> None:
+    """
+    페이지의 상태/처리시간/분석 완료 시간을 갱신합니다.
+    """
+    page.analysis_status = status
+    page.processing_time = processing_time
+    page.analyzed_at = datetime.utcnow()
+def _update_project_status(project: Project, status: str) -> None:
+    """
+    프로젝트 상태를 갱신합니다.
+    """
+    project.status = status
+    project.updated_at = datetime.utcnow()
+async def _process_single_page_async(
+    *,
+    db: Session,
+    project: Project,
+    page: Page,
+    formatter: TextFormatter,
+    analysis_service: AnalysisService,
+    use_ai_descriptions: bool,
+    api_key: Optional[str],
+    ai_max_concurrency: int = DEFAULT_AI_CONCURRENCY,
+) -> Dict[str, Any]:
+    """
+    개별 페이지에 대한 전체 파이프라인을 실행하고 결과 요약을 반환합니다.
+    """
+    logger.info(
+        "페이지 분석 시작: project_id=%s / page_id=%s", project.project_id, page.page_id
+    )
+    page_start = time.time()
+    summary: Dict[str, Any] = {
+        "page_id": page.page_id,
+        "page_number": page.page_number,
+        "status": "error",
+        "message": "",
+        "layout_count": 0,
+        "ocr_count": 0,
+        "ai_description_count": 0,
+        "processing_time": 0.0,
+    }
+    try:
+        image = _load_page_image(page)
+        layout_elements = analysis_service.analyze_layout(
+            image=image,
+            page_id=page.page_id,
+            db=db,
+            model_choice=analysis_service.model_choice,
+        )
+        if not layout_elements:
+            raise ValueError("레이아웃 분석 결과가 비어 있습니다.")
+        summary["layout_count"] = len(layout_elements)
+        text_contents = analysis_service.perform_ocr(
+            image=image,
+            layout_elements=layout_elements,
+            db=db,
+        )
+        summary["ocr_count"] = len(text_contents)
+        ai_descriptions: Dict[int, str] = {}
+        if use_ai_descriptions:
+            # API 키: 요청 파라미터 우선, 없으면 환경변수에서 로드
+            effective_api_key = api_key or os.getenv("OPENAI_API_KEY")
+            if effective_api_key:
+                logger.info(f"AI 설명 생성 시작: page_id={page.page_id}")
+                try:
+                    ai_descriptions = await analysis_service.call_openai_api_async(
+                        image=image,
+                        layout_elements=layout_elements,
+                        api_key=effective_api_key,
+                        db=db,
+                        max_concurrent_requests=ai_max_concurrency,
+                    )
+                    summary["ai_description_count"] = len(ai_descriptions)
+                    logger.info(
+                        f"AI 설명 생성 완료: {len(ai_descriptions)}개 요소 처리"
+                    )
+                except Exception as ai_error:
+                    logger.error(
+                        "AI 설명 생성 비동기 처리 실패: page_id=%s / error=%s",
+                        page.page_id,
+                        ai_error,
+                    )
+            else:
+                logger.warning(
+                    f"AI 설명 생성 요청되었으나 API 키가 없습니다 (page_id={page.page_id})"
+                )
+        mock_elements = _layout_to_mock(layout_elements)
+        sorted_mock = sort_layout_elements(
+            mock_elements,
+            document_type=formatter.document_type,
+            page_width=page.image_width or 0,
+            page_height=page.image_height or 0,
+        )
+        synced_layouts = _sync_layout_runtime_fields(layout_elements, sorted_mock)
+        save_sorting_results_to_db(db, page.page_id, synced_layouts)
+        formatted_text = formatter.format_page(
+            synced_layouts,
+            text_contents,
+            ai_descriptions=ai_descriptions,
+        )
+        create_text_version(db, page, formatted_text or "")
+        processing_time = time.time() - page_start
+        _update_page_status(page, status="completed", processing_time=processing_time)
+        summary["status"] = "completed"
+        summary["processing_time"] = processing_time
+        summary["message"] = "success"
+        db.commit()
+        return summary
+    except Exception as error:  # pylint: disable=broad-except
+        logger.error(f"페이지 분석 실패: page_id={page.page_id} / error={str(error)}")
+        logger.exception("상세 스택 트레이스:")  # 전체 스택 출력
+        db.rollback()
+        processing_time = time.time() - page_start
+        _update_page_status(page, status="error", processing_time=processing_time)
+        summary["processing_time"] = processing_time
+        summary["message"] = str(error)
+        db.commit()
+        return summary
+def _process_single_page(
+    *,
+    db: Session,
+    project: Project,
+    page: Page,
+    formatter: TextFormatter,
+    analysis_service: AnalysisService,
+    use_ai_descriptions: bool,
+    api_key: Optional[str],
+    ai_max_concurrency: int = DEFAULT_AI_CONCURRENCY,
+) -> Dict[str, Any]:
+    """
+    동기 컨텍스트 호환용 래퍼.
+    """
+    return asyncio.run(
+        _process_single_page_async(
+            db=db,
+            project=project,
+            page=page,
+            formatter=formatter,
+            analysis_service=analysis_service,
+            use_ai_descriptions=use_ai_descriptions,
+            api_key=api_key,
+            ai_max_concurrency=ai_max_concurrency,
+        )
+    )
+# -----------------------------------------------------------------------------
+# 공개 API
+# -----------------------------------------------------------------------------
+async def analyze_project_batch_async(
+    db: Session,
+    project_id: int,
+    *,
+    use_ai_descriptions: bool = True,
+    api_key: Optional[str] = None,
+    ai_max_concurrency: int = DEFAULT_AI_CONCURRENCY,
+) -> Dict[str, Any]:
+    """
+    프로젝트 내 'pending' 상태 페이지를 순차적으로 분석하고 결과 요약을 반환합니다.
+    """
+    logger.info("프로젝트 배치 분석 시작: project_id=%s", project_id)
+    started_at = time.time()
+    project = (
+        db.query(Project)
+        .options(selectinload(Project.pages))
+        .filter(Project.project_id == project_id)
+        .one_or_none()
+    )
+    if not project:
+        raise ValueError(f"프로젝트 ID {project_id}를 찾을 수 없습니다.")
+    pending_pages = [
+        page for page in project.pages if page.analysis_status in {"pending", "error"}
+    ]
+    pending_pages.sort(key=lambda p: p.page_number)
+    result_summary: Dict[str, Any] = {
+        "project_id": project.project_id,
+        "project_status_before": project.status,
+        "processed_pages": 0,
+        "successful_pages": 0,
+        "failed_pages": 0,
+        "total_pages": len(pending_pages),
+        "status": "completed" if pending_pages else "no_pending_pages",
+        "page_results": [],
+        "total_time": 0.0,
+    }
+    if not pending_pages:
+        logger.warning("분석할 페이지가 없습니다. project_id=%s", project.project_id)
+        return result_summary
+    _update_project_status(project, "in_progress")
+    db.commit()
+    analysis_service = _get_analysis_service()
+    formatter = TextFormatter(
+        doc_type_id=project.doc_type_id,
+        db=db,
+        use_db_rules=True,
+    )
+    for page in pending_pages:
+        page_summary = await _process_single_page_async(
+            db=db,
+            project=project,
+            page=page,
+            formatter=formatter,
+            analysis_service=analysis_service,
+            use_ai_descriptions=use_ai_descriptions,
+            api_key=api_key,
+            ai_max_concurrency=ai_max_concurrency,
+        )
+        result_summary["page_results"].append(page_summary)
+        result_summary["processed_pages"] += 1
+        if page_summary["status"] == "completed":
+            result_summary["successful_pages"] += 1
+        else:
+            result_summary["failed_pages"] += 1
+    if result_summary["failed_pages"] == 0:
+        final_status = "completed"
+    elif result_summary["successful_pages"] == 0:
+        final_status = "error"
+    else:
+        final_status = "partial"
+    _update_project_status(project, final_status)
+    db.commit()
+    result_summary["status"] = final_status
+    result_summary["project_status_after"] = project.status
+    result_summary["total_time"] = time.time() - started_at
+    logger.info(
+        "프로젝트 배치 분석 종료: project_id=%s / status=%s / success=%s / fail=%s / %.2fs",
+        project.project_id,
+        final_status,
+        result_summary["successful_pages"],
+        result_summary["failed_pages"],
+        result_summary["total_time"],
+    )
+    return result_summary
+def analyze_project_batch(
+    db: Session,
+    project_id: int,
+    *,
+    use_ai_descriptions: bool = True,
+    api_key: Optional[str] = None,
+    ai_max_concurrency: int = DEFAULT_AI_CONCURRENCY,
+) -> Dict[str, Any]:
+    """
+    동기 컨텍스트 호환용 래퍼.
+    """
+    return asyncio.run(
+        analyze_project_batch_async(
+            db=db,
+            project_id=project_id,
+            use_ai_descriptions=use_ai_descriptions,
+            api_key=api_key,
+            ai_max_concurrency=ai_max_concurrency,
+        )
+    )
+# Names exported by ``from ... import *``. The underscore-prefixed helpers are
+# listed here explicitly, so they are exported too.
+__all__ = [
+    "analyze_project_batch",
+    "analyze_project_batch_async",
+    "_get_analysis_service",
+    "_process_single_page",
+    "_process_single_page_async",
+    "DEFAULT_AI_CONCURRENCY",
+]

app/services/download_service.py ADDED Viewed

	@@ -0,0 +1,257 @@

+"""
+Project Download Service
+========================
+데이터베이스에 저장된 분석 결과를 활용하여 프로젝트 단위의 통합 텍스트 및
+Word 문서를 생성합니다. CombinedResult 테이블을 캐시로 사용하며, 각 페이지의
+최신(TextVersion.is_current=True) 자동 포맷팅 텍스트를 모아 제공합니다.
+"""
+from __future__ import annotations
+import io
+from datetime import datetime
+from typing import Dict, List, Optional, Tuple
+from loguru import logger
+from sqlalchemy.orm import Session, selectinload
+from ..models import CombinedResult, Page, Project, TextVersion
+try:
+    from docx import Document
+    from docx.enum.text import WD_ALIGN_PARAGRAPH
+except ImportError:  # pragma: no cover - optional dependency
+    Document = None  # type: ignore[assignment]
+    WD_ALIGN_PARAGRAPH = None  # type: ignore[assignment]
+    logger.error("python-docx 라이브러리가 설치되지 않았습니다. pip install python-docx")
+# -----------------------------------------------------------------------------
+# 헬퍼 함수
+# -----------------------------------------------------------------------------
+def _fetch_project_with_pages(db: Session, project_id: int) -> Project:
+    project = (
+        db.query(Project)
+        .options(selectinload(Project.pages))
+        .filter(Project.project_id == project_id)
+        .one_or_none()
+    )
+    if not project:
+        raise ValueError(f"프로젝트 ID {project_id}를 찾을 수 없습니다.")
+    return project
+def _fetch_current_text_versions(db: Session, page_ids: List[int]) -> Dict[int, TextVersion]:
+    if not page_ids:
+        return {}
+    versions = (
+        db.query(TextVersion)
+        .filter(
+            TextVersion.page_id.in_(page_ids),
+            TextVersion.is_current.is_(True),
+        )
+        .all()
+    )
+    return {version.page_id: version for version in versions}
+def _latest_text_timestamp(versions: Dict[int, TextVersion]) -> Optional[datetime]:
+    timestamps = [version.created_at for version in versions.values() if version.created_at]
+    return max(timestamps) if timestamps else None
+def _combined_result_is_fresh(
+    combined_result: CombinedResult,
+    latest_text_time: Optional[datetime],
+) -> bool:
+    if latest_text_time is None:
+        return combined_result.combined_text is not None
+    if combined_result.updated_at is None:
+        return False
+    return combined_result.updated_at >= latest_text_time
+def _format_combined_sections(pages: List[Page], versions: Dict[int, TextVersion]) -> Tuple[str, Dict[str, int]]:
+    sections: List[str] = []
+    total_words = 0
+    total_characters = 0
+    for page in sorted(pages, key=lambda p: p.page_number):
+        version = versions.get(page.page_id)
+        header = f"─── 페이지 {page.page_number}"
+        if version:
+            header += f" (Version: {version.version_number} - {version.version_type}) ───"
+            content = version.content or ""
+            total_words += len(content.split())
+            total_characters += len(content)
+        else:
+            header += " (내용 없음) ───"
+            content = ""
+        sections.append(f"{header}\n\n{content}".rstrip())
+    combined_text = "\n\n".join(sections)
+    stats = {
+        "total_pages": len(pages),
+        "total_words": total_words,
+        "total_characters": total_characters,
+    }
+    return combined_text, stats
+def _upsert_combined_result(
+    db: Session,
+    project_id: int,
+    combined_text: str,
+    stats: Dict[str, int],
+) -> CombinedResult:
+    record = (
+        db.query(CombinedResult)
+        .filter(CombinedResult.project_id == project_id)
+        .one_or_none()
+    )
+    now = datetime.utcnow()
+    if record:
+        record.combined_text = combined_text
+        record.combined_stats = stats
+        record.updated_at = now
+    else:
+        record = CombinedResult(
+            project_id=project_id,
+            combined_text=combined_text,
+            combined_stats=stats,
+            generated_at=now,
+            updated_at=now,
+        )
+        db.add(record)
+    db.commit()
+    db.refresh(record)
+    return record
+# -----------------------------------------------------------------------------
+# 공개 API
+# -----------------------------------------------------------------------------
+def generate_combined_text(
+    db: Session,
+    project_id: int,
+    *,
+    use_cache: bool = True,
+) -> Dict[str, object]:
+    """
+    프로젝트의 최신 텍스트 버전을 통합하여 반환합니다.
+    CombinedResult 테이블을 캐시로 사용합니다.
+    """
+    project = _fetch_project_with_pages(db, project_id)
+    page_ids = [page.page_id for page in project.pages]
+    versions = _fetch_current_text_versions(db, page_ids)
+    latest_version_time = _latest_text_timestamp(versions)
+    combined_record = (
+        db.query(CombinedResult)
+        .filter(CombinedResult.project_id == project_id)
+        .one_or_none()
+    )
+    if use_cache and combined_record and _combined_result_is_fresh(combined_record, latest_version_time):
+        logger.info("CombinedResult 캐시 사용: project_id=%s", project_id)
+        stats = combined_record.combined_stats or {}
+        generated_at = combined_record.updated_at or combined_record.generated_at or datetime.utcnow()
+        return {
+            "project_id": project_id,
+            "project_name": project.project_name,
+            "combined_text": combined_record.combined_text or "",
+            "stats": stats,
+            "generated_at": generated_at,
+        }
+    combined_text, stats = _format_combined_sections(project.pages, versions)
+    combined_record = _upsert_combined_result(db, project_id, combined_text, stats)
+    return {
+        "project_id": project_id,
+        "project_name": project.project_name,
+        "combined_text": combined_text,
+        "stats": stats,
+        "generated_at": combined_record.updated_at or combined_record.generated_at or datetime.utcnow(),
+    }
+def generate_word_document(
+    db: Session,
+    project_id: int,
+    *,
+    use_cache: bool = True,
+) -> Tuple[str, io.BytesIO]:
+    """
+    통합 텍스트를 기반으로 Word(.docx) 문서를 생성하고 파일 이름과 스트림을 반환합니다.
+    """
+    if Document is None or WD_ALIGN_PARAGRAPH is None:
+        raise ImportError("python-docx 라이브러리가 필요합니다. pip install python-docx")
+    combined_data = generate_combined_text(db, project_id, use_cache=use_cache)
+    project_name = combined_data.get("project_name") or f"프로젝트 {project_id}"
+    combined_text = combined_data.get("combined_text", "")
+    document = Document()
+    title = document.add_heading(project_name, level=0)
+    title.alignment = WD_ALIGN_PARAGRAPH.CENTER
+    meta_paragraph = document.add_paragraph(
+        f"생성일: {datetime.now().strftime('%Y-%m-%d %H:%M')}"
+    )
+    meta_paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
+    stats = combined_data.get("stats", {})
+    total_pages = stats.get("total_pages", 0)
+    stats_paragraph = document.add_paragraph(f"총 페이지: {total_pages}개")
+    stats_paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT
+    document.add_paragraph("─" * 60)
+    sections = [segment.strip() for segment in combined_text.split("─── 페이지 ") if segment.strip()]
+    for index, section in enumerate(sections):
+        lines = section.split("\n")
+        header = lines[0]
+        content_lines = lines[1:]
+        document.add_heading(f"페이지 {header.split()[0]}", level=2)
+        for paragraph in content_lines:
+            paragraph = paragraph.strip()
+            if paragraph:
+                document.add_paragraph(paragraph)
+        if index < len(sections) - 1:
+            document.add_page_break()
+    file_stream = io.BytesIO()
+    document.save(file_stream)
+    file_stream.seek(0)
+    filename = f"SmartEyeSsen_{project_id}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.docx"
+    return filename, file_stream
+def generate_document(
+    db: Session,
+    project_id: int,
+    *,
+    output: str = "text",
+    use_cache: bool = True,
+):
+    """
+    통합 텍스트 또는 Word 문서를 생성합니다.
+    output="text"  -> dict 반환 (combined_text 등)
+    output="docx" -> (filename, BytesIO) 반환
+    """
+    if output == "docx":
+        return generate_word_document(db, project_id, use_cache=use_cache)
+    return generate_combined_text(db, project_id, use_cache=use_cache)
+# Public entry points of this module; the underscore-prefixed helpers above are
+# not exported.
+__all__ = [
+    "generate_document",
+    "generate_combined_text",
+    "generate_word_document",
+]

app/services/formatter.py ADDED Viewed

	@@ -0,0 +1,403 @@

+# -*- coding: utf-8 -*-
+"""
+앵커 기반 TextFormatter
+========================
+정렬기(sorter.py)가 부여한 앵커/그룹 정보를 활용하여
+사람이 읽기 쉬운 텍스트를 생성한다.
+기본 규칙은 코드 내에 정의되어 있으며, formatting_rules DB 테이블을
+통해 오버라이드할 수 있도록 확장 지점을 남겨둔다.
+"""
+from __future__ import annotations
+from collections import OrderedDict
+from dataclasses import dataclass
+from typing import Dict, Iterable, List, Optional, Tuple, Union, TYPE_CHECKING
+from loguru import logger
+from .formatter_rules import (
+    RuleConfig,
+    get_rules_for_document_type,
+    override_rules_with_db,
+    fetch_db_rules,
+)
+from .formatter_utils import (
+    RenderContext,
+    apply_rule,
+    clean_output,
+    normalize_ai_descriptions,
+    ocr_inputs_to_dict,
+    split_first_line,
+)
+if TYPE_CHECKING:
+    from sqlalchemy.orm import Session
+    from ..models import LayoutElement, TextContent
+# Maps a doc_type_id to the document-type name used to select formatting rules.
+DOC_TYPE_ID_MAP = {
+    1: "question_based",  # worksheet (question sheet)
+    2: "reading_order",  # general document
+    3: "reading_order",  # reserved for other types (tables/charts, etc.)
+}
+# Layout class names listed as anchors.
+ANCHOR_CLASSES = {"question type", "question number", "second_question_number"}
+@dataclass
+class ElementGroupBundle:
+    """
+    앵커와 자식 요소들을 묶은 번들.
+    Note: LayoutElement 타입 대신 Any 사용 (순환 import 방지)
+    """
+    anchor: Optional[any]  # LayoutElement
+    children: List[any]  # List[LayoutElement]
+    ordered_elements: List[any]  # List[LayoutElement]
+class TextFormatter:
+    """
+    정렬된 LayoutElement + OCR 텍스트를 받아 최종 포맷팅된 문자열을 생성한다.
+    """
+    def __init__(
+        self,
+        doc_type_id: int,
+        *,
+        db: Optional["Session"] = None,
+        use_db_rules: bool = False,
+        db_rule_records: Optional[Dict[str, Dict[str, str]]] = None,
+    ) -> None:
+        """
+        Args:
+            doc_type_id: document_types 테이블의 doc_type_id (1=문제지, 2=일반 문서).
+            db: SQLAlchemy 세션 (use_db_rules=True일 때 필수).
+            use_db_rules: True면 DB에서 덮어쓴 규칙을 반영.
+            db_rule_records: 외부에서 주입한 규칙 덮어쓰기 정보 (테스트용).
+        """
+        self.doc_type_id = doc_type_id
+        self.document_type = DOC_TYPE_ID_MAP.get(doc_type_id, "question_based")
+        base_rules = get_rules_for_document_type(self.document_type)
+        # DB 규칙 오버라이드 적용
+        if use_db_rules and db:
+            db_records = fetch_db_rules(db, doc_type_id)
+            self.rules = override_rules_with_db(base_rules, db_records)
+        elif db_rule_records:
+            # 테스트용: 직접 주입된 규칙 사용
+            self.rules = override_rules_with_db(base_rules, db_rule_records)
+        else:
+            self.rules = base_rules
+        self.use_db_rules = use_db_rules
+        self.db = db
+    # ------------------------------------------------------------------
+    # 퍼블릭 API
+    # ------------------------------------------------------------------
+    def format_page(
+        self,
+        sorted_elements: List["LayoutElement"],
+        ocr_texts: Union[Dict[int, str], List["TextContent"]],
+        *,
+        ai_descriptions: Optional[Dict[int, str]] = None,
+        metadata: Optional[Dict[str, Union[str, int]]] = None,
+    ) -> str:
+        """
+        Args:
+            sorted_elements: sorter가 반환한 LayoutElement 리스트 (order_in_question 필수).
+            ocr_texts: element_id → 텍스트 딕셔너리 또는 TextContent 리스트.
+            ai_descriptions: AI가 생성한 시각 자료 설명 (선택).
+            metadata: 출력 상단 등에 활용할 추가 정보 (현재는 미사용).
+        """
+        logger.info(
+            f"[Formatter] format_page 시작: sorted_elements={len(sorted_elements)}, "
+            f"ocr_texts_count={len(ocr_texts) if isinstance(ocr_texts, dict) else len(ocr_texts)}, "
+            f"ai_descriptions_count={len(ai_descriptions) if ai_descriptions else 0}"
+        )
+        if not sorted_elements:
+            logger.warning("[Formatter] sorted_elements가 비어있음 - 빈 문자열 반환")
+            return ""
+        ocr_dict = ocr_inputs_to_dict(ocr_texts)
+        ai_dict = normalize_ai_descriptions(ai_descriptions)
+        logger.debug(f"[Formatter] ocr_dict keys: {list(ocr_dict.keys())[:10]}")
+        logger.debug(f"[Formatter] ai_dict keys: {list(ai_dict.keys())[:10]}")
+        context = RenderContext(ocr_dict, ai_dict, self.rules)
+        valid_elements = self._filter_elements(sorted_elements)
+        logger.info(f"[Formatter] 필터링 후: {len(valid_elements)}개 유효 요소")
+        if not valid_elements:
+            logger.warning("[Formatter] 포맷팅 대상 요소가 없습니다 - 빈 문자열 반환")
+            return ""
+        if self.document_type == "reading_order":
+            rendered = self._render_reading_order(valid_elements, context)
+            result = clean_output("".join(rendered))
+            logger.info(f"[Formatter] reading_order 포맷팅 완료: {len(result)}자")
+            return result
+        group_bundles = self._build_group_bundles(valid_elements)
+        logger.info(f"[Formatter] 그룹 번들 생성: {len(group_bundles)}개")
+        rendered_blocks = [
+            self._render_group(bundle, context) for bundle in group_bundles
+        ]
+        combined = "".join(rendered_blocks)
+        result = clean_output(combined)
+        logger.info(
+            f"[Formatter] question_based 포맷팅 완료: {len(result)}자, "
+            f"{len(group_bundles)}개 그룹"
+        )
+        return result
+    # ------------------------------------------------------------------
+    # 내부 유틸 (공통)
+    # ------------------------------------------------------------------
+    @staticmethod
+    def _filter_elements(elements: Iterable[MockElement]) -> List[MockElement]:
+        """
+        면적 또는 필수 속성이 없는 요소를 제외한다.
+        """
+        filtered: List[MockElement] = []
+        for element in elements:
+            try:
+                area = getattr(element, "area")
+            except AttributeError:
+                area = element.bbox_width * element.bbox_height
+            if area <= 0:
+                continue
+            filtered.append(element)
+        return filtered
+    @staticmethod
+    def _element_sort_key(element: MockElement) -> Tuple[int, int, int, int]:
+        large_offset = 10**7
+        order_in_question = getattr(element, "order_in_question", None)
+        order_key = (
+            order_in_question
+            if order_in_question is not None
+            else large_offset + int(getattr(element, "y_position", 0))
+        )
+        return (
+            order_key,
+            int(getattr(element, "order_in_group", 0)),
+            int(getattr(element, "y_position", 0)),
+            int(getattr(element, "x_position", 0)),
+        )
+    def _build_group_bundles(
+        self, elements: List[MockElement]
+    ) -> List[ElementGroupBundle]:
+        """
+        group_id 기준으로 요소를 묶고, 앵커/자식 정보를 포함한 번들을 생성한다.
+        """
+        ordered_elements = sorted(elements, key=self._element_sort_key)
+        grouped: "OrderedDict[Union[int, str], List[MockElement]]" = OrderedDict()
+        orphan_counter = 0
+        for element in ordered_elements:
+            group_id = getattr(element, "group_id", None)
+            if group_id is None:
+                group_id = f"_orphan_{orphan_counter}"
+                orphan_counter += 1
+            grouped.setdefault(group_id, []).append(element)
+        bundles: List[ElementGroupBundle] = []
+        for elems in grouped.values():
+            anchor = self._pick_anchor(elems)
+            children = self._sorted_children(elems, anchor)
+            bundles.append(
+                ElementGroupBundle(
+                    anchor=anchor, children=children, ordered_elements=elems
+                )
+            )
+        return bundles
+    @staticmethod
+    def _pick_anchor(elements: Iterable[MockElement]) -> Optional[MockElement]:
+        anchors = [e for e in elements if e.class_name in ANCHOR_CLASSES]
+        if not anchors:
+            return None
+        anchors.sort(
+            key=lambda e: (
+                int(getattr(e, "order_in_group", 0)),
+                int(getattr(e, "y_position", 0)),
+                int(getattr(e, "x_position", 0)),
+            )
+        )
+        return anchors[0]
+    def _sorted_children(
+        self, elements: Iterable[MockElement], anchor: Optional[MockElement]
+    ) -> List[MockElement]:
+        return sorted(
+            [e for e in elements if e is not anchor],
+            key=self._element_sort_key,
+        )
+    # ------------------------------------------------------------------
+    # 렌더링 로직
+    # ------------------------------------------------------------------
+    def _render_group(self, bundle: ElementGroupBundle, context: RenderContext) -> str:
+        anchor = bundle.anchor
+        if anchor is None:
+            return self._render_orphan_block(
+                bundle.children or bundle.ordered_elements, context
+            )
+        if anchor.class_name == "question type":
+            return self._render_section_block(anchor, bundle.children, context)
+        if anchor.class_name == "question number":
+            return self._render_question_block(anchor, bundle.children, context)
+        if anchor.class_name == "second_question_number":
+            return self._render_sub_question_block(anchor, bundle.children, context)
+        # 예상치 못한 앵커는 고아 블록으로 처리
+        logger.debug(
+            f"알 수 없는 앵커 클래스 '{anchor.class_name}' → 고아 블록으�� 처리"
+        )
+        return self._render_orphan_block(bundle.ordered_elements, context)
+    def _render_section_block(
+        self,
+        anchor: MockElement,
+        children: List[MockElement],
+        context: RenderContext,
+    ) -> str:
+        output_parts: List[str] = []
+        rendered_anchor = context.format_element(anchor)
+        if rendered_anchor.strip():
+            output_parts.append(rendered_anchor)
+        for child in children:
+            rendered_child = context.format_element(child)
+            if rendered_child.strip():
+                output_parts.append(rendered_child)
+        return "".join(output_parts)
+    def _render_question_block(
+        self,
+        anchor: MockElement,
+        children: List[MockElement],
+        context: RenderContext,
+    ) -> str:
+        """Render a "question number" group as a numbered block whose primary child class is "question text"."""
+        return self._render_numbered_block(
+            anchor, children, context, primary_class="question text"
+        )
+    def _render_sub_question_block(
+        self,
+        anchor: MockElement,
+        children: List[MockElement],
+        context: RenderContext,
+    ) -> str:
+        """Render a "second_question_number" group; currently identical to the question block rendering."""
+        return self._render_numbered_block(
+            anchor, children, context, primary_class="question text"
+        )
+    def _render_numbered_block(
+        self,
+        anchor: MockElement,
+        children: List[MockElement],
+        context: RenderContext,
+        *,
+        primary_class: str,
+    ) -> str:
+        """
+        Render a numbered block in which the anchor shares a line with the
+        first line of the primary child.
+        Args:
+            anchor: The anchor element of the group.
+            children: The other elements of the group.
+            context: Render context providing texts, rules and transforms.
+            primary_class: Class name of the child whose first line is joined
+                to the anchor line (the first matching child is used).
+        Returns:
+            The concatenated text of the block.
+        """
+        output_parts: List[str] = []
+        primary_element = next(
+            (child for child in children if child.class_name == primary_class), None
+        )
+        # Anchor text and its base formatting
+        anchor_text, _ = context.get_texts(anchor)
+        anchor_rule = context.rules.get(anchor.class_name, RuleConfig())
+        if anchor_text or anchor_rule.allow_empty:
+            # Trailing newlines are dropped so the primary text can follow on the same line.
+            anchor_line = apply_rule(anchor_rule, anchor_text).rstrip("\n")
+        else:
+            # No anchor text: only the rule's prefix is emitted.
+            anchor_line = anchor_rule.prefix
+        first_line = ""
+        remainder = ""
+        if primary_element:
+            base_text, ai_text = context.get_texts(primary_element)
+            working = base_text or ai_text
+            working = context.apply_transform(
+                primary_element,
+                working,
+                base_text=base_text,
+                ai_text=ai_text,
+            )
+            # Fall back to the AI text when the transform produced nothing.
+            if not working and ai_text:
+                working = ai_text
+            first_line, remainder = split_first_line(working)
+        if anchor_line:
+            # Keep the anchor's leading newlines in front of the combined line.
+            leading_newlines = len(anchor_line) - len(anchor_line.lstrip("\n"))
+            anchor_core = anchor_line.lstrip("\n")
+            combined_line = anchor_core + (first_line or "")
+            prefix_newlines = "\n" * leading_newlines
+            output_parts.append(prefix_newlines + combined_line.rstrip())
+        elif first_line:
+            output_parts.append(first_line)
+        # Remaining lines of the primary text use the rule of primary_class, if any.
+        question_rule = context.rules.get(primary_class)
+        if primary_element and remainder:
+            if question_rule:
+                output_parts.append(apply_rule(question_rule, remainder).rstrip("\n"))
+            else:
+                output_parts.append(remainder)
+        rendered_children = []
+        for child in children:
+            if child is primary_element:
+                continue
+            rendered = context.format_element(child)
+            if rendered.strip():
+                # Make sure the head of the block ends with a blank line before
+                # the other children. Children are collected separately, so
+                # output_parts[-1] is always the last head part here.
+                if output_parts and not output_parts[-1].endswith("\n\n"):
+                    output_parts[-1] = output_parts[-1].rstrip("\n") + "\n\n"
+                rendered_children.append(rendered)
+        output_parts.extend(rendered_children)
+        return "".join(output_parts)
+    def _render_orphan_block(
+        self,
+        elements: List[MockElement],
+        context: RenderContext,
+    ) -> str:
+        outputs: List[str] = []
+        for element in elements:
+            rendered = context.format_element(element)
+            if rendered.strip():
+                outputs.append(rendered)
+        return "".join(outputs)
+    def _render_reading_order(
+        self,
+        elements: List[MockElement],
+        context: RenderContext,
+    ) -> List[str]:
+        sorted_elements = sorted(
+            elements,
+            key=lambda e: (
+                int(getattr(e, "y_position", 0)),
+                int(getattr(e, "x_position", 0)),
+                int(getattr(e, "element_id", 0)),
+            ),
+        )
+        outputs = []
+        for element in sorted_elements:
+            rendered = context.format_element(element)
+            if rendered.strip():
+                outputs.append(rendered)
+        return outputs

app/services/formatter_rules.py ADDED Viewed

	@@ -0,0 +1,200 @@

+"""
+앵커 기반 텍스트 포맷터 규칙 정의
+=================================
+실제 서비스는 formatting_rules DB 테이블을 참고하는 것을 목표로 하지만,
+현재 구현에서는 코드 레벨의 기본 규칙을 제공하고, 향후 DB 오버라이드를
+위해 동일한 구조를 유지한다.
+"""
+from __future__ import annotations
+from dataclasses import dataclass, replace
+from typing import Dict, Optional, TYPE_CHECKING
+if TYPE_CHECKING:
+    from sqlalchemy.orm import Session
+@dataclass(frozen=True)
+class RuleConfig:
+    """
+    Formatting rule for a single element class.
+    Attributes:
+        prefix: String placed in front of the content.
+        suffix: String placed after the content.
+        indent: Number of indentation spaces (applied to each line).
+        transform: Name of the post-processing function used by formatter_utils.
+        allow_empty: When True, the rule is applied even to empty content.
+        keep_suffix_on_empty: Whether the suffix is kept when the content is empty.
+    """
+    prefix: str = ""
+    suffix: str = "\n"
+    indent: int = 0
+    transform: Optional[str] = None
+    allow_empty: bool = False
+    keep_suffix_on_empty: bool = False
+# ---------------------------------------------------------------------------
+# Default rules: worksheet (question_based) documents
+# ---------------------------------------------------------------------------
+QUESTION_BASED_RULES: Dict[str, RuleConfig] = {
+    # Anchors
+    "question type": RuleConfig(prefix="\n\n[", suffix="]\n", indent=0, transform="normalize_question_type"),
+    "question number": RuleConfig(prefix="\n\n", suffix=". ", indent=0, allow_empty=False),
+    "second_question_number": RuleConfig(prefix="\n   ", suffix="", indent=3, allow_empty=False),
+    # Body text
+    "question text": RuleConfig(prefix="", suffix="\n", indent=3),
+    "plain text": RuleConfig(prefix="", suffix="\n", indent=0),
+    "unit": RuleConfig(prefix="", suffix="\n", indent=3),
+    "list": RuleConfig(prefix="   - ", suffix="\n", indent=0, transform="normalize_list"),
+    "choices": RuleConfig(prefix="", suffix="\n", indent=3, transform="normalize_choices"),
+    # Visual elements
+    "figure": RuleConfig(prefix="\n   [그림 설명]\n", suffix="\n\n", indent=3, transform="merge_visual_description", allow_empty=True),
+    "table": RuleConfig(prefix="\n   [표 설명]\n", suffix="\n\n", indent=3, transform="merge_visual_description", allow_empty=True),
+    "flowchart": RuleConfig(prefix="\n   [순서도 설명]\n", suffix="\n\n", indent=3, transform="merge_visual_description", allow_empty=True),
+    # Captions and meta
+    "figure_caption": RuleConfig(prefix="   (그림 캡션) ", suffix="\n\n", indent=0),
+    "table caption": RuleConfig(prefix="   (표 캡션) ", suffix="\n\n", indent=0),
+    "table footnote": RuleConfig(prefix="     * ", suffix="\n", indent=0),
+    "formula_caption": RuleConfig(prefix="   (수식 설명) ", suffix="\n", indent=0),
+    "isolated_formula": RuleConfig(prefix="\n   [수식]\n", suffix="\n", indent=3, transform="isolate_formula")
+}
+# ---------------------------------------------------------------------------
+# Default rules: general (reading_order) documents
+# ---------------------------------------------------------------------------
+READING_ORDER_RULES: Dict[str, RuleConfig] = {
+    "title": RuleConfig(prefix="", suffix="\n\n", indent=0, transform="uppercase_title"),
+    "heading": RuleConfig(prefix="\n", suffix="\n\n", indent=0),
+    "plain text": RuleConfig(prefix="", suffix="\n\n", indent=0),
+    "list": RuleConfig(prefix="", suffix="\n", indent=0, transform="normalize_reading_list"),
+    "figure": RuleConfig(prefix="\n[그림] ", suffix="\n\n", indent=0, transform="merge_visual_description"),
+    "table": RuleConfig(prefix="\n[표] ", suffix="\n\n", indent=0, transform="merge_visual_description"),
+    "figure_caption": RuleConfig(prefix="(그림 캡션) ", suffix="\n", indent=0),
+    "table caption": RuleConfig(prefix="(표 캡션) ", suffix="\n", indent=0),
+    "table footnote": RuleConfig(prefix="* ", suffix="\n", indent=0),
+}
+# Lookup from document type name to its default rule table.
+RULE_MAP_BY_DOC_TYPE: Dict[str, Dict[str, RuleConfig]] = {
+    "question_based": QUESTION_BASED_RULES,
+    "reading_order": READING_ORDER_RULES,
+}
+def get_rules_for_document_type(document_type: str) -> Dict[str, RuleConfig]:
+    """
+    지정된 문서 타입의 규칙 사전을 복사하여 반환합니다.
+    Args:
+        document_type: "question_based" 또는 "reading_order"
+    Returns:
+        class_name → RuleConfig 매핑 (복사본)
+    """
+    base_rules = RULE_MAP_BY_DOC_TYPE.get(document_type)
+    if base_rules is None:
+        raise ValueError(f"지원하지 않는 문서 타입입니다: {document_type}")
+    return {class_name: replace(rule) for class_name, rule in base_rules.items()}
+def fetch_db_rules(db: "Session", doc_type_id: int) -> Dict[str, Dict[str, str]]:
+    """
+    DB에서 formatting_rules를 조회하여 덮어쓰기 정보를 반환합니다.
+    Args:
+        db: SQLAlchemy 세션
+        doc_type_id: document_types.doc_type_id (1=문제지, 2=일반문서)
+    Returns:
+        class_name → {prefix, suffix, indent} 형태의 덮어쓰기 정보
+    """
+    # Import here to avoid circular dependency
+    from .. import crud
+    db_rules = crud.get_all_formatting_rules(db)
+    if not db_rules:
+        return {}
+    override_dict: Dict[str, Dict[str, str]] = {}
+    for rule in db_rules:
+        # doc_type_id가 일치하거나 NULL(공통 규칙)인 경우만 적용
+        if rule.doc_type_id is None or rule.doc_type_id == doc_type_id:
+            override_dict[rule.class_name] = {
+                "prefix": rule.prefix or "",
+                "suffix": rule.suffix or "\n",
+                "indent": str(rule.indent_level or 0),
+            }
+    return override_dict
+def override_rules_with_db(
+    base_rules: Dict[str, RuleConfig],
+    db_records: Optional[Dict[str, Dict[str, str]]] = None
+) -> Dict[str, RuleConfig]:
+    """
+    DB 레코드 정보를 사용하여 규칙을 덮어씁니다.
+    Args:
+        base_rules: 코드 기본 규칙 사전.
+        db_records: class_name → {prefix, suffix, indent} 형태의 덮어쓰기 정보.
+    Returns:
+        덮어쓰기 적용된 규칙 사전.
+    """
+    if not db_records:
+        return base_rules
+    updated_rules = dict(base_rules)
+    for class_name, override in db_records.items():
+        rule = updated_rules.get(class_name)
+        if not rule:
+            continue
+        updated_rules[class_name] = RuleConfig(
+            prefix=override.get("prefix", rule.prefix),
+            suffix=override.get("suffix", rule.suffix),
+            indent=int(override.get("indent", rule.indent)),
+            transform=rule.transform,
+            allow_empty=rule.allow_empty,
+            keep_suffix_on_empty=rule.keep_suffix_on_empty,
+        )
+    return updated_rules
+def get_rule_for_class(
+    class_name: str,
+    document_type: str,
+    db: Optional["Session"] = None,
+    doc_type_id: Optional[int] = None
+) -> RuleConfig:
+    """
+    주어진 클래스명에 대한 포맷팅 규칙을 반환합니다.
+    DB 세션이 제공되면 DB 오버라이드를 적용합니다.
+    Args:
+        class_name: 레이아웃 요소 클래스명
+        document_type: "question_based" 또는 "reading_order"
+        db: SQLAlchemy 세션 (선택)
+        doc_type_id: 문서 타입 ID (선택, db 제공 시 필요)
+    Returns:
+        해당 클래스의 RuleConfig
+    """
+    base_rules = get_rules_for_document_type(document_type)
+    if db and doc_type_id:
+        db_records = fetch_db_rules(db, doc_type_id)
+        rules = override_rules_with_db(base_rules, db_records)
+    else:
+        rules = base_rules
+    # 기본값 반환 (규칙이 없는 경우)
+    return rules.get(class_name, RuleConfig())

app/services/formatter_utils.py ADDED Viewed

	@@ -0,0 +1,306 @@

+"""
+포맷터 유틸리티 함수 모음.
+포맷팅 규칙 적용, 선택지 정규화, 시각 자료 설명 병합 등
+핵심 후처리 로직을 한 곳에 모아둔다.
+"""
+from __future__ import annotations
+import re
+import unicodedata
+from dataclasses import dataclass
+from typing import Dict, Iterable, List, Optional, Tuple
+from .mock_models import MockElement
+from .formatter_rules import RuleConfig
+AI_PRIORITY_CLASSES = {"figure", "table", "flowchart"}
+# ---------------------------------------------------------------------------
+# 데이터 변환 헬퍼
+# ---------------------------------------------------------------------------
+def ocr_inputs_to_dict(ocr_texts) -> Dict[int, str]:
+    """
+    OCR 입력을 element_id → text 딕셔너리로 변환.
+    """
+    if isinstance(ocr_texts, dict):
+        return {int(k): (v or "").strip() for k, v in ocr_texts.items()}
+    ocr_dict: Dict[int, str] = {}
+    for item in ocr_texts or []:
+        try:
+            element_id = int(getattr(item, "element_id"))
+            text = getattr(item, "ocr_text", "") or ""
+        except AttributeError:
+            continue
+        cleaned = text.strip()
+        if cleaned:
+            ocr_dict[element_id] = cleaned
+    return ocr_dict
+def normalize_ai_descriptions(
+    ai_descriptions: Optional[Dict[int, str]],
+) -> Dict[int, str]:
+    """
+    AI 설명 딕셔너리를 정리합니다.
+    """
+    if not ai_descriptions:
+        return {}
+    return {
+        int(k): (v or "").strip()
+        for k, v in ai_descriptions.items()
+        if (v or "").strip()
+    }
+def split_first_line(text: str) -> Tuple[str, str]:
+    """
+    문자열을 첫 줄과 나머지로 분리한다.
+    """
+    if not text:
+        return "", ""
+    lines = text.splitlines()
+    first = lines[0]
+    remainder = "\n".join(lines[1:]).strip()
+    return first, remainder
+# ---------------------------------------------------------------------------
+# 콘텐츠 후처리
+# ---------------------------------------------------------------------------
+CHOICE_PATTERN = re.compile(
+    r"^(\(?\d{1,2}[\).]|[①-⑳]|[A-Z][\).]|[가-하]\.|[가-하]\))\s*(.+)$"
+)
+def normalize_choices(text: str) -> str:
+    """
+    선택지 텍스트를 표준화한다.
+    - 패턴이 명확하면 그대로 사용.
+    - 그렇지 않으면 '• ' 불릿을 붙인다.
+    """
+    lines = [line.strip() for line in text.splitlines() if line.strip()]
+    normalized: List[str] = []
+    for line in lines:
+        match = CHOICE_PATTERN.match(line)
+        if match:
+            label, body = match.groups()
+            normalized.append(f"{label} {body.strip()}")
+        else:
+            normalized.append(f"• {line}")
+    return "\n".join(normalized)
+LIST_PATTERN = re.compile(r"^([\-•]|\d+\.)\s*(.+)$")
+def normalize_list(text: str) -> str:
+    """
+    일반 리스트 텍스트를 정규화.
+    """
+    lines = [line.strip() for line in text.splitlines() if line.strip()]
+    normalized: List[str] = []
+    for line in lines:
+        match = LIST_PATTERN.match(line)
+        if match:
+            normalized.append(f"- {match.group(2).strip()}")
+        else:
+            normalized.append(f"- {line}")
+    return "\n".join(normalized)
+def normalize_reading_list(text: str) -> str:
+    """
+    일반 문서용 리스트 정규화 (불릿 기호 유지).
+    """
+    lines = [line.strip() for line in text.splitlines() if line.strip()]
+    normalized: List[str] = []
+    for line in lines:
+        match = LIST_PATTERN.match(line)
+        if match:
+            normalized.append(f"• {match.group(2).strip()}")
+        else:
+            normalized.append(f"• {line}")
+    return "\n".join(normalized)
+def merge_visual_description(text: str, ai_text: Optional[str]) -> str:
+    """
+    그림/표/순서도 설명을 결합한다.
+    AI 설명이 있으면 우선 사용하고, OCR 텍스트가 있으면 다음 줄에 추가한다.
+    """
+    if ai_text and text:
+        return f"{ai_text}\n{text}"
+    return ai_text or text
+def isolate_formula(text: str) -> str:
+    """
+    Use the formula text as given, only trimming leading and trailing whitespace.
+    """
+    return text.strip()
+def uppercase_title(text: str) -> str:
+    """Return the title with surrounding whitespace trimmed (despite the name, the case is not changed)."""
+    return text.strip()
+def normalize_question_type(text: str) -> str:
+    """
+    question type OCR 결과의 줄 정렬/노이즈 제거.
+    - 줄바꿈을 공백으로 치환하여 한 줄로 정리
+    (그 외 문자/공백은 원본을 최대한 유지)
+    """
+    normalized = unicodedata.normalize("NFKC", text or "")
+    normalized = normalized.replace("\r\n", "\n").replace("\r", "\n")
+    return normalized.replace("\n", " ")
+# Maps the transform name stored in RuleConfig.transform to its function.
+TRANSFORM_DISPATCH = {
+    "normalize_choices": normalize_choices,
+    "normalize_list": normalize_list,
+    "normalize_reading_list": normalize_reading_list,
+    "merge_visual_description": merge_visual_description,
+    "isolate_formula": isolate_formula,
+    "uppercase_title": uppercase_title,
+    "normalize_question_type": normalize_question_type,
+}
+# ---------------------------------------------------------------------------
+# 규칙 적용 및 출력 정리
+# ---------------------------------------------------------------------------
+def apply_rule(rule: RuleConfig, content: str) -> str:
+    """
+    규칙에 따라 콘텐츠에 접두사, 들여쓰기, 접미사를 적용한다.
+    """
+    if not content and not rule.allow_empty:
+        return ""
+    working = content
+    if rule.indent > 0:
+        indent_str = " " * rule.indent
+        indented_lines: List[str] = []
+        for line in working.splitlines():
+            if not line.strip():
+                indented_lines.append("")
+            else:
+                indented_lines.append(f"{indent_str}{line}")
+        working = "\n".join(indented_lines)
+    if not working and not rule.keep_suffix_on_empty:
+        return rule.prefix if rule.prefix else ""
+    return f"{rule.prefix}{working}{rule.suffix}"
+def clean_output(text: str) -> str:
+    """
+    최종 출력 문자열에서 연속 빈 줄 및 후행 공백을 정리한다.
+    """
+    lines = text.splitlines()
+    cleaned: List[str] = []
+    empty_streak = 0
+    for line in lines:
+        stripped = line.rstrip()
+        if stripped == "":
+            empty_streak += 1
+            if empty_streak > 2:
+                continue
+        else:
+            empty_streak = 0
+        cleaned.append(stripped)
+    result = "\n".join(cleaned).strip()
+    return result
+# ---------------------------------------------------------------------------
+# 렌더링 컨텍스트
+# ---------------------------------------------------------------------------
+@dataclass
+class RenderContext:
+    """
+    렌더링 시 필요한 컨텍스트.
+    """
+    ocr_texts: Dict[int, str]
+    ai_texts: Dict[int, str]
+    rules: Dict[str, RuleConfig]
+    def get_texts(self, element: MockElement) -> Tuple[str, str]:
+        element_id = getattr(element, "element_id", None)
+        base_text = self.ocr_texts.get(element_id, "").strip()
+        ai_text = self.ai_texts.get(element_id, "").strip()
+        return base_text, ai_text
+    def apply_transform(
+        self,
+        element: MockElement,
+        text: str,
+        *,
+        base_text: str,
+        ai_text: str,
+    ) -> str:
+        rule = self.rules.get(element.class_name)
+        if not rule or not rule.transform:
+            return text.strip()
+        transform = TRANSFORM_DISPATCH.get(rule.transform)
+        if not transform:
+            return text.strip()
+        if rule.transform == "merge_visual_description":
+            return transform(base_text.strip(), ai_text.strip())
+        return transform(text.strip())
+    def format_element(
+        self, element: MockElement, content_override: Optional[str] = None
+    ) -> str:
+        """
+        개별 요소를 규칙에 따라 문자열로 변환한다.
+        """
+        element_id = getattr(element, "element_id", None)
+        base_text = (
+            content_override
+            if content_override is not None
+            else self.ocr_texts.get(element_id, "")
+        ).strip()
+        ai_text = self.ai_texts.get(element_id, "").strip()
+        # 그림/표/순서도는 transform 함수에서 병합 처리
+        if element.class_name in AI_PRIORITY_CLASSES:
+            # merge_visual_description transform에 맡김
+            working = base_text
+        else:
+            working = base_text or ai_text
+        # Transform 적용 (merge_visual_description이 ai_text와 병합)
+        working = self.apply_transform(
+            element,
+            working,
+            base_text=base_text,
+            ai_text=ai_text,
+        )
+        # Transform 후에도 비어있고 AI 설명이 있으면 AI 설명 사용
+        if not working and ai_text and element.class_name in AI_PRIORITY_CLASSES:
+            working = ai_text
+        # 규칙 적용 (prefix, suffix, indent)
+        rule = self.rules.get(element.class_name)
+        if rule:
+            return apply_rule(rule, working)
+        # 규칙이 없으면 기본적으로 한 줄 출력
+        return f"{working}\n" if working else ""

app/services/mock_models.py ADDED Viewed

	@@ -0,0 +1,417 @@

+"""
+데이터베이스 독립성을 위한 Mock 모델 (v2.1 스키마 호환)
+======================================================
+이 모듈은 데이터베이스 의존성을 제거하기 위한 Mock 모델들을 정의하며,
+v2.1 E-R 다이어그램에 맞춰져 있습니다. 테스트 목적으로 layout_elements,
+text_contents, question_groups, question_elements 테이블을 대체합니다.
+주요 특징:
+- MockElement, MockTextContent, MockQuestionGroup, MockQuestionElement 구현
+- 자동 계산 속성 (y_position, x_position, area)
+- Pydantic (설치된 경우) 또는 순수 Python 클래스 지원
+v2.1 스키마 변경 사항 반영:
+- MockElement: DB 표현에서는 정렬 필드(order_in_question 등)가 없지만, sorter.py 호환성을 위해 메모리 내 속성으로 유지
+- MockQuestionGroup: question_groups 테이블 표현
+- MockQuestionElement: question_elements 테이블 (N:M 매핑) 표현
+사용법:
+- analysis_service, formatter에서 MockElement, MockTextContent 임포트
+- db_saver (v2.1)에서 MockQuestionGroup, MockQuestionElement 임포트
+참조:
+- E-R 다이어그램 v2.1: Project/docs/E-R_다이어그램_v2.1_스키마.md
+"""
+from datetime import datetime
+from typing import Optional, Dict, Any, List # Pydantic MockElement bbox 호환성을 위해 List 추가
+import json as json_module
+from dataclasses import dataclass
+# 의존성 체크
+try:
+    # Pydantic is optional; the flag selects which implementation is defined.
+    from pydantic import BaseModel, Field, computed_field
+    USE_PYDANTIC = True
+except ImportError:
+    USE_PYDANTIC = False
+    BaseModel = object # type: ignore
+# ============================================================================
+# Pydantic 기반 구현 (Pydantic이 설치된 경우)
+# ============================================================================
+if USE_PYDANTIC:
+    class MockElement(BaseModel): # type: ignore
+        """
+        Mock model standing in for the layout_elements table (Pydantic version).
+        Represents one element of a layout analysis result.
+        v2.1 note: the layout_elements table does not store ordering information,
+        but sorter.py adds these attributes dynamically in memory. The optional
+        ordering fields are therefore kept for compatibility with sorter.py output.
+        """
+        # Basic fields
+        element_id: int = Field(..., description="요소 고유 식별자", ge=1)
+        page_id: Optional[int] = Field(None, description="페이지 식별자", ge=1)
+        class_name: str = Field(..., description="요소 클래스명", min_length=1, max_length=100)
+        confidence: float = Field(..., description="탐지 신뢰도", ge=0.0, le=1.0)
+        # Bounding box coordinates
+        bbox_x: int = Field(..., description="바운딩 박스 좌상단 X", ge=0)
+        bbox_y: int = Field(..., description="바운딩 박스 좌상단 Y", ge=0)
+        bbox_width: int = Field(..., description="바운딩 박스 너비", ge=1)
+        bbox_height: int = Field(..., description="바운딩 박스 높이", ge=1)
+        # Metadata
+        created_at: Optional[datetime] = Field(default_factory=datetime.now, description="생성 타임스탬프")
+        # ===>>> Fields added dynamically by sorter.py (not in the v2.1 DB schema) <<<===
+        order_in_question: Optional[int] = Field(None, description="전체 정렬 순서 (sorter가 추가)")
+        group_id: Optional[int] = Field(None, description="할당된 그룹 ID (sorter가 추가)")
+        order_in_group: Optional[int] = Field(None, description="그룹 내 정렬 순서 (sorter가 추가)")
+        # ===>>> End of sorter.py fields <<<===
+        # Compatibility field for the older bbox format (if needed)
+        bbox: Optional[List[int]] = Field(None, description="선택적 bbox 리스트 [x, y, w, h]")
+        # Computed properties
+        @computed_field # type: ignore[misc]
+        @property
+        def area(self) -> int:
+            """Element area (width * height)."""
+            return self.bbox_width * self.bbox_height
+        @computed_field # type: ignore[misc]
+        @property
+        def y_position(self) -> int:
+            """Y coordinate used for sorting (= bbox_y)."""
+            return self.bbox_y
+        @computed_field # type: ignore[misc]
+        @property
+        def x_position(self) -> int:
+            """X coordinate used for sorting (= bbox_x)."""
+            return self.bbox_x
+        model_config = {
+            "json_schema_extra": {
+                "example": {
+                    "element_id": 1, "page_id": 1, "class_name": "question_number",
+                    "confidence": 0.95, "bbox_x": 100, "bbox_y": 200,
+                    "bbox_width": 50, "bbox_height": 30,
+                }
+            }
+        }
+    class MockTextContent(BaseModel): # type: ignore
+        """
+        Mock model standing in for the text_contents table (Pydantic version).
+        Stores the OCR result text of a layout element (1:1 relationship).
+        """
+        # Basic fields
+        text_id: int = Field(..., description="텍스트 콘텐츠 고유 식별자", ge=1)
+        element_id: int = Field(..., description="연결된 레이아웃 요소 식별자 (UNIQUE)", ge=1)
+        # OCR content
+        ocr_text: str = Field(..., description="추출된 OCR 텍스트")
+        # OCR metadata
+        ocr_engine: str = Field(default="PaddleOCR", description="사용된 OCR 엔진", max_length=50)
+        ocr_confidence: Optional[float] = Field(None, description="OCR 신뢰도", ge=0.0, le=1.0)
+        language: str = Field(default="ko", description="텍스트 언어 코드", max_length=10)
+        # Metadata
+        created_at: Optional[datetime] = Field(default_factory=datetime.now, description="생성 타임스탬프")
+        model_config = {
+            "json_schema_extra": {
+                "example": {
+                    "text_id": 1, "element_id": 1, "ocr_text": "1. 다음 중 옳은 것을 고르시오.",
+                    "ocr_engine": "PaddleOCR", "ocr_confidence": 0.98, "language": "ko",
+                }
+            }
+        }
+    # ===>>> v2.1 스키마 Mock 모델 (마이그레이션 계획에 따라 추가됨) <<<===
+    @dataclass # plain dataclass, as specified in the migration plan
+    class MockQuestionGroup:
+        """
+        Mock of the v2.1 question_groups table.
+        See E-R diagram lines 199-234.
+        """
+        question_group_id: int
+        page_id: int
+        anchor_element_id: Optional[int]  # None = orphan group
+        group_type: str  # 'anchor' | 'orphan'
+        start_y: int
+        end_y: int
+        element_count: int
+        created_at: Optional[str] = None # kept as a plain str in the mock for simplicity
+        updated_at: Optional[str] = None # kept as a plain str in the mock for simplicity
+    @dataclass # plain dataclass, as specified in the migration plan
+    class MockQuestionElement:
+        """
+        Mock of the v2.1 question_elements table (N:M mapping table).
+        See E-R diagram lines 236-261.
+        """
+        qe_id: int  # PK, simulates auto_increment
+        question_group_id: int  # FK -> question_groups
+        element_id: int  # FK -> layout_elements
+        order_in_question: int  # overall sort order (result of sorter.py)
+        order_in_group: int  # sort order within the group (result of sorter.py)
+        created_at: Optional[str] = None # kept as a plain str in the mock for simplicity
+    # ===>>> v2.1 스키마 Mock 모델 끝 <<<===
+# ============================================================================
+# 순수 Python 클래스 구현 (Pydantic이 설치되지 않은 경우)
+# ============================================================================
+else:
+    @dataclass
+    class MockElement:
+        """
+        layout_elements 테이블을 대체하는 Mock 모델 (순수 Python 버전).
+        v2.1 참고: Pydantic 버전 주석 참조. 정렬 필드는 sorter.py 호환성용.
+        """
+        element_id: int
+        class_name: str
+        confidence: float
+        bbox_x: int
+        bbox_y: int
+        bbox_width: int
+        bbox_height: int
+        page_id: Optional[int] = None
+        created_at: Optional[datetime] = None
+        # sorter.py가 동적으로 추가하는 필드
+        order_in_question: Optional[int] = None
+        group_id: Optional[int] = None
+        order_in_group: Optional[int] = None
+        def __post_init__(self):
+            """순수 Python 버전 유효성 검사"""
+            if self.element_id < 1: raise ValueError("element_id는 1 이상이어야 합니다")
+            if not self.class_name or len(self.class_name) > 100: raise ValueError("class_name 길이가 유효하지 않습니다")
+            if not (0.0 <= self.confidence <= 1.0): raise ValueError("confidence는 0.0과 1.0 사이여야 합니다")
+            if self.bbox_x < 0 or self.bbox_y < 0: raise ValueError("bbox 좌표는 0 이상이어야 합니다")
+            if self.bbox_width < 1 or self.bbox_height < 1: raise ValueError("bbox 크기는 1 이상이어야 합니다")
+            if self.page_id is not None and self.page_id < 1: raise ValueError("page_id는 1 이상이어야 합니다")
+            self.created_at = self.created_at or datetime.now()
+        @property
+        def area(self) -> int: return self.bbox_width * self.bbox_height
+        @property
+        def y_position(self) -> int: return self.bbox_y
+        @property
+        def x_position(self) -> int: return self.bbox_x
+        def to_dict(self) -> Dict[str, Any]:
+            """인스턴스를 딕셔너리로 변환"""
+            data = self.__dict__.copy()
+            data["area"] = self.area
+            data["y_position"] = self.y_position
+            data["x_position"] = self.x_position
+            if self.created_at: data["created_at"] = self.created_at.isoformat()
+            return data
+        def to_json(self, indent: Optional[int] = None) -> str:
+            """인스턴스를 JSON 문자열로 변환"""
+            return json_module.dumps(self.to_dict(), ensure_ascii=False, indent=indent)
+        def __repr__(self) -> str:
+            return (f"MockElement(id={self.element_id}, cls='{self.class_name}', "
+                    f"bbox=({self.bbox_x}, {self.bbox_y}, {self.bbox_width}, {self.bbox_height}))")
+    @dataclass
+    class MockTextContent:
+        """
+        text_contents 테이블을 대체하는 Mock 모델 (순수 Python 버전).
+        """
+        text_id: int
+        element_id: int
+        ocr_text: str
+        ocr_engine: str = "PaddleOCR"
+        ocr_confidence: Optional[float] = None
+        language: str = "ko"
+        created_at: Optional[datetime] = None
+        def __post_init__(self):
+            """순수 Python 버전 유효성 검사"""
+            if self.text_id < 1: raise ValueError("text_id는 1 이상이어야 합니다")
+            if self.element_id < 1: raise ValueError("element_id는 1 이상이어야 합니다")
+            if not self.ocr_text: raise ValueError("ocr_text는 비어 있을 수 없습니다")
+            if self.ocr_confidence is not None and not (0.0 <= self.ocr_confidence <= 1.0): raise ValueError("ocr_confidence는 0.0과 1.0 사이여야 합니다")
+            if len(self.ocr_engine) > 50: raise ValueError("ocr_engine이 너무 깁니다")
+            if len(self.language) > 10: raise ValueError("language가 너무 깁니다")
+            self.created_at = self.created_at or datetime.now()
+        def to_dict(self) -> Dict[str, Any]:
+            """인스턴스를 딕셔너리로 변환"""
+            data = self.__dict__.copy()
+            if self.created_at: data["created_at"] = self.created_at.isoformat()
+            return data
+        def to_json(self, indent: Optional[int] = None) -> str:
+            """인스턴스를 JSON 문자열로 변환"""
+            return json_module.dumps(self.to_dict(), ensure_ascii=False, indent=indent)
+        def __repr__(self) -> str:
+            return (f"MockTextContent(id={self.text_id}, elem_id={self.element_id}, "
+                    f"text='{self.ocr_text[:30]}...')")
+    # ===>>> v2.1 스키마 Mock 모델 (순수 Python) <<<===
+    @dataclass
+    class MockQuestionGroup:
+        """Mock of the v2.1 question_groups table (pure-Python branch)."""
+        question_group_id: int
+        page_id: int
+        anchor_element_id: Optional[int]
+        group_type: str
+        start_y: int
+        end_y: int
+        element_count: int
+        # Timestamps are optional and typed as str in this mock
+        created_at: Optional[str] = None
+        updated_at: Optional[str] = None
+    @dataclass
+    class  MockQuestionElement:
+        """Mock of the v2.1 question_elements table (pure-Python branch)."""
+        qe_id: int
+        question_group_id: int
+        element_id: int
+        order_in_question: int
+        order_in_group: int
+        # Timestamp is optional and typed as str in this mock
+        created_at: Optional[str] = None
+    # ===>>> v2.1 스키마 Mock 모델 끝 <<<===
+# ============================================================================
+# 유틸리티 함수 (공통)
+# ============================================================================
+def create_mock_element_from_detection(
+    element_id: int,
+    detection_result: Dict[str, Any],
+    page_id: Optional[int] = None
+) -> MockElement:
+    """
+    레이아웃 탐지 결과로부터 MockElement 생성.
+    다양한 bbox 형식을 처리합니다.
+    """
+    bbox = detection_result['bbox']
+    bbox_x, bbox_y, bbox_width, bbox_height = 0, 0, 0, 0 # 초기화
+    if isinstance(bbox, list) and len(bbox) == 4:
+        # 형식 [x, y, 너비, 높이] 또는 [x1, y1, x2, y2] 가정
+        # 실제 사용된 YOLO 출력 형식에 따라 명확화 필요
+        # 현재는 순수 Python init 기반으로 [x, y, 너비, 높이] 가정
+        bbox_x, bbox_y, bbox_width, bbox_height = bbox
+        # 만약 [x1, y1, x2, y2] 형식이면 아래 주석 해제:
+        # bbox_x, bbox_y = bbox[0], bbox[1]
+        # bbox_width, bbox_height = bbox[2] - bbox[0], bbox[3] - bbox[1]
+    elif isinstance(bbox, dict):
+        bbox_x = bbox.get('x', 0)
+        bbox_y = bbox.get('y', 0)
+        bbox_width = bbox.get('width', 0)
+        bbox_height = bbox.get('height', 0)
+    # 유효성 검사를 위해 너비/높이가 양수인지 확인
+    bbox_width = max(1, bbox_width)
+    bbox_height = max(1, bbox_height)
+    return MockElement(
+        element_id=element_id,
+        page_id=page_id,
+        class_name=str(detection_result['class_name']), # str 확인
+        confidence=float(detection_result['confidence']), # float 확인
+        bbox_x=int(bbox_x), # int 확인
+        bbox_y=int(bbox_y), # int 확인
+        bbox_width=int(bbox_width), # int 확인
+        bbox_height=int(bbox_height) # int 확인
+    )
+def create_mock_text_content(
+    text_id: int,
+    element_id: int,
+    ocr_result: str,
+    ocr_confidence: Optional[float] = None,
+    ocr_engine: str = "PaddleOCR"
+) -> MockTextContent:
+    """
+    OCR 결과로부터 MockTextContent 생성.
+    """
+    return MockTextContent(
+        text_id=text_id,
+        element_id=element_id,
+        ocr_text=ocr_result,
+        ocr_engine=ocr_engine,
+        ocr_confidence=ocr_confidence
+    )
+# ============================================================================
+# 사용 예시 (공통)
+# ============================================================================
+if __name__ == "__main__":
+    # Demo script: prints one example per model and helper defined above.
+    print(f"Mock 모델 구현: {'Pydantic' if USE_PYDANTIC else '순수 Python'}")
+    # Example 1: MockElement
+    element = MockElement(
+        element_id=1, page_id=1, class_name="question_number", confidence=0.95,
+        bbox_x=100, bbox_y=200, bbox_width=50, bbox_height=30
+    )
+    print("\nMockElement 예시:")
+    print(element)
+    print(f"면적: {element.area}, Y-위치: {element.y_position}, X-위치: {element.x_position}")
+    # Example 2: MockTextContent
+    text_content = MockTextContent(
+        text_id=1, element_id=1, ocr_text="1. 다음 중 옳은 것을 고르시오.", ocr_confidence=0.98
+    )
+    print("\nMockTextContent 예시:")
+    print(text_content)
+    # Example 3: v2.1 models
+    group = MockQuestionGroup(
+        question_group_id=1, page_id=1, anchor_element_id=1, group_type='anchor',
+        start_y=100, end_y=450, element_count=3
+    )
+    qe = MockQuestionElement(
+        qe_id=1, question_group_id=1, element_id=2, order_in_question=1, order_in_group=1
+    )
+    print("\nv2.1 MockQuestionGroup 예시:")
+    print(group)
+    print("\nv2.1 MockQuestionElement 예시:")
+    print(qe)
+    # Example 4: utility functions
+    detection = {'class_name': 'figure', 'confidence': 0.88, 'bbox': [50, 300, 200, 150]}
+    element_util = create_mock_element_from_detection(2, detection, page_id=1)
+    print("\n유틸리티 함수 예시 (Element):")
+    print(element_util)
+    text_util = create_mock_text_content(2, 2, "그림 A는 ...", ocr_confidence=0.91)
+    print("\n유틸리티 함수 예시 (Text):")
+    print(text_util)
+    # Example 5: JSON serialisation
+    print("\nJSON 직렬화 예시:")
+    if USE_PYDANTIC:
+        # Pydantic v2 models serialise through model_dump_json
+        print(element.model_dump_json(indent=2)) # type: ignore[attr-defined]
+        print(text_content.model_dump_json(indent=2)) # type: ignore[attr-defined]
+    else:
+        # The pure-Python classes expose to_json instead
+        print(element.to_json(indent=2))
+        print(text_content.to_json(indent=2))

app/services/pdf_processor.py ADDED Viewed

	@@ -0,0 +1,250 @@

+# -*- coding: utf-8 -*-
+"""
+SmartEyeSsen PDF 처리 서비스
+============================
+PDF 파일을 페이지별 이미지로 변환하는 기능을 제공합니다.
+PyMuPDF (fitz)를 사용하여 고품질 이미지 변환을 수행합니다.
+"""
+import io
+import os
+from pathlib import Path
+from typing import Any, Dict, List, Optional
+import fitz  # PyMuPDF
+from loguru import logger
+from PIL import Image
+DEFAULT_PDF_DPI = 300
+class PDFProcessor:
+    """PDF 파일 처리 클래스"""
+    def __init__(self, upload_directory: str = "uploads", dpi: Optional[int] = None):
+        """
+        PDF 처리기 초기화
+        Args:
+            upload_directory: 파일 저장 기본 디렉토리
+            dpi: 이미지 변환 해상도 (기본값: 300)
+        """
+        self.upload_directory = upload_directory
+        self.dpi = self._resolve_dpi(dpi)
+        self.jpeg_quality = 95
+        os.makedirs(upload_directory, exist_ok=True)
+        logger.info(
+            f"PDFProcessor 초기화 완료 - DPI: {self.dpi}, 저장 경로: {upload_directory}"
+        )
+    @staticmethod
+    def _resolve_dpi(provided_dpi: Optional[int]) -> int:
+        """환경 변수와 인자 값을 고려해 DPI를 결정"""
+        if provided_dpi and provided_dpi > 0:
+            return int(provided_dpi)
+        env_value = os.getenv("PDF_PROCESSOR_DPI")
+        if env_value:
+            try:
+                parsed = int(env_value)
+                if parsed > 0:
+                    logger.debug(
+                        f"환경 변수 PDF_PROCESSOR_DPI 적용: {parsed} (인자 미지정)"
+                    )
+                    return parsed
+            except ValueError:
+                logger.warning(
+                    f"환경 변수 PDF_PROCESSOR_DPI 값 '{env_value}'을(를) 정수로 변환할 수 없어 기본값 {DEFAULT_PDF_DPI}을 사용합니다."
+                )
+        return DEFAULT_PDF_DPI
+    def convert_pdf_to_images(
+        self,
+        pdf_bytes: bytes,
+        project_id: int,
+        start_page_number: int
+    ) -> List[Dict[str, any]]:
+        """
+        PDF 바이트 데이터를 페이지별 이미지로 변환하고 저장
+        Args:
+            pdf_bytes: PDF 파일의 바이트 데이터
+            project_id: 프로젝트 ID (폴더 경로용)
+            start_page_number: 시작 페이지 번호
+        Returns:
+            변환된 이미지 정보 리스트
+            [
+                {
+                    'page_number': 1,
+                    'image_path': '123/page_1.jpg',  # DB 저장용 상대 경로
+                    'full_path': 'uploads/123/page_1.jpg',  # 실제 파일 경로
+                    'width': 2480,
+                    'height': 3508
+                },
+                ...
+            ]
+        Raises:
+            ValueError: PDF 파일이 손상되었거나 읽을 수 없는 경우
+            OSError: 파일 저장 중 디스크 오류 발생 시
+        """
+        logger.info(f"PDF 변환 시작 - ProjectID: {project_id}, 시작 페이지: {start_page_number}")
+        # 프로젝트별 저장 디렉토리 생성
+        project_dir = os.path.join(self.upload_directory, str(project_id))
+        os.makedirs(project_dir, exist_ok=True)
+        converted_pages = []
+        pdf_document = None
+        try:
+            # PDF 문서 열기
+            pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf")
+            total_pages = len(pdf_document)
+            logger.info(f"PDF 페이지 수: {total_pages}")
+            if total_pages == 0:
+                raise ValueError("PDF 파일에 페이지가 없습니다.")
+            # PDF 원본 파일 저장
+            original_pdf_path = os.path.join(project_dir, "original.pdf")
+            with open(original_pdf_path, "wb") as f:
+                f.write(pdf_bytes)
+            logger.info(f"PDF 원본 저장 완료: {original_pdf_path}")
+            # 각 페이지를 이미지로 변환
+            for page_index in range(total_pages):
+                page_number = start_page_number + page_index
+                try:
+                    # PDF 페이지를 Pixmap으로 렌더링
+                    page = pdf_document[page_index]
+                    # DPI 기반 확대 비율 계산 (72 DPI가 기본)
+                    zoom = self.dpi / 72
+                    mat = fitz.Matrix(zoom, zoom)
+                    pix = page.get_pixmap(matrix=mat, alpha=False)
+                    # PIL Image로 변환
+                    img_data = pix.tobytes("jpeg")
+                    img = Image.open(io.BytesIO(img_data))
+                    # 이미지 크기
+                    width, height = img.size
+                    # 파일명 및 경로 생성
+                    filename = f"page_{page_number}.jpg"
+                    full_path = os.path.join(project_dir, filename)
+                    relative_path = os.path.join(str(project_id), filename)
+                    # 이미지 저장 (JPEG 품질 적용)
+                    img.save(full_path, "JPEG", quality=self.jpeg_quality, optimize=True)
+                    # 변환 정보 저장
+                    page_info = {
+                        'page_number': page_number,
+                        'image_path': relative_path,
+                        'full_path': full_path,
+                        'width': width,
+                        'height': height,
+                        'dpi': self.dpi,
+                    }
+                    converted_pages.append(page_info)
+                    logger.debug(
+                        f"페이지 {page_index + 1}/{total_pages} 변환 완료 - "
+                        f"페이지 번호: {page_number}, 크기: {width}x{height}"
+                    )
+                except Exception as e:
+                    logger.error(f"페이지 {page_index + 1} 변환 실패: {str(e)}")
+                    # 부분 변환 실패 시 롤백
+                    self._rollback_conversion(converted_pages)
+                    raise ValueError(f"PDF 페이지 {page_index + 1} 변환 실패: {str(e)}")
+            logger.info(
+                f"PDF 변환 완료 - ProjectID: {project_id}, "
+                f"총 {len(converted_pages)}개 페이지 변환"
+            )
+            return converted_pages
+        except fitz.fitz.FileDataError as e:
+            logger.error(f"PDF 파일 오류: {str(e)}")
+            raise ValueError(f"PDF 파일이 손상되었거나 읽을 수 없습니다: {str(e)}")
+        except Exception as e:
+            logger.error(f"PDF 변환 중 예상치 못한 오류: {str(e)}")
+            if converted_pages:
+                self._rollback_conversion(converted_pages)
+            raise
+        finally:
+            # PDF 문서 닫기
+            if pdf_document:
+                pdf_document.close()
+    def _rollback_conversion(self, converted_pages: List[Dict[str, any]]) -> None:
+        """
+        변환 실패 시 생성된 이미지 파일 롤백
+        Args:
+            converted_pages: 롤백할 페이지 정보 리스트
+        """
+        logger.warning(f"변환 롤백 시작 - {len(converted_pages)}개 파일 삭제")
+        for page_info in converted_pages:
+            try:
+                full_path = page_info.get('full_path')
+                if full_path and os.path.exists(full_path):
+                    os.remove(full_path)
+                    logger.debug(f"파일 삭제: {full_path}")
+            except Exception as e:
+                logger.error(f"롤백 중 파일 삭제 실패: {full_path}, 오류: {str(e)}")
+        logger.info("변환 롤백 완료")
+    def get_pdf_info(self, pdf_bytes: bytes) -> Dict[str, any]:
+        """
+        PDF 파일의 메타데이터 추출
+        Args:
+            pdf_bytes: PDF 파일의 바이트 데이터
+        Returns:
+            PDF 정보 딕셔너리
+            {
+                'total_pages': 10,
+                'title': '문서 제목',
+                'author': '작성자',
+                'subject': '주제',
+                'creator': '생성 프로그램',
+                'producer': 'PDF 생성기',
+                'creation_date': '생성 날짜'
+            }
+        """
+        try:
+            pdf_document = fitz.open(stream=pdf_bytes, filetype="pdf")
+            metadata = pdf_document.metadata
+            info = {
+                'total_pages': len(pdf_document),
+                'title': metadata.get('title', ''),
+                'author': metadata.get('author', ''),
+                'subject': metadata.get('subject', ''),
+                'creator': metadata.get('creator', ''),
+                'producer': metadata.get('producer', ''),
+                'creation_date': metadata.get('creationDate', '')
+            }
+            pdf_document.close()
+            logger.debug(f"PDF 메타데이터 추출 완료: {info}")
+            return info
+        except Exception as e:
+            logger.error(f"PDF 메타데이터 추출 실패: {str(e)}")
+            raise ValueError(f"PDF 파일 정보를 읽을 수 없습니다: {str(e)}")
+# Shared module-level instance, created at import time; constructing it also
+# creates the "uploads" directory if it does not exist.
+pdf_processor = PDFProcessor(upload_directory="uploads")

app/services/sorter.py ADDED Viewed

	@@ -0,0 +1,1605 @@

+# -*- coding: utf-8 -*-
+"""
+SmartEyeSsen Layout Sorter (v.LayoutDetect.2.4 - Tie-breaker in Post-processing)
+=================================================================================
+문제 레이아웃 정렬 알고리즘 구현 (Layout Type Detection 기반 Hybrid)
+페이지 전체 레이아웃 유형(1단, 2단, 혼합형 등)을 먼저 판별하고,
+유형에 맞는 분할 전략(수평/수직) 적용.
+분할 실패 시(Base Case), 레이아웃 유형별로 특화된 그룹핑 로직 호출.
+- 표준 1단/2단 컬럼: _base_case_standard_1_column
+- 혼합형: _base_case_mixed_layout
+최종 병합 시 전역 고아 그룹 처리 로직 적용.
+알고리즘 흐름: (v.LayoutDetect.2.1/2.2/2.3과 동일)
+0. 전처리
+1. 레이아웃 유형 판별
+2. 유형별 재귀 처리
+3. Base Case 처리 (후처리 포함)
+4. 최종 병합 및 순서 부여
+v.LayoutDetect.2.4:
+- _post_process_table_figure_assignment: 최적 그룹 탐색 시 Y 거리가 동일할 경우 더 뒤쪽 그룹을 우선하는 Tie-breaker 추가.
+- sort_layout_elements: 후처리 호출 전에 임시 그룹 ID 할당하여 로그 가독성 개선.
+- (v2.3 변경 유지) _post_process_table_figure_assignment: 최적 그룹 탐색 로직 (Lookahead).
+- (v2.2 변경 유지) _post_process_table_figure_assignment: 이동 조건은 거리 비교 로직 사용.
+- (v2.1 변경 유지) _post_process_table_figure_assignment: y_diff_threshold 기본값 150.
+- (v2.1 변경 유지) _base_case_standard_1_column: 상단 고아 요소 분리 로직.
+"""
+# 필요한 라이브러리 임포트
+from typing import List, Dict, Tuple, Optional, Any, Union, TYPE_CHECKING
+from dataclasses import dataclass, field
+import numpy as np
+from sklearn.cluster import KMeans
+from loguru import logger
+import math
+from enum import Enum, auto
+import os
+# Mock 모델 임포트 (호환성 유지용, 추후 제거 예정)
+from .mock_models import MockElement
+if TYPE_CHECKING:
+    from sqlalchemy.orm import Session
+    from ..models import LayoutElement
+# ============================================================================
+# 데이터 클래스 및 Enum 정의 (기존과 동일)
+# ============================================================================
+class LayoutType(Enum):
+    """Page layout categories used to choose which sorting routine runs."""
+    STANDARD_1_COLUMN = auto()
+    STANDARD_2_COLUMN = auto()
+    MIXED_TOP1_BOTTOM2 = auto()
+    MIXED_TOP2_BOTTOM1 = auto()
+    HORIZONTAL_SEP_PRESENT = auto()
+    # Selected directly when document_type == "reading_order"
+    READING_ORDER = auto()
+    UNKNOWN = auto()
+@dataclass
+class Zone:
+    x_min: int
+    y_min: int
+    x_max: int
+    y_max: int
+    @property
+    def width(self) -> int:
+        return max(0, self.x_max - self.x_min)
+    @property
+    def height(self) -> int:
+        return max(0, self.y_max - self.y_min)
+    def __repr__(self) -> str:
+        return f"Zone(x=[{self.x_min}, {self.x_max}), y=[{self.y_min}, {self.y_max}))"
+@dataclass
+class HorizontalSplit:
+    """A split into a top and a bottom zone, together with the separator element."""
+    top_zone: Zone
+    bottom_zone: Zone
+    separator_element: MockElement
+@dataclass
+class HorizontalSplitYGap:
+    """A split into a top and a bottom zone, together with the Y coordinate of the split."""
+    top_zone: Zone
+    bottom_zone: Zone
+    split_y: float
+@dataclass
+class VerticalSplit:
+    """A split into a left and a right zone, together with the X coordinate of the gutter."""
+    left_zone: Zone
+    right_zone: Zone
+    gutter_x: float
+@dataclass
+class ElementGroup:
+    anchor: Optional[MockElement]
+    children: List[MockElement] = field(default_factory=list)
+    group_id: int = -1  # flatten 함수에서 최종 할당, 후처리 전 임시 할당
+    def add_child(self, child: MockElement):
+        self.children.append(child)
+    def get_all_elements_sorted(self) -> List[MockElement]:
+        """
+        그룹 내 요소들을 정렬합니다.
+        - 앵커(Anchor)가 항상 가장 먼저 위치합니다.
+        - 나머지 자식(Children) 요소들은 (Y, X) 좌표 순으로 정렬됩니다.
+        """
+        # 1. 앵커가 존재하면 리스트의 첫 요소로 설정합니다.
+        elements = [self.anchor] if self.anchor else []
+        # 2. 자식 요소들을 (Y, X) 좌표 기준으로 정렬합니다.
+        sorted_children = sorted(
+            self.children, key=lambda e: (e.y_position, e.x_position)
+        )
+        # 3. 앵커 요소 뒤에 정렬된 자식 요소들을 추가합니다.
+        elements.extend(sorted_children)
+        return elements
+    def is_empty(self) -> bool:
+        return self.anchor is None and not self.children
+    def __repr__(self) -> str:
+        anchor_id = self.anchor.element_id if self.anchor else "Orphan"
+        child_ids = sorted([c.element_id for c in self.children])
+        # flatten 전에는 group_id가 임시값일 수 있음
+        return f"Group(ID:{self.group_id}, Anchor: {anchor_id}, Children: {child_ids})"
+# ============================================================================
+# 상수 정의 (기존과 동일)
+# ============================================================================
+# Class names treated as group anchors / group children.
+# NOTE(review): "question type" and "question number" use spaces while
+# "second_question_number" uses underscores — confirm these match the
+# detector's class labels.
+ALLOWED_ANCHORS = ["question type", "question number", "second_question_number"]
+ALLOWED_CHILDREN = ["question text", "list", "choices", "figure", "table", "flowchart"]
+ALLOWED_CLASSES = ALLOWED_ANCHORS + ALLOWED_CHILDREN
+HORIZONTAL_SEP_WIDTH_THRESHOLD = 0.8
+# Multiplied by the page height in detect_layout_type to get the top-zone height
+HORIZONTAL_SEP_Y_POS_THRESHOLD = 0.15
+# Fewer anchors than this makes detect_layout_type return STANDARD_1_COLUMN
+MIN_ANCHORS_FOR_SPLIT = 2
+VERTICAL_GAP_THRESHOLD_RATIO = 1.5
+VERTICAL_GAP_THRESHOLD_ABS = 100
+# K-Means settings used on anchor X centres in detect_layout_type
+KMEANS_N_CLUSTERS = 2
+KMEANS_CLUSTER_SEPARATION_MIN = 50
+LAYOUT_DETECT_Y_SPLIT_POINT = 0.4
+LAYOUT_DETECT_X_STD_THRESHOLD_RATIO = 0.1
+HORIZONTAL_ADJACENCY_Y_CENTER_RATIO = 0.7
+HORIZONTAL_ADJACENCY_X_PROXIMITY = 50
+BASE_CASE_TOP_ORPHAN_THRESHOLD_RATIO = 0.15
+POST_PROCESS_CLOSENESS_RATIO = 0.5
+POST_PROCESS_LOOKAHEAD = 2
+# Constants for 2D-distance based grouping
+ANCHOR_VERTICAL_PROXIMITY_THRESHOLD = 250  # px - Y-distance threshold to the anchor
+ANCHOR_2D_DISTANCE_WEIGHT_X = 0.2  # weight of the X distance (kept low)
+ANCHOR_2D_DISTANCE_WEIGHT_Y = 1.0  # weight of the Y distance
+# ============================================================================
+# 메인 함수: 레이아웃 유형 판별 후 정렬 (수정됨)
+# ============================================================================
+def _sort_layout_elements_v24(
+    elements: List[MockElement],
+    document_type: str = "question_based",
+    page_width: Optional[int] = None,
+    page_height: Optional[int] = None,
+) -> List[MockElement]:
+    """
+    레이아웃 유형 판별 후 맞춤형 정렬 로직 적용 (v.LayoutDetect.2.4)
+    """
+    logger.info(
+        f"맞춤형 정렬(v.LayoutDetect.2.4) 시작: {len(elements)}개 요소, 타입={document_type}"
+    )
+    filtered_elements = preprocess_elements(elements, document_type)
+    if not filtered_elements:
+        logger.warning("전처리 후 정렬할 요소가 없습니다.")
+        return []
+    if page_width is None:
+        page_width = calculate_page_width(filtered_elements)
+    if page_height is None:
+        page_height = calculate_page_height(filtered_elements)
+    logger.info(f"페이지 크기: {page_width} x {page_height}")
+    initial_zone = Zone(x_min=0, y_min=0, x_max=page_width, y_max=page_height)
+    grouped_results: List[ElementGroup] = []
+    try:
+        if document_type == "reading_order":
+            layout_type = LayoutType.READING_ORDER
+            logger.info(f"판별된 레이아웃 유형: {layout_type.name} (문서 타입 지정)")
+            sorted_elements_reading = sorted(
+                filtered_elements, key=lambda e: (e.y_position, e.x_position)
+            )
+            grouped_results = [
+                ElementGroup(anchor=None, children=[elem])
+                for elem in sorted_elements_reading
+            ]
+        else:
+            layout_type = detect_layout_type(filtered_elements, page_width, page_height)
+            logger.info(f"판별된 레이아웃 유형: {layout_type.name}")
+            if layout_type == LayoutType.STANDARD_1_COLUMN:
+                logger.debug(
+                    f"{layout_type.name}: 분할 없이 전체 구역 표준 1단 Base Case 실행"
+                )
+                grouped_results = _base_case_standard_1_column(
+                    initial_zone, filtered_elements
+                )
+            elif layout_type == LayoutType.STANDARD_2_COLUMN:
+                grouped_results = _sort_standard_2_column(
+                    initial_zone, filtered_elements
+                )
+            elif layout_type in [
+                LayoutType.HORIZONTAL_SEP_PRESENT,
+                LayoutType.MIXED_TOP1_BOTTOM2,
+                LayoutType.MIXED_TOP2_BOTTOM1,
+                LayoutType.UNKNOWN,
+            ]:
+                grouped_results = _sort_recursive_by_layout(
+                    initial_zone, filtered_elements, layout_type, depth=0
+                )
+            else:
+                logger.error(
+                    f"처리할 수 없는 레이아웃 유형: {layout_type.name}. (Y,X) 정렬로 대체합니다."
+                )
+                sorted_elements_fallback = sorted(
+                    filtered_elements, key=lambda e: (e.y_position, e.x_position)
+                )
+                grouped_results = [
+                    ElementGroup(anchor=None, children=[elem])
+                    for elem in sorted_elements_fallback
+                ]
+            # --- 👇 수정: 후처리 전에 임시 그룹 ID 할당 (로깅용) ---
+            if grouped_results and document_type == "question_based":
+                logger.debug("후처리 전 임시 그룹 ID 할당...")
+                temp_groups_with_id = []
+                temp_group_id_counter = 0
+                temp_orphan_groups = [g for g in grouped_results if g.anchor is None]
+                temp_non_orphan_groups = [
+                    g for g in grouped_results if g.anchor is not None
+                ]
+                # 고아 그룹 먼저 ID 할당
+                if temp_orphan_groups:
+                    temp_orphan_groups.sort(
+                        key=lambda g: (
+                            min(c.y_position for c in g.children)
+                            if g.children
+                            else float("inf")
+                        )
+                    )
+                    for group in temp_orphan_groups:
+                        group.group_id = temp_group_id_counter
+                        temp_groups_with_id.append(group)
+                        temp_group_id_counter += 1
+                # 앵커 그룹 ID 할당
+                # (주의: _post_process... 함수는 앵커 그룹 리스트만 받도록 수정 필요)
+                # 우선 여기서 ID만 할당하고, 후처리는 non_orphan_groups 대상으로 수행
+                for group in temp_non_orphan_groups:
+                    group.group_id = temp_group_id_counter
+                    # temp_groups_with_id.append(group) # flatten 전 최종 순서는 아직 모름
+                    temp_group_id_counter += 1
+                # 후처리는 앵커가 있는 그룹들을 대상으로 수행
+                logger.debug(
+                    f"{len(temp_non_orphan_groups)}개 앵커 그룹 대상 후처리 실행..."
+                )
+                processed_non_orphan_groups = _post_process_table_figure_assignment(
+                    temp_non_orphan_groups
+                )
+                # 최종 그룹 리스트 재구성 (고아 + 후처리된 앵커 그룹)
+                grouped_results = temp_orphan_groups + processed_non_orphan_groups
+                logger.debug("후처리 및 임시 그룹 ID 할당 완료.")
+            # --- 👆 수정 끝 ---
+    except Exception as e:
+        logger.error(
+            f"맞춤형 정렬 중 심각한 오류 발생: {e}. (Y,X) 좌표 정렬로 대체합니다.",
+            exc_info=True,
+        )
+        sorted_elements_fallback = sorted(
+            filtered_elements, key=lambda e: (e.y_position, e.x_position)
+        )
+        grouped_results = [
+            ElementGroup(anchor=None, children=[elem])
+            for elem in sorted_elements_fallback
+        ]
+    if not grouped_results:
+        logger.warning("그룹핑 결과가 비어 있습니다.")
+        return []
+    # 최종 병합: 고아 그룹과 앵커 그룹 순서 결정 (기존 로직 유지)
+    orphan_groups = [g for g in grouped_results if g.anchor is None]
+    non_orphan_groups = [
+        g for g in grouped_results if g.anchor is not None
+    ]  # 후처리된 리스트 사용
+    final_ordered_groups: List[ElementGroup] = []
+    if orphan_groups:
+        # 고아 그룹은 Y 좌표 기준으로 정렬
+        orphan_groups.sort(
+            key=lambda g: (
+                min(c.y_position for c in g.children) if g.children else float("inf")
+            )
+        )
+        logger.debug(
+            f"전역 고아 그룹 {len(orphan_groups)}개 (Y 좌표 정렬됨) 리스트 맨 앞으로 이동"
+        )
+        final_ordered_groups.extend(orphan_groups)
+    else:
+        logger.debug("전역 고아 그룹 없음")
+    # 앵커 그룹은 Base Case/재귀 호출에서 결정된 순서 유지 (Y좌표 정렬 불필요)
+    final_ordered_groups.extend(non_orphan_groups)
+    # 최종 순서 및 ID 부여
+    final_sorted_elements, _, _ = flatten_groups_and_assign_order(
+        final_ordered_groups, start_global_order=0, start_group_id=0
+    )
+    logger.info(f"맞춤형 정렬 완료: {len(final_sorted_elements)}개 요소")
+    return final_sorted_elements
+def _use_adaptive_strategy() -> bool:
+    """환경 변수 기반 Adaptive 전략 사용 여부 판단"""
+    return os.getenv("USE_ADAPTIVE_SORTER", "false").lower() in {"1", "true", "yes"}
+def sort_layout_elements(
+    elements: List[MockElement],
+    document_type: str = "question_based",
+    page_width: Optional[int] = None,
+    page_height: Optional[int] = None,
+    page_dpi: Optional[float] = None,
+) -> List[MockElement]:
+    """
+    Adaptive 전략 플래그가 활성화된 경우 sorter_strategies의 Adaptive 엔트리포인트로 위임하고,
+    그렇지 않으면 v2.4 코어 구현을 그대로 사용한다.
+    """
+    if _use_adaptive_strategy():
+        from .sorter_strategies import sort_layout_elements_adaptive
+        return sort_layout_elements_adaptive(
+            elements=elements,
+            document_type=document_type,
+            page_width=page_width,
+            page_height=page_height,
+            force_strategy=None,
+            page_dpi=page_dpi,
+        )
+    return _sort_layout_elements_v24(
+        elements=elements,
+        document_type=document_type,
+        page_width=page_width,
+        page_height=page_height,
+    )
+# ============================================================================
+# 레이아웃 유형 판별 함수 (기존과 동일)
+# ============================================================================
+def detect_layout_type(
+    elements: List[MockElement], page_width: int, page_height: int
+) -> LayoutType:
+    # ... (코드 동일) ...
+    """앵커 요소 분포를 분석하여 페이지 레이아웃 유형 판별"""
+    anchors = [e for e in elements if e.class_name in ALLOWED_ANCHORS]
+    if len(anchors) < MIN_ANCHORS_FOR_SPLIT:
+        logger.debug(
+            f"레이아웃 판별: 앵커 수({len(anchors)}) 부족 -> STANDARD_1_COLUMN"
+        )
+        return LayoutType.STANDARD_1_COLUMN
+    top_zone_height = page_height * HORIZONTAL_SEP_Y_POS_THRESHOLD
+    wide_q_type = find_wide_question_type(elements, page_width, top_zone_height)
+    if wide_q_type:
+        logger.debug(
+            f"레이아웃 판별: 넓은 question_type(ID:{wide_q_type.element_id}) 존재 -> HORIZONTAL_SEP_PRESENT"
+        )
+        return LayoutType.HORIZONTAL_SEP_PRESENT
+    anchor_x_centers = np.array([[a.bbox_x + a.bbox_width / 2] for a in anchors])
+    is_clearly_2_column = False
+    if len(np.unique(anchor_x_centers)) >= 2:
+        try:
+            kmeans = KMeans(
+                n_clusters=KMEANS_N_CLUSTERS, random_state=42, n_init="auto"
+            )
+            kmeans.fit(anchor_x_centers)
+            centers = sorted(kmeans.cluster_centers_.flatten())
+            if (
+                len(centers) == 2
+                and centers[1] - centers[0] >= KMEANS_CLUSTER_SEPARATION_MIN
+            ):
+                is_clearly_2_column = True
+                logger.trace(
+                    f"레이아웃 판별: 전체 X 분포는 2단 구조 가능성 높음 (Centers: {centers})"
+                )
+            else:
+                logger.trace(f"레이아웃 판별: 전체 X 분포는 1단 구조 또는 불분명")
+        except Exception as e:
+            logger.warning(f"레이아웃 판별 중 K-Means 오류 발생: {e}")
+    if is_clearly_2_column:
+        split_y = page_height * LAYOUT_DETECT_Y_SPLIT_POINT
+        top_anchors = [
+            a for a in anchors if (a.y_position + a.bbox_height / 2) < split_y
+        ]
+        bottom_anchors = [
+            a for a in anchors if (a.y_position + a.bbox_height / 2) >= split_y
+        ]
+        if not top_anchors or not bottom_anchors:
+            logger.debug("레이아웃 판별: 상/하단 앵커 그룹 불완전 -> STANDARD_2_COLUMN")
+            return LayoutType.STANDARD_2_COLUMN
+        top_x_centers = (
+            np.array([[a.bbox_x + a.bbox_width / 2] for a in top_anchors])
+            if top_anchors
+            else np.array([])
+        )
+        bottom_x_centers = (
+            np.array([[a.bbox_x + a.bbox_width / 2] for a in bottom_anchors])
+            if bottom_anchors
+            else np.array([])
+        )
+        x_std_threshold = page_width * LAYOUT_DETECT_X_STD_THRESHOLD_RATIO
+        top_is_multi_column = (
+            top_x_centers.size > 1 and np.std(top_x_centers) > x_std_threshold
+        )
+        bottom_is_multi_column = (
+            bottom_x_centers.size > 1 and np.std(bottom_x_centers) > x_std_threshold
+        )
+        if not top_is_multi_column and bottom_is_multi_column:
+            logger.debug(
+                f"레이아웃 판별: 상단({len(top_anchors)}개) 1단, 하단({len(bottom_anchors)}개) 2단 -> MIXED_TOP1_BOTTOM2"
+            )
+            return LayoutType.MIXED_TOP1_BOTTOM2
+        elif top_is_multi_column and not bottom_is_multi_column:
+            logger.debug(
+                f"레이아웃 판별: 상단({len(top_anchors)}개) 2단, 하단({len(bottom_anchors)}개) 1단 -> MIXED_TOP2_BOTTOM1"
+            )
+            return LayoutType.MIXED_TOP2_BOTTOM1
+        elif top_is_multi_column and bottom_is_multi_column:
+            logger.debug(
+                f"레이아웃 판별: 상단({len(top_anchors)}개) 2단, 하단({len(bottom_anchors)}개) 2단 -> STANDARD_2_COLUMN"
+            )
+            return LayoutType.STANDARD_2_COLUMN
+        else:
+            logger.warning(
+                f"레이아웃 판별: 상/하단 모두 1단으로 보이나 전체는 2단 구조? -> UNKNOWN"
+            )
+            return LayoutType.UNKNOWN
+    else:
+        logger.debug("레이아웃 판별: 전체 1단 구조 -> STANDARD_1_COLUMN")
+        return LayoutType.STANDARD_1_COLUMN
+# ============================================================================
+# 재귀 정렬 함수 (기존과 동일)
+# ============================================================================
+def _sort_recursive_by_layout(
+    current_zone: Zone,
+    elements_in_zone: List[MockElement],
+    layout_type: LayoutType,
+    depth: int,
+) -> List[ElementGroup]:
+    # ... (코드 동일) ...
+    """레이아웃 유형에 따라 다른 분할 우선순위를 적용하는 재귀 함수"""
+    indent = "  " * depth
+    logger.debug(
+        f"{indent}[Depth {depth}, Type: {layout_type.name}] 구역 처리 시작: {current_zone}, 요소 수={len(elements_in_zone)}"
+    )
+    if not elements_in_zone:
+        logger.trace(f"{indent} -> 빈 구역")
+        return []
+    if len(elements_in_zone) == 1:
+        element = elements_in_zone[0]
+        logger.trace(f"{indent} -> 요소 1개")
+        return (
+            [ElementGroup(anchor=element)]
+            if element.class_name in ALLOWED_ANCHORS
+            else [ElementGroup(anchor=None, children=[element])]
+        )
+    if layout_type == LayoutType.STANDARD_2_COLUMN:
+        logger.debug(f"{indent} -> {layout_type.name}: 표준 2단 처리 함수 직접 호출")
+        return _sort_standard_2_column(current_zone, elements_in_zone)
+    split_result: Optional[
+        Union[HorizontalSplit, HorizontalSplitYGap, VerticalSplit]
+    ] = None
+    split_type = "None"
+    if layout_type == LayoutType.HORIZONTAL_SEP_PRESENT:
+        split_result = find_horizontal_split_by_type(current_zone, elements_in_zone)
+        if split_result:
+            split_type = "H_Type"
+        else:
+            anchors = [e for e in elements_in_zone if e.class_name in ALLOWED_ANCHORS]
+            split_result = find_vertical_split_kmeans(current_zone, anchors)
+            if split_result:
+                split_type = "Vertical"
+            else:
+                split_result = find_horizontal_split_by_y_gap(
+                    current_zone, elements_in_zone
+                )
+                if split_result:
+                    split_type = "H_YGap"
+    elif (
+        layout_type == LayoutType.MIXED_TOP1_BOTTOM2
+        or layout_type == LayoutType.MIXED_TOP2_BOTTOM1
+    ):
+        split_result = find_horizontal_split_by_y_gap(current_zone, elements_in_zone)
+        if split_result:
+            split_type = "H_YGap"
+        else:
+            split_result = find_horizontal_split_by_type(current_zone, elements_in_zone)
+            if split_result:
+                split_type = "H_Type"
+            else:
+                anchors = [
+                    e for e in elements_in_zone if e.class_name in ALLOWED_ANCHORS
+                ]
+                split_result = find_vertical_split_kmeans(current_zone, anchors)
+                if split_result:
+                    split_type = "Vertical"
+    elif layout_type == LayoutType.UNKNOWN:
+        split_result = find_horizontal_split_by_type(current_zone, elements_in_zone)
+        if split_result:
+            split_type = "H_Type"
+        else:
+            anchors = [e for e in elements_in_zone if e.class_name in ALLOWED_ANCHORS]
+            split_result = find_vertical_split_kmeans(current_zone, anchors)
+            if split_result:
+                split_type = "Vertical"
+            else:
+                split_result = find_horizontal_split_by_y_gap(
+                    current_zone, elements_in_zone
+                )
+                if split_result:
+                    split_type = "H_YGap"
+    if split_result:
+        if isinstance(split_result, (HorizontalSplit, HorizontalSplitYGap)):
+            split_y = (
+                split_result.split_y
+                if isinstance(split_result, HorizontalSplitYGap)
+                else split_result.separator_element.y_position
+                + split_result.separator_element.bbox_height / 2
+            )
+            top_elements = [
+                e
+                for e in elements_in_zone
+                if getattr(e, "element_id", -1)
+                != getattr(
+                    getattr(split_result, "separator_element", None), "element_id", -2
+                )
+                and (e.bbox_y + e.bbox_height / 2) < split_y
+            ]
+            bottom_elements = [
+                e
+                for e in elements_in_zone
+                if getattr(e, "element_id", -1)
+                != getattr(
+                    getattr(split_result, "separator_element", None), "element_id", -2
+                )
+                and (e.bbox_y + e.bbox_height / 2) >= split_y
+            ]
+            logger.debug(
+                f"{indent} -> {split_type} 수평 분할 성공! Top:{len(top_elements)}, Bottom:{len(bottom_elements)}"
+            )
+            top_layout_type = (
+                detect_layout_type(
+                    top_elements,
+                    split_result.top_zone.width,
+                    split_result.top_zone.height,
+                )
+                if top_elements
+                else LayoutType.UNKNOWN
+            )
+            bottom_layout_type = (
+                detect_layout_type(
+                    bottom_elements,
+                    split_result.bottom_zone.width,
+                    split_result.bottom_zone.height,
+                )
+                if bottom_elements
+                else LayoutType.UNKNOWN
+            )
+            sorted_top = _sort_recursive_by_layout(
+                split_result.top_zone, top_elements, top_layout_type, depth + 1
+            )
+            sep_group = (
+                [ElementGroup(anchor=split_result.separator_element)]
+                if isinstance(split_result, HorizontalSplit)
+                else []
+            )
+            sorted_bottom = _sort_recursive_by_layout(
+                split_result.bottom_zone, bottom_elements, bottom_layout_type, depth + 1
+            )
+            logger.debug(f"{indent} <- {split_type} 수평 분할 결과 병합")
+            return sorted_top + sep_group + sorted_bottom
+        elif isinstance(split_result, VerticalSplit):
+            left_elements = [
+                e
+                for e in elements_in_zone
+                if (e.bbox_x + e.bbox_width / 2) < split_result.gutter_x
+            ]
+            right_elements = [
+                e
+                for e in elements_in_zone
+                if (e.bbox_x + e.bbox_width / 2) >= split_result.gutter_x
+            ]
+            logger.debug(
+                f"{indent} -> Vertical 수직 분할 성공! Left:{len(left_elements)}, Right:{len(right_elements)}"
+            )
+            left_layout_type = (
+                detect_layout_type(
+                    left_elements,
+                    split_result.left_zone.width,
+                    split_result.left_zone.height,
+                )
+                if left_elements
+                else LayoutType.UNKNOWN
+            )
+            right_layout_type = (
+                detect_layout_type(
+                    right_elements,
+                    split_result.right_zone.width,
+                    split_result.right_zone.height,
+                )
+                if right_elements
+                else LayoutType.UNKNOWN
+            )
+            sorted_left = _sort_recursive_by_layout(
+                split_result.left_zone, left_elements, left_layout_type, depth + 1
+            )
+            sorted_right = _sort_recursive_by_layout(
+                split_result.right_zone, right_elements, right_layout_type, depth + 1
+            )
+            logger.debug(f"{indent} <- Vertical 수직 분할 결과 병합")
+            return sorted_left + sorted_right
+    else:
+        logger.debug(
+            f"{indent} -> 모든 분할 실패, 레이아웃 유형({layout_type.name})에 따른 Base Case 실행"
+        )
+        result_groups: List[ElementGroup] = []
+        if layout_type == LayoutType.STANDARD_1_COLUMN:
+            result_groups = _base_case_standard_1_column(current_zone, elements_in_zone)
+        elif (
+            layout_type == LayoutType.MIXED_TOP1_BOTTOM2
+            or layout_type == LayoutType.MIXED_TOP2_BOTTOM1
+        ):
+            result_groups = _base_case_mixed_layout(
+                current_zone, elements_in_zone, layout_type
+            )
+        elif (
+            layout_type == LayoutType.HORIZONTAL_SEP_PRESENT
+            or layout_type == LayoutType.UNKNOWN
+        ):
+            logger.warning(
+                f"{indent} -> {layout_type.name} 유형 분할 실패. 1단 Base Case로 처리합니다."
+            )
+            result_groups = _base_case_standard_1_column(current_zone, elements_in_zone)
+        else:
+            logger.error(
+                f"{indent} -> 처리할 수 없는 Base Case 유형: {layout_type.name}. 1단으로 처리."
+            )
+            result_groups = _base_case_standard_1_column(current_zone, elements_in_zone)
+        logger.debug(f"{indent} <- Base Case 처리 완료: {len(result_groups)} 그룹 생성")
+        return result_groups
+# ============================================================================
+# 표준 2단 레이아웃 처리 함수 (기존과 동일)
+# ============================================================================
+def _sort_standard_2_column(
+    zone: Zone, elements: List[MockElement]
+) -> List[ElementGroup]:
+    # ... (코드 동일) ...
+    """표준 2단 레이아웃 처리: K-Means 분할 후 컬럼별 _base_case_standard_1_column 호출"""
+    logger.debug("표준 2단 처리: K-Means 분할 시도")
+    anchors = [e for e in elements if e.class_name in ALLOWED_ANCHORS]
+    vertical_split = find_vertical_split_kmeans(zone, anchors)
+    if vertical_split:
+        logger.debug(f" -> 수직 분할 성공! 분리선 X={vertical_split.gutter_x:.1f}")
+        left_elements = [
+            e
+            for e in elements
+            if (e.bbox_x + e.bbox_width / 2) < vertical_split.gutter_x
+        ]
+        right_elements = [
+            e
+            for e in elements
+            if (e.bbox_x + e.bbox_width / 2) >= vertical_split.gutter_x
+        ]
+        logger.debug(
+            f"   Left 요소 수: {len(left_elements)}, Right 요소 수: {len(right_elements)}"
+        )
+        groups_left = _base_case_standard_1_column(
+            vertical_split.left_zone, left_elements
+        )
+        groups_right = _base_case_standard_1_column(
+            vertical_split.right_zone, right_elements
+        )
+        logger.debug(
+            f" <- 컬럼별 그룹핑 완료 (Left: {len(groups_left)} 그룹, Right: {len(groups_right)} 그룹)"
+        )
+        return groups_left + groups_right
+    else:
+        logger.warning(
+            "표준 2단 처리 실패: 수직 분할 불가. 전체 구역 표준 1단 Base Case 실행"
+        )
+        return _base_case_standard_1_column(zone, elements)
+# ============================================================================
+# 분할 함수 구현 (기존과 동일)
+# ============================================================================
+def find_wide_question_type(
+    elements: List[MockElement], page_width: int, top_y_limit: float
+) -> Optional[MockElement]:
+    # ... (코드 동일) ...
+    """페이지 상단 영역에서 넓은 question_type 찾기"""
+    wide_types = [
+        e
+        for e in elements
+        if e.class_name == "question_type"
+        and e.y_position < top_y_limit
+        and (e.bbox_width / page_width if page_width > 0 else 0)
+        >= HORIZONTAL_SEP_WIDTH_THRESHOLD
+    ]
+    return min(wide_types, key=lambda e: e.y_position) if wide_types else None
+def find_horizontal_split_by_type(
+    zone: Zone, elements: List[MockElement]
+) -> Optional[HorizontalSplit]:
+    # ... (코드 동일) ...
+    """넓은 question_type으로 수평 분할"""
+    potential_separators = []
+    for element in elements:
+        if element.class_name == "question_type":
+            width_ratio = element.bbox_width / zone.width if zone.width > 0 else 0
+            if width_ratio >= HORIZONTAL_SEP_WIDTH_THRESHOLD:
+                potential_separators.append(element)
+    if not potential_separators:
+        return None
+    separator = min(potential_separators, key=lambda e: e.y_position)
+    if not (zone.y_min < separator.y_position < zone.y_max):
+        return None
+    top_zone = Zone(zone.x_min, zone.y_min, zone.x_max, separator.y_position)
+    bottom_zone = Zone(
+        zone.x_min, separator.y_position + separator.bbox_height, zone.x_max, zone.y_max
+    )
+    if top_zone.height <= 0 or bottom_zone.height <= 0:
+        return None
+    return HorizontalSplit(top_zone, bottom_zone, separator)
+def find_horizontal_split_by_y_gap(
+    zone: Zone, elements: List[MockElement]
+) -> Optional[HorizontalSplitYGap]:
+    # ... (코드 동일) ...
+    """앵커 Y Gap으로 수평 분할"""
+    anchors = sorted(
+        [e for e in elements if e.class_name in ALLOWED_ANCHORS],
+        key=lambda e: e.y_position,
+    )
+    if len(anchors) < MIN_ANCHORS_FOR_SPLIT:
+        return None
+    max_gap = -1
+    split_index = -1
+    avg_anchor_height = (
+        np.mean([a.bbox_height for a in anchors if a.bbox_height > 0])
+        if any(a.bbox_height > 0 for a in anchors)
+        else 30
+    )
+    for i in range(len(anchors) - 1):
+        gap = (anchors[i + 1].y_position + anchors[i + 1].bbox_height / 2) - (
+            anchors[i].y_position + anchors[i].bbox_height / 2
+        )
+        if gap > max_gap:
+            max_gap = gap
+            split_index = i
+    threshold = max(
+        avg_anchor_height * VERTICAL_GAP_THRESHOLD_RATIO, VERTICAL_GAP_THRESHOLD_ABS
+    )
+    if max_gap >= threshold:
+        split_y = (
+            anchors[split_index].y_position
+            + anchors[split_index].bbox_height
+            + anchors[split_index + 1].y_position
+        ) / 2
+        if zone.y_min < split_y < zone.y_max:
+            top_zone = Zone(zone.x_min, zone.y_min, zone.x_max, int(split_y))
+            bottom_zone = Zone(zone.x_min, int(split_y), zone.x_max, zone.y_max)
+            logger.debug(
+                f"    Y Gap 분석: 수평 분할 가능 (Max Gap={max_gap:.1f} >= Threshold={threshold:.1f})"
+            )
+            return HorizontalSplitYGap(top_zone, bottom_zone, split_y)
+        else:
+            logger.warning(
+                f"    Y Gap 분석: 분할선({split_y:.1f})이 구역({zone.y_min}-{zone.y_max}) 밖에 위치. 분할 취소."
+            )
+            return None
+    else:
+        logger.debug(
+            f"    Y Gap 분석: 최대 간격({max_gap:.1f}) 임계값({threshold:.1f}) 미만. 수평 분할 불가."
+        )
+        return None
+def find_vertical_split_kmeans(
+    zone: Zone, anchors: List[MockElement]
+) -> Optional[VerticalSplit]:
+    """앵커 X 좌표 K-Means로 수직 분할 (개선: 오른쪽 칼럼 시작점 기준 분할)"""
+    if len(anchors) < MIN_ANCHORS_FOR_SPLIT:
+        return None
+    anchor_x_centers = np.array([[a.bbox_x + a.bbox_width / 2] for a in anchors])
+    if len(np.unique(anchor_x_centers)) < 2:
+        return None
+    try:
+        kmeans = KMeans(n_clusters=KMEANS_N_CLUSTERS, random_state=42, n_init="auto")
+        kmeans.fit(anchor_x_centers)
+        centers = sorted(kmeans.cluster_centers_.flatten())
+        if (
+            len(centers) == 2
+            and centers[1] - centers[0] >= KMEANS_CLUSTER_SEPARATION_MIN
+        ):
+            # 🔥 핵심 변경: 오른쪽 칼럼 앵커의 시작점을 경계로 사용
+            # 너무 타이트한 경계가 문제될 경우
+            COLUMN_BOUNDARY_MARGIN = 20  # px
+            gutter_x = centers[1] - COLUMN_BOUNDARY_MARGIN
+            # gutter_x = centers[1]  # 기존: (centers[0] + centers[1]) / 2
+            if zone.x_min < gutter_x < zone.x_max:
+                left_zone = Zone(zone.x_min, zone.y_min, int(gutter_x), zone.y_max)
+                right_zone = Zone(int(gutter_x), zone.y_min, zone.x_max, zone.y_max)
+                logger.debug(
+                    f"    수직 분할 성공: 왼쪽 칼럼 X=[{zone.x_min}, {int(gutter_x)}), "
+                    f"오른쪽 칼럼 X=[{int(gutter_x)}, {zone.x_max})"
+                )
+                return VerticalSplit(left_zone, right_zone, gutter_x)
+            else:
+                logger.warning(
+                    f"    수직 분할: 경계선({gutter_x:.1f})이 구역 밖. 분할 취소."
+                )
+                return None
+        else:
+            logger.debug(f"    수직 분할 실패: 중심간 거리 부족")
+            return None
+    except Exception as e:
+        logger.error(f"    수직 분할 K-Means 오류: {e}")
+        return None
+# ============================================================================
+# 후처리 함수 (수정됨)
+# ============================================================================
+def _post_process_table_figure_assignment(
+    groups: List[ElementGroup], y_diff_threshold: int = 150
+) -> List[ElementGroup]:
+    """
+    그룹핑 후처리: 테이블/그림 요소가 현재 앵커보다 다음 앵커(들)에 훨씬 가까우면 이동 시도
+    --- 수정: 최적 그룹 탐색 및 Tie-breaker 추가 ---
+    """
+    logger.debug(
+        f"    테이블/그림 할당 후처리 시작: {len(groups)}개 그룹 (Threshold={y_diff_threshold}px, Closeness Ratio={POST_PROCESS_CLOSENESS_RATIO}, Lookahead={POST_PROCESS_LOOKAHEAD})"
+    )
+    adjusted_groups = groups  # 원본 리스트를 직접 수정
+    elements_to_move_dict: Dict[int, Tuple[MockElement, int]] = (
+        {}
+    )  # {element_id: (element, target_group_idx)}
+    moved_elements_log = []  # 로깅용
+    for i in range(len(adjusted_groups)):
+        current_group = adjusted_groups[i]
+        if not current_group.anchor:
+            continue
+        current_children_copy = list(
+            current_group.children
+        )  # 순회 중 변경을 위한 복사본
+        for child_idx, child in enumerate(current_children_copy):
+            # 이미 이동 대상으로 결정된 요소는 건너뜀
+            if child.element_id in elements_to_move_dict:
+                continue
+            if child.class_name in ["table", "figure", "flowchart"]:
+                y_diff_current = child.y_position - current_group.anchor.y_position
+                best_target_group_idx = -1
+                min_y_diff_next = float("inf")
+                # 현재 그룹 이후 몇 개 그룹까지 탐색
+                for lookahead_idx in range(1, POST_PROCESS_LOOKAHEAD + 1):
+                    next_group_idx = i + lookahead_idx
+                    if next_group_idx >= len(adjusted_groups):
+                        break
+                    next_group = adjusted_groups[next_group_idx]
+                    if not next_group.anchor:
+                        continue
+                    y_diff_next = abs(child.y_position - next_group.anchor.y_position)
+                    # 이동 조건 검사 (v2.2 조건)
+                    if y_diff_current > (y_diff_threshold / 2) and y_diff_next < (
+                        y_diff_current * POST_PROCESS_CLOSENESS_RATIO
+                    ):
+                        # --- 👇 Tie-breaker 수정 👇 ---
+                        # 더 가까운 그룹을 찾거나, 거리가 같지만 더 뒤의 그룹일 경우 갱신
+                        if y_diff_next < min_y_diff_next or (
+                            y_diff_next == min_y_diff_next
+                            and next_group_idx > best_target_group_idx
+                        ):
+                            min_y_diff_next = y_diff_next
+                            best_target_group_idx = next_group_idx
+                        # --- 👆 Tie-breaker 수정 끝 👆 ---
+                # 최적 그룹을 찾았으면 이동 대상으로 등록
+                if best_target_group_idx != -1:
+                    elements_to_move_dict[child.element_id] = (
+                        child,
+                        best_target_group_idx,
+                    )
+                    moved_elements_log.append(
+                        f"Elem {child.element_id} ({child.class_name}) from Grp {current_group.group_id} to Grp {adjusted_groups[best_target_group_idx].group_id}"
+                    )
+                    logger.trace(
+                        f"        이동 후보 확정: Elem {child.element_id} -> Group {adjusted_groups[best_target_group_idx].group_id} (Min Y diff next={min_y_diff_next:.0f})"
+                    )
+    # --- 실제 요소 이동 (루프 종료 후) ---
+    if elements_to_move_dict:
+        # 1. 원본 그룹에서 요소 제거
+        elements_removed_count = 0
+        for group in adjusted_groups:
+            original_children_count = len(group.children)
+            group.children = [
+                child
+                for child in group.children
+                if child.element_id not in elements_to_move_dict
+            ]
+            elements_removed_count += original_children_count - len(group.children)
+        # 2. 대상 그룹에 요소 추가
+        elements_added_count = 0
+        for element_id, (element, target_group_idx) in elements_to_move_dict.items():
+            if 0 <= target_group_idx < len(adjusted_groups):
+                adjusted_groups[target_group_idx].children.insert(
+                    0, element
+                )  # 그룹 맨 앞에 추가
+                elements_added_count += 1
+            else:
+                logger.error(
+                    f"후처리 이동 중 유효하지 않은 대상 그룹 인덱스: {target_group_idx} for Elem {element_id}"
+                )
+        logger.debug(
+            f"    후처리 요소 이동 완료: {elements_removed_count}개 제거, {elements_added_count}개 추가"
+        )
+    if moved_elements_log:
+        logger.info(
+            f"    테이블/그림 할당 후처리: {len(moved_elements_log)}개 요소 이동됨 - {', '.join(moved_elements_log)}"
+        )
+    else:
+        logger.debug("    테이블/그림 할당 후처리: 이동된 요소 없음")
+    return adjusted_groups
+# ============================================================================
+# Base Case 함수들 (기존과 동일 v2.1)
+# ============================================================================
+def _assign_children_to_anchors_with_2d_proximity(
+    anchors: List[MockElement],
+    children: List[MockElement],
+    zone: Zone,
+    preserve_top_orphans: bool = True,
+) -> Tuple[List[ElementGroup], List[MockElement]]:
+    """
+    앵커와 자식 요소를 2D 거리 기반으로 그룹핑 (Phase 1: STANDARD_2_COLUMN 적용)
+    Args:
+        anchors: 앵커 요소 리스트
+        children: 자식 요소 리스트
+        zone: 현재 처리 중인 구역
+        preserve_top_orphans: True일 경우 상단 영역의 요소는 고아로 유지
+    Returns:
+        (그룹 리스트, 고아 요소 리스트)
+    """
+    groups: List[ElementGroup] = [ElementGroup(anchor=a) for a in anchors]
+    orphans: List[MockElement] = []
+    # 상단 고아 임계값 (기존 로직 유지 옵션)
+    top_orphan_threshold_y = (
+        zone.y_min + zone.height * BASE_CASE_TOP_ORPHAN_THRESHOLD_RATIO
+        if preserve_top_orphans
+        else zone.y_min
+    )
+    for child in children:
+        child_x_center = child.bbox_x + child.bbox_width / 2
+        child_y_center = child.bbox_y + child.bbox_height / 2
+        # 상단 고아 체크 (선택적)
+        if preserve_top_orphans and child.bbox_y < top_orphan_threshold_y:
+            # 첫 번째 앵커보다 훨씬 위쪽인 경우만 고아로 처리
+            if not anchors or child_y_center < (
+                anchors[0].bbox_y - ANCHOR_VERTICAL_PROXIMITY_THRESHOLD / 2
+            ):
+                orphans.append(child)
+                logger.trace(
+                    f"      Elem {child.element_id} 상단 고아 유지 (Y={child.bbox_y})"
+                )
+                continue
+        best_anchor_idx = None
+        min_distance = float("inf")
+        for idx, anchor in enumerate(anchors):
+            anchor_x_center = anchor.bbox_x + anchor.bbox_width / 2
+            anchor_y_center = anchor.bbox_y + anchor.bbox_height / 2
+            # 🔥 핵심 수정: 자식이 앵커보다 위쪽에 있으면 제외
+            # figure/table은 반드시 자신보다 위쪽에 있는 앵커에만 배정되어야 함
+            if child_y_center < anchor_y_center:
+                logger.trace(
+                    f"      Elem {child.element_id} → Anchor {anchor.element_id} 제외 "
+                    f"(자식 Y={child_y_center:.0f} < 앵커 Y={anchor_y_center:.0f})"
+                )
+                continue
+            # 가중 2D 거리 계산
+            x_diff = abs(child_x_center - anchor_x_center) * ANCHOR_2D_DISTANCE_WEIGHT_X
+            y_diff = abs(child_y_center - anchor_y_center) * ANCHOR_2D_DISTANCE_WEIGHT_Y
+            distance = (x_diff**2 + y_diff**2) ** 0.5
+            if distance < min_distance:
+                min_distance = distance
+                best_anchor_idx = idx
+        # 거리 임계값 체크
+        if (
+            best_anchor_idx is not None
+            and min_distance < ANCHOR_VERTICAL_PROXIMITY_THRESHOLD
+        ):
+            groups[best_anchor_idx].children.append(child)
+            logger.trace(
+                f"      Elem {child.element_id} → Anchor {anchors[best_anchor_idx].element_id} "
+                f"(2D 거리={min_distance:.1f})"
+            )
+        else:
+            orphans.append(child)
+            if best_anchor_idx is None:
+                reason = "위쪽 앵커만 허용 (모든 앵커가 자식보다 아래쪽)"
+            else:
+                reason = f"최소 거리={min_distance:.1f} > {ANCHOR_VERTICAL_PROXIMITY_THRESHOLD}"
+            logger.debug(f"      Elem {child.element_id} 고아 ({reason})")
+    return groups, orphans
+def _base_case_standard_1_column(
+    zone: Zone, elements: List[MockElement]
+) -> List[ElementGroup]:
+    # ... (v2.1 코드와 동일) ...
+    """표준 1단 구역 Base Case 처리 (상단 고아 분리)"""
+    logger.debug(
+        f"    표준 1단 Base Case 시작 (순차 처리 + 고아 개선): {len(elements)}개 요소 in {zone}"
+    )
+    anchors = sorted(
+        [e for e in elements if e.class_name in ALLOWED_ANCHORS],
+        key=lambda e: e.y_position,
+    )
+    children = [e for e in elements if e.class_name in ALLOWED_CHILDREN]
+    groups: Dict[int, ElementGroup] = {
+        anchor.element_id: ElementGroup(anchor=anchor) for anchor in anchors
+    }
+    assigned_children_ids = set()
+    logger.trace("      수평 인접 처리 시작...")
+    if anchors and children:
+        for anchor in anchors:
+            anchor_cy = anchor.bbox_y + anchor.bbox_height / 2
+            anchor_right_x = anchor.bbox_x + anchor.bbox_width
+            anchor_left_x = anchor.bbox_x
+            unassigned_children = [
+                c for c in children if c.element_id not in assigned_children_ids
+            ]
+            adjacent_child = None
+            min_y_diff = float("inf")
+            for child in unassigned_children:
+                child_cy = child.bbox_y + child.bbox_height / 2
+                child_right_x = child.bbox_x + child.bbox_width
+                child_left_x = child.bbox_x
+                y_diff = abs(anchor_cy - child_cy)
+                y_threshold = (
+                    (anchor.bbox_height + child.bbox_height)
+                    / 2
+                    * HORIZONTAL_ADJACENCY_Y_CENTER_RATIO
+                    if (anchor.bbox_height + child.bbox_height) > 0
+                    else 0
+                )
+                if y_diff >= y_threshold:
+                    continue
+                gap_right = child_left_x - anchor_right_x
+                gap_left = anchor_left_x - child_right_x
+                is_adjacent = (abs(gap_right) < HORIZONTAL_ADJACENCY_X_PROXIMITY) or (
+                    abs(gap_left) < HORIZONTAL_ADJACENCY_X_PROXIMITY
+                )
+                if is_adjacent and y_diff < min_y_diff:
+                    min_y_diff = y_diff
+                    adjacent_child = child
+            if adjacent_child:
+                logger.trace(
+                    f"        수평 인접 배정: 앵커 ID {anchor.element_id} <- 자식 ID {adjacent_child.element_id}"
+                )
+                groups[anchor.element_id].add_child(adjacent_child)
+                assigned_children_ids.add(adjacent_child.element_id)
+    logger.debug(
+        f"    수평 인접 처리 완료: {len(assigned_children_ids)}개 자식 우선 배정됨"
+    )
+    remaining_elements = anchors + [
+        c for c in children if c.element_id not in assigned_children_ids
+    ]
+    if not remaining_elements:
+        logger.debug("    모든 요소가 수평 인접으로 배정되어 그룹핑 완료.")
+        # 후처리 호출 전 그룹 ID 임시 할당 (선택적)
+        temp_groups = sorted(
+            list(groups.values()),
+            key=lambda g: g.anchor.y_position if g.anchor else float("inf"),
+        )
+        for idx, group in enumerate(temp_groups):
+            group.group_id = idx
+        return _post_process_table_figure_assignment(temp_groups)
+    # 2단계: 나머지 요소를 2D 거리 기반으로 그룹핑 (Phase 1 적용)
+    remaining_children = [
+        c for c in children if c.element_id not in assigned_children_ids
+    ]
+    if remaining_children and anchors:
+        logger.trace(
+            f"      2단계: 나머지 {len(remaining_children)}개 요소 2D 거리 그룹핑..."
+        )
+        # 🔥 2D 거리 기반 그룹핑 (상단 고아 보존 옵션 활성화)
+        proximity_groups, proximity_orphans = (
+            _assign_children_to_anchors_with_2d_proximity(
+                anchors,
+                remaining_children,
+                zone,
+                preserve_top_orphans=True,  # 상단 고아 보존
+            )
+        )
+        # 2D 거리로 배정된 자식들을 기존 그룹에 병합
+        for idx, proximity_group in enumerate(proximity_groups):
+            anchor_id = anchors[idx].element_id
+            if anchor_id in groups:
+                groups[anchor_id].children.extend(proximity_group.children)
+        # 2D 그룹핑 후 여전히 남은 요소들은 순차 처리로 넘김
+        remaining_elements = [
+            a for a in anchors if a.element_id not in assigned_children_ids
+        ] + proximity_orphans
+        logger.debug(
+            f"    2단계 완료: {len(remaining_children) - len(proximity_orphans)}개 배정, {len(proximity_orphans)}개 고아로 순차 처리 대기"
+        )
+    else:
+        remaining_elements = anchors + [
+            c for c in children if c.element_id not in assigned_children_ids
+        ]
+    if not remaining_elements:
+        logger.debug("    2D 거리 그룹핑 후 나머지 요소 없음. 그룹핑 완료.")
+        temp_groups = sorted(
+            list(groups.values()),
+            key=lambda g: g.anchor.y_position if g.anchor else float("inf"),
+        )
+        for idx, group in enumerate(temp_groups):
+            group.group_id = idx
+        return _post_process_table_figure_assignment(temp_groups)
+    logger.trace(
+        f"      3단계: 나머지 요소 {len(remaining_elements)}개 (Y, X) 정렬 및 순차 그룹핑 시작..."
+    )
+    remaining_elements.sort(key=lambda e: (e.y_position, e.x_position))
+    final_groups: List[ElementGroup] = []
+    current_group: Optional[ElementGroup] = None
+    initial_top_orphan_children: List[MockElement] = []
+    initial_bottom_orphan_children: List[MockElement] = []
+    first_anchor_found = False
+    top_orphan_threshold_y = (
+        zone.y_min + zone.height * BASE_CASE_TOP_ORPHAN_THRESHOLD_RATIO
+    )
+    for element in remaining_elements:
+        if element.class_name in ALLOWED_ANCHORS:
+            first_anchor_found = True
+            if initial_top_orphan_children:
+                logger.trace(
+                    f"        독립적인 상단 고아 그룹 생성 ({len(initial_top_orphan_children)}개 요소)"
+                )
+                final_groups.append(
+                    ElementGroup(anchor=None, children=initial_top_orphan_children)
+                )
+                initial_top_orphan_children = []
+            if (
+                current_group is not None
+                and current_group.anchor is not None
+                and not current_group.is_empty()
+            ):
+                final_groups.append(current_group)
+            if element.element_id in groups:
+                current_group = groups[element.element_id]
+                logger.trace(f"        앵커 그룹 재사용 (ID: {element.element_id})")
+            else:
+                current_group = ElementGroup(anchor=element, children=[])
+                logger.trace(f"        새 앵커 그룹 시작 (ID: {element.element_id})")
+            if initial_bottom_orphan_children:
+                logger.trace(
+                    f"        첫 앵커(ID: {element.element_id}) 그룹에 하단 고아 자식 {len(initial_bottom_orphan_children)}개 추가"
+                )
+                current_group.children = (
+                    initial_bottom_orphan_children + current_group.children
+                )
+                initial_bottom_orphan_children = []
+        else:
+            if first_anchor_found:
+                if current_group is None:
+                    logger.warning(
+                        f"        앵커 없이 자식 요소(ID: {element.element_id}) 발견됨. 위치({element.y_position:.1f}) 따라 임시 고아 리스트에 추가."
+                    )
+                    if element.y_position < top_orphan_threshold_y:
+                        initial_top_orphan_children.append(element)
+                    else:
+                        initial_bottom_orphan_children.append(element)
+                else:
+                    current_group.add_child(element)
+                    logger.trace(
+                        f"        현재 그룹(앵커: {current_group.anchor.element_id if current_group.anchor else 'Orphan'})에 자식 추가 (ID: {element.element_id})"
+                    )
+            else:
+                if element.y_position < top_orphan_threshold_y:
+                    initial_top_orphan_children.append(element)
+                    logger.trace(
+                        f"        상단 고아 자식 요소(ID: {element.element_id}) 임시 저장 (Y < {top_orphan_threshold_y:.0f})"
+                    )
+                else:
+                    initial_bottom_orphan_children.append(element)
+                    logger.trace(
+                        f"        하단 고아 자식 요소(ID: {element.element_id}) 임시 저장 (Y >= {top_orphan_threshold_y:.0f})"
+                    )
+    if initial_top_orphan_children:
+        logger.trace(
+            f"        마지막 독립 상단 고아 그룹 생성 ({len(initial_top_orphan_children)}개 요소)"
+        )
+        final_groups.append(
+            ElementGroup(anchor=None, children=initial_top_orphan_children)
+        )
+    if current_group is not None and not current_group.is_empty():
+        final_groups.append(current_group)
+    elif initial_bottom_orphan_children:
+        logger.warning("        모든 요소가 하단 자식 요소임. 단일 고아 그룹 생성.")
+        final_groups.append(
+            ElementGroup(anchor=None, children=initial_bottom_orphan_children)
+        )
+    processed_anchor_ids = set(g.anchor.element_id for g in final_groups if g.anchor)
+    for anchor_id, group in groups.items():
+        if anchor_id not in processed_anchor_ids and group.anchor:
+            final_groups.append(group)
+            logger.trace(f"        미포함 앵커 그룹 추가 (수평 인접만): ID {anchor_id}")
+    final_groups.sort(
+        key=lambda g: (
+            g.anchor.y_position
+            if g.anchor
+            else (min(c.y_position for c in g.children) if g.children else float("inf"))
+        )
+    )
+    # 후처리 호출 전 그룹 ID 임시 할당
+    for idx, group in enumerate(final_groups):
+        group.group_id = idx
+    final_groups = _post_process_table_figure_assignment(final_groups)
+    logger.debug(
+        f"    순차 처리 기반 그룹핑 (+후처리) 완료: {len(final_groups)} 그룹 생성"
+    )
+    return final_groups
+def _base_case_mixed_layout(
+    zone: Zone, elements: List[MockElement], layout_type: LayoutType
+) -> List[ElementGroup]:
+    """Group the elements of a mixed-layout zone in one top-to-bottom pass.
+    Elements are sorted by (y_position, x_position) and visited in that order.
+    Each element whose class_name is in ALLOWED_ANCHORS opens a new ElementGroup
+    and the non-anchor elements that follow become its children. Non-anchor
+    elements met before the first anchor are classified by comparing their
+    vertical center with zone.y_min + zone.height * LAYOUT_DETECT_Y_SPLIT_POINT:
+    those above the line form a separate group without an anchor, those on or
+    below it are placed in front of the first anchor's children.
+    Args:
+        zone: Region being processed; y_min and height define the split line.
+        elements: Elements to group.
+        layout_type: Only used in the debug log message.
+    Returns:
+        The groups, after temporary group_id assignment, as returned by
+        _post_process_table_figure_assignment().
+    """
+    # ... (identical to the v2.1 code) ...
+    logger.debug(
+        f"    혼합형 Base Case 시작 ({layout_type.name}): {len(elements)}개 요소 in {zone}"
+    )
+    sorted_elements = sorted(elements, key=lambda e: (e.y_position, e.x_position))
+    # State carried through the single pass over the sorted elements.
+    final_groups: List[ElementGroup] = []
+    current_group: Optional[ElementGroup] = None
+    initial_top_orphan_children: List[MockElement] = []
+    initial_bottom_orphan_children: List[MockElement] = []
+    first_anchor_found = False
+    # Y coordinate separating "top" from "bottom" orphans.
+    split_y = zone.y_min + zone.height * LAYOUT_DETECT_Y_SPLIT_POINT
+    logger.trace(f"      혼합형 Base Case Y 분할점: {split_y:.1f}")
+    for element in sorted_elements:
+        # Vertical center of the element (assumes y_position is the top edge — TODO confirm).
+        element_y_center = element.y_position + element.bbox_height / 2
+        if element.class_name in ALLOWED_ANCHORS:
+            first_anchor_found = True
+            # Top orphans gathered so far become their own group without an anchor.
+            if initial_top_orphan_children:
+                logger.trace(
+                    f"        독립적인 상단 고아 그룹 생성 ({len(initial_top_orphan_children)}개 요소)"
+                )
+                final_groups.append(
+                    ElementGroup(anchor=None, children=initial_top_orphan_children)
+                )
+                initial_top_orphan_children = []
+            # Close the previous group before opening a new one; a group that
+            # is_empty() reports as empty is not kept.
+            if current_group is not None and not current_group.is_empty():
+                final_groups.append(current_group)
+            current_group = ElementGroup(anchor=element, children=[])
+            logger.trace(f"        새 앵커 그룹 시작 (ID: {element.element_id})")
+            # Bottom orphans collected before this anchor go ahead of its children.
+            if initial_bottom_orphan_children:
+                logger.trace(
+                    f"        첫 앵커(ID: {element.element_id}) 그룹에 하단 고아 자식 {len(initial_bottom_orphan_children)}개 추가"
+                )
+                current_group.children = (
+                    initial_bottom_orphan_children + current_group.children
+                )
+                initial_bottom_orphan_children = []
+        else:
+            if first_anchor_found:
+                # NOTE(review): this branch looks unreachable — current_group is
+                # assigned in the same branch that sets first_anchor_found; confirm.
+                if current_group is None:
+                    logger.warning(
+                        f"        앵커 없이 자식 요소(ID: {element.element_id}) 발견됨. 위치({element_y_center:.1f}) 따라 임시 고아 리스트에 추가."
+                    )
+                    if element_y_center < split_y:
+                        initial_top_orphan_children.append(element)
+                    else:
+                        initial_bottom_orphan_children.append(element)
+                else:
+                    # Normal case: the element belongs to the most recent anchor.
+                    current_group.add_child(element)
+                    logger.trace(
+                        f"        현재 그룹(앵커: {current_group.anchor.element_id if current_group.anchor else 'Orphan'})에 자식 추가 (ID: {element.element_id})"
+                    )
+            else:
+                # No anchor seen yet: park the element as a top or bottom orphan.
+                if element_y_center < split_y:
+                    initial_top_orphan_children.append(element)
+                    logger.trace(
+                        f"        상단 고아 자식 요소(ID: {element.element_id}) 임시 저장"
+                    )
+                else:
+                    initial_bottom_orphan_children.append(element)
+                    logger.trace(
+                        f"        하단 고아 자식 요소(ID: {element.element_id}) 임시 저장"
+                    )
+    # Flush whatever is still pending after the last element.
+    if initial_top_orphan_children:
+        logger.trace(
+            f"        마지막 독립 상단 고아 그룹 생성 ({len(initial_top_orphan_children)}개 요소)"
+        )
+        final_groups.append(
+            ElementGroup(anchor=None, children=initial_top_orphan_children)
+        )
+    if current_group is not None and not current_group.is_empty():
+        final_groups.append(current_group)
+    elif initial_bottom_orphan_children:
+        # Bottom orphans are still pending only when no anchor consumed them.
+        logger.warning("        모든 요소가 하단 자식 요소임. 단일 고아 그룹 생성.")
+        final_groups.append(
+            ElementGroup(anchor=None, children=initial_bottom_orphan_children)
+        )
+    # Assign temporary group IDs before calling the post-processing step
+    for idx, group in enumerate(final_groups):
+        group.group_id = idx
+    final_groups = _post_process_table_figure_assignment(final_groups)
+    return final_groups
+# ============================================================================
+# 최종 병합 및 순서 부여 함수 (기존과 동일)
+# ============================================================================
+def flatten_groups_and_assign_order(
+    groups: List[ElementGroup], start_global_order: int, start_group_id: int
+) -> Tuple[List[MockElement], int, int]:
+    # ... (코드 동일) ...
+    """주어진 그룹 리스트를 평탄화하고 전역 순서/그룹 ID 부여"""
+    flattened = []
+    global_order = start_global_order
+    group_id_counter = start_group_id
+    logger.debug(
+        f"    평탄화 시작: {len(groups)}개 그룹 (시작 order={global_order}, group_id={group_id_counter})"
+    )
+    for group in groups:  # 최종 정렬된 그룹 순서 사용
+        # 그룹 객체의 ID는 임시 ID일 수 있으므로 여기서 최종 ID 할당
+        final_group_id = group_id_counter
+        group.group_id = final_group_id  # 로깅 및 참조용 업데이트
+        elements_in_group = group.get_all_elements_sorted()
+        logger.trace(
+            f"      그룹 {final_group_id} 평탄화 (Anchor: {group.anchor.element_id if group.anchor else 'Orphan'}, 요소 수: {len(elements_in_group)})"
+        )
+        for local_order, element in enumerate(elements_in_group):
+            try:
+                setattr(element, "order_in_question", global_order)
+                setattr(element, "group_id", final_group_id)  # 최종 그룹 ID 사용
+                setattr(element, "order_in_group", local_order)
+                flattened.append(element)
+                global_order += 1
+            except AttributeError as e:
+                logger.error(
+                    f"요소 (ID: {getattr(element, 'element_id', 'N/A')})에 정렬 속성 추가 실패: {e}"
+                )
+        group_id_counter += 1
+    logger.debug(
+        f"    평탄화 완료: {len(flattened)}개 요소 생성 (다음 order={global_order}, group_id={group_id_counter})"
+    )
+    return flattened, global_order, group_id_counter
+# ============================================================================
+# 헬퍼 함수 (기존과 동일)
+# ============================================================================
+def preprocess_elements(
+    elements: List[MockElement], document_type: str
+) -> List[MockElement]:
+    # ... (코드 동일) ...
+    """0단계 전처리"""
+    original_count = len(elements)
+    if document_type == "question_based":
+        filtered = [e for e in elements if e.class_name in ALLOWED_CLASSES]
+        logger.info(
+            f"전처리 (question_based): {original_count}개 → {len(filtered)}개 (허용 클래스 필터링)"
+        )
+    elif document_type == "reading_order":
+        filtered = elements
+        logger.info(f"전처리 (reading_order): {original_count}개 (모든 클래스 허용)")
+    else:
+        logger.warning(f"알 수 없는 문서 타입 '{document_type}', 모든 요소 반환")
+        filtered = elements
+    valid_elements = [e for e in filtered if hasattr(e, "area") and e.area > 0]
+    if len(valid_elements) < len(filtered):
+        logger.warning(
+            f"전처리: 면적이 0 이하인 요소 {len(filtered) - len(valid_elements)}개 제거"
+        )
+    return valid_elements
+def calculate_page_width(elements: List[MockElement]) -> int:
+    # ... (코드 동일) ...
+    """페이지 너비 추정"""
+    if not elements:
+        return 0
+        return max(e.bbox_x + e.bbox_width for e in elements) if elements else 0
+def calculate_page_height(elements: List[MockElement]) -> int:
+    # ... (코드 동일) ...
+    """페이지 높이 추정"""
+    if not elements:
+        return 0
+        return max(e.bbox_y + e.bbox_height for e in elements) if elements else 0
+# ============================================================================
+# DB 저장 함수 (ORM 연동)
+# ============================================================================
+def save_sorting_results_to_db(
+    db: "Session", page_id: int, sorted_elements: List["LayoutElement"]
+) -> Tuple[int, int]:
+    """
+    정렬된 LayoutElement 리스트를 question_groups와 question_elements 테이블에 저장합니다.
+    Args:
+        db: SQLAlchemy 세션
+        page_id: 페이지 ID
+        sorted_elements: sorter.py로 정렬된 LayoutElement 리스트
+                        (order_in_question, group_id 속성 필수)
+    Returns:
+        (생성된 그룹 수, 생성된 요소 수) 튜플
+    Raises:
+        ValueError: sorted_elements에 order_in_question 또는 group_id가 없는 경우
+    """
+    from .. import crud
+    from ..schemas import QuestionGroupCreate, QuestionElementCreate
+    if not sorted_elements:
+        logger.warning(f"page_id={page_id}: 정렬된 요소가 없어 DB 저장을 건너뜁니다.")
+        return 0, 0
+    # 1. 요소들을 group_id별로 그룹화
+    groups_dict: Dict[int, List["LayoutElement"]] = {}
+    for elem in sorted_elements:
+        if not hasattr(elem, "order_in_question") or not hasattr(elem, "group_id"):
+            raise ValueError(
+                f"element_id={elem.element_id}: order_in_question 또는 group_id ��성이 없습니다. "
+                "sorter.py의 flatten_groups_and_assign_order() 실행 후 호출하세요."
+            )
+        group_id = elem.group_id
+        if group_id not in groups_dict:
+            groups_dict[group_id] = []
+        groups_dict[group_id].append(elem)
+    logger.info(
+        f"page_id={page_id}: {len(groups_dict)}개 그룹, {len(sorted_elements)}개 요소를 DB에 저장 시작"
+    )
+    # 2. 각 그룹에 대해 QuestionGroup 생성
+    group_count = 0
+    element_count = 0
+    for group_id, group_elements in sorted(groups_dict.items()):
+        # 앵커 요소 찾기 (그룹 내 첫 번째 요소가 앵커)
+        anchor_elem = min(group_elements, key=lambda e: e.order_in_question)
+        # Y 범위 계산
+        start_y = min(e.y_position for e in group_elements)
+        end_y = max(
+            e.y_position + (e.bbox_height if hasattr(e, "bbox_height") else 0)
+            for e in group_elements
+        )
+        # QuestionGroup 생성
+        group_create = QuestionGroupCreate(
+            page_id=page_id,
+            anchor_element_id=anchor_elem.element_id,
+            start_y=start_y,
+            end_y=end_y,
+            element_count=len(group_elements),
+        )
+        db_group = crud.create_question_group(db, group_create)
+        group_count += 1
+        logger.debug(
+            f"  그룹 {group_id} → question_group_id={db_group.question_group_id} (앵커: {anchor_elem.element_id}, 요소 수: {len(group_elements)})"
+        )
+        # 3. 그룹 내 각 요소에 대해 QuestionElement 생성
+        for elem in group_elements:
+            element_create = QuestionElementCreate(
+                question_group_id=db_group.question_group_id,
+                element_id=elem.element_id,
+                order_in_question=elem.order_in_question + 1,
+            )
+            crud.create_question_element(db, element_create)
+            element_count += 1
+    logger.info(
+        f"page_id={page_id}: DB 저장 완료 ({group_count}개 그룹, {element_count}개 요소)"
+    )
+    return group_count, element_count

app/services/sorter_strategies.py ADDED Viewed

	@@ -0,0 +1,788 @@

+# -*- coding: utf-8 -*-
+"""
+SmartEyeSsen Sorter - Adaptive Strategy Pattern (완성)
+======================================================
+학습지 레이아웃의 특성을 자동 분석하여 최적의 정렬 전략을 선택하는 시스템입니다.
+주요 컴포넌트:
+--------------
+1. **LayoutProfiler**: 레이아웃 특성 분석 및 전략 추천
+   - global_consistency_score: 앵커의 전역적 X좌표 일관성 (0~1)
+   - horizontal_adjacency_ratio: 앵커-자식 수평 인접 비율 (0~1)
+   - layout_type: 페이지 레이아웃 구조 유형
+2. **정렬 전략 (Sorting Strategies)**:
+   - GlobalFirstStrategy: PDF처럼 전역적으로 일관된 레이아웃
+   - LocalFirstStrategy: 이미지처럼 불규칙한 레이아웃
+   - HybridStrategy: 두 전략을 병렬 실행하여 최적 결과 선택
+3. **sort_layout_elements_adaptive()**: 메인 진입점 함수
+   - force_strategy=None: 자동 전략 선택 (권장)
+   - force_strategy="GLOBAL_FIRST"|"LOCAL_FIRST"|"HYBRID": 강제 지정
+사용 예시:
+---------
+>>> from backend.app.services.sorter_strategies import sort_layout_elements_adaptive
+>>> from backend.app.services.mock_models import MockElement
+>>>
+>>> elements = [
+...     MockElement(element_id=1, class_name="question_type",
+...                 bbox_x=50, bbox_y=100, bbox_width=200, bbox_height=30),
+...     MockElement(element_id=2, class_name="question_text",
+...                 bbox_x=50, bbox_y=140, bbox_width=400, bbox_height=50),
+... ]
+>>>
+>>> # 자동 전략 선택 (권장)
+>>> sorted_elements = sort_layout_elements_adaptive(
+...     elements=elements,
+...     document_type="question_based",
+...     page_width=2480,
+...     page_height=3508,
+...     force_strategy=None
+... )
+>>>
+>>> # 정렬 결과
+>>> for elem in sorted_elements:
+...     print(f"Element {elem.element_id}: group={elem.group_id}, order={elem.order_in_group}")
+구현 단계:
+---------
+- ✅ Phase 1: GlobalFirstStrategy, LocalFirstStrategy, 강제 전략 선택
+- ✅ Phase 2: LayoutProfiler, 자동 전략 선택
+- ✅ Phase 3: HybridStrategy, 회귀 테스트, 파이프라인 통합
+자세한 API 문서는 `docs/sorter_adaptive_strategy_api.md`를 참조하세요.
+작성일: 2025-10-31
+버전: v3.0
+"""
+from abc import ABC, abstractmethod
+import copy
+import os
+from typing import List, Optional, Dict, Tuple
+from dataclasses import dataclass, field
+from enum import Enum, auto
+import numpy as np
+from sklearn.cluster import KMeans
+from loguru import logger
+def _env_float(name: str, default: float) -> float:
+    """환경 변수 값을 부동소수점으로 파싱하며 실패 시 기본값을 반환"""
+    value = os.getenv(name)
+    if value is None or value == "":
+        return default
+    try:
+        return float(value)
+    except ValueError:
+        logger.warning(
+            "환경 변수 %s 값 '%s'을(를) float로 변환하지 못했습니다. 기본값 %.2f을 사용합니다.",
+            name,
+            value,
+            default,
+        )
+        return default
+# Layout-analysis tunables. Each value is read through _env_float(), so it is
+# taken from the named environment variable when that holds a parsable float
+# and otherwise falls back to the literal default given here.
+DEFAULT_BASE_DPI = _env_float("LAYOUT_ANALYSIS_BASE_DPI", 300.0)
+# Reference page size in inches (presumably A4, 8.27 x 11.69 in — confirm).
+BASE_PAGE_HEIGHT_INCHES = _env_float("LAYOUT_BASE_PAGE_HEIGHT_INCHES", 11.69)
+BASE_PAGE_WIDTH_INCHES = _env_float("LAYOUT_BASE_PAGE_WIDTH_INCHES", 8.27)
+# Lower and upper bounds for the effective DPI, as the names indicate.
+MIN_EFFECTIVE_DPI = _env_float("LAYOUT_MIN_EFFECTIVE_DPI", 120.0)
+MAX_EFFECTIVE_DPI = _env_float("LAYOUT_MAX_EFFECTIVE_DPI", 600.0)
+# Page-width threshold and margin, in inches per their names.
+GLOBAL_WIDTH_INCH_THRESHOLD = _env_float("LAYOUT_GLOBAL_WIDTH_INCH_THRESHOLD", 9.0)
+GLOBAL_WIDTH_INCH_MARGIN = _env_float("LAYOUT_GLOBAL_WIDTH_INCH_MARGIN", 0.75)
+# sorter.py의 모든 함수와 클래스 임포트
+from .sorter import (
+    _sort_layout_elements_v24 as _sort_layout_elements_new,
+    MockElement,
+    ElementGroup,
+    Zone,
+    VerticalSplit,
+    LayoutType,
+    ALLOWED_ANCHORS,
+    ALLOWED_CHILDREN,
+    ALLOWED_CLASSES,
+    MIN_ANCHORS_FOR_SPLIT,
+    KMEANS_N_CLUSTERS,
+    KMEANS_CLUSTER_SEPARATION_MIN,
+    HORIZONTAL_ADJACENCY_Y_CENTER_RATIO,
+    HORIZONTAL_ADJACENCY_X_PROXIMITY,
+    BASE_CASE_TOP_ORPHAN_THRESHOLD_RATIO,
+    POST_PROCESS_CLOSENESS_RATIO,
+    POST_PROCESS_LOOKAHEAD,
+    detect_layout_type,
+    preprocess_elements,
+    calculate_page_width,
+    calculate_page_height,
+    flatten_groups_and_assign_order,
+    _post_process_table_figure_assignment,
+    _sort_recursive_by_layout,
+    find_wide_question_type,
+    find_horizontal_split_by_type,
+    find_horizontal_split_by_y_gap,
+    _sort_standard_2_column,
+    _base_case_mixed_layout,
+)
+from .sorter_구버전 import sort_layout_elements as _sort_layout_elements_legacy
+# ============================================================================
+# Enum 및 Dataclass 정의
+# ============================================================================
+class SortingStrategyType(Enum):
+    """정렬 전략 타입"""
+    GLOBAL_FIRST = auto()  # 전역 우선 (신규 로직)
+    LOCAL_FIRST = auto()  # 로컬 우선 (구 로직)
+    HYBRID = auto()  # 혼합형 (Phase 3)
+# ============================================================================
+# Strategy 인터페이스
+# ============================================================================
+class SortingStrategy(ABC):
+    """정렬 전략 추상 인터페이스"""
+    @abstractmethod
+    def sort(
+        self,
+        elements: List[MockElement],
+        document_type: str,
+        page_width: int,
+        page_height: int,
+    ) -> List[MockElement]:
+        """
+        레이아웃 요소 정렬
+        Args:
+            elements: 정렬할 요소 리스트
+            document_type: 문서 타입 ("question_based" 또는 "reading_order")
+            page_width: 페이지 너비
+            page_height: 페이지 높이
+        Returns:
+            정렬된 요소 리스트 (group_id, order_in_group 할당됨)
+        """
+        pass
+# ============================================================================
+# GlobalFirstStrategy (신규 로직)
+# ============================================================================
+class GlobalFirstStrategy(SortingStrategy):
+    """
+    전역 우선 전략 (Global-First Strategy)
+    현재 sorter.py의 로직을 그대로 사용:
+    - 오른쪽 칼럼 시작점 기준 수직 분할
+    - 2D 거리 기반 그룹핑 적용
+    PDF와 같은 전역적으로 일관된 레이아웃에 효과적
+    """
+    def sort(
+        self,
+        elements: List[MockElement],
+        document_type: str,
+        page_width: int,
+        page_height: int,
+    ) -> List[MockElement]:
+        """현재 sorter.py (v2.4) 로직 직접 호출"""
+        logger.info("[GlobalFirstStrategy] 전역 우선 전략 실행 중 (v2.4 코어 호출)")
+        return _sort_layout_elements_new(
+            elements=elements,
+            document_type=document_type,
+            page_width=page_width,
+            page_height=page_height,
+        )
+# ============================================================================
+# LocalFirstStrategy (구 버전 로직)
+# ============================================================================
+class LocalFirstStrategy(SortingStrategy):
+    """
+    로컬 우선 전략 (Local-First Strategy)
+    구 버전 sorter_구버전.py의 정렬 함수를 그대로 호출한다.
+    """
+    def sort(
+        self,
+        elements: List[MockElement],
+        document_type: str,
+        page_width: int,
+        page_height: int,
+    ) -> List[MockElement]:
+        logger.info("[LocalFirstStrategy] 로컬 우선 전략 실행 중 (구버전 직접 호출)")
+        return _sort_layout_elements_legacy(
+            elements=elements,
+            document_type=document_type,
+            page_width=page_width,
+            page_height=page_height,
+        )
+class HybridStrategy(SortingStrategy):
+    """
+    혼합 전략 (Hybrid Strategy)
+    전역/로컬 전략을 모두 실행한 뒤 그룹 품질을 평가하여 더 일관된 결과를 선택한다.
+    """
+    _COLUMN_OFFSET_RATIO = 0.4  # 앵커와 자식 간 허용 가능한 X 거리 비율
+    def __init__(self) -> None:
+        self._global_strategy = GlobalFirstStrategy()
+        self._local_strategy = LocalFirstStrategy()
+    def sort(
+        self,
+        elements: List[MockElement],
+        document_type: str,
+        page_width: int,
+        page_height: int,
+    ) -> List[MockElement]:
+        logger.info("[HybridStrategy] 혼합 전략 실행 시작")
+        # 전략별로 깊은 복사하여 독립적으로 실행
+        global_input = copy.deepcopy(elements)
+        local_input = copy.deepcopy(elements)
+        global_result = self._global_strategy.sort(
+            elements=global_input,
+            document_type=document_type,
+            page_width=page_width,
+            page_height=page_height,
+        )
+        local_result = self._local_strategy.sort(
+            elements=local_input,
+            document_type=document_type,
+            page_width=page_width,
+            page_height=page_height,
+        )
+        global_penalty = self._score_grouping(global_result, page_width)
+        local_penalty = self._score_grouping(local_result, page_width)
+        logger.info(
+            "[HybridStrategy] 평가 점수 비교 - Global: %.3f, Local: %.3f",
+            global_penalty,
+            local_penalty,
+        )
+        if global_penalty <= local_penalty:
+            logger.info("[HybridStrategy] GlobalFirstStrategy 결과採用")
+            return global_result
+        logger.info("[HybridStrategy] LocalFirstStrategy 결과採用")
+        return local_result
+    def _score_grouping(self, elements: List[MockElement], page_width: int) -> float:
+        """
+        그룹 품질을 평가하여 점수(낮을수록 좋음)를 계산한다.
+        - 앵커가 없는 그룹, 자식이 없는 앵커, 고아 자식 등을 패널티로 부여한다.
+        """
+        if not elements:
+            return float("inf")
+        anchors = [e for e in elements if e.class_name in ALLOWED_ANCHORS]
+        children = [e for e in elements if e.class_name in ALLOWED_CHILDREN]
+        anchor_ids = {anchor.element_id for anchor in anchors}
+        groups: Dict[int, List[MockElement]] = {}
+        for elem in elements:
+            group_id = getattr(elem, "group_id", None)
+            if group_id is None:
+                continue
+            groups.setdefault(group_id, []).append(elem)
+        penalty = 0.0
+        assigned_anchors = set()
+        grouped_child_ids = set()
+        x_threshold = page_width * self._COLUMN_OFFSET_RATIO if page_width else None
+        for group_id, group_elems in groups.items():
+            anchor = next(
+                (e for e in group_elems if e.class_name in ALLOWED_ANCHORS), None
+            )
+            if anchor is None:
+                # 앵커가 없는 그룹은 큰 패널티
+                penalty += 5.0
+                penalty += sum(
+                    1.5 for e in group_elems if e.class_name in ALLOWED_CHILDREN
+                )
+                continue
+            assigned_anchors.add(anchor.element_id)
+            anchor_cx = anchor.bbox_x + anchor.bbox_width / 2
+            anchor_cy = anchor.bbox_y + anchor.bbox_height / 2
+            children_in_group = [
+                e for e in group_elems if e.class_name in ALLOWED_CHILDREN
+            ]
+            if not children_in_group:
+                penalty += 1.0  # 앵커에 자식이 전혀 없는 경우 경미한 패널티
+            for child in children_in_group:
+                grouped_child_ids.add(child.element_id)
+                child_cx = child.bbox_x + child.bbox_width / 2
+                child_cy = child.bbox_y + child.bbox_height / 2
+                if child_cy < anchor_cy:
+                    penalty += 1.0  # 자식이 앵커보다 위에 위치한 경우
+                if x_threshold and abs(child_cx - anchor_cx) > x_threshold:
+                    penalty += 0.5  # 칼럼을 심하게 넘나드는 자식
+        # 그룹에 배정되지 않은 자식 요소
+        orphan_children = [
+            child for child in children if child.element_id not in grouped_child_ids
+        ]
+        penalty += len(orphan_children) * 2.0
+        # 어떤 그룹에도 속하지 않은 앵커 (고아 앵커)
+        unassigned_anchors = anchor_ids - assigned_anchors
+        penalty += len(unassigned_anchors) * 1.5
+        return penalty
+@dataclass
+class LayoutProfile:
+    """레이아웃 프로파일 (Phase 2 확장 버전)"""
+    global_consistency_score: float = 0.0
+    anchor_x_std: float = 0.0
+    horizontal_adjacency_ratio: float = 0.0
+    anchor_count: int = 0
+    layout_type: LayoutType = LayoutType.STANDARD_1_COLUMN
+    page_width: int = 0
+    page_height: int = 0
+    anchor_y_variance: float = 0.0
+    recommended_strategy: SortingStrategyType = field(
+        default=SortingStrategyType.GLOBAL_FIRST
+    )
+    effective_dpi: float = 0.0
+    width_in_inches: float = 0.0
+    height_in_inches: float = 0.0
+# ============================================================================
+# LayoutProfiler (Phase 2 구현)
+# ============================================================================
+class LayoutProfiler:
+    """
+    레이아웃 프로파일 분석기
+    입력 요소들의 전역/로컬 패턴을 분석하여 최적 전략을 추천한다.
+    """
+    @staticmethod
+    def analyze(
+        elements: List[MockElement],
+        page_width: Optional[int],
+        page_height: Optional[int],
+        page_dpi: Optional[float] = None,
+    ) -> LayoutProfile:
+        """레이아웃 특성 분석 및 전략 추천"""
+        logger.info("[LayoutProfiler] 레이아웃 분석 시작...")
+        anchors = [e for e in elements if e.class_name in ALLOWED_ANCHORS]
+        children = [e for e in elements if e.class_name in ALLOWED_CHILDREN]
+        if not page_width or page_width <= 0:
+            page_width = (
+                int(max(e.bbox_x + e.bbox_width for e in elements)) if elements else 0
+            )
+        if not page_height or page_height <= 0:
+            page_height = (
+                int(max(e.bbox_y + e.bbox_height for e in elements)) if elements else 0
+            )
+        effective_dpi = LayoutProfiler._resolve_effective_dpi(
+            page_dpi, page_width, page_height
+        )
+        width_in_inches, height_in_inches = LayoutProfiler._compute_physical_dimensions(
+            page_width, page_height, effective_dpi
+        )
+        # 1. 앵커 통계
+        if len(anchors) >= 2:
+            anchor_x_centers = [a.bbox_x + a.bbox_width / 2 for a in anchors]
+            anchor_x_std = float(np.std(anchor_x_centers))
+            anchor_y_centers = [a.bbox_y + a.bbox_height / 2 for a in anchors]
+            anchor_y_variance = float(np.var(anchor_y_centers))
+        else:
+            anchor_x_std = 0.0
+            anchor_y_variance = 0.0
+        anchor_count = len(anchors)
+        # 2. 전역 일관성 점수
+        max_x_std = page_width * 0.3 if page_width else 0.0
+        global_consistency_score = (
+            max(0.0, 1.0 - (anchor_x_std / max_x_std)) if max_x_std > 0 else 0.5
+        )
+        # 3. 수평 인접 비율
+        horizontal_adjacency_count = 0
+        if anchors and children:
+            for anchor in anchors:
+                anchor_cy = anchor.bbox_y + anchor.bbox_height / 2
+                anchor_right_x = anchor.bbox_x + anchor.bbox_width
+                for child in children:
+                    child_cy = child.bbox_y + child.bbox_height / 2
+                    child_left_x = child.bbox_x
+                    y_diff = abs(anchor_cy - child_cy)
+                    y_threshold = (
+                        (anchor.bbox_height + child.bbox_height)
+                        / 2
+                        * HORIZONTAL_ADJACENCY_Y_CENTER_RATIO
+                        if (anchor.bbox_height + child.bbox_height) > 0
+                        else 0
+                    )
+                    gap_right = child_left_x - anchor_right_x
+                    if (
+                        y_diff < y_threshold
+                        and abs(gap_right) < HORIZONTAL_ADJACENCY_X_PROXIMITY
+                    ):
+                        horizontal_adjacency_count += 1
+                        break
+        horizontal_adjacency_ratio = (
+            horizontal_adjacency_count / anchor_count if anchor_count else 0.0
+        )
+        # 4. 레이아웃 유형 판별
+        layout_type = detect_layout_type(elements, page_width, page_height)
+        # 5. 전략 추천
+        recommended_strategy = LayoutProfiler._recommend_strategy(
+            consistency=global_consistency_score,
+            anchor_x_std=anchor_x_std,
+            horizontal_adjacency_ratio=horizontal_adjacency_ratio,
+            layout_type=layout_type,
+            anchor_count=anchor_count,
+            page_width=page_width,
+            page_height=page_height,
+            anchor_y_variance=anchor_y_variance,
+            effective_dpi=effective_dpi,
+            width_in_inches=width_in_inches,
+            height_in_inches=height_in_inches,
+        )
+        logger.info(
+            "[LayoutProfiler] 분석 완료: "
+            f"consistency={global_consistency_score:.3f}, "
+            f"adjacency={horizontal_adjacency_ratio:.3f}, "
+            f"anchors={anchor_count}, "
+            f"layout={layout_type.name}, "
+            f"dpi={effective_dpi:.1f}, "
+            f"width_in={width_in_inches:.2f}, "
+            f"추천 전략={recommended_strategy.name}"
+        )
+        return LayoutProfile(
+            global_consistency_score=global_consistency_score,
+            anchor_x_std=anchor_x_std,
+            horizontal_adjacency_ratio=horizontal_adjacency_ratio,
+            anchor_count=anchor_count,
+            layout_type=layout_type,
+            page_width=page_width,
+            page_height=page_height,
+            anchor_y_variance=anchor_y_variance,
+            recommended_strategy=recommended_strategy,
+            effective_dpi=effective_dpi,
+            width_in_inches=width_in_inches,
+            height_in_inches=height_in_inches,
+        )
+    @staticmethod
+    def _recommend_strategy(
+        consistency: float,
+        anchor_x_std: float,
+        horizontal_adjacency_ratio: float,
+        layout_type: LayoutType,
+        anchor_count: int,
+        page_width: int,
+        page_height: int,
+        anchor_y_variance: float,
+        effective_dpi: float,
+        width_in_inches: float,
+        height_in_inches: float,
+    ) -> SortingStrategyType:
+        """Recommend a sorting strategy from layout statistics (Phase 2 rules).
+        The rules are evaluated top to bottom and the first match wins, so the
+        order of the checks is significant.
+        Args:
+            consistency: Global consistency score of the anchors.
+            anchor_x_std: Accepted for the call signature; not read by the rules below.
+            horizontal_adjacency_ratio: Horizontal adjacency ratio computed by the caller.
+            layout_type: Layout type detected for the page.
+            anchor_count: Number of anchors on the page.
+            page_width: Accepted for the call signature; not read by the rules below.
+            page_height: Accepted for the call signature; not read by the rules below.
+            anchor_y_variance: Accepted for the call signature; not read by the rules below.
+            effective_dpi: DPI forwarded to _prefer_global_for_column_layout.
+            width_in_inches: Page width in inches, forwarded to
+                _prefer_global_for_column_layout.
+            height_in_inches: Accepted for the call signature; not read by the rules below.
+        Returns:
+            The recommended SortingStrategyType (GLOBAL_FIRST, LOCAL_FIRST or HYBRID).
+        """
+        # Clear two-column layout
+        if layout_type == LayoutType.STANDARD_2_COLUMN:
+            # Mid-range adjacency together with mid-range consistency -> HYBRID.
+            if 0.4 <= horizontal_adjacency_ratio < 0.6 and 0.4 <= consistency <= 0.75:
+                return SortingStrategyType.HYBRID
+            # High adjacency: decide by physical page width / DPI.
+            if horizontal_adjacency_ratio >= 0.6:
+                if LayoutProfiler._prefer_global_for_column_layout(
+                    width_in_inches, effective_dpi
+                ):
+                    return SortingStrategyType.GLOBAL_FIRST
+                return SortingStrategyType.LOCAL_FIRST
+            if horizontal_adjacency_ratio < 0.4:
+                return SortingStrategyType.LOCAL_FIRST
+            # Only reachable with 0.4 <= adjacency < 0.6 and consistency outside [0.4, 0.75].
+            if anchor_count < 8:
+                return SortingStrategyType.LOCAL_FIRST
+            return (
+                SortingStrategyType.GLOBAL_FIRST
+                if consistency >= 0.6
+                else SortingStrategyType.LOCAL_FIRST
+            )
+        # Mixed layouts (one column in one part of the page, two columns in the other).
+        if layout_type in (
+            LayoutType.MIXED_TOP1_BOTTOM2,
+            LayoutType.MIXED_TOP2_BOTTOM1,
+        ):
+            if horizontal_adjacency_ratio >= 0.5:
+                return SortingStrategyType.HYBRID
+            return SortingStrategyType.LOCAL_FIRST
+        # Pages with a horizontal separator: chosen purely by adjacency.
+        if layout_type == LayoutType.HORIZONTAL_SEP_PRESENT:
+            return (
+                SortingStrategyType.LOCAL_FIRST
+                if horizontal_adjacency_ratio >= 0.4
+                else SortingStrategyType.GLOBAL_FIRST
+            )
+        # Every remaining layout type: generic thresholds, GLOBAL_FIRST as the final default.
+        if horizontal_adjacency_ratio > 0.5:
+            return SortingStrategyType.LOCAL_FIRST
+        if consistency > 0.75:
+            return SortingStrategyType.GLOBAL_FIRST
+        if consistency < 0.4:
+            return SortingStrategyType.LOCAL_FIRST
+        if 0.35 <= horizontal_adjacency_ratio <= 0.65:
+            return SortingStrategyType.HYBRID
+        return SortingStrategyType.GLOBAL_FIRST
+    @staticmethod
+    def _resolve_effective_dpi(
+        provided_dpi: Optional[float],
+        page_width: int,
+        page_height: int,
+    ) -> float:
+        """
+        DPI 정보를 인자로 전달받지 못한 경우 페이지 높이를 기반으로 추정한다.
+        기본값과 최소/최대 한계를 두어 극단값을 방지한다.
+        """
+        if provided_dpi and provided_dpi > 0:
+            return float(provided_dpi)
+        if page_height and page_height > 0:
+            estimated = page_height / BASE_PAGE_HEIGHT_INCHES
+            return min(max(estimated, MIN_EFFECTIVE_DPI), MAX_EFFECTIVE_DPI)
+        return DEFAULT_BASE_DPI
+    @staticmethod
+    def _compute_physical_dimensions(
+        page_width: int, page_height: int, effective_dpi: float
+    ) -> Tuple[float, float]:
+        """픽셀 단위 크기를 인치 단위로 변환한다."""
+        dpi = effective_dpi if effective_dpi > 0 else DEFAULT_BASE_DPI
+        width_in = page_width / dpi if page_width else 0.0
+        height_in = page_height / dpi if page_height else 0.0
+        return width_in, height_in
+    @staticmethod
+    def _prefer_global_for_column_layout(
+        width_in_inches: float, effective_dpi: float
+    ) -> bool:
+        """
+        2단 구조에서 Global 전략을 우선해야 하는지 판단한다.
+        페이지 폭이 특정 임계값 이하이거나 DPI가 낮아 스캔본 특성이 강할 때 Global을 선호한다.
+        """
+        if width_in_inches <= 0:
+            return True
+        if width_in_inches <= GLOBAL_WIDTH_INCH_THRESHOLD:
+            return True
+        dpi_ratio = effective_dpi / DEFAULT_BASE_DPI if DEFAULT_BASE_DPI else 1.0
+        adjusted_threshold = GLOBAL_WIDTH_INCH_THRESHOLD * (1 + GLOBAL_WIDTH_INCH_MARGIN)
+        if dpi_ratio <= 0.75 and width_in_inches <= adjusted_threshold:
+            return True
+        return False
+# ============================================================================
+# Strategy Factory
+# ============================================================================
+class SortingStrategyFactory:
+    """전략 인스턴스 생성 팩토리"""
+    _strategies: Dict[SortingStrategyType, SortingStrategy] = {
+        SortingStrategyType.GLOBAL_FIRST: GlobalFirstStrategy(),
+        SortingStrategyType.LOCAL_FIRST: LocalFirstStrategy(),
+        SortingStrategyType.HYBRID: HybridStrategy(),
+    }
+    @classmethod
+    def get_strategy(cls, strategy_type: SortingStrategyType) -> SortingStrategy:
+        """전략 타입에 따라 전략 인스턴스 반환"""
+        if strategy_type not in cls._strategies:
+            raise ValueError(f"지원되지 않는 전략 타입: {strategy_type}")
+        return cls._strategies[strategy_type]
+# ============================================================================
+# Adaptive 메인 함수 (Phase 1)
+# ============================================================================
+def sort_layout_elements_adaptive(
+    elements: List[MockElement],
+    document_type: str,
+    page_width: Optional[int] = None,
+    page_height: Optional[int] = None,
+    force_strategy: Optional[str] = None,
+    page_dpi: Optional[float] = None,
+) -> List[MockElement]:
+    """
+    Adaptive 정렬 함수 - 레이아웃 특성을 분석하여 최적 전략을 선택하고 정렬 실행
+    레이아웃 요소들의 구조적 특성(전역 일관성, 수평 인접성, 레이아웃 유형)을 분석하여
+    GlobalFirstStrategy, LocalFirstStrategy, HybridStrategy 중 최적의 전략을 선택합니다.
+    Args:
+        elements: 정렬할 레이아웃 요소 리스트 (MockElement 객체)
+        document_type: 문서 타입
+            - "question_based": 학습지 (앵커-자식 그룹핑 적용)
+            - "reading_order": 일반 문서 (단순 읽기 순서)
+        page_width: 페이지 너비 (픽셀). None이면 요소 bbox에서 자동 계산
+        page_height: 페이지 높이 (픽셀). None이면 요소 bbox에서 자동 계산
+        page_dpi: 페이지 DPI. 지정하지 않으면 LayoutProfiler가 높이를 기반으로 추정
+        force_strategy: 강제 전략 지정 (테스트 또는 디버깅용)
+            - None (기본값): LayoutProfiler가 자동으로 전략 선택 (권장)
+            - "GLOBAL_FIRST": GlobalFirstStrategy 강제 사용
+            - "LOCAL_FIRST": LocalFirstStrategy 강제 사용
+            - "HYBRID": HybridStrategy 강제 ���용
+    Returns:
+        정렬된 요소 리스트. 각 요소에 다음 속성이 할당됨:
+            - group_id (int): 그룹 번호 (0부터 시작)
+            - order_in_group (int): 그룹 내 순서 (0부터 시작)
+    Raises:
+        ValueError: 유효하지 않은 force_strategy 값 (자동으로 GLOBAL_FIRST로 폴백)
+    Examples:
+        >>> # 자동 전략 선택 (권장)
+        >>> sorted_elements = sort_layout_elements_adaptive(
+        ...     elements=elements,
+        ...     document_type="question_based",
+        ...     page_width=2480,
+        ...     page_height=3508,
+        ...     force_strategy=None
+        ... )
+        >>> # 강제 전략 지정 (테스트 또는 디버깅)
+        >>> sorted_elements = sort_layout_elements_adaptive(
+        ...     elements=elements,
+        ...     document_type="question_based",
+        ...     page_width=2480,
+        ...     page_height=3508,
+        ...     force_strategy="GLOBAL_FIRST"
+        ... )
+    Notes:
+        - 자동 선택 시 LayoutProfiler가 레이아웃을 분석하여 최적 전략 추천
+        - 분석 오버헤드: < 5ms (전체 실행 시간의 < 5%)
+        - HybridStrategy는 두 전략을 병렬 실행하므로 실행 시간 약 2배
+        - 상세한 로그는 loguru logger를 통해 출력됨
+    See Also:
+        - LayoutProfiler.analyze(): 레이아웃 특성 분석
+        - GlobalFirstStrategy: PDF 레이아웃 전략
+        - LocalFirstStrategy: 이미지 레이아웃 전략
+        - HybridStrategy: 혼합 전략
+    """
+    logger.info("=" * 80)
+    logger.info("[Adaptive Sorter] Phase 1 프로토타입 실행")
+    logger.info(f"  - 강제 전략: {force_strategy if force_strategy else '자동 선택'}")
+    logger.info("=" * 80)
+    filtered_elements = preprocess_elements(elements, document_type)
+    if not filtered_elements:
+        logger.warning("전처리 후 정렬할 요소가 없습니다.")
+        return []
+    if not page_width or page_width <= 0:
+        page_width = (
+            int(max(e.bbox_x + e.bbox_width for e in filtered_elements))
+            if filtered_elements
+            else 0
+        )
+    if not page_height or page_height <= 0:
+        page_height = (
+            int(max(e.bbox_y + e.bbox_height for e in filtered_elements))
+            if filtered_elements
+            else 0
+        )
+    dpi_log_value = (
+        f"{page_dpi:.1f}" if page_dpi and page_dpi > 0 else "auto (height-based)"
+    )
+    logger.info(
+        f"[Adaptive] 페이지 크기 추정: {page_width} x {page_height} / dpi={dpi_log_value}"
+    )
+    # Phase 1: 강제 전략 사용
+    if force_strategy:
+        try:
+            strategy_type = SortingStrategyType[force_strategy.upper()]
+            logger.info(f"[Adaptive] 강제 전략 사용: {strategy_type.name}")
+        except KeyError:
+            logger.error(f"유효하지 않은 전략 이름: {force_strategy}")
+            logger.info("기본 전략(GLOBAL_FIRST) 사용")
+            strategy_type = SortingStrategyType.GLOBAL_FIRST
+    else:
+        # Phase 2: 자동 선택
+        profile = LayoutProfiler.analyze(
+            filtered_elements, page_width, page_height, page_dpi
+        )
+        logger.info(f"[Adaptive] 자동 전략 선택: {profile.recommended_strategy.name}")
+        strategy_type = profile.recommended_strategy
+    # 전략 실행
+    strategy = SortingStrategyFactory.get_strategy(strategy_type)
+    sorted_elements = strategy.sort(
+        elements=elements,
+        document_type=document_type,
+        page_width=page_width,
+        page_height=page_height,
+    )
+    logger.info(f"[Adaptive Sorter] 완료: {len(sorted_elements)}개 요소 정렬됨")
+    logger.info("=" * 80)
+    return sorted_elements

app/services/sorter_구버전.py ADDED Viewed

	@@ -0,0 +1,1316 @@

+# -*- coding: utf-8 -*-
+"""
+SmartEyeSsen Layout Sorter (v.LayoutDetect.2.4 - Tie-breaker in Post-processing)
+=================================================================================
+문제 레이아웃 정렬 알고리즘 구현 (Layout Type Detection 기반 Hybrid)
+페이지 전체 레이아웃 유형(1단, 2단, 혼합형 등)을 먼저 판별하고,
+유형에 맞는 분할 전략(수평/수직) 적용.
+분할 실패 시(Base Case), 레이아웃 유형별로 특화된 그룹핑 로직 호출.
+- 표준 1단/2단 컬럼: _base_case_standard_1_column
+- 혼합형: _base_case_mixed_layout
+최종 병합 시 전역 고아 그룹 처리 로직 적용.
+알고리즘 흐름: (v.LayoutDetect.2.1/2.2/2.3과 동일)
+0. 전처리
+1. 레이아웃 유형 판별
+2. 유형별 재귀 처리
+3. Base Case 처리 (후처리 포함)
+4. 최종 병합 및 순서 부여
+v.LayoutDetect.2.4:
+- _post_process_table_figure_assignment: 최적 그룹 탐색 시 Y 거리가 동일할 경우 더 뒤쪽 그룹을 우선하는 Tie-breaker 추가.
+- sort_layout_elements: 후처리 호출 전에 임시 그룹 ID 할당하여 로그 가독성 개선.
+- (v2.3 변경 유지) _post_process_table_figure_assignment: 최적 그룹 탐색 로직 (Lookahead).
+- (v2.2 변경 유지) _post_process_table_figure_assignment: 이동 조건은 거리 비교 로직 사용.
+- (v2.1 변경 유지) _post_process_table_figure_assignment: y_diff_threshold 기본값 150.
+- (v2.1 변경 유지) _base_case_standard_1_column: 상단 고아 요소 분리 로직.
+"""
+# 필요한 라이브러리 임포트
+from typing import List, Dict, Tuple, Optional, Any, Union
+from dataclasses import dataclass, field
+import numpy as np
+from sklearn.cluster import KMeans
+from loguru import logger
+import math
+from enum import Enum, auto
+# Mock 모델 임포트
+from .mock_models import MockElement
+# ============================================================================
+# 데이터 클래스 및 Enum 정의 (기존과 동일)
+# ============================================================================
+class LayoutType(Enum):
+    """Page layout categories produced by layout detection and used to pick a sorting path."""
+    # Returned by detect_layout_type when anchors are too few or no two-column split is found.
+    STANDARD_1_COLUMN = auto()
+    # Anchor X centers form two clusters (detect_layout_type).
+    STANDARD_2_COLUMN = auto()
+    # Upper part of the page looks single-column, lower part multi-column.
+    MIXED_TOP1_BOTTOM2 = auto()
+    # Upper part of the page looks multi-column, lower part single-column.
+    MIXED_TOP2_BOTTOM1 = auto()
+    # A wide "question type" element was found by find_wide_question_type.
+    HORIZONTAL_SEP_PRESENT = auto()
+    # Assigned when document_type is "reading_order": plain (Y, X) ordering.
+    READING_ORDER = auto()
+    # Overall X distribution suggests two columns but neither page half does.
+    UNKNOWN = auto()
+@dataclass
+class Zone:
+    x_min: int
+    y_min: int
+    x_max: int
+    y_max: int
+    @property
+    def width(self) -> int:
+        return max(0, self.x_max - self.x_min)
+    @property
+    def height(self) -> int:
+        return max(0, self.y_max - self.y_min)
+    def __repr__(self) -> str:
+        return f"Zone(x=[{self.x_min}, {self.x_max}), y=[{self.y_min}, {self.y_max}))"
+@dataclass
+class HorizontalSplit:
+    """Result of splitting a zone into top and bottom parts around a separator element."""
+    # Zone above the separator.
+    top_zone: Zone
+    # Zone below the separator.
+    bottom_zone: Zone
+    # Element that acts as the separator; the recursive sorter keeps it out of both
+    # halves and emits it as its own group between them.
+    separator_element: MockElement
+@dataclass
+class HorizontalSplitYGap:
+    """Result of splitting a zone into top and bottom parts at a Y coordinate (no separator element)."""
+    # Zone above the split line.
+    top_zone: Zone
+    # Zone below the split line.
+    bottom_zone: Zone
+    # Y coordinate of the split; elements whose vertical center is below this value
+    # numerically (< split_y) go to the top part, the rest to the bottom part.
+    split_y: float
+@dataclass
+class VerticalSplit:
+    """Result of splitting a zone into left and right parts."""
+    # Zone left of the gutter.
+    left_zone: Zone
+    # Zone right of the gutter.
+    right_zone: Zone
+    # X coordinate of the split; elements whose horizontal center is < gutter_x go
+    # to the left part, the rest to the right part.
+    gutter_x: float
+@dataclass
+class ElementGroup:
+    anchor: Optional[MockElement]
+    children: List[MockElement] = field(default_factory=list)
+    group_id: int = -1  # flatten 함수에서 최종 할당, 후처리 전 임시 할당
+    def add_child(self, child: MockElement):
+        self.children.append(child)
+    def get_all_elements_sorted(self) -> List[MockElement]:
+        """
+        그룹 내 요소들을 정렬합니다.
+        - 앵커(Anchor)가 항상 가장 먼저 위치합니다.
+        - 나머지 자식(Children) 요소들은 (Y, X) 좌표 순으로 정렬됩니다.
+        """
+        # 1. 앵커가 존재하면 리스트의 첫 요소로 설정합니다.
+        elements = [self.anchor] if self.anchor else []
+        # 2. 자식 요소들을 (Y, X) 좌표 기준으로 정렬합니다.
+        sorted_children = sorted(
+            self.children, key=lambda e: (e.y_position, e.x_position)
+        )
+        # 3. 앵커 요소 뒤에 정렬된 자식 요소들을 추가합니다.
+        elements.extend(sorted_children)
+        return elements
+    def is_empty(self) -> bool:
+        return self.anchor is None and not self.children
+    def __repr__(self) -> str:
+        anchor_id = self.anchor.element_id if self.anchor else "Orphan"
+        child_ids = sorted([c.element_id for c in self.children])
+        # flatten 전에는 group_id가 임시값일 수 있음
+        return f"Group(ID:{self.group_id}, Anchor: {anchor_id}, Children: {child_ids})"
+# ============================================================================
+# 상수 정의 (기존과 동일)
+# ============================================================================
+# Class names treated as anchors; layout detection and the recursive sorter filter on this list.
+ALLOWED_ANCHORS = ["question type", "question number", "second_question_number"]
+# Class names treated as children (presumably attached to anchors during grouping - confirm).
+ALLOWED_CHILDREN = ["question text", "list", "choices", "figure", "table", "flowchart"]
+# Every class name listed above, anchors first.
+ALLOWED_CLASSES = ALLOWED_ANCHORS + ALLOWED_CHILDREN
+# NOTE(review): presumably the minimum width ratio for a wide separator element - confirm at its use site.
+HORIZONTAL_SEP_WIDTH_THRESHOLD = 0.8
+# Fraction of the page height; detect_layout_type passes page_height * this value
+# as the top-zone height when looking for a wide question type.
+HORIZONTAL_SEP_Y_POS_THRESHOLD = 0.15
+# detect_layout_type returns STANDARD_1_COLUMN when fewer anchors than this are present.
+MIN_ANCHORS_FOR_SPLIT = 2
+# NOTE(review): vertical-gap thresholds; their use sites are outside this excerpt - confirm semantics.
+VERTICAL_GAP_THRESHOLD_RATIO = 1.5
+VERTICAL_GAP_THRESHOLD_ABS = 100
+# Number of clusters for K-Means over anchor X centers.
+KMEANS_N_CLUSTERS = 2
+# Minimum distance between the two cluster centers for a page to count as two-column.
+KMEANS_CLUSTER_SEPARATION_MIN = 50
+# Fraction of the page height at which detect_layout_type separates top and bottom anchors.
+LAYOUT_DETECT_Y_SPLIT_POINT = 0.4
+# Fraction of the page width; a page half whose anchor X std exceeds it counts as multi-column.
+LAYOUT_DETECT_X_STD_THRESHOLD_RATIO = 0.1
+# NOTE(review): horizontal-adjacency tolerances; use sites are outside this excerpt - confirm semantics.
+HORIZONTAL_ADJACENCY_Y_CENTER_RATIO = 0.7
+HORIZONTAL_ADJACENCY_X_PROXIMITY = 50
+# NOTE(review): presumably used by the one-column base case to split off top orphans - confirm.
+BASE_CASE_TOP_ORPHAN_THRESHOLD_RATIO = 0.15
+# NOTE(review): presumably parameters of the table/figure post-processing - confirm.
+POST_PROCESS_CLOSENESS_RATIO = 0.5
+POST_PROCESS_LOOKAHEAD = 2
+# ============================================================================
+# 메인 함수: 레이아웃 유형 판별 후 정렬 (수정됨)
+# ============================================================================
+def sort_layout_elements(
+    elements: List[MockElement],
+    document_type: str = "question_based",
+    page_width: Optional[int] = None,
+    page_height: Optional[int] = None,
+) -> List[MockElement]:
+    """
+    레이아웃 유형 판별 후 맞춤형 정렬 로직 적용 (v.LayoutDetect.2.4)
+    """
+    logger.info(
+        f"맞춤형 정렬(v.LayoutDetect.2.4) 시작: {len(elements)}개 요소, 타입={document_type}"
+    )
+    filtered_elements = preprocess_elements(elements, document_type)
+    if not filtered_elements:
+        logger.warning("전처리 후 정렬할 요소가 없습니다.")
+        return []
+    if page_width is None:
+        page_width = calculate_page_width(filtered_elements)
+    if page_height is None:
+        page_height = calculate_page_height(filtered_elements)
+    logger.info(f"페이지 크기: {page_width} x {page_height}")
+    initial_zone = Zone(x_min=0, y_min=0, x_max=page_width, y_max=page_height)
+    grouped_results: List[ElementGroup] = []
+    try:
+        if document_type == "reading_order":
+            layout_type = LayoutType.READING_ORDER
+            logger.info(f"판별된 레이아웃 유형: {layout_type.name} (문서 타입 지정)")
+            sorted_elements_reading = sorted(
+                filtered_elements, key=lambda e: (e.y_position, e.x_position)
+            )
+            grouped_results = [
+                ElementGroup(anchor=None, children=[elem])
+                for elem in sorted_elements_reading
+            ]
+        else:
+            layout_type = detect_layout_type(filtered_elements, page_width, page_height)
+            logger.info(f"판별된 레이아웃 유형: {layout_type.name}")
+            if layout_type == LayoutType.STANDARD_1_COLUMN:
+                logger.debug(
+                    f"{layout_type.name}: 분할 없이 전체 구역 표준 1단 Base Case 실행"
+                )
+                grouped_results = _base_case_standard_1_column(
+                    initial_zone, filtered_elements
+                )
+            elif layout_type == LayoutType.STANDARD_2_COLUMN:
+                grouped_results = _sort_standard_2_column(
+                    initial_zone, filtered_elements
+                )
+            elif layout_type in [
+                LayoutType.HORIZONTAL_SEP_PRESENT,
+                LayoutType.MIXED_TOP1_BOTTOM2,
+                LayoutType.MIXED_TOP2_BOTTOM1,
+                LayoutType.UNKNOWN,
+            ]:
+                grouped_results = _sort_recursive_by_layout(
+                    initial_zone, filtered_elements, layout_type, depth=0
+                )
+            else:
+                logger.error(
+                    f"처리할 수 없는 레이아웃 유형: {layout_type.name}. (Y,X) 정렬로 대체합니다."
+                )
+                sorted_elements_fallback = sorted(
+                    filtered_elements, key=lambda e: (e.y_position, e.x_position)
+                )
+                grouped_results = [
+                    ElementGroup(anchor=None, children=[elem])
+                    for elem in sorted_elements_fallback
+                ]
+            # --- 👇 수정: 후처리 전에 임시 그룹 ID 할당 (로깅용) ---
+            if grouped_results and document_type == "question_based":
+                logger.debug("후처리 전 임시 그룹 ID 할당...")
+                temp_groups_with_id = []
+                temp_group_id_counter = 0
+                temp_orphan_groups = [g for g in grouped_results if g.anchor is None]
+                temp_non_orphan_groups = [
+                    g for g in grouped_results if g.anchor is not None
+                ]
+                # 고아 그룹 먼저 ID 할당
+                if temp_orphan_groups:
+                    temp_orphan_groups.sort(
+                        key=lambda g: (
+                            min(c.y_position for c in g.children)
+                            if g.children
+                            else float("inf")
+                        )
+                    )
+                    for group in temp_orphan_groups:
+                        group.group_id = temp_group_id_counter
+                        temp_groups_with_id.append(group)
+                        temp_group_id_counter += 1
+                # 앵커 그룹 ID 할당
+                # (주의: _post_process... 함���는 앵커 그룹 리스트만 받도록 수정 필요)
+                # 우선 여기서 ID만 할당하고, 후처리는 non_orphan_groups 대상으로 수행
+                for group in temp_non_orphan_groups:
+                    group.group_id = temp_group_id_counter
+                    # temp_groups_with_id.append(group) # flatten 전 최종 순서는 아직 모름
+                    temp_group_id_counter += 1
+                # 후처리는 앵커가 있는 그룹들을 대상으로 수행
+                logger.debug(
+                    f"{len(temp_non_orphan_groups)}개 앵커 그룹 대상 후처리 실행..."
+                )
+                processed_non_orphan_groups = _post_process_table_figure_assignment(
+                    temp_non_orphan_groups
+                )
+                # 최종 그룹 리스트 재구성 (고아 + 후처리된 앵커 그룹)
+                grouped_results = temp_orphan_groups + processed_non_orphan_groups
+                logger.debug("후처리 및 임시 그룹 ID 할당 완료.")
+            # --- 👆 수정 끝 ---
+    except Exception as e:
+        logger.error(
+            f"맞춤형 정렬 중 심각한 오류 발생: {e}. (Y,X) 좌표 정렬로 대체합니다.",
+            exc_info=True,
+        )
+        sorted_elements_fallback = sorted(
+            filtered_elements, key=lambda e: (e.y_position, e.x_position)
+        )
+        grouped_results = [
+            ElementGroup(anchor=None, children=[elem])
+            for elem in sorted_elements_fallback
+        ]
+    if not grouped_results:
+        logger.warning("그룹핑 결과가 비어 있습니다.")
+        return []
+    # 최종 병합: 고아 그룹과 앵커 그룹 순서 결정 (기존 로직 유지)
+    orphan_groups = [g for g in grouped_results if g.anchor is None]
+    non_orphan_groups = [
+        g for g in grouped_results if g.anchor is not None
+    ]  # 후처리된 리스트 사용
+    final_ordered_groups: List[ElementGroup] = []
+    if orphan_groups:
+        # 고아 그룹은 Y 좌표 기준으로 정렬
+        orphan_groups.sort(
+            key=lambda g: (
+                min(c.y_position for c in g.children) if g.children else float("inf")
+            )
+        )
+        logger.debug(
+            f"전역 고아 그룹 {len(orphan_groups)}개 (Y 좌표 정렬됨) 리스트 맨 앞으로 이동"
+        )
+        final_ordered_groups.extend(orphan_groups)
+    else:
+        logger.debug("전역 고아 그룹 없음")
+    # 앵커 그룹은 Base Case/재귀 호출에서 결정된 순서 유지 (Y좌표 정렬 불필요)
+    final_ordered_groups.extend(non_orphan_groups)
+    # 최종 순서 및 ID 부여
+    final_sorted_elements, _, _ = flatten_groups_and_assign_order(
+        final_ordered_groups, start_global_order=0, start_group_id=0
+    )
+    logger.info(f"맞춤형 정렬 완료: {len(final_sorted_elements)}개 요소")
+    return final_sorted_elements
+# ============================================================================
+# 레이아웃 유형 판별 함수 (기존과 동일)
+# ============================================================================
+def detect_layout_type(
+    elements: List[MockElement], page_width: int, page_height: int
+) -> LayoutType:
+    # ... (코드 동일) ...
+    """앵커 요소 분포를 분석하여 페이지 레이아웃 유형 판별"""
+    anchors = [e for e in elements if e.class_name in ALLOWED_ANCHORS]
+    if len(anchors) < MIN_ANCHORS_FOR_SPLIT:
+        logger.debug(
+            f"레이아웃 판별: 앵커 수({len(anchors)}) 부족 -> STANDARD_1_COLUMN"
+        )
+        return LayoutType.STANDARD_1_COLUMN
+    top_zone_height = page_height * HORIZONTAL_SEP_Y_POS_THRESHOLD
+    wide_q_type = find_wide_question_type(elements, page_width, top_zone_height)
+    if wide_q_type:
+        logger.debug(
+            f"레이아웃 판별: 넓은 question_type(ID:{wide_q_type.element_id}) 존재 -> HORIZONTAL_SEP_PRESENT"
+        )
+        return LayoutType.HORIZONTAL_SEP_PRESENT
+    anchor_x_centers = np.array([[a.bbox_x + a.bbox_width / 2] for a in anchors])
+    is_clearly_2_column = False
+    if len(np.unique(anchor_x_centers)) >= 2:
+        try:
+            kmeans = KMeans(
+                n_clusters=KMEANS_N_CLUSTERS, random_state=42, n_init="auto"
+            )
+            kmeans.fit(anchor_x_centers)
+            centers = sorted(kmeans.cluster_centers_.flatten())
+            if (
+                len(centers) == 2
+                and centers[1] - centers[0] >= KMEANS_CLUSTER_SEPARATION_MIN
+            ):
+                is_clearly_2_column = True
+                logger.trace(
+                    f"레이아웃 판별: 전체 X 분포는 2단 구조 가능성 높음 (Centers: {centers})"
+                )
+            else:
+                logger.trace(f"레이아웃 판별: 전체 X 분포는 1단 구조 또는 불분명")
+        except Exception as e:
+            logger.warning(f"레이아웃 판별 중 K-Means 오류 발생: {e}")
+    if is_clearly_2_column:
+        split_y = page_height * LAYOUT_DETECT_Y_SPLIT_POINT
+        top_anchors = [
+            a for a in anchors if (a.y_position + a.bbox_height / 2) < split_y
+        ]
+        bottom_anchors = [
+            a for a in anchors if (a.y_position + a.bbox_height / 2) >= split_y
+        ]
+        if not top_anchors or not bottom_anchors:
+            logger.debug("레이아웃 판별: 상/하단 앵커 그룹 불완전 -> STANDARD_2_COLUMN")
+            return LayoutType.STANDARD_2_COLUMN
+        top_x_centers = (
+            np.array([[a.bbox_x + a.bbox_width / 2] for a in top_anchors])
+            if top_anchors
+            else np.array([])
+        )
+        bottom_x_centers = (
+            np.array([[a.bbox_x + a.bbox_width / 2] for a in bottom_anchors])
+            if bottom_anchors
+            else np.array([])
+        )
+        x_std_threshold = page_width * LAYOUT_DETECT_X_STD_THRESHOLD_RATIO
+        top_is_multi_column = (
+            top_x_centers.size > 1 and np.std(top_x_centers) > x_std_threshold
+        )
+        bottom_is_multi_column = (
+            bottom_x_centers.size > 1 and np.std(bottom_x_centers) > x_std_threshold
+        )
+        if not top_is_multi_column and bottom_is_multi_column:
+            logger.debug(
+                f"레이아웃 판별: 상단({len(top_anchors)}개) 1단, 하단({len(bottom_anchors)}개) 2단 -> MIXED_TOP1_BOTTOM2"
+            )
+            return LayoutType.MIXED_TOP1_BOTTOM2
+        elif top_is_multi_column and not bottom_is_multi_column:
+            logger.debug(
+                f"레이아웃 판별: 상단({len(top_anchors)}개) 2단, 하단({len(bottom_anchors)}개) 1단 -> MIXED_TOP2_BOTTOM1"
+            )
+            return LayoutType.MIXED_TOP2_BOTTOM1
+        elif top_is_multi_column and bottom_is_multi_column:
+            logger.debug(
+                f"레이아웃 판별: 상단({len(top_anchors)}개) 2단, 하단({len(bottom_anchors)}개) 2단 -> STANDARD_2_COLUMN"
+            )
+            return LayoutType.STANDARD_2_COLUMN
+        else:
+            logger.warning(
+                f"레이아웃 판별: 상/하단 모두 1단으로 보이나 전체는 2단 구조? -> UNKNOWN"
+            )
+            return LayoutType.UNKNOWN
+    else:
+        logger.debug("레이아웃 판별: 전체 1단 구조 -> STANDARD_1_COLUMN")
+        return LayoutType.STANDARD_1_COLUMN
+# ============================================================================
+# 재귀 정렬 함수 (기존과 동일)
+# ============================================================================
+def _sort_recursive_by_layout(
+    current_zone: Zone,
+    elements_in_zone: List[MockElement],
+    layout_type: LayoutType,
+    depth: int,
+) -> List[ElementGroup]:
+    # ... (코드 동일) ...
+    """레이아웃 유형에 따라 다른 분할 우선순위를 적용하는 재귀 함수"""
+    indent = "  " * depth
+    logger.debug(
+        f"{indent}[Depth {depth}, Type: {layout_type.name}] 구역 처리 시작: {current_zone}, 요소 수={len(elements_in_zone)}"
+    )
+    if not elements_in_zone:
+        logger.trace(f"{indent} -> 빈 구역")
+        return []
+    if len(elements_in_zone) == 1:
+        element = elements_in_zone[0]
+        logger.trace(f"{indent} -> 요소 1개")
+        return (
+            [ElementGroup(anchor=element)]
+            if element.class_name in ALLOWED_ANCHORS
+            else [ElementGroup(anchor=None, children=[element])]
+        )
+    if layout_type == LayoutType.STANDARD_2_COLUMN:
+        logger.debug(f"{indent} -> {layout_type.name}: 표준 2단 처리 함수 직접 호출")
+        return _sort_standard_2_column(current_zone, elements_in_zone)
+    split_result: Optional[
+        Union[HorizontalSplit, HorizontalSplitYGap, VerticalSplit]
+    ] = None
+    split_type = "None"
+    if layout_type == LayoutType.HORIZONTAL_SEP_PRESENT:
+        split_result = find_horizontal_split_by_type(current_zone, elements_in_zone)
+        if split_result:
+            split_type = "H_Type"
+        else:
+            anchors = [e for e in elements_in_zone if e.class_name in ALLOWED_ANCHORS]
+            split_result = find_vertical_split_kmeans(current_zone, anchors)
+            if split_result:
+                split_type = "Vertical"
+            else:
+                split_result = find_horizontal_split_by_y_gap(
+                    current_zone, elements_in_zone
+                )
+                if split_result:
+                    split_type = "H_YGap"
+    elif (
+        layout_type == LayoutType.MIXED_TOP1_BOTTOM2
+        or layout_type == LayoutType.MIXED_TOP2_BOTTOM1
+    ):
+        split_result = find_horizontal_split_by_y_gap(current_zone, elements_in_zone)
+        if split_result:
+            split_type = "H_YGap"
+        else:
+            split_result = find_horizontal_split_by_type(current_zone, elements_in_zone)
+            if split_result:
+                split_type = "H_Type"
+            else:
+                anchors = [
+                    e for e in elements_in_zone if e.class_name in ALLOWED_ANCHORS
+                ]
+                split_result = find_vertical_split_kmeans(current_zone, anchors)
+                if split_result:
+                    split_type = "Vertical"
+    elif layout_type == LayoutType.UNKNOWN:
+        split_result = find_horizontal_split_by_type(current_zone, elements_in_zone)
+        if split_result:
+            split_type = "H_Type"
+        else:
+            anchors = [e for e in elements_in_zone if e.class_name in ALLOWED_ANCHORS]
+            split_result = find_vertical_split_kmeans(current_zone, anchors)
+            if split_result:
+                split_type = "Vertical"
+            else:
+                split_result = find_horizontal_split_by_y_gap(
+                    current_zone, elements_in_zone
+                )
+                if split_result:
+                    split_type = "H_YGap"
+    if split_result:
+        if isinstance(split_result, (HorizontalSplit, HorizontalSplitYGap)):
+            split_y = (
+                split_result.split_y
+                if isinstance(split_result, HorizontalSplitYGap)
+                else split_result.separator_element.y_position
+                + split_result.separator_element.bbox_height / 2
+            )
+            top_elements = [
+                e
+                for e in elements_in_zone
+                if getattr(e, "element_id", -1)
+                != getattr(
+                    getattr(split_result, "separator_element", None), "element_id", -2
+                )
+                and (e.bbox_y + e.bbox_height / 2) < split_y
+            ]
+            bottom_elements = [
+                e
+                for e in elements_in_zone
+                if getattr(e, "element_id", -1)
+                != getattr(
+                    getattr(split_result, "separator_element", None), "element_id", -2
+                )
+                and (e.bbox_y + e.bbox_height / 2) >= split_y
+            ]
+            logger.debug(
+                f"{indent} -> {split_type} 수평 분할 성공! Top:{len(top_elements)}, Bottom:{len(bottom_elements)}"
+            )
+            top_layout_type = (
+                detect_layout_type(
+                    top_elements,
+                    split_result.top_zone.width,
+                    split_result.top_zone.height,
+                )
+                if top_elements
+                else LayoutType.UNKNOWN
+            )
+            bottom_layout_type = (
+                detect_layout_type(
+                    bottom_elements,
+                    split_result.bottom_zone.width,
+                    split_result.bottom_zone.height,
+                )
+                if bottom_elements
+                else LayoutType.UNKNOWN
+            )
+            sorted_top = _sort_recursive_by_layout(
+                split_result.top_zone, top_elements, top_layout_type, depth + 1
+            )
+            sep_group = (
+                [ElementGroup(anchor=split_result.separator_element)]
+                if isinstance(split_result, HorizontalSplit)
+                else []
+            )
+            sorted_bottom = _sort_recursive_by_layout(
+                split_result.bottom_zone, bottom_elements, bottom_layout_type, depth + 1
+            )
+            logger.debug(f"{indent} <- {split_type} 수평 분할 결과 병합")
+            return sorted_top + sep_group + sorted_bottom
+        elif isinstance(split_result, VerticalSplit):
+            left_elements = [
+                e
+                for e in elements_in_zone
+                if (e.bbox_x + e.bbox_width / 2) < split_result.gutter_x
+            ]
+            right_elements = [
+                e
+                for e in elements_in_zone
+                if (e.bbox_x + e.bbox_width / 2) >= split_result.gutter_x
+            ]
+            logger.debug(
+                f"{indent} -> Vertical 수직 분할 성공! Left:{len(left_elements)}, Right:{len(right_elements)}"
+            )
+            left_layout_type = (
+                detect_layout_type(
+                    left_elements,
+                    split_result.left_zone.width,
+                    split_result.left_zone.height,
+                )
+                if left_elements
+                else LayoutType.UNKNOWN
+            )
+            right_layout_type = (
+                detect_layout_type(
+                    right_elements,
+                    split_result.right_zone.width,
+                    split_result.right_zone.height,
+                )
+                if right_elements
+                else LayoutType.UNKNOWN
+            )
+            sorted_left = _sort_recursive_by_layout(
+                split_result.left_zone, left_elements, left_layout_type, depth + 1
+            )
+            sorted_right = _sort_recursive_by_layout(
+                split_result.right_zone, right_elements, right_layout_type, depth + 1
+            )
+            logger.debug(f"{indent} <- Vertical 수직 분할 결과 병합")
+            return sorted_left + sorted_right
+    else:
+        logger.debug(
+            f"{indent} -> 모든 분할 실패, 레이아웃 유형({layout_type.name})에 따른 Base Case 실행"
+        )
+        result_groups: List[ElementGroup] = []
+        if layout_type == LayoutType.STANDARD_1_COLUMN:
+            result_groups = _base_case_standard_1_column(current_zone, elements_in_zone)
+        elif (
+            layout_type == LayoutType.MIXED_TOP1_BOTTOM2
+            or layout_type == LayoutType.MIXED_TOP2_BOTTOM1
+        ):
+            result_groups = _base_case_mixed_layout(
+                current_zone, elements_in_zone, layout_type
+            )
+        elif (
+            layout_type == LayoutType.HORIZONTAL_SEP_PRESENT
+            or layout_type == LayoutType.UNKNOWN
+        ):
+            logger.warning(
+                f"{indent} -> {layout_type.name} 유형 분할 실패. 1단 Base Case로 처리합니다."
+            )
+            result_groups = _base_case_standard_1_column(current_zone, elements_in_zone)
+        else:
+            logger.error(
+                f"{indent} -> 처리할 수 없는 Base Case 유형: {layout_type.name}. 1단으로 처리."
+            )
+            result_groups = _base_case_standard_1_column(current_zone, elements_in_zone)
+        logger.debug(f"{indent} <- Base Case 처리 완료: {len(result_groups)} 그룹 생성")
+        return result_groups
+# ============================================================================
+# 표준 2단 레이아웃 처리 함수 (기존과 동일)
+# ============================================================================
+def _sort_standard_2_column(
+    zone: Zone, elements: List[MockElement]
+) -> List[ElementGroup]:
+    # ... (코드 동일) ...
+    """표준 2단 레이아웃 처리: K-Means 분할 후 컬럼별 _base_case_standard_1_column 호출"""
+    logger.debug("표준 2단 처리: K-Means 분할 시도")
+    anchors = [e for e in elements if e.class_name in ALLOWED_ANCHORS]
+    vertical_split = find_vertical_split_kmeans(zone, anchors)
+    if vertical_split:
+        logger.debug(f" -> 수직 분할 성공! 분리선 X={vertical_split.gutter_x:.1f}")
+        left_elements = [
+            e
+            for e in elements
+            if (e.bbox_x + e.bbox_width / 2) < vertical_split.gutter_x
+        ]
+        right_elements = [
+            e
+            for e in elements
+            if (e.bbox_x + e.bbox_width / 2) >= vertical_split.gutter_x
+        ]
+        logger.debug(
+            f"   Left 요소 수: {len(left_elements)}, Right 요소 수: {len(right_elements)}"
+        )
+        groups_left = _base_case_standard_1_column(
+            vertical_split.left_zone, left_elements
+        )
+        groups_right = _base_case_standard_1_column(
+            vertical_split.right_zone, right_elements
+        )
+        logger.debug(
+            f" <- 컬럼별 그룹핑 완료 (Left: {len(groups_left)} 그룹, Right: {len(groups_right)} 그룹)"
+        )
+        return groups_left + groups_right
+    else:
+        logger.warning(
+            "표준 2단 처리 실패: 수직 분할 불가. 전체 구역 표준 1단 Base Case 실행"
+        )
+        return _base_case_standard_1_column(zone, elements)
+# ============================================================================
+# 분할 함수 구현 (기존과 동일)
+# ============================================================================
+def find_wide_question_type(
+    elements: List[MockElement], page_width: int, top_y_limit: float
+) -> Optional[MockElement]:
+    # ... (코드 동일) ...
+    """페이지 상단 영역에서 넓은 question_type 찾기"""
+    wide_types = [
+        e
+        for e in elements
+        if e.class_name == "question_type"
+        and e.y_position < top_y_limit
+        and (e.bbox_width / page_width if page_width > 0 else 0)
+        >= HORIZONTAL_SEP_WIDTH_THRESHOLD
+    ]
+    return min(wide_types, key=lambda e: e.y_position) if wide_types else None
+def find_horizontal_split_by_type(
+    zone: Zone, elements: List[MockElement]
+) -> Optional[HorizontalSplit]:
+    # ... (코드 동일) ...
+    """넓은 question_type으로 수평 분할"""
+    potential_separators = []
+    for element in elements:
+        if element.class_name == "question_type":
+            width_ratio = element.bbox_width / zone.width if zone.width > 0 else 0
+            if width_ratio >= HORIZONTAL_SEP_WIDTH_THRESHOLD:
+                potential_separators.append(element)
+    if not potential_separators:
+        return None
+    separator = min(potential_separators, key=lambda e: e.y_position)
+    if not (zone.y_min < separator.y_position < zone.y_max):
+        return None
+    top_zone = Zone(zone.x_min, zone.y_min, zone.x_max, separator.y_position)
+    bottom_zone = Zone(
+        zone.x_min, separator.y_position + separator.bbox_height, zone.x_max, zone.y_max
+    )
+    if top_zone.height <= 0 or bottom_zone.height <= 0:
+        return None
+    return HorizontalSplit(top_zone, bottom_zone, separator)
+def find_horizontal_split_by_y_gap(
+    zone: Zone, elements: List[MockElement]
+) -> Optional[HorizontalSplitYGap]:
+    # ... (코드 동일) ...
+    """앵커 Y Gap으로 수평 분할"""
+    anchors = sorted(
+        [e for e in elements if e.class_name in ALLOWED_ANCHORS],
+        key=lambda e: e.y_position,
+    )
+    if len(anchors) < MIN_ANCHORS_FOR_SPLIT:
+        return None
+    max_gap = -1
+    split_index = -1
+    avg_anchor_height = (
+        np.mean([a.bbox_height for a in anchors if a.bbox_height > 0])
+        if any(a.bbox_height > 0 for a in anchors)
+        else 30
+    )
+    for i in range(len(anchors) - 1):
+        gap = (anchors[i + 1].y_position + anchors[i + 1].bbox_height / 2) - (
+            anchors[i].y_position + anchors[i].bbox_height / 2
+        )
+        if gap > max_gap:
+            max_gap = gap
+            split_index = i
+    threshold = max(
+        avg_anchor_height * VERTICAL_GAP_THRESHOLD_RATIO, VERTICAL_GAP_THRESHOLD_ABS
+    )
+    if max_gap >= threshold:
+        split_y = (
+            anchors[split_index].y_position
+            + anchors[split_index].bbox_height
+            + anchors[split_index + 1].y_position
+        ) / 2
+        if zone.y_min < split_y < zone.y_max:
+            top_zone = Zone(zone.x_min, zone.y_min, zone.x_max, int(split_y))
+            bottom_zone = Zone(zone.x_min, int(split_y), zone.x_max, zone.y_max)
+            logger.debug(
+                f"    Y Gap 분석: 수평 분할 가능 (Max Gap={max_gap:.1f} >= Threshold={threshold:.1f})"
+            )
+            return HorizontalSplitYGap(top_zone, bottom_zone, split_y)
+        else:
+            logger.warning(
+                f"    Y Gap 분석: 분할선({split_y:.1f})이 구역({zone.y_min}-{zone.y_max}) 밖에 위치. 분할 취소."
+            )
+            return None
+    else:
+        logger.debug(
+            f"    Y Gap 분석: 최대 간격({max_gap:.1f}) 임계값({threshold:.1f}) 미만. 수평 분할 불가."
+        )
+        return None
+def find_vertical_split_kmeans(
+    zone: Zone, anchors: List[MockElement]
+) -> Optional[VerticalSplit]:
+    # ... (코드 동일) ...
+    """앵커 X 좌표 K-Means로 수직 분할"""
+    if len(anchors) < MIN_ANCHORS_FOR_SPLIT:
+        return None
+    anchor_x_centers = np.array([[a.bbox_x + a.bbox_width / 2] for a in anchors])
+    if len(np.unique(anchor_x_centers)) < 2:
+        return None
+    try:
+        kmeans = KMeans(n_clusters=KMEANS_N_CLUSTERS, random_state=42, n_init="auto")
+        kmeans.fit(anchor_x_centers)
+        centers = sorted(kmeans.cluster_centers_.flatten())
+        if (
+            len(centers) == 2
+            and centers[1] - centers[0] >= KMEANS_CLUSTER_SEPARATION_MIN
+        ):
+            gutter_x = (centers[0] + centers[1]) / 2
+            if zone.x_min < gutter_x < zone.x_max:
+                left_zone = Zone(zone.x_min, zone.y_min, int(gutter_x), zone.y_max)
+                right_zone = Zone(int(gutter_x), zone.y_min, zone.x_max, zone.y_max)
+                return VerticalSplit(left_zone, right_zone, gutter_x)
+            else:
+                logger.warning(
+                    f"    수직 분할 K-Means: 분할선({gutter_x:.1f})이 구역({zone.x_min}-{zone.x_max}) 밖에 위치. 분할 취소."
+                )
+                return None
+        else:
+            logger.debug(
+                f"    수직 분할 K-Means 실패: 중심간 거리({(centers[1] - centers[0]) if len(centers)==2 else 0:.1f}px) 임계값 미만"
+            )
+            return None
+    except Exception as e:
+        logger.error(f"    수직 분할 K-Means 중 오류: {e}")
+        return None
+# ============================================================================
+# 후처리 함수 (수정됨)
+# ============================================================================
+def _post_process_table_figure_assignment(
+    groups: List[ElementGroup], y_diff_threshold: int = 150
+) -> List[ElementGroup]:
+    """
+    그룹핑 후처리: 테이블/그림 요소가 현재 앵커보다 다음 앵커(들)에 훨씬 가까우면 이동 시도
+    --- 수정: 최적 그룹 탐색 및 Tie-breaker 추가 ---
+    """
+    logger.debug(
+        f"    테이블/그림 할당 후처리 시작: {len(groups)}개 그룹 (Threshold={y_diff_threshold}px, Closeness Ratio={POST_PROCESS_CLOSENESS_RATIO}, Lookahead={POST_PROCESS_LOOKAHEAD})"
+    )
+    adjusted_groups = groups  # 원본 리스트를 직접 수정
+    elements_to_move_dict: Dict[int, Tuple[MockElement, int]] = (
+        {}
+    )  # {element_id: (element, target_group_idx)}
+    moved_elements_log = []  # 로깅용
+    for i in range(len(adjusted_groups)):
+        current_group = adjusted_groups[i]
+        if not current_group.anchor:
+            continue
+        current_children_copy = list(
+            current_group.children
+        )  # 순회 중 변경을 위한 복사본
+        for child_idx, child in enumerate(current_children_copy):
+            # 이미 이동 대상으로 결정된 요소는 건너뜀
+            if child.element_id in elements_to_move_dict:
+                continue
+            if child.class_name in ["table", "figure", "flowchart"]:
+                y_diff_current = child.y_position - current_group.anchor.y_position
+                best_target_group_idx = -1
+                min_y_diff_next = float("inf")
+                # 현재 그룹 이후 몇 개 그룹까지 탐색
+                for lookahead_idx in range(1, POST_PROCESS_LOOKAHEAD + 1):
+                    next_group_idx = i + lookahead_idx
+                    if next_group_idx >= len(adjusted_groups):
+                        break
+                    next_group = adjusted_groups[next_group_idx]
+                    if not next_group.anchor:
+                        continue
+                    y_diff_next = abs(child.y_position - next_group.anchor.y_position)
+                    # 이동 조건 검사 (v2.2 조건)
+                    if y_diff_current > (y_diff_threshold / 2) and y_diff_next < (
+                        y_diff_current * POST_PROCESS_CLOSENESS_RATIO
+                    ):
+                        # --- 👇 Tie-breaker 수정 👇 ---
+                        # 더 가까운 그룹을 찾거나, 거리가 같지만 더 뒤의 그룹일 경우 갱신
+                        if y_diff_next < min_y_diff_next or (
+                            y_diff_next == min_y_diff_next
+                            and next_group_idx > best_target_group_idx
+                        ):
+                            min_y_diff_next = y_diff_next
+                            best_target_group_idx = next_group_idx
+                        # --- 👆 Tie-breaker 수정 끝 👆 ---
+                # 최적 그룹을 찾았으면 이동 대상으로 등록
+                if best_target_group_idx != -1:
+                    elements_to_move_dict[child.element_id] = (
+                        child,
+                        best_target_group_idx,
+                    )
+                    moved_elements_log.append(
+                        f"Elem {child.element_id} ({child.class_name}) from Grp {current_group.group_id} to Grp {adjusted_groups[best_target_group_idx].group_id}"
+                    )
+                    logger.trace(
+                        f"        이동 후보 확정: Elem {child.element_id} -> Group {adjusted_groups[best_target_group_idx].group_id} (Min Y diff next={min_y_diff_next:.0f})"
+                    )
+    # --- 실제 요소 이동 (루프 종료 후) ---
+    if elements_to_move_dict:
+        # 1. 원본 그룹에서 요소 제거
+        elements_removed_count = 0
+        for group in adjusted_groups:
+            original_children_count = len(group.children)
+            group.children = [
+                child
+                for child in group.children
+                if child.element_id not in elements_to_move_dict
+            ]
+            elements_removed_count += original_children_count - len(group.children)
+        # 2. 대상 그룹에 요소 추가
+        elements_added_count = 0
+        for element_id, (element, target_group_idx) in elements_to_move_dict.items():
+            if 0 <= target_group_idx < len(adjusted_groups):
+                adjusted_groups[target_group_idx].children.insert(
+                    0, element
+                )  # 그룹 맨 앞에 추가
+                elements_added_count += 1
+            else:
+                logger.error(
+                    f"후처리 이동 중 유효하지 않은 대상 그룹 인덱스: {target_group_idx} for Elem {element_id}"
+                )
+        logger.debug(
+            f"    후처리 요소 이동 완료: {elements_removed_count}개 제거, {elements_added_count}개 추가"
+        )
+    if moved_elements_log:
+        logger.info(
+            f"    테이블/그림 할당 후처리: {len(moved_elements_log)}개 요소 이동됨 - {', '.join(moved_elements_log)}"
+        )
+    else:
+        logger.debug("    테이블/그림 할당 후처리: 이동된 요소 없음")
+    return adjusted_groups
+# ============================================================================
+# Base Case 함수들 (기존과 동일 v2.1)
+# ============================================================================
+def _base_case_standard_1_column(
+    zone: Zone, elements: List[MockElement]
+) -> List[ElementGroup]:
+    # ... (v2.1 코드와 동일) ...
+    """표준 1단 구역 Base Case 처리 (상단 고아 분리)"""
+    logger.debug(
+        f"    표준 1단 Base Case 시작 (순차 처리 + 고아 개선): {len(elements)}개 요소 in {zone}"
+    )
+    anchors = sorted(
+        [e for e in elements if e.class_name in ALLOWED_ANCHORS],
+        key=lambda e: e.y_position,
+    )
+    children = [e for e in elements if e.class_name in ALLOWED_CHILDREN]
+    groups: Dict[int, ElementGroup] = {
+        anchor.element_id: ElementGroup(anchor=anchor) for anchor in anchors
+    }
+    assigned_children_ids = set()
+    logger.trace("      수평 인접 처리 시작...")
+    if anchors and children:
+        for anchor in anchors:
+            anchor_cy = anchor.bbox_y + anchor.bbox_height / 2
+            anchor_right_x = anchor.bbox_x + anchor.bbox_width
+            anchor_left_x = anchor.bbox_x
+            unassigned_children = [
+                c for c in children if c.element_id not in assigned_children_ids
+            ]
+            adjacent_child = None
+            min_y_diff = float("inf")
+            for child in unassigned_children:
+                child_cy = child.bbox_y + child.bbox_height / 2
+                child_right_x = child.bbox_x + child.bbox_width
+                child_left_x = child.bbox_x
+                y_diff = abs(anchor_cy - child_cy)
+                y_threshold = (
+                    (anchor.bbox_height + child.bbox_height)
+                    / 2
+                    * HORIZONTAL_ADJACENCY_Y_CENTER_RATIO
+                    if (anchor.bbox_height + child.bbox_height) > 0
+                    else 0
+                )
+                if y_diff >= y_threshold:
+                    continue
+                gap_right = child_left_x - anchor_right_x
+                gap_left = anchor_left_x - child_right_x
+                is_adjacent = (abs(gap_right) < HORIZONTAL_ADJACENCY_X_PROXIMITY) or (
+                    abs(gap_left) < HORIZONTAL_ADJACENCY_X_PROXIMITY
+                )
+                if is_adjacent and y_diff < min_y_diff:
+                    min_y_diff = y_diff
+                    adjacent_child = child
+            if adjacent_child:
+                logger.trace(
+                    f"        수평 인접 배정: 앵커 ID {anchor.element_id} <- 자식 ID {adjacent_child.element_id}"
+                )
+                groups[anchor.element_id].add_child(adjacent_child)
+                assigned_children_ids.add(adjacent_child.element_id)
+    logger.debug(
+        f"    수평 인접 처리 완료: {len(assigned_children_ids)}개 자식 우선 배정됨"
+    )
+    remaining_elements = anchors + [
+        c for c in children if c.element_id not in assigned_children_ids
+    ]
+    if not remaining_elements:
+        logger.debug("    모든 요소가 수평 인접으로 배정되어 그룹핑 완료.")
+        # 후처리 호출 전 그룹 ID 임시 할당 (선택적)
+        temp_groups = sorted(
+            list(groups.values()),
+            key=lambda g: g.anchor.y_position if g.anchor else float("inf"),
+        )
+        for idx, group in enumerate(temp_groups):
+            group.group_id = idx
+        return _post_process_table_figure_assignment(temp_groups)
+    logger.trace(
+        f"      나머지 요소 {len(remaining_elements)}개 (Y, X) 정렬 및 순차 그룹핑 시작..."
+    )
+    remaining_elements.sort(key=lambda e: (e.y_position, e.x_position))
+    final_groups: List[ElementGroup] = []
+    current_group: Optional[ElementGroup] = None
+    initial_top_orphan_children: List[MockElement] = []
+    initial_bottom_orphan_children: List[MockElement] = []
+    first_anchor_found = False
+    top_orphan_threshold_y = (
+        zone.y_min + zone.height * BASE_CASE_TOP_ORPHAN_THRESHOLD_RATIO
+    )
+    for element in remaining_elements:
+        if element.class_name in ALLOWED_ANCHORS:
+            first_anchor_found = True
+            if initial_top_orphan_children:
+                logger.trace(
+                    f"        독립적인 상단 고아 그룹 생성 ({len(initial_top_orphan_children)}개 요소)"
+                )
+                final_groups.append(
+                    ElementGroup(anchor=None, children=initial_top_orphan_children)
+                )
+                initial_top_orphan_children = []
+            if (
+                current_group is not None
+                and current_group.anchor is not None
+                and not current_group.is_empty()
+            ):
+                final_groups.append(current_group)
+            if element.element_id in groups:
+                current_group = groups[element.element_id]
+                logger.trace(f"        앵커 그룹 재사용 (ID: {element.element_id})")
+            else:
+                current_group = ElementGroup(anchor=element, children=[])
+                logger.trace(f"        새 앵커 그룹 시작 (ID: {element.element_id})")
+            if initial_bottom_orphan_children:
+                logger.trace(
+                    f"        첫 앵커(ID: {element.element_id}) 그룹에 하단 고아 자식 {len(initial_bottom_orphan_children)}개 추가"
+                )
+                current_group.children = (
+                    initial_bottom_orphan_children + current_group.children
+                )
+                initial_bottom_orphan_children = []
+        else:
+            if first_anchor_found:
+                if current_group is None:
+                    logger.warning(
+                        f"        앵커 없이 자식 요소(ID: {element.element_id}) 발견됨. 위치({element.y_position:.1f}) ��라 임시 고아 리스트에 추가."
+                    )
+                    if element.y_position < top_orphan_threshold_y:
+                        initial_top_orphan_children.append(element)
+                    else:
+                        initial_bottom_orphan_children.append(element)
+                else:
+                    current_group.add_child(element)
+                    logger.trace(
+                        f"        현재 그룹(앵커: {current_group.anchor.element_id if current_group.anchor else 'Orphan'})에 자식 추가 (ID: {element.element_id})"
+                    )
+            else:
+                if element.y_position < top_orphan_threshold_y:
+                    initial_top_orphan_children.append(element)
+                    logger.trace(
+                        f"        상단 고아 자식 요소(ID: {element.element_id}) 임시 저장 (Y < {top_orphan_threshold_y:.0f})"
+                    )
+                else:
+                    initial_bottom_orphan_children.append(element)
+                    logger.trace(
+                        f"        하단 고아 자식 요소(ID: {element.element_id}) 임시 저장 (Y >= {top_orphan_threshold_y:.0f})"
+                    )
+    if initial_top_orphan_children:
+        logger.trace(
+            f"        마지막 독립 상단 고아 그룹 생성 ({len(initial_top_orphan_children)}개 요소)"
+        )
+        final_groups.append(
+            ElementGroup(anchor=None, children=initial_top_orphan_children)
+        )
+    if current_group is not None and not current_group.is_empty():
+        final_groups.append(current_group)
+    elif initial_bottom_orphan_children:
+        logger.warning("        모든 요소가 하단 자식 요소임. 단일 고아 그룹 생성.")
+        final_groups.append(
+            ElementGroup(anchor=None, children=initial_bottom_orphan_children)
+        )
+    processed_anchor_ids = set(g.anchor.element_id for g in final_groups if g.anchor)
+    for anchor_id, group in groups.items():
+        if anchor_id not in processed_anchor_ids and group.anchor:
+            final_groups.append(group)
+            logger.trace(f"        미포함 앵커 그룹 추가 (수평 인접만): ID {anchor_id}")
+    final_groups.sort(
+        key=lambda g: (
+            g.anchor.y_position
+            if g.anchor
+            else (min(c.y_position for c in g.children) if g.children else float("inf"))
+        )
+    )
+    # 후처리 호출 전 그룹 ID 임시 할당
+    for idx, group in enumerate(final_groups):
+        group.group_id = idx
+    final_groups = _post_process_table_figure_assignment(final_groups)
+    logger.debug(
+        f"    순차 처리 기반 그룹핑 (+후처리) 완료: {len(final_groups)} 그룹 생성"
+    )
+    return final_groups
+def _base_case_mixed_layout(
+    zone: Zone, elements: List[MockElement], layout_type: LayoutType
+) -> List[ElementGroup]:
+    """Group the elements of a mixed-layout zone sequentially around anchors.
+
+    Elements are visited in ``(y_position, x_position)`` order.  Each anchor
+    (class in ``ALLOWED_ANCHORS``) starts a new group and every later
+    non-anchor element is added to the most recent anchor's group.  Elements
+    seen before the first anchor are classified by their vertical centre
+    against ``zone.y_min + zone.height * LAYOUT_DETECT_Y_SPLIT_POINT``: those
+    above it form a separate anchorless group, the others are prepended to the
+    first anchor's children.  The groups are then given temporary ids and
+    passed through ``_post_process_table_figure_assignment``.
+
+    Args:
+        zone: Zone being processed; supplies the Y split point.
+        elements: Elements located in the zone.
+        layout_type: Detected layout type; only used in the log output here.
+
+    Returns:
+        The element groups in the order they were created, after
+        post-processing.
+    """
+    logger.debug(
+        f"    혼합형 Base Case 시작 ({layout_type.name}): {len(elements)}개 요소 in {zone}"
+    )
+    sorted_elements = sorted(elements, key=lambda e: (e.y_position, e.x_position))
+    final_groups: List[ElementGroup] = []
+    current_group: Optional[ElementGroup] = None
+    initial_top_orphan_children: List[MockElement] = []
+    initial_bottom_orphan_children: List[MockElement] = []
+    first_anchor_found = False
+    # Y coordinate separating "top" orphans from "bottom" orphans.
+    split_y = zone.y_min + zone.height * LAYOUT_DETECT_Y_SPLIT_POINT
+    logger.trace(f"      혼합형 Base Case Y 분할점: {split_y:.1f}")
+    for element in sorted_elements:
+        element_y_center = element.y_position + element.bbox_height / 2
+        if element.class_name in ALLOWED_ANCHORS:
+            first_anchor_found = True
+            # Top orphans collected so far become their own anchorless group.
+            if initial_top_orphan_children:
+                logger.trace(
+                    f"        독립적인 상단 고아 그룹 생성 ({len(initial_top_orphan_children)}개 요소)"
+                )
+                final_groups.append(
+                    ElementGroup(anchor=None, children=initial_top_orphan_children)
+                )
+                initial_top_orphan_children = []
+            # Flush the previous group before starting the next one.
+            if current_group is not None and not current_group.is_empty():
+                final_groups.append(current_group)
+            current_group = ElementGroup(anchor=element, children=[])
+            logger.trace(f"        새 앵커 그룹 시작 (ID: {element.element_id})")
+            # Bottom orphans seen before the first anchor go to its front.
+            if initial_bottom_orphan_children:
+                logger.trace(
+                    f"        첫 앵커(ID: {element.element_id}) 그룹에 하단 고아 자식 {len(initial_bottom_orphan_children)}개 추가"
+                )
+                current_group.children = (
+                    initial_bottom_orphan_children + current_group.children
+                )
+                initial_bottom_orphan_children = []
+        else:
+            if first_anchor_found:
+                if current_group is None:
+                    # Defensive branch: current_group is assigned whenever an
+                    # anchor has been seen.
+                    logger.warning(
+                        f"        앵커 없이 자식 요소(ID: {element.element_id}) 발견됨. 위치({element_y_center:.1f}) 따라 임시 고아 리스트에 추가."
+                    )
+                    if element_y_center < split_y:
+                        initial_top_orphan_children.append(element)
+                    else:
+                        initial_bottom_orphan_children.append(element)
+                else:
+                    current_group.add_child(element)
+                    logger.trace(
+                        f"        현재 그룹(앵커: {current_group.anchor.element_id if current_group.anchor else 'Orphan'})에 자식 추가 (ID: {element.element_id})"
+                    )
+            else:
+                # No anchor yet: classify the element as a top or bottom orphan.
+                if element_y_center < split_y:
+                    initial_top_orphan_children.append(element)
+                    logger.trace(
+                        f"        상단 고아 자식 요소(ID: {element.element_id}) 임시 저장"
+                    )
+                else:
+                    initial_bottom_orphan_children.append(element)
+                    logger.trace(
+                        f"        하단 고아 자식 요소(ID: {element.element_id}) 임시 저장"
+                    )
+    # Flush whatever is still pending after the loop.
+    if initial_top_orphan_children:
+        logger.trace(
+            f"        마지막 독립 상단 고아 그룹 생성 ({len(initial_top_orphan_children)}개 요소)"
+        )
+        final_groups.append(
+            ElementGroup(anchor=None, children=initial_top_orphan_children)
+        )
+    if current_group is not None and not current_group.is_empty():
+        final_groups.append(current_group)
+    elif initial_bottom_orphan_children:
+        logger.warning("        모든 요소가 하단 자식 요소임. 단일 고아 그룹 생성.")
+        final_groups.append(
+            ElementGroup(anchor=None, children=initial_bottom_orphan_children)
+        )
+    # Assign temporary group ids before post-processing.
+    for idx, group in enumerate(final_groups):
+        group.group_id = idx
+    final_groups = _post_process_table_figure_assignment(final_groups)
+    return final_groups
+# ============================================================================
+# 최종 병합 및 순서 부여 함수 (기존과 동일)
+# ============================================================================
+def flatten_groups_and_assign_order(
+    groups: List[ElementGroup], start_global_order: int, start_group_id: int
+) -> Tuple[List[MockElement], int, int]:
+    # ... (코드 동일) ...
+    """주어진 그룹 리스트를 평탄화하고 전역 순서/그룹 ID 부여"""
+    flattened = []
+    global_order = start_global_order
+    group_id_counter = start_group_id
+    logger.debug(
+        f"    평탄화 시작: {len(groups)}개 그룹 (시작 order={global_order}, group_id={group_id_counter})"
+    )
+    for group in groups:  # 최종 정렬된 그룹 순서 사용
+        # 그룹 객체의 ID는 임시 ID일 수 있으므로 여기서 최종 ID 할당
+        final_group_id = group_id_counter
+        group.group_id = final_group_id  # 로깅 및 참조용 업데이트
+        elements_in_group = group.get_all_elements_sorted()
+        logger.trace(
+            f"      그룹 {final_group_id} 평탄화 (Anchor: {group.anchor.element_id if group.anchor else 'Orphan'}, 요소 수: {len(elements_in_group)})"
+        )
+        for local_order, element in enumerate(elements_in_group):
+            try:
+                setattr(element, "order_in_question", global_order)
+                setattr(element, "group_id", final_group_id)  # 최종 그룹 ID 사용
+                setattr(element, "order_in_group", local_order)
+                flattened.append(element)
+                global_order += 1
+            except AttributeError as e:
+                logger.error(
+                    f"요소 (ID: {getattr(element, 'element_id', 'N/A')})에 정렬 속성 추가 실패: {e}"
+                )
+        group_id_counter += 1
+    logger.debug(
+        f"    평탄화 완료: {len(flattened)}개 요소 생성 (다음 order={global_order}, group_id={group_id_counter})"
+    )
+    return flattened, global_order, group_id_counter
+# ============================================================================
+# 헬퍼 함수 (기존과 동일)
+# ============================================================================
+def preprocess_elements(
+    elements: List[MockElement], document_type: str
+) -> List[MockElement]:
+    # ... (코드 동일) ...
+    """0단계 전처리"""
+    original_count = len(elements)
+    if document_type == "question_based":
+        filtered = [e for e in elements if e.class_name in ALLOWED_CLASSES]
+        logger.info(
+            f"전처리 (question_based): {original_count}개 → {len(filtered)}개 (허용 클래스 필터링)"
+        )
+    elif document_type == "reading_order":
+        filtered = elements
+        logger.info(f"전처리 (reading_order): {original_count}개 (모든 클래스 허용)")
+    else:
+        logger.warning(f"알 수 없는 문서 타입 '{document_type}', 모든 요소 반환")
+        filtered = elements
+    valid_elements = [e for e in filtered if hasattr(e, "area") and e.area > 0]
+    if len(valid_elements) < len(filtered):
+        logger.warning(
+            f"전처리: 면적이 0 이하인 요소 {len(filtered) - len(valid_elements)}개 제거"
+        )
+    return valid_elements
+def calculate_page_width(elements: List[MockElement]) -> int:
+    # ... (코드 동일) ...
+    """페이지 너비 추정"""
+    if not elements:
+        return 0
+        return max(e.bbox_x + e.bbox_width for e in elements) if elements else 0
+def calculate_page_height(elements: List[MockElement]) -> int:
+    # ... (코드 동일) ...
+    """페이지 높이 추정"""
+    if not elements:
+        return 0
+        return max(e.bbox_y + e.bbox_height for e in elements) if elements else 0

app/services/text_version_service.py ADDED Viewed

	@@ -0,0 +1,179 @@

+"""
+텍스트 버전 관리 서비스
+=======================
+Project/의 Mock 기반 텍스트 버전 로직을 Backend/ ORM 환경에 맞게 재구성한 모듈입니다.
+페이지별 텍스트 버전을 조회·생성·갱신하며, `batch_analysis` 및 FastAPI 라우터에서 재사용합니다.
+"""
+from __future__ import annotations
+from typing import Any, Dict, Optional
+from loguru import logger
+from sqlalchemy.orm import Session
+from ..models import CombinedResult, Page, TextVersion
+def _deactivate_existing_versions(db: Session, page_id: int) -> None:
+    """
+    지정한 페이지의 기존 text_versions 중 is_current=True 항목을 False로 설정합니다.
+    """
+    updated = (
+        db.query(TextVersion)
+        .filter(
+            TextVersion.page_id == page_id,
+            TextVersion.is_current.is_(True),
+        )
+        .update({"is_current": False}, synchronize_session=False)
+    )
+    if updated:
+        logger.debug("페이지 %s의 기존 텍스트 버전 %s건을 비활성화했습니다.", page_id, updated)
+def _get_next_version_number(db: Session, page_id: int) -> int:
+    """
+    다음 버전 번호를 계산합니다.
+    """
+    latest_number = (
+        db.query(TextVersion.version_number)
+        .filter(TextVersion.page_id == page_id)
+        .order_by(TextVersion.version_number.desc())
+        .limit(1)
+        .scalar()
+    )
+    return (latest_number or 0) + 1
+def create_text_version(
+    db: Session,
+    page: Page,
+    content: str,
+    *,
+    version_type: str = "auto_formatted",
+    user_id: Optional[int] = None,
+    commit: bool = False,
+) -> TextVersion:
+    """
+    텍스트 버전을 생성하고 is_current 플래그를 관리합니다.
+    """
+    _deactivate_existing_versions(db, page.page_id)
+    next_number = _get_next_version_number(db, page.page_id)
+    version = TextVersion(
+        page_id=page.page_id,
+        user_id=user_id,
+        content=content,
+        version_number=next_number,
+        version_type=version_type,
+        is_current=True,
+    )
+    db.add(version)
+    db.flush()
+    db.refresh(version)
+    if commit:
+        db.commit()
+    logger.info(
+        "텍스트 버전 생성 완료: page_id=%s, version_id=%s, number=%s, type=%s",
+        page.page_id,
+        version.version_id,
+        next_number,
+        version_type,
+    )
+    return version
+def _serialize_version(version: TextVersion) -> Dict[str, Any]:
+    return {
+        "page_id": version.page_id,
+        "version_id": version.version_id,
+        "version_type": version.version_type,
+        "is_current": version.is_current,
+        "content": version.content,
+        "created_at": version.created_at,
+    }
+def get_current_page_text(db: Session, page_id: int) -> Optional[Dict[str, Any]]:
+    """
+    현재(is_current=True) 텍스트 버전을 조회합니다.
+    """
+    page = (
+        db.query(Page)
+        .filter(Page.page_id == page_id)
+        .first()
+    )
+    if not page:
+        raise ValueError(f"페이지 ID {page_id}를 찾을 수 없습니다.")
+    version = (
+        db.query(TextVersion)
+        .filter(
+            TextVersion.page_id == page_id,
+            TextVersion.is_current.is_(True),
+        )
+        .order_by(TextVersion.version_number.desc())
+        .first()
+    )
+    if not version:
+        logger.warning(
+            "페이지 %s의 현재 텍스트 버전을 찾을 수 없습니다. status=%s",
+            page_id,
+            page.analysis_status,
+        )
+        return None
+    return _serialize_version(version)
+def save_user_edited_version(
+    db: Session,
+    page_id: int,
+    content: str,
+    *,
+    user_id: Optional[int],
+) -> Dict[str, Any]:
+    """
+    사용자 편집 텍스트를 새 버전으로 저장하고 해당 버전을 현재 버전으로 설정합니다.
+    """
+    page = (
+        db.query(Page)
+        .filter(Page.page_id == page_id)
+        .first()
+    )
+    if not page:
+        raise ValueError(f"페이지 ID {page_id}를 찾을 수 없습니다.")
+    version = create_text_version(
+        db,
+        page,
+        content,
+        version_type="user_edited",
+        user_id=user_id,
+        commit=False,
+    )
+    deleted = (
+        db.query(CombinedResult)
+        .filter(CombinedResult.project_id == page.project_id)
+        .delete(synchronize_session=False)
+    )
+    if deleted:
+        logger.info(
+            "CombinedResult 캐시 무효화: project_id=%s, 삭제된 레코드=%s",
+            page.project_id,
+            deleted,
+        )
+    db.commit()
+    db.refresh(version)
+    return _serialize_version(version)
+__all__ = [
+    "create_text_version",
+    "get_current_page_text",
+    "save_user_edited_version",
+]

docker-compose.yml ADDED Viewed

	@@ -0,0 +1,57 @@

+version: '3.8'
+services:
+  mysql:
+    image: mysql:8.0
+    container_name: smart_mysql
+    restart: unless-stopped
+    # 환경 변수
+    environment:
+      MYSQL_ROOT_PASSWORD: ${MYSQL_ROOT_PASSWORD:-1q2w3e4r}
+      MYSQL_DATABASE: ${MYSQL_DATABASE:-smarteyessen_db}
+      # 선택적: 추가 사용자 생성
+      # MYSQL_USER: smarteye_user
+      # MYSQL_PASSWORD: smarteye_pass
+    # 포트 매핑
+    ports:
+      - "${MYSQL_PORT:-3308}:3306"
+    # MySQL 서버 설정 (UTF-8 강제)
+    command:
+      - --character-set-server=utf8mb4
+      - --collation-server=utf8mb4_unicode_ci
+      - --default-authentication-plugin=mysql_native_password
+      - --max-connections=200
+      - --sql-mode=STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION
+    # 데이터 지속성 (Named Volume 사용)
+    volumes:
+      - smart_mysql_data:/var/lib/mysql
+      # 초기화 스크립트 (선택사항)
+      - ./scripts/init_db.sql:/docker-entrypoint-initdb.d/01_init.sql
+      - ./scripts/seed_data.sql:/docker-entrypoint-initdb.d/02_seed.sql
+    # 헬스체크
+    healthcheck:
+      # CMD-SHELL lets the container shell expand its own MYSQL_ROOT_PASSWORD at
+      # run time ("$$" escapes Compose interpolation), so the check keeps working
+      # when the password is overridden instead of relying on a hard-coded value.
+      test: ["CMD-SHELL", 'mysqladmin ping -h localhost -u root -p"$$MYSQL_ROOT_PASSWORD"']
+      interval: 10s
+      timeout: 5s
+      retries: 5
+    # 네트워크
+    networks:
+      - smarteye_network
+# Named Volume 정의
+volumes:
+  smart_mysql_data:
+    name: smart_mysql_data
+    driver: local
+# 네트워크 정의
+networks:
+  smarteye_network:
+    name: smarteye_network
+    driver: bridge

docs/Backend API 문서/00_개요.md ADDED Viewed

	@@ -0,0 +1,413 @@

+# SmartEyeSsen API 문서 - 개요
+## 📖 목차
+- [시스템 소개](#시스템-소개)
+- [기술 스택](#기술-스택)
+- [시작하기](#시작하기)
+- [인증](#인증)
+- [기본 URL 및 환경 설정](#기본-url-및-환경-설정)
+- [API 문서 구성](#api-문서-구성)
+- [빠른 시작 예제](#빠른-시작-예제)
+---
+## 시스템 소개
+**SmartEyeSsen**은 시각장애 학생을 위한 AI 기반 학습 자료 분석 시스템입니다. PDF 또는 이미지 형태의 학습 자료를 업로드하면, AI가 자동으로 레이아웃을 분석하고 텍스트를 추출하며, 도표와 그림에 대한 설명을 생성합니다.
+### 주요 기능
+#### 📄 다중 페이지 문서 처리
+- **이미지 업로드**: 개별 페이지를 이미지로 업로드
+- **PDF 업로드**: PDF 파일을 자동으로 페이지별로 분할하여 처리
+#### 🤖 AI 레이아웃 분석
+- DocLayout-YOLO 모델을 사용한 자동 레이아웃 감지
+- 문제 번호(question_number), 본문(text), 도표(figure), 표(table) 등 자동 인식
+#### 🔍 OCR 텍스트 추출
+- PaddleOCR 기반 고정밀 텍스트 인식
+- 다국어 지원 (한국어, 영어, 일본어, 중국어)
+#### ✏️ 텍스트 편집 및 버전 관리
+- TinyMCE 편집기 기반 텍스트 수정
+- 자동 버전 관리 (원본, 자동 포맷팅, 사용자 편집)
+#### 🖼️ AI 설명 생성
+- GPT-4-turbo를 활용한 도표/표/순서도 설명 자동 생성
+- 시각장애인을 위한 상세한 설명 제공
+#### 📊 지능형 정렬
+- **문제지(Worksheet)**: 문제 번호 기반 계층적 정렬
+- **일반 문서(Document)**: 좌표 기반 읽기 순서 정렬
+#### 📥 통합 문서 다운로드
+- DOCX 형식으로 전체 내용 다운로드
+- 페이지별 텍스트 통합 및 포맷팅 적용
+---
+## 기술 스택
+### Backend
+- **Framework**: FastAPI 0.104+
+- **Database**: MySQL 8.0
+- **ORM**: SQLAlchemy 2.0
+- **Validation**: Pydantic v2
+- **문서화**: Swagger/OpenAPI 3.0
+### AI Models
+- **Layout Detection**: DocLayout-YOLO
+- **OCR**: PaddleOCR (한국어/영어/중국어/일본어)
+- **AI Description**: GPT-4-turbo (OpenAI)
+### Document Processing
+- **PDF**: PyMuPDF (fitz)
+- **Image**: OpenCV, Pillow
+- **Word**: python-docx
+---
+## 시작하기
+### 1. 백엔드 서버 실행
+#### 환경 변수 설정
+`.env` 파일을 생성하여 다음 내용을 설정하세요:
+```env
+# 데이터베이스 설정
+DB_HOST=localhost
+# docker-compose.yml의 MySQL 컨테이너에 접속할 때는 호스트 포트(MYSQL_PORT, 기본값 3308)를 사용하세요.
+DB_PORT=3306
+DB_USER=your_username
+DB_PASSWORD=your_password
+DB_NAME=smarteyessen_db
+# API 서버 설정
+API_HOST=0.0.0.0
+API_PORT=8000
+CORS_ORIGINS=http://localhost:3000,http://localhost:8080
+# OpenAI API (선택 사항)
+OPENAI_API_KEY=sk-...
+# 환경
+ENVIRONMENT=development
+```
+#### 서버 시작
+**Linux/Mac**:
+```bash
+cd Backend
+chmod +x start_server.sh
+./start_server.sh
+```
+**Windows**:
+```cmd
+cd Backend
+start_server.bat
+```
+서버가 정상적으로 시작되면 다음과 같은 메시지가 표시됩니다:
+```
+🚀 SmartEyeSsen Backend Starting...
+✅ Database connection successful
+✅ Database tables initialized
+✅ SmartEyeSsen Backend Ready!
+📖 API Docs: http://localhost:8000/docs
+```
+### 2. API 문서 확인
+브라우저에서 다음 URL에 접속하여 인터랙티브 API 문서를 확인할 수 있습니다:
+- **Swagger UI**: http://localhost:8000/docs
+- **ReDoc**: http://localhost:8000/redoc
+- **OpenAPI JSON**: http://localhost:8000/openapi.json
+### 3. 헬스 체크
+서버가 정상적으로 작동하는지 확인:
+```bash
+curl http://localhost:8000/health
+```
+**응답 예시**:
+```json
+{
+  "status": "healthy",
+  "database": "connected",
+  "api_version": "1.0.0"
+}
+```
+---
+## 인증
+**현재 버전 (v1.0.1)에서는 인증이 필요하지 않습니다.**
+향후 버전에서는 다음과 같은 인증 방식이 추가될 예정입니다:
+- JWT (JSON Web Token) 기반 인증
+- OAuth 2.0 (Google, Naver 등)
+- API Key 기반 인증
+---
+## 기본 URL 및 환경 설정
+### Base URL
+**개발 환경**:
+```
+http://localhost:8000
+```
+**프로덕션 환경** (배포 후):
+```
+https://api.smarteyessen.com
+```
+### API Endpoint 구조
+모든 API 엔드포인트는 `/api` 접두사를 사용합니다:
+```
+/api/projects       # 프로젝트 관리
+/api/pages          # 페이지 관리
+/api/analysis       # 분석 관련
+```
+### CORS 설정
+프론트엔드에서 API를 호출하려면 CORS 설정이 필요합니다. `.env` 파일의 `CORS_ORIGINS`에 프론트엔드 URL을 추가하세요:
+```env
+CORS_ORIGINS=http://localhost:3000,http://localhost:8080,https://yourdomain.com
+```
+---
+## API 문서 구성
+이 API 문서는 다음과 같이 구성되어 있습니다:
+| 문서 | 내용 |
+|------|------|
+| [00_개요.md](./00_개요.md) | 시스템 소개, 시작하기, 인증 (현재 문서) |
+| [01_프로젝트_API.md](./01_프로젝트_API.md) | 프로젝트 생성, 조회, 수정, 삭제 |
+| [02_페이지_API.md](./02_페이지_API.md) | 이미지/PDF 업로드, 페이지 조회, 텍스트 관리 |
+| [03_분석_API.md](./03_분석_API.md) | 배치 분석, 비동기 분석, 작업 상태 조회 |
+| [04_다운로드_API.md](./04_다운로드_API.md) | 통합 텍스트 조회, Word 문서 다운로드 |
+| [05_데이터_모델.md](./05_데이터_모델.md) | 스키마 및 Enum 정의 |
+| [06_에러_처리.md](./06_에러_처리.md) | HTTP 상태 코드, 에러 응답 형식 |
+---
+## 빠른 시작 예제
+다음은 프론트엔드에서 SmartEyeSsen API를 사용하는 기본적인 흐름입니다.
+### 1️⃣ 프로젝트 생성
+```javascript
+const createProject = async () => {
+  const response = await fetch('http://localhost:8000/api/projects', {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify({
+      project_name: '수학 문제집 1단원',
+      doc_type_id: 1,  // 1: worksheet, 2: document
+      analysis_mode: 'auto',
+      user_id: 1
+    })
+  });
+  const project = await response.json();
+  console.log('프로젝트 생성:', project);
+  return project;
+};
+```
+### 2️⃣ PDF 업로드
+```javascript
+const uploadPDF = async (projectId, pdfFile) => {
+  const formData = new FormData();
+  formData.append('project_id', projectId);
+  formData.append('file', pdfFile);
+  const response = await fetch('http://localhost:8000/api/pages/upload', {
+    method: 'POST',
+    body: formData
+  });
+  const result = await response.json();
+  console.log(`${result.total_created}개 페이지 생성됨`);
+  return result;
+};
+```
+### 3️⃣ 프로젝트 분석 (비동기)
+```javascript
+const analyzeProject = async (projectId) => {
+  const response = await fetch(`http://localhost:8000/api/projects/${projectId}/analyze`, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify({
+      use_ai_descriptions: true,
+      api_key: 'sk-...'  // OpenAI API Key (선택)
+    })
+  });
+  const result = await response.json();
+  console.log('분석 결과:', result);
+  return result;
+};
+```
+### 4️⃣ 페이지 텍스트 조회
+```javascript
+const getPageText = async (pageId) => {
+  const response = await fetch(`http://localhost:8000/api/pages/${pageId}/text`);
+  const textData = await response.json();
+  console.log('페이지 텍스트:', textData.content);
+  return textData;
+};
+```
+### 5️⃣ 사용자 편집 텍스트 저장
+```javascript
+const saveEditedText = async (pageId, editedContent, userId) => {
+  const response = await fetch(`http://localhost:8000/api/pages/${pageId}/text`, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify({
+      content: editedContent,
+      user_id: userId
+    })
+  });
+  const result = await response.json();
+  console.log('텍스트 저장 완료:', result);
+  return result;
+};
+```
+### 6️⃣ Word 문서 다운로드
+```javascript
+const downloadDocument = async (projectId) => {
+  const response = await fetch(`http://localhost:8000/api/projects/${projectId}/download`, {
+    method: 'POST'
+  });
+  const blob = await response.blob();
+  const url = window.URL.createObjectURL(blob);
+  const a = document.createElement('a');
+  a.href = url;
+  a.download = `project_${projectId}.docx`;
+  document.body.appendChild(a);
+  a.click();
+  a.remove();
+  window.URL.revokeObjectURL(url);
+};
+```
+---
+## React/Axios 예제
+Axios를 사용하는 경우:
+```javascript
+import axios from 'axios';
+// API 클라이언트 생성
+const apiClient = axios.create({
+  baseURL: 'http://localhost:8000',
+  headers: {
+    'Content-Type': 'application/json',
+  }
+});
+// 프로젝트 생성
+const createProject = async (projectData) => {
+  try {
+    const response = await apiClient.post('/api/projects', projectData);
+    return response.data;
+  } catch (error) {
+    console.error('프로젝트 생성 실패:', error.response?.data);
+    throw error;
+  }
+};
+// PDF 업로드
+const uploadPDF = async (projectId, pdfFile) => {
+  const formData = new FormData();
+  formData.append('project_id', projectId);
+  formData.append('file', pdfFile);
+  try {
+    const response = await apiClient.post('/api/pages/upload', formData, {
+      headers: {
+        'Content-Type': 'multipart/form-data',
+      }
+    });
+    return response.data;
+  } catch (error) {
+    console.error('PDF 업로드 실패:', error.response?.data);
+    throw error;
+  }
+};
+// 프로젝트 분석
+const analyzeProject = async (projectId, useAI = true, apiKey = null) => {
+  try {
+    const response = await apiClient.post(`/api/projects/${projectId}/analyze`, {
+      use_ai_descriptions: useAI,
+      api_key: apiKey
+    });
+    return response.data;
+  } catch (error) {
+    console.error('분석 실패:', error.response?.data);
+    throw error;
+  }
+};
+export { createProject, uploadPDF, analyzeProject };
+```
+---
+## 다음 단계
+API 사용 방법을 더 자세히 알아보려면 다음 문서를 참고하세요:
+1. **[프로젝트 API](./01_프로젝트_API.md)**: 프로젝트 생성, 조회, 수정, 삭제 방법
+2. **[페이지 API](./02_페이지_API.md)**: 이미지/PDF 업로드 및 텍스트 관리
+3. **[분석 API](./03_분석_API.md)**: AI 분석 실행 및 작업 상태 조회
+4. **[다운로드 API](./04_다운로드_API.md)**: 통합 텍스트 및 문서 다운로드
+5. **[데이터 모델](./05_데이터_모델.md)**: 상세한 스키마 및 Enum 정의
+6. **[에러 처리](./06_에러_처리.md)**: 에러 코드 및 처리 방법
+---
+## 문의 및 지원
+- **GitHub Issues**: [SmartEyeSsen Issues](https://github.com/yourorg/smarteyessen/issues)
+- **이메일**: support@smarteyessen.com
+- **문서 버전**: v1.0.1
+- **최종 수정일**: 2025-01-22

docs/Backend API 문서/01_프로젝트_API.md ADDED Viewed

	@@ -0,0 +1,608 @@

+# 프로젝트 API
+프로젝트는 문서 처리의 최상위 단위입니다. 하나의 프로젝트는 여러 페이지를 포함할 수 있으며, 각 프로젝트는 문서 타입(worksheet 또는 document)을 가집니다.
+## 📖 목차
+- [엔드포인트 목록](#엔드포인트-목록)
+- [1. 프로젝트 생성](#1-프로젝트-생성)
+- [2. 프로젝트 목록 조회](#2-프로젝트-목록-조회)
+- [3. 프로젝트 상세 조회](#3-프로젝트-상세-조회)
+- [4. 프로젝트 수정](#4-프로젝트-수정)
+- [5. 프로젝트 삭제](#5-프로젝트-삭제)
+---
+## 엔드포인트 목록
+| Method | Endpoint | 설명 |
+|--------|----------|------|
+| POST | `/api/projects` | 새 프로젝트 생성 |
+| GET | `/api/projects` | 프로젝트 목록 조회 (페이지네이션 지원) |
+| GET | `/api/projects/{project_id}` | 프로젝트 상세 조회 (페이지 포함) |
+| PATCH | `/api/projects/{project_id}` | 프로젝트 정보 수정 |
+| DELETE | `/api/projects/{project_id}` | 프로젝트 삭제 (cascade) |
+---
+## 1. 프로젝트 생성
+새로운 프로젝트를 생성합니다.
+### Endpoint
+```
+POST /api/projects
+```
+### Request Body
+```json
+{
+  "project_name": "수학 문제집 1단원",
+  "doc_type_id": 1,
+  "analysis_mode": "auto",
+  "user_id": 1
+}
+```
+**필드 설명**:
+| 필드 | 타입 | 필수 | 설명 |
+|------|------|------|------|
+| `project_name` | string | ✅ | 프로젝트 이름 (1~255자) |
+| `doc_type_id` | integer | ✅ | 문서 타입 ID<br>- `1`: worksheet (문제지)<br>- `2`: document (일반 문서) |
+| `analysis_mode` | string | ❌ | 분석 모드 (기본값: `auto`)<br>- `auto`: 자동 분석<br>- `manual`: 수동 분석<br>- `hybrid`: 하이브리드 |
+| `user_id` | integer | ✅ | 사용자 ID |
+### Response
+**HTTP 201 Created**
+```json
+{
+  "project_id": 1,
+  "user_id": 1,
+  "doc_type_id": 1,
+  "project_name": "수학 문제집 1단원",
+  "total_pages": 0,
+  "analysis_mode": "auto",
+  "status": "created",
+  "created_at": "2025-01-22T10:30:00",
+  "updated_at": "2025-01-22T10:30:00"
+}
+```
+**응답 필드**:
+| 필드 | 타입 | 설명 |
+|------|------|------|
+| `project_id` | integer | 생성된 프로젝트 고유 ID |
+| `user_id` | integer | 소유자 사용자 ID |
+| `doc_type_id` | integer | 문서 타입 ID |
+| `project_name` | string | 프로젝트 이름 |
+| `total_pages` | integer | 총 페이지 수 (초기값: 0) |
+| `analysis_mode` | string | 분석 모드 |
+| `status` | string | 프로젝트 상태<br>- `created`: 생성됨<br>- `in_progress`: 진행 중<br>- `completed`: 완료<br>- `error`: 오류 |
+| `created_at` | datetime | 생성일시 |
+| `updated_at` | datetime | 수정일시 |
+### 예제 코드
+**JavaScript (fetch)**:
+```javascript
+const createProject = async (projectData) => {
+  const response = await fetch('http://localhost:8000/api/projects', {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify({
+      project_name: projectData.name,
+      doc_type_id: projectData.docType,
+      analysis_mode: 'auto',
+      user_id: projectData.userId
+    })
+  });
+  if (!response.ok) {
+    throw new Error(`프로젝트 생성 실패: ${response.status}`);
+  }
+  return await response.json();
+};
+// 사용 예시
+createProject({
+  name: '수학 문제집 1단원',
+  docType: 1,
+  userId: 1
+}).then(project => {
+  console.log('프로젝트 생성 완료:', project.project_id);
+});
+```
+**React + Axios**:
+```javascript
+import axios from 'axios';
+const apiClient = axios.create({
+  baseURL: 'http://localhost:8000',
+});
+const createProject = async (name, docTypeId, userId) => {
+  try {
+    const response = await apiClient.post('/api/projects', {
+      project_name: name,
+      doc_type_id: docTypeId,
+      analysis_mode: 'auto',
+      user_id: userId
+    });
+    return response.data;
+  } catch (error) {
+    console.error('프로젝트 생성 실패:', error.response?.data);
+    throw error;
+  }
+};
+// 사용 예시
+createProject('수학 문제집 1단원', 1, 1)
+  .then(project => console.log('생성됨:', project))
+  .catch(error => console.error('에러:', error));
+```
+---
+## 2. 프로젝트 목록 조회
+사용자의 프로젝트 목록을 조회합니다. 페이지네이션을 지원합니다.
+### Endpoint
+```
+GET /api/projects
+```
+### Query Parameters
+| 파라미터 | 타입 | 필수 | 설명 |
+|----------|------|------|------|
+| `user_id` | integer | ❌ | 특정 사용자의 프로젝트만 필터링 |
+| `skip` | integer | ❌ | 건너뛸 개수 (기본값: 0) |
+| `limit` | integer | ❌ | 조회할 개수 (기본값: 100, 최대: 1000) |
+### Response
+**HTTP 200 OK**
+```json
+[
+  {
+    "project_id": 1,
+    "user_id": 1,
+    "doc_type_id": 1,
+    "project_name": "수학 문제집 1단원",
+    "total_pages": 5,
+    "analysis_mode": "auto",
+    "status": "completed",
+    "created_at": "2025-01-22T10:30:00",
+    "updated_at": "2025-01-22T10:35:00"
+  },
+  {
+    "project_id": 2,
+    "user_id": 1,
+    "doc_type_id": 2,
+    "project_name": "역사 교과서",
+    "total_pages": 10,
+    "analysis_mode": "auto",
+    "status": "in_progress",
+    "created_at": "2025-01-22T11:00:00",
+    "updated_at": "2025-01-22T11:05:00"
+  }
+]
+```
+### 예제 코드
+**JavaScript (fetch)**:
+```javascript
+const getProjects = async (userId = null, skip = 0, limit = 100) => {
+  const params = new URLSearchParams();
+  if (userId) params.append('user_id', userId);
+  params.append('skip', skip);
+  params.append('limit', limit);
+  const response = await fetch(`http://localhost:8000/api/projects?${params}`);
+  if (!response.ok) {
+    throw new Error(`프로젝트 조회 실패: ${response.status}`);
+  }
+  return await response.json();
+};
+// 사용 예시
+getProjects(1, 0, 20).then(projects => {
+  console.log(`${projects.length}개 프로젝트 조회됨`);
+  projects.forEach(p => {
+    console.log(`- ${p.project_name} (${p.total_pages}페이지)`);
+  });
+});
+```
+**React Component 예제**:
+```jsx
+import React, { useState, useEffect } from 'react';
+import axios from 'axios';
+function ProjectList({ userId }) {
+  const [projects, setProjects] = useState([]);
+  const [loading, setLoading] = useState(true);
+  useEffect(() => {
+    const fetchProjects = async () => {
+      try {
+        const response = await axios.get('http://localhost:8000/api/projects', {
+          params: { user_id: userId, limit: 50 }
+        });
+        setProjects(response.data);
+      } catch (error) {
+        console.error('프로젝트 조회 실패:', error);
+      } finally {
+        setLoading(false);
+      }
+    };
+    fetchProjects();
+  }, [userId]);
+  if (loading) return <div>로딩 중...</div>;
+  return (
+    <div>
+      <h2>내 프로젝트 ({projects.length}개)</h2>
+      <ul>
+        {projects.map(project => (
+          <li key={project.project_id}>
+            {project.project_name} - {project.total_pages}페이지
+            <span className={`status-${project.status}`}>
+              {project.status}
+            </span>
+          </li>
+        ))}
+      </ul>
+    </div>
+  );
+}
+```
+---
+## 3. 프로젝트 상세 조회
+프로젝트의 상세 정보를 페이지 목록과 함께 조회합니다.
+### Endpoint
+```
+GET /api/projects/{project_id}
+```
+### Path Parameters
+| 파라미터 | 타입 | 설명 |
+|----------|------|------|
+| `project_id` | integer | 조회할 프로젝트 ID |
+### Response
+**HTTP 200 OK**
+```json
+{
+  "project_id": 1,
+  "user_id": 1,
+  "doc_type_id": 1,
+  "project_name": "수학 문제집 1단원",
+  "total_pages": 3,
+  "analysis_mode": "auto",
+  "status": "completed",
+  "created_at": "2025-01-22T10:30:00",
+  "updated_at": "2025-01-22T10:35:00",
+  "pages": [
+    {
+      "page_id": 1,
+      "project_id": 1,
+      "page_number": 1,
+      "image_path": "uploads/project_1_page_1_abc123.png",
+      "image_width": 2480,
+      "image_height": 3508,
+      "analysis_status": "completed",
+      "processing_time": 5.23,
+      "created_at": "2025-01-22T10:31:00",
+      "analyzed_at": "2025-01-22T10:35:00"
+    },
+    {
+      "page_id": 2,
+      "project_id": 1,
+      "page_number": 2,
+      "image_path": "uploads/project_1_page_2_def456.png",
+      "image_width": 2480,
+      "image_height": 3508,
+      "analysis_status": "completed",
+      "processing_time": 4.87,
+      "created_at": "2025-01-22T10:32:00",
+      "analyzed_at": "2025-01-22T10:36:00"
+    }
+  ]
+}
+```
+### 예제 코드
+**JavaScript (fetch)**:
+```javascript
+const getProjectDetail = async (projectId) => {
+  const response = await fetch(`http://localhost:8000/api/projects/${projectId}`);
+  if (!response.ok) {
+    if (response.status === 404) {
+      throw new Error('프로젝트를 찾을 수 없습니다.');
+    }
+    throw new Error(`프로젝트 조회 실패: ${response.status}`);
+  }
+  return await response.json();
+};
+// 사용 예시
+getProjectDetail(1).then(project => {
+  console.log(`프로젝트: ${project.project_name}`);
+  console.log(`페이지 수: ${project.pages.length}`);
+  project.pages.forEach(page => {
+    console.log(`- 페이지 ${page.page_number}: ${page.analysis_status}`);
+  });
+});
+```
+---
+## 4. 프로젝트 수정
+프로젝트 정보를 수정합니다.
+### Endpoint
+```
+PATCH /api/projects/{project_id}
+```
+### Path Parameters
+| 파라미터 | 타입 | 설명 |
+|----------|------|------|
+| `project_id` | integer | 수정할 프로젝트 ID |
+### Request Body
+모든 필드는 선택사항(optional)입니다. 수정하려는 필드만 포함하세요.
+```json
+{
+  "project_name": "수학 문제집 1단원 (수정본)",
+  "status": "completed"
+}
+```
+**수정 가능한 필드**:
+| 필드 | 타입 | 설명 |
+|------|------|------|
+| `project_name` | string | 프로젝트 이름 (1~255자) |
+| `doc_type_id` | integer | 문서 타입 ID |
+| `analysis_mode` | string | 분석 모드 (`auto`, `manual`, `hybrid`) |
+| `status` | string | 프로젝트 상태 (`created`, `in_progress`, `completed`, `error`) |
+### Response
+**HTTP 200 OK**
+```json
+{
+  "project_id": 1,
+  "user_id": 1,
+  "doc_type_id": 1,
+  "project_name": "수학 문제집 1단원 (수정본)",
+  "total_pages": 3,
+  "analysis_mode": "auto",
+  "status": "completed",
+  "created_at": "2025-01-22T10:30:00",
+  "updated_at": "2025-01-22T14:20:00"
+}
+```
+### 예제 코드
+**JavaScript (fetch)**:
+```javascript
+const updateProject = async (projectId, updates) => {
+  const response = await fetch(`http://localhost:8000/api/projects/${projectId}`, {
+    method: 'PATCH',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify(updates)
+  });
+  if (!response.ok) {
+    throw new Error(`프로젝트 수정 실패: ${response.status}`);
+  }
+  return await response.json();
+};
+// 사용 예시
+updateProject(1, {
+  project_name: '수학 문제집 1단원 (최종)',
+  status: 'completed'
+}).then(project => {
+  console.log('프로젝트 수정 완료:', project);
+});
+```
+---
+## 5. 프로젝트 삭제
+프로젝트를 삭제합니다. **프로젝트 삭제 시 관련된 모든 페이지, 레이아웃 요소, 텍스트 등이 함께 삭제됩니다 (CASCADE).**
+### Endpoint
+```
+DELETE /api/projects/{project_id}
+```
+### Path Parameters
+| 파라미터 | 타입 | 설명 |
+|----------|------|------|
+| `project_id` | integer | 삭제할 프로젝트 ID |
+### Response
+**HTTP 204 No Content**
+응답 본문(body)이 없습니다.
+### 예제 코드
+**JavaScript (fetch)**:
+```javascript
+const deleteProject = async (projectId) => {
+  const response = await fetch(`http://localhost:8000/api/projects/${projectId}`, {
+    method: 'DELETE'
+  });
+  if (!response.ok) {
+    if (response.status === 404) {
+      throw new Error('프로젝트를 찾을 수 없습니다.');
+    }
+    throw new Error(`프로젝트 삭제 실패: ${response.status}`);
+  }
+  return true;
+};
+// 사용 예시 (확인 다이얼로그 포함)
+const handleDeleteProject = async (projectId, projectName) => {
+  const confirmed = confirm(`"${projectName}" 프로젝트를 삭제하시겠습니까?\n모든 페이지와 데이터가 함께 삭제됩니다.`);
+  if (confirmed) {
+    try {
+      await deleteProject(projectId);
+      alert('프로젝트가 삭제되었습니다.');
+      // 목록 새로고침 등
+    } catch (error) {
+      alert('프로젝트 삭제 실패: ' + error.message);
+    }
+  }
+};
+```
+**React Component 예제**:
+```jsx
+import React, { useState } from 'react';
+import axios from 'axios';
+function ProjectDeleteButton({ projectId, projectName, onDeleted }) {
+  const [deleting, setDeleting] = useState(false);
+  const handleDelete = async () => {
+    if (!confirm(`"${projectName}"을(를) 삭제하시겠습니까?`)) {
+      return;
+    }
+    setDeleting(true);
+    try {
+      await axios.delete(`http://localhost:8000/api/projects/${projectId}`);
+      alert('프로젝트가 삭제되었습니다.');
+      if (onDeleted) onDeleted(projectId);
+    } catch (error) {
+      console.error('삭제 실패:', error);
+      alert('프로젝트 삭제 실패: ' + error.message);
+    } finally {
+      setDeleting(false);
+    }
+  };
+  return (
+    <button
+      onClick={handleDelete}
+      disabled={deleting}
+      className="btn btn-danger"
+    >
+      {deleting ? '삭제 중...' : '삭제'}
+    </button>
+  );
+}
+```
+---
+## 에러 응답
+모든 프로젝트 API는 다음과 같은 에러 응답을 반환할 수 있습니다:
+### 400 Bad Request
+요청 데이터가 유효하지 않은 경우
+```json
+{
+  "error": "Validation Error",
+  "detail": "project_name은 1자 이상이어야 합니다.",
+  "status_code": 400
+}
+```
+### 404 Not Found
+프로젝트를 찾을 수 없는 경우
+```json
+{
+  "error": "프로젝트를 찾을 수 없습니다.",
+  "status_code": 404
+}
+```
+### 500 Internal Server Error
+서버 내부 오류
+```json
+{
+  "error": "Internal Server Error",
+  "detail": "데이터베이스 연결 실패",
+  "status_code": 500
+}
+```
+자세한 에러 처리 방법은 [에러 처리 문서](./06_에러_처리.md)를 참고하세요.
+---
+## 다음 단계
+- **[페이지 API](./02_페이지_API.md)**: 페이지 업로드 및 관리
+- **[분석 API](./03_분석_API.md)**: 프로젝트 분석 실행
+- **[데이터 모델](./05_데이터_모델.md)**: 프로젝트 스키마 상세 정보

docs/Backend API 문서/02_페이지_API.md ADDED Viewed

	@@ -0,0 +1,772 @@

+# 페이지 API
+페이지는 프로젝트를 구성하는 개별 문서 페이지입니다. 이미지 또는 PDF 형식으로 업로드할 수 있으며, 각 페이지는 레이아웃 분석 및 OCR 처리 대상이 됩니다.
+## 📖 목차
+- [엔드포인트 목록](#엔드포인트-목록)
+- [1. 페이지 업로드 (이미지/PDF)](#1-페이지-업로드-이미지pdf)
+- [2. 페이지 상세 조회](#2-페이지-상세-조회)
+- [3. 프로젝트 페이지 목록 조회](#3-프로젝트-페이지-목록-조회)
+- [4. 페이지 텍스트 조회](#4-페이지-텍스트-조회)
+- [5. 페이지 텍스트 저장 (사용자 편집)](#5-페이지-텍스트-저장-사용자-편집)
+---
+## 엔드포인트 목록
+| Method | Endpoint | 설명 |
+|--------|----------|------|
+| POST | `/api/pages/upload` | 이미지 또는 PDF 업로드 |
+| GET | `/api/pages/{page_id}` | 페이지 상세 조회 |
+| GET | `/api/pages/project/{project_id}` | 프로젝트의 모든 페이지 조회 |
+| GET | `/api/pages/{page_id}/text` | 페이지 텍스트 조회 (최신 버전) |
+| POST | `/api/pages/{page_id}/text` | 사용자 편집 텍스트 저장 |
+---
+## 1. 페이지 업로드 (이미지/PDF)
+이미지 파일 또는 PDF 파일을 업로드하여 페이지를 생성합니다.
+### Endpoint
+```http
+POST /api/pages/upload
+```
+### Request
+**Content-Type**: `multipart/form-data`
+#### 이미지 업로드 (단일 페이지)
+| 필드 | 타입 | 필수 | 설명 |
+|------|------|------|------|
+| `project_id` | integer | ✅ | 프로젝트 ID |
+| `page_number` | integer | ✅ | 페이지 번호 (1부터 시작) |
+| `file` | file | ✅ | 이미지 파일 (PNG, JPG, JPEG) |
+#### PDF 업로드 (다중 페이지 자동 생성)
+| 필드 | 타입 | 필수 | 설명 |
+|------|------|------|------|
+| `project_id` | integer | ✅ | 프로젝트 ID |
+| `file` | file | ✅ | PDF 파일 |
+| `page_number` | integer | ❌ | 시작 페이지 번호 (선택, 기본값: 자동 계산) |
+### Response
+#### 이미지 업로드 응답
+**HTTP 201 Created**
+```json
+{
+  "page_id": 1,
+  "project_id": 1,
+  "page_number": 1,
+  "image_path": "uploads/project_1_page_1_abc123.png",
+  "image_width": 2480,
+  "image_height": 3508,
+  "analysis_status": "pending",
+  "processing_time": null,
+  "created_at": "2025-01-22T10:31:00",
+  "analyzed_at": null
+}
+```
+#### PDF 업로드 응답
+**HTTP 201 Created**
+```json
+{
+  "project_id": 1,
+  "total_created": 5,
+  "source_type": "pdf",
+  "pages": [
+    {
+      "page_id": 1,
+      "project_id": 1,
+      "page_number": 1,
+      "image_path": "uploads/3/page_1.png",
+      "image_width": 2480,
+      "image_height": 3508,
+      "analysis_status": "pending",
+      "processing_time": null,
+      "created_at": "2025-01-22T10:31:00",
+      "analyzed_at": null
+    },
+    {
+      "page_id": 2,
+      "project_id": 1,
+      "page_number": 2,
+      "image_path": "uploads/3/page_2.png",
+      "image_width": 2480,
+      "image_height": 3508,
+      "analysis_status": "pending",
+      "processing_time": null,
+      "created_at": "2025-01-22T10:31:01",
+      "analyzed_at": null
+    }
+    // ... 나머지 페이지들
+  ]
+}
+```
+**응답 필드**:
+| 필드 | 타입 | 설명 |
+|------|------|------|
+| `page_id` | integer | 생성된 페이지 고유 ID |
+| `project_id` | integer | 소속 프로젝트 ID |
+| `page_number` | integer | 페이지 번호 |
+| `image_path` | string | 저장된 이미지 파일 경로 |
+| `image_width` | integer | 이미지 너비 (픽셀) |
+| `image_height` | integer | 이미지 높이 (픽셀) |
+| `analysis_status` | string | 분석 상태 (`pending`, `processing`, `completed`, `error`) |
+| `processing_time` | float | 처리 시간 (초, 분석 완료 후 설정) |
+| `created_at` | datetime | 페이지 생성일시 |
+| `analyzed_at` | datetime | 분석 완료일시 |
+### 예제 코드
+#### JavaScript - 이미지 업로드
+```javascript
+const uploadImage = async (projectId, pageNumber, imageFile) => {
+  const formData = new FormData();
+  formData.append('project_id', projectId);
+  formData.append('page_number', pageNumber);
+  formData.append('file', imageFile);
+  const response = await fetch('http://localhost:8000/api/pages/upload', {
+    method: 'POST',
+    body: formData
+  });
+  if (!response.ok) {
+    throw new Error(`이미지 업로드 실패: ${response.status}`);
+  }
+  return await response.json();
+};
+// 사용 예시
+const fileInput = document.getElementById('imageFile');
+const imageFile = fileInput.files[0];
+uploadImage(1, 1, imageFile)
+  .then(page => {
+    console.log('페이지 생성됨:', page.page_id);
+    console.log('이미지 크기:', page.image_width, 'x', page.image_height);
+  })
+  .catch(error => console.error('업로드 실패:', error));
+```
+#### JavaScript - PDF 업로드
+```javascript
+const uploadPDF = async (projectId, pdfFile) => {
+  const formData = new FormData();
+  formData.append('project_id', projectId);
+  formData.append('file', pdfFile);
+  const response = await fetch('http://localhost:8000/api/pages/upload', {
+    method: 'POST',
+    body: formData
+  });
+  if (!response.ok) {
+    throw new Error(`PDF 업로드 실패: ${response.status}`);
+  }
+  return await response.json();
+};
+// 사용 예시
+const fileInput = document.getElementById('pdfFile');
+const pdfFile = fileInput.files[0];
+uploadPDF(1, pdfFile)
+  .then(result => {
+    console.log(`PDF 업로드 완료: ${result.total_created}개 페이지 생성됨`);
+    result.pages.forEach((page, index) => {
+      console.log(`  페이지 ${index + 1}: ${page.image_path}`);
+    });
+  })
+  .catch(error => console.error('업로드 실패:', error));
+```
+#### React Component - 파일 업로드
+```jsx
+import React, { useState } from 'react';
+import axios from 'axios';
+function FileUploader({ projectId, onUploadComplete }) {
+  const [uploading, setUploading] = useState(false);
+  const [progress, setProgress] = useState(0);
+  const handleFileUpload = async (event) => {
+    const file = event.target.files[0];
+    if (!file) return;
+    const formData = new FormData();
+    formData.append('project_id', projectId);
+    formData.append('file', file);
+    // 이미지 파일인 경우 page_number 필요
+    if (file.type.startsWith('image/')) {
+      const pageNumber = prompt('페이지 번호를 입력하세요:');
+      if (!pageNumber) return;
+      formData.append('page_number', pageNumber);
+    }
+    setUploading(true);
+    try {
+      const response = await axios.post(
+        'http://localhost:8000/api/pages/upload',
+        formData,
+        {
+          headers: { 'Content-Type': 'multipart/form-data' },
+          onUploadProgress: (progressEvent) => {
+            const percentCompleted = Math.round(
+              (progressEvent.loaded * 100) / progressEvent.total
+            );
+            setProgress(percentCompleted);
+          }
+        }
+      );
+      if (response.data.source_type === 'pdf') {
+        alert(`${response.data.total_created}개 페이지가 생성되었습니다.`);
+      } else {
+        alert('페이지가 생성되었습니다.');
+      }
+      if (onUploadComplete) onUploadComplete(response.data);
+    } catch (error) {
+      console.error('업로드 실패:', error);
+      alert('파일 업로드 실패: ' + error.message);
+    } finally {
+      setUploading(false);
+      setProgress(0);
+    }
+  };
+  return (
+    <div>
+      <input
+        type="file"
+        accept=".pdf,.png,.jpg,.jpeg"
+        onChange={handleFileUpload}
+        disabled={uploading}
+      />
+      {uploading && (
+        <div>
+          <progress value={progress} max="100" />
+          <span>{progress}%</span>
+        </div>
+      )}
+    </div>
+  );
+}
+```
+---
+## 2. 페이지 상세 조회
+특정 페이지의 상세 정보를 조회합니다.
+### Endpoint
+```http
+GET /api/pages/{page_id}
+```
+### Path Parameters
+| 파라미터 | 타입 | 설명 |
+|----------|------|------|
+| `page_id` | integer | 조회할 페이지 ID |
+### Response
+**HTTP 200 OK**
+```json
+{
+  "page_id": 1,
+  "project_id": 1,
+  "page_number": 1,
+  "image_path": "uploads/project_1_page_1_abc123.png",
+  "image_width": 2480,
+  "image_height": 3508,
+  "analysis_status": "completed",
+  "processing_time": 5.23,
+  "created_at": "2025-01-22T10:31:00",
+  "analyzed_at": "2025-01-22T10:35:00"
+}
+```
+### 예제 코드
+```javascript
+const getPageDetail = async (pageId) => {
+  const response = await fetch(`http://localhost:8000/api/pages/${pageId}`);
+  if (!response.ok) {
+    if (response.status === 404) {
+      throw new Error('페이지를 찾을 수 없습니다.');
+    }
+    throw new Error(`페이지 조회 실패: ${response.status}`);
+  }
+  return await response.json();
+};
+// 사용 예시
+getPageDetail(1).then(page => {
+  console.log('페이지:', page.page_number);
+  console.log('분석 상태:', page.analysis_status);
+  console.log('처리 시간:', page.processing_time, '초');
+});
+```
+---
+## 3. 프로젝트 페이지 목록 조회
+프로젝트에 속한 모든 페이지를 조회합니다.
+### Endpoint
+```http
+GET /api/pages/project/{project_id}
+```
+### Path Parameters
+| 파라미터 | 타입 | 설명 |
+|----------|------|------|
+| `project_id` | integer | 프로젝트 ID |
+### Query Parameters
+| 파라미터 | 타입 | 필수 | 설명 |
+|----------|------|------|------|
+| `include_error` | boolean | ❌ | 에러 상태 페이지 포함 여부 (기본값: false) |
+### Response
+**HTTP 200 OK**
+```json
+[
+  {
+    "page_id": 1,
+    "project_id": 1,
+    "page_number": 1,
+    "image_path": "uploads/project_1_page_1_abc123.png",
+    "image_width": 2480,
+    "image_height": 3508,
+    "analysis_status": "completed",
+    "processing_time": 5.23,
+    "created_at": "2025-01-22T10:31:00",
+    "analyzed_at": "2025-01-22T10:35:00"
+  },
+  {
+    "page_id": 2,
+    "project_id": 1,
+    "page_number": 2,
+    "image_path": "uploads/project_1_page_2_def456.png",
+    "image_width": 2480,
+    "image_height": 3508,
+    "analysis_status": "completed",
+    "processing_time": 4.87,
+    "created_at": "2025-01-22T10:32:00",
+    "analyzed_at": "2025-01-22T10:36:00"
+  }
+]
+```
+### 예제 코드
+```javascript
+const getProjectPages = async (projectId, includeError = false) => {
+  const params = new URLSearchParams({ include_error: includeError });
+  const response = await fetch(
+    `http://localhost:8000/api/pages/project/${projectId}?${params}`
+  );
+  if (!response.ok) {
+    throw new Error(`페이지 조회 실패: ${response.status}`);
+  }
+  return await response.json();
+};
+// 사용 예시
+getProjectPages(1, false).then(pages => {
+  console.log(`총 ${pages.length}개 페이지`);
+  const completedPages = pages.filter(p => p.analysis_status === 'completed');
+  const pendingPages = pages.filter(p => p.analysis_status === 'pending');
+  console.log(`완료: ${completedPages.length}, 대기: ${pendingPages.length}`);
+});
+```
+---
+## 4. 페이지 텍스트 조회
+페이지의 최신 텍스트 버전을 조회합니다. `is_current=True`인 버전이 반환됩니다.
+### Endpoint
+```http
+GET /api/pages/{page_id}/text
+```
+### Path Parameters
+| 파라미터 | 타입 | 설명 |
+|----------|------|------|
+| `page_id` | integer | 페이지 ID |
+### Response
+**HTTP 200 OK**
+```json
+{
+  "page_id": 1,
+  "version_id": 3,
+  "version_type": "user_edited",
+  "is_current": true,
+  "content": "<h2>1. 다음 식을 계산하시오.</h2>\n<p>(1) 3 + 5 = ?</p>\n<p>답: 8</p>",
+  "created_at": "2025-01-22T11:00:00"
+}
+```
+**응답 필드**:
+| 필드 | 타입 | 설명 |
+|------|------|------|
+| `page_id` | integer | 페이지 ID |
+| `version_id` | integer | 텍스트 버전 ID |
+| `version_type` | string | 버전 유형<br>- `original`: 원본 OCR 결과<br>- `auto_formatted`: 자동 포맷팅 적용<br>- `user_edited`: 사용자 편집 |
+| `is_current` | boolean | 현재 버전 여부 (항상 true) |
+| `content` | string | HTML 형식의 텍스트 내용 |
+| `created_at` | datetime | 버전 생성일시 |
+### 예제 코드
+```javascript
+const getPageText = async (pageId) => {
+  const response = await fetch(`http://localhost:8000/api/pages/${pageId}/text`);
+  if (!response.ok) {
+    if (response.status === 404) {
+      throw new Error('페이지 텍스트를 찾을 수 없습니다.');
+    }
+    throw new Error(`텍스트 조회 실패: ${response.status}`);
+  }
+  return await response.json();
+};
+// 사용 예시
+getPageText(1).then(textData => {
+  console.log('버전:', textData.version_type);
+  console.log('내용:', textData.content);
+  // HTML 표시 (content는 HTML 문자열이므로, 신뢰할 수 없는 입력이 포함될 수 있다면 삽입 전에 반드시 정제(sanitize)할 것)
+  document.getElementById('textEditor').innerHTML = textData.content;
+});
+```
+#### React Component - 텍스트 뷰어
+```jsx
+import React, { useState, useEffect } from 'react';
+import axios from 'axios';
+function PageTextViewer({ pageId }) {
+  const [textData, setTextData] = useState(null);
+  const [loading, setLoading] = useState(true);
+  useEffect(() => {
+    const fetchText = async () => {
+      try {
+        const response = await axios.get(
+          `http://localhost:8000/api/pages/${pageId}/text`
+        );
+        setTextData(response.data);
+      } catch (error) {
+        console.error('텍스트 조회 실패:', error);
+      } finally {
+        setLoading(false);
+      }
+    };
+    fetchText();
+  }, [pageId]);
+  if (loading) return <div>로딩 중...</div>;
+  if (!textData) return <div>텍스트를 찾을 수 없습니다.</div>;
+  return (
+    <div>
+      <div className="text-meta">
+        <span>버전: {textData.version_type}</span>
+        <span>생성: {new Date(textData.created_at).toLocaleString()}</span>
+      </div>
+      <div
+        className="text-content"
+        dangerouslySetInnerHTML={{ __html: textData.content }}
+      />
+    </div>
+  );
+}
+```
+---
+## 5. 페이지 텍스트 저장 (사용자 편집)
+사용자가 편집한 텍스트를 새로운 버전으로 저장합니다.
+### Endpoint
+```http
+POST /api/pages/{page_id}/text
+```
+### Path Parameters
+| 파라미터 | 타입 | 설명 |
+|----------|------|------|
+| `page_id` | integer | 페이지 ID |
+### Request Body
+```json
+{
+  "content": "<h2>1. 다음 식을 계산하시오.</h2>\n<p>(1) 3 + 5 = ?</p>\n<p>답: 8</p>",
+  "user_id": 1
+}
+```
+**필드 설명**:
+| 필드 | 타입 | 필수 | 설명 |
+|------|------|------|------|
+| `content` | string | ✅ | 저장할 텍스트 내용 (HTML 형식) |
+| `user_id` | integer | ❌ | 수정한 사용자 ID (선택) |
+### Response
+**HTTP 200 OK**
+```json
+{
+  "page_id": 1,
+  "version_id": 4,
+  "version_type": "user_edited",
+  "is_current": true,
+  "content": "<h2>1. 다음 식을 계산하시오.</h2>\n<p>(1) 3 + 5 = ?</p>\n<p>답: 8</p>",
+  "created_at": "2025-01-22T11:10:00"
+}
+```
+### 동작 방식
+1. 새로운 텍스트 버전을 생성 (`version_type="user_edited"`)
+2. 기존의 `is_current=True` 버전을 `False`로 변경
+3. 새 버전을 `is_current=True`로 설정
+4. 버전 번호 자동 증가
+### 예제 코드
+```javascript
+const savePageText = async (pageId, content, userId = null) => {
+  const response = await fetch(`http://localhost:8000/api/pages/${pageId}/text`, {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'application/json',
+    },
+    body: JSON.stringify({
+      content: content,
+      user_id: userId
+    })
+  });
+  if (!response.ok) {
+    throw new Error(`텍스트 저장 실패: ${response.status}`);
+  }
+  return await response.json();
+};
+// 사용 예시
+const editorContent = document.getElementById('textEditor').innerHTML;
+savePageText(1, editorContent, 1)
+  .then(result => {
+    console.log('텍스트 저장 완료');
+    console.log('새 버전 ID:', result.version_id);
+    alert('저장되었습니다.');
+  })
+  .catch(error => {
+    console.error('저장 실패:', error);
+    alert('저장 실패: ' + error.message);
+  });
+```
+#### React Component - TinyMCE 편집기
+```jsx
+import React, { useState, useEffect } from 'react';
+import { Editor } from '@tinymce/tinymce-react';
+import axios from 'axios';
+function PageTextEditor({ pageId, userId }) {
+  const [content, setContent] = useState('');
+  const [saving, setSaving] = useState(false);
+  const [versionId, setVersionId] = useState(null);
+  useEffect(() => {
+    const fetchText = async () => {
+      try {
+        const response = await axios.get(
+          `http://localhost:8000/api/pages/${pageId}/text`
+        );
+        setContent(response.data.content);
+        setVersionId(response.data.version_id);
+      } catch (error) {
+        console.error('텍스트 로드 실패:', error);
+      }
+    };
+    fetchText();
+  }, [pageId]);
+  const handleSave = async () => {
+    setSaving(true);
+    try {
+      const response = await axios.post(
+        `http://localhost:8000/api/pages/${pageId}/text`,
+        {
+          content: content,
+          user_id: userId
+        }
+      );
+      setVersionId(response.data.version_id);
+      alert('저장되었습니다.');
+    } catch (error) {
+      console.error('저장 실패:', error);
+      alert('저장 실패: ' + error.message);
+    } finally {
+      setSaving(false);
+    }
+  };
+  return (
+    <div>
+      <div className="editor-header">
+        <span>버전 ID: {versionId}</span>
+        <button onClick={handleSave} disabled={saving}>
+          {saving ? '저장 중...' : '저장'}
+        </button>
+      </div>
+      <Editor
+        apiKey="your-tinymce-api-key"
+        value={content}
+        onEditorChange={setContent}
+        init={{
+          height: 500,
+          menubar: false,
+          plugins: [
+            'advlist', 'autolink', 'lists', 'link', 'image',
+            'charmap', 'preview', 'anchor', 'searchreplace',
+            'visualblocks', 'code', 'fullscreen',
+            'insertdatetime', 'media', 'table', 'help', 'wordcount'
+          ],
+          toolbar: 'undo redo | formatselect | bold italic | ' +
+            'alignleft aligncenter alignright | ' +
+            'bullist numlist outdent indent | help'
+        }}
+      />
+    </div>
+  );
+}
+```
+---
+## 에러 응답
+### 400 Bad Request
+요청 데이터가 유효하지 않은 경우
+```json
+{
+  "error": "이미지 업로드 시 page_number는 필수입니다.",
+  "status_code": 400
+}
+```
+### 404 Not Found
+페이지를 찾을 수 없는 경우
+```json
+{
+  "error": "페이지를 찾을 수 없습니다.",
+  "status_code": 404
+}
+```
+### 413 Payload Too Large
+파일 크기가 너무 큰 경우 (일반적으로 50MB 제한)
+```json
+{
+  "error": "파일 크기가 너무 큽니다.",
+  "detail": "최대 50MB까지 업로드 가능합니다.",
+  "status_code": 413
+}
+```
+### 500 Internal Server Error
+서버 내부 오류
+```json
+{
+  "error": "Internal Server Error",
+  "detail": "이미지 처리 중 오류가 발생했습니다.",
+  "status_code": 500
+}
+```
+---
+## 다음 단계
+- **[분석 API](./03_분석_API.md)**: 업로드한 페이지 분석하기
+- **[다운로드 API](./04_다운로드_API.md)**: 전체 문서 다운로드
+- **[데이터 모델](./05_데이터_모델.md)**: 페이지 스키마 상세 정보

docs/Backend API 문서/03_분석_API.md ADDED Viewed

	@@ -0,0 +1,682 @@

+# 분석 API
+분석 API는 업로드된 페이지에 대해 AI 레이아웃 분석, OCR 텍스트 추출, 정렬, 포맷팅을 수행합니다.
+## 📖 목차
+- [엔드포인트 목록](#엔드포인트-목록)
+- [1. 프로젝트 배치 분석 (동기)](#1-프로젝트-배치-분석-동기)
+- [2. 단일 페이지 비동기 분석](#2-단일-페이지-비동기-분석)
+- [3. 분석 작업 상태 조회](#3-분석-작업-상태-조회)
+- [분석 파이프라인 상세](#분석-파이프라인-상세)
+---
+## 엔드포인트 목록
+| Method | Endpoint | 설명 |
+|--------|----------|------|
+| POST | `/api/projects/{project_id}/analyze` | 프로젝트 전체 배치 분석 (동기) |
+| POST | `/api/pages/{page_id}/analyze/async` | 단일 페이지 비동기 분석 |
+| GET | `/api/analysis/jobs/{job_id}` | 비동기 작업 상태 조회 |
+---
+## 1. 프로젝트 배치 분석 (동기)
+프로젝트 내 모든 `pending` 상태 페이지를 순차적으로 분석합니다.
+### Endpoint
+```http
+POST /api/projects/{project_id}/analyze
+```
+### Path Parameters
+| 파라미터 | 타입 | 설명 |
+|----------|------|------|
+| `project_id` | integer | 분석할 프로젝트 ID |
+### Request Body
+```json
+{
+  "use_ai_descriptions": true,
+  "api_key": "sk-..."
+}
+```
+**필드 설명**:
+| 필드 | 타입 | 필수 | 설명 |
+|------|------|------|------|
+| `use_ai_descriptions` | boolean | ❌ | AI 설명 생성 여부 (기본값: true)<br>figure, table, flowchart에 대한 GPT-4 설명 생성 |
+| `api_key` | string | ❌ | OpenAI API 키 (선택)<br>제공하지 않으면 서버 환경 변수 사용 |
+### Response
+**HTTP 202 Accepted**
+```json
+{
+  "project_id": 1,
+  "status": "completed",
+  "total_pages": 3,
+  "completed_pages": 3,
+  "failed_pages": 0,
+  "total_time": 15.67,
+  "pages": [
+    {
+      "page_id": 1,
+      "page_number": 1,
+      "status": "completed",
+      "layout_count": 12,
+      "ocr_count": 10,
+      "ai_description_count": 2,
+      "processing_time": 5.23,
+      "message": "페이지 분석 완료"
+    },
+    {
+      "page_id": 2,
+      "page_number": 2,
+      "status": "completed",
+      "layout_count": 15,
+      "ocr_count": 13,
+      "ai_description_count": 2,
+      "processing_time": 5.12,
+      "message": "페이지 분석 완료"
+    },
+    {
+      "page_id": 3,
+      "page_number": 3,
+      "status": "completed",
+      "layout_count": 10,
+      "ocr_count": 8,
+      "ai_description_count": 2,
+      "processing_time": 5.32,
+      "message": "페이지 분석 완료"
+    }
+  ]
+}
+```
+**응답 필드**:
+| 필드 | 타입 | 설명 |
+|------|------|------|
+| `project_id` | integer | 프로젝트 ID |
+| `status` | string | 전체 분석 상태 (`completed`, `partial`, `failed`) |
+| `total_pages` | integer | 분석 대상 페이지 수 |
+| `completed_pages` | integer | 성공한 페이지 수 |
+| `failed_pages` | integer | 실패한 페이지 수 |
+| `total_time` | float | 전체 처리 시간 (초) |
+| `pages` | array | 페이지별 분석 결과 |
+**페이지별 결과**:
+| 필드 | 타입 | 설명 |
+|------|------|------|
+| `page_id` | integer | 페이지 ID |
+| `page_number` | integer | 페이지 번호 |
+| `status` | string | 분석 상태 (`completed`, `failed`) |
+| `layout_count` | integer | 감지된 레이아웃 요소 수 |
+| `ocr_count` | integer | OCR 수행된 요소 수 |
+| `ai_description_count` | integer | AI 설명 생성된 요소 수 |
+| `processing_time` | float | 페이지 처리 시간 (초) |
+| `message` | string | 상태 메시지 |
+### 예제 코드
+**JavaScript (fetch)**:
+```javascript
+const analyzeProject = async (projectId, useAI = true, apiKey = null) => {
+  const response = await fetch(
+    `http://localhost:8000/api/projects/${projectId}/analyze`,
+    {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({
+        use_ai_descriptions: useAI,
+        api_key: apiKey
+      })
+    }
+  );
+  if (!response.ok) {
+    throw new Error(`분석 실패: ${response.status}`);
+  }
+  return await response.json();
+};
+// 사용 예시
+analyzeProject(1, true, 'sk-...')
+  .then(result => {
+    console.log(`분석 완료: ${result.completed_pages}/${result.total_pages} 페이지`);
+    console.log(`총 소요 시간: ${result.total_time.toFixed(2)}초`);
+    result.pages.forEach(page => {
+      console.log(`페이지 ${page.page_number}:`);
+      console.log(`  - 레이아웃: ${page.layout_count}개`);
+      console.log(`  - OCR: ${page.ocr_count}개`);
+      console.log(`  - AI 설명: ${page.ai_description_count}개`);
+      console.log(`  - 시간: ${page.processing_time.toFixed(2)}초`);
+    });
+  })
+  .catch(error => console.error('분석 실패:', error));
+```
+**React Component - 분석 실행 및 결과 표시**:
+```jsx
+import React, { useState } from 'react';
+import axios from 'axios';
+function ProjectAnalyzer({ projectId, onComplete }) {
+  const [analyzing, setAnalyzing] = useState(false);
+  const [result, setResult] = useState(null);
+  const handleAnalyze = async () => {
+    setAnalyzing(true);
+    try {
+      const response = await axios.post(
+        `http://localhost:8000/api/projects/${projectId}/analyze`,
+        {
+          use_ai_descriptions: true,
+          api_key: localStorage.getItem('openai_api_key') // 사용자 API 키 사용
+        }
+      );
+      setResult(response.data);
+      if (response.data.status === 'completed') {
+        alert(`분석 완료: ${response.data.completed_pages}/${response.data.total_pages} 페이지`);
+      } else {
+        alert(`일부 실패: ${response.data.failed_pages}개 페이지 실패`);
+      }
+      if (onComplete) onComplete(response.data);
+    } catch (error) {
+      console.error('분석 실패:', error);
+      alert('분석 실패: ' + error.message);
+    } finally {
+      setAnalyzing(false);
+    }
+  };
+  return (
+    <div>
+      <button onClick={handleAnalyze} disabled={analyzing}>
+        {analyzing ? '분석 중...' : 'AI 분석 시작'}
+      </button>
+      {analyzing && (
+        <div className="analyzing-indicator">
+          <div className="spinner"></div>
+          <p>페이지를 분석하고 있습니다. 잠시만 기다려주세요...</p>
+        </div>
+      )}
+      {result && (
+        <div className="analysis-result">
+          <h3>분석 결과</h3>
+          <p>전체 페이지: {result.total_pages}</p>
+          <p>완료: {result.completed_pages}</p>
+          <p>실패: {result.failed_pages}</p>
+          <p>소요 시간: {result.total_time.toFixed(2)}초</p>
+          <table>
+            <thead>
+              <tr>
+                <th>페이지</th>
+                <th>상태</th>
+                <th>레이아웃</th>
+                <th>OCR</th>
+                <th>AI 설명</th>
+                <th>시간</th>
+              </tr>
+            </thead>
+            <tbody>
+              {result.pages.map(page => (
+                <tr key={page.page_id}>
+                  <td>{page.page_number}</td>
+                  <td>{page.status}</td>
+                  <td>{page.layout_count}</td>
+                  <td>{page.ocr_count}</td>
+                  <td>{page.ai_description_count}</td>
+                  <td>{page.processing_time.toFixed(2)}s</td>
+                </tr>
+              ))}
+            </tbody>
+          </table>
+        </div>
+      )}
+    </div>
+  );
+}
+```
+---
+## 2. 단일 페이지 비동기 분석
+단일 페이지를 백그라운드에서 비동기로 분석합니다. 작업 ID를 즉시 반환하고, 작업 상태는 별도로 조회할 수 있습니다.
+### Endpoint
+```http
+POST /api/pages/{page_id}/analyze/async
+```
+### Path Parameters
+| 파라미터 | 타입 | 설명 |
+|----------|------|------|
+| `page_id` | integer | 분석할 페이지 ID |
+### Request Body
+```json
+{
+  "use_ai_descriptions": true,
+  "api_key": "sk-..."
+}
+```
+### Response
+**HTTP 202 Accepted**
+```json
+{
+  "job_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
+  "status": "pending",
+  "message": "페이지 분석 작업이 시작되었습니다.",
+  "page_id": 1,
+  "status_check_url": "/api/analysis/jobs/a1b2c3d4-e5f6-7890-abcd-ef1234567890"
+}
+```
+**응답 필드**:
+| 필드 | 타입 | 설명 |
+|------|------|------|
+| `job_id` | string | 작업 고유 ID (UUID) |
+| `status` | string | 작업 상태 (`pending`) |
+| `message` | string | 상태 메시지 |
+| `page_id` | integer | 분석 중인 페이지 ID |
+| `status_check_url` | string | 작업 상태 조회 URL |
+### 예제 코드
+```javascript
+const analyzePageAsync = async (pageId, useAI = true, apiKey = null) => {
+  const response = await fetch(
+    `http://localhost:8000/api/pages/${pageId}/analyze/async`,
+    {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify({
+        use_ai_descriptions: useAI,
+        api_key: apiKey
+      })
+    }
+  );
+  if (!response.ok) {
+    throw new Error(`비동기 분석 시작 실패: ${response.status}`);
+  }
+  return await response.json();
+};
+// 사용 예시
+analyzePageAsync(1, true)
+  .then(job => {
+    console.log('작업 시작됨:', job.job_id);
+    console.log('상태 조회 URL:', job.status_check_url);
+    // 주기적으로 상태 확인 (pollJobStatus는 아래 "3. 분석 작업 상태 조회"의 폴링 예제 참고)
+    return pollJobStatus(job.job_id);
+  })
+  .catch(error => console.error('작업 시작 실패:', error));
+```
+---
+## 3. 분석 작업 상태 조회
+비동기 분석 작업의 현재 상태를 조회합니다.
+### Endpoint
+```http
+GET /api/analysis/jobs/{job_id}
+```
+### Path Parameters
+| 파라미터 | 타입 | 설명 |
+|----------|------|------|
+| `job_id` | string | 작업 ID (UUID) |
+### Response
+#### 작업 대기 중
+**HTTP 200 OK**
+```json
+{
+  "job_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
+  "status": "pending",
+  "page_id": 1,
+  "page_number": 1,
+  "project_id": 1,
+  "result": null,
+  "error": null,
+  "progress": "작업 대기 중..."
+}
+```
+#### 작업 진행 중
+```json
+{
+  "job_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
+  "status": "processing",
+  "page_id": 1,
+  "page_number": 1,
+  "project_id": 1,
+  "result": null,
+  "error": null,
+  "progress": "레이아웃 분석 및 OCR 수행 중..."
+}
+```
+#### 작업 완료
+```json
+{
+  "job_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
+  "status": "completed",
+  "page_id": 1,
+  "page_number": 1,
+  "project_id": 1,
+  "result": {
+    "page_id": 1,
+    "page_number": 1,
+    "layout_count": 12,
+    "ocr_count": 10,
+    "ai_description_count": 2,
+    "processing_time": 5.23,
+    "message": "페이지 분석이 성공적으로 완료되었습니다."
+  },
+  "error": null,
+  "progress": "분석 완료"
+}
+```
+#### 작업 실패
+```json
+{
+  "job_id": "a1b2c3d4-e5f6-7890-abcd-ef1234567890",
+  "status": "failed",
+  "page_id": 1,
+  "page_number": 1,
+  "project_id": 1,
+  "result": null,
+  "error": "이미지 파일을 찾을 수 없습니다.",
+  "progress": "분석 실패"
+}
+```
+### 예제 코드
+**JavaScript - 폴링(Polling)**:
+```javascript
+const checkJobStatus = async (jobId) => {
+  const response = await fetch(
+    `http://localhost:8000/api/analysis/jobs/${jobId}`
+  );
+  if (!response.ok) {
+    throw new Error(`작업 상태 조회 실패: ${response.status}`);
+  }
+  return await response.json();
+};
+// 주기적으로 상태 확인 (폴링)
+const pollJobStatus = async (jobId, interval = 2000, maxAttempts = 60) => {
+  let attempts = 0;
+  const poll = async () => {
+    if (attempts >= maxAttempts) {
+      throw new Error('작업 조회 시간 초과');
+    }
+    attempts++;
+    const status = await checkJobStatus(jobId);
+    console.log(`[${attempts}] 상태: ${status.status} - ${status.progress}`);
+    if (status.status === 'completed') {
+      console.log('작업 완료!', status.result);
+      return status.result;
+    }
+    if (status.status === 'failed') {
+      throw new Error(`작업 실패: ${status.error}`);
+    }
+    // 계속 대기 중이면 재시도
+    await new Promise(resolve => setTimeout(resolve, interval));
+    return poll();
+  };
+  return poll();
+};
+// 사용 예시
+analyzePageAsync(1, true)
+  .then(job => {
+    console.log('비동기 작업 시작:', job.job_id);
+    return pollJobStatus(job.job_id);
+  })
+  .then(result => {
+    console.log('분석 완료:', result);
+    alert(`분석 완료: 레이아웃 ${result.layout_count}개, OCR ${result.ocr_count}개`);
+  })
+  .catch(error => {
+    console.error('에러:', error);
+    alert('분석 실패: ' + error.message);
+  });
+```
+**React Component - 실시간 상태 표시**:
+```jsx
+import React, { useState, useEffect } from 'react';
+import axios from 'axios';
+function AsyncAnalyzer({ pageId, onComplete }) {
+  const [jobId, setJobId] = useState(null);
+  const [status, setStatus] = useState(null);
+  const [analyzing, setAnalyzing] = useState(false);
+  useEffect(() => {
+    if (!jobId) return;
+    // 2초마다 상태 확인
+    const interval = setInterval(async () => {
+      try {
+        const response = await axios.get(
+          `http://localhost:8000/api/analysis/jobs/${jobId}`
+        );
+        setStatus(response.data);
+        if (response.data.status === 'completed') {
+          clearInterval(interval);
+          setAnalyzing(false);
+          if (onComplete) onComplete(response.data.result);
+        }
+        if (response.data.status === 'failed') {
+          clearInterval(interval);
+          setAnalyzing(false);
+          alert('분석 실패: ' + response.data.error);
+        }
+      } catch (error) {
+        console.error('상태 조회 실패:', error);
+      }
+    }, 2000);
+    return () => clearInterval(interval);
+  }, [jobId, onComplete]);
+  const handleStartAnalysis = async () => {
+    setAnalyzing(true);
+    try {
+      const response = await axios.post(
+        `http://localhost:8000/api/pages/${pageId}/analyze/async`,
+        { use_ai_descriptions: true }
+      );
+      setJobId(response.data.job_id);
+      setStatus(response.data);
+    } catch (error) {
+      console.error('분석 시작 실패:', error);
+      alert('분석 시작 실패: ' + error.message);
+      setAnalyzing(false);
+    }
+  };
+  return (
+    <div>
+      <button onClick={handleStartAnalysis} disabled={analyzing}>
+        {analyzing ? '분석 중...' : '비동기 분석 시작'}
+      </button>
+      {status && (
+        <div className="status-display">
+          <p><strong>작업 ID:</strong> {status.job_id}</p>
+          <p><strong>상태:</strong> {status.status}</p>
+          <p><strong>진행상황:</strong> {status.progress}</p>
+          {status.status === 'completed' && status.result && (
+            <div className="result">
+              <h4>분석 결과</h4>
+              <p>레이아웃: {status.result.layout_count}개</p>
+              <p>OCR: {status.result.ocr_count}개</p>
+              <p>AI 설명: {status.result.ai_description_count}개</p>
+              <p>처리 시간: {status.result.processing_time.toFixed(2)}초</p>
+            </div>
+          )}
+          {status.status === 'failed' && (
+            <div className="error">
+              <p style={{color: 'red'}}>에러: {status.error}</p>
+            </div>
+          )}
+        </div>
+      )}
+    </div>
+  );
+}
+```
+---
+## 분석 파이프라인 상세
+각 페이지 분석은 다음 단계로 진행됩니다:
+### 1단계: 레이아웃 분석 (Layout Detection)
+- **모델**: DocLayout-YOLO
+- **감지 클래스**:
+  - `question_number`: 문제 번호 (worksheet 전용)
+  - `text`: 본문 텍스트
+  - `figure`: 그림/도표
+  - `table`: 표
+  - `flowchart`: 순서도
+  - 등
+### 2단계: OCR 텍스트 추출
+- **엔진**: PaddleOCR
+- **대상**: `text`, `question_number` 등 텍스트 요소
+- **언어**: 한국어, 영어, 중국어, 일본어 지원
+### 3단계: AI 설명 생성 (선택)
+- **모델**: GPT-4-turbo
+- **대상**: `figure`, `table`, `flowchart`
+- **조건**: `use_ai_descriptions=true` 일 때만 수행
+### 4단계: 정렬 (Sorting)
+#### Worksheet (문제지)
+- 문제 번호 기반 그룹화
+- 앵커 요소(문제 번호) 중심으로 자식 요소 수집
+- Y좌표 기준 정렬
+#### Document (일반 문서)
+- 좌표 기반 읽기 순서 정렬
+- Y좌표 우선, X좌표 보조
+### 5단계: 포맷팅 (Formatting)
+- 데이터베이스 포맷팅 규칙 적용
+- 클래스별 접두사/접미사, 들여쓰기 적용
+- HTML 형식으로 변환
+### 6단계: 버전 저장
+- `version_type="auto_formatted"` 버전 생성
+- `is_current=true` 설정
+---
+## 에러 응답
+### 404 Not Found
+프로젝트 또는 페이지를 찾을 수 없음
+```json
+{
+  "error": "프로젝트를 찾을 수 없습니다.",
+  "status_code": 404
+}
+```
+### 500 Internal Server Error
+분석 중 오류 발생
+```json
+{
+  "error": "Internal Server Error",
+  "detail": "레이아웃 분석 중 오류가 발생했습니다.",
+  "status_code": 500
+}
+```
+---
+## 다음 단계
+- **[다운로드 API](./04_다운로드_API.md)**: 분석 결과를 Word 문서로 다운로드
+- **[데이터 모델](./05_데이터_모델.md)**: 분석 관련 스키마 상세 정보

docs/Backend API 문서/04_다운로드_API.md ADDED Viewed

	@@ -0,0 +1,477 @@

+# 다운로드 API
+분석이 완료된 프로젝트의 텍스트를 통합하여 조회하거나 Word 문서로 다운로드할 수 있습니다.
+## 📖 목차
+- [엔드포인트 목록](#엔드포인트-목록)
+- [1. 통합 텍스트 조회](#1-통합-텍스트-조회)
+- [2. Word 문서 다운로드](#2-word-문서-다운로드)
+---
+## 엔드포인트 목록
+| Method | Endpoint | 설명 |
+|--------|----------|------|
+| GET | `/api/projects/{project_id}/combined-text` | 프로젝트 통합 텍스트 조회 (JSON) |
+| POST | `/api/projects/{project_id}/download` | Word 문서 다운로드 (DOCX) |
+---
+## 1. 통합 텍스트 조회
+프로젝트의 모든 페이지 텍스트를 통합하여 조회합니다. 캐시를 사용하므로 빠르게 응답합니다.
+### Endpoint
+```http
+GET /api/projects/{project_id}/combined-text
+```
+### Path Parameters
+| 파라미터 | 타입 | 설명 |
+|----------|------|------|
+| `project_id` | integer | 프로젝트 ID |
+### Response
+**HTTP 200 OK**
+```json
+{
+  "project_id": 1,
+  "project_name": "수학 문제집 1단원",
+  "combined_text": "<h2>1. 다음 식을 계산하시오.</h2>\n<p>(1) 3 + 5 = ?</p>\n<p>답: 8</p>\n\n<h2>2. 다음 그림을 보고 답하시오.</h2>\n<p>[그림 설명] 세 개의 사과가 그려져 있는 그림입니다...</p>",
+  "stats": {
+    "total_pages": 3,
+    "total_words": 450,
+    "total_characters": 2340
+  },
+  "generated_at": "2025-01-22T11:00:00"
+}
+```
+**응답 필드**:
+| 필드 | 타입 | 설명 |
+|------|------|------|
+| `project_id` | integer | 프로젝트 ID |
+| `project_name` | string | 프로젝트 이름 |
+| `combined_text` | string | 전체 페이지의 텍스트를 통합한 HTML 문자열 |
+| `stats` | object | 통계 정보 |
+| `stats.total_pages` | integer | 총 페이지 수 |
+| `stats.total_words` | integer | 총 단어 수 |
+| `stats.total_characters` | integer | 총 문자 수 |
+| `generated_at` | datetime | 통합 텍스트 생성 일시 |
+### 캐시 동작
+- 첫 번째 호출 시 모든 페이지의 최신 텍스트 버전을 수집하여 통합
+- 결과를 `combined_results` 테이블에 캐시
+- 이후 호출 시 캐시된 데이터 반환 (빠른 응답)
+- 페이지 텍스트가 수정되면 자동으로 캐시 갱신
+### 예제 코드
+**JavaScript (fetch)**:
+```javascript
+const getCombinedText = async (projectId) => {
+  const response = await fetch(
+    `http://localhost:8000/api/projects/${projectId}/combined-text`
+  );
+  if (!response.ok) {
+    if (response.status === 404) {
+      throw new Error('프로젝트를 찾을 수 없습니다.');
+    }
+    throw new Error(`통합 텍스트 조회 실패: ${response.status}`);
+  }
+  return await response.json();
+};
+// 사용 예시
+getCombinedText(1).then(data => {
+  console.log('프로젝트:', data.project_name);
+  console.log('페이지 수:', data.stats.total_pages);
+  console.log('단어 수:', data.stats.total_words);
+  // HTML 표시 (combined_text는 HTML 문자열이므로, 신뢰할 수 없는 입력이 포함될 수 있다면 삽입 전에 반드시 정제(sanitize)할 것)
+  document.getElementById('combined-content').innerHTML = data.combined_text;
+});
+```
+**React Component**:
+```jsx
+import React, { useState, useEffect } from 'react';
+import axios from 'axios';
+function CombinedTextViewer({ projectId }) {
+  const [data, setData] = useState(null);
+  const [loading, setLoading] = useState(true);
+  useEffect(() => {
+    const fetchCombinedText = async () => {
+      try {
+        const response = await axios.get(
+          `http://localhost:8000/api/projects/${projectId}/combined-text`
+        );
+        setData(response.data);
+      } catch (error) {
+        console.error('통합 텍스트 조회 실패:', error);
+      } finally {
+        setLoading(false);
+      }
+    };
+    fetchCombinedText();
+  }, [projectId]);
+  if (loading) return <div>로딩 중...</div>;
+  if (!data) return <div>데이터를 찾을 수 없습니다.</div>;
+  return (
+    <div className="combined-text-viewer">
+      <header>
+        <h1>{data.project_name}</h1>
+        <div className="stats">
+          <span>페이지: {data.stats.total_pages}</span>
+          <span>단어: {data.stats.total_words}</span>
+          <span>문자: {data.stats.total_characters}</span>
+        </div>
+      </header>
+      <div
+        className="content"
+        dangerouslySetInnerHTML={{ __html: data.combined_text }}
+      />
+      <footer>
+        <small>생성 일시: {new Date(data.generated_at).toLocaleString()}</small>
+      </footer>
+    </div>
+  );
+}
+```
+---
+## 2. Word 문서 다운로드
+프로젝트의 통합 텍스트를 Word 문서(DOCX) 형식으로 다운로드합니다.
+### Endpoint
+```http
+POST /api/projects/{project_id}/download
+```
+### Path Parameters
+| 파라미터 | 타입 | 설명 |
+|----------|------|------|
+| `project_id` | integer | 프로젝트 ID |
+### Response
+**HTTP 200 OK**
+**Content-Type**: `application/vnd.openxmlformats-officedocument.wordprocessingml.document`
+**Content-Disposition**: `attachment; filename="project_1_수학_문제집_1단원.docx"`
+응답은 바이너리 스트림으로 Word 문서 파일을 반환합니다.
+### 문서 구조
+생성되는 Word 문서는 다음과 같은 구조를 가집니다:
+1. **제목**: 프로젝트 이름 (Heading 1)
+2. **메타정보**: 총 페이지 수, 생성 일시 등
+3. **본문**: 페이지별로 구분된 내용
+   - 페이지 번호 (Heading 2)
+   - 페이지 내용 (HTML을 Word 형식으로 변환)
+4. **푸터**: 생성 정보
+### 예제 코드
+**JavaScript (fetch)**:
+```javascript
+const downloadDocument = async (projectId) => {
+  const response = await fetch(
+    `http://localhost:8000/api/projects/${projectId}/download`,
+    {
+      method: 'POST'
+    }
+  );
+  if (!response.ok) {
+    throw new Error(`문서 다운로드 실패: ${response.status}`);
+  }
+  // Blob으로 변환
+  const blob = await response.blob();
+  // 파일명 추출
+  const contentDisposition = response.headers.get('Content-Disposition');
+  let filename = `project_${projectId}.docx`;
+  if (contentDisposition) {
+    const match = contentDisposition.match(/filename="(.+)"/);
+    if (match) {
+      filename = match[1];
+    }
+  }
+  // 다운로드 트리거
+  const url = window.URL.createObjectURL(blob);
+  const a = document.createElement('a');
+  a.href = url;
+  a.download = filename;
+  document.body.appendChild(a);
+  a.click();
+  a.remove();
+  window.URL.revokeObjectURL(url);
+  console.log('다운로드 완료:', filename);
+};
+// 사용 예시
+document.getElementById('downloadBtn').addEventListener('click', async () => {
+  try {
+    await downloadDocument(1);
+    alert('Word 문서 다운로드가 시작되었습니다.');
+  } catch (error) {
+    console.error('다운로드 실패:', error);
+    alert('다운로드 실패: ' + error.message);
+  }
+});
+```
+**React Component**:
+```jsx
+import React, { useState } from 'react';
+import axios from 'axios';
+function DocumentDownloader({ projectId, projectName }) {
+  const [downloading, setDownloading] = useState(false);
+  const handleDownload = async () => {
+    setDownloading(true);
+    try {
+      const response = await axios.post(
+        `http://localhost:8000/api/projects/${projectId}/download`,
+        {},
+        {
+          responseType: 'blob' // 중요: blob 타입으로 응답 받기
+        }
+      );
+      // Blob 생성
+      const blob = new Blob([response.data], {
+        type: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
+      });
+      // 파일명 추출
+      let filename = `project_${projectId}.docx`;
+      const contentDisposition = response.headers['content-disposition'];
+      if (contentDisposition) {
+        const match = contentDisposition.match(/filename="(.+)"/);
+        if (match) {
+          filename = decodeURIComponent(match[1]);
+        }
+      }
+      // 다운로드
+      const url = window.URL.createObjectURL(blob);
+      const link = document.createElement('a');
+      link.href = url;
+      link.download = filename;
+      document.body.appendChild(link);
+      link.click();
+      document.body.removeChild(link);
+      window.URL.revokeObjectURL(url);
+      alert('다운로드가 완료되었습니다.');
+    } catch (error) {
+      console.error('다운로드 실패:', error);
+      alert('다운로드 실패: ' + error.message);
+    } finally {
+      setDownloading(false);
+    }
+  };
+  return (
+    <div>
+      <button
+        onClick={handleDownload}
+        disabled={downloading}
+        className="btn btn-primary"
+      >
+        {downloading ? (
+          <>
+            <span className="spinner"></span>
+            다운로드 중...
+          </>
+        ) : (
+          <>
+            <i className="icon-download"></i>
+            Word 문서 다운로드
+          </>
+        )}
+      </button>
+      {downloading && (
+        <p className="help-text">
+          문서를 생성하고 있습니다. 잠시만 기다려주세요...
+        </p>
+      )}
+    </div>
+  );
+}
+```
+**Axios 설정 팁**:
+```javascript
+// Axios를 사용한 범용 다운로드 헬퍼 (진행률 로그 포함)
+import axios from 'axios';
+const downloadFile = async (url, method = 'GET', data = null) => {
+  try {
+    const response = await axios({
+      url,
+      method,
+      data,
+      responseType: 'blob',
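+      onDownloadProgress: (progressEvent) => {
+        // Content-Length가 없는 스트리밍 응답에서는 total이 없어 진행률을 계산할 수 없음
+        if (!progressEvent.total) {
+          return;
+        }
+        const percentCompleted = Math.round(
+          (progressEvent.loaded * 100) / progressEvent.total
+        );
+        console.log(`다운로드 진행률: ${percentCompleted}%`);
+      }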
+    });
+    // 파일명 추출
+    const contentDisposition = response.headers['content-disposition'];
+    let filename = 'download';
+    if (contentDisposition) {
+      const match = contentDisposition.match(/filename="(.+)"/);
+      if (match) {
+        filename = decodeURIComponent(match[1]);
+      }
+    }
+    // Blob 생성 및 다운로드
+    const blob = new Blob([response.data]);
+    const downloadUrl = window.URL.createObjectURL(blob);
+    const link = document.createElement('a');
+    link.href = downloadUrl;
+    link.download = filename;
+    document.body.appendChild(link);
+    link.click();
+    document.body.removeChild(link);
+    window.URL.revokeObjectURL(downloadUrl);
+    return filename;
+  } catch (error) {
+    console.error('다운로드 실패:', error);
+    throw error;
+  }
+};
+// 사용 예시
+downloadFile(`http://localhost:8000/api/projects/1/download`, 'POST')
+  .then(filename => alert(`${filename} 다운로드 완료`))
+  .catch(error => alert('다운로드 실패: ' + error.message));
+```
+---
+## 다운로드 플로우 다이어그램
+```
+사용자 → [다운로드 버튼 클릭]
+         ↓
+프론트엔드 → POST /api/projects/{id}/download
+         ↓
+백엔드 → 1. 통합 텍스트 조회 (캐시 우선)
+         2. HTML → Word 변환 (python-docx)
+         3. 파일 스트림 생성
+         ↓
+프론트엔드 ← Blob 응답 수신
+         ↓
+브라우저 → 파일 다운로드 트리거
+         ↓
+완료!
+```
+---
+## 에러 응답
+### 404 Not Found
+프로젝트를 찾을 수 없음
+```json
+{
+  "error": "프로젝트를 찾을 수 없습니다.",
+  "status_code": 404
+}
+```
+### 500 Internal Server Error
+문서 생성 실패
+```json
+{
+  "error": "Internal Server Error",
+  "detail": "Word 문서 생성 중 오류가 발생했습니다.",
+  "status_code": 500
+}
+```
+### 501 Not Implemented
+python-docx 라이브러리 미설치
+```json
+{
+  "error": "python-docx 라이브러리가 설치되지 않았습니다.",
+  "status_code": 501
+}
+```
+---
+## 주의사항
+### 파일명 인코딩
+한글 파일명이 포함된 경우 브라우저마다 처리 방식이 다를 수 있습니다. 백엔드에서는 UTF-8로 인코딩된 파일명을 반환하므로, 필요시 `decodeURIComponent()`를 사용하세요.
+### 큰 프로젝트 처리
+페이지 수가 많은 프로젝트는 문서 생성에 시간이 걸릴 수 있습니다. 프론트엔드에서 로딩 인디케이터를 표시하는 것을 권장합니다.
+### 브라우저 호환성
+- `Blob` API는 IE10 이상에서 지원됩니다.
+- `download` 속성은 IE에서 지원되지 않으므로, 필요시 polyfill을 사용하세요.
+---
+## 다음 단계
+- **[데이터 모델](./05_데이터_모델.md)**: 통합 결과 스키마 상세 정보
+- **[에러 처리](./06_에러_처리.md)**: 에러 코드 및 처리 방법

docs/Backend API 문서/05_데이터_모델.md ADDED Viewed

	@@ -0,0 +1,678 @@

+# 데이터 모델
+SmartEyeSsen API에서 사용되는 주요 데이터 구조와 Enum 정의입니다.
+## 📖 목차
+- [Enum 정의](#enum-정의)
+- [프로젝트 관련 스키마](#프로젝트-관련-스키마)
+- [페이지 관련 스키마](#페이지-관련-스키마)
+- [레이아웃 및 분석 관련 스키마](#레이아웃-및-분석-관련-스키마)
+- [텍스트 버전 관련 스키마](#텍스트-버전-관련-스키마)
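+- [다운로드 관련 스키마](#다운로드-관련-스키마)
+- [분석 관련 스키마](#분석-관련-스키마)
+- [에러 응답 스키마](#에러-응답-스키마)
+- [TypeScript 타입 정의 파일](#typescript-타입-정의-파일)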
+---
+## Enum 정의
+### AnalysisModeEnum
+분석 모드를 정의합니다.
+| 값 | 설명 |
+|----|------|
+| `auto` | 자동 분석 (기본값) |
+| `manual` | 수동 분석 |
+| `hybrid` | 하이브리드 (자동 + 수동 조정) |
+### ProjectStatusEnum
+프로젝트 상태를 정의합니다.
+| 값 | 설명 |
+|----|------|
+| `created` | 생성됨 (초기 상태) |
+| `in_progress` | 진행 중 (분석 중) |
+| `completed` | 완료 |
+| `error` | 오류 발생 |
+### AnalysisStatusEnum
+페이지 분석 상태를 정의합니다.
+| 값 | 설명 |
+|----|------|
+| `pending` | 대기 중 (분석 전) |
+| `processing` | 처리 중 |
+| `completed` | 완료 |
+| `error` | 오류 발생 |
+### VersionTypeEnum
+텍스트 버전 유형을 정의합니다.
+| 값 | 설명 |
+|----|------|
+| `original` | 원본 OCR 결과 (정렬만 적용) |
+| `auto_formatted` | 자동 포맷팅 적용 (접두사/접미사/들여쓰기) |
+| `user_edited` | 사용자 편집 |
+### SortingMethodEnum
+정렬 방식을 정의합니다.
+| 값 | 설명 |
+|----|------|
+| `question_based` | 문제 번호 기반 정렬 (worksheet) |
+| `reading_order` | 읽기 순서 기반 정렬 (document) |
+---
+## 프로젝트 관련 스키마
+### ProjectCreate
+프로젝트 생성 요청 스키마입니다.
+```typescript
+interface ProjectCreate {
+  project_name: string;      // 프로젝트 이름 (1~255자)
+  doc_type_id: number;        // 문서 타입 ID (1: worksheet, 2: document)
+  analysis_mode?: string;     // 분석 모드 (기본값: "auto")
+  user_id: number;            // 사용자 ID
+}
+```
+**예시**:
+```json
+{
+  "project_name": "수학 문제집 1단원",
+  "doc_type_id": 1,
+  "analysis_mode": "auto",
+  "user_id": 1
+}
+```
+### ProjectResponse
+프로젝트 응답 스키마입니다.
+```typescript
+interface ProjectResponse {
+  project_id: number;         // 프로젝트 고유 ID
+  user_id: number;            // 소유자 사용자 ID
+  doc_type_id: number;        // 문서 타입 ID
+  project_name: string;       // 프로젝트 이름
+  total_pages: number;        // 총 페이지 수
+  analysis_mode: string;      // 분석 모드
+  status: string;             // 프로젝트 상태
+  created_at: string;         // 생성일시 (ISO 8601)
+  updated_at: string;         // 수정일시 (ISO 8601)
+}
+```
+**예시**:
+```json
+{
+  "project_id": 1,
+  "user_id": 1,
+  "doc_type_id": 1,
+  "project_name": "수학 문제집 1단원",
+  "total_pages": 5,
+  "analysis_mode": "auto",
+  "status": "completed",
+  "created_at": "2025-01-22T10:30:00",
+  "updated_at": "2025-01-22T10:35:00"
+}
+```
+### ProjectWithPagesResponse
+페이지 목록을 포함한 프로젝트 응답 스키마입니다.
+```typescript
+interface ProjectWithPagesResponse extends ProjectResponse {
+  pages: PageResponse[];      // 페이지 목록
+}
+```
+**예시**:
+```json
+{
+  "project_id": 1,
+  "user_id": 1,
+  "doc_type_id": 1,
+  "project_name": "수학 문제집 1단원",
+  "total_pages": 2,
+  "analysis_mode": "auto",
+  "status": "completed",
+  "created_at": "2025-01-22T10:30:00",
+  "updated_at": "2025-01-22T10:35:00",
+  "pages": [
+    {
+      "page_id": 1,
+      "project_id": 1,
+      "page_number": 1,
+      "image_path": "uploads/project_1_page_1_abc123.png",
+      "image_width": 2480,
+      "image_height": 3508,
+      "analysis_status": "completed",
+      "processing_time": 5.23,
+      "created_at": "2025-01-22T10:31:00",
+      "analyzed_at": "2025-01-22T10:35:00"
+    }
+  ]
+}
+```
+### ProjectUpdate
+프로젝트 수정 요청 스키마입니다. 모든 필드는 선택사항입니다.
+```typescript
+interface ProjectUpdate {
+  project_name?: string;      // 프로젝트 이름
+  doc_type_id?: number;       // 문서 타입 ID
+  analysis_mode?: string;     // 분석 모드
+  status?: string;            // 프로젝트 상태
+}
+```
+---
+## 페이지 관련 스키마
+### PageCreate
+페이지 생성 요청 스키마입니다.
+```typescript
+interface PageCreate {
+  project_id: number;         // 프로젝트 ID
+  page_number: number;        // 페이지 번호 (1부터 시작)
+  image_path: string;         // 이미지 파일 경로
+  image_width?: number;       // 이미지 너비 (픽셀)
+  image_height?: number;      // 이미지 높이 (픽셀)
+}
+```
+### PageResponse
+페이지 응답 스키마입니다.
+```typescript
+interface PageResponse {
+  page_id: number;            // 페이지 고유 ID
+  project_id: number;         // 소속 프로젝트 ID
+  page_number: number;        // 페이지 번호
+  image_path: string;         // 이미지 파일 경로
+  image_width: number | null; // 이미지 너비 (픽셀)
+  image_height: number | null;// 이미지 높이 (픽셀)
+  analysis_status: string;    // 분석 상태
+  processing_time: number | null; // 처리 시간 (초)
+  created_at: string;         // 생성일시
+  analyzed_at: string | null; // 분석 완료일시
+}
+```
+**예시**:
+```json
+{
+  "page_id": 1,
+  "project_id": 1,
+  "page_number": 1,
+  "image_path": "uploads/project_1_page_1_abc123.png",
+  "image_width": 2480,
+  "image_height": 3508,
+  "analysis_status": "completed",
+  "processing_time": 5.23,
+  "created_at": "2025-01-22T10:31:00",
+  "analyzed_at": "2025-01-22T10:35:00"
+}
+```
+### MultiPageCreateResponse
+PDF 업로드 시 여러 페이지 생성 응답 스키마입니다.
+```typescript
+interface MultiPageCreateResponse {
+  project_id: number;         // 프로젝트 ID
+  total_created: number;      // 생성된 페이지 수
+  source_type: string;        // 소스 타입 ("pdf" 또는 "image")
+  pages: PageResponse[];      // 생성된 페이지 목록
+}
+```
+**예시**:
+```json
+{
+  "project_id": 1,
+  "total_created": 5,
+  "source_type": "pdf",
+  "pages": [
+    {
+      "page_id": 1,
+      "project_id": 1,
+      "page_number": 1,
+      "image_path": "uploads/3/page_1.png",
+      "image_width": 2480,
+      "image_height": 3508,
+      "analysis_status": "pending",
+      "processing_time": null,
+      "created_at": "2025-01-22T10:31:00",
+      "analyzed_at": null
+    }
+    // ... 나머지 페이지
+  ]
+}
+```
+---
+## 레이아웃 및 분석 관련 스키마
+### LayoutElementResponse
+레이아웃 요소 응답 스키마입니다.
+```typescript
+interface LayoutElementResponse {
+  element_id: number;         // 요소 고유 ID
+  page_id: number;            // 소속 페이지 ID
+  class_name: string;         // 클래스명 (question_number, text, figure, table 등)
+  confidence: number;         // 신뢰도 (0.0~1.0)
+  bbox_x: number;             // X 좌표 (왼쪽 상단)
+  bbox_y: number;             // Y 좌표 (왼쪽 상단)
+  bbox_width: number;         // 너비 (픽셀)
+  bbox_height: number;        // 높이 (픽셀)
+  area: number | null;        // 면적 (자동 계산)
+  y_position: number | null;  // Y 정렬용 좌표
+  x_position: number | null;  // X 정렬용 좌표
+  created_at: string;         // 생성일시
+}
+```
+### TextContentResponse
+OCR 텍스트 응답 스키마입니다.
+```typescript
+interface TextContentResponse {
+  text_id: number;            // OCR 결과 고유 ID
+  element_id: number;         // 레이아웃 요소 ID
+  ocr_text: string;           // OCR 추출 텍스트
+  ocr_engine: string;         // OCR 엔진명 (예: "PaddleOCR")
+  ocr_confidence: number | null; // OCR 신뢰도 (0.0~1.0)
+  language: string;           // 언어 코드 (ko, en, ja, zh)
+  created_at: string;         // 생성일시
+}
+```
+### AIDescriptionResponse
+AI 설명 응답 스키마입니다.
+```typescript
+interface AIDescriptionResponse {
+  ai_desc_id: number;         // AI 설명 고유 ID
+  element_id: number;         // 레이아웃 요소 ID
+  description: string;        // AI 생성 설명 텍스트
+  ai_model: string;           // AI 모델명 (예: "gpt-4o-mini")
+  prompt_used: string | null; // 사용한 프롬프트 (디버깅용)
+  created_at: string;         // 생성일시
+}
+```
+---
+## 텍스트 버전 관련 스키마
+### PageTextResponse
+페이지 텍스트 조회 응답 스키마입니다.
+```typescript
+interface PageTextResponse {
+  page_id: number;            // 페이지 ID
+  version_id: number;         // 텍스트 버전 ID
+  version_type: string;       // 버전 유형 (original, auto_formatted, user_edited)
+  is_current: boolean;        // 현재 버전 여부
+  content: string;            // HTML 형식의 텍스트 내용
+  created_at: string;         // 버전 생성일시
+}
+```
+**예시**:
+```json
+{
+  "page_id": 1,
+  "version_id": 3,
+  "version_type": "user_edited",
+  "is_current": true,
+  "content": "<h2>1. 다음 식을 계산하시오.</h2>\n<p>(1) 3 + 5 = ?</p>\n<p>답: 8</p>",
+  "created_at": "2025-01-22T11:00:00"
+}
+```
+### PageTextUpdate
+페이지 텍스트 저장 요청 스키마입니다.
+```typescript
+interface PageTextUpdate {
+  content: string;            // 저장할 텍스트 내용 (HTML 형식)
+  user_id?: number;           // 수정한 사용자 ID (선택)
+}
+```
+**예시**:
+```json
+{
+  "content": "<h2>1. 다음 식을 계산하시오.</h2>\n<p>(1) 3 + 5 = ?</p>\n<p>답: 8</p>",
+  "user_id": 1
+}
+```
+### TextVersionResponse
+텍스트 버전 전체 정보 응답 스키마입니다.
+```typescript
+interface TextVersionResponse {
+  version_id: number;         // 버전 고유 ID
+  page_id: number;            // 페이지 ID
+  user_id: number | null;     // 수정한 사용자 ID
+  content: string;            // 텍스트 내용
+  version_number: number;     // 버전 번호 (1, 2, 3, ...)
+  version_type: string;       // 버전 유형
+  is_current: boolean;        // 현재 버전 여부
+  created_at: string;         // 버전 생성일시
+}
+```
+---
+## 다운로드 관련 스키마
+### CombinedTextResponse
+통합 텍스트 조회 응답 스키마입니다.
+```typescript
+interface CombinedTextResponse {
+  project_id: number;         // 프로젝트 ID
+  project_name: string | null;// 프로젝트 이름
+  combined_text: string;      // 통합된 전체 텍스트 (HTML)
+  stats: CombinedTextStats;   // 통계 정보
+  generated_at: string;       // 생성일시
+}
+interface CombinedTextStats {
+  total_pages: number;        // 총 페이지 수
+  total_words: number;        // 총 단어 수
+  total_characters: number;   // 총 문자 수
+}
+```
+**예시**:
+```json
+{
+  "project_id": 1,
+  "project_name": "수학 문제집 1단원",
+  "combined_text": "<h2>1. 다음 식을 계산하시오.</h2>\n<p>(1) 3 + 5 = ?</p>\n<p>답: 8</p>",
+  "stats": {
+    "total_pages": 3,
+    "total_words": 450,
+    "total_characters": 2340
+  },
+  "generated_at": "2025-01-22T11:00:00"
+}
+```
+---
+## 분석 관련 스키마
+### ProjectAnalysisRequest
+프로젝트 분석 요청 스키마입니다.
+```typescript
+interface ProjectAnalysisRequest {
+  use_ai_descriptions?: boolean; // AI 설명 생성 여부 (기본값: true)
+  api_key?: string | null;       // OpenAI API 키 (선택)
+}
+```
+### PageAnalysisRequest
+단일 페이지 비동기 분석 요청 스키마입니다.
+```typescript
+interface PageAnalysisRequest {
+  use_ai_descriptions?: boolean; // AI 설명 생성 여부 (기본값: true)
+  api_key?: string | null;       // OpenAI API 키 (선택)
+}
+```
+### AnalysisJobResponse
+비동기 분석 작업 상태 응답 스키마입니다.
+```typescript
+interface AnalysisJobResponse {
+  job_id: string;             // 작업 ID (UUID)
+  status: string;             // 작업 상태 (pending, processing, completed, failed)
+  page_id: number;            // 페이지 ID
+  page_number: number;        // 페이지 번호
+  project_id: number;         // 프로젝트 ID
+  result: AnalysisResult | null; // 분석 결과 (완료 시)
+  error: string | null;       // 에러 메시지 (실패 시)
+  progress: string;           // 진행 상황 메시지
+}
+interface AnalysisResult {
+  page_id: number;
+  page_number: number;
+  layout_count: number;       // 감지된 레이아웃 요소 수
+  ocr_count: number;          // OCR 수행된 요소 수
+  ai_description_count: number; // AI 설명 생성된 요소 수
+  processing_time: number;    // 처리 시간 (초)
+  message: string;            // 결과 메시지
+}
+```
+---
+## 에러 응답 스키마
+### ErrorResponse
+에러 응답 스키마입니다.
+```typescript
+interface ErrorResponse {
+  error: string;              // 에러 메시지
+  detail?: string;            // 상세 설명 (선택)
+  status_code: number;        // HTTP 상태 코드
+}
+```
+**예시**:
+```json
+{
+  "error": "프로젝트를 찾을 수 없습니다.",
+  "status_code": 404
+}
+```
+```json
+{
+  "error": "Validation Error",
+  "detail": "project_name은 1자 이상이어야 합니다.",
+  "status_code": 400
+}
+```
+---
+## TypeScript 타입 정의 파일
+프론트엔드에서 사용할 수 있는 TypeScript 타입 정의 예시입니다:
+```typescript
+// types/api.ts
+// Enums
+export enum AnalysisModeEnum {
+  AUTO = 'auto',
+  MANUAL = 'manual',
+  HYBRID = 'hybrid',
+}
+export enum ProjectStatusEnum {
+  CREATED = 'created',
+  IN_PROGRESS = 'in_progress',
+  COMPLETED = 'completed',
+  ERROR = 'error',
+}
+export enum AnalysisStatusEnum {
+  PENDING = 'pending',
+  PROCESSING = 'processing',
+  COMPLETED = 'completed',
+  ERROR = 'error',
+}
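+export enum VersionTypeEnum {
+  ORIGINAL = 'original',
+  AUTO_FORMATTED = 'auto_formatted',
+  USER_EDITED = 'user_edited',
+}
+export enum SortingMethodEnum {
+  QUESTION_BASED = 'question_based',
+  READING_ORDER = 'reading_order',
+}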
+// Project
+export interface ProjectCreate {
+  project_name: string;
+  doc_type_id: number;
+  analysis_mode?: AnalysisModeEnum;
+  user_id: number;
+}
+export interface ProjectResponse {
+  project_id: number;
+  user_id: number;
+  doc_type_id: number;
+  project_name: string;
+  total_pages: number;
+  analysis_mode: AnalysisModeEnum;
+  status: ProjectStatusEnum;
+  created_at: string;
+  updated_at: string;
+}
+export interface ProjectWithPagesResponse extends ProjectResponse {
+  pages: PageResponse[];
+}
+// Page
+export interface PageResponse {
+  page_id: number;
+  project_id: number;
+  page_number: number;
+  image_path: string;
+  image_width: number | null;
+  image_height: number | null;
+  analysis_status: AnalysisStatusEnum;
+  processing_time: number | null;
+  created_at: string;
+  analyzed_at: string | null;
+}
+export interface MultiPageCreateResponse {
+  project_id: number;
+  total_created: number;
+  source_type: string;
+  pages: PageResponse[];
+}
+// Text
+export interface PageTextResponse {
+  page_id: number;
+  version_id: number;
+  version_type: VersionTypeEnum;
+  is_current: boolean;
+  content: string;
+  created_at: string;
+}
+export interface PageTextUpdate {
+  content: string;
+  user_id?: number;
+}
+// Download
+export interface CombinedTextStats {
+  total_pages: number;
+  total_words: number;
+  total_characters: number;
+}
+export interface CombinedTextResponse {
+  project_id: number;
+  project_name: string | null;
+  combined_text: string;
+  stats: CombinedTextStats;
+  generated_at: string;
+}
+// Analysis
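+export interface AnalysisRequest {
+  use_ai_descriptions?: boolean;
+  api_key?: string | null;
+}
+// 스키마 문서의 ProjectAnalysisRequest / PageAnalysisRequest는 동일한 형태이므로 별칭으로 제공
+export type ProjectAnalysisRequest = AnalysisRequest;
+export type PageAnalysisRequest = AnalysisRequest;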
+export interface AnalysisResult {
+  page_id: number;
+  page_number: number;
+  layout_count: number;
+  ocr_count: number;
+  ai_description_count: number;
+  processing_time: number;
+  message: string;
+}
+export interface AnalysisJobResponse {
+  job_id: string;
+  status: 'pending' | 'processing' | 'completed' | 'failed';
+  page_id: number;
+  page_number: number;
+  project_id: number;
+  result: AnalysisResult | null;
+  error: string | null;
+  progress: string;
+}
+// Error
+export interface ErrorResponse {
+  error: string;
+  detail?: string;
+  status_code: number;
+}
+```
+---
+## 다음 단계
+- **[에러 처리](./06_에러_처리.md)**: 에러 코드 및 처리 방법
+- **[개요](./00_개요.md)**: 시스템 소개 및 시작하기

docs/Backend API 문서/06_에러_처리.md ADDED Viewed

	@@ -0,0 +1,556 @@

+# 에러 처리
+SmartEyeSsen API의 에러 응답 형식과 처리 방법을 설명합니다.
+## 📖 목차
+- [에러 응답 형식](#에러-응답-형식)
+- [HTTP 상태 코드](#http-상태-코드)
+- [공통 에러 코드](#공통-에러-코드)
+- [에러 처리 모범 사례](#에러-처리-모범-사례)
+- [프론트엔드 에러 핸들러 예제](#프론트엔드-에러-핸들러-예제)
+- [에러 로깅](#에러-로깅)
+---
+## 에러 응답 형식
+모든 API 에러는 일관된 JSON 형식으로 반환됩니다.
+### 기본 형식
+```json
+{
+  "error": "에러 메시지",
+  "detail": "상세 설명 (선택)",
+  "status_code": 400
+}
+```
+### 필드 설명
+| 필드 | 타입 | 필수 | 설명 |
+|------|------|------|------|
+| `error` | string | ✅ | 간단한 에러 메시지 |
+| `detail` | string | ❌ | 상세한 에러 설명 (선택) |
+| `status_code` | number | ✅ | HTTP 상태 코드 |
+---
+## HTTP 상태 코드
+### 2xx: 성공
+| 코드 | 설명 | 사용 예시 |
+|------|------|----------|
+| 200 OK | 요청 성공 | GET 요청, PATCH 요청 |
+| 201 Created | 리소스 생성 성공 | POST 요청 (프로젝트 생성, 페이지 업로드) |
+| 202 Accepted | 요청 수락됨 (비동기 처리) | 분석 작업 시작 |
+| 204 No Content | 성공, 응답 본문 없음 | DELETE 요청 |
+### 4xx: 클라이언트 에러
+#### 400 Bad Request
+요청 데이터가 유효하지 않은 경우
+**예시**:
+```json
+{
+  "error": "Validation Error",
+  "detail": "project_name은 1자 이상이어야 합니다.",
+  "status_code": 400
+}
+```
+**원인**:
+- 필수 필드 누락
+- 데이터 타입 불일치
+- 유효성 검증 실패
+- 잘못된 파일 형식
+**해결 방법**:
+- 요청 데이터 형식 확인
+- API 문서의 필수 필드 확인
+- 데이터 타입 검증
+#### 404 Not Found
+요청한 리소스를 찾을 수 없는 경우
+**예시**:
+```json
+{
+  "error": "프로젝트를 찾을 수 없습니다.",
+  "status_code": 404
+}
+```
+**원인**:
+- 존재하지 않는 ID로 조회
+- 삭제된 리소스 접근
+- 잘못된 URL
+**해결 방법**:
+- ID 값 확인
+- 리소스 존재 여부 사전 확인
+- 사용자에게 적절한 메시지 표시
+#### 413 Payload Too Large
+요청 본문이 너무 큰 경우 (일반적으로 파일 업로드)
+**예시**:
+```json
+{
+  "error": "파일 크기가 너무 큽니다.",
+  "detail": "최대 50MB까지 업로드 가능합니다.",
+  "status_code": 413
+}
+```
+**원인**:
+- 업로드 파일 크기 제한 초과 (일반적으로 50MB)
+**해결 방법**:
+- 파일 크기 사전 확인
+- 사용자에게 파일 크기 제한 안내
+- 파일 압축 권장
+#### 422 Unprocessable Entity
+요청 형식은 올바르나 의미상 처리할 수 없는 경우
+**예시**:
+```json
+{
+  "error": "Unprocessable Entity",
+  "detail": "이미 분석이 완료된 페이지입니다.",
+  "status_code": 422
+}
+```
+**원인**:
+- 비즈니스 로직 위반
+- 상태 충돌
+**해결 방법**:
+- 리소스 현재 상태 확인
+- 조건부 요청 처리
+### 5xx: 서버 에러
+#### 500 Internal Server Error
+서버 내부 오류
+**예시**:
+```json
+{
+  "error": "Internal Server Error",
+  "detail": "데이터베이스 연결 실패",
+  "status_code": 500
+}
+```
+**원인**:
+- 서버 내부 버그
+- 데이터베이스 연결 오류
+- 예상치 못한 예외
+**해결 방법**:
+- 잠시 후 재시도
+- 계속 발생 시 관리자에게 문의
+- 에러 로그 수집 및 리포트
+#### 501 Not Implemented
+기능이 구현되지 않음
+**예시**:
+```json
+{
+  "error": "python-docx 라이브러리가 설치되지 않았습니다.",
+  "status_code": 501
+}
+```
+**원인**:
+- 서버 환경 설정 문제
+- 필수 라이브러리 미설치
+**해결 방법**:
+- 관리자에게 문의
+- 대체 기능 사용
+#### 503 Service Unavailable
+서비스 일시적 사용 불가
+**예시**:
+```json
+{
+  "error": "Service Unavailable",
+  "detail": "서버가 과부하 상태입니다. 잠시 후 다시 시도해주세요.",
+  "status_code": 503
+}
+```
+**원인**:
+- 서버 과부하
+- 유지보수 중
+- 일시적 장애
+**해결 방법**:
+- 지수 백오프(exponential backoff)로 재시도
+- 사용자에게 안내 메시지 표시
+---
+## 공통 에러 코드
+### 프로젝트 관련
+| HTTP | 에러 메시지 | 원인 | 해결 방법 |
+|------|------------|------|----------|
+| 400 | `project_name은 1자 이상이어야 합니다.` | 프로젝트 이름이 비어있음 | 프로젝트 이름 입력 확인 |
+| 400 | `doc_type_id는 필수입니다.` | 문서 타입 ID 누락 | doc_type_id 제공 (1 또는 2) |
+| 404 | `프로젝트를 찾을 수 없습니다.` | 존재하지 않는 프로젝트 ID | 프로젝트 ID 확인 |
+### 페이지 관련
+| HTTP | 에러 메시지 | 원인 | 해결 방법 |
+|------|------------|------|----------|
+| 400 | `이미지 업로드 시 page_number는 필수입니다.` | 이미지 업로드 시 페이지 번호 누락 | page_number 제공 |
+| 400 | `이미지 처리 실패` | 손상된 이미지 파일 | 올바른 이미지 파일 확인 |
+| 400 | `PDF 처리 실패` | 손상된 PDF 파일 | 올바른 PDF 파일 확인 |
+| 404 | `페이지를 찾을 수 없습니다.` | 존재하지 않는 페이지 ID | 페이지 ID 확인 |
+| 413 | `파일 크기가 너무 큽니다.` | 파일 크기 제한 초과 | 파일 크기 확인 (최대 50MB) |
+### 분석 관련
+| HTTP | 에러 메시지 | 원인 | 해결 방법 |
+|------|------------|------|----------|
+| 400 | `분석할 페이지가 없습니다.` | pending 상태 페이지 없음 | 페이지 업로드 먼저 수행 |
+| 404 | `작업을 찾을 수 없습니다.` | 존재하지 않는 작업 ID | 작업 ID 확인 |
+| 500 | `레이아웃 분석 중 오류가 발생했습니다.` | AI 모델 오류 | 재시도 또는 관리자 문의 |
+### 다운로드 관련
+| HTTP | 에러 메시지 | 원인 | 해결 방법 |
+|------|------------|------|----------|
+| 404 | `통합 텍스트를 찾을 수 없습니다.` | 분석되지 않은 프로젝트 | 분석 먼저 수행 |
+| 500 | `Word 문서 생성 중 오류가 발생했습니다.` | 문서 변환 오류 | 재시도 또는 관리자 문의 |
+| 501 | `python-docx 라이브러리가 설치되지 않았습니다.` | 서버 설정 문제 | 관리자에게 문의 |
+---
+## 에러 처리 모범 사례
+### 1. 일관된 에러 핸들링
+모든 API 호출에 대해 일관된 에러 처리 로직을 적용하세요.
+```javascript
+// ❌ 나쁜 예
+fetch('/api/projects')
+  .then(res => res.json())
+  .then(data => console.log(data));
+// ✅ 좋은 예
+fetch('/api/projects')
+  .then(res => {
+    if (!res.ok) {
+      throw new Error(`HTTP ${res.status}`);
+    }
+    return res.json();
+  })
+  .then(data => console.log(data))
+  .catch(error => {
+    console.error('에러 발생:', error);
+    showErrorMessage(error.message);
+  });
+```
+### 2. 사용자 친화적인 에러 메시지
+서버 에러 메시지를 그대로 표시하지 말고, 사용자가 이해하기 쉬운 메시지로 변환하세요.
+```javascript
+function getUserFriendlyMessage(error) {
+  const errorMessages = {
+    404: '요청한 항목을 찾을 수 없습니다.',
+    413: '파일 크기가 너무 큽니다. 50MB 이하의 파일을 선택해주세요.',
+    500: '서버 오류가 발생했습니다. 잠시 후 다시 시도해주세요.',
+    503: '서비스가 일시적으로 사용 불가능합니다. 잠시 후 다시 시도해주세요.'
+  };
+  return errorMessages[error.status_code] || '알 수 없는 오류가 발생했습니다.';
+}
+```
+### 3. 재시도 로직
+일시적인 오류(5xx)에 대해서는 재시도 로직을 구현하세요.
+```javascript
+async function fetchWithRetry(url, options = {}, maxRetries = 3) {
+  let lastError;
+  for (let i = 0; i < maxRetries; i++) {
+    try {
+      const response = await fetch(url, options);
+      if (response.ok || response.status < 500) {
+        // 성공하거나 4xx 에러는 재시도하지 않음
+        return response;
+      }
+      // 5xx 에러는 재시도
+      lastError = new Error(`HTTP ${response.status}`);
+      // 지수 백오프
+      await new Promise(resolve => setTimeout(resolve, Math.pow(2, i) * 1000));
+    } catch (error) {
+      lastError = error;
+      await new Promise(resolve => setTimeout(resolve, Math.pow(2, i) * 1000));
+    }
+  }
+  throw lastError;
+}
+```
+### 4. 전역 에러 핸들러
+전역 에러 핸들러를 설정하여 모든 에러를 중앙에서 관리하세요.
+```javascript
+// Axios 인터셉터 예제
+axios.interceptors.response.use(
+  response => response,
+  error => {
+    if (error.response) {
+      // 서버 응답이 있는 경우
+      const { status, data } = error.response;
+      if (status === 401) {
+        // 인증 오류 - 로그인 페이지로 이동
+        window.location.href = '/login';
+      } else if (status === 403) {
+        // 권한 오류
+        showErrorToast('접근 권한이 없습니다.');
+      } else if (status === 404) {
+        // 리소스 없음
+        showErrorToast(data.error || '요청한 항목을 찾을 수 없습니다.');
+      } else if (status >= 500) {
+        // 서버 오류
+        showErrorToast('서버 오류가 발생했습니다. 잠시 후 다시 시도해주세요.');
+        // 에러 로깅
+        logErrorToServer(error);
+      } else {
+        // 기타 에러
+        showErrorToast(data.error || '오류가 발생했습니다.');
+      }
+    } else if (error.request) {
+      // 요청은 보냈으나 응답이 없는 경우 (네트워크 오류)
+      showErrorToast('네트워크 오류가 발생했습니다. 인터넷 연결을 확인해주세요.');
+    } else {
+      // 요청 설정 중 오류
+      showErrorToast('요청 처리 중 오류가 발생했습니다.');
+    }
+    return Promise.reject(error);
+  }
+);
+```
+---
+## 프론트엔드 에러 핸들러 예제
+### React + Axios
+```jsx
+import React, { createContext, useContext, useState } from 'react';
+import axios from 'axios';
+// 에러 컨텍스트
+const ErrorContext = createContext();
+export const useError = () => useContext(ErrorContext);
+export const ErrorProvider = ({ children }) => {
+  const [error, setError] = useState(null);
+  const showError = (message, detail = null) => {
+    setError({ message, detail });
+    setTimeout(() => setError(null), 5000); // 5초 후 자동 제거
+  };
+  const clearError = () => setError(null);
+  return (
+    <ErrorContext.Provider value={{ error, showError, clearError }}>
+      {children}
+      {error && (
+        <div className="error-toast">
+          <div className="error-message">{error.message}</div>
+          {error.detail && <div className="error-detail">{error.detail}</div>}
+          <button onClick={clearError}>×</button>
+        </div>
+      )}
+    </ErrorContext.Provider>
+  );
+};
+// API 클라이언트 설정
+const apiClient = axios.create({
+  baseURL: 'http://localhost:8000',
+});
+// 에러 핸들링 래퍼
+// 주의: Hook(useError)은 컴포넌트 또는 커스텀 Hook의 최상위에서만 호출할 수 있다.
+// 따라서 showError는 호출하는 컴포넌트에서 useError()로 얻어 인자로 전달한다.
+export const withErrorHandling = (apiCall, showError) => async (...args) => {
+  try {
+    return await apiCall(...args);
+  } catch (error) {
+    if (error.response) {
+      const { status, data } = error.response;
+      if (status === 404) {
+        showError('요청한 항목을 찾을 수 없습니다.', data.detail);
+      } else if (status === 413) {
+        showError('파일 크기가 너무 큽니다.', '50MB 이하의 파일을 선택해주세요.');
+      } else if (status >= 500) {
+        showError('서버 오류가 발생했습니다.', '잠시 후 다시 시도해주세요.');
+      } else {
+        showError(data.error || '오류가 발생했습니다.', data.detail);
+      }
+    } else if (error.request) {
+      showError('네트워크 오류', '인터넷 연결을 확인해주세요.');
+    } else {
+      showError('요청 처리 오류', error.message);
+    }
+    throw error;
+  }
+};
+// 사용 예시
+function MyComponent() {
+  const { showError } = useError();
+  const [projects, setProjects] = useState([]);
+  const fetchProjects = async () => {
+    try {
+      const response = await apiClient.get('/api/projects');
+      setProjects(response.data);
+    } catch (error) {
+      // apiClient에는 전역 인터셉터가 없으므로 여기서 직접 에러를 표시한다
+      showError('프로젝트를 불러오지 못했습니다.', error.message);
+    }
+  };
+  return (
+    <div>
+      <button onClick={fetchProjects}>프로젝트 불러오기</button>
+      <ul>
+        {projects.map(p => (
+          <li key={p.project_id}>{p.project_name}</li>
+        ))}
+      </ul>
+    </div>
+  );
+}
+```
+### Vue.js
+```javascript
+// plugins/api.js
+import axios from 'axios';
+export default {
+  install: (app) => {
+    const apiClient = axios.create({
+      baseURL: 'http://localhost:8000',
+    });
+    apiClient.interceptors.response.use(
+      response => response,
+      error => {
+        if (error.response) {
+          const { status, data } = error.response;
+          let message = '오류가 발생했습니다.';
+          if (status === 404) {
+            message = '요청한 항목을 찾을 수 없습니다.';
+          } else if (status === 413) {
+            message = '파일 크기가 너무 큽니다. 50MB 이하의 파일을 선택해주세요.';
+          } else if (status >= 500) {
+            message = '서버 오류가 발생했습니다. 잠시 후 다시 시도해주세요.';
+          } else if (data.error) {
+            message = data.error;
+          }
+          // Vue Toast 또는 Notification 라이브러리 사용
+          app.config.globalProperties.$toast.error(message);
+        } else if (error.request) {
+          app.config.globalProperties.$toast.error('네트워크 오류가 발생했습니다.');
+        }
+        return Promise.reject(error);
+      }
+    );
+    app.config.globalProperties.$api = apiClient;
+  }
+};
+```
+---
+## 에러 로깅
+프로덕션 환경에서는 에러를 로깅하여 문제를 추적하세요.
+```javascript
+function logErrorToServer(error) {
+  // Sentry, LogRocket, 자체 로깅 서버 등에 에러 전송
+  if (window.Sentry) {
+    Sentry.captureException(error);
+  }
+  // 또는 자체 로깅 API 호출
+  fetch('/api/logs/errors', {
+    method: 'POST',
+    headers: { 'Content-Type': 'application/json' },
+    body: JSON.stringify({
+      message: error.message,
+      stack: error.stack,
+      url: window.location.href,
+      userAgent: navigator.userAgent,
+      timestamp: new Date().toISOString()
+    })
+  }).catch(() => {
+    // 로깅 실패 시 무시 (무한 루프 방지)
+  });
+}
+```
+---
+## 다음 단계
+- **[개요](./00_개요.md)**: 시스템 소개 및 시작하기
+- **[프로젝트 API](./01_프로젝트_API.md)**: 프로젝트 관리 API
+- **[데이터 모델](./05_데이터_모델.md)**: 에러 응답 스키마

docs/Backend API 문서/07_예제_코드.md ADDED Viewed

File without changes

docs/백엔드 환경 설정/백엔드 DB 준비방법.txt ADDED Viewed

	@@ -0,0 +1,61 @@

+• DB 준비 절차
+  - Backend/docker-compose.yml:3 의 mysql 서비스는 포트를 ${MYSQL_PORT:-3308}:3306 으로 매핑하므로 Backend/.env.docker 의 기본
+    값(3308)을 그대로 쓰면 호스트에서 127.0.0.1:3308 로 접근한다.
+  - 컨테이너 실행: docker compose -f Backend/docker-compose.yml --env-file Backend/.env.docker up -d mysql
+    (이미 띄워져 있다면 docker ps 로 상태만 확인).
+  - 백엔드가 같은 컨테이너를 사용하려면 .env 또는 환경 변수에 DB_HOST=127.0.0.1, DB_PORT=3308, DB_USER=root,
+    DB_PASSWORD=1q2w3e4r, DB_NAME=smarteyessen_db 를 명시한다 (Backend/.env.example 참고).
+  스키마 초기화
+  - Project/DB/erd_schema.sql:58 은 DROP DATABASE 후 전체 12개 테이블을 다시 생성하므로 실행 전 기존 데이터 백업을 권장한다.
+  - 스키마 적용:
+    docker exec -i smart_mysql \
+      mysql -uroot -p'1q2w3e4r' \
+      < Project/DB/erd_schema.sql
+    (스크립트가 내부에서 USE smarteyessen_db 를 수행).
+  - 완료 후 확인:
+    docker exec -it smart_mysql \
+      mysql -uroot -p'1q2w3e4r' smarteyessen_db \
+      -e "SHOW TABLES;"
+  기본 데이터 시드
+  - 테스트용 사용자/문서 타입만 필요한 경우 Backend/scripts/seed_test_data.sql 을 적용한다.
+    docker exec -i smart_mysql \
+      mysql -uroot -p'1q2w3e4r' smarteyessen_db \
+      < Backend/scripts/seed_test_data.sql
+  - SELECT user_id, email FROM users; 와 SELECT doc_type_id, type_name FROM document_types; 로 삽입 여부를 확인한다.
+  백엔드 연결 검증
+  - 가상환경에서 pip install -r Backend/requirements.txt 후 python -m Backend.app.database 를 실행하면 test_connection() 결과가
+    출력된다 (Backend/app/database.py:119).
+  - 서버 기동:
+    uvicorn Backend.app.main:app --reload --host 0.0.0.0 --port 8000
+    시작 로그에 ✅ Database connection successful 과 ✅ Database tables initialized 가 보이면 연결 성공.
+  - 런타임 헬스 체크: curl http://localhost:8000/health → 응답의 "database": "connected" 여부 확인.
+  파이프라인 테스트 방법
+  - 프로젝트 생성 → 단일 이미지 업로드 → 비동기 분석 → 상태 폴링 순으로 REST 엔드포인트를 호출한다 (Backend/app/routers/
+    pages.py:49, Backend/app/routers/analysis.py:45).
+  - 다중 이미지 또는 PDF(application/pdf) 업로드 후 POST /api/projects/{project_id}/analyze 로 배치 파이프라인을 호출하면
+    page_results 에 처리 요약이 담긴다.
+  - DB 검증 쿼리 예시:
+    SELECT page_id, analysis_status FROM pages WHERE project_id = ?;
+    SELECT COUNT(*) FROM layout_elements WHERE page_id = ?;
+    SELECT version_type FROM text_versions WHERE page_id = ? ORDER BY version_number DESC;
+  - OpenAI 설명을 테스트할 경우 .env 의 OPENAI_API_KEY 와 요청 본문 use_ai_descriptions=true 를 함께 지정한다.
+  문제 해결 팁
+  - 스키마 재생성 후에도 테이블이 보이지 않으면 볼륨(smart_mysql_data)을 삭제 후 다시 docker compose up 으로 마운트한다.
+  - 포트 충돌 시 Backend/.env.docker 의 MYSQL_PORT 값을 바꾼 뒤 컨테이너를 재기동하고 .env 도 동일하게 맞춘다.
+  - OCR/모델 로딩 실패는 백엔드 로그(loguru)에 경로·모델 키를 남기므로 tail -f 로 추적하면서 해결한다.

docs/백엔드 환경 설정/백엔드 테스트 준비방법.txt ADDED Viewed

	@@ -0,0 +1,202 @@

+• 구성 개요
+  - Backend/app/main.py:31 에서 FastAPI 앱을 초기화하고 CORS·라우터를 등록하며 시작/종료 훅에서 DB 연결과 테이블 생성을 보장
+    한다.
+  - Backend/app/database.py:27 는 .env 값을 읽어 MySQL용 SQLAlchemy 엔진과 세션 팩토리를 구성하고 health check/테이블 생성 유틸
+    을 제공한다.
+  - Backend/app/routers/pages.py:49 는 이미지·PDF 업로드 엔드포인트를 정의해 파일을 uploads/에 저장하고 pages 레코드를 생성
+    한다.
+  - Backend/app/routers/analysis.py:45 는 프로젝트 일괄 분석과 페이지 단일 분석(비동기) 엔드포인트를 노출하며 내부적으로 배치
+    파이프라인을 호출한다.
+  - Backend/app/services/batch_analysis.py:166 이 레이아웃→OCR→AI 설명→정렬→포맷팅까지 단일 페이지 파이프라인을 묶어 DB에 커밋
+    한다.
+  - Backend/app/services/text_version_service.py:49 는 자동 포맷팅 결과를 text_versions 테이블에 버전으로 적재하고 현재 버전을
+    관리한다.
+  파이프라인 흐름
+  - Backend/app/routers/pages.py:125 에서 멀티파트 업로드를 처리하여 원본 파일을 저장하고 pages 테이블에
+    analysis_status="pending" 으로 레코드를 만든다.
+  - Backend/app/services/analysis_service.py:308 이 DocLayout-YOLO로 레이아웃을 검출하고 layout_elements 테이블을 최신 상태로
+    갱신한다.
+  - Backend/app/services/analysis_service.py:533 는 PaddleOCR/Tesseract 기반 OCR 을 수행해 text_contents 테이블에 upsert 한다.
+  - Backend/app/services/analysis_service.py:620 이 AI 설명 옵션이 활성화된 figure/table/flowchart 요소에 대해 OpenAI 설명을 생
+    성하고 ai_descriptions 테이블을 업데이트한다.
+  - Backend/app/services/sorter.py:993 은 정렬 결과를 question_groups·question_elements 로 저장하여 문제 단위 그룹 정보를 유지
+    한다.
+  - Backend/app/services/batch_analysis.py:250 은 TextFormatter 출력으로 text_versions 를 생성하고 페이지/프로젝트 상태를
+    completed|partial|error 로 갱신한다.
+  서버 준비
+  - 환경 변수: cp Backend/.env.example Backend/.env 후 Docker MySQL 호스트/포트(예: 127.0.0.1:3308), 계정, OPENAI_API_KEY 등을
+    실제 값으로 지정한다.
+  - 의존성: python -m venv venv && source venv/bin/activate && pip install -r Backend/requirements.txt 로 모델·OCR·FastAPI 패키
+    지를 설치한다.
+  - DB: docker compose -f Backend/docker-compose.yml --env-file Backend/.env.docker up -d mysql 로 컨테이너를 가동하고 필요 시
+    Backend/scripts/seed_test_data.sql 을 로드해 기본 user/doc_type 데이터를 넣는다.
+  - 마이그레이션: 개발 모드라면 ENVIRONMENT=development 인 상태에서 첫 서버 기동 시 init_db() 가 테이블을 자동 생성한다.
+  - 서버 실행: 리포지토리 루트에서 uvicorn Backend.app.main:app --host 0.0.0.0 --port 8000 --reload 로 백엔드를 기동하고
+    http://localhost:8000/docs 로 접근성을 확인한다.
+  # DB 시드 예시
+  docker exec -i smart_mysql mysql -uroot -p'1q2w3e4r' smarteyessen_db < Backend/scripts/seed_test_data.sql
+  단일 이미지 검증
+  - 프로젝트 생성: POST /api/projects 로 {"project_name": "...", "type_id": 1, "user_id": 1} 을 보내고 응답의 project_id 를 기
+    록한다.
+  - 이미지 업로드: POST /api/pages/upload 에 project_id, page_number=1, JPEG/PNG 파일을 멀티파트로 전송해 page_id 를 확보한다
+    (예시 파일: Backend/temp_image.jpg).
+  - 단일 분석 요청: POST /api/pages/{page_id}/analyze/async 로 AI 설명 사용 여부와 OpenAI 키를 전달해 비동기 작업을 시작한다.
+  - 상태 폴링: GET /api/analysis/jobs/{job_id} 를 수초 간격으로 호출하여 status 가 completed 로 변할 때까지 확인한다.
+  - 결과 검증: 완료 후 MySQL 에서 pages, layout_elements, text_contents, ai_descriptions, text_versions 를 page_id 기준으로 조
+    회해 레코드가 채워졌는지 확인한다.
+  BASE_URL=http://localhost:8000
+  # 1) 프로젝트 생성
+  PROJECT_ID=$(curl -s -X POST "$BASE_URL/api/projects" \
+    -H 'Content-Type: application/json' \
+    -d '{"project_name":"single-demo","type_id":1,"user_id":1}' | jq '.project_id')
+  # 2) 이미지 업로드
+  PAGE_RESP=$(curl -s -X POST "$BASE_URL/api/pages/upload" \
+    -F project_id="$PROJECT_ID" \
+    -F page_number=1 \
+    -F file=@Backend/temp_image.jpg)
+  PAGE_ID=$(echo "$PAGE_RESP" | jq '.page_id')
+  # 3) 비동기 분석
+  JOB_ID=$(curl -s -X POST "$BASE_URL/api/pages/$PAGE_ID/analyze/async" \
+    -H 'Content-Type: application/json' \
+    -d '{"use_ai_descriptions":false}' | jq -r '.job_id')
+  # 4) 상태 확인
+  curl -s "$BASE_URL/api/analysis/jobs/$JOB_ID" | jq
+  다중 이미지 검증
+  - 동일 프로젝트에 대해 page_number 를 증가시키며 여러 장을 /api/pages/upload 으로 업로드한다.
+  - 업로드가 끝나면 POST /api/projects/{project_id}/analyze 를 호출해 pending/error 상태 페이지를 일괄 처리한다.
+  - 응답의 page_results 에서 각 페이지의 layout_count·ocr_count·status 를 검토한다.
+  - MySQL pages 테이블에서 해당 프로젝트의 analysis_status 가 모두 completed 인지 확인하고, question_groups·question_elements
+    에 페이지별 그룹이 생성됐는지 점검한다.
+  - 발견된 실패 페이지는 GET /api/pages/{page_id} 로 메타데이터를 확인하고 필요 시 개별 비동기 분석으로 재시도한다.
+  # 추가 이미지 업로드 (예: 두 번째 페이지)
+  curl -s -X POST "$BASE_URL/api/pages/upload" \
+    -F project_id="$PROJECT_ID" \
+    -F page_number=2 \
+    -F file=@path/to/second_page.png > /dev/null
+  # 프로젝트 전체 분석
+  curl -s -X POST "$BASE_URL/api/projects/$PROJECT_ID/analyze" \
+    -H 'Content-Type: application/json' \
+    -d '{"use_ai_descriptions":true,"api_key":"'"$OPENAI_API_KEY"'"}' | jq '.page_results'
+  PDF 검증
+  - 샘플 PDF 가 없으면 python Backend/scripts/test_pdf_upload.py 로 3페이지 시험용 문서를 생성한다.
+  - POST /api/pages/upload 에 Content-Type: application/pdf 파일을 전송하면 서버가 페이지별 JPEG 로 분할해 여러 page_id 를 반환
+    한다.
+  - 자동 계산된 시작 페이지 번호가 기존 페이지 수 이후로 이어지는지 응답의 pages[].page_number 로 확인한다.
+  - 이후 POST /api/projects/{project_id}/analyze 로 전체 페이지를 처리하고, 변환된 JPEG 파일이 uploads/{project_id}/ 에 저장됐
+    는지 확인한다.
+  - DB 에서 layout_elements·text_contents 를 페이지별로 조회하여 PDF 변환 후 파이프라인이 동일하게 적용됐는지 검증한다.
+  # Upload a PDF; BASE_URL and PROJECT_ID must already be set in the shell.
+  # The PDF MIME type is declared on the file part itself with ";type=". Passing
+  # -H 'Content-Type: application/pdf' together with -F would override the request's
+  # multipart/form-data content type, so the server could no longer parse the form fields.
+  curl -s -X POST "$BASE_URL/api/pages/upload" \
+    -F project_id="$PROJECT_ID" \
+    -F "file=@Backend/scripts/test_sample.pdf;type=application/pdf" \
+    | jq '.pages[].page_number'
+  DB 확인
+  - MySQL CLI 에 접속 후 분석 대상 프로젝트/페이지를 확인하여 상태 전이를 검증한다.
+  - 레이아웃과 OCR 수를 비교해 누락 요소나 0건 여부를 빠르게 파악한다.
+  - AI 설명을 사용했다면 ai_descriptions 에 element_id 와 model_name 이 채워졌는지 확인한다.
+  - 텍스트 버전 히스토리를 확인해 자동 포맷팅 내용이 version_type='auto_formatted' 로 저장됐는지 살핀다.
+  USE smarteyessen_db;
+  SELECT page_id, analysis_status, processing_time FROM pages WHERE project_id = {PROJECT_ID};
+  SELECT COUNT(*) AS layout_cnt FROM layout_elements WHERE page_id = {PAGE_ID};
+  SELECT COUNT(*) AS ocr_cnt FROM text_contents WHERE element_id IN (
+    SELECT element_id FROM layout_elements WHERE page_id = {PAGE_ID}
+  );
+  SELECT version_id, version_type, created_at FROM text_versions WHERE page_id = {PAGE_ID} ORDER BY version_number DESC;
+  일괄 테스트 스크립트
+  - 아래 예시는 requests 와 mysql-connector-python 을 활용해 단일 이미지/다중 이미지 시나리오를 한 번에 실행하고 결과를 요약한다
+    (PDF 시나리오는 예시 코드에 포함되어 있지 않으므로 위 PDF 검증 절차를 참고해 별도로 추가한다).
+  - 환경 변수 API_BASE_URL, OPENAI_API_KEY, MYSQL_HOST, MYSQL_PORT, MYSQL_PASSWORD 등을 미리 설정한다.
+  - 프로젝트 생성 이후 단일 이미지→다중 이미지 업로드 순서로 실행하며, 마지막에 DB 에서 페이지별 analysis_status 를 조회한다.
+  - 실제 파일 경로는 프로젝트에 맞게 교체하고, GPU/모델 로딩 시간을 고려해 time.sleep() 으로 폴링 간격을 조정한다.
+  import os, time, requests, mysql.connector
+  # Root URL of the backend under test; a KeyError is raised here when API_BASE_URL is not set.
+  base = os.environ["API_BASE_URL"]
+  # JSON content-type header, passed explicitly by create_project.
+  headers = {"Content-Type": "application/json"}
+  def create_project(name):
+      resp = requests.post(f"{base}/api/projects", headers=headers,
+                           json={"project_name": name, "type_id": 1, "user_id": 1})
+      resp.raise_for_status()
+      return resp.json()["project_id"]
+  def upload_image(project_id, page_number, path):
+      files = {"file": open(path, "rb")}
+      data = {"project_id": str(project_id), "page_number": str(page_number)}
+      resp = requests.post(f"{base}/api/pages/upload", files=files, data=data)
+      resp.raise_for_status()
+      return resp.json()["page_id"]
+  def trigger_page(page_id):
+      resp = requests.post(f"{base}/api/pages/{page_id}/analyze/async",
+                           json={"use_ai_descriptions": False})
+      resp.raise_for_status()
+      job_id = resp.json()["job_id"]
+      while True:
+          status = requests.get(f"{base}/api/analysis/jobs/{job_id}").json()
+          if status["status"] in {"completed", "failed"}:
+              return status
+          time.sleep(3)
+  def run_project(project_id):
+      resp = requests.post(f"{base}/api/projects/{project_id}/analyze",
+                           json={"use_ai_descriptions": True,
+                                 "api_key": os.environ.get("OPENAI_API_KEY")})
+      resp.raise_for_status()
+      return resp.json()
+  def fetch_counts(project_id):
+      conn = mysql.connector.connect(host=os.environ["MYSQL_HOST"],
+                                     port=os.environ.get("MYSQL_PORT", 3308),
+                                     user="root",
+                                     password=os.environ["MYSQL_PASSWORD"],
+                                     database="smarteyessen_db")
+      cur = conn.cursor()
+      cur.execute("SELECT page_id, analysis_status FROM pages WHERE project_id=%s", (project_id,))
+      pages = cur.fetchall()
+      cur.close(); conn.close()
+      return pages
+  if __name__ == "__main__":
+      project = create_project("pipeline-suite")
+      page1 = upload_image(project, 1, "Backend/temp_image.jpg")
+      print(trigger_page(page1))
+      page2 = upload_image(project, 2, "path/to/second.png")
+      page3 = upload_image(project, 3, "path/to/third.png")
+      print(run_project(project))
+      print(fetch_counts(project))
+  추가 체크포인트
+  - OpenAI 호출을 활성화할 경우 OPENAI_API_KEY 와 요금 한도를 점검하고, 테스트에서는 use_ai_descriptions=false 로 속도를 높일
+    수 있다.
+  - Torch/Paddle 로 모델을 처음 로드할 때 수십 초가 걸리므로 CI 환경에서는 사전 워밍업이나 모델 경량화를 고려한다.
+  - Docker MySQL 과 애플리케이션 간 네트워크 포트를 일치시키고, 대용량 PDF 변환 시 uploads/ 디렉토리 용량을 주기적으로 정리
+    한다.
+  - 텍스트 버전 자동 생성 후 사용자가 수정하면 save_user_edited_version 로 캐시가 무효화되므로 다운로드 서비스 정확도를 위해 수
+    정 이력을 기록한다.
+  - 오류 발생 시 pages.analysis_status='error' 로 남으니 재시도 전 로그 (loguru) 를 확인해 모델·경로·권한 문제를 해결한다.

requirements.txt CHANGED Viewed

@@ -25,10 +25,24 @@ python-dotenv==1.0.0
 # ============================================================================
 # AI/ML 라이브러리
 # ============================================================================
-# DocLayout-YOLO 및 OCR
-ultralytics==8.1.0
 paddlepaddle==2.6.0
 paddleocr==2.7.0
 # OpenAI API (AI 설명 생성)
 openai==1.10.0
@@ -37,11 +51,23 @@ openai==1.10.0
 # ============================================================================
 pillow==10.2.0
 opencv-python==4.9.0.80
 # ============================================================================
-# 문서 생성
 # ============================================================================
 python-docx==1.1.0
 # ============================================================================
 # 유틸리티

 # ============================================================================
 # AI/ML 라이브러리
 # ============================================================================
+# PyTorch (DocLayout-YOLO 호환 버전)
+torch==2.0.1
+torchvision==0.15.2
+# DocLayout-YOLO (Project 검증된 버전)
+ultralytics==8.0.196
+# PaddleOCR
 paddlepaddle==2.6.0
 paddleocr==2.7.0
+# Tesseract OCR
+pytesseract==0.3.10
+# Hugging Face Transformers
+huggingface-hub>=0.17.0
+transformers==4.35.2
 # OpenAI API (AI 설명 생성)
 openai==1.10.0
 # ============================================================================
 pillow==10.2.0
 opencv-python==4.9.0.80
+matplotlib>=3.5.0
+scikit-image==0.22.0
+imageio==2.31.6
+# ============================================================================
+# 데이터 처리 및 분석
+# ============================================================================
+pandas>=1.3.0
+scipy>=1.7.0
+scikit-learn>=1.0.0
+numpy==1.26.4
 # ============================================================================
+# 문서 생성 및 처리
 # ============================================================================
 python-docx==1.1.0
+PyMuPDF==1.23.8  # PDF 처리
 # ============================================================================
 # 유틸리티

scripts/reset_db.sh ADDED Viewed

	@@ -0,0 +1,54 @@

+#!/usr/bin/env bash
+#
+# Development/test reset script for the SmartEyeSsen MySQL schema
+# -------------------------------------------------------
+# - Drops and recreates the smarteyessen_db schema used by the docker mysql container.
+# - Then initializes the tables through Backend.app.database.init_db().
+# - DB_HOST, DB_PORT, DB_USER, DB_PASSWORD and DB_NAME may be overridden through the
+#   environment; the defaults below are used for any that are unset or empty.
+#
+# Usage:
+#   chmod +x Backend/scripts/reset_db.sh
+#   Backend/scripts/reset_db.sh
+#
+set -euo pipefail
+# Repository root (two levels above this script) so that `Backend` is importable as a package.
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
+export PYTHONPATH="${ROOT_DIR}"
+DB_HOST="${DB_HOST:-10.255.255.254}"
+DB_PORT="${DB_PORT:-3308}"
+DB_USER="${DB_USER:-root}"
+DB_PASSWORD="${DB_PASSWORD:-1q2w3e4r}"
+DB_NAME="${DB_NAME:-smarteyessen_db}"
+# Export the resolved settings so the Python step below connects to the same database
+# that the mysql client just recreated, rather than to whatever its own defaults point at.
+# NOTE(review): assumes Backend.app.database reads these DB_* variables from the environment - confirm.
+export DB_HOST DB_PORT DB_USER DB_PASSWORD DB_NAME
+# Build the client command as an array so each argument stays correctly quoted.
+MYSQL_CMD=(
+  mysql
+  -h "${DB_HOST}"
+  -P "${DB_PORT}"
+  -u "${DB_USER}"
+)
+if [[ -n "${DB_PASSWORD}" ]]; then
+  MYSQL_CMD+=(-p"${DB_PASSWORD}")
+fi
+echo "🔄 Dropping and recreating schema \`${DB_NAME}\` on ${DB_HOST}:${DB_PORT}..."
+"${MYSQL_CMD[@]}" <<SQL
+DROP DATABASE IF EXISTS \`${DB_NAME}\`;
+CREATE DATABASE \`${DB_NAME}\` CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci;
+SQL
+echo "✅ Schema recreated."
+echo "📦 Initializing tables via Backend.app.database.init_db()..."
+python - <<'PYTHON'
+from Backend.app.database import init_db
+if __name__ == "__main__":
+    init_db()
+PYTHON
+echo "✅ Table initialization complete."
+echo "🎉 Database reset finished. You can now rerun backend services or seed data as needed."

scripts/seed_test_data.sql CHANGED Viewed

@@ -22,9 +22,8 @@ ON DUPLICATE KEY UPDATE
 -- ============================================================================
 INSERT INTO document_types (doc_type_id, type_name, model_name, sorting_method, description, created_at, updated_at)
 VALUES
-    (1, '일반문서', 'general', 'reading_order', '테스트용 기본 문서 타입', NOW(), NOW()),
-    (2, '수학문제', 'math', 'question_based', '수학 문제가 포함된 문서', NOW(), NOW()),
-    (3, '표/차트', 'table', 'reading_order', '표와 차트가 포함된 문서', NOW(), NOW())
 ON DUPLICATE KEY UPDATE
     type_name = VALUES(type_name),
     model_name = VALUES(model_name),

 -- ============================================================================
 INSERT INTO document_types (doc_type_id, type_name, model_name, sorting_method, description, created_at, updated_at)
 VALUES
+    (1, '학습지', 'worksheet', 'question_based', '학습지가 포함된 문서', NOW(), NOW()),
+    (2, '일반문서', 'general', 'reading_order', '테스트용 기본 문서 타입', NOW(), NOW())
 ON DUPLICATE KEY UPDATE
     type_name = VALUES(type_name),
     model_name = VALUES(model_name),

scripts/test_pdf_upload.py CHANGED Viewed

File without changes