File size: 3,983 Bytes
be63ac6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
#!/usr/bin/env python3
"""
PDF μ—…λ‘œλ“œ κΈ°λŠ₯ ν…ŒμŠ€νŠΈ 슀크립트
"""

import io
import sys
from pathlib import Path

# Put the backend root (the directory two levels above this file) at the front
# of sys.path; presumably this is what lets the `app.services...` import used
# later in this script resolve when the file is run directly.
backend_root = Path(__file__).parent.parent
sys.path.insert(0, str(backend_root))

from PIL import Image
import fitz  # PyMuPDF


def create_sample_pdf(output_path: str = "test_sample.pdf", num_pages: int = 3) -> str:
    """
    Create a sample PDF for testing; each page carries text and a rectangle.

    Args:
        output_path: Path the PDF file is saved to.
        num_pages: Number of pages to generate.

    Returns:
        The ``output_path`` that was passed in.
    """
    doc = fitz.open()  # called without arguments: start a new PDF document
    try:
        for page_num in range(1, num_pages + 1):
            page = doc.new_page(width=595, height=842)  # A4 size

            # Page heading
            text = f"ν…ŒμŠ€νŠΈ νŽ˜μ΄μ§€ {page_num}"
            point = fitz.Point(100, 100)
            page.insert_text(point, text, fontsize=20, color=(0, 0, 0))

            # Simple shape: a blue outlined box with a caption inside it
            rect = fitz.Rect(100, 150, 400, 300)
            page.draw_rect(rect, color=(0, 0, 1), width=2)
            page.insert_text(fitz.Point(150, 200), f"Sample Box - Page {page_num}", fontsize=14)

        doc.save(output_path)
    finally:
        # Release the document even when drawing or saving raises, so the
        # handle is not leaked on the error path.
        doc.close()

    print(f"βœ… μƒ˜ν”Œ PDF 생성 μ™„λ£Œ: {output_path} ({num_pages}νŽ˜μ΄μ§€)")
    return output_path


def test_pdf_processor() -> bool:
    """
    Exercise the PDF processing module end to end and print a report.

    Steps: build a sample PDF with ``create_sample_pdf``, read it back as
    bytes, convert it to images via ``pdf_processor.convert_pdf_to_images``,
    check that each reported image file exists, print the metadata returned
    by ``pdf_processor.get_pdf_info``, then optionally delete the generated
    files.

    Returns:
        True when every step ran without raising, False when any exception
        was raised during conversion, metadata extraction or cleanup (the
        traceback is printed).
    """
    from app.services.pdf_processor import pdf_processor

    print("=" * 60)
    print("PDF 처리 λͺ¨λ“ˆ ν…ŒμŠ€νŠΈ μ‹œμž‘")
    print("=" * 60)

    # 1. Create the sample PDF
    pdf_path = create_sample_pdf("test_sample.pdf", num_pages=3)

    # 2. Read the PDF back as raw bytes
    with open(pdf_path, "rb") as f:
        pdf_bytes = f.read()

    print(f"\nπŸ“„ PDF 파일 크기: {len(pdf_bytes):,} bytes")

    # 3. PDF -> image conversion
    try:
        project_id = 999  # project ID used only for this test
        start_page_number = 1

        converted_pages = pdf_processor.convert_pdf_to_images(
            pdf_bytes=pdf_bytes,
            project_id=project_id,
            start_page_number=start_page_number
        )

        print(f"\nβœ… PDF λ³€ν™˜ 성곡:")
        print(f"   - 총 {len(converted_pages)}개 νŽ˜μ΄μ§€ λ³€ν™˜")

        # Each entry is read via the keys 'page_number', 'width', 'height',
        # 'image_path' and 'full_path'.
        for page_info in converted_pages:
            print(f"   - νŽ˜μ΄μ§€ {page_info['page_number']}: "
                  f"{page_info['width']}x{page_info['height']}px, "
                  f"경둜: {page_info['image_path']}")

            # Confirm the converted image file actually exists on disk
            image_file = Path(page_info['full_path'])
            if image_file.exists():
                size_kb = image_file.stat().st_size / 1024
                print(f"      β†’ 파일 크기: {size_kb:.1f} KB")
            else:
                print(f"      β†’ ⚠️ 파일 μ—†μŒ: {page_info['full_path']}")

        # 4. PDF metadata extraction ('title' and 'author' are optional keys)
        pdf_info = pdf_processor.get_pdf_info(pdf_bytes)
        print(f"\nπŸ“‹ PDF 메타데이터:")
        print(f"   - 총 νŽ˜μ΄μ§€ 수: {pdf_info['total_pages']}")
        print(f"   - 제λͺ©: {pdf_info.get('title', 'N/A')}")
        print(f"   - μž‘μ„±μž: {pdf_info.get('author', 'N/A')}")

        print("\n" + "=" * 60)
        print("βœ… PDF 처리 λͺ¨λ“ˆ ν…ŒμŠ€νŠΈ μ™„λ£Œ!")
        print("=" * 60)

        # 5. Clean up the generated files (optional)
        try:
            cleanup = input("\nμƒμ„±λœ ν…ŒμŠ€νŠΈ νŒŒμΌμ„ μ‚­μ œν•˜μ‹œκ² μŠ΅λ‹ˆκΉŒ? (y/n): ")
        except EOFError:
            # No interactive stdin (CI, piped run): keep the files instead of
            # letting the broad handler below report a passing run as failed.
            print()  # finish the prompt line that was left without a newline
            cleanup = "n"

        if cleanup.lower() == 'y':
            import shutil
            Path(pdf_path).unlink(missing_ok=True)
            shutil.rmtree(f"uploads/{project_id}", ignore_errors=True)
            print("βœ… ν…ŒμŠ€νŠΈ 파일 μ‚­μ œ μ™„λ£Œ")
        else:
            print(f"ν…ŒμŠ€νŠΈ 파일 μœ μ§€: {pdf_path}, uploads/{project_id}/")

        return True

    except Exception as e:
        # Top-level boundary of the script: report the failure with its
        # traceback and signal it through the return value.
        print(f"\n❌ PDF λ³€ν™˜ μ‹€νŒ¨: {str(e)}")
        import traceback
        traceback.print_exc()
        return False


if __name__ == "__main__":
    # Script entry point: exit status 0 when the check passes, 1 otherwise.
    exit_code = 0 if test_pdf_processor() else 1
    sys.exit(exit_code)