Spaces:
Sleeping
Sleeping
File size: 3,983 Bytes
be63ac6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
#!/usr/bin/env python3
"""
PDF μ
λ‘λ κΈ°λ₯ ν
μ€νΈ μ€ν¬λ¦½νΈ
"""
import io
import sys
from pathlib import Path
# Make the backend root (the directory one level above this script's own
# directory) importable, so that `app.*` modules resolve when the script is
# run directly.
_script_dir = Path(__file__).parent
backend_root = _script_dir.parent
sys.path.insert(0, str(backend_root))
from PIL import Image
import fitz # PyMuPDF
def create_sample_pdf(output_path: str = "test_sample.pdf", num_pages: int = 3):
    """
    Create a small sample PDF for testing.

    Every page is A4-sized (595 x 842 pt) and carries a Korean heading, a blue
    rectangle outline and an English caption, each including the 1-based page
    number.

    Args:
        output_path: Path of the PDF file to write.
        num_pages: Number of pages to generate; must be at least 1.

    Returns:
        ``output_path``, unchanged, so the call can be used inline.

    Raises:
        ValueError: If ``num_pages`` is less than 1.
    """
    if num_pages < 1:
        # A page-less document cannot be saved; fail early with a clear
        # message instead of deep inside the save call.
        raise ValueError(f"num_pages must be at least 1, got {num_pages}")

    doc = fitz.open()  # new, empty PDF document
    try:
        for page_num in range(1, num_pages + 1):
            page = doc.new_page(width=595, height=842)  # A4 size in points

            # Heading text. The default Helvetica font has no Hangul glyphs,
            # so select PyMuPDF's reserved Korean CJK font for this line.
            page.insert_text(
                fitz.Point(100, 100),
                f"테스트 페이지 {page_num}",
                fontsize=20,
                fontname="korea",
                color=(0, 0, 0),
            )

            # Simple shape: a blue rectangle outline with a caption inside.
            page.draw_rect(fitz.Rect(100, 150, 400, 300), color=(0, 0, 1), width=2)
            page.insert_text(
                fitz.Point(150, 200),
                f"Sample Box - Page {page_num}",
                fontsize=14,
            )

        doc.save(output_path)
    finally:
        # Release the document even when drawing or saving fails.
        doc.close()

    print(f"✅ 샘플 PDF 생성 완료: {output_path} ({num_pages}페이지)")
    return output_path
def _confirm_cleanup(prompt: str) -> bool:
    """Ask whether generated files should be deleted; default to keeping them.

    ``input`` raises ``EOFError`` when stdin is closed and ``OSError`` when a
    test runner has captured stdin. Both are treated as "no" so that a
    non-interactive run neither blocks nor fails.
    """
    try:
        answer = input(prompt)
    except (EOFError, OSError):
        return False
    return answer.strip().lower() == "y"


def test_pdf_processor() -> bool:
    """
    Run the PDF processing service against a generated sample PDF.

    Steps:
        1. Create a 3-page sample PDF via ``create_sample_pdf``.
        2. Read it back as bytes.
        3. Convert it to images with ``pdf_processor.convert_pdf_to_images``
           and report each page (``page_number``, ``width``, ``height``,
           ``image_path``, ``full_path``), checking that the image file exists.
        4. Print the metadata returned by ``pdf_processor.get_pdf_info``
           (``total_pages`` plus optional ``title`` / ``author``).
        5. Optionally delete the sample PDF and ``uploads/<project_id>``.

    Returns:
        True when steps 3 and 4 finish without raising, False otherwise
        (the error and its traceback are printed).
    """
    from app.services.pdf_processor import pdf_processor

    print("=" * 60)
    print("PDF 처리 모듈 테스트 시작")
    print("=" * 60)

    # 1. Create the sample PDF.
    pdf_path = create_sample_pdf("test_sample.pdf", num_pages=3)

    # 2. Read the PDF as bytes.
    with open(pdf_path, "rb") as f:
        pdf_bytes = f.read()
    print(f"\n📄 PDF 파일 크기: {len(pdf_bytes):,} bytes")

    project_id = 999  # project ID reserved for this test run
    start_page_number = 1

    try:
        # 3. PDF -> image conversion.
        converted_pages = pdf_processor.convert_pdf_to_images(
            pdf_bytes=pdf_bytes,
            project_id=project_id,
            start_page_number=start_page_number,
        )
        print("\n✅ PDF 변환 성공:")
        print(f" - 총 {len(converted_pages)}개 페이지 변환")
        for page_info in converted_pages:
            print(
                f" - 페이지 {page_info['page_number']}: "
                f"{page_info['width']}x{page_info['height']}px, "
                f"경로: {page_info['image_path']}"
            )
            # Check that the converted image file was actually written.
            image_file = Path(page_info['full_path'])
            if image_file.exists():
                size_kb = image_file.stat().st_size / 1024
                print(f" → 파일 크기: {size_kb:.1f} KB")
            else:
                print(f" → ⚠️ 파일 없음: {page_info['full_path']}")

        # 4. PDF metadata extraction.
        pdf_info = pdf_processor.get_pdf_info(pdf_bytes)
        print("\n📋 PDF 메타데이터:")
        print(f" - 총 페이지 수: {pdf_info['total_pages']}")
        print(f" - 제목: {pdf_info.get('title', 'N/A')}")
        print(f" - 작성자: {pdf_info.get('author', 'N/A')}")
        print("\n" + "=" * 60)
        print("✅ PDF 처리 모듈 테스트 완료!")
        print("=" * 60)
    except Exception as e:
        # Top-level boundary of the script: report any processing failure and
        # turn it into a False result for the caller.
        print(f"\n❌ PDF 변환 실패: {e}")
        import traceback
        traceback.print_exc()
        return False

    # 5. Optional cleanup of generated files. This runs outside the try block
    # so a problem with the prompt or the deletion is never reported as a
    # PDF conversion failure.
    if _confirm_cleanup("\n생성된 테스트 파일을 삭제하시겠습니까? (y/n): "):
        import shutil
        Path(pdf_path).unlink(missing_ok=True)
        shutil.rmtree(f"uploads/{project_id}", ignore_errors=True)
        print("✅ 테스트 파일 삭제 완료")
    else:
        print(f"테스트 파일 유지: {pdf_path}, uploads/{project_id}/")
    return True
if __name__ == "__main__":
    # Run the check and map its boolean result onto the process exit status:
    # 0 when it succeeded, 1 when it failed.
    exit_code = 0 if test_pdf_processor() else 1
    sys.exit(exit_code)
|