smarteye-backend / scripts /test_pdf_upload.py
KwanHak's picture
sync: Smart_Demo 브랜치의 Backend 코드 병합 & 이미지 로드를 위한 MultiFileLoader 컴포넌트 구현
82c1146
#!/usr/bin/env python3
"""
Test script for the PDF upload feature.
"""
import io
import sys
from pathlib import Path
# Put the backend root (the directory two levels above this file) at the front
# of sys.path so project packages can be imported when the script is run
# directly. The path is resolved to an absolute one first: with a relative
# __file__ (e.g. a script started from its own directory on Python < 3.9),
# ``.parent.parent`` would collapse to "." and point at the current working
# directory instead of the backend root.
backend_root = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(backend_root))
from PIL import Image
import fitz # PyMuPDF
def create_sample_pdf(output_path: str = "test_sample.pdf", num_pages: int = 3) -> str:
    """
    Create a small sample PDF to use as test input.

    Each page is 595 x 842 pt (A4) and contains a page label, a rectangle
    outline drawn with colour (0, 0, 1) and a caption inside the rectangle.

    Args:
        output_path: Path the PDF file is written to.
        num_pages: Number of pages to generate.

    Returns:
        ``output_path``, unchanged, so the call can be used inline.
    """
    doc = fitz.open()  # no arguments: start a new, empty PDF document
    try:
        for page_num in range(1, num_pages + 1):
            page = doc.new_page(width=595, height=842)  # A4 size in points

            # Page label.
            page.insert_text(
                fitz.Point(100, 100),
                f"테스트 페이지 {page_num}",
                fontsize=20,
                color=(0, 0, 0),
            )

            # Simple shape with a caption inside it.
            page.draw_rect(fitz.Rect(100, 150, 400, 300), color=(0, 0, 1), width=2)
            page.insert_text(
                fitz.Point(150, 200),
                f"Sample Box - Page {page_num}",
                fontsize=14,
            )

        doc.save(output_path)
    finally:
        # Close the document even when page creation or saving fails, so the
        # handle is not leaked on the error path.
        doc.close()

    print(f"✅ 샘플 PDF 생성 완료: {output_path} ({num_pages}페이지)")
    return output_path
def test_pdf_processor() -> bool:
    """
    Run the PDF processing module against a generated sample PDF.

    Steps:
        1. Generate a 3-page sample PDF with ``create_sample_pdf``.
        2. Read it back as bytes.
        3. Convert it to page images with ``pdf_processor.convert_pdf_to_images``
           and report each page's number, dimensions, path and file size.
        4. Read the PDF metadata with ``pdf_processor.get_pdf_info``.
        5. Optionally delete the generated files (interactive prompt).

    Returns:
        True when all steps ran without raising, False when any step inside
        the conversion/metadata/cleanup phase raised (the traceback is printed).
    """
    # Imported here rather than at module level, so it is only resolved once
    # the script's sys.path setup has run.
    from app.services.pdf_processor import pdf_processor

    print("=" * 60)
    print("PDF 처리 모듈 테스트 시작")
    print("=" * 60)

    # 1. Generate the sample PDF.
    pdf_path = create_sample_pdf("test_sample.pdf", num_pages=3)

    # 2. Read the PDF as bytes.
    with open(pdf_path, "rb") as f:
        pdf_bytes = f.read()
    print(f"\n📄 PDF 파일 크기: {len(pdf_bytes):,} bytes")

    # 3. PDF -> image conversion.
    try:
        project_id = 999  # project ID used only for this test
        start_page_number = 1
        converted_pages = pdf_processor.convert_pdf_to_images(
            pdf_bytes=pdf_bytes,
            project_id=project_id,
            start_page_number=start_page_number,
        )
        print("\n✅ PDF 변환 성공:")
        print(f" - 총 {len(converted_pages)}개 페이지 변환")
        for page_info in converted_pages:
            print(
                f" - 페이지 {page_info['page_number']}: "
                f"{page_info['width']}x{page_info['height']}px, "
                f"경로: {page_info['image_path']}"
            )
            # Check that the converted image actually exists on disk.
            image_file = Path(page_info['full_path'])
            if image_file.exists():
                size_kb = image_file.stat().st_size / 1024
                print(f" → 파일 크기: {size_kb:.1f} KB")
            else:
                print(f" → ⚠️ 파일 없음: {page_info['full_path']}")

        # 4. PDF metadata extraction.
        pdf_info = pdf_processor.get_pdf_info(pdf_bytes)
        print("\n📋 PDF 메타데이터:")
        print(f" - 총 페이지 수: {pdf_info['total_pages']}")
        print(f" - 제목: {pdf_info.get('title', 'N/A')}")
        print(f" - 작성자: {pdf_info.get('author', 'N/A')}")
        print("\n" + "=" * 60)
        print("✅ PDF 처리 모듈 테스트 완료!")
        print("=" * 60)

        # 5. Optional cleanup of the generated files.
        try:
            cleanup = input("\n생성된 테스트 파일을 삭제하시겠습니까? (y/n): ")
        except EOFError:
            # No interactive stdin (CI, piped input). Keep the files instead
            # of letting the outer handler misreport a successful run as a
            # conversion failure.
            cleanup = "n"

        if cleanup.strip().lower() == 'y':
            import shutil

            Path(pdf_path).unlink(missing_ok=True)
            # NOTE(review): presumably the directory pdf_processor writes the
            # page images to; the path is relative to the current working
            # directory - confirm against the processor's upload settings.
            shutil.rmtree(f"uploads/{project_id}", ignore_errors=True)
            print("✅ 테스트 파일 삭제 완료")
        else:
            print(f"테스트 파일 유지: {pdf_path}, uploads/{project_id}/")
        return True
    except Exception as e:
        # Top-level boundary of the script: report the failure with a full
        # traceback and signal it through the return value.
        print(f"\n❌ PDF 변환 실패: {e}")
        import traceback

        traceback.print_exc()
        return False
if __name__ == "__main__":
    # Map the test result onto the process exit status: 0 on success, 1 on failure.
    exit_code = 0 if test_pdf_processor() else 1
    sys.exit(exit_code)