Spaces:
Runtime error
Runtime error
| # utils/pdf_utils.py | |
| """PDF ํ ์คํธ ์ขํ ์ถ์ถ ์ ํธ""" | |
| import fitz # PyMuPDF | |
| from typing import List, Dict | |
def get_text_coordinates(pdf_path: str, page_num: int, search_text: str) -> List[Dict]:
    """Find ``search_text`` on one PDF page and return highlight annotations.

    The query is the first 100 characters of ``search_text`` with surrounding
    whitespace stripped. Every rectangle returned by ``page.search_for`` is
    turned into one annotation dict (streamlit-pdf-viewer layout, highlighter
    style).

    Args:
        pdf_path: Path of the PDF file.
        page_num: Page number, 1-based.
        search_text: Text to search for; only the first 100 characters are used.

    Returns:
        A list of dicts with the keys ``page``, ``x``, ``y``, ``width``,
        ``height``, ``color`` and ``opacity``. The list is empty when nothing
        matches, when the query is blank, or when any error occurs (including
        an out-of-range ``page_num``); errors are printed, never raised.
    """
    # Deliberate best-effort boundary: callers always get a list back, so every
    # failure is reported on stdout and mapped to an empty result.
    try:
        # Tidy the query; if it is too long, keep only the leading part.
        search_query = search_text[:100].strip()

        annotations = []
        # A blank query has nothing to highlight, so the file is not opened.
        if search_query:
            # The context manager closes the document even if the lookup raises.
            with fitz.open(pdf_path) as doc:
                # Reject out-of-range pages explicitly: a value <= 0 would
                # otherwise turn into a negative index and select a page
                # counted from the end of the document.
                if not 1 <= page_num <= doc.page_count:
                    raise IndexError(
                        f"page_num {page_num} is out of range "
                        f"(document has {doc.page_count} pages)"
                    )
                page = doc[page_num - 1]
                # streamlit-pdf-viewer format, highlighter style.
                annotations = [
                    {
                        "page": page_num,  # 1-based index
                        "x": rect.x0,
                        "y": rect.y0,
                        "width": rect.x1 - rect.x0,
                        "height": rect.y1 - rect.y0,
                        "color": "#FFFF00",  # bright yellow
                        "opacity": 0.4,  # 40% opacity (highlighter effect)
                    }
                    for rect in page.search_for(search_query)
                ]

        if annotations:
            print(f" โ {len(annotations)}๊ฐ ํ์ด๋ผ์ดํธ ์์ฑ (ํ์ด์ง {page_num})")
        else:
            print(f" โ ๏ธ ํ ์คํธ '{search_query[:30]}...' ์ฐพ์ง ๋ชปํจ")
        return annotations
    except Exception as e:
        print(f"โ ํ์ด๋ผ์ดํธ ์์ฑ ์ค๋ฅ: {e}")
        return []
if __name__ == "__main__":
    # Manual smoke test: search page 1 of ./test.pdf for a sample phrase and
    # print how many highlight annotations were returned.
    result = get_text_coordinates(
        pdf_path="test.pdf",
        page_num=1,
        search_text="sample text",
    )
    print(f"ํ์ด๋ผ์ดํธ ๊ฐ์: {len(result)}")