Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
|
@@ -971,81 +971,67 @@ async def convert_text_to_pdf(text_content: str, title: str) -> str:
|
|
| 971 |
# 영구 저장소의 파일 경로
|
| 972 |
file_path = PERMANENT_PDF_DIR / filename
|
| 973 |
|
| 974 |
-
# 폰트
|
| 975 |
-
from reportlab.
|
| 976 |
-
from reportlab.
|
| 977 |
|
| 978 |
-
#
|
| 979 |
-
|
| 980 |
-
if not FONT_DIR.exists():
|
| 981 |
-
FONT_DIR.mkdir(parents=True)
|
| 982 |
-
|
| 983 |
-
# 기본 한글 폰트 파일 경로
|
| 984 |
-
default_font_path = FONT_DIR / "NanumGothic.ttf"
|
| 985 |
-
|
| 986 |
-
# 폰트 파일이 없으면 다운로드
|
| 987 |
-
if not default_font_path.exists():
|
| 988 |
-
import urllib.request
|
| 989 |
-
try:
|
| 990 |
-
# ๋๋๊ณ ๋ ํฐํธ ๋ค์ด๋ก๋ (Google Fonts์์)
|
| 991 |
-
font_url = "https://fonts.gstatic.com/s/nanumgothic/v21/PN_3Rfi-oW3hYwmKDpxS7F_z-7rJxHVIsPV5MbNO2rV2_va-Nv6p.ttf"
|
| 992 |
-
urllib.request.urlretrieve(font_url, str(default_font_path))
|
| 993 |
-
logger.info(f"한글 폰트 다운로드 완료: {default_font_path}")
|
| 994 |
-
except Exception as e:
|
| 995 |
-
logger.error(f"폰트 다운로드 실패: {e}")
|
| 996 |
-
# 폰트 다운로드 실패 시 기본 폰트 사용
|
| 997 |
-
default_font_path = None
|
| 998 |
-
|
| 999 |
-
# 폰트 등록
|
| 1000 |
-
font_name = "NanumGothic"
|
| 1001 |
-
if default_font_path and default_font_path.exists():
|
| 1002 |
-
pdfmetrics.registerFont(TTFont(font_name, str(default_font_path)))
|
| 1003 |
-
else:
|
| 1004 |
-
# 기본 폰트를 사용할 수 없는 경우 Helvetica 사용
|
| 1005 |
-
font_name = "Helvetica"
|
| 1006 |
-
logger.warning("한글 폰트를 찾을 수 없어 기본 폰트를 사용합니다. 한글이 제대로 표시되지 않을 수 있습니다.")
|
| 1007 |
-
|
| 1008 |
-
# 임시 PDF 파일 생성
|
| 1009 |
-
pdf_buffer = io.BytesIO()
|
| 1010 |
-
doc = SimpleDocTemplate(pdf_buffer, pagesize=letter, encoding='utf-8')
|
| 1011 |
-
styles = getSampleStyleSheet()
|
| 1012 |
-
|
| 1013 |
-
# 스타일 설정 - 한글 폰트 적용
|
| 1014 |
-
styles['Title'].fontName = font_name
|
| 1015 |
-
styles['Normal'].fontName = font_name
|
| 1016 |
|
| 1017 |
-
#
|
| 1018 |
-
|
| 1019 |
|
| 1020 |
# 제목 추가
|
| 1021 |
-
|
| 1022 |
-
|
| 1023 |
-
content.append(Spacer(1, 12))
|
| 1024 |
|
| 1025 |
-
# 본문 텍스트
|
| 1026 |
-
|
|
|
|
|
|
|
| 1027 |
|
| 1028 |
-
# 텍스트를 단락으로
|
| 1029 |
paragraphs = text_content.split('\n\n')
|
| 1030 |
-
for para in paragraphs:
|
| 1031 |
-
if para.strip():
|
| 1032 |
-
# XML 이스케이프 처리
|
| 1033 |
-
from xml.sax.saxutils import escape
|
| 1034 |
-
safe_para = escape(para.replace('\n', '<br/>'))
|
| 1035 |
-
p = Paragraph(safe_para, normal_style)
|
| 1036 |
-
content.append(p)
|
| 1037 |
-
content.append(Spacer(1, 10))
|
| 1038 |
|
| 1039 |
-
|
| 1040 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1041 |
|
| 1042 |
-
#
|
| 1043 |
-
|
| 1044 |
-
f.write(pdf_buffer.getvalue())
|
| 1045 |
|
| 1046 |
# 메인 디렉토리에도 복사
|
| 1047 |
-
|
| 1048 |
-
f.write(pdf_buffer.getvalue())
|
| 1049 |
|
| 1050 |
# PDF ID 생성 및 메타데이터 저장
|
| 1051 |
pdf_id = generate_pdf_id(filename)
|
|
@@ -1067,14 +1053,10 @@ async def convert_text_to_pdf(text_content: str, title: str) -> str:
|
|
| 1067 |
|
| 1068 |
# AI๋ฅผ ์ฌ์ฉํ์ฌ ํ
์คํธ๋ฅผ ๋ ๊ตฌ์กฐํ๋ ํ์์ผ๋ก ๋ณํ
|
| 1069 |
async def enhance_text_with_ai(text_content: str, title: str) -> str:
|
| 1070 |
-
|
| 1071 |
-
|
| 1072 |
-
|
| 1073 |
-
|
| 1074 |
-
|
| 1075 |
-
# 텍스트가 짧은 경우 원본 반환
|
| 1076 |
-
if len(text_content) < 100:
|
| 1077 |
-
return text_content
|
| 1078 |
|
| 1079 |
# 컨텍스트 크기를 고려하여 텍스트가 너무 길면 앞부분만 사용
|
| 1080 |
max_context_length = 60000
|
|
|
|
| 971 |
# 영구 저장소의 파일 경로
|
| 972 |
file_path = PERMANENT_PDF_DIR / filename
|
| 973 |
|
| 974 |
+
# 기본 Helvetica 폰트 사용 (한글 지원 없음)
|
| 975 |
+
from reportlab.lib.pagesizes import letter
|
| 976 |
+
from reportlab.pdfgen import canvas
|
| 977 |
|
| 978 |
+
# PDF 생성
|
| 979 |
+
c = canvas.Canvas(str(file_path), pagesize=letter)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 980 |
|
| 981 |
+
# 페이지 크기 설정
|
| 982 |
+
page_width, page_height = letter
|
| 983 |
|
| 984 |
# 제목 추가
|
| 985 |
+
c.setFont("Helvetica-Bold", 16)
|
| 986 |
+
c.drawCentredString(page_width/2, page_height - 50, title)
|
|
|
|
| 987 |
|
| 988 |
+
# 본문 텍스트 추가
|
| 989 |
+
c.setFont("Helvetica", 11)
|
| 990 |
+
y_position = page_height - 100
|
| 991 |
+
line_height = 14
|
| 992 |
|
| 993 |
+
# 텍스트를 단락으로 분리
|
| 994 |
paragraphs = text_content.split('\n\n')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 995 |
|
| 996 |
+
for para in paragraphs:
|
| 997 |
+
if not para.strip():
|
| 998 |
+
continue
|
| 999 |
+
|
| 1000 |
+
# 단락 내 줄 바꿈 처리
|
| 1001 |
+
lines = para.split('\n')
|
| 1002 |
+
for line in lines:
|
| 1003 |
+
# 한 줄의 최대 문자 수
|
| 1004 |
+
max_chars_per_line = 80
|
| 1005 |
+
|
| 1006 |
+
# 긴 줄 감싸기
|
| 1007 |
+
import textwrap
|
| 1008 |
+
wrapped_lines = textwrap.wrap(line, width=max_chars_per_line)
|
| 1009 |
+
|
| 1010 |
+
for wrapped_line in wrapped_lines:
|
| 1011 |
+
# 페이지 바꿈 확인
|
| 1012 |
+
if y_position < 50:
|
| 1013 |
+
c.showPage()
|
| 1014 |
+
c.setFont("Helvetica", 11)
|
| 1015 |
+
y_position = page_height - 50
|
| 1016 |
+
|
| 1017 |
+
try:
|
| 1018 |
+
# ASCII 문자만 처리
|
| 1019 |
+
ascii_line = ''.join(c if ord(c) < 128 else ' ' for c in wrapped_line)
|
| 1020 |
+
c.drawString(50, y_position, ascii_line)
|
| 1021 |
+
except:
|
| 1022 |
+
# 오류 발생 시 공백으로 대체
|
| 1023 |
+
c.drawString(50, y_position, "[텍스트 변환 오류]")
|
| 1024 |
+
|
| 1025 |
+
y_position -= line_height
|
| 1026 |
+
|
| 1027 |
+
# 단락 간 간격
|
| 1028 |
+
y_position -= 10
|
| 1029 |
|
| 1030 |
+
# PDF 저장
|
| 1031 |
+
c.save()
|
|
|
|
| 1032 |
|
| 1033 |
# 메인 디렉토리에도 복사
|
| 1034 |
+
shutil.copy2(file_path, PDF_DIR / filename)
|
|
|
|
| 1035 |
|
| 1036 |
# PDF ID 생성 및 메타데이터 저장
|
| 1037 |
pdf_id = generate_pdf_id(filename)
|
|
|
|
| 1053 |
|
| 1054 |
# AI๋ฅผ ์ฌ์ฉํ์ฌ ํ
์คํธ๋ฅผ ๋ ๊ตฌ์กฐํ๋ ํ์์ผ๋ก ๋ณํ
|
| 1055 |
async def enhance_text_with_ai(text_content: str, title: str) -> str:
|
| 1056 |
+
# OpenAI 향상 기능 제거 - 원본 텍스트 그대로 반환
|
| 1057 |
+
return text_content
|
| 1058 |
+
|
| 1059 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1060 |
|
| 1061 |
# 컨텍스트 크기를 고려하여 텍스트가 너무 길면 앞부분만 사용
|
| 1062 |
max_context_length = 60000
|