ginipick committed on
Commit
7a32207
·
1 Parent(s): 04a8c1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +272 -7
app.py CHANGED
@@ -9,6 +9,13 @@ import threading
9
  import concurrent.futures
10
  from openai import OpenAI
11
  import fitz # PyMuPDF
 
 
 
 
 
 
 
12
 
13
  # λ‘œκΉ… μ„€μ •
14
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
@@ -948,6 +955,199 @@ async def upload_pdf(file: UploadFile = File(...)):
948
  status_code=500
949
  )
950
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
951
  # κ΄€λ¦¬μž 인증 μ—”λ“œν¬μΈνŠΈ
952
  @app.post("/api/admin-login")
953
  async def admin_login(password: str = Form(...)):
@@ -963,9 +1163,17 @@ async def delete_pdf(path: str):
963
  if not pdf_file.exists():
964
  return {"success": False, "message": "νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€"}
965
 
966
- # PDF 파일 μ‚­μ œ
 
 
 
967
  pdf_file.unlink()
968
 
 
 
 
 
 
969
  # κ΄€λ ¨ μΊμ‹œ 파일 μ‚­μ œ
970
  pdf_name = pdf_file.stem
971
  cache_path = get_cache_path(pdf_name)
@@ -979,7 +1187,7 @@ async def delete_pdf(path: str):
979
  # λ©”νƒ€λ°μ΄ν„°μ—μ„œ ν•΄λ‹Ή 파일 ID 제거
980
  to_remove = []
981
  for pid, fpath in pdf_metadata.items():
982
- if os.path.basename(fpath) == pdf_file.name:
983
  to_remove.append(pid)
984
 
985
  for pid in to_remove:
@@ -1351,7 +1559,9 @@ HTML = """
1351
  font-weight: 500;
1352
  display: flex;
1353
  align-items: center;
1354
- box-shadow: var(--shadow-sm);
 
 
1355
  transition: var(--transition);
1356
  position: relative;
1357
  overflow: hidden;
@@ -2111,7 +2321,11 @@ HTML = """
2111
  <button class="upload" id="pdfUploadBtn">
2112
  <i class="fas fa-file-pdf"></i> PDF Upload
2113
  </button>
 
 
 
2114
  <input id="pdfInput" type="file" accept="application/pdf" style="display:none">
 
2115
  </div>
2116
 
2117
  <div class="section-title">Projects</div>
@@ -2436,9 +2650,6 @@ async function submitQuestion(question) {
2436
  }
2437
  }
2438
 
2439
-
2440
-
2441
-
2442
 
2443
  // DOM이 λ‘œλ“œλ˜λ©΄ μ‹€ν–‰
2444
  document.addEventListener('DOMContentLoaded', function() {
@@ -2473,6 +2684,26 @@ async function submitQuestion(question) {
2473
  console.error("PDF μ—…λ‘œλ“œ μš”μ†Œλ₯Ό 찾을 수 μ—†μŒ");
2474
  }
2475
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2476
  // μ„œλ²„ PDF λ‘œλ“œ 및 μΊμ‹œ μƒνƒœ 확인
2477
  loadServerPDFs();
2478
 
@@ -2573,6 +2804,40 @@ async function submitQuestion(question) {
2573
  }
2574
  }
2575
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2576
  function addCard(i, thumb, title, isCached = false, pdfId = null) {
2577
  const d = document.createElement('div');
2578
  d.className = 'card fade-in';
@@ -3636,7 +3901,7 @@ async function submitQuestion(question) {
3636
  }
3637
  </script>
3638
  </body>
3639
- </html>
3640
  """
3641
 
3642
  if __name__ == "__main__":
 
9
  import concurrent.futures
10
  from openai import OpenAI
11
  import fitz # PyMuPDF
12
+ import tempfile
13
+ from reportlab.lib.pagesizes import letter
14
+ from reportlab.pdfgen import canvas
15
+ from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
16
+ from reportlab.lib.styles import getSampleStyleSheet
17
+ import io
18
+ import docx2txt
19
 
20
  # λ‘œκΉ… μ„€μ •
21
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 
955
  status_code=500
956
  )
957
 
958
+ # Convert text content into a PDF file
959
async def convert_text_to_pdf(text_content: str, title: str) -> dict:
    """Render plain text as a PDF, store it, and register it for caching.

    The PDF is written to ``PERMANENT_PDF_DIR`` and copied to ``PDF_DIR``,
    its ID is recorded in ``pdf_metadata``, and a background ``cache_pdf``
    task is scheduled for the permanent copy.

    Args:
        text_content: Body text. Blank-line-separated chunks become
            paragraphs; single newlines inside a chunk become line breaks.
        title: Document title. Shown as the heading and, after sanitising,
            used as the file-name stem.

    Returns:
        A dict with the keys ``"path"`` (permanent file path as a string),
        ``"filename"`` and ``"id"``.

    Raises:
        Exception: Any error raised while building or saving the PDF is
            logged and re-raised unchanged.
    """
    try:
        import re
        from xml.sax.saxutils import escape

        from reportlab.lib.enums import TA_CENTER, TA_LEFT
        from reportlab.lib.pagesizes import letter
        from reportlab.lib.styles import ParagraphStyle
        from reportlab.pdfbase import pdfmetrics
        from reportlab.pdfbase.ttfonts import TTFont
        from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer

        # Build a file-system-safe stem from the title.
        safe_title = re.sub(r'[^\w\-_\. ]', '_', title) or "aibook"

        # A timestamp suffix keeps repeated uploads of the same title apart.
        timestamp = int(time.time())
        filename = f"{safe_title}_{timestamp}.pdf"
        file_path = PERMANENT_PDF_DIR / filename

        # Register the bundled Korean font when it is present next to app.py.
        font_path = BASE / "MaruBuri-SemiBold.ttf"
        font_name = "MaruBuri"
        if font_path.exists():
            pdfmetrics.registerFont(TTFont(font_name, str(font_path)))
            logger.info(f"ν•œκΈ€ 폰트 등둝 성곡: {font_path}")
        else:
            # NOTE(review): the Helvetica fallback presumably cannot render
            # Hangul glyphs - confirm whether that is acceptable.
            font_name = "Helvetica"
            logger.warning(f"ν•œκΈ€ 폰트 νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€: {font_path}. κΈ°λ³Έ 폰트λ₯Ό μ‚¬μš©ν•©λ‹ˆλ‹€.")

        pdf_buffer = io.BytesIO()
        doc = SimpleDocTemplate(pdf_buffer, pagesize=letter, encoding='utf-8')

        title_style = ParagraphStyle(
            name='CustomTitle',
            fontName=font_name,
            fontSize=18,
            leading=22,
            alignment=TA_CENTER,
            spaceAfter=20,
        )
        normal_style = ParagraphStyle(
            name='CustomNormal',
            fontName=font_name,
            fontSize=12,
            leading=15,
            alignment=TA_LEFT,
            spaceBefore=6,
            spaceAfter=6,
        )

        # Paragraph text is parsed as XML-like markup, so the title must be
        # escaped too: a bare "&" or "<" would break the parser.
        story = [Paragraph(escape(title), title_style), Spacer(1, 20)]

        for para in text_content.split('\n\n'):
            if not para.strip():
                continue
            # Escape first, then insert <br/>. The reverse order escapes the
            # break tags themselves and prints them as literal text.
            markup = escape(para).replace('\n', '<br/>')
            story.append(Paragraph(markup, normal_style))
            story.append(Spacer(1, 10))

        doc.build(story)

        # Write the same bytes to the permanent store, then the main directory.
        pdf_bytes = pdf_buffer.getvalue()
        for target in (file_path, PDF_DIR / filename):
            with open(target, 'wb') as f:
                f.write(pdf_bytes)

        # Register the new PDF and persist the metadata.
        pdf_id = generate_pdf_id(filename)
        pdf_metadata[pdf_id] = str(file_path)
        save_pdf_metadata()

        # Start caching in the background.
        asyncio.create_task(cache_pdf(str(file_path)))

        return {
            "path": str(file_path),
            "filename": filename,
            "id": pdf_id,
        }

    except Exception as e:
        logger.error(f"ν…μŠ€νŠΈλ₯Ό PDF둜 λ³€ν™˜ 쀑 였λ₯˜: {e}")
        raise
1067
+
1068
+
1069
+ # Restructure text into a more structured form using AI (version with OpenAI removed)
1070
async def enhance_text_with_ai(text_content: str, title: str) -> str:
    """Return ``text_content`` unchanged.

    This is currently a pass-through: no AI call is made. ``title`` is
    accepted but not used.
    """
    passthrough = text_content
    return passthrough
1073
+
1074
+
1075
+
1076
+ # Endpoint that converts an uploaded text file to PDF
1077
def _decode_text_bytes(raw: bytes) -> str:
    """Decode uploaded text bytes, trying the likely encodings in order."""
    # "utf-8-sig" decodes plain UTF-8 as well and strips a leading BOM, which
    # would otherwise end up as a stray character in the generated PDF.
    # "latin1" maps every byte value, so the loop always succeeds on or
    # before the last candidate and no separate fallback is needed.
    for encoding in ('utf-8-sig', 'euc-kr', 'cp949', 'latin1'):
        try:
            text = raw.decode(encoding, errors='strict')
        except UnicodeDecodeError:
            continue
        logger.info(f"ν…μŠ€νŠΈ 파일 인코딩 감지: {encoding}")
        return text
    # Unreachable in practice; kept so the function always returns a str.
    return raw.decode('utf-8', errors='replace')


@app.post("/api/text-to-pdf")
async def text_to_pdf(file: UploadFile = File(...)):
    """Convert an uploaded .txt/.docx/.doc file to a stored PDF.

    Args:
        file: The uploaded file. The extension check is case-insensitive.

    Returns:
        A ``JSONResponse``. On success (200) the body carries ``success``,
        ``path``, ``name``, ``id`` and ``viewUrl``. An unsupported extension,
        a missing file name, or a Word file that is not a valid .docx
        container gives 400. Any other failure gives 500 with the exception
        text in ``message``.
    """
    try:
        import zipfile

        # The upload may arrive without a file name; treat it as unsupported
        # rather than failing with AttributeError.
        original_name = file.filename or ""
        filename = original_name.lower()
        if not filename.endswith(('.txt', '.docx', '.doc')):
            return JSONResponse(
                content={"success": False, "message": "μ§€μ›ν•˜λŠ” 파일 ν˜•μ‹μ€ .txt, .docx, .docμž…λ‹ˆλ‹€."},
                status_code=400
            )

        content = await file.read()

        if filename.endswith('.txt'):
            text_content = _decode_text_bytes(content)
        else:
            # docx2txt works on a path, so spill the upload to a temp file.
            suffix = os.path.splitext(filename)[1]
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
                temp_file.write(content)
                temp_path = temp_file.name

            try:
                text_content = docx2txt.process(temp_path)
            except zipfile.BadZipFile:
                # Legacy binary .doc files are not zip containers, so
                # docx2txt cannot open them. Report a client error, not 500.
                return JSONResponse(
                    content={
                        "success": False,
                        "message": "The file is not a valid .docx document "
                                   "(legacy binary .doc files are not supported).",
                    },
                    status_code=400
                )
            finally:
                os.unlink(temp_path)

        # Take the title from the original name so its case is preserved.
        title = os.path.splitext(original_name)[0]

        enhanced_text = await enhance_text_with_ai(text_content, title)
        pdf_info = await convert_text_to_pdf(enhanced_text, title)

        return JSONResponse(
            content={
                "success": True,
                "path": pdf_info["path"],
                "name": os.path.splitext(pdf_info["filename"])[0],
                "id": pdf_info["id"],
                "viewUrl": f"/view/{pdf_info['id']}"
            },
            status_code=200
        )
    except Exception as e:
        import traceback
        error_details = traceback.format_exc()
        logger.error(f"ν…μŠ€νŠΈλ₯Ό PDF둜 λ³€ν™˜ 쀑 였λ₯˜: {str(e)}\n{error_details}")
        return JSONResponse(
            content={"success": False, "message": str(e)},
            status_code=500
        )
1150
+
1151
  # κ΄€λ¦¬μž 인증 μ—”λ“œν¬μΈνŠΈ
1152
  @app.post("/api/admin-login")
1153
  async def admin_login(password: str = Form(...)):
 
1163
  if not pdf_file.exists():
1164
  return {"success": False, "message": "νŒŒμΌμ„ 찾을 수 μ—†μŠ΅λ‹ˆλ‹€"}
1165
 
1166
+ # PDF 파일λͺ… κ°€μ Έμ˜€κΈ°
1167
+ filename = pdf_file.name
1168
+
1169
+ # PDF 파일 μ‚­μ œ (영ꡬ μ €μž₯μ†Œμ—μ„œ)
1170
  pdf_file.unlink()
1171
 
1172
+ # 메인 λ””λ ‰ν† λ¦¬μ—μ„œλ„ λ™μΌν•œ 파일이 있으면 μ‚­μ œ (버그 μˆ˜μ •)
1173
+ main_file_path = PDF_DIR / filename
1174
+ if main_file_path.exists():
1175
+ main_file_path.unlink()
1176
+
1177
  # κ΄€λ ¨ μΊμ‹œ 파일 μ‚­μ œ
1178
  pdf_name = pdf_file.stem
1179
  cache_path = get_cache_path(pdf_name)
 
1187
  # λ©”νƒ€λ°μ΄ν„°μ—μ„œ ν•΄λ‹Ή 파일 ID 제거
1188
  to_remove = []
1189
  for pid, fpath in pdf_metadata.items():
1190
+ if os.path.basename(fpath) == filename:
1191
  to_remove.append(pid)
1192
 
1193
  for pid in to_remove:
 
1559
  font-weight: 500;
1560
  display: flex;
1561
  align-items: center;
1562
+
1563
+
1564
+ box-shadow: var(--shadow-sm);
1565
  transition: var(--transition);
1566
  position: relative;
1567
  overflow: hidden;
 
2321
  <button class="upload" id="pdfUploadBtn">
2322
  <i class="fas fa-file-pdf"></i> PDF Upload
2323
  </button>
2324
+ <button class="upload" id="textToAIBookBtn">
2325
+ <i class="fas fa-file-alt"></i> Text to AI-Book
2326
+ </button>
2327
  <input id="pdfInput" type="file" accept="application/pdf" style="display:none">
2328
+ <input id="textInput" type="file" accept=".txt,.docx,.doc" style="display:none">
2329
  </div>
2330
 
2331
  <div class="section-title">Projects</div>
 
2650
  }
2651
  }
2652
 
 
 
 
2653
 
2654
  // DOM이 λ‘œλ“œλ˜λ©΄ μ‹€ν–‰
2655
  document.addEventListener('DOMContentLoaded', function() {
 
2684
  console.error("PDF μ—…λ‘œλ“œ μš”μ†Œλ₯Ό 찾을 수 μ—†μŒ");
2685
  }
2686
 
2687
+ // ν…μŠ€νŠΈ μ—…λ‘œλ“œ λ²„νŠΌ
2688
+ const textBtn = document.getElementById('textToAIBookBtn');
2689
+ const textInput = document.getElementById('textInput');
2690
+
2691
+ if (textBtn && textInput) {
2692
+ // λ²„νŠΌ 클릭 μ‹œ 파일 μž…λ ₯ 트리거
2693
+ textBtn.addEventListener('click', function() {
2694
+ textInput.click();
2695
+ });
2696
+
2697
+ // 파일 선택 μ‹œ 처리
2698
+ textInput.addEventListener('change', function(e) {
2699
+ const file = e.target.files[0];
2700
+ if (!file) return;
2701
+
2702
+ // μ„œλ²„μ— ν…μŠ€νŠΈ 파일 μ—…λ‘œλ“œ (영ꡬ μ €μž₯μ†Œμ— PDF둜 λ³€ν™˜ν•˜μ—¬ μ €μž₯)
2703
+ uploadTextToServer(file);
2704
+ });
2705
+ }
2706
+
2707
  // μ„œλ²„ PDF λ‘œλ“œ 및 μΊμ‹œ μƒνƒœ 확인
2708
  loadServerPDFs();
2709
 
 
2804
  }
2805
  }
2806
 
2807
+ // μ„œλ²„μ— ν…μŠ€νŠΈ νŒŒμΌμ„ μ—…λ‘œλ“œν•˜μ—¬ PDF둜 λ³€ν™˜ν•˜λŠ” ν•¨μˆ˜
2808
// Upload a text file to the server, which converts it to a PDF.
// Shows the loading overlay during the request, then either refreshes the
// server PDF list and reports the share URL, or reports the failure.
async function uploadTextToServer(file) {
    try {
        showLoading("ν…μŠ€νŠΈ 뢄석 및 PDF λ³€ν™˜ 쀑...");

        const payload = new FormData();
        payload.append('file', file);

        const requestOptions = { method: 'POST', body: payload };
        const response = await fetch('/api/text-to-pdf', requestOptions);
        const result = await response.json();

        // The overlay is dismissed as soon as the server has answered,
        // whatever the outcome.
        hideLoading();

        if (!result.success) {
            showError("λ³€ν™˜ μ‹€νŒ¨: " + (result.message || "μ•Œ 수 μ—†λŠ” 였λ₯˜"));
            return;
        }

        // Refresh the server PDF list before reporting success.
        await loadServerPDFs();
        showMessage("ν…μŠ€νŠΈκ°€ μ„±κ³΅μ μœΌλ‘œ PDF둜 λ³€ν™˜λ˜μ—ˆμŠ΅λ‹ˆλ‹€! 곡유 URL: " + result.viewUrl);
    } catch (error) {
        console.error("ν…μŠ€νŠΈ λ³€ν™˜ 였λ₯˜:", error);
        hideLoading();
        showError("ν…μŠ€νŠΈλ₯Ό PDF둜 λ³€ν™˜ν•˜λŠ” 쀑 였λ₯˜κ°€ λ°œμƒν–ˆμŠ΅λ‹ˆλ‹€.");
    }
}
2840
+
2841
  function addCard(i, thumb, title, isCached = false, pdfId = null) {
2842
  const d = document.createElement('div');
2843
  d.className = 'card fade-in';
 
3901
  }
3902
  </script>
3903
  </body>
3904
+ </html>
3905
  """
3906
 
3907
  if __name__ == "__main__":