seawolf2357 committed on
Commit
5ad8687
·
verified ·
1 Parent(s): 0450b1f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -1470
app.py CHANGED
@@ -1,1483 +1,33 @@
1
- """
2
- HWP AI 어시스턴트 - Gradio 웹 앱
3
- AI가 HWP 파일을 읽고, 보고, 말하며, 생각하고 기억합니다.
4
- - Tab 1: LLM 채팅 (스트리밍, 파일 첨부 지원)
5
- - Tab 2: HWP 변환기
6
- """
7
- import gradio as gr
8
- import tempfile
9
  import os
10
- import subprocess
11
- import shutil
12
  import sys
13
- import re
14
- import json
15
- import uuid
16
- import sqlite3
17
- import base64
18
- import requests
19
- import zlib
20
- import zipfile
21
- from pathlib import Path
22
- from datetime import datetime
23
- from typing import Generator, List, Dict, Optional
24
- from xml.etree import ElementTree as ET
25
 
26
- # Groq 라이브러리 임포트
27
- try:
28
- from groq import Groq
29
- GROQ_AVAILABLE = True
30
- print("✅ Groq library loaded")
31
- except ImportError:
32
- GROQ_AVAILABLE = False
33
- print("❌ Groq library not available - pip install groq")
34
-
35
- # ============== Comic Style CSS ==============
36
- COMIC_CSS = """
37
- @import url('https://fonts.googleapis.com/css2?family=Bangers&family=Comic+Neue:wght@400;700&display=swap');
38
-
39
- .gradio-container {
40
- background-color: #FEF9C3 !important;
41
- background-image: radial-gradient(#1F2937 1px, transparent 1px) !important;
42
- background-size: 20px 20px !important;
43
- min-height: 100vh !important;
44
- font-family: 'Comic Neue', cursive, sans-serif !important;
45
- }
46
-
47
- footer, .footer, .gradio-container footer, .built-with, [class*="footer"], .gradio-footer, a[href*="gradio.app"] {
48
- display: none !important;
49
- visibility: hidden !important;
50
- height: 0 !important;
51
- }
52
-
53
- /* HOME Button Style */
54
- .home-button-container {
55
- display: flex;
56
- justify-content: center;
57
- align-items: center;
58
- gap: 15px;
59
- margin-bottom: 15px;
60
- padding: 12px 20px;
61
- background: linear-gradient(135deg, #10B981 0%, #059669 100%);
62
- border: 4px solid #1F2937;
63
- border-radius: 12px;
64
- box-shadow: 6px 6px 0 #1F2937;
65
- }
66
-
67
- .home-button {
68
- display: inline-flex;
69
- align-items: center;
70
- gap: 8px;
71
- padding: 10px 25px;
72
- background: linear-gradient(135deg, #FACC15 0%, #F59E0B 100%);
73
- color: #1F2937;
74
- font-family: 'Bangers', cursive;
75
- font-size: 1.4rem;
76
- letter-spacing: 2px;
77
- text-decoration: none;
78
- border: 3px solid #1F2937;
79
- border-radius: 8px;
80
- box-shadow: 4px 4px 0 #1F2937;
81
- transition: all 0.2s ease;
82
- }
83
-
84
- .home-button:hover {
85
- background: linear-gradient(135deg, #FDE047 0%, #FACC15 100%);
86
- transform: translate(-2px, -2px);
87
- box-shadow: 6px 6px 0 #1F2937;
88
- }
89
-
90
- .home-button:active {
91
- transform: translate(2px, 2px);
92
- box-shadow: 2px 2px 0 #1F2937;
93
- }
94
-
95
- .url-display {
96
- font-family: 'Comic Neue', cursive;
97
- font-size: 1.1rem;
98
- font-weight: 700;
99
- color: #FFF;
100
- background: rgba(0,0,0,0.3);
101
- padding: 8px 16px;
102
- border-radius: 6px;
103
- border: 2px solid rgba(255,255,255,0.3);
104
- }
105
-
106
- .header-container {
107
- text-align: center;
108
- padding: 25px 20px;
109
- background: linear-gradient(135deg, #3B82F6 0%, #8B5CF6 100%);
110
- border: 4px solid #1F2937;
111
- border-radius: 12px;
112
- margin-bottom: 20px;
113
- box-shadow: 8px 8px 0 #1F2937;
114
- position: relative;
115
- }
116
-
117
- .header-title {
118
- font-family: 'Bangers', cursive !important;
119
- color: #FFF !important;
120
- font-size: 2.8rem !important;
121
- text-shadow: 3px 3px 0 #1F2937 !important;
122
- letter-spacing: 3px !important;
123
- margin: 0 !important;
124
- }
125
-
126
- .header-subtitle {
127
- font-family: 'Comic Neue', cursive !important;
128
- font-size: 1.1rem !important;
129
- color: #FEF9C3 !important;
130
- margin-top: 8px !important;
131
- font-weight: 700 !important;
132
- }
133
-
134
- .stats-badge {
135
- display: inline-block;
136
- background: #FACC15;
137
- color: #1F2937;
138
- padding: 6px 14px;
139
- border-radius: 20px;
140
- font-size: 0.9rem;
141
- margin: 3px;
142
- font-weight: 700;
143
- border: 2px solid #1F2937;
144
- box-shadow: 2px 2px 0 #1F2937;
145
- }
146
-
147
- /* 무료 서비스 안내 박스 */
148
- .free-service-notice {
149
- text-align: center;
150
- padding: 10px 15px;
151
- background: linear-gradient(135deg, #FEE2E2 0%, #FECACA 100%);
152
- border: 3px solid #1F2937;
153
- border-radius: 8px;
154
- margin: 10px 0;
155
- box-shadow: 4px 4px 0 #1F2937;
156
- font-family: 'Comic Neue', cursive;
157
- font-weight: 700;
158
- color: #991B1B;
159
- }
160
-
161
- .free-service-notice a {
162
- color: #1D4ED8;
163
- text-decoration: none;
164
- font-weight: 700;
165
- }
166
-
167
- .free-service-notice a:hover {
168
- text-decoration: underline;
169
- }
170
-
171
- .gr-panel, .gr-box, .gr-form, .block, .gr-group {
172
- background: #FFF !important;
173
- border: 3px solid #1F2937 !important;
174
- border-radius: 8px !important;
175
- box-shadow: 5px 5px 0 #1F2937 !important;
176
- }
177
-
178
- .gr-button-primary, button.primary, .gr-button.primary {
179
- background: linear-gradient(135deg, #EF4444 0%, #F97316 100%) !important;
180
- border: 3px solid #1F2937 !important;
181
- border-radius: 8px !important;
182
- color: #FFF !important;
183
- font-family: 'Bangers', cursive !important;
184
- font-size: 1.3rem !important;
185
- letter-spacing: 2px !important;
186
- padding: 12px 24px !important;
187
- box-shadow: 4px 4px 0 #1F2937 !important;
188
- text-shadow: 1px 1px 0 #1F2937 !important;
189
- transition: all 0.2s ease !important;
190
- }
191
-
192
- .gr-button-primary:hover, button.primary:hover {
193
- background: linear-gradient(135deg, #DC2626 0%, #EA580C 100%) !important;
194
- transform: translate(-2px, -2px) !important;
195
- box-shadow: 6px 6px 0 #1F2937 !important;
196
- }
197
-
198
- .gr-button-primary:active, button.primary:active {
199
- transform: translate(2px, 2px) !important;
200
- box-shadow: 2px 2px 0 #1F2937 !important;
201
- }
202
-
203
- textarea, input[type="text"], input[type="number"] {
204
- background: #FFF !important;
205
- border: 3px solid #1F2937 !important;
206
- border-radius: 8px !important;
207
- color: #1F2937 !important;
208
- font-family: 'Comic Neue', cursive !important;
209
- font-weight: 700 !important;
210
- }
211
-
212
- textarea:focus, input[type="text"]:focus {
213
- border-color: #3B82F6 !important;
214
- box-shadow: 3px 3px 0 #3B82F6 !important;
215
- }
216
-
217
- .info-box {
218
- background: linear-gradient(135deg, #FACC15 0%, #FDE047 100%) !important;
219
- border: 3px solid #1F2937 !important;
220
- border-radius: 8px !important;
221
- padding: 12px 15px !important;
222
- margin: 10px 0 !important;
223
- box-shadow: 4px 4px 0 #1F2937 !important;
224
- font-family: 'Comic Neue', cursive !important;
225
- font-weight: 700 !important;
226
- color: #1F2937 !important;
227
- }
228
-
229
- .feature-box {
230
- background: linear-gradient(135deg, #E0F2FE 0%, #BAE6FD 100%) !important;
231
- border: 3px solid #1F2937 !important;
232
- border-radius: 12px !important;
233
- padding: 20px !important;
234
- margin: 15px 0 !important;
235
- box-shadow: 5px 5px 0 #1F2937 !important;
236
- }
237
-
238
- .feature-title {
239
- font-family: 'Bangers', cursive !important;
240
- font-size: 1.5rem !important;
241
- color: #1F2937 !important;
242
- margin-bottom: 10px !important;
243
- text-shadow: 1px 1px 0 #FFF !important;
244
- }
245
-
246
- .feature-item {
247
- display: flex;
248
- align-items: center;
249
- gap: 10px;
250
- padding: 8px 0;
251
- font-family: 'Comic Neue', cursive !important;
252
- font-weight: 700 !important;
253
- font-size: 1rem !important;
254
- color: #1F2937 !important;
255
- }
256
-
257
- .feature-icon {
258
- font-size: 1.5rem;
259
- }
260
-
261
- /* Markdown 강조 박스 */
262
- .markdown-highlight-box {
263
- background: linear-gradient(135deg, #EC4899 0%, #F472B6 100%) !important;
264
- border: 4px solid #1F2937 !important;
265
- border-radius: 12px !important;
266
- padding: 20px !important;
267
- margin: 15px 0 !important;
268
- box-shadow: 6px 6px 0 #1F2937 !important;
269
- animation: pulse-glow 2s ease-in-out infinite;
270
- }
271
-
272
- @keyframes pulse-glow {
273
- 0%, 100% { box-shadow: 6px 6px 0 #1F2937; }
274
- 50% { box-shadow: 8px 8px 0 #1F2937, 0 0 20px rgba(236, 72, 153, 0.5); }
275
- }
276
-
277
- .markdown-title {
278
- font-family: 'Bangers', cursive !important;
279
- font-size: 2rem !important;
280
- color: #FFF !important;
281
- text-shadow: 3px 3px 0 #1F2937 !important;
282
- letter-spacing: 2px !important;
283
- margin-bottom: 15px !important;
284
- text-align: center !important;
285
- }
286
-
287
- .markdown-benefits {
288
- display: grid;
289
- grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
290
- gap: 12px;
291
- margin-top: 10px;
292
- }
293
-
294
- .markdown-benefit-item {
295
- background: rgba(255,255,255,0.95) !important;
296
- border: 3px solid #1F2937 !important;
297
- border-radius: 8px !important;
298
- padding: 12px !important;
299
- box-shadow: 3px 3px 0 #1F2937 !important;
300
- font-family: 'Comic Neue', cursive !important;
301
- font-weight: 700 !important;
302
- font-size: 0.95rem !important;
303
- color: #1F2937 !important;
304
- text-align: center !important;
305
- }
306
-
307
- .markdown-benefit-icon {
308
- font-size: 1.8rem !important;
309
- display: block !important;
310
- margin-bottom: 5px !important;
311
- }
312
-
313
- label, .gr-input-label, .gr-block-label {
314
- color: #1F2937 !important;
315
- font-family: 'Comic Neue', cursive !important;
316
- font-weight: 700 !important;
317
- }
318
-
319
- .gr-accordion {
320
- background: #E0F2FE !important;
321
- border: 3px solid #1F2937 !important;
322
- border-radius: 8px !important;
323
- box-shadow: 4px 4px 0 #1F2937 !important;
324
- }
325
-
326
- .footer-comic {
327
- text-align: center;
328
- padding: 20px;
329
- background: linear-gradient(135deg, #3B82F6 0%, #8B5CF6 100%);
330
- border: 4px solid #1F2937;
331
- border-radius: 12px;
332
- margin-top: 20px;
333
- box-shadow: 6px 6px 0 #1F2937;
334
- }
335
-
336
- .footer-comic p {
337
- font-family: 'Comic Neue', cursive !important;
338
- color: #FFF !important;
339
- margin: 5px 0 !important;
340
- font-weight: 700 !important;
341
- }
342
-
343
- ::-webkit-scrollbar {
344
- width: 12px;
345
- height: 12px;
346
- }
347
-
348
- ::-webkit-scrollbar-track {
349
- background: #FEF9C3;
350
- border: 2px solid #1F2937;
351
- }
352
-
353
- ::-webkit-scrollbar-thumb {
354
- background: #3B82F6;
355
- border: 2px solid #1F2937;
356
- border-radius: 6px;
357
- }
358
-
359
- ::-webkit-scrollbar-thumb:hover {
360
- background: #EF4444;
361
- }
362
-
363
- ::selection {
364
- background: #FACC15;
365
- color: #1F2937;
366
- }
367
-
368
- /* Chatbot Styling */
369
- .gr-chatbot {
370
- border: 3px solid #1F2937 !important;
371
- border-radius: 12px !important;
372
- box-shadow: 5px 5px 0 #1F2937 !important;
373
- }
374
-
375
- /* Tab Styling */
376
- .gr-tab-nav {
377
- background: linear-gradient(135deg, #F59E0B 0%, #FACC15 100%) !important;
378
- border: 3px solid #1F2937 !important;
379
- border-radius: 8px 8px 0 0 !important;
380
- }
381
-
382
- .gr-tab-nav button {
383
- font-family: 'Bangers', cursive !important;
384
- font-size: 1.2rem !important;
385
- letter-spacing: 1px !important;
386
- color: #1F2937 !important;
387
- }
388
-
389
- .gr-tab-nav button.selected {
390
- background: #FFF !important;
391
- border-bottom: 3px solid #FFF !important;
392
- }
393
-
394
- /* File Upload Box */
395
- .upload-box {
396
- border: 3px dashed #3B82F6 !important;
397
- border-radius: 12px !important;
398
- background: linear-gradient(135deg, #EFF6FF 0%, #DBEAFE 100%) !important;
399
- box-shadow: 4px 4px 0 #1F2937 !important;
400
- }
401
-
402
- .download-box {
403
- border: 3px solid #10B981 !important;
404
- border-radius: 12px !important;
405
- background: linear-gradient(135deg, #ECFDF5 0%, #D1FAE5 100%) !important;
406
- box-shadow: 4px 4px 0 #1F2937 !important;
407
- }
408
- """
409
-
410
- # ============== 환경 설정 ==============
411
- SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
412
- PYHWP_PATH = os.path.join(SCRIPT_DIR, 'pyhwp')
413
- DB_PATH = os.path.join(SCRIPT_DIR, 'chat_history.db')
414
-
415
- if os.path.exists(PYHWP_PATH):
416
- sys.path.insert(0, PYHWP_PATH)
417
-
418
- # ============== 모듈 임포트 ==============
419
- try:
420
- import olefile
421
- OLEFILE_AVAILABLE = True
422
- print("✅ olefile loaded")
423
- except ImportError:
424
- OLEFILE_AVAILABLE = False
425
-
426
- try:
427
- from markdownify import markdownify as md
428
- MARKDOWNIFY_AVAILABLE = True
429
- print("✅ markdownify loaded")
430
- except ImportError:
431
- MARKDOWNIFY_AVAILABLE = False
432
-
433
- try:
434
- import html2text
435
- HTML2TEXT_AVAILABLE = True
436
- print("✅ html2text loaded")
437
- except ImportError:
438
- HTML2TEXT_AVAILABLE = False
439
-
440
- try:
441
- from bs4 import BeautifulSoup
442
- BS4_AVAILABLE = True
443
- except ImportError:
444
- BS4_AVAILABLE = False
445
-
446
- try:
447
- import PyPDF2
448
- PYPDF2_AVAILABLE = True
449
- print("✅ PyPDF2 loaded")
450
- except ImportError:
451
- PYPDF2_AVAILABLE = False
452
-
453
- try:
454
- import pdfplumber
455
- PDFPLUMBER_AVAILABLE = True
456
- print("✅ pdfplumber loaded")
457
- except ImportError:
458
- PDFPLUMBER_AVAILABLE = False
459
-
460
- # ============== API 키 설정 ==============
461
- GROQ_API_KEY = os.environ.get("GROQ_API_KEY", "")
462
- FIREWORKS_API_KEY = os.environ.get("FIREWORKS_API_KEY", "")
463
-
464
- # ============== SQLite 데이터베이스 ==============
465
- def init_database():
466
- conn = sqlite3.connect(DB_PATH)
467
- cursor = conn.cursor()
468
- cursor.execute('''
469
- CREATE TABLE IF NOT EXISTS sessions (
470
- session_id TEXT PRIMARY KEY,
471
- created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
472
- updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
473
- title TEXT
474
- )
475
- ''')
476
- cursor.execute('''
477
- CREATE TABLE IF NOT EXISTS messages (
478
- id INTEGER PRIMARY KEY AUTOINCREMENT,
479
- session_id TEXT,
480
- role TEXT,
481
- content TEXT,
482
- file_info TEXT,
483
- created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
484
- FOREIGN KEY (session_id) REFERENCES sessions(session_id)
485
- )
486
- ''')
487
- conn.commit()
488
- conn.close()
489
-
490
- def create_session() -> str:
491
- session_id = str(uuid.uuid4())
492
- conn = sqlite3.connect(DB_PATH)
493
- cursor = conn.cursor()
494
- cursor.execute("INSERT INTO sessions (session_id, title) VALUES (?, ?)",
495
- (session_id, f"대화 {datetime.now().strftime('%Y-%m-%d %H:%M')}"))
496
- conn.commit()
497
- conn.close()
498
- return session_id
499
-
500
- def save_message(session_id: str, role: str, content: str, file_info: str = None):
501
- conn = sqlite3.connect(DB_PATH)
502
- cursor = conn.cursor()
503
- cursor.execute("INSERT INTO messages (session_id, role, content, file_info) VALUES (?, ?, ?, ?)",
504
- (session_id, role, content, file_info))
505
- cursor.execute("UPDATE sessions SET updated_at = CURRENT_TIMESTAMP WHERE session_id = ?", (session_id,))
506
- conn.commit()
507
- conn.close()
508
-
509
- def get_session_messages(session_id: str, limit: int = 20) -> List[Dict]:
510
- conn = sqlite3.connect(DB_PATH)
511
- cursor = conn.cursor()
512
- cursor.execute("SELECT role, content, file_info, created_at FROM messages WHERE session_id = ? ORDER BY created_at DESC LIMIT ?",
513
- (session_id, limit))
514
- rows = cursor.fetchall()
515
- conn.close()
516
- return [{"role": r[0], "content": r[1], "file_info": r[2], "created_at": r[3]} for r in reversed(rows)]
517
-
518
- def get_all_sessions() -> List[Dict]:
519
- conn = sqlite3.connect(DB_PATH)
520
- cursor = conn.cursor()
521
- cursor.execute("SELECT session_id, title, created_at, updated_at FROM sessions ORDER BY updated_at DESC LIMIT 50")
522
- rows = cursor.fetchall()
523
- conn.close()
524
- return [{"session_id": r[0], "title": r[1], "created_at": r[2], "updated_at": r[3]} for r in rows]
525
-
526
- def update_session_title(session_id: str, title: str):
527
- conn = sqlite3.connect(DB_PATH)
528
- cursor = conn.cursor()
529
- cursor.execute("UPDATE sessions SET title = ? WHERE session_id = ?", (title, session_id))
530
- conn.commit()
531
- conn.close()
532
-
533
- init_database()
534
-
535
- # ============== 파일 유틸리티 ==============
536
- def extract_text_from_pdf(file_path: str) -> str:
537
- text_parts = []
538
- if PDFPLUMBER_AVAILABLE:
539
- try:
540
- with pdfplumber.open(file_path) as pdf:
541
- for page in pdf.pages:
542
- text = page.extract_text()
543
- if text:
544
- text_parts.append(text)
545
- if text_parts:
546
- return "\n\n".join(text_parts)
547
- except Exception as e:
548
- print(f"pdfplumber error: {e}")
549
-
550
- if PYPDF2_AVAILABLE:
551
- try:
552
- with open(file_path, 'rb') as f:
553
- reader = PyPDF2.PdfReader(f)
554
- for page in reader.pages:
555
- text = page.extract_text()
556
- if text:
557
- text_parts.append(text)
558
- if text_parts:
559
- return "\n\n".join(text_parts)
560
- except Exception as e:
561
- print(f"PyPDF2 error: {e}")
562
- return None
563
-
564
- def extract_text_from_txt(file_path: str) -> str:
565
- for encoding in ['utf-8', 'euc-kr', 'cp949', 'utf-16', 'latin-1']:
566
- try:
567
- with open(file_path, 'r', encoding=encoding) as f:
568
- return f.read()
569
- except:
570
- continue
571
- return None
572
-
573
- def image_to_base64(file_path: str) -> str:
574
- with open(file_path, 'rb') as f:
575
- return base64.b64encode(f.read()).decode('utf-8')
576
-
577
- def get_image_mime_type(file_path: str) -> str:
578
- ext = Path(file_path).suffix.lower()
579
- return {'.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.png': 'image/png',
580
- '.gif': 'image/gif', '.webp': 'image/webp', '.bmp': 'image/bmp'}.get(ext, 'image/jpeg')
581
-
582
- def is_image_file(fp: str) -> bool:
583
- return Path(fp).suffix.lower() in ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp']
584
-
585
- def is_hwp_file(fp: str) -> bool:
586
- return Path(fp).suffix.lower() == '.hwp'
587
-
588
- def is_hwpx_file(fp: str) -> bool:
589
- return Path(fp).suffix.lower() == '.hwpx'
590
-
591
- def is_pdf_file(fp: str) -> bool:
592
- return Path(fp).suffix.lower() == '.pdf'
593
-
594
- def is_text_file(fp: str) -> bool:
595
- return Path(fp).suffix.lower() in ['.txt', '.md', '.json', '.csv', '.xml', '.html', '.css', '.js', '.py']
596
-
597
- # ============== HWPX 텍스트 추출 ==============
598
- def extract_text_from_hwpx(file_path: str) -> tuple:
599
- try:
600
- text_parts = []
601
- with zipfile.ZipFile(file_path, 'r') as zf:
602
- file_list = zf.namelist()
603
- section_files = sorted([f for f in file_list if f.startswith('Contents/section') and f.endswith('.xml')])
604
- if not section_files:
605
- section_files = sorted([f for f in file_list if 'section' in f.lower() and f.endswith('.xml')])
606
-
607
- for section_file in section_files:
608
- try:
609
- with zf.open(section_file) as sf:
610
- content = sf.read()
611
- content_str = content.decode('utf-8')
612
- content_str = re.sub(r'\sxmlns[^"]*"[^"]*"', '', content_str)
613
- content_str = re.sub(r'<[a-zA-Z]+:', '<', content_str)
614
- content_str = re.sub(r'</[a-zA-Z]+:', '</', content_str)
615
-
616
- try:
617
- root = ET.fromstring(content_str)
618
- texts = []
619
- for elem in root.iter():
620
- if elem.tag.endswith('t') or elem.tag == 't':
621
- if elem.text:
622
- texts.append(elem.text)
623
- elif elem.text and elem.text.strip():
624
- if any(x in elem.tag.lower() for x in ['text', 'run', 'para', 'char']):
625
- texts.append(elem.text.strip())
626
- if texts:
627
- text_parts.append(' '.join(texts))
628
- except ET.ParseError:
629
- text_matches = re.findall(r'>([^<]+)<', content.decode('utf-8', errors='ignore'))
630
- clean_texts = [t.strip() for t in text_matches if t.strip() and len(t.strip()) > 1]
631
- if clean_texts:
632
- text_parts.append(' '.join(clean_texts))
633
- except:
634
- continue
635
-
636
- if text_parts:
637
- result = '\n\n'.join(text_parts)
638
- result = re.sub(r'\s+', ' ', result)
639
- result = re.sub(r'\n{3,}', '\n\n', result)
640
- return result.strip(), None
641
- return None, "HWPX에서 텍스트를 찾을 수 없습니다"
642
- except zipfile.BadZipFile:
643
- return None, "유효하지 않은 HWPX 파일"
644
- except Exception as e:
645
- return None, f"HWPX 처리 오류: {str(e)}"
646
-
647
- # ============== HWP 텍스트 추출 ==============
648
- def extract_text_with_hwp5txt(file_path: str) -> tuple:
649
- try:
650
- result = subprocess.run(['hwp5txt', file_path], capture_output=True, timeout=60)
651
- if result.returncode == 0 and result.stdout:
652
- for enc in ['utf-8', 'cp949', 'euc-kr']:
653
- try:
654
- text = result.stdout.decode(enc)
655
- if text.strip() and len(text.strip()) > 10:
656
- return text.strip(), None
657
- except:
658
- continue
659
- except FileNotFoundError:
660
- pass
661
- except Exception as e:
662
- print(f"hwp5txt error: {e}")
663
-
664
- try:
665
- code = f'''
666
- import sys
667
- sys.path.insert(0, "{PYHWP_PATH}")
668
- from hwp5.filestructure import Hwp5File
669
- from hwp5.hwp5txt import extract_text
670
- hwp = Hwp5File("{file_path}")
671
- for idx in hwp.bodytext.sections():
672
- section = hwp.bodytext.section(idx)
673
- for para in extract_text(section):
674
- if para.strip():
675
- print(para.strip())
676
- hwp.close()
677
- '''
678
- result = subprocess.run([sys.executable, '-c', code], capture_output=True, timeout=60)
679
- if result.returncode == 0 and result.stdout:
680
- for enc in ['utf-8', 'cp949', 'euc-kr']:
681
- try:
682
- text = result.stdout.decode(enc)
683
- if text.strip() and len(text.strip()) > 10:
684
- return text.strip(), None
685
- except:
686
- continue
687
- except Exception as e:
688
- print(f"hwp5txt subprocess error: {e}")
689
-
690
- return None, "hwp5txt 실패"
691
-
692
- def extract_text_with_olefile(file_path: str) -> tuple:
693
- if not OLEFILE_AVAILABLE:
694
- return None, "olefile 모듈 없음"
695
-
696
- try:
697
- ole = olefile.OleFileIO(file_path)
698
- if not ole.exists('FileHeader'):
699
- ole.close()
700
- return None, "HWP 파일 헤더 없음"
701
-
702
- header_data = ole.openstream('FileHeader').read()
703
- is_compressed = (header_data[36] & 1) == 1 if len(header_data) > 36 else True
704
-
705
- all_texts = []
706
- for entry in ole.listdir():
707
- entry_path = '/'.join(entry)
708
- if entry_path.startswith('BodyText/Section'):
709
- try:
710
- stream_data = ole.openstream(entry).read()
711
- if is_compressed:
712
- try:
713
- stream_data = zlib.decompress(stream_data, -15)
714
- except:
715
- try:
716
- stream_data = zlib.decompress(stream_data)
717
- except:
718
- pass
719
-
720
- section_text = extract_hwp_section_text(stream_data)
721
- if section_text:
722
- all_texts.append(section_text)
723
- except:
724
- continue
725
-
726
- ole.close()
727
- if all_texts:
728
- return '\n\n'.join(all_texts).strip(), None
729
- return None, "텍스트를 찾을 수 없습니다"
730
- except Exception as e:
731
- return None, f"olefile 오류: {str(e)}"
732
-
733
- def extract_hwp_section_text(data: bytes) -> str:
734
- texts = []
735
- pos = 0
736
- while pos < len(data) - 4:
737
- try:
738
- header = int.from_bytes(data[pos:pos+4], 'little')
739
- tag_id = header & 0x3FF
740
- size = (header >> 20) & 0xFFF
741
- pos += 4
742
- if size == 0xFFF:
743
- if pos + 4 > len(data):
744
- break
745
- size = int.from_bytes(data[pos:pos+4], 'little')
746
- pos += 4
747
- if pos + size > len(data):
748
- break
749
- record_data = data[pos:pos+size]
750
- pos += size
751
- if tag_id == 67 and size > 0:
752
- text = decode_para_text(record_data)
753
- if text:
754
- texts.append(text)
755
- except:
756
- pos += 1
757
- continue
758
- return '\n'.join(texts) if texts else None
759
-
760
- def decode_para_text(data: bytes) -> str:
761
- result = []
762
- i = 0
763
- while i < len(data) - 1:
764
- code = int.from_bytes(data[i:i+2], 'little')
765
- if code == 0:
766
- pass
767
- elif code == 1:
768
- i += 14
769
- elif code == 2:
770
- i += 14
771
- elif code == 3:
772
- i += 14
773
- elif code == 4:
774
- pass
775
- elif code == 9:
776
- result.append('\t')
777
- elif code == 10:
778
- result.append('\n')
779
- elif code == 13:
780
- result.append('\n')
781
- elif code == 24:
782
- result.append('-')
783
- elif code == 30 or code == 31:
784
- result.append(' ')
785
- elif code < 32:
786
- pass
787
- else:
788
- try:
789
- char = chr(code)
790
- if char.isprintable() or char in '\n\t ':
791
- result.append(char)
792
- except:
793
- pass
794
- i += 2
795
- text = ''.join(result).strip()
796
- text = re.sub(r'[ \t]+', ' ', text)
797
- text = re.sub(r'\n{3,}', '\n\n', text)
798
- return text if len(text) > 2 else None
799
-
800
- def extract_text_from_hwp(file_path: str) -> tuple:
801
- print(f"\n📖 [HWP 읽기] {os.path.basename(file_path)}")
802
- text, error = extract_text_with_hwp5txt(file_path)
803
- if text and len(text.strip()) > 20:
804
- print(f" ✅ 성공: {len(text)} 글자")
805
- return text, None
806
- text, error = extract_text_with_olefile(file_path)
807
- if text and len(text.strip()) > 20:
808
- print(f" ✅ 성공: {len(text)} 글자")
809
- return text, None
810
- print(f" ❌ 실패: {error}")
811
- return None, "모든 추출 방법 실패"
812
-
813
- def extract_text_from_hwp_or_hwpx(file_path: str) -> tuple:
814
- if is_hwpx_file(file_path):
815
- print(f"\n📖 [HWPX 읽기] {os.path.basename(file_path)}")
816
- return extract_text_from_hwpx(file_path)
817
- else:
818
- return extract_text_from_hwp(file_path)
819
-
820
- # ============== HWP 변환 함수들 ==============
821
- def check_hwp_version(file_path):
822
- try:
823
- with open(file_path, 'rb') as f:
824
- header = f.read(32)
825
- if b'HWP Document File' in header:
826
- return "HWP v5", True
827
- elif header[:4] == b'\xd0\xcf\x11\xe0':
828
- return "HWP v5 (OLE)", True
829
- elif header[:4] == b'PK\x03\x04':
830
- return "HWPX", True
831
- else:
832
- return "Unknown", False
833
- except Exception as e:
834
- return f"Error: {e}", False
835
-
836
- def convert_to_html_subprocess(input_path, output_dir):
837
- output_path = os.path.join(output_dir, "output.html")
838
- try:
839
- for cmd in [['hwp5html', '--output', output_path, input_path]]:
840
- try:
841
- result = subprocess.run(cmd, capture_output=True, timeout=120)
842
- if result.returncode == 0:
843
- if os.path.exists(output_path):
844
- return output_path, None
845
- for item in os.listdir(output_dir):
846
- item_path = os.path.join(output_dir, item)
847
- if item.lower().endswith(('.html', '.htm')):
848
- return item_path, None
849
- if os.path.isdir(item_path):
850
- return item_path, None
851
- except:
852
- continue
853
- except Exception as e:
854
- print(f"HTML 변환 오류: {e}")
855
- return None, "HTML 변환 실패"
856
-
857
- def html_to_markdown(html_content):
858
- if MARKDOWNIFY_AVAILABLE:
859
- try:
860
- return md(html_content, heading_style="ATX", bullets="-"), None
861
- except:
862
- pass
863
- if HTML2TEXT_AVAILABLE:
864
- try:
865
- h = html2text.HTML2Text()
866
- h.body_width = 0
867
- return h.handle(html_content), None
868
- except:
869
- pass
870
- if BS4_AVAILABLE:
871
- try:
872
- soup = BeautifulSoup(html_content, 'html.parser')
873
- return soup.get_text(separator='\n'), None
874
- except:
875
- pass
876
- return None, "Markdown 변환 실패"
877
-
878
- def convert_hwp_to_markdown(input_path: str) -> tuple:
879
- text, error = extract_text_from_hwp_or_hwpx(input_path)
880
- if text:
881
- return text, None
882
- return None, error
883
-
884
- # ============== LLM API (Groq 라이브러리 사용) ==============
885
- def call_groq_api_stream(messages: List[Dict]) -> Generator[str, None, None]:
886
- """Groq API 스트리밍 호출 - openai/gpt-oss-120b 모델 사용"""
887
- if not GROQ_AVAILABLE:
888
- yield "❌ Groq 라이브러리가 설치되지 않았습니다. pip install groq"
889
- return
890
-
891
- if not GROQ_API_KEY:
892
- yield "❌ GROQ_API_KEY 환경변수가 설정되지 않았습니다."
893
- return
894
-
895
  try:
896
- client = Groq(api_key=GROQ_API_KEY)
897
-
898
- completion = client.chat.completions.create(
899
- model="openai/gpt-oss-120b",
900
- messages=messages,
901
- temperature=1,
902
- max_completion_tokens=8192,
903
- top_p=1,
904
- reasoning_effort="medium",
905
- stream=True,
906
- stop=None
907
- )
908
-
909
- for chunk in completion:
910
- if chunk.choices[0].delta.content:
911
- yield chunk.choices[0].delta.content
912
-
913
- except Exception as e:
914
- error_msg = str(e)
915
- print(f"❌ Groq API 오류: {error_msg}")
916
- yield f"❌ API 오류: {error_msg}"
917
-
918
- def call_fireworks_api_stream(messages: List[Dict], image_base64: str, mime_type: str) -> Generator[str, None, None]:
919
- """Fireworks API 스트리밍 호출 (이미지 분석용)"""
920
- if not FIREWORKS_API_KEY:
921
- yield "❌ FIREWORKS_API_KEY 환경변수가 설정되지 않았습니다."
922
- return
923
-
924
- try:
925
- formatted_messages = [{"role": m["role"], "content": m["content"]} for m in messages[:-1]]
926
- formatted_messages.append({
927
- "role": messages[-1]["role"],
928
- "content": [
929
- {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{image_base64}"}},
930
- {"type": "text", "text": messages[-1]["content"]}
931
- ]
932
- })
933
-
934
- response = requests.post(
935
- "https://api.fireworks.ai/inference/v1/chat/completions",
936
- headers={"Authorization": f"Bearer {FIREWORKS_API_KEY}", "Content-Type": "application/json"},
937
- json={
938
- "model": "accounts/fireworks/models/qwen3-vl-235b-a22b-thinking",
939
- "max_tokens": 4096,
940
- "temperature": 0.6,
941
- "messages": formatted_messages,
942
- "stream": True
943
- },
944
- stream=True
945
- )
946
-
947
- if response.status_code != 200:
948
- yield f"❌ Fireworks API 오류: {response.status_code}"
949
- return
950
-
951
- for line in response.iter_lines():
952
- if line:
953
- line = line.decode('utf-8')
954
- if line.startswith('data: ') and line[6:] != '[DONE]':
955
- try:
956
- data = json.loads(line[6:])
957
- content = data.get('choices', [{}])[0].get('delta', {}).get('content', '')
958
- if content:
959
- yield content
960
- except:
961
- continue
962
- except Exception as e:
963
- yield f"❌ API 오류: {str(e)}"
964
-
965
- # ============== 채팅 처리 ==============
966
- def process_file(file_path: str) -> tuple:
967
- if not file_path:
968
- return None, None, None
969
- filename = os.path.basename(file_path)
970
-
971
- if is_image_file(file_path):
972
- return "image", image_to_base64(file_path), get_image_mime_type(file_path)
973
-
974
- if is_hwp_file(file_path) or is_hwpx_file(file_path):
975
- text, error = extract_text_from_hwp_or_hwpx(file_path)
976
- if text and len(text.strip()) > 20:
977
- print(f"📄 [문서 내용 추출 완료] {len(text)} 글자")
978
- print(f"📄 [문서 미리보기] {text[:500]}...")
979
- return "text", text, None
980
- return "error", f"한글 문서 추출 실패: {error}", None
981
-
982
- if is_pdf_file(file_path):
983
- text = extract_text_from_pdf(file_path)
984
- if text:
985
- print(f"📄 [PDF 내용 추출 완료] {len(text)} 글자")
986
- return "text", text, None
987
- return "error", "PDF 추출 실패", None
988
-
989
- if is_text_file(file_path):
990
- text = extract_text_from_txt(file_path)
991
- if text:
992
- return "text", text, None
993
- return "error", "텍스트 읽기 실패", None
994
-
995
- return "unsupported", f"지원하지 않는 형식: {filename}", None
996
-
997
def chat_response(message: str, history: List[Dict], file: Optional[str],
                  session_id: str) -> Generator[tuple, None, None]:
    """Stream one chat turn, optionally grounded in an uploaded file.

    Args:
        message: Text typed by the user; may be empty or None when only a file is sent.
        history: Chat history as a list of ``{"role", "content"}`` dicts, or None.
        file: Path of the attached file, or None.
        session_id: Current session id; a new session is created when it is empty.

    Yields:
        ``(history, session_id)`` tuples. The last history entry is the assistant
        reply, which grows as chunks arrive from the streaming API.
    """
    if history is None:
        history = []
    # Gradio may pass None for an untouched textbox; normalise so .strip() is safe.
    message = message or ""
    if not message.strip() and not file:
        yield history, session_id
        return
    if not session_id:
        session_id = create_session()

    file_type, file_content, file_mime = None, None, None
    file_info = None
    filename = None

    if file:
        filename = os.path.basename(file)
        file_type, file_content, file_mime = process_file(file)
        file_info = json.dumps({"type": file_type, "filename": filename})

        # Extraction failures and unsupported formats end the turn immediately;
        # they differ only in the icon shown in front of the message.
        if file_type in ("error", "unsupported"):
            marker = "❌" if file_type == "error" else "⚠️"
            history = history + [
                {"role": "user", "content": message or "파일 업로드"},
                {"role": "assistant", "content": f"{marker} {file_content}"}
            ]
            yield history, session_id
            return

    # Show the user's message (prefixed with the attachment name) right away.
    user_msg = message
    if file:
        user_msg = f"📎 {filename}\n\n{message}" if message else f"📎 {filename}"

    history = history + [{"role": "user", "content": user_msg}, {"role": "assistant", "content": ""}]
    yield history, session_id

    # Load the stored conversation for this session.
    db_messages = get_session_messages(session_id, limit=10)

    # System prompt tuned for document analysis.
    system_prompt = """당신은 문서 분석 전문 AI 어시스턴트입니다.

## 핵심 역할
- 사용자가 업로드한 문서의 내용을 **정확하게 분석**하고 **구체적으로 답변**합니다.
- 문서에 있는 **실제 내용**을 기반으로만 답변합니다.
- 문서에 없는 내용은 추측하지 않습니다.

## 문서 분석 방법
1. **문서가 제공되면**: 문서 전체 내용을 꼼꼼히 읽고 핵심 정보를 파악합니다.
2. **요약 요청 시**: 문서의 주제, 목적, 핵심 내용, 주요 항목을 구조화하여 요약합니다.
3. **질문 응답 시**: 문서에서 관련 내용을 찾아 **직접 인용하거나 구체적으로 설명**합니다.

## 답변 형식
- 한국어로 자연스럽고 명확하게 답변합니다.
- 문서 내용을 인용할 때는 구체적으로 언급합니다.
- 긴 문서는 섹션별로 나누어 정리합니다.

## 주의사항
- 문서에 **실제로 있는 내용만** 답변에 포함합니다.
- 불확실한 내용은 "문서에서 확인되지 않습니다"라고 명시합니다."""

    api_messages = [{"role": "system", "content": system_prompt}]

    # Replay the stored conversation.
    for m in db_messages:
        api_messages.append({"role": m["role"], "content": m["content"]})

    # Build the current user turn, clearly separating document text from the question.
    if file_type == "text" and file_content:
        if message:
            current_content = f"""## 📄 업로드된 문서 내용 ({filename})

다음은 사용자가 업로드한 문서의 전체 내용입니다:

---
{file_content}
---

## 💬 사용자 질문
{message}

위 문서 내용을 바탕으로 사용자의 질문에 **구체적이고 정확하게** 답변해주세요."""
        else:
            # No question was asked: request a structured summary instead.
            current_content = f"""## 📄 업로드된 문서 내용 ({filename})

다음은 사용자가 업로드한 문서의 전체 내용입니다:

---
{file_content}
---

## 📋 요청사항
위 문서의 내용을 다음 형식으로 **상세하게 요약**해주세요:

1. **문서 제목/주제**: 문서가 다루는 주요 주제
2. **문서 목적**: 이 문서의 작성 목적
3. **핵심 내용**: 가장 중요한 내용 3-5가지
4. **세부 항목**: 문서에 포함된 주요 섹션이나 항목
5. **결론/요약**: 문서의 핵심 메시지"""
    else:
        current_content = message or ""

    api_messages.append({"role": "user", "content": current_content})

    # Debug log.
    print(f"\n🤖 [API 요청]")
    print(f" - 모델: openai/gpt-oss-120b")
    print(f" - 메시지 수: {len(api_messages)}")
    print(f" - 파일 타입: {file_type}")
    print(f" - 문서 길이: {len(file_content) if file_content else 0} 글자")
    if file_content:
        print(f" - 문서 미리보기: {file_content[:200]}...")

    # Images go to the Fireworks streaming call, everything else to the Groq one.
    if file_type == "image":
        stream = call_fireworks_api_stream(api_messages, file_content, file_mime)
    else:
        stream = call_groq_api_stream(api_messages)

    full_response = ""
    for chunk in stream:
        full_response += chunk
        history[-1] = {"role": "assistant", "content": full_response}
        yield history, session_id

    # Persist both sides of the turn.
    save_message(session_id, "user", current_content, file_info)
    save_message(session_id, "assistant", full_response)

    # The first message of a session becomes its title.
    if len(db_messages) == 0 and message:
        update_session_title(session_id, message[:50])
-
1136
- def new_chat():
1137
- return [], create_session(), None
1138
-
1139
- def load_session(session_id: str) -> tuple:
1140
- if not session_id:
1141
- return [], ""
1142
- messages = get_session_messages(session_id, limit=50)
1143
- return [{"role": m["role"], "content": m["content"]} for m in messages], session_id
1144
-
1145
- # ============== HWP 변환기 ==============
1146
- def convert_to_odt_subprocess(input_path, output_dir):
1147
- output_path = os.path.join(output_dir, "output.odt")
1148
- try:
1149
- result = subprocess.run(['hwp5odt', '--output', output_path, input_path], capture_output=True, timeout=120)
1150
- if result.returncode == 0 and os.path.exists(output_path):
1151
- return output_path, None
1152
- except:
1153
- pass
1154
- return None, "ODT 변환 실패"
1155
-
1156
- def convert_to_xml_subprocess(input_path, output_dir):
1157
- output_path = os.path.join(output_dir, "output.xml")
1158
- try:
1159
- result = subprocess.run(['hwp5xml', input_path], capture_output=True, timeout=120)
1160
- if result.returncode == 0 and result.stdout:
1161
- with open(output_path, 'wb') as f:
1162
- f.write(result.stdout)
1163
- return output_path, None
1164
- except:
1165
- pass
1166
- return None, "XML 변환 실패"
1167
-
1168
- def convert_hwp(file, output_format, progress=gr.Progress()):
1169
- if not file:
1170
- return None, "❌ 파일을 업로드해주세요.", ""
1171
-
1172
- input_file = file.name if hasattr(file, 'name') else str(file)
1173
- ext_lower = Path(input_file).suffix.lower()
1174
-
1175
- if ext_lower not in ['.hwp', '.hwpx']:
1176
- return None, "❌ HWP 또는 HWPX 파일만 지원됩니다.", ""
1177
-
1178
- progress(0.1, desc="📖 파일 읽는 중...")
1179
- version, is_valid = check_hwp_version(input_file)
1180
- if not is_valid:
1181
- return None, f"❌ 지원하지 않는 파일: {version}", ""
1182
-
1183
- tmp_dir = tempfile.mkdtemp()
1184
-
1185
- try:
1186
- input_filename = os.path.basename(input_file)
1187
- input_path = os.path.join(tmp_dir, input_filename)
1188
- shutil.copy(input_file, input_path)
1189
-
1190
- progress(0.3, desc=f"🔄 {output_format}로 변환 중...")
1191
 
1192
- output_path, error, ext = None, None, ""
1193
-
1194
- if output_format == "HTML":
1195
- if ext_lower == '.hwpx':
1196
- return None, "❌ HWPX는 HTML 변환을 지원하지 않습니다.", ""
1197
- output_path, error = convert_to_html_subprocess(input_path, tmp_dir)
1198
- ext = ".html"
1199
- if output_path and os.path.isdir(output_path):
1200
- zip_path = shutil.make_archive(os.path.join(tmp_dir, "html"), 'zip', output_path)
1201
- output_path, ext = zip_path, ".zip"
1202
-
1203
- elif output_format == "ODT (OpenDocument)":
1204
- if ext_lower == '.hwpx':
1205
- return None, "❌ HWPX는 ODT 변환을 지원하지 않습니다.", ""
1206
- output_path, error = convert_to_odt_subprocess(input_path, tmp_dir)
1207
- ext = ".odt"
1208
-
1209
- elif output_format == "TXT (텍스트)":
1210
- text, error = extract_text_from_hwp_or_hwpx(input_path)
1211
- if text:
1212
- output_path = os.path.join(tmp_dir, "output.txt")
1213
- with open(output_path, 'w', encoding='utf-8') as f:
1214
- f.write(text)
1215
- ext = ".txt"
1216
-
1217
- elif output_format == "⭐ MARKDOWN (추천)":
1218
- text, error = convert_hwp_to_markdown(input_path)
1219
- if text:
1220
- output_path = os.path.join(tmp_dir, "output.md")
1221
- with open(output_path, 'w', encoding='utf-8') as f:
1222
- f.write(text)
1223
- ext = ".md"
1224
-
1225
- elif output_format == "XML":
1226
- if ext_lower == '.hwpx':
1227
- try:
1228
- with zipfile.ZipFile(input_path, 'r') as zf:
1229
- xml_contents = []
1230
- for name in zf.namelist():
1231
- if name.endswith('.xml'):
1232
- with zf.open(name) as f:
1233
- xml_contents.append(f"<!-- {name} -->\n{f.read().decode('utf-8', errors='ignore')}")
1234
- output_path = os.path.join(tmp_dir, "output.xml")
1235
- with open(output_path, 'w', encoding='utf-8') as f:
1236
- f.write('\n\n'.join(xml_contents))
1237
- except Exception as e:
1238
- error = f"HWPX XML 추출 실패: {e}"
1239
- else:
1240
- output_path, error = convert_to_xml_subprocess(input_path, tmp_dir)
1241
- ext = ".xml"
1242
-
1243
- if not output_path:
1244
- return None, f"❌ {error or '변환 실패'}", ""
1245
-
1246
- if not os.path.exists(output_path):
1247
- return None, "❌ 변환된 파일을 찾을 수 없습니다.", ""
1248
-
1249
- progress(0.8, desc="✅ 완료 중...")
1250
-
1251
- base_name = Path(input_filename).stem
1252
- final_output = os.path.join(tmp_dir, f"{base_name}{ext}")
1253
- if output_path != final_output:
1254
- shutil.copy2(output_path, final_output)
1255
-
1256
- file_size = os.path.getsize(final_output)
1257
- size_str = f"{file_size/1024:.1f} KB" if file_size > 1024 else f"{file_size} bytes"
1258
-
1259
- preview = ""
1260
- if ext in ['.txt', '.md', '.xml']:
1261
- try:
1262
- with open(final_output, 'r', encoding='utf-8', errors='ignore') as f:
1263
- preview = f.read(5000)
1264
- if len(preview) >= 5000:
1265
- preview += "\n\n... (생략)"
1266
- except:
1267
- pass
1268
- elif ext == '.zip':
1269
- preview = "📦 HTML이 ZIP으로 압축되었습니다."
1270
-
1271
- progress(1.0, desc="🎉 완료!")
1272
- return final_output, f"✅ 변환 완료: {base_name}{ext} ({size_str})", preview
1273
 
1274
  except Exception as e:
 
1275
  import traceback
1276
- traceback.print_exc()
1277
- return None, f"❌ 오류: {str(e)}", ""
1278
-
1279
- # ============== Gradio UI ==============
1280
# Gradio UI: a chat tab (with file attachment and stored sessions) and an
# HWP/HWPX converter tab, followed by the event wiring for both.
with gr.Blocks(title="HWP AI 어시스턴트", css=COMIC_CSS, delete_cache=(60, 60)) as demo:

    # HOME button
    gr.HTML("""
    <div class="home-button-container">
        <a href="https://www.humangen.ai" target="_blank" class="home-button">
            🏠 HOME
        </a>
        <span class="url-display">🌐 www.humangen.ai</span>
    </div>
    """)

    # Header
    gr.HTML("""
    <div class="header-container">
        <div class="header-title">📄 HWP AI 어시스턴트 🤖</div>
        <div class="header-subtitle">AI가 HWP 파일을 읽고, 보고, 말하며, 생각하고 기억합니다!</div>
        <div style="margin-top:12px">
            <span class="stats-badge">📖 읽기 READ</span>
            <span class="stats-badge">👁️ 보기 SEE</span>
            <span class="stats-badge">💬 말하기 SPEAK</span>
            <span class="stats-badge">🧠 생각 THINK</span>
            <span class="stats-badge">💾 기억 MEMORY</span>
        </div>
    </div>
    """)

    # Free-service notice
    gr.HTML("""
    <div class="free-service-notice">
        🆓 본 서비스는 <b>무료 버전</b>으로 일부 기능에 제약이 있습니다.<br>
        📧 문의: <a href="mailto:arxivgpt@gmail.com">arxivgpt@gmail.com</a>
    </div>
    """)

    # Holds the id of the chat session currently shown in the chat tab.
    session_state = gr.State("")

    with gr.Tabs():
        # Tab 1: AI chat
        with gr.Tab("💬 AI 채팅"):
            # Feature Box

            with gr.Row():
                with gr.Column(scale=1):
                    gr.HTML("""
                    <div class="info-box">
                        📁 <b>지원 파일 형식</b><br><br>
                        🖼️ <b>이미지</b>: JPG, PNG, GIF, WebP<br>
                        📑 <b>문서</b>: PDF, TXT, MD<br>
                        📄 <b>한글</b>: HWP, HWPX ✨
                    </div>
                    """)

                    new_btn = gr.Button("🆕 새 대화 시작", variant="primary")

                    with gr.Accordion("📜 대화 기록 (Memory)", open=False):
                        session_list = gr.Dataframe(headers=["ID", "제목", "시간"], interactive=False)
                        refresh_btn = gr.Button("🔄 새로고침", size="sm")

                with gr.Column(scale=3):
                    chatbot = gr.Chatbot(label="💬 AI 대화", height=500)

                    with gr.Row():
                        file_upload = gr.File(
                            label="📎 파일 첨부 (HWP/HWPX/PDF/이미지)",
                            file_types=[".jpg", ".jpeg", ".png", ".gif", ".webp", ".pdf", ".txt", ".md", ".hwp", ".hwpx"],
                            scale=1,
                            elem_classes=["upload-box"]
                        )
                        msg_input = gr.Textbox(
                            placeholder="💭 메시지를 입력하세요... (파일을 업로드하면 AI가 내용을 읽고 분석합니다)",
                            lines=2,
                            show_label=False,
                            scale=4
                        )

                    with gr.Row():
                        submit_btn = gr.Button("🚀 전송", variant="primary", scale=3)
                        clear_btn = gr.Button("🗑️ 지우기", scale=1)

        # Tab 2: HWP converter
        with gr.Tab("📄 HWP 변환기"):
            gr.HTML("""
            <div class="feature-box">
                <div class="feature-title">🔄 HWP/HWPX 파일 변환기</div>
                <p style="font-family: 'Comic Neue', cursive; font-weight: 700; color: #1F2937;">
                    한글 문서를 다양한 형식으로 변환합니다. AI가 문서를 읽고 텍스트를 추출합니다.
                </p>
            </div>
            """)

            # Highlight box recommending Markdown output
            gr.HTML("""
            <div class="markdown-highlight-box">
                <div class="markdown-title">⭐ MARKDOWN 변환 추천! ⭐</div>
                <div class="markdown-benefits">
                    <div class="markdown-benefit-item">
                        <span class="markdown-benefit-icon">🤖</span>
                        <b>AI/LLM 최적화</b><br>
                        ChatGPT, Claude 등 AI에 바로 입력 가능
                    </div>
                    <div class="markdown-benefit-item">
                        <span class="markdown-benefit-icon">📝</span>
                        <b>범용 포맷</b><br>
                        GitHub, Notion, 블로그 등 어디서나 사용
                    </div>
                    <div class="markdown-benefit-item">
                        <span class="markdown-benefit-icon">🔍</span>
                        <b>구조 유지</b><br>
                        제목, 목록, 표 등 문서 구조 보존
                    </div>
                    <div class="markdown-benefit-item">
                        <span class="markdown-benefit-icon">⚡</span>
                        <b>가볍고 빠름</b><br>
                        용량이 작고 처리 속도 빠름
                    </div>
                    <div class="markdown-benefit-item">
                        <span class="markdown-benefit-icon">🔄</span>
                        <b>변환 용이</b><br>
                        HTML, PDF, Word 등으로 재변환 가능
                    </div>
                    <div class="markdown-benefit-item">
                        <span class="markdown-benefit-icon">✏️</span>
                        <b>편집 간편</b><br>
                        메모장으로도 바로 수정 가능
                    </div>
                </div>
            </div>
            """)

            with gr.Row():
                with gr.Column():
                    gr.HTML('<div class="info-box">📤 <b>파일 업로드</b></div>')
                    hwp_input = gr.File(
                        label="HWP/HWPX 파일 선택",
                        file_types=[".hwp", ".hwpx"],
                        elem_classes=["upload-box"]
                    )
                    format_select = gr.Radio(
                        ["⭐ MARKDOWN (추천)", "TXT (텍스트)", "HTML", "ODT (OpenDocument)", "XML"],
                        value="⭐ MARKDOWN (추천)",
                        label="📋 변환 형식"
                    )
                    convert_btn = gr.Button("🔄 변환하기", variant="primary", size="lg")

                with gr.Column():
                    gr.HTML('<div class="info-box">📥 <b>변환 결과</b></div>')
                    status_out = gr.Textbox(label="상태", interactive=False)
                    file_out = gr.File(label="다운로드", elem_classes=["download-box"])

            with gr.Accordion("📋 미리보기", open=False):
                preview_out = gr.Textbox(lines=15, interactive=False)

            gr.HTML("""
            <div class="info-box">
                ℹ️ <b>안내</b>: 변환 서비스는 개인용도로 사용시 어떠한 제약도 없습니다.
            </div>
            """)

    # Footer
    gr.HTML("""
    <div class="footer-comic">
        <p style="font-family:'Bangers',cursive;font-size:1.8rem;letter-spacing:2px">📄 HWP AI 어시스턴트 🤖</p>
        <p>AI가 HWP 파일을 읽고, 보고, 말하며, 생각하고 기억합니다!</p>
        <p>📖 READ • 👁️ SEE • 💬 SPEAK • 🧠 THINK • 💾 MEMORY</p>
        <p style="margin-top:8px;font-size:0.9rem;">🆓 무료 서비스 (일부 기능 제한) | 📧 arxivgpt@gmail.com</p>
        <p style="margin-top:10px"><a href="https://www.humangen.ai" target="_blank" style="color:#FACC15;text-decoration:none;font-weight:bold;">🏠 www.humangen.ai</a></p>
    </div>
    """)

    # ============== Event handlers ==============
    def on_submit(msg, hist, f, sid):
        """Stream chat_response, clearing the textbox and file input on every update."""
        if hist is None:
            hist = []
        for r in chat_response(msg, hist, f, sid):
            yield r[0], r[1], "", None

    # The send button and pressing Enter in the textbox trigger the same handler.
    submit_inputs = [msg_input, chatbot, file_upload, session_state]
    submit_outputs = [chatbot, session_state, msg_input, file_upload]
    submit_btn.click(on_submit, submit_inputs, submit_outputs)
    msg_input.submit(on_submit, submit_inputs, submit_outputs)

    new_btn.click(lambda: ([], create_session(), None, ""), outputs=[chatbot, session_state, file_upload, msg_input])
    clear_btn.click(lambda: ([], None, ""), outputs=[chatbot, file_upload, msg_input])

    def refresh():
        """Build the session table rows: 8-char id prefix, title, truncated timestamp."""
        sessions = get_all_sessions()
        return [[s["session_id"][:8], s["title"] or "제목없음", s["updated_at"][:16] if s["updated_at"] else ""] for s in sessions]

    refresh_btn.click(refresh, outputs=[session_list])

    def select_session(evt: gr.SelectData, data):
        """Load the stored session whose id starts with the prefix in the clicked row."""
        row = evt.index[0]
        if row < len(data):
            # gr.Dataframe passes a pandas DataFrame by default, where data[row]
            # is a column lookup; use positional access when it is available and
            # fall back to plain nested-list indexing otherwise.
            short_id = data.iloc[row, 0] if hasattr(data, "iloc") else data[row][0]
            for s in get_all_sessions():
                if s["session_id"].startswith(str(short_id)):
                    return load_session(s["session_id"])
        return [], ""

    session_list.select(select_session, [session_list], [chatbot, session_state])
    convert_btn.click(convert_hwp, [hwp_input, format_select], [file_out, status_out, preview_out])
    # Populate the session table when the page loads.
    demo.load(refresh, outputs=[session_list])
1481
 
1482
# Start the Gradio app only when this file is executed as a script;
# launch() is called with ssr_mode=False.
if __name__ == "__main__":
    demo.launch(ssr_mode=False)
 
 
 
 
 
 
 
 
 
1
  import os
 
 
2
  import sys
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
def main():
    """Load the real application from the ``MAIN_CODE`` secret and run it.

    The source text stored in the ``MAIN_CODE`` environment variable is
    compiled and executed in this module's global namespace. When the secret
    is missing, or the code raises while loading, a minimal Gradio page
    describing the problem is served instead.
    """
    try:
        # Get the code from secrets
        code = os.environ.get("MAIN_CODE")

        if not code:
            # Fallback: create a simple error display
            _launch_error_page(
                "# ⚠️ Error",
                "The application code wasn't found in secrets. Please add the MAIN_CODE secret.",
            )
            return

        # SECURITY NOTE: this executes arbitrary code taken from the environment.
        # It is only as trustworthy as whoever can set the MAIN_CODE secret.
        exec(compile(code, '<string>', 'exec'), globals())

    except Exception as e:
        import traceback
        error_msg = traceback.format_exc()
        # Also write the traceback to stderr so the failure is visible in the
        # process logs, not only on the fallback page.
        print(error_msg, file=sys.stderr)
        _launch_error_page(
            "# ⚠️ Error Loading Application",
            f"**Error:** {str(e)}",
            error_msg,
        )


def _launch_error_page(heading, message, details=None):
    """Serve a minimal Gradio page showing a heading, a message and an optional traceback."""
    # Imported lazily so the success path does not import gradio here.
    import gradio as gr

    with gr.Blocks() as demo:
        gr.Markdown(heading)
        gr.Markdown(message)
        if details is not None:
            gr.Code(details, language="python", label="Traceback")
    demo.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
 
32
# Run the loader only when this file is executed as a script.
if __name__ == "__main__":
    main()