Spaces:

Heartsync
/

EXAM-GEN

Running

App Files Files Community

seawolf2357 committed on Jan 15

Commit

46e1b25

verified ·

1 Parent(s): 021ef20

Update app.py

Browse files

Files changed (1) hide show

app.py +422 -1052

app.py CHANGED Viewed

@@ -9,16 +9,16 @@ import os
 import subprocess
 import shutil
 import sys
-import zipfile
 import re
 import json
 import uuid
 import sqlite3
 import base64
 import requests
 from pathlib import Path
 from datetime import datetime
-from typing import Generator, List, Dict, Any, Optional
 # ============== 환경 설정 ==============
 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -30,20 +30,13 @@ if os.path.exists(PYHWP_PATH):
     print(f"Added local pyhwp path: {PYHWP_PATH}")
 # ============== 모듈 임포트 ==============
-try:
-    from hwp5.filestructure import Hwp5File
-    PYHWP_AVAILABLE = True
-    print("pyhwp modules loaded successfully")
-except ImportError as e:
-    PYHWP_AVAILABLE = False
-    print(f"Warning: Could not import pyhwp modules: {e}")
 try:
     import olefile
     OLEFILE_AVAILABLE = True
     print("olefile loaded successfully")
 except ImportError:
     OLEFILE_AVAILABLE = False
 try:
     from markdownify import markdownify as md
@@ -51,7 +44,6 @@ try:
     print("markdownify loaded successfully")
 except ImportError:
     MARKDOWNIFY_AVAILABLE = False
-    print("markdownify not available")
 try:
     import html2text
@@ -59,7 +51,6 @@ try:
     print("html2text loaded successfully")
 except ImportError:
     HTML2TEXT_AVAILABLE = False
-    print("html2text not available")
 try:
     from bs4 import BeautifulSoup
@@ -89,7 +80,6 @@ FIREWORKS_API_KEY = os.environ.get("FIREWORKS_API_KEY", "")
 def init_database():
     conn = sqlite3.connect(DB_PATH)
     cursor = conn.cursor()
     cursor.execute('''
         CREATE TABLE IF NOT EXISTS sessions (
             session_id TEXT PRIMARY KEY,
@@ -98,7 +88,6 @@ def init_database():
             title TEXT
         )
     ''')
     cursor.execute('''
         CREATE TABLE IF NOT EXISTS messages (
             id INTEGER PRIMARY KEY AUTOINCREMENT,
@@ -110,7 +99,6 @@ def init_database():
             FOREIGN KEY (session_id) REFERENCES sessions(session_id)
         )
     ''')
     conn.commit()
     conn.close()
@@ -145,58 +133,36 @@ def get_session_messages(session_id: str, limit: int = 20) -> List[Dict]:
     cursor = conn.cursor()
     cursor.execute(
         """SELECT role, content, file_info, created_at
-           FROM messages
-           WHERE session_id = ?
-           ORDER BY created_at DESC
-           LIMIT ?""",
         (session_id, limit)
     )
     rows = cursor.fetchall()
     conn.close()
-    messages = []
-    for row in reversed(rows):
-        messages.append({
-            "role": row[0],
-            "content": row[1],
-            "file_info": row[2],
-            "created_at": row[3]
-        })
-    return messages
 def get_all_sessions() -> List[Dict]:
     conn = sqlite3.connect(DB_PATH)
     cursor = conn.cursor()
     cursor.execute(
-        """SELECT session_id, title, created_at, updated_at
-           FROM sessions
-           ORDER BY updated_at DESC
-           LIMIT 50"""
     )
     rows = cursor.fetchall()
     conn.close()
-    return [
-        {"session_id": row[0], "title": row[1], "created_at": row[2], "updated_at": row[3]}
-        for row in rows
-    ]
 def update_session_title(session_id: str, title: str):
     conn = sqlite3.connect(DB_PATH)
     cursor = conn.cursor()
-    cursor.execute(
-        "UPDATE sessions SET title = ? WHERE session_id = ?",
-        (title, session_id)
-    )
     conn.commit()
     conn.close()
 init_database()
-# ============== 파일 처리 함수들 ==============
 def extract_text_from_pdf(file_path: str) -> str:
     text_parts = []
     if PDFPLUMBER_AVAILABLE:
         try:
             with pdfplumber.open(file_path) as pdf:
@@ -221,19 +187,15 @@ def extract_text_from_pdf(file_path: str) -> str:
                 return "\n\n".join(text_parts)
         except Exception as e:
             print(f"PyPDF2 error: {e}")
     return None
 def extract_text_from_txt(file_path: str) -> str:
-    encodings = ['utf-8', 'euc-kr', 'cp949', 'utf-16', 'latin-1']
-    for encoding in encodings:
         try:
             with open(file_path, 'r', encoding=encoding) as f:
                 return f.read()
-        except (UnicodeDecodeError, UnicodeError):
             continue
     return None
 def image_to_base64(file_path: str) -> str:
@@ -242,332 +204,234 @@ def image_to_base64(file_path: str) -> str:
 def get_image_mime_type(file_path: str) -> str:
     ext = Path(file_path).suffix.lower()
-    mime_types = {
-        '.jpg': 'image/jpeg',
-        '.jpeg': 'image/jpeg',
-        '.png': 'image/png',
-        '.gif': 'image/gif',
-        '.webp': 'image/webp',
-        '.bmp': 'image/bmp'
-    }
-    return mime_types.get(ext, 'image/jpeg')
-def is_image_file(file_path: str) -> bool:
-    ext = Path(file_path).suffix.lower()
-    return ext in ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp']
-def is_hwp_file(file_path: str) -> bool:
-    ext = Path(file_path).suffix.lower()
-    return ext in ['.hwp', '.hwpx']
-def is_pdf_file(file_path: str) -> bool:
-    return Path(file_path).suffix.lower() == '.pdf'
-def is_text_file(file_path: str) -> bool:
-    ext = Path(file_path).suffix.lower()
-    return ext in ['.txt', '.md', '.json', '.csv', '.xml', '.html', '.css', '.js', '.py']
-# ============== HWP 텍스트 추출 함수들 (개선됨) ==============
-def extract_text_with_pyhwp(file_path: str) -> tuple:
-    """pyhwp 라이브러리를 사용한 텍스트 추출 (가장 정확)"""
-    if not PYHWP_AVAILABLE:
-        return None, "pyhwp 모듈이 없습니다."
     try:
-        # 방법 1: hwp5txt 모듈 직접 사용
         try:
-            from hwp5.hwp5txt import TextExtractor
-            from hwp5.dataio import ParseError
-            hwp5file = Hwp5File(file_path)
-            text_extractor = TextExtractor()
-            text_parts = []
-            for section_idx in range(len(hwp5file.bodytext)):
-                section = hwp5file.bodytext[section_idx]
-                text = text_extractor.extract(section)
-                if text:
-                    text_parts.append(text)
-            hwp5file.close()
-            if text_parts:
-                result = '\n\n'.join(text_parts)
-                print(f"[pyhwp TextExtractor] 추출 성공: {len(result)} chars")
-                return result, None
-        except Exception as e:
-            print(f"[pyhwp TextExtractor] 실패: {e}")
-        # 방법 2: hwp5proc txt 직접 호출
-        try:
-            from hwp5 import plat
-            from hwp5.hwp5txt import extract_text
-            hwp5file = Hwp5File(file_path)
-            text_parts = []
-            for section in hwp5file.bodytext:
-                paragraphs = extract_text(section)
-                for para in paragraphs:
-                    if para.strip():
-                        text_parts.append(para.strip())
-            hwp5file.close()
-            if text_parts:
-                result = '\n'.join(text_parts)
-                print(f"[pyhwp extract_text] 추출 성공: {len(result)} chars")
-                return result, None
-        except Exception as e:
-            print(f"[pyhwp extract_text] 실패: {e}")
-        # 방법 3: XML 변환 후 텍스트 추출
         try:
-            from hwp5.xmlmodel import Hwp5File as XmlHwp5File
-            import io
-            from lxml import etree
-            hwp5file = XmlHwp5File(file_path)
-            # XML로 변환
-            xml_buffer = io.BytesIO()
-            from hwp5.xmldump import xmldump_flat
-            xmldump_flat(hwp5file, xml_buffer)
-            xml_buffer.seek(0)
-            # XML에서 텍스트 추출
-            tree = etree.parse(xml_buffer)
-            # 모든 텍스트 노드 추출
-            text_parts = []
-            for elem in tree.iter():
-                if elem.text and elem.text.strip():
-                    text_parts.append(elem.text.strip())
-                if elem.tail and elem.tail.strip():
-                    text_parts.append(elem.tail.strip())
-            hwp5file.close()
-            if text_parts:
-                result = '\n'.join(text_parts)
-                print(f"[pyhwp XML] 추출 성공: {len(result)} chars")
-                return result, None
         except Exception as e:
-            print(f"[pyhwp XML] 실패: {e}")
-        return None, "pyhwp로 텍스트 추출에 실패했습니다."
-    except Exception as e:
-        return None, f"pyhwp 오류: {str(e)}"
-def extract_text_with_hwp5txt_command(file_path: str) -> tuple:
-    """hwp5txt 명령어로 텍스트 추출"""
-    try:
-        # python -m hwp5 txt 실행
-        result = subprocess.run(
-            [sys.executable, '-m', 'hwp5', 'txt', file_path],
-            capture_output=True,
-            timeout=60
-        )
-        if result.returncode == 0:
-            # stdout을 여러 인코딩으로 디코딩 시도
-            for encoding in ['utf-8', 'euc-kr', 'cp949']:
-                try:
-                    text = result.stdout.decode(encoding)
-                    if text.strip():
-                        print(f"[hwp5txt command] 추출 성공: {len(text)} chars")
-                        return text.strip(), None
-                except:
-                    continue
-        # stderr 확인
-        if result.stderr:
-            print(f"[hwp5txt command] stderr: {result.stderr.decode('utf-8', errors='ignore')[:200]}")
-    except subprocess.TimeoutExpired:
-        print("[hwp5txt command] 타임아웃")
-    except Exception as e:
-        print(f"[hwp5txt command] 오류: {e}")
-    return None, "hwp5txt 명령 실패"
-def extract_text_with_olefile_improved(file_path: str) -> tuple:
-    """olefile을 사용한 개선된 텍스트 추출"""
     if not OLEFILE_AVAILABLE:
-        return None, "olefile 모듈이 없습니다."
     try:
         ole = olefile.OleFileIO(file_path)
-        # HWP 파일 구조 확인
-        print(f"[olefile] OLE 스트림 목록: {ole.listdir()}")
-        text_parts = []
-        # BodyText 섹션에서 텍스트 추출
-        for entry in ole.listdir():
-            entry_path = '/'.join(entry)
-            # BodyText/Section 스트림 찾기
-            if 'BodyText' in entry_path or 'Section' in entry_path:
-                try:
-                    stream = ole.openstream(entry)
-                    data = stream.read()
-                    print(f"[olefile] 스트림 {entry_path}: {len(data)} bytes")
-                    # HWP5 레코드 파싱 시도
-                    extracted = parse_hwp_bodytext(data)
-                    if extracted:
-                        text_parts.append(extracted)
-                        continue
-                    # 단순 UTF-16 디코딩 (fallback)
-                    for encoding in ['utf-16-le', 'utf-16-be', 'utf-8', 'euc-kr', 'cp949']:
-                        try:
-                            text = data.decode(encoding, errors='ignore')
-                            # 제어 문자 제거 및 정리
-                            cleaned = clean_extracted_text(text)
-                            if cleaned and len(cleaned) > 10:
-                                text_parts.append(cleaned)
-                                break
-                        except:
-                            continue
-                except Exception as e:
-                    print(f"[olefile] 스트림 읽기 오류 {entry_path}: {e}")
-                    continue
         ole.close()
-        if text_parts:
-            result = '\n\n'.join(text_parts)
-            print(f"[olefile] 최종 추출: {len(result)} chars")
-            return result, None
-        else:
-            return None, "텍스트를 추출할 수 없습니다."
     except Exception as e:
-        return None, f"OLE 파일 처리 오류: {str(e)}"
-def parse_hwp_bodytext(data: bytes) -> str:
-    """HWP5 BodyText 레코드에서 텍스트 추출"""
     try:
-        # HWP5 레코드 구조: 태그(4바이트) + 데이터
-        # 텍스트는 HWPTAG_PARA_TEXT (0x4A) 레코드에 저장됨
-        text_parts = []
-        offset = 0
-        while offset < len(data) - 4:
-            # 레코드 헤더 읽기 (4바이트)
-            header = int.from_bytes(data[offset:offset+4], 'little')
-            tag_id = header & 0x3FF  # 하위 10비트
-            level = (header >> 10) & 0x3FF  # 다음 10비트
-            size = (header >> 20) & 0xFFF  # 상위 12비트
-            # 확장 크기 처리
-            if size == 0xFFF:
-                if offset + 8 > len(data):
-                    break
-                size = int.from_bytes(data[offset+4:offset+8], 'little')
-                offset += 4
-            offset += 4
-            if offset + size > len(data):
-                break
-            # HWPTAG_PARA_TEXT = HWPTAG_BEGIN (0x10) + 51 = 67 (0x43)
-            if tag_id == 67:  # PARA_TEXT
-                record_data = data[offset:offset+size]
-                # UTF-16LE로 디코딩
                 try:
-                    text = record_data.decode('utf-16-le', errors='ignore')
-                    # 제어 문자 필터링
-                    cleaned = ''.join(c for c in text if c.isprintable() or c in '\n\r\t ')
-                    if cleaned.strip():
-                        text_parts.append(cleaned.strip())
                 except:
-                    pass
-            offset += size
-        if text_parts:
-            return '\n'.join(text_parts)
-        return None
     except Exception as e:
-        print(f"[parse_hwp_bodytext] 오류: {e}")
-        return None
-def clean_extracted_text(text: str) -> str:
-    """추출된 텍스트 정리"""
-    if not text:
-        return ""
-    # NULL 문자 제거
-    text = text.replace('\x00', '')
-    # 제어 문자 제거 (탭, 줄바꿈 제외)
-    cleaned = ''.join(
-        c for c in text
-        if c.isprintable() or c in '\n\r\t '
-    )
-    # 연속된 공백 정리
-    cleaned = re.sub(r'[ \t]+', ' ', cleaned)
-    cleaned = re.sub(r'\n{3,}', '\n\n', cleaned)
-    # 앞뒤 공백 제거
-    cleaned = cleaned.strip()
-    # 너무 짧으면 무시
-    if len(cleaned) < 10:
-        return ""
-    return cleaned
 def extract_text_from_hwp(file_path: str) -> tuple:
-    """HWP 파일에서 텍스트 추출 (다중 방법 시도)"""
-    errors = []
     # 방법 1: hwp5txt 명령어 (가장 안정적)
-    print("[HWP 추출] 방법 1: hwp5txt 명령어 시도...")
-    text, error = extract_text_with_hwp5txt_command(file_path)
-    if text and len(text.strip()) > 20:
         return text, None
-    if error:
-        errors.append(f"hwp5txt: {error}")
-    # 방법 2: pyhwp API
-    print("[HWP 추출] 방법 2: pyhwp API 시도...")
-    text, error = extract_text_with_pyhwp(file_path)
-    if text and len(text.strip()) > 20:
         return text, None
-    if error:
-        errors.append(f"pyhwp: {error}")
-    # 방법 3: olefile (fallback)
-    print("[HWP 추출] 방법 3: olefile 시도...")
-    text, error = extract_text_with_olefile_improved(file_path)
-    if text and len(text.strip()) > 20:
-        return text, None
-    if error:
-        errors.append(f"olefile: {error}")
-    # 모든 방법 실패
-    return None, f"텍스트 추출 실패: {'; '.join(errors)}"
 def check_hwp_version(file_path):
     try:
@@ -582,25 +446,9 @@ def check_hwp_version(file_path):
     except Exception as e:
         return f"Error: {e}", False
-def get_hwp5_command_paths(command_name):
-    paths = [
-        command_name,
-        os.path.join(os.path.dirname(sys.executable), command_name),
-        os.path.join(SCRIPT_DIR, 'bin', command_name),
-        os.path.join(PYHWP_PATH, 'bin', command_name),
-    ]
-    if sys.platform == 'win32':
-        paths.append(os.path.join(os.path.dirname(sys.executable), 'Scripts', command_name))
-        paths.append(os.path.join(os.path.dirname(sys.executable), 'Scripts', f'{command_name}.exe'))
-    return paths
 def convert_to_html_subprocess(input_path, output_dir):
-    output_name = "output.html"
-    output_path = os.path.join(output_dir, output_name)
     try:
         result = subprocess.run(
@@ -611,288 +459,135 @@ def convert_to_html_subprocess(input_path, output_dir):
         )
         if result.returncode == 0:
             if os.path.isfile(output_path):
                 return output_path, None
             if os.path.isdir(output_path):
                 return output_path, None
             for item in os.listdir(output_dir):
                 item_path = os.path.join(output_dir, item)
-                if item.lower().endswith(('.html', '.htm', '.xhtml')) and os.path.isfile(item_path):
                     return item_path, None
                 if os.path.isdir(item_path):
-                    for sub_item in os.listdir(item_path):
-                        if sub_item.lower().endswith(('.html', '.htm', '.xhtml')):
                             return item_path, None
             return output_dir, None
     except Exception as e:
-        print(f"python -m hwp5 html 오류: {e}")
-    return None, "hwp5html 변환 실패"
-def convert_to_txt_subprocess(input_path, output_dir):
-    output_path = os.path.join(output_dir, "output.txt")
-    try:
-        result = subprocess.run(
-            [sys.executable, '-m', 'hwp5', 'txt', input_path],
-            capture_output=True,
-            timeout=120
-        )
-        if result.returncode == 0 and result.stdout:
-            # 인코딩 감지
-            for encoding in ['utf-8', 'euc-kr', 'cp949']:
-                try:
-                    text = result.stdout.decode(encoding)
-                    if text.strip():
-                        with open(output_path, 'w', encoding='utf-8') as f:
-                            f.write(text)
-                        return output_path, None
-                except:
-                    continue
-    except Exception as e:
-        print(f"python -m hwp5 txt 오류: {e}")
-    return None, "hwp5txt 변환 실패"
-# ============== HTML/Markdown 변환 ==============
-def html_to_markdown_with_markdownify(html_content):
-    try:
-        markdown_content = md(
-            html_content,
-            heading_style="ATX",
-            bullets="-",
-            strip=['script', 'style', 'meta', 'link'],
-            code_language="",
-            escape_asterisks=False,
-            escape_underscores=False,
-        )
-        return markdown_content, None
-    except Exception as e:
-        return None, f"markdownify 변환 오류: {str(e)}"
-def html_to_markdown_with_html2text(html_content):
-    try:
-        h = html2text.HTML2Text()
-        h.ignore_links = False
-        h.ignore_images = False
-        h.ignore_tables = False
-        h.body_width = 0
-        h.unicode_snob = True
-        h.skip_internal_links = True
-        h.inline_links = True
-        h.protect_links = True
-        h.ignore_emphasis = False
-        markdown_content = h.handle(html_content)
-        return markdown_content, None
-    except Exception as e:
-        return None, f"html2text 변환 오류: {str(e)}"
-def html_to_markdown_simple(html_content):
-    try:
-        if BS4_AVAILABLE:
-            soup = BeautifulSoup(html_content, 'html.parser')
-            for tag in soup(['script', 'style', 'meta', 'link']):
-                tag.decompose()
-            text = str(soup)
-        else:
-            text = html_content
-        conversions = [
-            (r'<h1[^>]*>(.*?)</h1>', r'# \1\n'),
-            (r'<h2[^>]*>(.*?)</h2>', r'## \1\n'),
-            (r'<h3[^>]*>(.*?)</h3>', r'### \1\n'),
-            (r'<h4[^>]*>(.*?)</h4>', r'#### \1\n'),
-            (r'<h5[^>]*>(.*?)</h5>', r'##### \1\n'),
-            (r'<h6[^>]*>(.*?)</h6>', r'###### \1\n'),
-            (r'<strong[^>]*>(.*?)</strong>', r'**\1**'),
-            (r'<b[^>]*>(.*?)</b>', r'**\1**'),
-            (r'<em[^>]*>(.*?)</em>', r'*\1*'),
-            (r'<i[^>]*>(.*?)</i>', r'*\1*'),
-            (r'<code[^>]*>(.*?)</code>', r'`\1`'),
-            (r'<a[^>]*href=["\']([^"\']*)["\'][^>]*>(.*?)</a>', r'[\2](\1)'),
-            (r'<li[^>]*>(.*?)</li>', r'- \1\n'),
-            (r'<ul[^>]*>', ''),
-            (r'</ul>', '\n'),
-            (r'<ol[^>]*>', ''),
-            (r'</ol>', '\n'),
-            (r'<p[^>]*>(.*?)</p>', r'\1\n\n'),
-            (r'<br\s*/?>', '\n'),
-            (r'<hr\s*/?>', '\n---\n'),
-            (r'<blockquote[^>]*>(.*?)</blockquote>', r'> \1\n'),
-            (r'<pre[^>]*><code[^>]*>(.*?)</code></pre>', r'```\n\1\n```\n'),
-            (r'<pre[^>]*>(.*?)</pre>', r'```\n\1\n```\n'),
-            (r'<div[^>]*>', ''),
-            (r'</div>', '\n'),
-            (r'<span[^>]*>', ''),
-            (r'</span>', ''),
-            (r'<[^>]+>', ''),
-            (r'&nbsp;', ' '),
-            (r'&lt;', '<'),
-            (r'&gt;', '>'),
-            (r'&amp;', '&'),
-            (r'&quot;', '"'),
-            (r'&#39;', "'"),
-        ]
-        for pattern, replacement in conversions:
-            text = re.sub(pattern, replacement, text, flags=re.DOTALL | re.IGNORECASE)
-        text = re.sub(r'\n{3,}', '\n\n', text)
-        text = text.strip()
-        return text, None
-    except Exception as e:
-        return None, f"기본 변환 오류: {str(e)}"
-def convert_html_to_markdown(html_content):
     if MARKDOWNIFY_AVAILABLE:
-        result, error = html_to_markdown_with_markdownify(html_content)
-        if result:
-            return result, None
     if HTML2TEXT_AVAILABLE:
-        result, error = html_to_markdown_with_html2text(html_content)
-        if result:
-            return result, None
-    result, error = html_to_markdown_simple(html_content)
-    if result:
-        return result, None
-    return None, "HTML → Markdown 변환에 실패했습니다."
 def convert_hwp_to_markdown(input_path: str) -> tuple:
-    """HWP 파일을 텍스트/마크다운으로 변환 (개선된 버전)"""
-    print(f"[HWP→MD] 변환 시작: {input_path}")
-    # 1단계: 직접 텍스트 추출 시도 (가장 빠르고 안정적)
     text, error = extract_text_from_hwp(input_path)
-    if text and len(text.strip()) > 20:
-        print(f"[HWP→MD] 텍스트 추출 성공: {len(text)} chars")
         return text, None
-    print(f"[HWP→MD] 텍스트 추출 실패, HTML 변환 시도...")
-    # 2단계: HTML 변환 후 마크다운 변환
     tmp_dir = tempfile.mkdtemp()
     try:
         html_output, error = convert_to_html_subprocess(input_path, tmp_dir)
-        if html_output is None:
-            return None, f"변환 실패: {error}"
-        # HTML 파일 찾기
-        html_contents = []
-        def find_html_files(search_dir):
-            files = []
-            for root, dirs, filenames in os.walk(search_dir):
-                for filename in filenames:
-                    if filename.lower().endswith(('.html', '.htm', '.xhtml')):
-                        files.append(os.path.join(root, filename))
-            return sorted(files)
-        if os.path.isfile(html_output) and html_output.lower().endswith(('.html', '.htm', '.xhtml')):
-            html_files = [html_output]
-        else:
-            search_path = html_output if os.path.isdir(html_output) else tmp_dir
-            html_files = find_html_files(search_path)
-        for html_file in html_files:
-            for encoding in ['utf-8', 'euc-kr', 'cp949', 'utf-16']:
-                try:
-                    with open(html_file, 'r', encoding=encoding) as f:
-                        content = f.read()
-                        html_contents.append(content)
-                        break
-                except:
-                    continue
-        if not html_contents:
-            return None, "HTML 파일을 찾을 수 없습니다."
-        # HTML → Markdown 변환
-        markdown_parts = []
-        for html_content in html_contents:
-            md_content, error = convert_html_to_markdown(html_content)
-            if md_content and len(md_content.strip()) > 10:
-                markdown_parts.append(md_content)
-        if markdown_parts:
-            result = "\n\n---\n\n".join(markdown_parts)
-            print(f"[HWP→MD] HTML→MD 변환 성공: {len(result)} chars")
-            return result, None
-        return None, "Markdown 변환에 실패했습니다."
-    except Exception as e:
-        return None, f"변환 오류: {str(e)}"
     finally:
         shutil.rmtree(tmp_dir, ignore_errors=True)
-# ============== LLM API 함수들 ==============
 def call_groq_api_stream(messages: List[Dict], api_key: str) -> Generator[str, None, None]:
     if not api_key:
         yield "❌ Groq API 키가 설정되지 않았습니다."
         return
     try:
-        url = "https://api.groq.com/openai/v1/chat/completions"
-        headers = {
-            "Authorization": f"Bearer {api_key}",
-            "Content-Type": "application/json"
-        }
-        payload = {
-            "model": "meta-llama/llama-4-scout-17b-16e-instruct",
-            "messages": messages,
-            "temperature": 0.7,
-            "max_tokens": 8192,
-            "top_p": 1,
-            "stream": True
-        }
-        response = requests.post(url, headers=headers, json=payload, stream=True)
         if response.status_code != 200:
-            yield f"❌ Groq API 오류: {response.status_code} - {response.text}"
             return
         for line in response.iter_lines():
             if line:
                 line = line.decode('utf-8')
-                if line.startswith('data: '):
-                    data = line[6:]
-                    if data == '[DONE]':
-                        break
                     try:
-                        json_data = json.loads(data)
-                        if 'choices' in json_data and len(json_data['choices']) > 0:
-                            delta = json_data['choices'][0].get('delta', {})
-                            content = delta.get('content', '')
-                            if content:
-                                yield content
-                    except json.JSONDecodeError:
                         continue
     except Exception as e:
-        yield f"\n\n❌ Groq API 오류: {str(e)}"
 def call_fireworks_api_stream(messages: List[Dict], image_base64: str, mime_type: str, api_key: str) -> Generator[str, None, None]:
     if not api_key:
@@ -900,150 +595,96 @@ def call_fireworks_api_stream(messages: List[Dict], image_base64: str, mime_type
         return
     try:
-        url = "https://api.fireworks.ai/inference/v1/chat/completions"
-        formatted_messages = []
-        for msg in messages[:-1]:
-            formatted_messages.append({
-                "role": msg["role"],
-                "content": msg["content"]
-            })
-        last_msg = messages[-1]
         formatted_messages.append({
-            "role": last_msg["role"],
             "content": [
-                {
-                    "type": "image_url",
-                    "image_url": {
-                        "url": f"data:{mime_type};base64,{image_base64}"
-                    }
-                },
-                {
-                    "type": "text",
-                    "text": last_msg["content"]
-                }
             ]
         })
-        payload = {
-            "model": "accounts/fireworks/models/qwen3-vl-235b-a22b-thinking",
-            "max_tokens": 4096,
-            "top_p": 1,
-            "top_k": 40,
-            "presence_penalty": 0,
-            "frequency_penalty": 0,
-            "temperature": 0.6,
-            "messages": formatted_messages,
-            "stream": True
-        }
-        headers = {
-            "Accept": "application/json",
-            "Content-Type": "application/json",
-            "Authorization": f"Bearer {api_key}"
-        }
-        response = requests.post(url, headers=headers, json=payload, stream=True)
         if response.status_code != 200:
-            yield f"❌ Fireworks API 오류: {response.status_code} - {response.text}"
             return
         for line in response.iter_lines():
             if line:
                 line = line.decode('utf-8')
-                if line.startswith('data: '):
-                    data = line[6:]
-                    if data == '[DONE]':
-                        break
                     try:
-                        json_data = json.loads(data)
-                        if 'choices' in json_data and len(json_data['choices']) > 0:
-                            delta = json_data['choices'][0].get('delta', {})
-                            content = delta.get('content', '')
-                            if content:
-                                yield content
-                    except json.JSONDecodeError:
                         continue
     except Exception as e:
-        yield f"\n\n❌ Fireworks API 오류: {str(e)}"
-# ============== 채팅 처리 함수 ==============
 def process_file(file_path: str) -> tuple:
-    if file_path is None:
         return None, None, None
     filename = os.path.basename(file_path)
     if is_image_file(file_path):
-        base64_data = image_to_base64(file_path)
-        mime_type = get_image_mime_type(file_path)
-        return "image", base64_data, mime_type
     if is_hwp_file(file_path):
-        print(f"[process_file] HWP 파일 처리: {filename}")
-        markdown_content, error = convert_hwp_to_markdown(file_path)
-        if markdown_content and len(markdown_content.strip()) > 20:
-            # 컨텐츠가 너무 길면 요약 정보 추가
-            content_preview = markdown_content[:500] + "..." if len(markdown_content) > 500 else markdown_content
-            print(f"[process_file] HWP 변환 성공: {len(markdown_content)} chars")
-            print(f"[process_file] 미리보기: {content_preview[:200]}")
-            return "text", f"[HWP 문서: {filename}]\n\n{markdown_content}", None
-        else:
-            print(f"[process_file] HWP 변환 실패: {error}")
-            return "error", f"HWP 변환 실패: {error}", None
     if is_pdf_file(file_path):
         text = extract_text_from_pdf(file_path)
         if text:
             return "text", f"[PDF 문서: {filename}]\n\n{text}", None
-        else:
-            return "error", "PDF 텍스트 추출 실패", None
     if is_text_file(file_path):
         text = extract_text_from_txt(file_path)
         if text:
             return "text", f"[텍스트 파일: {filename}]\n\n{text}", None
-        else:
-            return "error", "텍스트 파일 읽기 실패", None
-    return "unsupported", f"지원하지 않는 파일 형식: {filename}", None
-def chat_response(
-    message: str,
-    history: List[Dict],
-    file: Optional[str],
-    session_id: str,
-    groq_api_key: str,
-    fireworks_api_key: str
-) -> Generator[tuple, None, None]:
-    """채팅 응답 생성"""
     if history is None:
         history = []
-    if not message.strip() and file is None:
         yield history, session_id
         return
     if not session_id:
         session_id = create_session()
-    file_type = None
-    file_content = None
-    file_mime = None
     file_info = None
-    if file is not None:
         file_type, file_content, file_mime = process_file(file)
-        file_info = json.dumps({
-            "type": file_type,
-            "filename": os.path.basename(file) if file else None
-        })
         if file_type == "error":
             history = history + [
@@ -1060,184 +701,102 @@ def chat_response(
             yield history, session_id
             return
-    # 사용자 메시지 구성
-    user_display_message = message
-    if file is not None:
         filename = os.path.basename(file)
-        user_display_message = f"📎 {filename}\n\n{message}" if message else f"📎 {filename}"
-    history = history + [
-        {"role": "user", "content": user_display_message},
-        {"role": "assistant", "content": ""}
-    ]
     yield history, session_id
-    # DB에서 이전 대화
-    db_messages = get_session_messages(session_id, limit=10)
     # API 메시지 구성
-    api_messages = []
-    api_messages.append({
         "role": "system",
-        "content": """당신은 도움이 되는 AI 어시스턴트입니다. 한국어로 자연스럽게 대화하며, 사용자의 질문에 정확하고 유용한 답변을 제공합니다.
-파일이 첨부된 경우:
-- 문서 내용을 주의 깊게 분석하세요
-- 문서에서 중요한 정보를 추출하고 요약해주세요
-- 사용자의 질문에 문서 내용을 기반으로 답변하세요
-- 문서에 없는 내용은 추측하지 말고 문서에 기반한 답변을 하세요"""
-    })
-    for db_msg in db_messages:
-        api_messages.append({
-            "role": db_msg["role"],
-            "content": db_msg["content"]
-        })
-    # 현재 메시지 구성
     current_content = message or ""
     if file_type == "text" and file_content:
-        if message:
-            current_content = f"{file_content}\n\n---\n\n사용자 질문: {message}"
-        else:
-            current_content = f"{file_content}\n\n---\n\n위 문서의 내용을 요약해주세요."
-    api_messages.append({
-        "role": "user",
-        "content": current_content
-    })
     full_response = ""
     if file_type == "image":
-        for chunk in call_fireworks_api_stream(api_messages, file_content, file_mime, fireworks_api_key):
             full_response += chunk
             history[-1] = {"role": "assistant", "content": full_response}
             yield history, session_id
     else:
-        for chunk in call_groq_api_stream(api_messages, groq_api_key):
             full_response += chunk
             history[-1] = {"role": "assistant", "content": full_response}
             yield history, session_id
-    # DB에 저장
     save_message(session_id, "user", current_content, file_info)
     save_message(session_id, "assistant", full_response)
     if len(db_messages) == 0 and message:
-        title = message[:50] + "..." if len(message) > 50 else message
-        update_session_title(session_id, title)
 def new_chat():
-    session_id = create_session()
-    return [], session_id, None
 def load_session(session_id: str) -> tuple:
     if not session_id:
         return [], ""
     messages = get_session_messages(session_id, limit=50)
-    history = []
-    for msg in messages:
-        history.append({"role": msg["role"], "content": msg["content"]})
-    return history, session_id
-# ============== HWP 변환기 함수 ==============
 def convert_to_odt_subprocess(input_path, output_dir):
     output_path = os.path.join(output_dir, "output.odt")
     try:
         result = subprocess.run(
             [sys.executable, '-m', 'hwp5', 'odt', '--output', output_path, input_path],
-            capture_output=True,
-            text=True,
-            timeout=120
         )
         if result.returncode == 0 and os.path.exists(output_path):
             return output_path, None
-    except Exception as e:
-        print(f"python -m hwp5 odt 오류: {e}")
-    return None, "hwp5odt 변환 실패"
 def convert_to_xml_subprocess(input_path, output_dir):
     output_path = os.path.join(output_dir, "output.xml")
-    if PYHWP_AVAILABLE:
-        try:
-            from hwp5.xmlmodel import Hwp5File as XmlHwp5File
-            from hwp5.xmldump import xmldump_flat
-            hwp5file = XmlHwp5File(input_path)
-            with open(output_path, 'wb') as f:
-                xmldump_flat(hwp5file, f)
-            if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
-                return output_path, None
-        except Exception as e:
-            print(f"xmldump_flat 오류: {e}")
     try:
         result = subprocess.run(
             [sys.executable, '-m', 'hwp5', 'xml', input_path],
-            capture_output=True,
-            timeout=120
         )
         if result.returncode == 0 and result.stdout:
             with open(output_path, 'wb') as f:
                 f.write(result.stdout)
             return output_path, None
-    except Exception as e:
-        print(f"python -m hwp5 xml 오류: {e}")
-    return None, "hwp5xml 변환 실패"
-def convert_to_markdown_file(input_path, output_dir, progress_callback=None):
-    if progress_callback:
-        progress_callback(0.2, "변환 중...")
-    markdown_content, error = convert_hwp_to_markdown(input_path)
-    if markdown_content is None:
-        return None, error
-    if progress_callback:
-        progress_callback(0.8, "파일 저장 중...")
-    output_path = os.path.join(output_dir, "output.md")
-    with open(output_path, 'w', encoding='utf-8') as f:
-        f.write(markdown_content)
-    return output_path, None
 def convert_hwp(file, output_format, progress=gr.Progress()):
-    if file is None:
         return None, "❌ 파일을 업로드해주세요.", ""
-    if hasattr(file, 'name'):
-        input_file = file.name
-    else:
-        input_file = str(file)
     if not input_file.lower().endswith('.hwp'):
         return None, "❌ HWP 파일만 지원됩니다.", ""
     progress(0.1, desc="파일 분석 중...")
     version, is_valid = check_hwp_version(input_file)
     if not is_valid:
-        return None, f"❌ 지원하지 않는 파일 형식입니다: {version}", ""
     tmp_dir = tempfile.mkdtemp()
@@ -1248,347 +807,158 @@ def convert_hwp(file, output_format, progress=gr.Progress()):
         progress(0.3, desc=f"{output_format}로 변환 중...")
-        output_path = None
-        error = None
-        ext = ""
         if output_format == "HTML":
             output_path, error = convert_to_html_subprocess(input_path, tmp_dir)
             ext = ".html"
             if output_path and os.path.isdir(output_path):
-                zip_base = os.path.join(tmp_dir, "html_output")
-                zip_path = shutil.make_archive(zip_base, 'zip', output_path)
-                output_path = zip_path
-                ext = ".zip"
-            elif output_path and output_path == tmp_dir:
-                html_files = []
-                for item in os.listdir(tmp_dir):
-                    if item.lower().endswith(('.html', '.htm', '.xhtml')):
-                        html_files.append(item)
-                    elif os.path.isdir(os.path.join(tmp_dir, item)):
-                        for sub in os.listdir(os.path.join(tmp_dir, item)):
-                            if sub.lower().endswith(('.html', '.htm', '.xhtml')):
-                                html_files.append(os.path.join(item, sub))
-                if html_files:
-                    zip_base = os.path.join(tmp_dir, "html_output")
-                    zip_path = shutil.make_archive(zip_base, 'zip', tmp_dir)
-                    output_path = zip_path
-                    ext = ".zip"
         elif output_format == "ODT (OpenDocument)":
             output_path, error = convert_to_odt_subprocess(input_path, tmp_dir)
             ext = ".odt"
         elif output_format == "TXT (텍스트)":
-            # 개선된 텍스트 추출 사용
             text, error = extract_text_from_hwp(input_path)
             if text:
                 output_path = os.path.join(tmp_dir, "output.txt")
                 with open(output_path, 'w', encoding='utf-8') as f:
                     f.write(text)
-                error = None
             ext = ".txt"
         elif output_format == "Markdown":
-            def progress_callback(val, desc):
-                progress(0.3 + val * 0.5, desc=desc)
-            output_path, error = convert_to_markdown_file(input_path, tmp_dir, progress_callback)
             ext = ".md"
         elif output_format == "XML":
             output_path, error = convert_to_xml_subprocess(input_path, tmp_dir)
             ext = ".xml"
-        else:
-            return None, f"❌ 지원하지 않는 형식: {output_format}", ""
-        if output_path is None:
-            error_msg = error or "변환에 실패했습니다."
-            return None, f"❌ {error_msg}", ""
-        if os.path.isdir(output_path):
-            zip_base = os.path.join(tmp_dir, "output_archive")
-            zip_path = shutil.make_archive(zip_base, 'zip', output_path)
-            output_path = zip_path
-            ext = ".zip"
         if not os.path.exists(output_path):
             return None, "❌ 변환된 파일을 찾을 수 없습니다.", ""
-        progress(0.8, desc="파일 준비 중...")
         base_name = Path(input_filename).stem
-        final_filename = f"{base_name}{ext}"
-        final_output = os.path.join(tmp_dir, final_filename)
-        if output_path != final_output and os.path.isfile(output_path):
             shutil.copy2(output_path, final_output)
-        elif output_path == final_output:
-            pass
-        else:
-            final_output = output_path
         file_size = os.path.getsize(final_output)
-        size_str = f"{file_size / 1024:.1f} KB" if file_size > 1024 else f"{file_size} bytes"
-        progress(1.0, desc="완료!")
         preview = ""
-        if ext in ['.txt', '.md', '.xml'] and os.path.isfile(final_output):
             try:
                 with open(final_output, 'r', encoding='utf-8', errors='ignore') as f:
                     preview = f.read(5000)
                     if len(preview) >= 5000:
-                        preview += "\n\n... (미리보기 생략)"
             except:
                 pass
         elif ext == '.zip':
-            preview = "📦 HTML 변환 결과가 ZIP 파일로 압축되었습니다.\n다운로드 후 압축을 풀어서 HTML 파일을 확인하세요."
-        return final_output, f"✅ 변환 완료: {final_filename} ({size_str})", preview
     except Exception as e:
         import traceback
         traceback.print_exc()
-        return None, f"❌ 오류 발생: {str(e)}", ""
-# ============== CSS 스타일 ==============
 css = """
-.upload-box {
-    border: 2px dashed #6366f1 !important;
-    border-radius: 12px !important;
-}
-.download-box {
-    border: 2px solid #22c55e !important;
-    border-radius: 12px !important;
-}
-.preview-box {
-    max-height: 400px;
-    overflow-y: auto;
-    font-family: monospace;
-    white-space: pre-wrap;
-    background: #f8fafc;
-    padding: 16px;
-    border-radius: 8px;
-}
 """
-# ============== Gradio 인터페이스 ==============
 with gr.Blocks(title="AI 문서 어시스턴트") as demo:
     session_state = gr.State("")
-    gr.Markdown("""
-    # 🤖 AI 문서 어시스턴트
-    LLM 채팅 + HWP 문서 변환 통합 도구
-    """)
     with gr.Tabs():
-        # Tab 1: LLM 채팅
-        with gr.Tab("💬 AI 채팅", id="chat"):
             with gr.Row():
                 with gr.Column(scale=1):
                     gr.Markdown("### ⚙️ 설정")
-                    with gr.Accordion("🔑 API 키 설정", open=True):
-                        groq_key_input = gr.Textbox(
-                            label="Groq API Key",
-                            type="password",
-                            placeholder="gsk_...",
-                            value=GROQ_API_KEY
-                        )
-                        fireworks_key_input = gr.Textbox(
-                            label="Fireworks API Key",
-                            type="password",
-                            placeholder="fw_...",
-                            value=FIREWORKS_API_KEY
-                        )
-                    gr.Markdown("### 📁 지원 파일")
-                    gr.Markdown("""
-                    - **이미지**: JPG, PNG, GIF, WebP
-                    - **문서**: PDF, TXT, MD
-                    - **한글**: HWP, HWPX ✨
-                    > HWP/HWPX 파일은 자동으로 텍스트 추출됩니다.
-                    """)
-                    new_chat_btn = gr.Button("🆕 새 대화", variant="primary")
-                    with gr.Accordion("📜 대화 기록", open=False):
-                        session_list = gr.Dataframe(
-                            headers=["세션 ID", "제목", "업데이트"],
-                            datatype=["str", "str", "str"],
-                            interactive=False,
-                            wrap=True
-                        )
-                        refresh_sessions_btn = gr.Button("🔄 새로고침", size="sm")
                 with gr.Column(scale=3):
-                    chatbot = gr.Chatbot(
-                        label="대화",
-                        height=500
-                    )
                     with gr.Row():
-                        file_upload = gr.File(
-                            label="📎 파일 첨부",
-                            file_types=[".jpg", ".jpeg", ".png", ".gif", ".webp", ".pdf", ".txt", ".md", ".hwp", ".hwpx"],
-                            file_count="single",
-                            scale=1
-                        )
-                        with gr.Column(scale=4):
-                            msg_input = gr.Textbox(
-                                label="메시지",
-                                placeholder="메시지를 입력하세요... (Shift+Enter: 줄바꿈)",
-                                lines=2,
-                                max_lines=5,
-                                show_label=False
-                            )
                     with gr.Row():
                         submit_btn = gr.Button("📤 전송", variant="primary", scale=3)
                         clear_btn = gr.Button("🗑️ 지우기", scale=1)
-        # Tab 2: HWP 변환기
-        with gr.Tab("📄 HWP 변환기", id="converter"):
-            gr.Markdown("""
-            ### HWP 파일 변환기
-            한글(HWP) 문서를 다양한 형식으로 변환합니다.
-            """)
             with gr.Row():
-                with gr.Column(scale=1):
-                    gr.Markdown("#### 📤 파일 업로드")
-                    hwp_file_input = gr.File(
-                        label="HWP 파일 선택",
-                        file_types=[".hwp"],
-                        type="filepath",
-                        elem_classes=["upload-box"]
-                    )
-                    format_select = gr.Radio(
-                        label="변환 형식",
-                        choices=["HTML", "ODT (OpenDocument)", "TXT (텍스트)", "Markdown", "XML"],
-                        value="TXT (텍스트)",
-                        info="원하는 출력 형식을 선택하세요"
-                    )
-                    convert_btn = gr.Button("🔄 변환하기", variant="primary", size="lg")
-                with gr.Column(scale=1):
-                    gr.Markdown("#### 📥 변환 결과")
-                    status_output = gr.Textbox(
-                        label="상태",
-                        interactive=False,
-                        lines=2
-                    )
-                    file_output = gr.File(
-                        label="다운로드",
-                        elem_classes=["download-box"]
-                    )
-            with gr.Accordion("📋 내용 미리보기", open=False):
-                preview_output = gr.Textbox(
-                    label="",
-                    lines=15,
-                    max_lines=25,
-                    interactive=False,
-                    elem_classes=["preview-box"]
-                )
-            gr.Markdown("""
-            ---
-            #### ℹ️ 안내사항
-            | 형식 | 설명 | 용도 |
-            |------|------|------|
-            | **HTML** | 웹 페이지 형식 | 브라우저에서 보기 |
-            | **ODT** | OpenDocument | LibreOffice, Google Docs |
-            | **TXT** | 순수 텍스트 | 텍스트 추출 |
-            | **Markdown** | 마크다운 형식 | GitHub, 노션 |
-            | **XML** | 구조화 데이터 | 데이터 처리 |
-            > ⚠️ HWP v5 형식(한글 2007+)만 지원 | 암호화 파일 불가
-            """)
-    # ============== 이벤트 핸들러 ==============
-    def on_submit(message, history, file, session_id, groq_key, fireworks_key):
-        if history is None:
-            history = []
-        for result in chat_response(message, history, file, session_id, groq_key, fireworks_key):
-            yield result[0], result[1], "", None
-    submit_btn.click(
-        fn=on_submit,
-        inputs=[msg_input, chatbot, file_upload, session_state, groq_key_input, fireworks_key_input],
-        outputs=[chatbot, session_state, msg_input, file_upload]
-    )
-    msg_input.submit(
-        fn=on_submit,
-        inputs=[msg_input, chatbot, file_upload, session_state, groq_key_input, fireworks_key_input],
-        outputs=[chatbot, session_state, msg_input, file_upload]
-    )
-    def on_new_chat():
-        history, session_id, file = new_chat()
-        return history, session_id, None, ""
-    new_chat_btn.click(
-        fn=on_new_chat,
-        outputs=[chatbot, session_state, file_upload, msg_input]
-    )
-    clear_btn.click(
-        fn=lambda: ([], None, ""),
-        outputs=[chatbot, file_upload, msg_input]
-    )
-    def refresh_sessions():
         sessions = get_all_sessions()
-        data = [[s["session_id"][:8], s["title"] or "제목 없음", s["updated_at"][:16] if s["updated_at"] else ""] for s in sessions]
-        return data
-    refresh_sessions_btn.click(
-        fn=refresh_sessions,
-        outputs=[session_list]
-    )
-    def on_session_select(evt: gr.SelectData, data):
         if evt.index[0] < len(data):
-            session_id_short = data[evt.index[0]][0]
-            sessions = get_all_sessions()
-            for s in sessions:
-                if s["session_id"].startswith(session_id_short):
-                    history, session_id = load_session(s["session_id"])
-                    return history, session_id
         return [], ""
-    session_list.select(
-        fn=on_session_select,
-        inputs=[session_list],
-        outputs=[chatbot, session_state]
-    )
-    convert_btn.click(
-        fn=convert_hwp,
-        inputs=[hwp_file_input, format_select],
-        outputs=[file_output, status_output, preview_output]
-    )
-    demo.load(
-        fn=refresh_sessions,
-        outputs=[session_list]
-    )
 if __name__ == "__main__":
     demo.launch(css=css)

 import subprocess
 import shutil
 import sys
 import re
 import json
 import uuid
 import sqlite3
 import base64
 import requests
+import zlib
 from pathlib import Path
 from datetime import datetime
+from typing import Generator, List, Dict, Optional
 # ============== 환경 설정 ==============
 SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
     print(f"Added local pyhwp path: {PYHWP_PATH}")
 # ============== 모듈 임포트 ==============
 try:
     import olefile
     OLEFILE_AVAILABLE = True
     print("olefile loaded successfully")
 except ImportError:
     OLEFILE_AVAILABLE = False
+    print("olefile not available")
 try:
     from markdownify import markdownify as md
     print("markdownify loaded successfully")
 except ImportError:
     MARKDOWNIFY_AVAILABLE = False
 try:
     import html2text
     print("html2text loaded successfully")
 except ImportError:
     HTML2TEXT_AVAILABLE = False
 try:
     from bs4 import BeautifulSoup
 def init_database():
     conn = sqlite3.connect(DB_PATH)
     cursor = conn.cursor()
     cursor.execute('''
         CREATE TABLE IF NOT EXISTS sessions (
             session_id TEXT PRIMARY KEY,
             title TEXT
         )
     ''')
     cursor.execute('''
         CREATE TABLE IF NOT EXISTS messages (
             id INTEGER PRIMARY KEY AUTOINCREMENT,
             FOREIGN KEY (session_id) REFERENCES sessions(session_id)
         )
     ''')
     conn.commit()
     conn.close()
     cursor = conn.cursor()
     cursor.execute(
         """SELECT role, content, file_info, created_at
+           FROM messages WHERE session_id = ?
+           ORDER BY created_at DESC LIMIT ?""",
         (session_id, limit)
     )
     rows = cursor.fetchall()
     conn.close()
+    return [{"role": r[0], "content": r[1], "file_info": r[2], "created_at": r[3]} for r in reversed(rows)]
 def get_all_sessions() -> List[Dict]:
     conn = sqlite3.connect(DB_PATH)
     cursor = conn.cursor()
     cursor.execute(
+        "SELECT session_id, title, created_at, updated_at FROM sessions ORDER BY updated_at DESC LIMIT 50"
     )
     rows = cursor.fetchall()
     conn.close()
+    return [{"session_id": r[0], "title": r[1], "created_at": r[2], "updated_at": r[3]} for r in rows]
 def update_session_title(session_id: str, title: str):
     conn = sqlite3.connect(DB_PATH)
     cursor = conn.cursor()
+    cursor.execute("UPDATE sessions SET title = ? WHERE session_id = ?", (title, session_id))
     conn.commit()
     conn.close()
 init_database()
+# ============== 파일 유틸리티 ==============
 def extract_text_from_pdf(file_path: str) -> str:
     text_parts = []
     if PDFPLUMBER_AVAILABLE:
         try:
             with pdfplumber.open(file_path) as pdf:
                 return "\n\n".join(text_parts)
         except Exception as e:
             print(f"PyPDF2 error: {e}")
     return None
 def extract_text_from_txt(file_path: str) -> str:
+    for encoding in ['utf-8', 'euc-kr', 'cp949', 'utf-16', 'latin-1']:
         try:
             with open(file_path, 'r', encoding=encoding) as f:
                 return f.read()
+        except:
             continue
     return None
 def image_to_base64(file_path: str) -> str:
 def get_image_mime_type(file_path: str) -> str:
     ext = Path(file_path).suffix.lower()
+    return {'.jpg': 'image/jpeg', '.jpeg': 'image/jpeg', '.png': 'image/png',
+            '.gif': 'image/gif', '.webp': 'image/webp', '.bmp': 'image/bmp'}.get(ext, 'image/jpeg')
+def is_image_file(fp: str) -> bool:
+    return Path(fp).suffix.lower() in ['.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp']
+def is_hwp_file(fp: str) -> bool:
+    return Path(fp).suffix.lower() in ['.hwp', '.hwpx']
+def is_pdf_file(fp: str) -> bool:
+    return Path(fp).suffix.lower() == '.pdf'
+def is_text_file(fp: str) -> bool:
+    return Path(fp).suffix.lower() in ['.txt', '.md', '.json', '.csv', '.xml', '.html', '.css', '.js', '.py']
+# ============== HWP 텍스트 추출 (핵심 - 단순하고 안정적으로) ==============
+def decompress_stream(data: bytes) -> bytes:
+    """zlib 압축 해제 시도"""
     try:
+        return zlib.decompress(data, -15)
+    except:
         try:
+            return zlib.decompress(data)
+        except:
+            return data
+def extract_hwp_text_from_bodytext(ole) -> str:
+    """BodyText 섹션에서 텍스트 추출 (HWP5 포맷)"""
+    text_parts = []
+    for entry in ole.listdir():
+        entry_path = '/'.join(entry)
+        # BodyText/SectionX 스트림 찾기
+        if entry_path.startswith('BodyText/Section'):
+            try:
+                stream_data = ole.openstream(entry).read()
+                # 압축 해제 시도
+                try:
+                    decompressed = zlib.decompress(stream_data, -15)
+                except:
+                    decompressed = stream_data
+                # HWP5 레코드에서 텍스트 추출
+                extracted = extract_text_from_hwp_records(decompressed)
+                if extracted:
+                    text_parts.append(extracted)
+            except Exception as e:
+                print(f"  섹션 읽기 오류 {entry_path}: {e}")
+                continue
+    return '\n\n'.join(text_parts) if text_parts else None
+def extract_text_from_hwp_records(data: bytes) -> str:
+    """HWP5 레코드 구조에서 텍스트 추출"""
+    texts = []
+    pos = 0
+    while pos < len(data) - 4:
         try:
+            # 레코드 헤더 (4바이트)
+            header = int.from_bytes(data[pos:pos+4], 'little')
+            tag_id = header & 0x3FF
+            size = (header >> 20) & 0xFFF
+            pos += 4
+            # 확장 크기
+            if size == 0xFFF:
+                if pos + 4 > len(data):
+                    break
+                size = int.from_bytes(data[pos:pos+4], 'little')
+                pos += 4
+            if pos + size > len(data):
+                break
+            record_data = data[pos:pos+size]
+            pos += size
+            # HWPTAG_PARA_TEXT = 67 (0x43)
+            if tag_id == 67 and size > 0:
+                # 텍스트 추출 (컨트롤 문자 처리)
+                text = extract_para_text(record_data)
+                if text:
+                    texts.append(text)
         except Exception as e:
+            pos += 1
+            continue
+    return '\n'.join(texts) if texts else None
+def extract_para_text(data: bytes) -> str:
+    """PARA_TEXT 레코드에서 실제 텍스트 추출"""
+    result = []
+    i = 0
+    while i < len(data) - 1:
+        code = int.from_bytes(data[i:i+2], 'little')
+        # 일반 문자 (유니코드)
+        if code >= 32:
+            try:
+                char = chr(code)
+                # 한글, 영문, 숫자, 일반 기호만 허용
+                if char.isprintable() and not (0x4E00 <= code <= 0x9FFF and code not in range(0xAC00, 0xD7A4)):
+                    result.append(char)
+                elif 0xAC00 <= code <= 0xD7A3:  # 한글 음절
+                    result.append(char)
+            except:
+                pass
+        # 컨트롤 문자 처리
+        elif code == 0:  # NULL
+            pass
+        elif code == 1:  # 예약
+            i += 14  # 확장 컨트롤 건너뛰기
+        elif code == 2:  # 섹션/컬럼 정의
+            i += 14
+        elif code == 3:  # 필드 시작
+            i += 14
+        elif code == 4:  # 필드 끝
+            pass
+        elif code == 9:  # 탭
+            result.append('\t')
+        elif code == 10:  # 줄바꿈
+            result.append('\n')
+        elif code == 13:  # 문단 끝
+            result.append('\n')
+        elif code == 24:  # 하이픈
+            result.append('-')
+        elif code == 30:  # 묶음 빈칸
+            result.append(' ')
+        elif code == 31:  # 고정폭 빈칸
+            result.append(' ')
+        i += 2
+    text = ''.join(result).strip()
+    # 의미 없는 텍스트 필터링
+    if len(text) < 2:
+        return None
+    return text
+def extract_text_with_olefile(file_path: str) -> tuple:
+    """olefile을 사용한 HWP 텍스트 추출"""
     if not OLEFILE_AVAILABLE:
+        return None, "olefile 모듈 없음"
     try:
         ole = olefile.OleFileIO(file_path)
+        # 파일 헤더 확인
+        if not ole.exists('FileHeader'):
+            ole.close()
+            return None, "HWP 파일 헤더 없음"
+        # 압축 여부 확인
+        header_data = ole.openstream('FileHeader').read()
+        is_compressed = (header_data[36] & 1) == 1 if len(header_data) > 36 else True
+        print(f"  HWP 압축 여부: {is_compressed}")
+        # BodyText에서 텍스트 추출
+        text = extract_hwp_text_from_bodytext(ole)
         ole.close()
+        if text and len(text.strip()) > 10:
+            return text.strip(), None
+        return None, "텍스트 추출 실패"
     except Exception as e:
+        return None, f"olefile 오류: {str(e)}"
+def extract_text_with_hwp5txt(file_path: str) -> tuple:
+    """hwp5txt 명령어로 텍스트 추출"""
     try:
+        result = subprocess.run(
+            [sys.executable, '-m', 'hwp5', 'txt', file_path],
+            capture_output=True,
+            timeout=60
+        )
+        if result.returncode == 0 and result.stdout:
+            # 여러 인코딩 시도
+            for enc in ['utf-8', 'cp949', 'euc-kr']:
                 try:
+                    text = result.stdout.decode(enc)
+                    if text.strip() and len(text.strip()) > 10:
+                        return text.strip(), None
                 except:
+                    continue
+        stderr = result.stderr.decode('utf-8', errors='ignore') if result.stderr else ""
+        return None, f"hwp5txt 실패: {stderr[:100]}"
+    except subprocess.TimeoutExpired:
+        return None, "hwp5txt 타임아웃"
     except Exception as e:
+        return None, f"hwp5txt 오류: {str(e)}"
 def extract_text_from_hwp(file_path: str) -> tuple:
+    """HWP 파일에서 텍스트 추출 (메인 함수)"""
+    print(f"\n[HWP 추출] 시작: {os.path.basename(file_path)}")
     # 방법 1: hwp5txt 명령어 (가장 안정적)
+    print("  방법 1: hwp5txt 명령어...")
+    text, error = extract_text_with_hwp5txt(file_path)
+    if text:
+        print(f"  ✓ hwp5txt 성공: {len(text)} 글자")
         return text, None
+    print(f"  ✗ hwp5txt 실패: {error}")
+    # 방법 2: olefile 직접 파싱
+    print("  방법 2: olefile 파싱...")
+    text, error = extract_text_with_olefile(file_path)
+    if text:
+        print(f"  ✓ olefile 성공: {len(text)} 글자")
         return text, None
+    print(f"  ✗ olefile 실패: {error}")
+    return None, "모든 추출 방법 실패"
+# ============== HWP 변환 함수들 ==============
 def check_hwp_version(file_path):
     try:
     except Exception as e:
         return f"Error: {e}", False
 def convert_to_html_subprocess(input_path, output_dir):
+    """HTML 변환"""
+    output_path = os.path.join(output_dir, "output.html")
     try:
         result = subprocess.run(
         )
         if result.returncode == 0:
+            # 결과 파일/디렉토리 찾기
             if os.path.isfile(output_path):
                 return output_path, None
             if os.path.isdir(output_path):
                 return output_path, None
+            # 다른 위치 검색
             for item in os.listdir(output_dir):
                 item_path = os.path.join(output_dir, item)
+                if item.lower().endswith(('.html', '.htm')) and os.path.isfile(item_path):
                     return item_path, None
                 if os.path.isdir(item_path):
+                    for sub in os.listdir(item_path):
+                        if sub.lower().endswith(('.html', '.htm')):
                             return item_path, None
             return output_dir, None
+    except subprocess.TimeoutExpired:
+        return None, "HTML 변환 타임아웃"
     except Exception as e:
+        return None, f"HTML 변환 오류: {str(e)}"
+    return None, "HTML 변환 실패"
+def convert_hwp_to_text(input_path: str) -> tuple:
+    """HWP를 텍스트로 변환"""
+    return extract_text_from_hwp(input_path)
+def html_to_markdown(html_content):
+    """HTML을 Markdown으로 변환"""
     if MARKDOWNIFY_AVAILABLE:
+        try:
+            return md(html_content, heading_style="ATX", bullets="-"), None
+        except:
+            pass
     if HTML2TEXT_AVAILABLE:
+        try:
+            h = html2text.HTML2Text()
+            h.body_width = 0
+            return h.handle(html_content), None
+        except:
+            pass
+    # 기본 변환
+    if BS4_AVAILABLE:
+        try:
+            soup = BeautifulSoup(html_content, 'html.parser')
+            return soup.get_text(separator='\n'), None
+        except:
+            pass
+    return None, "Markdown 변환 실패"
 def convert_hwp_to_markdown(input_path: str) -> tuple:
+    """HWP를 Markdown으로 변환"""
+    # 먼저 텍스트 추출 시도
     text, error = extract_text_from_hwp(input_path)
+    if text:
         return text, None
+    # HTML 변환 후 Markdown 변환
     tmp_dir = tempfile.mkdtemp()
     try:
         html_output, error = convert_to_html_subprocess(input_path, tmp_dir)
+        if html_output:
+            # HTML 파일 읽기
+            html_files = []
+            if os.path.isfile(html_output):
+                html_files = [html_output]
+            elif os.path.isdir(html_output):
+                for root, dirs, files in os.walk(html_output):
+                    for f in files:
+                        if f.lower().endswith(('.html', '.htm')):
+                            html_files.append(os.path.join(root, f))
+            for html_file in html_files:
+                for enc in ['utf-8', 'cp949', 'euc-kr']:
+                    try:
+                        with open(html_file, 'r', encoding=enc) as f:
+                            content = f.read()
+                        md_text, _ = html_to_markdown(content)
+                        if md_text and len(md_text.strip()) > 10:
+                            return md_text.strip(), None
+                    except:
+                        continue
+        return None, error or "변환 실패"
     finally:
         shutil.rmtree(tmp_dir, ignore_errors=True)
+# ============== LLM API ==============
 def call_groq_api_stream(messages: List[Dict], api_key: str) -> Generator[str, None, None]:
     """Stream a chat completion from Groq's OpenAI-compatible endpoint.

     Args:
         messages: Chat messages (``role``/``content`` dicts) sent unchanged.
         api_key: Groq API key; when empty a single error string is yielded.

     Yields:
         Text fragments read from ``choices[0].delta.content`` of every
         ``data: `` event, or one user-facing error string (prefixed with
         "❌") when the key is missing, the HTTP status is not 200, or the
         request raises.
     """
     if not api_key:
         yield "❌ Groq API 키가 설정되지 않았습니다."
         return
     try:
         # The context manager releases the streamed connection even when the
         # consumer stops iterating before the stream is exhausted.
         with requests.post(
             "https://api.groq.com/openai/v1/chat/completions",
             headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
             json={
                 "model": "meta-llama/llama-4-scout-17b-16e-instruct",
                 "messages": messages,
                 "temperature": 0.7,
                 "max_tokens": 8192,
                 "stream": True
             },
             stream=True,
             # (connect, read) seconds: without a timeout a stalled server
             # would block this generator indefinitely.
             timeout=(10, 120),
         ) as response:
             if response.status_code != 200:
                 yield f"❌ Groq API 오류: {response.status_code}"
                 return
             for raw_line in response.iter_lines():
                 if not raw_line:
                     continue
                 line = raw_line.decode('utf-8')
                 if not line.startswith('data: '):
                     continue
                 payload = line[6:]
                 if payload == '[DONE]':
                     continue
                 try:
                     data = json.loads(payload)
                     content = data.get('choices', [{}])[0].get('delta', {}).get('content', '')
                 except (ValueError, AttributeError, IndexError, KeyError, TypeError):
                     # Malformed or unexpectedly shaped event: skip it and
                     # keep streaming.
                     continue
                 # Yield outside the inner try so that GeneratorExit raised by
                 # generator.close() is never swallowed.
                 if content:
                     yield content
     except Exception as e:
         yield f"❌ API 오류: {str(e)}"
 def call_fireworks_api_stream(messages: List[Dict], image_base64: str, mime_type: str, api_key: str) -> Generator[str, None, None]:
     """Stream a vision chat completion from the Fireworks inference endpoint.

     The last entry of ``messages`` is sent as a multi-part message holding
     the image (as a ``data:`` URL) followed by the message text; all earlier
     entries are forwarded with only their ``role`` and ``content``.

     Args:
         messages: Chat messages; the final one is paired with the image.
         image_base64: Base64-encoded image payload.
         mime_type: MIME type placed in the ``data:`` URL.
         api_key: Fireworks API key; when empty a single error string is
             yielded so the caller does not end up with an empty reply.

     Yields:
         Text fragments read from ``choices[0].delta.content`` of every
         ``data: `` event, or one user-facing error string (prefixed with
         "❌") when the key is missing, the HTTP status is not 200, or the
         request raises.
     """
     if not api_key:
         # Report the missing key the same way the Groq helper does instead of
         # ending silently.
         yield "❌ Fireworks API 키가 설정되지 않았습니다."
         return
     try:
         formatted_messages = [{"role": m["role"], "content": m["content"]} for m in messages[:-1]]
         formatted_messages.append({
             "role": messages[-1]["role"],
             "content": [
                 {"type": "image_url", "image_url": {"url": f"data:{mime_type};base64,{image_base64}"}},
                 {"type": "text", "text": messages[-1]["content"]}
             ]
         })
         # The context manager releases the streamed connection even when the
         # consumer stops iterating before the stream is exhausted.
         with requests.post(
             "https://api.fireworks.ai/inference/v1/chat/completions",
             headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
             json={
                 "model": "accounts/fireworks/models/qwen3-vl-235b-a22b-thinking",
                 "max_tokens": 4096,
                 "temperature": 0.6,
                 "messages": formatted_messages,
                 "stream": True
             },
             stream=True,
             # (connect, read) seconds: without a timeout a stalled server
             # would block this generator indefinitely.
             timeout=(10, 120),
         ) as response:
             if response.status_code != 200:
                 yield f"❌ Fireworks API 오류: {response.status_code}"
                 return
             for raw_line in response.iter_lines():
                 if not raw_line:
                     continue
                 line = raw_line.decode('utf-8')
                 if not line.startswith('data: '):
                     continue
                 payload = line[6:]
                 if payload == '[DONE]':
                     continue
                 try:
                     data = json.loads(payload)
                     content = data.get('choices', [{}])[0].get('delta', {}).get('content', '')
                 except (ValueError, AttributeError, IndexError, KeyError, TypeError):
                     # Malformed or unexpectedly shaped event: skip it and
                     # keep streaming.
                     continue
                 # Yield outside the inner try so that GeneratorExit raised by
                 # generator.close() is never swallowed.
                 if content:
                     yield content
     except Exception as e:
         yield f"❌ API 오류: {str(e)}"
+# ============== 채팅 처리 ==============
 def process_file(file_path: str) -> tuple:
     """Classify an uploaded file and extract what the chat flow needs from it.

     Returns a 3-tuple ``(kind, payload, mime)``:

     * ``(None, None, None)`` when ``file_path`` is empty;
     * ``("image", <base64 data>, <mime type>)`` for images;
     * ``("text", <labelled document text>, None)`` for HWP, PDF and text files;
     * ``("error", <message>, None)`` when extraction yields nothing;
     * ``("unsupported", <message>, None)`` for any other file.
     """
     if not file_path:
         return None, None, None

     filename = os.path.basename(file_path)

     # Images are forwarded as base64 together with their MIME type.
     if is_image_file(file_path):
         encoded_image = image_to_base64(file_path)
         image_mime = get_image_mime_type(file_path)
         return "image", encoded_image, image_mime

     # HWP extraction also reports an error value that goes into the message.
     if is_hwp_file(file_path):
         text, error = extract_text_from_hwp(file_path)
         if not text:
             return "error", f"HWP 추출 실패: {error}", None
         return "text", f"[HWP 문서: {filename}]\n\n{text}", None

     if is_pdf_file(file_path):
         text = extract_text_from_pdf(file_path)
         if not text:
             return "error", "PDF 추출 실패", None
         return "text", f"[PDF 문서: {filename}]\n\n{text}", None

     if is_text_file(file_path):
         text = extract_text_from_txt(file_path)
         if not text:
             return "error", "텍스트 읽기 실패", None
         return "text", f"[텍스트 파일: {filename}]\n\n{text}", None

     return "unsupported", f"지원하지 않는 형식: {filename}", None
+def chat_response(message: str, history: List[Dict], file: Optional[str],
+                  session_id: str, groq_key: str, fireworks_key: str) -> Generator[tuple, None, None]:
     """Produce a streamed assistant reply for one chat turn.

     Args:
         message: Text typed by the user.
         history: Chat history as ``role``/``content`` dicts; ``None`` is
             treated as an empty list.
         file: Path of an attached file, or a falsy value when none is attached.
         session_id: Current session id; a new session is created when falsy.
         groq_key: API key passed to ``call_groq_api_stream``.
         fireworks_key: API key passed to ``call_fireworks_api_stream``.

     Yields:
         ``(history, session_id)`` tuples; the last history entry is the
         assistant message, which grows as chunks arrive.
     """
     if history is None:
         history = []
+    if not message.strip() and not file:
         # Nothing to send: hand the unchanged state back.
         yield history, session_id
         return
     if not session_id:
         session_id = create_session()
+    # File handling
+    file_type, file_content, file_mime = None, None, None
     file_info = None
+    if file:
         file_type, file_content, file_mime = process_file(file)
+        file_info = json.dumps({"type": file_type, "filename": os.path.basename(file)})
         if file_type == "error":
             # NOTE(review): the list literal opened on the next line is not
             # closed in this view (its elements appear to be elided) —
             # confirm against the full file.
             history = history + [
             yield history, session_id
             return
+    # User message as displayed in the chat (prefixed with the attachment name)
+    user_msg = message
+    if file:
         filename = os.path.basename(file)
+        user_msg = f"📎 {filename}\n\n{message}" if message else f"📎 {filename}"
     # Append the user turn plus an empty assistant placeholder that the
     # streaming loops below fill in.
+    history = history + [{"role": "user", "content": user_msg}, {"role": "assistant", "content": ""}]
     yield history, session_id
     # Build the API messages: system prompt, stored session messages, current turn
+    db_messages = get_session_messages(session_id, limit=10)
+    api_messages = [{
         "role": "system",
+        "content": "당신은 도움이 되는 AI 어시스턴트입니다. 한국어로 자연스럽게 대화하며, 파일이 첨부되면 내용을 분석하여 답변합니다."
+    }]
+    for m in db_messages:
+        api_messages.append({"role": m["role"], "content": m["content"]})
     current_content = message or ""
     # Extracted document text is placed in front of the question; with no
     # question, a summary request is appended instead.
     if file_type == "text" and file_content:
+        current_content = f"{file_content}\n\n사용자 질문: {message}" if message else f"{file_content}\n\n위 문서 내용을 요약해주세요."
+    api_messages.append({"role": "user", "content": current_content})
+    # Generate the response: images go to Fireworks, everything else to Groq
     full_response = ""
     if file_type == "image":
+        for chunk in call_fireworks_api_stream(api_messages, file_content, file_mime, fireworks_key):
             full_response += chunk
             history[-1] = {"role": "assistant", "content": full_response}
             yield history, session_id
     else:
+        for chunk in call_groq_api_stream(api_messages, groq_key):
             full_response += chunk
             history[-1] = {"role": "assistant", "content": full_response}
             yield history, session_id
+    # Persist both sides of the turn
     save_message(session_id, "user", current_content, file_info)
     save_message(session_id, "assistant", full_response)
     # No stored messages before this turn: use the first 50 characters of
     # the message as the session title.
     if len(db_messages) == 0 and message:
+        update_session_title(session_id, message[:50])
 def new_chat():
     """Start a new conversation.

     Returns a 3-tuple: an empty history list, the id returned by
     ``create_session()``, and ``None``.
     """
     fresh_session_id = create_session()
     return [], fresh_session_id, None
 def load_session(session_id: str) -> tuple:
     """Load a stored conversation as chat history.

     Returns ``(history, session_id)`` where ``history`` holds the
     ``role``/``content`` of the messages fetched with ``limit=50``; a falsy
     ``session_id`` gives ``([], "")``.
     """
     if session_id:
         stored = get_session_messages(session_id, limit=50)
         chat_history = []
         for entry in stored:
             chat_history.append({"role": entry["role"], "content": entry["content"]})
         return chat_history, session_id
     return [], ""
+# ============== HWP 변환기 (Tab 2) ==============
 def convert_to_odt_subprocess(input_path, output_dir):
     """Convert an HWP file to ODT by running ``python -m hwp5 odt`` in a child process.

     Args:
         input_path: Path of the source HWP file.
         output_dir: Directory in which ``output.odt`` is written.

     Returns:
         ``(output_path, None)`` when the child exits with code 0 and the
         output file exists, otherwise ``(None, "ODT 변환 실패")``.
     """
     output_path = os.path.join(output_dir, "output.odt")
     try:
         result = subprocess.run(
             [sys.executable, '-m', 'hwp5', 'odt', '--output', output_path, input_path],
             capture_output=True, timeout=120
         )
     except (subprocess.SubprocessError, OSError, ValueError, TypeError):
         # Timeout, launch failure or unusable arguments count as a failed
         # conversion; KeyboardInterrupt/SystemExit are deliberately not caught.
         return None, "ODT 변환 실패"
     if result.returncode == 0 and os.path.exists(output_path):
         return output_path, None
     return None, "ODT 변환 실패"
 def convert_to_xml_subprocess(input_path, output_dir):
     """Convert an HWP file to XML by running ``python -m hwp5 xml`` in a child process.

     The child's standard output is captured and written to ``output.xml``
     inside ``output_dir``.

     Args:
         input_path: Path of the source HWP file.
         output_dir: Directory in which ``output.xml`` is written.

     Returns:
         ``(output_path, None)`` when the child exits with code 0 and
         produced output, otherwise ``(None, "XML 변환 실패")``.
     """
     output_path = os.path.join(output_dir, "output.xml")
     try:
         result = subprocess.run(
             [sys.executable, '-m', 'hwp5', 'xml', input_path],
             capture_output=True, timeout=120
         )
         if result.returncode == 0 and result.stdout:
             with open(output_path, 'wb') as f:
                 f.write(result.stdout)
             return output_path, None
     except (subprocess.SubprocessError, OSError, ValueError, TypeError):
         # Timeout, launch failure, unusable arguments or a failed write count
         # as a failed conversion; KeyboardInterrupt/SystemExit are
         # deliberately not caught.
         pass
     return None, "XML 변환 실패"
 def convert_hwp(file, output_format, progress=gr.Progress()):
     """Convert an uploaded HWP file to the selected format (converter tab).

     Args:
         file: Uploaded file; either an object with a ``name`` attribute or a
             value convertible to a path string.
         output_format: One of "HTML", "ODT (OpenDocument)", "TXT (텍스트)",
             "Markdown" or "XML".
         progress: Gradio progress reporter.

     Returns:
         ``(output_file_path, status_message, preview_text)``; on failure the
         path is ``None`` and the preview is an empty string.
     """
+    if not file:
         return None, "❌ 파일을 업로드해주세요.", ""
+    input_file = file.name if hasattr(file, 'name') else str(file)
     if not input_file.lower().endswith('.hwp'):
         return None, "❌ HWP 파일만 지원됩니다.", ""
     progress(0.1, desc="파일 분석 중...")
     version, is_valid = check_hwp_version(input_file)
     if not is_valid:
+        return None, f"❌ 지원하지 않는 파일: {version}", ""
     # NOTE(review): the `try:` matching the `except` at the end of this
     # function and the assignments of `input_path` / `input_filename` are not
     # visible in this view (presumably elided) — confirm against the full file.
     tmp_dir = tempfile.mkdtemp()
         progress(0.3, desc=f"{output_format}로 변환 중...")
+        output_path, error, ext = None, None, ""
         if output_format == "HTML":
             output_path, error = convert_to_html_subprocess(input_path, tmp_dir)
             ext = ".html"
             # When the HTML converter returns a directory, ship it as a ZIP.
             if output_path and os.path.isdir(output_path):
+                zip_path = shutil.make_archive(os.path.join(tmp_dir, "html"), 'zip', output_path)
+                output_path, ext = zip_path, ".zip"
         elif output_format == "ODT (OpenDocument)":
             output_path, error = convert_to_odt_subprocess(input_path, tmp_dir)
             ext = ".odt"
         elif output_format == "TXT (텍스트)":
             text, error = extract_text_from_hwp(input_path)
             if text:
                 output_path = os.path.join(tmp_dir, "output.txt")
                 with open(output_path, 'w', encoding='utf-8') as f:
                     f.write(text)
             ext = ".txt"
         elif output_format == "Markdown":
+            text, error = convert_hwp_to_markdown(input_path)
+            if text:
+                output_path = os.path.join(tmp_dir, "output.md")
+                with open(output_path, 'w', encoding='utf-8') as f:
+                    f.write(text)
             ext = ".md"
         elif output_format == "XML":
             output_path, error = convert_to_xml_subprocess(input_path, tmp_dir)
             ext = ".xml"
+        if not output_path:
+            return None, f"❌ {error or '변환 실패'}", ""
         if not os.path.exists(output_path):
             return None, "❌ 변환된 파일을 찾을 수 없습니다.", ""
+        progress(0.8, desc="완료...")
         # Name the downloadable file after the input file's stem.
         base_name = Path(input_filename).stem
+        final_output = os.path.join(tmp_dir, f"{base_name}{ext}")
+        if output_path != final_output:
             shutil.copy2(output_path, final_output)
         file_size = os.path.getsize(final_output)
+        size_str = f"{file_size/1024:.1f} KB" if file_size > 1024 else f"{file_size} bytes"
         preview = ""
         # Text-based outputs get a preview of at most 5000 characters.
+        if ext in ['.txt', '.md', '.xml']:
             try:
                 with open(final_output, 'r', encoding='utf-8', errors='ignore') as f:
                     preview = f.read(5000)
                     if len(preview) >= 5000:
+                        preview += "\n\n... (생략)"
             except:
                 pass
         elif ext == '.zip':
+            preview = "📦 HTML이 ZIP으로 압축되었습니다."
+        progress(1.0, desc="완료!")
+        return final_output, f"✅ 변환 완료: {base_name}{ext} ({size_str})", preview
     except Exception as e:
         import traceback
         traceback.print_exc()
+        return None, f"❌ 오류: {str(e)}", ""
+# ============== Gradio UI ==============
 # Custom CSS for the `upload-box` and `download-box` element classes used in
 # the converter tab; passed to `demo.launch()` at the bottom of the file.
 css = """
+.upload-box { border: 2px dashed #6366f1 !important; border-radius: 12px !important; }
+.download-box { border: 2px solid #22c55e !important; border-radius: 12px !important; }
 """
 # Gradio UI: a chat tab and an HWP converter tab, followed by event wiring.
 with gr.Blocks(title="AI 문서 어시스턴트") as demo:
     # Holds the current session id; starts as an empty string.
     session_state = gr.State("")
+    gr.Markdown("# 🤖 AI 문서 어시스턴트\nLLM 채팅 + HWP 문서 변환")
     with gr.Tabs():
         # ---- Tab 1: AI chat ----
+        with gr.Tab("💬 AI 채팅"):
             with gr.Row():
                 # Left column: settings, API keys and session history.
                 with gr.Column(scale=1):
                     gr.Markdown("### ⚙️ 설정")
+                    with gr.Accordion("🔑 API 키", open=True):
+                        groq_key = gr.Textbox(label="Groq API Key", type="password", value=GROQ_API_KEY)
+                        fireworks_key = gr.Textbox(label="Fireworks API Key", type="password", value=FIREWORKS_API_KEY)
+                    gr.Markdown("### 📁 지원 파일\n- 이미지: JPG, PNG\n- 문서: PDF, TXT, HWP ✨")
+                    new_btn = gr.Button("🆕 새 대화", variant="primary")
+                    with gr.Accordion("📜 기록", open=False):
+                        session_list = gr.Dataframe(headers=["ID", "제목", "시간"], interactive=False)
+                        refresh_btn = gr.Button("🔄 새로고침", size="sm")
                 # Right column: conversation, attachment and message input.
                 with gr.Column(scale=3):
+                    chatbot = gr.Chatbot(label="대화", height=500)
                     with gr.Row():
+                        file_upload = gr.File(label="📎 파일", file_types=[".jpg",".jpeg",".png",".gif",".webp",".pdf",".txt",".md",".hwp",".hwpx"], scale=1)
+                        msg_input = gr.Textbox(placeholder="메시지 입력...", lines=2, show_label=False, scale=4)
                     with gr.Row():
                         submit_btn = gr.Button("📤 전송", variant="primary", scale=3)
                         clear_btn = gr.Button("🗑️ 지우기", scale=1)
         # ---- Tab 2: HWP converter ----
+        with gr.Tab("📄 HWP 변환기"):
+            gr.Markdown("### HWP 파일 변환기")
             with gr.Row():
+                with gr.Column():
+                    hwp_input = gr.File(label="HWP 파일", file_types=[".hwp"], elem_classes=["upload-box"])
+                    format_select = gr.Radio(["HTML", "ODT (OpenDocument)", "TXT (텍스트)", "Markdown", "XML"], value="TXT (텍스트)", label="형식")
+                    convert_btn = gr.Button("🔄 변환", variant="primary", size="lg")
+                with gr.Column():
+                    status_out = gr.Textbox(label="상태", interactive=False)
+                    file_out = gr.File(label="다운로드", elem_classes=["download-box"])
+            with gr.Accordion("📋 미리보기", open=False):
+                preview_out = gr.Textbox(lines=15, interactive=False)
+    # Event wiring
+    def on_submit(msg, hist, f, sid, gk, fk):
         # Relay each streamed (history, session_id) update; the trailing
         # "" and None clear the message box and the file upload.
+        if hist is None: hist = []
+        for r in chat_response(msg, hist, f, sid, gk, fk):
+            yield r[0], r[1], "", None
     # The send button and Enter in the message box trigger the same handler.
+    submit_btn.click(on_submit, [msg_input, chatbot, file_upload, session_state, groq_key, fireworks_key],
+                     [chatbot, session_state, msg_input, file_upload])
+    msg_input.submit(on_submit, [msg_input, chatbot, file_upload, session_state, groq_key, fireworks_key],
+                     [chatbot, session_state, msg_input, file_upload])
     # "New chat" creates a new session; "clear" resets the widgets but leaves
     # session_state untouched.
+    new_btn.click(lambda: ([], create_session(), None, ""), outputs=[chatbot, session_state, file_upload, msg_input])
+    clear_btn.click(lambda: ([], None, ""), outputs=[chatbot, file_upload, msg_input])
+    def refresh():
         # One row per session: first 8 characters of the id, the title (or a
         # placeholder), and the first 16 characters of updated_at.
         sessions = get_all_sessions()
+        return [[s["session_id"][:8], s["title"] or "제목없음", s["updated_at"][:16] if s["updated_at"] else ""] for s in sessions]
+    refresh_btn.click(refresh, outputs=[session_list])
+    def select_session(evt: gr.SelectData, data):
         # Match the clicked row's id prefix back to a full session id.
         # NOTE(review): assumes `data` can be indexed as rows of cells —
         # confirm what the Dataframe component passes here.
         if evt.index[0] < len(data):
+            for s in get_all_sessions():
+                if s["session_id"].startswith(data[evt.index[0]][0]):
+                    return load_session(s["session_id"])
         return [], ""
+    session_list.select(select_session, [session_list], [chatbot, session_state])
+    convert_btn.click(convert_hwp, [hwp_input, format_select], [file_out, status_out, preview_out])
     # Populate the session list when the page loads.
+    demo.load(refresh, outputs=[session_list])
 # Launch the Gradio app only when this file is run as a script.
 if __name__ == "__main__":
     demo.launch(css=css)