gbrabbit committed
Commit
b9ecb65
·
1 Parent(s): 163e68e

Auto commit at 07-2025-08 4:43:48

Files changed (8)
  1. .gitignore +219 -0
  2. app.py +101 -498
  3. app_250807_0427.py +574 -0
  4. app_local.py +245 -0
  5. app_local_250807_0427.py +245 -0
  6. test_input.py +100 -0
  7. test_text.py +100 -0
  8. test_tokenizer.py +159 -0
.gitignore ADDED
@@ -0,0 +1,219 @@
+ .github/
+ .env
+ lily_llm_env/
+ lily_llm_core/models/
+ __pycache__/
+ *.pyc
+ .ipynb_checkpoints/
+ lily_llm_media/
+ vector_stores/
+ latex_ocr_env/
+ lily_llm_ignore/
+
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[codz]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py.cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # UV
+ # Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ #uv.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+ #poetry.toml
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ # pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
+ # https://pdm-project.org/en/latest/usage/project/#working-with-version-control
+ #pdm.lock
+ #pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # pixi
+ # Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
+ #pixi.lock
+ # Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
+ # in the .venv directory. It is recommended not to include this directory in version control.
+ .pixi
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .envrc
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ # Abstra
+ # Abstra is an AI-powered process automation framework.
+ # Ignore directories containing user credentials, local state, and settings.
+ # Learn more at https://abstra.io/docs
+ .abstra/
+
+ # Visual Studio Code
+ # Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
+ # that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
+ # and can be added to the global gitignore or merged into this file. However, if you prefer,
+ # you could uncomment the following to ignore the entire vscode folder
+ # .vscode/
+
+ # Ruff stuff:
+ .ruff_cache/
+
+ # PyPI configuration file
+ .pypirc
+
+ # Cursor
+ # Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to
+ # exclude from AI features like autocomplete and code analysis. Recommended for sensitive data
+ # refer to https://docs.cursor.com/context/ignore-files
+ .cursorignore
+ .cursorindexingignore
+
+ # Marimo
+ marimo/_static/
+ marimo/_lsp/
+ __marimo__/
app.py CHANGED
@@ -1,574 +1,177 @@
  import gradio as gr
  import os
- import requests
- import json
  import traceback
- from transformers import AutoTokenizer
+ from transformers import AutoTokenizer, AutoModelForCausalLM
  import torch
  import fitz  # PyMuPDF
  from PIL import Image
  import io
- import base64

- # Declared as global variables
+ # --- 1. Global variables and environment setup ---
  tokenizer = None
  model = None
  MODEL_LOADED = False

- # Load environment variables from the .env file
+ # Load environment variables from .env (mainly used locally)
  try:
      from dotenv import load_dotenv
      load_dotenv()
      print("✅ .env file loaded")
  except ImportError:
      print("⚠️ python-dotenv is not installed, using system environment variables")
- except Exception as e:
-     print(f"⚠️ Failed to load .env file: {e}")

- # Read the token only from environment variables (for security)
+ # Read the token and model name from environment variables
  HF_TOKEN = os.getenv("HF_TOKEN")
  MODEL_NAME = os.getenv("MODEL_NAME", "gbrabbit/lily-math-model")

- print("🔍 Starting detailed debugging")
- print("=" * 50)
- print(f"1. Checking environment variables:")
- print(f" HF_TOKEN: {'✅ set' if HF_TOKEN else '❌ not set'}")
- print(f" MODEL_NAME: {MODEL_NAME}")
- if HF_TOKEN:
-     print(f" Token length: {len(HF_TOKEN)}")
-     print(f" Token start: {HF_TOKEN[:10]}...")
-     print(f" Token end: ...{HF_TOKEN[-10:]}")
+ print(f"🔍 Model: {MODEL_NAME}")
+ print(f"🔍 HF token: {'✅ set' if HF_TOKEN else '❌ not set'}")

- # Load the model (using the custom model class)
+ # --- 2. Core logic: load the model and tokenizer ---
  try:
-     print(f"\n2. Starting model loading:")
-     print(f" Model: {MODEL_NAME}")
-     print(f" Using token: {'yes' if HF_TOKEN else 'no'}")
-
+     print("🔧 Starting model and tokenizer loading...")
+
+     # Import the custom model class
+     from modeling import KananaVForConditionalGeneration
+
      if HF_TOKEN:
-         print(" Loading tokenizer...")
          tokenizer = AutoTokenizer.from_pretrained(
              MODEL_NAME,
              token=HF_TOKEN,
+             trust_remote_code=True
+         )
+         model = KananaVForConditionalGeneration.from_pretrained(
+             MODEL_NAME,
+             token=HF_TOKEN,
+             torch_dtype=torch.float16,
              trust_remote_code=True,
-             use_fast=False
+             device_map="auto"  # place on the GPU automatically (required in the server environment)
          )
-         print(" ✅ Tokenizer loading complete")
-         print(f" Tokenizer type: {type(tokenizer)}")
-         print(f" Tokenizer hasattr('encode'): {hasattr(tokenizer, 'encode')}")
-
-         print(" Loading custom model...")
-         # Import the custom model class (uses modeling.py from the Space folder)
-         try:
-             from modeling import KananaVForConditionalGeneration
-             print(" ✅ modeling.py import succeeded")
-         except Exception as import_error:
-             print(f" ❌ modeling.py import failed: {import_error}")
-             raise import_error
-
-         try:
-             print(f" Model loading parameters:")
-             print(f" MODEL_NAME: {MODEL_NAME}")
-             print(f" torch_dtype: {torch.float16}")
-             print(f" trust_remote_code: True")
-             print(f" device_map: None")
-             print(f" low_cpu_mem_usage: True")
-
-             model = KananaVForConditionalGeneration.from_pretrained(
-                 MODEL_NAME,
-                 token=HF_TOKEN,
-                 torch_dtype=torch.float16,
-                 trust_remote_code=True,
-                 device_map=None,
-                 low_cpu_mem_usage=True
-             )
-             print(" ✅ Custom model loading complete")
-             print(f" Model type: {type(model)}")
-             print(f" Model device: {next(model.parameters()).device}")
-         except Exception as model_error:
-             print(f" ❌ Custom model loading failed: {model_error}")
-             print(f" Error type: {type(model_error).__name__}")
-             import traceback
-             traceback.print_exc()
-             raise model_error
+         MODEL_LOADED = True
+         print("✅ Custom model loading complete!")
      else:
-         print(" ⚠️ No token, using a public model")
+         print("⚠️ No HF token; falling back to the public model (DialoGPT).")
          MODEL_NAME = "microsoft/DialoGPT-medium"
-         print(f" Public model: {MODEL_NAME}")
-
-         print(" Loading tokenizer...")
          tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
-         print(" ✅ Tokenizer loading complete")
-
-         print(" Loading model...")
-         from transformers import AutoModelForCausalLM
-         model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16)
-         print(" ✅ Model loading complete")
-
-     print("✅ Model loading complete!")
-     MODEL_LOADED = True
+         model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16, device_map="auto")
+         MODEL_LOADED = True

  except Exception as e:
-     print(f"❌ Model loading failed:")
-     print(f" Error type: {type(e).__name__}")
-     print(f" Error message: {str(e)}")
-     print(f" Detailed error:")
+     print(f"❌ Model loading failed: {e}")
      traceback.print_exc()
      MODEL_LOADED = False

- print(f"\n3. Final status:")
- print(f" MODEL_LOADED: {MODEL_LOADED}")
- print(f" Final model name: {MODEL_NAME}")
-
+ # --- 3. File-handling utilities ---
  def extract_text_from_pdf(pdf_file):
-     """Extract text from a PDF"""
      try:
          doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
-         text = ""
-         for page in doc:
-             text += page.get_text()
+         text = "".join(page.get_text() for page in doc)
          doc.close()
          return text
      except Exception as e:
-         return f"PDF read error: {str(e)}"
-
- def extract_text_from_image(image_file):
-     """Extract text from an image with OCR"""
-     try:
-         # Open the image with PIL
-         image = Image.open(image_file)
-
-         # Simple OCR (a real OCR library would be needed in practice)
-         # Here we only return image information
-         return f"Image file: {image.size[0]}x{image.size[1]} pixels"
-     except Exception as e:
-         return f"Image read error: {str(e)}"
+         print(f"PDF processing error: {e}")
+         return f"An error occurred while reading the PDF file: {e}"

  def process_uploaded_file(file):
-     """Process an uploaded file"""
+     """Split an uploaded file into text and an image object"""
      if file is None:
-         return None, None
-
+         return "", None  # no text, no image
+
      file_path = file.name
-     file_extension = file_path.lower().split('.')[-1]
+     file_extension = os.path.splitext(file_path)[1].lower()

-     if file_extension == 'pdf':
+     if file_extension == '.pdf':
          text_content = extract_text_from_pdf(file)
-         return text_content, None
-     elif file_extension in ['png', 'jpg', 'jpeg']:
-         text_content = extract_text_from_image(file)
-         return text_content, file
+         return text_content, None  # a PDF yields text only, no image
+     elif file_extension in ['.png', '.jpg', '.jpeg']:
+         image = Image.open(file).convert('RGB')
+         # Return the image object itself (used as multimodal input instead of OCR)
+         return "An uploaded image is attached.", image
      else:
          return f"Unsupported file format: {file_extension}", None

- def chat_with_model(message, history, file=None):
-     global tokenizer, model
-     print(f"🔍 DEBUG: chat_with_model started")
-     print(f" Message: {message}")
-     print(f" File: {file}")
-     print(f" MODEL_LOADED: {MODEL_LOADED}")
-
+ # --- 4. Core logic: the unified response generation function ---
+ def generate_response(prompt_template: str, message: str, file=None):
+     """Unified response generator that handles both text and images"""
      if not MODEL_LOADED:
-         print("❌ DEBUG: model not loaded")
-         return "❌ The model is not loaded."
-
+         return "❌ The model is not loaded. Please contact the administrator."
+
      try:
-         print("📁 DEBUG: File processing started")
-         # Process the file
-         file_content = ""
-         image_file = None
-         if file is not None:
-             print(f" Filename: {file.name}")
-             text_content, image_file = process_uploaded_file(file)
-             print(f" Text content: {text_content[:100] if text_content else 'None'}...")
-             print(f" Image file: {image_file}")
-             if text_content:
-                 file_content = f"\n[Uploaded file content]\n{text_content}\n"
-
-         # Append the file content to the message
-         full_message = message + file_content
-         print(f"📝 DEBUG: Full message: {full_message[:200]}...")
-
-         print("🔤 DEBUG: Tokenizer processing started")
-         print(f" tokenizer type: {type(tokenizer)}")
-         print(f" tokenizer value: {tokenizer}")
-
-         # Check that the tokenizer is valid
-         if not hasattr(tokenizer, 'encode') or tokenizer is None or isinstance(tokenizer, bool):
-             print("❌ DEBUG: tokenizer is not valid")
-             # Reload the tokenizer
-             print("🔄 DEBUG: attempting tokenizer reload")
-             try:
-                 tokenizer = AutoTokenizer.from_pretrained(
-                     MODEL_NAME,
-                     token=HF_TOKEN,
-                     trust_remote_code=True,
-                     use_fast=False
-                 )
-                 print("✅ DEBUG: tokenizer reload succeeded")
-                 print(f" New tokenizer type: {type(tokenizer)}")
-             except Exception as reload_error:
-                 print(f"❌ DEBUG: tokenizer reload failed: {reload_error}")
-                 return f"Tokenizer error: {str(reload_error)}"
-
-         inputs = tokenizer(full_message, return_tensors="pt")
-         print(f" Input shape: {inputs['input_ids'].shape}")
-         print(f" attention_mask shape: {inputs['attention_mask'].shape}")
-
-         print("🤖 DEBUG: Model inference started")
-         with torch.no_grad():
-             if image_file is not None:
-                 print("🖼️ DEBUG: image processing mode")
-                 # Multimodal generation when an image is present
-                 import torchvision.transforms as transforms
-
-                 # Image preprocessing
-                 transform = transforms.Compose([
-                     transforms.Resize((224, 224)),
-                     transforms.ToTensor(),
-                     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-                 ])
-
-                 pil_image = Image.open(image_file).convert('RGB')
-                 pixel_values = transform(pil_image).unsqueeze(0)
-                 image_metas = {"vision_grid_thw": torch.tensor([[1, 14, 14]])}  # default grid size
-
-                 print(f" Image shape: {pixel_values.shape}")
-                 print(f" Image metas: {image_metas}")
-
-                 # Use the multimodal model's forward method
-                 print("🔄 DEBUG: model call (multimodal)")
-                 try:
-                     outputs = model(
-                         input_ids=inputs["input_ids"],
-                         attention_mask=inputs["attention_mask"],
-                         pixel_values=[pixel_values],
-                         image_metas=image_metas,
-                         max_new_tokens=200,
-                         temperature=0.7,
-                         do_sample=True,
-                         pad_token_id=tokenizer.eos_token_id
-                     )
-                     print("✅ DEBUG: multimodal model call succeeded")
-                 except Exception as model_error:
-                     print(f"❌ DEBUG: multimodal model call failed: {model_error}")
-                     print(f" Error type: {type(model_error).__name__}")
-                     raise model_error
-             else:
-                 print("📄 DEBUG: text-only mode")
-                 # Text-only generation
-                 print("🔄 DEBUG: model call (text only)")
-                 try:
-                     outputs = model(
-                         input_ids=inputs["input_ids"],
-                         attention_mask=inputs["attention_mask"],
-                         max_new_tokens=200,
-                         temperature=0.7,
-                         do_sample=True,
-                         pad_token_id=tokenizer.eos_token_id
-                     )
-                     print("✅ DEBUG: text model call succeeded")
-                 except Exception as model_error:
-                     print(f"❌ DEBUG: text model call failed: {model_error}")
-                     print(f" Error type: {type(model_error).__name__}")
-                     raise model_error
-
-         print("🔍 DEBUG: output processing started")
-         print(f" outputs type: {type(outputs)}")
-         print(f" outputs content: {outputs}")
-
-         # If outputs is a tuple, use the first element
-         if isinstance(outputs, tuple):
-             print("📦 DEBUG: outputs is a tuple")
-             logits = outputs[0]
-             print(f" logits shape: {logits.shape}")
+         # 1. Process the file
+         file_text, pil_image = process_uploaded_file(file)
+
+         # 2. Build the full prompt
+         full_message = message
+         if file_text:
+             full_message += f"\n\n[Attached file content]\n{file_text}"
+
+         full_prompt = prompt_template.format(message=full_message)
+
+         # 3. Convert the text input with the tokenizer
+         inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
+
+         # 4. Prepare the generation parameters
+         generation_args = {
+             "max_new_tokens": 512,
+             "temperature": 0.7,
+             "do_sample": True,
+             "pad_token_id": tokenizer.eos_token_id
+         }
+
+         # 5. If an image is present, add the multimodal input
+         if pil_image:
+             print("🖼️ Image attached, generating in multimodal mode")
+             # Preprocess the image into the form the KananaV model expects
+             # (this part may differ depending on the model's requirements)
+             pixel_values = model.vision_model.image_processor(pil_image, return_tensors='pt')['pixel_values']
+             generation_args["pixel_values"] = pixel_values.to(model.device, dtype=torch.float16)
         else:
-             print("📦 DEBUG: outputs is an object")
-             if hasattr(outputs, 'logits'):
-                 logits = outputs.logits
-                 print(f" logits shape: {logits.shape}")
-             else:
-                 logits = outputs
-                 print(f" outputs shape: {logits.shape}")
-
-         print("🎯 DEBUG: token generation started")
-         # Pick the highest-probability token
-         next_token = torch.argmax(logits[:, -1, :], dim=-1)
-         generated_tokens = [next_token]
-         print(f" First token: {next_token.item()}")
-
-         # Generate additional tokens
-         print("🔄 DEBUG: iterative token generation started")
-         for i in range(199):  # max_new_tokens - 1
-             if i % 50 == 0:
-                 print(f" Progress: {i}/199")
-
-             inputs["input_ids"] = torch.cat([inputs["input_ids"], next_token.unsqueeze(-1)], dim=-1)
-             inputs["attention_mask"] = torch.cat([inputs["attention_mask"], torch.ones_like(next_token.unsqueeze(-1))], dim=-1)
-
-             with torch.no_grad():
-                 try:
-                     outputs = model(**inputs)
-                     if isinstance(outputs, tuple):
-                         logits = outputs[0]
-                     else:
-                         logits = outputs.logits if hasattr(outputs, 'logits') else outputs
-
-                     next_token = torch.argmax(logits[:, -1, :], dim=-1)
-                     generated_tokens.append(next_token)
-
-                     if next_token.item() == tokenizer.eos_token_id:
-                         print(f" EOS token found at step {i}")
-                         break
-                 except Exception as loop_error:
-                     print(f"❌ DEBUG: token generation loop error (i={i}): {loop_error}")
-                     raise loop_error
-
-         print("🔤 DEBUG: token decoding started")
-         # Decode the generated tokens
-         generated_ids = torch.cat(generated_tokens, dim=0)
-         response = tokenizer.decode(generated_ids, skip_special_tokens=True)
-         print(f" Raw response: {response[:200]}...")
-
-         if full_message in response:
-             response = response.replace(full_message, "").strip()
-             print(f" Cleaned response: {response[:200]}...")
-
-         print("✅ DEBUG: chat_with_model finished")
-         return response if response else "Sorry, I could not generate a response."
-     except Exception as e:
-         print(f"❌ DEBUG: chat_with_model overall error: {e}")
-         print(f" Error type: {type(e).__name__}")
-         import traceback
-         traceback.print_exc()
-         return f"Error occurred: {str(e)}"
+             print("📄 Generating from text only")

- def solve_math_problem(problem, file=None):
-     global tokenizer, model
-     print(f"🔍 DEBUG: solve_math_problem started")
-     print(f" Problem: {problem}")
-     print(f" File: {file}")
-     print(f" MODEL_LOADED: {MODEL_LOADED}")
-
-     if not MODEL_LOADED:
-         print("❌ DEBUG: model not loaded")
-         return "❌ The model is not loaded."
-
-     try:
-         print("📁 DEBUG: File processing started")
-         # Process the file
-         file_content = ""
-         image_file = None
-         if file is not None:
-             print(f" Filename: {file.name}")
-             text_content, image_file = process_uploaded_file(file)
-             print(f" Text content: {text_content[:100] if text_content else 'None'}...")
-             print(f" Image file: {image_file}")
-             if text_content:
-                 file_content = f"\n[Uploaded file content]\n{text_content}\n"
-
-         # Append the file content to the prompt
-         full_prompt = f"Please solve the following math problem step by step: {problem}{file_content}"
-         print(f"📝 DEBUG: Full prompt: {full_prompt[:200]}...")
-
-         print("🔤 DEBUG: Tokenizer processing started")
-         print(f" tokenizer type: {type(tokenizer)}")
-         print(f" tokenizer value: {tokenizer}")
-
-         # Check that the tokenizer is valid
-         if not hasattr(tokenizer, 'encode') or tokenizer is None or isinstance(tokenizer, bool):
-             print("❌ DEBUG: tokenizer is not valid")
-             # Reload the tokenizer
-             print("🔄 DEBUG: attempting tokenizer reload")
-             try:
-                 tokenizer = AutoTokenizer.from_pretrained(
-                     MODEL_NAME,
-                     token=HF_TOKEN,
-                     trust_remote_code=True,
-                     use_fast=False
-                 )
-                 print("✅ DEBUG: tokenizer reload succeeded")
-                 print(f" New tokenizer type: {type(tokenizer)}")
-             except Exception as reload_error:
-                 print(f"❌ DEBUG: tokenizer reload failed: {reload_error}")
-                 return f"Tokenizer error: {str(reload_error)}"
-
-         inputs = tokenizer(full_prompt, return_tensors="pt")
-         print(f" Input shape: {inputs['input_ids'].shape}")
-         print(f" attention_mask shape: {inputs['attention_mask'].shape}")
-
-         print("🤖 DEBUG: Model inference started")
+         # 6. Generate the response with the model (a single, correct call)
         with torch.no_grad():
-             if image_file is not None:
-                 print("🖼️ DEBUG: image processing mode")
-                 # Multimodal generation when an image is present
-                 import torchvision.transforms as transforms
-
-                 # Image preprocessing
-                 transform = transforms.Compose([
-                     transforms.Resize((224, 224)),
-                     transforms.ToTensor(),
-                     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
-                 ])
-
-                 pil_image = Image.open(image_file).convert('RGB')
-                 pixel_values = transform(pil_image).unsqueeze(0)
-                 image_metas = {"vision_grid_thw": torch.tensor([[1, 14, 14]])}  # default grid size
-
-                 print(f" Image shape: {pixel_values.shape}")
-                 print(f" Image metas: {image_metas}")
-
-                 # Use the multimodal model's forward method
-                 print("🔄 DEBUG: model call (multimodal)")
-                 try:
-                     outputs = model(
-                         input_ids=inputs["input_ids"],
-                         attention_mask=inputs["attention_mask"],
-                         pixel_values=[pixel_values],
-                         image_metas=image_metas,
-                         max_new_tokens=300,
-                         temperature=0.3,
-                         do_sample=True,
-                         pad_token_id=tokenizer.eos_token_id
-                     )
-                     print("✅ DEBUG: multimodal model call succeeded")
-                 except Exception as model_error:
-                     print(f"❌ DEBUG: multimodal model call failed: {model_error}")
-                     print(f" Error type: {type(model_error).__name__}")
-                     raise model_error
-             else:
-                 print("📄 DEBUG: text-only mode")
-                 # Text-only generation
-                 print("🔄 DEBUG: model call (text only)")
-                 try:
-                     outputs = model(
-                         input_ids=inputs["input_ids"],
-                         attention_mask=inputs["attention_mask"],
-                         max_new_tokens=300,
-                         temperature=0.3,
-                         do_sample=True,
-                         pad_token_id=tokenizer.eos_token_id
-                     )
-                     print("✅ DEBUG: text model call succeeded")
-                 except Exception as model_error:
-                     print(f"❌ DEBUG: text model call failed: {model_error}")
-                     print(f" Error type: {type(model_error).__name__}")
-                     raise model_error
-
-         print("🔍 DEBUG: output processing started")
-         print(f" outputs type: {type(outputs)}")
-         print(f" outputs content: {outputs}")
-
-         # If outputs is a tuple, use the first element
-         if isinstance(outputs, tuple):
-             print("📦 DEBUG: outputs is a tuple")
-             logits = outputs[0]
-             print(f" logits shape: {logits.shape}")
-         else:
-             print("📦 DEBUG: outputs is an object")
-             if hasattr(outputs, 'logits'):
-                 logits = outputs.logits
-                 print(f" logits shape: {logits.shape}")
-             else:
-                 logits = outputs
-                 print(f" outputs shape: {logits.shape}")
-
-         print("🎯 DEBUG: token generation started")
-         # Pick the highest-probability token
-         next_token = torch.argmax(logits[:, -1, :], dim=-1)
-         generated_tokens = [next_token]
-         print(f" First token: {next_token.item()}")
-
-         # Generate additional tokens
-         print("🔄 DEBUG: iterative token generation started")
-         for i in range(299):  # max_new_tokens - 1
-             if i % 50 == 0:
-                 print(f" Progress: {i}/299")
-
-             inputs["input_ids"] = torch.cat([inputs["input_ids"], next_token.unsqueeze(-1)], dim=-1)
-             inputs["attention_mask"] = torch.cat([inputs["attention_mask"], torch.ones_like(next_token.unsqueeze(-1))], dim=-1)
-
-             with torch.no_grad():
-                 try:
-                     outputs = model(**inputs)
-                     if isinstance(outputs, tuple):
-                         logits = outputs[0]
-                     else:
-                         logits = outputs.logits if hasattr(outputs, 'logits') else outputs
-
-                     next_token = torch.argmax(logits[:, -1, :], dim=-1)
-                     generated_tokens.append(next_token)
-
-                     if next_token.item() == tokenizer.eos_token_id:
-                         print(f" EOS token found at step {i}")
-                         break
-                 except Exception as loop_error:
-                     print(f"❌ DEBUG: token generation loop error (i={i}): {loop_error}")
-                     raise loop_error
-
-         print("🔤 DEBUG: token decoding started")
-         # Decode the generated tokens
-         generated_ids = torch.cat(generated_tokens, dim=0)
-         response = tokenizer.decode(generated_ids, skip_special_tokens=True)
-         print(f" Raw response: {response[:200]}...")
+             outputs = model.generate(**inputs, **generation_args)

-         if full_prompt in response:
-             response = response.replace(full_prompt, "").strip()
-             print(f" Cleaned response: {response[:200]}...")
+         # 7. Decode the generated token IDs into text
+         # Strip the input prompt and keep only the pure answer
+         input_length = inputs["input_ids"].shape[1]
+         response_ids = outputs[0][input_length:]
+         response = tokenizer.decode(response_ids, skip_special_tokens=True).strip()

-         print("✅ DEBUG: solve_math_problem finished")
-         return response if response else "Sorry, I could not solve the math problem."
+         return response
+
     except Exception as e:
-         print(f"❌ DEBUG: solve_math_problem overall error: {e}")
-         print(f" Error type: {type(e).__name__}")
-         import traceback
+         print(f"❌ Error while generating a response: {e}")
         traceback.print_exc()
-         return f"Error occurred: {str(e)}"
+         return f"An error occurred: {e}"

+ # --- 5. Gradio UI and launch ---
  with gr.Blocks(title="Lily Math RAG System", theme=gr.themes.Soft()) as demo:
      gr.Markdown("# 🧮 Lily Math RAG System")
-     gr.Markdown("An AI system for solving math problems.")
+     gr.Markdown("An AI system for math problem solving and multimodal chat.")
+
      with gr.Tabs():
          with gr.Tab("💬 Chat"):
+             chat_prompt = "<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
+             chatbot = gr.Chatbot(height=500, label="Chat window", type="messages")
              with gr.Row():
-                 with gr.Column(scale=3):
-                     chatbot = gr.Chatbot(height=400, type="messages")
-                     msg = gr.Textbox(label="Enter a message", placeholder="Hello! Please help me with a math problem.", lines=2)
-                     clear = gr.Button("Clear conversation")
-                 with gr.Column(scale=1):
-                     gr.Markdown("### 📁 File upload")
-                     file_input = gr.File(label="PDF/image file (optional)", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
-                     gr.Markdown("Upload a PDF or image file and the document will be interpreted for the answer.")
+                 with gr.Column(scale=4):
+                     msg = gr.Textbox(label="Message", placeholder="Attach an image or a PDF and ask a question!", lines=3, show_label=False)
+                 with gr.Column(scale=1, min_width=150):
+                     file_input = gr.File(label="File upload", file_types=[".pdf", ".png", ".jpg", ".jpeg"])

              def respond(message, chat_history, file):
-                 bot_message = chat_with_model(message, chat_history, file)
+                 bot_message = generate_response(chat_prompt, message, file)
                  chat_history.append({"role": "user", "content": message})
                  chat_history.append({"role": "assistant", "content": bot_message})
                  return "", chat_history
+
              msg.submit(respond, [msg, chatbot, file_input], [msg, chatbot])
-             clear.click(lambda: None, None, chatbot, queue=False)
-
-         with gr.Tab("🧮 Math problem solving"):
-             with gr.Row():
-                 with gr.Column(scale=2):
-                     math_input = gr.Textbox(label="Math problem", placeholder="e.g. 2x + 5 = 13", lines=3)
-                     solve_btn = gr.Button("Solve", variant="primary")
-                 with gr.Column(scale=1):
-                     gr.Markdown("### 📁 File upload")
-                     math_file_input = gr.File(label="Math problem file (optional)", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
-                     gr.Markdown("Upload a math problem as a PDF or image for a more accurate answer.")
-                 with gr.Column(scale=2):
-                     math_output = gr.Textbox(label="Solution", lines=8, interactive=False)
-             solve_btn.click(solve_math_problem, [math_input, math_file_input], math_output)
-
-         with gr.Tab("⚙️ Settings"):
-             gr.Markdown("## System info")
-             gr.Markdown(f"**Model**: {MODEL_NAME}")
-             gr.Markdown(f"**Model status**: {'✅ loaded' if MODEL_LOADED else '❌ load failed'}")
-             gr.Markdown(f"**Token status**: {'✅ set' if HF_TOKEN else '❌ not set'}")
-             gr.Markdown("**Version**: 3.0.0 (multimodal)")
-             gr.Markdown("**Features**: text + image multimodal chat")
+
+         with gr.Tab("⚙️ System info"):
+             gr.Markdown(f"**Model**: `{MODEL_NAME}`")
+             gr.Markdown(f"**Model status**: `{'✅ loaded' if MODEL_LOADED else '❌ load failed'}`")

  if __name__ == "__main__":
-     demo.launch()
-
+     # With share=True, a public link that is reachable externally is created.
+     demo.launch(share=True)
app_250807_0427.py ADDED
@@ -0,0 +1,574 @@
1
+ import gradio as gr
2
+ import os
3
+ import requests
4
+ import json
5
+ import traceback
6
+ from transformers import AutoTokenizer
7
+ import torch
8
+ import fitz # PyMuPDF
9
+ from PIL import Image
10
+ import io
11
+ import base64
12
+
13
+ # ์ „์—ญ ๋ณ€์ˆ˜๋กœ ์„ ์–ธ
14
+ tokenizer = None
15
+ model = None
16
+ MODEL_LOADED = False
17
+
18
+ # .env ํŒŒ์ผ์—์„œ ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ๋กœ๋“œ
19
+ try:
20
+ from dotenv import load_dotenv
21
+ load_dotenv()
22
+ print("โœ… .env ํŒŒ์ผ ๋กœ๋“œ๋จ")
23
+ except ImportError:
24
+ print("โš ๏ธ python-dotenv๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์Œ, ์‹œ์Šคํ…œ ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์‚ฌ์šฉ")
25
+ except Exception as e:
26
+ print(f"โš ๏ธ .env ํŒŒ์ผ ๋กœ๋“œ ์‹คํŒจ: {e}")
27
+
28
+ # ํ™˜๊ฒฝ ๋ณ€์ˆ˜์—์„œ๋งŒ ํ† ํฐ ๊ฐ€์ ธ์˜ค๊ธฐ (๋ณด์•ˆ)
29
+ HF_TOKEN = os.getenv("HF_TOKEN")
30
+ MODEL_NAME = os.getenv("MODEL_NAME", "gbrabbit/lily-math-model")
31
+
32
+ print("๐Ÿ” ์ƒ์„ธ ๋””๋ฒ„๊น… ์‹œ์ž‘")
33
+ print("=" * 50)
34
+ print(f"1. ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ํ™•์ธ:")
35
+ print(f" HF_TOKEN: {'โœ… ์„ค์ •๋จ' if HF_TOKEN else 'โŒ ์„ค์ •๋˜์ง€ ์•Š์Œ'}")
36
+ print(f" MODEL_NAME: {MODEL_NAME}")
37
+ if HF_TOKEN:
38
+ print(f" ํ† ํฐ ๊ธธ์ด: {len(HF_TOKEN)}")
39
+ print(f" ํ† ํฐ ์‹œ์ž‘: {HF_TOKEN[:10]}...")
40
+ print(f" ํ† ํฐ ๋: ...{HF_TOKEN[-10:]}")
41
+
42
+ # ๋ชจ๋ธ ๋กœ๋“œ (์ปค์Šคํ…€ ๋ชจ๋ธ ํด๋ž˜์Šค ์‚ฌ์šฉ)
43
+ try:
44
+ print(f"\n2. ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘:")
45
+ print(f" ๋ชจ๋ธ: {MODEL_NAME}")
46
+ print(f" ํ† ํฐ ์‚ฌ์šฉ: {'์˜ˆ' if HF_TOKEN else '์•„๋‹ˆ์˜ค'}")
47
+
48
+ if HF_TOKEN:
49
+ print(" ํ† ํฌ๋‚˜์ด์ € ๋กœ๋”ฉ ์ค‘...")
50
+ tokenizer = AutoTokenizer.from_pretrained(
51
+ MODEL_NAME,
52
+ token=HF_TOKEN,
53
+ trust_remote_code=True,
54
+ use_fast=False
55
+ )
56
+ print(" โœ… ํ† ํฌ๋‚˜์ด์ € ๋กœ๋”ฉ ์™„๋ฃŒ")
57
+ print(f" ํ† ํฌ๋‚˜์ด์ € ํƒ€์ž…: {type(tokenizer)}")
58
+ print(f" ํ† ํฌ๋‚˜์ด์ € hasattr('encode'): {hasattr(tokenizer, 'encode')}")
59
+
60
+ print(" ์ปค์Šคํ…€ ๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘...")
61
+ # ์ปค์Šคํ…€ ๋ชจ๋ธ ํด๋ž˜์Šค import (Space ํด๋”์˜ modeling.py ์‚ฌ์šฉ)
62
+ try:
63
+ from modeling import KananaVForConditionalGeneration
64
+ print(" โœ… modeling.py import ์„ฑ๊ณต")
65
+ except Exception as import_error:
66
+ print(f" โŒ modeling.py import ์‹คํŒจ: {import_error}")
67
+ raise import_error
68
+
69
+ try:
70
+ print(f" ๋ชจ๋ธ ๋กœ๋”ฉ ํŒŒ๋ผ๋ฏธํ„ฐ:")
71
+ print(f" MODEL_NAME: {MODEL_NAME}")
72
+ print(f" torch_dtype: {torch.float16}")
73
+ print(f" trust_remote_code: True")
74
+ print(f" device_map: None")
75
+ print(f" low_cpu_mem_usage: True")
76
+
77
+ model = KananaVForConditionalGeneration.from_pretrained(
78
+ MODEL_NAME,
79
+ token=HF_TOKEN,
80
+ torch_dtype=torch.float16,
81
+ trust_remote_code=True,
82
+ device_map=None,
83
+ low_cpu_mem_usage=True
84
+ )
85
+ print(" โœ… ์ปค์Šคํ…€ ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ")
86
+ print(f" ๋ชจ๋ธ ํƒ€์ž…: {type(model)}")
87
+ print(f" ๋ชจ๋ธ ๋””๋ฐ”์ด์Šค: {next(model.parameters()).device}")
88
+ except Exception as model_error:
89
+ print(f" โŒ ์ปค์Šคํ…€ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ: {model_error}")
90
+ print(f" ์˜ค๋ฅ˜ ํƒ€์ž…: {type(model_error).__name__}")
91
+ import traceback
92
+ traceback.print_exc()
93
+ raise model_error
94
+ else:
95
+ print(" โš ๏ธ ํ† ํฐ์ด ์—†์–ด์„œ ๊ณต๊ฐœ ๋ชจ๋ธ ์‚ฌ์šฉ")
96
+ MODEL_NAME = "microsoft/DialoGPT-medium"
97
+ print(f" ๊ณต๊ฐœ ๋ชจ๋ธ: {MODEL_NAME}")
98
+
99
+ print(" ํ† ํฌ๋‚˜์ด์ € ๋กœ๋”ฉ ์ค‘...")
100
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
101
+ print(" โœ… ํ† ํฌ๋‚˜์ด์ € ๋กœ๋”ฉ ์™„๋ฃŒ")
102
+
103
+ print(" ๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘...")
104
+ from transformers import AutoModelForCausalLM
105
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float16)
106
+ print(" โœ… ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ")
107
+
108
+ print("โœ… ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ!")
109
+ MODEL_LOADED = True
110
+
111
+ except Exception as e:
112
+ print(f"โŒ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ:")
113
+ print(f" ์˜ค๋ฅ˜ ํƒ€์ž…: {type(e).__name__}")
114
+ print(f" ์˜ค๋ฅ˜ ๋ฉ”์‹œ์ง€: {str(e)}")
115
+ print(f" ์ƒ์„ธ ์˜ค๋ฅ˜:")
116
+ traceback.print_exc()
117
+ MODEL_LOADED = False
118
+
119
+ print(f"\n3. ์ตœ์ข… ์ƒํƒœ:")
120
+ print(f" MODEL_LOADED: {MODEL_LOADED}")
121
+ print(f" ์ตœ์ข… ๋ชจ๋ธ๋ช…: {MODEL_NAME}")
122
+
123
+ def extract_text_from_pdf(pdf_file):
124
+ """PDF์—์„œ ํ…์ŠคํŠธ ์ถ”์ถœ"""
125
+ try:
126
+ doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
127
+ text = ""
128
+ for page in doc:
129
+ text += page.get_text()
130
+ doc.close()
131
+ return text
132
+ except Exception as e:
133
+ return f"PDF ์ฝ๊ธฐ ์˜ค๋ฅ˜: {str(e)}"
134
+
135
+ def extract_text_from_image(image_file):
136
+ """์ด๋ฏธ์ง€์—์„œ OCR๋กœ ํ…์ŠคํŠธ ์ถ”์ถœ"""
137
+ try:
138
+ # PIL๋กœ ์ด๋ฏธ์ง€ ์—ด๊ธฐ
139
+ image = Image.open(image_file)
140
+
141
+ # ๊ฐ„๋‹จํ•œ OCR (์‹ค์ œ๋กœ๋Š” ๋” ์ •๊ตํ•œ OCR ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ ์‚ฌ์šฉ ํ•„์š”)
142
+ # ์—ฌ๊ธฐ์„œ๋Š” ์ด๋ฏธ์ง€ ์ •๋ณด๋งŒ ๋ฐ˜ํ™˜
143
+ return f"์ด๋ฏธ์ง€ ํŒŒ์ผ: {image.size[0]}x{image.size[1]} ํ”ฝ์…€"
144
+ except Exception as e:
145
+ return f"์ด๋ฏธ์ง€ ์ฝ๊ธฐ ์˜ค๋ฅ˜: {str(e)}"
146
+
147
+ def process_uploaded_file(file):
148
+ """์—…๋กœ๋“œ๋œ ํŒŒ์ผ ์ฒ˜๋ฆฌ"""
149
+ if file is None:
150
+ return None, None
151
+
152
+ file_path = file.name
153
+ file_extension = file_path.lower().split('.')[-1]
154
+
155
+ if file_extension == 'pdf':
156
+ text_content = extract_text_from_pdf(file)
157
+ return text_content, None
158
+ elif file_extension in ['png', 'jpg', 'jpeg']:
159
+ text_content = extract_text_from_image(file)
160
+ return text_content, file
161
+ else:
162
+ return f"์ง€์›ํ•˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹: {file_extension}", None
163
+
164
+ def chat_with_model(message, history, file=None):
165
+ global tokenizer, model
166
+ print(f"๐Ÿ” DEBUG: chat_with_model ์‹œ์ž‘")
167
+ print(f" ๋ฉ”์‹œ์ง€: {message}")
168
+ print(f" ํŒŒ์ผ: {file}")
169
+ print(f" MODEL_LOADED: {MODEL_LOADED}")
170
+
171
+ if not MODEL_LOADED:
172
+ print("โŒ DEBUG: ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์Œ")
173
+ return "โŒ ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
174
+
175
+ try:
176
+ print("๐Ÿ“ DEBUG: ํŒŒ์ผ ์ฒ˜๋ฆฌ ์‹œ์ž‘")
177
+ # ํŒŒ์ผ ์ฒ˜๋ฆฌ
178
+ file_content = ""
179
+ image_file = None
180
+ if file is not None:
181
+ print(f" ํŒŒ์ผ๋ช…: {file.name}")
182
+ text_content, image_file = process_uploaded_file(file)
183
+ print(f" ํ…์ŠคํŠธ ๋‚ด์šฉ: {text_content[:100] if text_content else 'None'}...")
184
+ print(f" ์ด๋ฏธ์ง€ ํŒŒ์ผ: {image_file}")
185
+ if text_content:
186
+ file_content = f"\n[์—…๋กœ๋“œ๋œ ํŒŒ์ผ ๋‚ด์šฉ]\n{text_content}\n"
187
+
188
+ # ๋ฉ”์‹œ์ง€์— ํŒŒ์ผ ๋‚ด์šฉ ์ถ”๊ฐ€
189
+ full_message = message + file_content
190
+ print(f"๐Ÿ“ DEBUG: ์ „์ฒด ๋ฉ”์‹œ์ง€: {full_message[:200]}...")
191
+
192
+ print("๐Ÿ”ค DEBUG: ํ† ํฌ๋‚˜์ด์ € ์ฒ˜๋ฆฌ ์‹œ์ž‘")
193
+ print(f" tokenizer ํƒ€์ž…: {type(tokenizer)}")
194
+ print(f" tokenizer ๊ฐ’: {tokenizer}")
195
+
196
+ # tokenizer๊ฐ€ ์˜ฌ๋ฐ”๋ฅธ์ง€ ํ™•์ธ
197
+ if not hasattr(tokenizer, 'encode') or tokenizer is None or isinstance(tokenizer, bool):
198
+ print("โŒ DEBUG: tokenizer๊ฐ€ ์˜ฌ๋ฐ”๋ฅด์ง€ ์•Š์Œ")
199
+ # tokenizer๋ฅผ ๋‹ค์‹œ ๋กœ๋“œ
200
+ print("๐Ÿ”„ DEBUG: tokenizer ์žฌ๋กœ๋“œ ์‹œ๋„")
201
+ try:
202
+ tokenizer = AutoTokenizer.from_pretrained(
203
+ MODEL_NAME,
204
+ token=HF_TOKEN,
205
+ trust_remote_code=True,
206
+ use_fast=False
207
+ )
208
+ print("โœ… DEBUG: tokenizer ์žฌ๋กœ๋“œ ์„ฑ๊ณต")
209
+ print(f" ์ƒˆ๋กœ์šด tokenizer ํƒ€์ž…: {type(tokenizer)}")
210
+ except Exception as reload_error:
211
+ print(f"โŒ DEBUG: tokenizer ์žฌ๋กœ๋“œ ์‹คํŒจ: {reload_error}")
212
+ return f"ํ† ํฌ๋‚˜์ด์ € ์˜ค๋ฅ˜: {str(reload_error)}"
213
+
214
+ inputs = tokenizer(full_message, return_tensors="pt")
215
+ print(f" ์ž…๋ ฅ shape: {inputs['input_ids'].shape}")
216
+ print(f" attention_mask shape: {inputs['attention_mask'].shape}")
217
+
218
+ print("๐Ÿค– DEBUG: ๋ชจ๋ธ ์ถ”๋ก  ์‹œ์ž‘")
219
+ with torch.no_grad():
220
+ if image_file is not None:
221
+ print("๐Ÿ–ผ๏ธ DEBUG: ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ๋ชจ๋“œ")
222
+ # ์ด๋ฏธ์ง€๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ
223
+ import torchvision.transforms as transforms
224
+
225
+ # ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ
226
+ transform = transforms.Compose([
227
+ transforms.Resize((224, 224)),
228
+ transforms.ToTensor(),
229
+ transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
230
+ ])
231
+
232
+ pil_image = Image.open(image_file).convert('RGB')
233
+ pixel_values = transform(pil_image).unsqueeze(0)
234
+ image_metas = {"vision_grid_thw": torch.tensor([[1, 14, 14]])} # ๊ธฐ๋ณธ ๊ทธ๋ฆฌ๋“œ ํฌ๊ธฐ
235
+
236
+ print(f" ์ด๋ฏธ์ง€ shape: {pixel_values.shape}")
237
+ print(f" ์ด๋ฏธ์ง€ ๋ฉ”ํƒ€: {image_metas}")
238
+
239
+ # ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ์˜ forward ๋ฉ”์„œ๋“œ ์‚ฌ์šฉ
240
+ print("๐Ÿ”„ DEBUG: ๋ชจ๋ธ ํ˜ธ์ถœ (๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ)")
241
+ try:
242
+ outputs = model(
243
+ input_ids=inputs["input_ids"],
244
+ attention_mask=inputs["attention_mask"],
245
+ pixel_values=[pixel_values],
246
+ image_metas=image_metas,
247
+ max_new_tokens=200,
248
+ temperature=0.7,
249
+ do_sample=True,
250
+ pad_token_id=tokenizer.eos_token_id
251
+ )
252
+ print("โœ… DEBUG: ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ ํ˜ธ์ถœ ์„ฑ๊ณต")
253
+ except Exception as model_error:
254
+ print(f"โŒ DEBUG: ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ ํ˜ธ์ถœ ์‹คํŒจ: {model_error}")
255
+ print(f" ์˜ค๋ฅ˜ ํƒ€์ž…: {type(model_error).__name__}")
256
+ raise model_error
257
+ else:
258
+ print("๐Ÿ“„ DEBUG: ํ…์ŠคํŠธ๋งŒ ์ฒ˜๋ฆฌ ๋ชจ๋“œ")
259
+ # ํ…์ŠคํŠธ๋งŒ ์ƒ์„ฑ
260
+ print("๐Ÿ”„ DEBUG: ๋ชจ๋ธ ํ˜ธ์ถœ (ํ…์ŠคํŠธ๋งŒ)")
261
+ try:
262
+ outputs = model(
263
+ input_ids=inputs["input_ids"],
264
+ attention_mask=inputs["attention_mask"],
265
+ max_new_tokens=200,
266
+ temperature=0.7,
267
+ do_sample=True,
268
+ pad_token_id=tokenizer.eos_token_id
269
+ )
270
+ print("โœ… DEBUG: ํ…์ŠคํŠธ ๋ชจ๋ธ ํ˜ธ์ถœ ์„ฑ๊ณต")
271
+ except Exception as model_error:
272
+ print(f"โŒ DEBUG: ํ…์ŠคํŠธ ๋ชจ๋ธ ํ˜ธ์ถœ ์‹คํŒจ: {model_error}")
273
+ print(f" ์˜ค๋ฅ˜ ํƒ€์ž…: {type(model_error).__name__}")
274
+ raise model_error
275
+
276
+ print("๐Ÿ” DEBUG: ์ถœ๋ ฅ ์ฒ˜๋ฆฌ ์‹œ์ž‘")
277
+ print(f" outputs ํƒ€์ž…: {type(outputs)}")
278
+ print(f" outputs ๋‚ด์šฉ: {outputs}")
279
+
280
+ # outputs๊ฐ€ ํŠœํ”Œ์ธ ๊ฒฝ์šฐ ์ฒซ ๋ฒˆ์งธ ์š”์†Œ ์‚ฌ์šฉ
281
+ if isinstance(outputs, tuple):
282
+ print("๐Ÿ“ฆ DEBUG: outputs๊ฐ€ ํŠœํ”Œ์ž„")
283
+ logits = outputs[0]
284
+ print(f" logits shape: {logits.shape}")
285
+ else:
286
+ print("๐Ÿ“ฆ DEBUG: outputs๊ฐ€ ๊ฐ์ฒด์ž„")
287
+ if hasattr(outputs, 'logits'):
288
+ logits = outputs.logits
289
+ print(f" logits shape: {logits.shape}")
290
+ else:
291
+ logits = outputs
292
+ print(f" outputs shape: {logits.shape}")
293
+
294
+ print("๐ŸŽฏ DEBUG: ํ† ํฐ ์ƒ์„ฑ ์‹œ์ž‘")
295
+ # ๊ฐ€์žฅ ๋†’์€ ํ™•๋ฅ ์˜ ํ† ํฐ ์„ ํƒ
296
+ next_token = torch.argmax(logits[:, -1, :], dim=-1)
297
+ generated_tokens = [next_token]
298
+ print(f" ์ฒซ ๋ฒˆ์งธ ํ† ํฐ: {next_token.item()}")
299
+
300
+ # ์ถ”๊ฐ€ ํ† ํฐ ์ƒ์„ฑ
301
+ print("๐Ÿ”„ DEBUG: ๋ฐ˜๋ณต ํ† ํฐ ์ƒ์„ฑ ์‹œ์ž‘")
302
+ for i in range(199): # max_new_tokens - 1
303
+ if i % 50 == 0:
304
+ print(f" ์ง„ํ–‰๋ฅ : {i}/199")
305
+
306
+ inputs["input_ids"] = torch.cat([inputs["input_ids"], next_token.unsqueeze(-1)], dim=-1)
307
+ inputs["attention_mask"] = torch.cat([inputs["attention_mask"], torch.ones_like(next_token.unsqueeze(-1))], dim=-1)
308
+
309
+ with torch.no_grad():
310
+ try:
311
+ outputs = model(**inputs)
312
+ if isinstance(outputs, tuple):
313
+ logits = outputs[0]
314
+ else:
315
+ logits = outputs.logits if hasattr(outputs, 'logits') else outputs
316
+
317
+ next_token = torch.argmax(logits[:, -1, :], dim=-1)
318
+ generated_tokens.append(next_token)
319
+
320
+ if next_token.item() == tokenizer.eos_token_id:
321
+ print(f" EOS ํ† ํฐ ๋ฐœ๊ฒฌ: {i}๋ฒˆ์งธ")
322
+ break
323
+ except Exception as loop_error:
324
+ print(f"โŒ DEBUG: ํ† ํฐ ์ƒ์„ฑ ๋ฃจํ”„ ์˜ค๋ฅ˜ (i={i}): {loop_error}")
325
+ raise loop_error
326
+
327
+ print("๐Ÿ”ค DEBUG: ํ† ํฐ ๋””์ฝ”๋”ฉ ์‹œ์ž‘")
328
+ # ์ƒ์„ฑ๋œ ํ† ํฐ๋“ค์„ ๋””์ฝ”๋”ฉ
329
+ generated_ids = torch.cat(generated_tokens, dim=0)
330
+ response = tokenizer.decode(generated_ids, skip_special_tokens=True)
331
+ print(f" ์›๋ณธ ์‘๋‹ต: {response[:200]}...")
332
+
333
+ if full_message in response:
334
+ response = response.replace(full_message, "").strip()
335
+ print(f" ์ •๋ฆฌ๋œ ์‘๋‹ต: {response[:200]}...")
336
+
337
+ print("โœ… DEBUG: chat_with_model ์™„๋ฃŒ")
338
+ return response if response else "์ฃ„์†กํ•ฉ๋‹ˆ๋‹ค. ์‘๋‹ต์„ ์ƒ์„ฑํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค."
339
+ except Exception as e:
340
+ print(f"โŒ DEBUG: chat_with_model ์ „์ฒด ์˜ค๋ฅ˜: {e}")
341
+ print(f" ์˜ค๋ฅ˜ ํƒ€์ž…: {type(e).__name__}")
342
+ import traceback
343
+ traceback.print_exc()
344
+ return f"์˜ค๋ฅ˜ ๋ฐœ์ƒ: {str(e)}"
345
+
346
+ def solve_math_problem(problem, file=None):
347
+ global tokenizer, model
348
+ print(f"๐Ÿ” DEBUG: solve_math_problem ์‹œ์ž‘")
349
+ print(f" ๋ฌธ์ œ: {problem}")
350
+ print(f" ํŒŒ์ผ: {file}")
351
+ print(f" MODEL_LOADED: {MODEL_LOADED}")
352
+
353
+ if not MODEL_LOADED:
354
+ print("โŒ DEBUG: ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์Œ")
355
+ return "โŒ ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."
356
+
357
+ try:
358
+ print("๐Ÿ“ DEBUG: ํŒŒ์ผ ์ฒ˜๋ฆฌ ์‹œ์ž‘")
359
+ # ํŒŒ์ผ ์ฒ˜๋ฆฌ
360
+ file_content = ""
361
+ image_file = None
362
+ if file is not None:
363
+ print(f" ํŒŒ์ผ๋ช…: {file.name}")
364
+ text_content, image_file = process_uploaded_file(file)
365
+ print(f" ํ…์ŠคํŠธ ๋‚ด์šฉ: {text_content[:100] if text_content else 'None'}...")
366
+ print(f" ์ด๋ฏธ์ง€ ํŒŒ์ผ: {image_file}")
367
+ if text_content:
368
+ file_content = f"\n[์—…๋กœ๋“œ๋œ ํŒŒ์ผ ๋‚ด์šฉ]\n{text_content}\n"
369
+
370
+ # ๋ฉ”์‹œ์ง€์— ํŒŒ์ผ ๋‚ด์šฉ ์ถ”๊ฐ€
371
+ full_prompt = f"๋‹ค์Œ ์ˆ˜ํ•™ ๋ฌธ์ œ๋ฅผ ๋‹จ๊ณ„๋ณ„๋กœ ํ’€์–ด์ฃผ์„ธ์š”: {problem}{file_content}"
372
+ print(f"๐Ÿ“ DEBUG: ์ „์ฒด ํ”„๋กฌํ”„ํŠธ: {full_prompt[:200]}...")
373
+
374
+ print("๐Ÿ”ค DEBUG: ํ† ํฌ๋‚˜์ด์ € ์ฒ˜๋ฆฌ ์‹œ์ž‘")
375
+ print(f" tokenizer ํƒ€์ž…: {type(tokenizer)}")
376
+ print(f" tokenizer ๊ฐ’: {tokenizer}")
377
+
378
+ # tokenizer๊ฐ€ ์˜ฌ๋ฐ”๋ฅธ์ง€ ํ™•์ธ
379
+ if not hasattr(tokenizer, 'encode') or tokenizer is None or isinstance(tokenizer, bool):
380
+ print("โŒ DEBUG: tokenizer๊ฐ€ ์˜ฌ๋ฐ”๋ฅด์ง€ ์•Š์Œ")
381
+ # tokenizer๋ฅผ ๋‹ค์‹œ ๋กœ๋“œ
382
+ print("๐Ÿ”„ DEBUG: tokenizer ์žฌ๋กœ๋“œ ์‹œ๋„")
383
+ try:
384
+ tokenizer = AutoTokenizer.from_pretrained(
385
+ MODEL_NAME,
386
+ token=HF_TOKEN,
387
+ trust_remote_code=True,
388
+ use_fast=False
389
+ )
390
+ print("โœ… DEBUG: tokenizer ์žฌ๋กœ๋“œ ์„ฑ๊ณต")
391
+ print(f" ์ƒˆ๋กœ์šด tokenizer ํƒ€์ž…: {type(tokenizer)}")
392
+ except Exception as reload_error:
393
+ print(f"โŒ DEBUG: tokenizer ์žฌ๋กœ๋“œ ์‹คํŒจ: {reload_error}")
394
+ return f"ํ† ํฌ๋‚˜์ด์ € ์˜ค๋ฅ˜: {str(reload_error)}"
395
+
396
+ inputs = tokenizer(full_prompt, return_tensors="pt")
397
+ print(f" ์ž…๋ ฅ shape: {inputs['input_ids'].shape}")
398
+ print(f" attention_mask shape: {inputs['attention_mask'].shape}")
399
+
400
+ print("๐Ÿค– DEBUG: ๋ชจ๋ธ ์ถ”๋ก  ์‹œ์ž‘")
401
+ with torch.no_grad():
402
+ if image_file is not None:
403
+ print("๐Ÿ–ผ๏ธ DEBUG: ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ๋ชจ๋“œ")
404
+ # ์ด๋ฏธ์ง€๊ฐ€ ์žˆ๋Š” ๊ฒฝ์šฐ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ์ƒ์„ฑ
405
+ import torchvision.transforms as transforms
406
+
407
+ # ์ด๋ฏธ์ง€ ์ „์ฒ˜๋ฆฌ
408
+ transform = transforms.Compose([
409
+ transforms.Resize((224, 224)),
410
+ transforms.ToTensor(),
411
+                 transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+             ])
+
+             pil_image = Image.open(image_file).convert('RGB')
+             pixel_values = transform(pil_image).unsqueeze(0)
+             image_metas = {"vision_grid_thw": torch.tensor([[1, 14, 14]])}  # default grid size
+
+             print(f"   image shape: {pixel_values.shape}")
+             print(f"   image metas: {image_metas}")
+
+             # Use the multimodal model's forward method
+             print("🔄 DEBUG: calling model (multimodal)")
+             try:
+                 # NOTE: generation-style kwargs (max_new_tokens, temperature, ...) are
+                 # passed to forward() here; the actual decoding happens manually below.
+                 outputs = model(
+                     input_ids=inputs["input_ids"],
+                     attention_mask=inputs["attention_mask"],
+                     pixel_values=[pixel_values],
+                     image_metas=image_metas,
+                     max_new_tokens=300,
+                     temperature=0.3,
+                     do_sample=True,
+                     pad_token_id=tokenizer.eos_token_id
+                 )
+                 print("✅ DEBUG: multimodal model call succeeded")
+             except Exception as model_error:
+                 print(f"❌ DEBUG: multimodal model call failed: {model_error}")
+                 print(f"   error type: {type(model_error).__name__}")
+                 raise model_error
+         else:
+             print("📄 DEBUG: text-only mode")
+             # Text-only generation
+             print("🔄 DEBUG: calling model (text only)")
+             try:
+                 outputs = model(
+                     input_ids=inputs["input_ids"],
+                     attention_mask=inputs["attention_mask"],
+                     max_new_tokens=300,
+                     temperature=0.3,
+                     do_sample=True,
+                     pad_token_id=tokenizer.eos_token_id
+                 )
+                 print("✅ DEBUG: text model call succeeded")
+             except Exception as model_error:
+                 print(f"❌ DEBUG: text model call failed: {model_error}")
+                 print(f"   error type: {type(model_error).__name__}")
+                 raise model_error
+
+         print("🔍 DEBUG: starting output processing")
+         print(f"   outputs type: {type(outputs)}")
+         print(f"   outputs contents: {outputs}")
+
+         # If outputs is a tuple, use its first element
+         if isinstance(outputs, tuple):
+             print("📦 DEBUG: outputs is a tuple")
+             logits = outputs[0]
+             print(f"   logits shape: {logits.shape}")
+         else:
+             print("📦 DEBUG: outputs is an object")
+             if hasattr(outputs, 'logits'):
+                 logits = outputs.logits
+                 print(f"   logits shape: {logits.shape}")
+             else:
+                 logits = outputs
+                 print(f"   outputs shape: {logits.shape}")
+
+         print("🎯 DEBUG: starting token generation")
+         # Select the highest-probability token (greedy)
+         next_token = torch.argmax(logits[:, -1, :], dim=-1)
+         generated_tokens = [next_token]
+         print(f"   first token: {next_token.item()}")
+
+         # Generate the remaining tokens
+         print("🔄 DEBUG: starting generation loop")
+         for i in range(299):  # max_new_tokens - 1
+             if i % 50 == 0:
+                 print(f"   progress: {i}/299")
+
+             inputs["input_ids"] = torch.cat([inputs["input_ids"], next_token.unsqueeze(-1)], dim=-1)
+             inputs["attention_mask"] = torch.cat([inputs["attention_mask"], torch.ones_like(next_token.unsqueeze(-1))], dim=-1)
+
+             with torch.no_grad():
+                 try:
+                     outputs = model(**inputs)
+                     if isinstance(outputs, tuple):
+                         logits = outputs[0]
+                     else:
+                         logits = outputs.logits if hasattr(outputs, 'logits') else outputs
+
+                     next_token = torch.argmax(logits[:, -1, :], dim=-1)
+                     generated_tokens.append(next_token)
+
+                     if next_token.item() == tokenizer.eos_token_id:
+                         print(f"   EOS token found at step {i}")
+                         break
+                 except Exception as loop_error:
+                     print(f"❌ DEBUG: generation loop error (i={i}): {loop_error}")
+                     raise loop_error
+
+         print("🔤 DEBUG: starting token decoding")
+         # Decode the generated tokens
+         generated_ids = torch.cat(generated_tokens, dim=0)
+         response = tokenizer.decode(generated_ids, skip_special_tokens=True)
+         print(f"   raw response: {response[:200]}...")
+
+         if full_prompt in response:
+             response = response.replace(full_prompt, "").strip()
+             print(f"   cleaned response: {response[:200]}...")
+
+         print("✅ DEBUG: solve_math_problem finished")
+         return response if response else "Sorry, I could not solve the math problem."
+     except Exception as e:
+         print(f"❌ DEBUG: solve_math_problem top-level error: {e}")
+         print(f"   error type: {type(e).__name__}")
+         import traceback
+         traceback.print_exc()
+         return f"An error occurred: {str(e)}"
+
+ with gr.Blocks(title="Lily Math RAG System", theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# 🧮 Lily Math RAG System")
+     gr.Markdown("An AI system for solving math problems.")
+     with gr.Tabs():
+         with gr.Tab("💬 Chat"):
+             with gr.Row():
+                 with gr.Column(scale=3):
+                     chatbot = gr.Chatbot(height=400, type="messages")
+                     msg = gr.Textbox(label="Enter a message", placeholder="Hello! Please help me with a math problem.", lines=2)
+                     clear = gr.Button("Clear conversation")
+                 with gr.Column(scale=1):
+                     gr.Markdown("### 📁 File upload")
+                     file_input = gr.File(label="PDF/image file (optional)", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
+                     gr.Markdown("Upload a PDF or image file and the document will be interpreted to answer.")
+
+             def respond(message, chat_history, file):
+                 bot_message = chat_with_model(message, chat_history, file)
+                 chat_history.append({"role": "user", "content": message})
+                 chat_history.append({"role": "assistant", "content": bot_message})
+                 return "", chat_history
+             msg.submit(respond, [msg, chatbot, file_input], [msg, chatbot])
+             clear.click(lambda: None, None, chatbot, queue=False)
+
+         with gr.Tab("🧮 Math problem solving"):
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     math_input = gr.Textbox(label="Math problem", placeholder="e.g., 2x + 5 = 13", lines=3)
+                     solve_btn = gr.Button("Solve", variant="primary")
+                 with gr.Column(scale=1):
+                     gr.Markdown("### 📁 File upload")
+                     math_file_input = gr.File(label="Math problem file (optional)", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
+                     gr.Markdown("Upload the math problem as a PDF or image for a more accurate answer.")
+                 with gr.Column(scale=2):
+                     math_output = gr.Textbox(label="Solution", lines=8, interactive=False)
+             solve_btn.click(solve_math_problem, [math_input, math_file_input], math_output)
+
+         with gr.Tab("⚙️ Settings"):
+             gr.Markdown("## System information")
+             gr.Markdown(f"**Model**: {MODEL_NAME}")
+             gr.Markdown(f"**Model status**: {'✅ loaded' if MODEL_LOADED else '❌ load failed'}")
+             gr.Markdown(f"**Token status**: {'✅ set' if HF_TOKEN else '❌ not set'}")
+             gr.Markdown("**Version**: 3.0.0 (multimodal)")
+             gr.Markdown("**Features**: text + image multimodal chat")
+
+ if __name__ == "__main__":
+     demo.launch()
+
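The hand-rolled loop above recomputes the full forward pass for every token without a KV cache, and it always takes the argmax, so the temperature=0.3, do_sample=True arguments have no effect on decoding. A minimal sketch of the same step done through model.generate, assuming the custom KananaVForConditionalGeneration inherits transformers.GenerationMixin (typical for causal-LM classes, but not verified here):

import torch

def generate_reply(model, tokenizer, inputs, max_new_tokens=300):
    # A single generate() call replaces the manual argmax loop; sampling
    # parameters are honored and the KV cache is reused between steps.
    with torch.no_grad():
        output_ids = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,
            temperature=0.3,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            use_cache=True,
        )
    # generate() returns prompt + continuation; drop the prompt tokens.
    new_tokens = output_ids[0, inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

app_local.py below already takes this generate()-based approach.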
app_local.py ADDED
@@ -0,0 +1,245 @@
+ import gradio as gr
+ import os
+ import traceback
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+ from typing import Optional
+ import fitz  # PyMuPDF
+ from PIL import Image
+ import io
+
+ # --- 1. Global variables and environment setup ---
+
+ # Global variable declarations
+ tokenizer = None
+ model = None
+ MODEL_LOADED = False
+
+ # Environment detection (local development vs. server)
+ IS_LOCAL = os.path.exists('../.env') or 'LOCAL_TEST' in os.environ
+ print(f"🔍 Environment: {'local' if IS_LOCAL else 'server'}")
+
+ # Load environment variables from .env (mainly used locally)
+ try:
+     from dotenv import load_dotenv
+     if IS_LOCAL:
+         load_dotenv(dotenv_path='../.env')
+         print("✅ .env file loaded")
+ except ImportError:
+     print("⚠️ python-dotenv is not installed")
+
+ # Read the token and model name from environment variables
+ HF_TOKEN = os.getenv("HF_TOKEN")
+ MODEL_NAME_SERVER = os.getenv("MODEL_NAME", "gbrabbit/lily-math-model")
+ MODEL_PATH_LOCAL = "../lily_llm_core/models/kanana-1.5-v-3b-instruct"
+
+ # Final model path
+ MODEL_PATH = MODEL_PATH_LOCAL if IS_LOCAL else MODEL_NAME_SERVER
+
+ print(f"🔍 Model path: {MODEL_PATH}")
+ print(f"🔍 HF token: {'✅ set' if HF_TOKEN else '❌ not set'}")
+
+ # --- 2. Core logic: model and tokenizer loading ---
+
+ def load_model_and_tokenizer():
+     """Unified loader that picks the model and tokenizer for the current environment."""
+     global tokenizer, model  # use the module-level globals
+
+     print("🔧 Starting model and tokenizer loading...")
+
+     # modeling.py is imported only when the custom model is used
+     from modeling import KananaVForConditionalGeneration
+
+     if IS_LOCAL:
+         # Load the model from the local file system
+         if not os.path.exists(MODEL_PATH):
+             raise FileNotFoundError(f"Local model path not found: {MODEL_PATH}")
+
+         tokenizer = AutoTokenizer.from_pretrained(
+             MODEL_PATH,
+             trust_remote_code=True,
+             local_files_only=True  # use local files only
+         )
+         model = KananaVForConditionalGeneration.from_pretrained(
+             MODEL_PATH,
+             torch_dtype=torch.float16,
+             trust_remote_code=True,
+             local_files_only=True
+         )
+     else:
+         # Load the model from the Hugging Face Hub
+         if not HF_TOKEN:
+             # Without a token, fall back to a public model (optional)
+             print("⚠️ No HF token; falling back to a public model (DialoGPT).")
+             public_model = "microsoft/DialoGPT-medium"
+             tokenizer = AutoTokenizer.from_pretrained(public_model)
+             model = AutoModelForCausalLM.from_pretrained(public_model, torch_dtype=torch.float16)
+             return
+
+         tokenizer = AutoTokenizer.from_pretrained(
+             MODEL_PATH,
+             token=HF_TOKEN,
+             trust_remote_code=True
+         )
+         model = KananaVForConditionalGeneration.from_pretrained(
+             MODEL_PATH,
+             token=HF_TOKEN,
+             torch_dtype=torch.float16,
+             trust_remote_code=True,
+             device_map="auto"  # automatic GPU placement
+         )
+
+     print("✅ Model and tokenizer loaded successfully!")
+
+ # --- 3. File-handling utilities ---
+
+ def extract_text_from_pdf(pdf_file):
+     """Extract text from a PDF."""
+     try:
+         doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
+         text = "".join(page.get_text() for page in doc)
+         doc.close()
+         return f"\n\n--- PDF content ---\n{text}\n--- end of PDF content ---"
+     except Exception as e:
+         return f"PDF read error: {e}"
+
+ def extract_text_from_image(image_file):
+     """Extract text from an image via OCR (currently returns file info only)."""
+     try:
+         # A real OCR library (e.g., Tesseract) still needs to be wired in
+         image = Image.open(image_file)
+         return f"\n\n--- Image file info: {image.format}, {image.size[0]}x{image.size[1]} ---"
+     except Exception as e:
+         return f"Image read error: {e}"
+
+ def process_uploaded_file(file):
+     """Route an uploaded file to the right handler by type."""
+     if file is None:
+         return ""
+
+     file_path = file.name
+     file_extension = os.path.splitext(file_path)[1].lower()
+
+     if file_extension == '.pdf':
+         return extract_text_from_pdf(file)
+     elif file_extension in ['.png', '.jpg', '.jpeg']:
+         # TODO: add image preprocessing for the multimodal model
+         return extract_text_from_image(file)
+     else:
+         return f"Unsupported file type: {file_extension}"
+
+
+ # --- 4. Core logic: response generation ---
+
+ def generate_response(prompt_template: str, message: str, file: Optional[object] = None):
+     """Unified response-generation function."""
+     if not MODEL_LOADED:
+         return "❌ The model is not loaded. Please restart the app."
+
+     try:
+         print("✍️ Starting response generation...")
+
+         # 1. Process the uploaded file
+         file_content = process_uploaded_file(file)
+
+         # 2. Build the full prompt
+         full_message = message + file_content
+         full_prompt = prompt_template.format(message=full_message)
+
+         print(f"📝 Full prompt (excerpt): {full_prompt[:200]}...")
+
+         # 3. Tokenize the input
+         inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
+
+         # 4. Generate the response with a single model call
+         with torch.no_grad():
+             outputs = model.generate(
+                 **inputs,
+                 max_new_tokens=512,
+                 temperature=0.7,
+                 top_p=0.9,
+                 do_sample=True,
+                 pad_token_id=tokenizer.eos_token_id,
+                 eos_token_id=tokenizer.eos_token_id
+             )
+
+         # 5. Decode the generated token IDs back to text
+         response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+         # 6. Strip the input prompt from the response, keeping only the answer
+         assistant_response = response_text.split("<|im_start|>assistant\n")[-1].strip()
+
+         print(f"💬 Generated response (excerpt): {assistant_response[:200]}...")
+         print("✅ Response generation finished")
+
+         return assistant_response if assistant_response else "Sorry, I could not generate an answer."
+
+     except Exception as e:
+         print(f"❌ Error during response generation: {e}")
+         traceback.print_exc()
+         return f"An error occurred: {e}"
+
+
+ # --- 5. Gradio UI setup ---
+
+ with gr.Blocks(title="Lily Math RAG System", theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# 🧮 Lily Math RAG System")
+     gr.Markdown("An AI system for math problem solving and general chat.")
+
+     with gr.Tabs():
+         with gr.Tab("💬 General chat"):
+             # Chat prompt template
+             chat_prompt = "<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
+
+             with gr.Row():
+                 with gr.Column(scale=3):
+                     chatbot = gr.Chatbot(height=500, label="Conversation", type="messages")
+                     msg = gr.Textbox(label="Message", placeholder="Hello! How can I help you?", lines=3)
+                     clear = gr.Button("Start new conversation")
+                 with gr.Column(scale=1):
+                     gr.Markdown("### 📁 File upload (optional)")
+                     chat_file = gr.File(label="PDF/image file", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
+
+             def respond(message, chat_history, file):
+                 bot_message = generate_response(chat_prompt, message, file)
+                 chat_history.append({"role": "user", "content": message})
+                 chat_history.append({"role": "assistant", "content": bot_message})
+                 return "", chat_history
+
+             msg.submit(respond, [msg, chatbot, chat_file], [msg, chatbot])
+             clear.click(lambda: None, None, chatbot, queue=False)
+
+         with gr.Tab("🧮 Math problem solving"):
+             # Prompt template for step-by-step math solutions
+             math_prompt = "Solve the following math problem step by step, in detail:\n\n{message}\n\n<|im_start|>assistant\n"
+
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     math_input = gr.Textbox(label="Math problem", placeholder="e.g., 2x + 5 = 13", lines=5)
+                     gr.Markdown("### 📁 Problem file upload (optional)")
+                     math_file = gr.File(label="Problem as PDF/image", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
+                     solve_btn = gr.Button("Solve", variant="primary")
+                 with gr.Column(scale=2):
+                     math_output = gr.Textbox(label="Solution steps and answer", lines=10, interactive=False)
+
+             solve_btn.click(lambda msg, file: generate_response(math_prompt, msg, file), [math_input, math_file], math_output)
+
+         with gr.Tab("⚙️ System info"):
+             gr.Markdown(f"**Model path**: `{MODEL_PATH}`")
+             gr.Markdown(f"**Model status**: `{'✅ loaded' if MODEL_LOADED else '❌ load failed'}`")
+             gr.Markdown(f"**Runtime environment**: `{'local' if IS_LOCAL else 'server'}`")
+
+
+ # --- 6. Application entry point ---
+
+ if __name__ == "__main__":
+     try:
+         load_model_and_tokenizer()
+         MODEL_LOADED = True
+     except Exception as e:
+         print("❌ Startup failed: the model required by the application could not be loaded.")
+         print(f"Error: {e}")
+         traceback.print_exc()
+         MODEL_LOADED = False
+
+     demo.launch(server_name="localhost", server_port=8006)
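The chat_prompt and math_prompt strings hard-code ChatML markers. If the tokenizer shipped with the model defines a chat template (common for instruct checkpoints, but not verified for kanana-1.5-v-3b-instruct), the prompt can be built without magic strings. A minimal sketch:

def build_prompt(tokenizer, message: str) -> str:
    # Relies on tokenizer.chat_template being defined; falls back to the
    # hand-written ChatML template above if it is not.
    messages = [{"role": "user", "content": message}]
    try:
        return tokenizer.apply_chat_template(
            messages,
            tokenize=False,              # return a string, not token IDs
            add_generation_prompt=True,  # append the assistant header
        )
    except ValueError:
        return f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"

Keeping prompt construction in one helper also makes it easier to keep the response-stripping split marker in step 6 in sync with the template.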
app_local_250807_0427.py ADDED
@@ -0,0 +1,245 @@
+ import gradio as gr
+ import os
+ import traceback
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+ import torch
+ from typing import Optional
+ import fitz  # PyMuPDF
+ from PIL import Image
+ import io
+
+ # --- 1. Global variables and environment setup ---
+
+ # Global variable declarations
+ tokenizer = None
+ model = None
+ MODEL_LOADED = False
+
+ # Environment detection (local development vs. server)
+ IS_LOCAL = os.path.exists('../.env') or 'LOCAL_TEST' in os.environ
+ print(f"🔍 Environment: {'local' if IS_LOCAL else 'server'}")
+
+ # Load environment variables from .env (mainly used locally)
+ try:
+     from dotenv import load_dotenv
+     if IS_LOCAL:
+         load_dotenv(dotenv_path='../.env')
+         print("✅ .env file loaded")
+ except ImportError:
+     print("⚠️ python-dotenv is not installed")
+
+ # Read the token and model name from environment variables
+ HF_TOKEN = os.getenv("HF_TOKEN")
+ MODEL_NAME_SERVER = os.getenv("MODEL_NAME", "gbrabbit/lily-math-model")
+ MODEL_PATH_LOCAL = "../lily_llm_core/models/kanana-1.5-v-3b-instruct"
+
+ # Final model path
+ MODEL_PATH = MODEL_PATH_LOCAL if IS_LOCAL else MODEL_NAME_SERVER
+
+ print(f"🔍 Model path: {MODEL_PATH}")
+ print(f"🔍 HF token: {'✅ set' if HF_TOKEN else '❌ not set'}")
+
+ # --- 2. Core logic: model and tokenizer loading ---
+
+ def load_model_and_tokenizer():
+     """Unified loader that picks the model and tokenizer for the current environment."""
+     global tokenizer, model  # use the module-level globals
+
+     print("🔧 Starting model and tokenizer loading...")
+
+     # modeling.py is imported only when the custom model is used
+     from modeling import KananaVForConditionalGeneration
+
+     if IS_LOCAL:
+         # Load the model from the local file system
+         if not os.path.exists(MODEL_PATH):
+             raise FileNotFoundError(f"Local model path not found: {MODEL_PATH}")
+
+         tokenizer = AutoTokenizer.from_pretrained(
+             MODEL_PATH,
+             trust_remote_code=True,
+             local_files_only=True  # use local files only
+         )
+         model = KananaVForConditionalGeneration.from_pretrained(
+             MODEL_PATH,
+             torch_dtype=torch.float16,
+             trust_remote_code=True,
+             local_files_only=True
+         )
+     else:
+         # Load the model from the Hugging Face Hub
+         if not HF_TOKEN:
+             # Without a token, fall back to a public model (optional)
+             print("⚠️ No HF token; falling back to a public model (DialoGPT).")
+             public_model = "microsoft/DialoGPT-medium"
+             tokenizer = AutoTokenizer.from_pretrained(public_model)
+             model = AutoModelForCausalLM.from_pretrained(public_model, torch_dtype=torch.float16)
+             return
+
+         tokenizer = AutoTokenizer.from_pretrained(
+             MODEL_PATH,
+             token=HF_TOKEN,
+             trust_remote_code=True
+         )
+         model = KananaVForConditionalGeneration.from_pretrained(
+             MODEL_PATH,
+             token=HF_TOKEN,
+             torch_dtype=torch.float16,
+             trust_remote_code=True,
+             device_map="auto"  # automatic GPU placement
+         )
+
+     print("✅ Model and tokenizer loaded successfully!")
+
+ # --- 3. File-handling utilities ---
+
+ def extract_text_from_pdf(pdf_file):
+     """Extract text from a PDF."""
+     try:
+         doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
+         text = "".join(page.get_text() for page in doc)
+         doc.close()
+         return f"\n\n--- PDF content ---\n{text}\n--- end of PDF content ---"
+     except Exception as e:
+         return f"PDF read error: {e}"
+
+ def extract_text_from_image(image_file):
+     """Extract text from an image via OCR (currently returns file info only)."""
+     try:
+         # A real OCR library (e.g., Tesseract) still needs to be wired in
+         image = Image.open(image_file)
+         return f"\n\n--- Image file info: {image.format}, {image.size[0]}x{image.size[1]} ---"
+     except Exception as e:
+         return f"Image read error: {e}"
+
+ def process_uploaded_file(file):
+     """Route an uploaded file to the right handler by type."""
+     if file is None:
+         return ""
+
+     file_path = file.name
+     file_extension = os.path.splitext(file_path)[1].lower()
+
+     if file_extension == '.pdf':
+         return extract_text_from_pdf(file)
+     elif file_extension in ['.png', '.jpg', '.jpeg']:
+         # TODO: add image preprocessing for the multimodal model
+         return extract_text_from_image(file)
+     else:
+         return f"Unsupported file type: {file_extension}"
+
+
+ # --- 4. Core logic: response generation ---
+
+ def generate_response(prompt_template: str, message: str, file: Optional[object] = None):
+     """Unified response-generation function."""
+     if not MODEL_LOADED:
+         return "❌ The model is not loaded. Please restart the app."
+
+     try:
+         print("✍️ Starting response generation...")
+
+         # 1. Process the uploaded file
+         file_content = process_uploaded_file(file)
+
+         # 2. Build the full prompt
+         full_message = message + file_content
+         full_prompt = prompt_template.format(message=full_message)
+
+         print(f"📝 Full prompt (excerpt): {full_prompt[:200]}...")
+
+         # 3. Tokenize the input
+         inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
+
+         # 4. Generate the response with a single model call
+         with torch.no_grad():
+             outputs = model.generate(
+                 **inputs,
+                 max_new_tokens=512,
+                 temperature=0.7,
+                 top_p=0.9,
+                 do_sample=True,
+                 pad_token_id=tokenizer.eos_token_id,
+                 eos_token_id=tokenizer.eos_token_id
+             )
+
+         # 5. Decode the generated token IDs back to text
+         response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+         # 6. Strip the input prompt from the response, keeping only the answer
+         assistant_response = response_text.split("<|im_start|>assistant\n")[-1].strip()
+
+         print(f"💬 Generated response (excerpt): {assistant_response[:200]}...")
+         print("✅ Response generation finished")
+
+         return assistant_response if assistant_response else "Sorry, I could not generate an answer."
+
+     except Exception as e:
+         print(f"❌ Error during response generation: {e}")
+         traceback.print_exc()
+         return f"An error occurred: {e}"
+
+
+ # --- 5. Gradio UI setup ---
+
+ with gr.Blocks(title="Lily Math RAG System", theme=gr.themes.Soft()) as demo:
+     gr.Markdown("# 🧮 Lily Math RAG System")
+     gr.Markdown("An AI system for math problem solving and general chat.")
+
+     with gr.Tabs():
+         with gr.Tab("💬 General chat"):
+             # Chat prompt template
+             chat_prompt = "<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
+
+             with gr.Row():
+                 with gr.Column(scale=3):
+                     chatbot = gr.Chatbot(height=500, label="Conversation", type="messages")
+                     msg = gr.Textbox(label="Message", placeholder="Hello! How can I help you?", lines=3)
+                     clear = gr.Button("Start new conversation")
+                 with gr.Column(scale=1):
+                     gr.Markdown("### 📁 File upload (optional)")
+                     chat_file = gr.File(label="PDF/image file", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
+
+             def respond(message, chat_history, file):
+                 bot_message = generate_response(chat_prompt, message, file)
+                 chat_history.append({"role": "user", "content": message})
+                 chat_history.append({"role": "assistant", "content": bot_message})
+                 return "", chat_history
+
+             msg.submit(respond, [msg, chatbot, chat_file], [msg, chatbot])
+             clear.click(lambda: None, None, chatbot, queue=False)
+
+         with gr.Tab("🧮 Math problem solving"):
+             # Prompt template for step-by-step math solutions
+             math_prompt = "Solve the following math problem step by step, in detail:\n\n{message}\n\n<|im_start|>assistant\n"
+
+             with gr.Row():
+                 with gr.Column(scale=2):
+                     math_input = gr.Textbox(label="Math problem", placeholder="e.g., 2x + 5 = 13", lines=5)
+                     gr.Markdown("### 📁 Problem file upload (optional)")
+                     math_file = gr.File(label="Problem as PDF/image", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
+                     solve_btn = gr.Button("Solve", variant="primary")
+                 with gr.Column(scale=2):
+                     math_output = gr.Textbox(label="Solution steps and answer", lines=10, interactive=False)
+
+             solve_btn.click(lambda msg, file: generate_response(math_prompt, msg, file), [math_input, math_file], math_output)
+
+         with gr.Tab("⚙️ System info"):
+             gr.Markdown(f"**Model path**: `{MODEL_PATH}`")
+             gr.Markdown(f"**Model status**: `{'✅ loaded' if MODEL_LOADED else '❌ load failed'}`")
+             gr.Markdown(f"**Runtime environment**: `{'local' if IS_LOCAL else 'server'}`")
+
+
+ # --- 6. Application entry point ---
+
+ if __name__ == "__main__":
+     try:
+         load_model_and_tokenizer()
+         MODEL_LOADED = True
+     except Exception as e:
+         print("❌ Startup failed: the model required by the application could not be loaded.")
+         print(f"Error: {e}")
+         traceback.print_exc()
+         MODEL_LOADED = False
+
+     demo.launch(server_name="localhost", server_port=8006)
test_input.py ADDED
@@ -0,0 +1,100 @@
+ import os
+ from gradio_client import Client, file
+
+ # --- Configuration ---
+ # Local Gradio server address (shown in the terminal when app.py is running)
+ SERVER_URL = "http://localhost:8006/"
+
+ def run_chat_test(client):
+     """Tests the general chat tab."""
+     print("\n--- 💬 Starting general chat test ---")
+
+     test_message = "Hello! How is the weather today?"
+     chat_history = []  # the initial chat history is empty
+
+     print(f"Sending message: '{test_message}'")
+
+     # Call the `respond` function (API endpoint index: 0)
+     # Inputs: (message, chat history, file)
+     # Outputs: (cleared textbox, updated chat history)
+     result = client.predict(
+         test_message,
+         chat_history,
+         None,  # no file
+         fn_index=0
+     )
+
+     # Extract the last (bot) message from the updated chat history
+     updated_history = result[1]
+     bot_response = updated_history[-1]['content']
+
+     print("✅ Test passed!")
+     print(f"🤖 Response received: '{bot_response}'")
+
+ def run_math_test(client):
+     """Tests the math problem-solving tab."""
+     print("\n--- 🧮 Starting math problem test ---")
+
+     test_problem = "If the sum of two consecutive even numbers is 34, what are the two even numbers?"
+
+     print(f"Sending problem: '{test_problem}'")
+
+     # Call the math-solving function (API endpoint index: 1)
+     # Inputs: (math problem, file)
+     # Outputs: (result text)
+     result = client.predict(
+         test_problem,
+         None,  # no file
+         fn_index=1
+     )
+
+     print("✅ Test passed!")
+     print(f"🤖 Response received (excerpt): '{result[:200]}...'")
+
+ def run_file_test(client):
+     """Tests the file-upload feature."""
+     print("\n--- 📁 Starting file-upload chat test ---")
+
+     # Create a temporary text file for the test
+     temp_file_path = "test_document.txt"
+     with open(temp_file_path, "w", encoding="utf-8") as f:
+         f.write("This file was created for testing.\n")
+         f.write("The key content of the file is 'The capital of South Korea is Seoul'.")
+
+     print(f"File to upload: '{temp_file_path}'")
+     test_message = "What is the key content of the uploaded file?"
+     print(f"Sending message: '{test_message}'")
+
+     # Use `file()` to wrap the file so it can be uploaded to the server
+     result = client.predict(
+         test_message,
+         [],  # no chat history
+         file(temp_file_path),
+         fn_index=0
+     )
+
+     # Delete the temporary file
+     os.remove(temp_file_path)
+
+     bot_response = result[1][-1]['content']
+     print("✅ Test passed!")
+     print(f"🤖 Response received: '{bot_response}'")
+
+
+ if __name__ == "__main__":
+     print(f"Trying to connect to the Gradio server ({SERVER_URL})...")
+
+     try:
+         # Connect to the server as a client
+         client = Client(SERVER_URL, verbose=False)
+         print("✅ Connected to the server!")
+
+         # Run the tests
+         run_chat_test(client)
+         run_math_test(client)
+         # run_file_test(client)  # uncomment to run the file-upload test when needed
+
+     except Exception as e:
+         print("\n❌ Test failed: could not connect to the server, or an error occurred.")
+         print("Make sure 'python app.py' is running in another terminal first.")
+         print(f"Error details: {e}")
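fn_index is positional and silently shifts whenever event handlers are added or reordered in app.py. gradio_client can also resolve endpoints by name; a short sketch that inspects what the running server actually exposes (endpoint names depend on the Gradio version, so none are assumed here):

from gradio_client import Client

client = Client("http://localhost:8006/", verbose=False)
# Prints every callable endpoint with its api_name and argument order;
# calling client.predict(..., api_name=...) is more robust than fn_index.
client.view_api()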
test_text.py ADDED
@@ -0,0 +1,100 @@
+ import os
+ from gradio_client import Client, file
+
+ # --- Configuration ---
+ # Local Gradio server address (shown in the terminal when app.py is running)
+ SERVER_URL = "http://localhost:8006/"
+
+ def run_chat_test(client):
+     """Tests the general chat tab."""
+     print("\n--- 💬 Starting general chat test ---")
+
+     test_message = "Hello! How is the weather today?"
+     chat_history = []  # the initial chat history is empty
+
+     print(f"Sending message: '{test_message}'")
+
+     # Call the `respond` function (API endpoint index: 0)
+     # Inputs: (message, chat history, file)
+     # Outputs: (cleared textbox, updated chat history)
+     result = client.predict(
+         test_message,
+         chat_history,
+         None,  # no file
+         fn_index=0
+     )
+
+     # Extract the last (bot) message from the updated chat history
+     updated_history = result[1]
+     bot_response = updated_history[-1]['content']
+
+     print("✅ Test passed!")
+     print(f"🤖 Response received: '{bot_response}'")
+
+ def run_math_test(client):
+     """Tests the math problem-solving tab."""
+     print("\n--- 🧮 Starting math problem test ---")
+
+     test_problem = "If the sum of two consecutive even numbers is 34, what are the two even numbers?"
+
+     print(f"Sending problem: '{test_problem}'")
+
+     # Call the math-solving function (API endpoint index: 1)
+     # Inputs: (math problem, file)
+     # Outputs: (result text)
+     result = client.predict(
+         test_problem,
+         None,  # no file
+         fn_index=1
+     )
+
+     print("✅ Test passed!")
+     print(f"🤖 Response received (excerpt): '{result[:200]}...'")
+
+ def run_file_test(client):
+     """Tests the file-upload feature."""
+     print("\n--- 📁 Starting file-upload chat test ---")
+
+     # Create a temporary text file for the test
+     temp_file_path = "test_document.txt"
+     with open(temp_file_path, "w", encoding="utf-8") as f:
+         f.write("This file was created for testing.\n")
+         f.write("The key content of the file is 'The capital of South Korea is Seoul'.")
+
+     print(f"File to upload: '{temp_file_path}'")
+     test_message = "What is the key content of the uploaded file?"
+     print(f"Sending message: '{test_message}'")
+
+     # Use `file()` to wrap the file so it can be uploaded to the server
+     result = client.predict(
+         test_message,
+         [],  # no chat history
+         file(temp_file_path),
+         fn_index=0
+     )
+
+     # Delete the temporary file
+     os.remove(temp_file_path)
+
+     bot_response = result[1][-1]['content']
+     print("✅ Test passed!")
+     print(f"🤖 Response received: '{bot_response}'")
+
+
+ if __name__ == "__main__":
+     print(f"Trying to connect to the Gradio server ({SERVER_URL})...")
+
+     try:
+         # Connect to the server as a client
+         client = Client(SERVER_URL, verbose=False)
+         print("✅ Connected to the server!")
+
+         # Run the tests
+         run_chat_test(client)
+         run_math_test(client)
+         # run_file_test(client)  # uncomment to run the file-upload test when needed
+
+     except Exception as e:
+         print("\n❌ Test failed: could not connect to the server, or an error occurred.")
+         print("Make sure 'python app.py' is running in another terminal first.")
+         print(f"Error details: {e}")
test_tokenizer.py ADDED
@@ -0,0 +1,159 @@
+ import os
+ import traceback
+ from typing import Optional
+ from transformers import AutoTokenizer
+ import torch
+
+ # Load environment variables
+ try:
+     from dotenv import load_dotenv
+     load_dotenv()
+     print("✅ .env file loaded")
+ except ImportError:
+     print("⚠️ python-dotenv is not installed")
+
+ HF_TOKEN = os.getenv("HF_TOKEN")
+
+ # Environment detection
+ IS_LOCAL = os.path.exists('../.env') or 'LOCAL_TEST' in os.environ
+ print(f"🔍 Environment: {'local' if IS_LOCAL else 'server'}")
+
+ # Model path per environment
+ if IS_LOCAL:
+     # Local model path (uses the hearth_llm_model folder)
+     MODEL_PATH = "../lily_llm_core/models/kanana-1.5-v-3b-instruct"
+     print(f"🔍 Local model path: {MODEL_PATH}")
+     print(f"🔍 Path exists: {os.path.exists(MODEL_PATH)}")
+ else:
+     # On the server, use the Hugging Face model
+     MODEL_PATH = os.getenv("MODEL_NAME", "gbrabbit/lily-math-model")
+     print(f"🔍 Server model: {MODEL_PATH}")
+
+ print(f"🔍 Token: {'✅ set' if HF_TOKEN else '❌ not set'}")
+
+ # Tokenizer test
+ print("\n🔧 Starting tokenizer test...")
+
+ try:
+     print("📤 Loading tokenizer...")
+     print(f"   MODEL_PATH: {MODEL_PATH}")
+     print(f"   IS_LOCAL: {IS_LOCAL}")
+     print("   trust_remote_code: True")
+     print("   use_fast: False")
+
+     if IS_LOCAL:
+         tokenizer = AutoTokenizer.from_pretrained(
+             MODEL_PATH,
+             trust_remote_code=True,
+         )
+     else:
+         tokenizer = AutoTokenizer.from_pretrained(
+             MODEL_PATH,
+             token=HF_TOKEN,
+             trust_remote_code=True,
+         )
+
+     print("✅ Tokenizer loaded")
+     print(f"   type: {type(tokenizer)}")
+     print(f"   value: {tokenizer}")
+     print(f"   hasattr('encode'): {hasattr(tokenizer, 'encode')}")
+     print(f"   hasattr('__call__'): {hasattr(tokenizer, '__call__')}")
+
+     # Tokenizer round-trip test
+     test_input = "Hello"
+     print(f"\n🔤 Tokenizer test: '{test_input}'")
+
+     test_tokens = tokenizer(test_input, return_tensors="pt")
+     print("   ✅ Tokenizer call succeeded")
+     print(f"   input_ids shape: {test_tokens['input_ids'].shape}")
+     print(f"   attention_mask shape: {test_tokens['attention_mask'].shape}")
+
+     # Decoding test
+     decoded = tokenizer.decode(test_tokens['input_ids'][0], skip_special_tokens=True)
+     print(f"   decoded result: '{decoded}'")
+
+ except Exception as e:
+     print(f"❌ Tokenizer test failed: {e}")
+     print(f"   error type: {type(e).__name__}")
+     traceback.print_exc()
+
+ # Model test
+ print("\n🔧 Starting model test...")
+
+ try:
+     print("📤 Loading model...")
+     from modeling import KananaVForConditionalGeneration
+
+     if IS_LOCAL:
+         model = KananaVForConditionalGeneration.from_pretrained(
+             MODEL_PATH,
+             torch_dtype=torch.float16,
+             trust_remote_code=True,
+             device_map=None,
+             low_cpu_mem_usage=True
+         )
+     else:
+         model = KananaVForConditionalGeneration.from_pretrained(
+             MODEL_PATH,
+             token=HF_TOKEN,
+             torch_dtype=torch.float16,
+             trust_remote_code=True,
+             device_map=None,
+             low_cpu_mem_usage=True
+         )
+
+     print("✅ Model loaded")
+     # print(f"   type: {type(model)}")
+     # print(f"   device: {next(model.parameters()).device}")
+
+     # Model inference test
+     test_input = "Hello"
+     formatted_prompt = f"<|im_start|>user\n{test_input}<|im_end|>\n<|im_start|>assistant\n"
+     max_length: Optional[int] = None
+
+     inputs = tokenizer(
+         formatted_prompt,
+         return_tensors="pt",
+         padding=True,
+         truncation=True,
+         max_length=512
+     )
+
+     print(f"\n🤖 Model inference test: '{test_input}'")
+
+     # Generation settings for Kanana
+     max_new_tokens = max_length or 100
+
+     with torch.no_grad():
+         outputs = model.generate(
+             input_ids=inputs["input_ids"],
+             attention_mask=inputs["attention_mask"],
+             max_new_tokens=max_new_tokens,
+             repetition_penalty=1.1,
+             no_repeat_ngram_size=2,
+             pad_token_id=tokenizer.eos_token_id,
+             eos_token_id=tokenizer.eos_token_id,
+             use_cache=True
+         )
+
+     print("   ✅ Model call succeeded")
+     print(f"   outputs type: {type(outputs)}")
+     print(f"   outputs shape: {outputs.shape}")
+
+     # Decoding test
+     # model.generate() returns the full sequence, so decode it directly.
+     # outputs[0] is the first result in the batch.
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+     # Strip the input prompt from the response (optional)
+     assistant_response = response.split("<|im_start|>assistant\n")[-1]
+
+     print(f"   generated full text: '{response}'")
+     print(f"   assistant response: '{assistant_response.strip()}'")
+
+ except Exception as e:
+     print(f"❌ Model test failed: {e}")
+     print(f"   error type: {type(e).__name__}")
+     traceback.print_exc()
+
+ print("\n✅ Test complete!")
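The script runs top to bottom and only prints results. A pytest wrapper around the same tokenizer round-trip makes the check assertable in CI; this is a hypothetical sketch under the local-path assumption used above:

import os

import pytest
from transformers import AutoTokenizer

MODEL_PATH = "../lily_llm_core/models/kanana-1.5-v-3b-instruct"

@pytest.mark.skipif(not os.path.exists(MODEL_PATH), reason="local model not present")
def test_tokenizer_round_trip():
    # Encoding then decoding a short string should give non-empty text back.
    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)
    tokens = tokenizer("Hello", return_tensors="pt")
    decoded = tokenizer.decode(tokens["input_ids"][0], skip_special_tokens=True)
    assert decoded.strip() != ""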