Spaces:

gbrabbit
/

lily-math-rag

Sleeping

App Files Files Community

gbrabbit committed on Aug 6, 2025

Commit

0553b33

1 Parent(s): 5e29010

Auto commit at 07-2025-08 1:02:24

Browse files

Files changed (2) hide show

app.py +318 -107
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -5,6 +5,10 @@ import json
 import traceback
 from transformers import AutoTokenizer
 import torch
 # .env 파일에서 환경 변수 로드
 try:
@@ -48,17 +52,38 @@ try:
         print("   커스텀 모델 로딩 중...")
         # 커스텀 모델 클래스 import (Space 폴더의 modeling.py 사용)
-        from modeling import KananaVForConditionalGeneration
-        model = KananaVForConditionalGeneration.from_pretrained(
-            MODEL_NAME,
-            token=HF_TOKEN,
-            torch_dtype=torch.float16,
-            trust_remote_code=True,
-            device_map=None,
-            low_cpu_mem_usage=True
-        )
-        print("   ✅ 커스텀 모델 로딩 완료")
     else:
         print("   ⚠️ 토큰이 없어서 공개 모델 사용")
         MODEL_NAME = "microsoft/DialoGPT-medium"
@@ -88,16 +113,84 @@ print(f"\n3. 최종 상태:")
 print(f"   MODEL_LOADED: {MODEL_LOADED}")
 print(f"   최종 모델명: {MODEL_NAME}")
-def chat_with_model(message, history, image=None):
     if not MODEL_LOADED:
         return "❌ 모델이 로드되지 않았습니다."
     try:
-        inputs = tokenizer(message, return_tensors="pt")
         with torch.no_grad():
-            if image is not None:
                 # 이미지가 있는 경우 멀티모달 생성
-                from PIL import Image
                 import torchvision.transforms as transforms
                 # 이미지 전처리
@@ -107,85 +200,157 @@ def chat_with_model(message, history, image=None):
                     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                 ])
-                if isinstance(image, str):
-                    pil_image = Image.open(image).convert('RGB')
-                else:
-                    pil_image = image.convert('RGB')
                 pixel_values = transform(pil_image).unsqueeze(0)
                 image_metas = {"vision_grid_thw": torch.tensor([[1, 14, 14]])}  # 기본 그리드 크기
                 # 멀티모달 모델의 forward 메서드 사용
-                outputs = model(
-                    input_ids=inputs["input_ids"],
-                    attention_mask=inputs["attention_mask"],
-                    pixel_values=[pixel_values],
-                    image_metas=image_metas,
-                    max_new_tokens=200,
-                    temperature=0.7,
-                    do_sample=True,
-                    pad_token_id=tokenizer.eos_token_id
-                )
             else:
-                # 이미지가 없는 경우 텍스트만 생성
-                outputs = model(
-                    input_ids=inputs["input_ids"],
-                    attention_mask=inputs["attention_mask"],
-                    max_new_tokens=200,
-                    temperature=0.7,
-                    do_sample=True,
-                    pad_token_id=tokenizer.eos_token_id
-                )
         # outputs가 튜플인 경우 첫 번째 요소 사용
         if isinstance(outputs, tuple):
             logits = outputs[0]
         else:
-            logits = outputs.logits if hasattr(outputs, 'logits') else outputs
         # 가장 높은 확률의 토큰 선택
         next_token = torch.argmax(logits[:, -1, :], dim=-1)
         generated_tokens = [next_token]
         # 추가 토큰 생성
-        for _ in range(199):  # max_new_tokens - 1
             inputs["input_ids"] = torch.cat([inputs["input_ids"], next_token.unsqueeze(-1)], dim=-1)
             inputs["attention_mask"] = torch.cat([inputs["attention_mask"], torch.ones_like(next_token.unsqueeze(-1))], dim=-1)
             with torch.no_grad():
-                outputs = model(**inputs)
-                if isinstance(outputs, tuple):
-                    logits = outputs[0]
-                else:
-                    logits = outputs.logits if hasattr(outputs, 'logits') else outputs
-                next_token = torch.argmax(logits[:, -1, :], dim=-1)
-                generated_tokens.append(next_token)
-                if next_token.item() == tokenizer.eos_token_id:
-                    break
         # 생성된 토큰들을 디코딩
         generated_ids = torch.cat(generated_tokens, dim=0)
         response = tokenizer.decode(generated_ids, skip_special_tokens=True)
-        if message in response:
-            response = response.replace(message, "").strip()
         return response if response else "죄송합니다. 응답을 생성할 수 없습니다."
     except Exception as e:
         return f"오류 발생: {str(e)}"
-def solve_math_problem(problem, image=None):
     if not MODEL_LOADED:
         return "❌ 모델이 로드되지 않았습니다."
     try:
-        prompt = f"다음 수학 문제를 단계별로 풀어주세요: {problem}"
-        inputs = tokenizer(prompt, return_tensors="pt")
         with torch.no_grad():
-            if image is not None:
                 # 이미지가 있는 경우 멀티모달 생성
-                from PIL import Image
                 import torchvision.transforms as transforms
                 # 이미지 전처리
@@ -195,72 +360,118 @@ def solve_math_problem(problem, image=None):
                     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                 ])
-                if isinstance(image, str):
-                    pil_image = Image.open(image).convert('RGB')
-                else:
-                    pil_image = image.convert('RGB')
                 pixel_values = transform(pil_image).unsqueeze(0)
                 image_metas = {"vision_grid_thw": torch.tensor([[1, 14, 14]])}  # 기본 그리드 크기
                 # 멀티모달 모델의 forward 메서드 사용
-                outputs = model(
-                    input_ids=inputs["input_ids"],
-                    attention_mask=inputs["attention_mask"],
-                    pixel_values=[pixel_values],
-                    image_metas=image_metas,
-                    max_new_tokens=300,
-                    temperature=0.3,
-                    do_sample=True,
-                    pad_token_id=tokenizer.eos_token_id
-                )
             else:
-                # 이미지가 없는 경우 텍스트만 생성
-                outputs = model(
-                    input_ids=inputs["input_ids"],
-                    attention_mask=inputs["attention_mask"],
-                    max_new_tokens=300,
-                    temperature=0.3,
-                    do_sample=True,
-                    pad_token_id=tokenizer.eos_token_id
-                )
         # outputs가 튜플인 경우 첫 번째 요소 사용
         if isinstance(outputs, tuple):
             logits = outputs[0]
         else:
-            logits = outputs.logits if hasattr(outputs, 'logits') else outputs
         # 가장 높은 확률의 토큰 선택
         next_token = torch.argmax(logits[:, -1, :], dim=-1)
         generated_tokens = [next_token]
         # 추가 토큰 생성
-        for _ in range(299):  # max_new_tokens - 1
             inputs["input_ids"] = torch.cat([inputs["input_ids"], next_token.unsqueeze(-1)], dim=-1)
             inputs["attention_mask"] = torch.cat([inputs["attention_mask"], torch.ones_like(next_token.unsqueeze(-1))], dim=-1)
             with torch.no_grad():
-                outputs = model(**inputs)
-                if isinstance(outputs, tuple):
-                    logits = outputs[0]
-                else:
-                    logits = outputs.logits if hasattr(outputs, 'logits') else outputs
-                next_token = torch.argmax(logits[:, -1, :], dim=-1)
-                generated_tokens.append(next_token)
-                if next_token.item() == tokenizer.eos_token_id:
-                    break
         # 생성된 토큰들을 디코딩
         generated_ids = torch.cat(generated_tokens, dim=0)
         response = tokenizer.decode(generated_ids, skip_special_tokens=True)
-        if prompt in response:
-            response = response.replace(prompt, "").strip()
         return response if response else "죄송합니다. 수학 문제를 풀 수 없습니다."
     except Exception as e:
         return f"오류 발생: {str(e)}"
 with gr.Blocks(title="Lily Math RAG System", theme=gr.themes.Soft()) as demo:
@@ -274,16 +485,16 @@ with gr.Blocks(title="Lily Math RAG System", theme=gr.themes.Soft()) as demo:
                     msg = gr.Textbox(label="메시지를 입력하세요", placeholder="안녕하세요! 수학 문제를 도와주세요.", lines=2)
                     clear = gr.Button("대화 초기화")
                 with gr.Column(scale=1):
-                    gr.Markdown("### 📷 이미지 업로드")
-                    image_input = gr.Image(label="이미지 (선택사항)", type="pil")
-                    gr.Markdown("이미지를 업로드하면 멀티모달 대화가 가능합니다.")
-            def respond(message, chat_history, image):
-                bot_message = chat_with_model(message, chat_history, image)
                 chat_history.append({"role": "user", "content": message})
                 chat_history.append({"role": "assistant", "content": bot_message})
                 return "", chat_history
-            msg.submit(respond, [msg, chatbot, image_input], [msg, chatbot])
             clear.click(lambda: None, None, chatbot, queue=False)
         with gr.Tab("🧮 수학 문제 해결"):
@@ -292,12 +503,12 @@ with gr.Blocks(title="Lily Math RAG System", theme=gr.themes.Soft()) as demo:
                     math_input = gr.Textbox(label="수학 문제", placeholder="예: 2x + 5 = 13", lines=3)
                     solve_btn = gr.Button("문제 풀기", variant="primary")
                 with gr.Column(scale=1):
-                    gr.Markdown("### 📷 이미지 업로드")
-                    math_image_input = gr.Image(label="수학 문제 이미지 (선택사항)", type="pil")
-                    gr.Markdown("수학 문제 이미지를 업로드하면 더 정확한 답변을 받을 수 있습니다.")
                 with gr.Column(scale=2):
                     math_output = gr.Textbox(label="해답", lines=8, interactive=False)
-            solve_btn.click(solve_math_problem, [math_input, math_image_input], math_output)
         with gr.Tab("⚙️ 설정"):
             gr.Markdown("## 시스템 정보")

 import traceback
 from transformers import AutoTokenizer
 import torch
+import fitz  # PyMuPDF
+from PIL import Image
+import io
+import base64
 # .env 파일에서 환경 변수 로드
 try:
         print("   커스텀 모델 로딩 중...")
         # 커스텀 모델 클래스 import (Space 폴더의 modeling.py 사용)
+        try:
+            from modeling import KananaVForConditionalGeneration
+            print("   ✅ modeling.py import 성공")
+        except Exception as import_error:
+            print(f"   ❌ modeling.py import 실패: {import_error}")
+            raise import_error
+        try:
+            print(f"   모델 로딩 파라미터:")
+            print(f"     MODEL_NAME: {MODEL_NAME}")
+            print(f"     torch_dtype: {torch.float16}")
+            print(f"     trust_remote_code: True")
+            print(f"     device_map: None")
+            print(f"     low_cpu_mem_usage: True")
+            model = KananaVForConditionalGeneration.from_pretrained(
+                MODEL_NAME,
+                token=HF_TOKEN,
+                torch_dtype=torch.float16,
+                trust_remote_code=True,
+                device_map=None,
+                low_cpu_mem_usage=True
+            )
+            print("   ✅ 커스텀 모델 로딩 완료")
+            print(f"   모델 타입: {type(model)}")
+            print(f"   모델 디바이스: {next(model.parameters()).device}")
+        except Exception as model_error:
+            print(f"   ❌ 커스텀 모델 로딩 실패: {model_error}")
+            print(f"   오류 타입: {type(model_error).__name__}")
+            import traceback
+            traceback.print_exc()
+            raise model_error
     else:
         print("   ⚠️ 토큰이 없어서 공개 모델 사용")
         MODEL_NAME = "microsoft/DialoGPT-medium"
 print(f"   MODEL_LOADED: {MODEL_LOADED}")
 print(f"   최종 모델명: {MODEL_NAME}")
+def extract_text_from_pdf(pdf_file):
+    """PDF에서 텍스트 추출"""
+    try:
+        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
+        text = ""
+        for page in doc:
+            text += page.get_text()
+        doc.close()
+        return text
+    except Exception as e:
+        return f"PDF 읽기 오류: {str(e)}"
+def extract_text_from_image(image_file):
+    """이미지에서 OCR로 텍스트 추출"""
+    try:
+        # PIL로 이미지 열기
+        image = Image.open(image_file)
+        # 간단한 OCR (실제로는 더 정교한 OCR 라이브러리 사용 필요)
+        # 여기서는 이미지 정보만 반환
+        return f"이미지 파일: {image.size[0]}x{image.size[1]} 픽셀"
+    except Exception as e:
+        return f"이미지 읽기 오류: {str(e)}"
+def process_uploaded_file(file):
+    """업로드된 파일 처리"""
+    if file is None:
+        return None, None
+    file_path = file.name
+    file_extension = file_path.lower().split('.')[-1]
+    if file_extension == 'pdf':
+        text_content = extract_text_from_pdf(file)
+        return text_content, None
+    elif file_extension in ['png', 'jpg', 'jpeg']:
+        text_content = extract_text_from_image(file)
+        return text_content, file
+    else:
+        return f"지원하지 않는 파일 형식: {file_extension}", None
+def chat_with_model(message, history, file=None):
+    print(f"🔍 DEBUG: chat_with_model 시작")
+    print(f"   메시지: {message}")
+    print(f"   파일: {file}")
+    print(f"   MODEL_LOADED: {MODEL_LOADED}")
     if not MODEL_LOADED:
+        print("❌ DEBUG: 모델이 로드되지 않음")
         return "❌ 모델이 로드되지 않았습니다."
     try:
+        print("📁 DEBUG: 파일 처리 시작")
+        # 파일 처리
+        file_content = ""
+        image_file = None
+        if file is not None:
+            print(f"   파일명: {file.name}")
+            text_content, image_file = process_uploaded_file(file)
+            print(f"   텍스트 내용: {text_content[:100] if text_content else 'None'}...")
+            print(f"   이미지 파일: {image_file}")
+            if text_content:
+                file_content = f"\n[업로드된 파일 내용]\n{text_content}\n"
+        # 메시지에 파일 내용 추가
+        full_message = message + file_content
+        print(f"📝 DEBUG: 전체 메시지: {full_message[:200]}...")
+        print("🔤 DEBUG: 토크나이저 처리 시작")
+        inputs = tokenizer(full_message, return_tensors="pt")
+        print(f"   입력 shape: {inputs['input_ids'].shape}")
+        print(f"   attention_mask shape: {inputs['attention_mask'].shape}")
+        print("🤖 DEBUG: 모델 추론 시작")
         with torch.no_grad():
+            if image_file is not None:
+                print("🖼️ DEBUG: 이미지 처리 모드")
                 # 이미지가 있는 경우 멀티모달 생성
                 import torchvision.transforms as transforms
                 # 이미지 전처리
                     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                 ])
+                pil_image = Image.open(image_file).convert('RGB')
                 pixel_values = transform(pil_image).unsqueeze(0)
                 image_metas = {"vision_grid_thw": torch.tensor([[1, 14, 14]])}  # 기본 그리드 크기
+                print(f"   이미지 shape: {pixel_values.shape}")
+                print(f"   이미지 메타: {image_metas}")
                 # 멀티모달 모델의 forward 메서드 사용
+                print("🔄 DEBUG: 모델 호출 (멀티모달)")
+                try:
+                    outputs = model(
+                        input_ids=inputs["input_ids"],
+                        attention_mask=inputs["attention_mask"],
+                        pixel_values=[pixel_values],
+                        image_metas=image_metas,
+                        max_new_tokens=200,
+                        temperature=0.7,
+                        do_sample=True,
+                        pad_token_id=tokenizer.eos_token_id
+                    )
+                    print("✅ DEBUG: 멀티모달 모델 호출 성공")
+                except Exception as model_error:
+                    print(f"❌ DEBUG: 멀티모달 모델 호출 실패: {model_error}")
+                    print(f"   오류 타입: {type(model_error).__name__}")
+                    raise model_error
             else:
+                print("📄 DEBUG: 텍스트만 처리 모드")
+                # 텍스트만 생성
+                print("🔄 DEBUG: 모델 호출 (텍스트만)")
+                try:
+                    outputs = model(
+                        input_ids=inputs["input_ids"],
+                        attention_mask=inputs["attention_mask"],
+                        max_new_tokens=200,
+                        temperature=0.7,
+                        do_sample=True,
+                        pad_token_id=tokenizer.eos_token_id
+                    )
+                    print("✅ DEBUG: 텍스트 모델 호출 성공")
+                except Exception as model_error:
+                    print(f"❌ DEBUG: 텍스트 모델 호출 실패: {model_error}")
+                    print(f"   오류 타입: {type(model_error).__name__}")
+                    raise model_error
+        print("🔍 DEBUG: 출력 처리 시작")
+        print(f"   outputs 타입: {type(outputs)}")
+        print(f"   outputs 내용: {outputs}")
         # outputs가 튜플인 경우 첫 번째 요소 사용
         if isinstance(outputs, tuple):
+            print("📦 DEBUG: outputs가 튜플임")
             logits = outputs[0]
+            print(f"   logits shape: {logits.shape}")
         else:
+            print("📦 DEBUG: outputs가 객체임")
+            if hasattr(outputs, 'logits'):
+                logits = outputs.logits
+                print(f"   logits shape: {logits.shape}")
+            else:
+                logits = outputs
+                print(f"   outputs shape: {logits.shape}")
+        print("🎯 DEBUG: 토큰 생성 시작")
         # 가장 높은 확률의 토큰 선택
         next_token = torch.argmax(logits[:, -1, :], dim=-1)
         generated_tokens = [next_token]
+        print(f"   첫 번째 토큰: {next_token.item()}")
         # 추가 토큰 생성
+        print("🔄 DEBUG: 반복 토큰 생성 시작")
+        for i in range(199):  # max_new_tokens - 1
+            if i % 50 == 0:
+                print(f"   진행률: {i}/199")
             inputs["input_ids"] = torch.cat([inputs["input_ids"], next_token.unsqueeze(-1)], dim=-1)
             inputs["attention_mask"] = torch.cat([inputs["attention_mask"], torch.ones_like(next_token.unsqueeze(-1))], dim=-1)
             with torch.no_grad():
+                try:
+                    outputs = model(**inputs)
+                    if isinstance(outputs, tuple):
+                        logits = outputs[0]
+                    else:
+                        logits = outputs.logits if hasattr(outputs, 'logits') else outputs
+                    next_token = torch.argmax(logits[:, -1, :], dim=-1)
+                    generated_tokens.append(next_token)
+                    if next_token.item() == tokenizer.eos_token_id:
+                        print(f"   EOS 토큰 발견: {i}번째")
+                        break
+                except Exception as loop_error:
+                    print(f"❌ DEBUG: 토큰 생성 루프 오류 (i={i}): {loop_error}")
+                    raise loop_error
+        print("🔤 DEBUG: 토큰 디코딩 시작")
         # 생성된 토큰들을 디코딩
         generated_ids = torch.cat(generated_tokens, dim=0)
         response = tokenizer.decode(generated_ids, skip_special_tokens=True)
+        print(f"   원본 응답: {response[:200]}...")
+        if full_message in response:
+            response = response.replace(full_message, "").strip()
+            print(f"   정리된 응답: {response[:200]}...")
+        print("✅ DEBUG: chat_with_model 완료")
         return response if response else "죄송합니다. 응답을 생성할 수 없습니다."
     except Exception as e:
+        print(f"❌ DEBUG: chat_with_model 전체 오류: {e}")
+        print(f"   오류 타입: {type(e).__name__}")
+        import traceback
+        traceback.print_exc()
         return f"오류 발생: {str(e)}"
+def solve_math_problem(problem, file=None):
+    print(f"🔍 DEBUG: solve_math_problem 시작")
+    print(f"   문제: {problem}")
+    print(f"   파일: {file}")
+    print(f"   MODEL_LOADED: {MODEL_LOADED}")
     if not MODEL_LOADED:
+        print("❌ DEBUG: 모델이 로드되지 않음")
         return "❌ 모델이 로드되지 않았습니다."
     try:
+        print("📁 DEBUG: 파일 처리 시작")
+        # 파일 처리
+        file_content = ""
+        image_file = None
+        if file is not None:
+            print(f"   파일명: {file.name}")
+            text_content, image_file = process_uploaded_file(file)
+            print(f"   텍스트 내용: {text_content[:100] if text_content else 'None'}...")
+            print(f"   이미지 파일: {image_file}")
+            if text_content:
+                file_content = f"\n[업로드된 파일 내용]\n{text_content}\n"
+        # 메시지에 파일 내용 추가
+        full_prompt = f"다음 수학 문제를 단계별로 풀어주세요: {problem}{file_content}"
+        print(f"📝 DEBUG: 전체 프롬프트: {full_prompt[:200]}...")
+        print("🔤 DEBUG: 토크나이저 처리 시작")
+        inputs = tokenizer(full_prompt, return_tensors="pt")
+        print(f"   입력 shape: {inputs['input_ids'].shape}")
+        print(f"   attention_mask shape: {inputs['attention_mask'].shape}")
+        print("🤖 DEBUG: 모델 추론 시작")
         with torch.no_grad():
+            if image_file is not None:
+                print("🖼️ DEBUG: 이미지 처리 모드")
                 # 이미지가 있는 경우 멀티모달 생성
                 import torchvision.transforms as transforms
                 # 이미지 전처리
                     transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
                 ])
+                pil_image = Image.open(image_file).convert('RGB')
                 pixel_values = transform(pil_image).unsqueeze(0)
                 image_metas = {"vision_grid_thw": torch.tensor([[1, 14, 14]])}  # 기본 그리드 크기
+                print(f"   이미지 shape: {pixel_values.shape}")
+                print(f"   이미지 메타: {image_metas}")
                 # 멀티모달 모델의 forward 메서드 사용
+                print("🔄 DEBUG: 모델 호출 (멀티모달)")
+                try:
+                    outputs = model(
+                        input_ids=inputs["input_ids"],
+                        attention_mask=inputs["attention_mask"],
+                        pixel_values=[pixel_values],
+                        image_metas=image_metas,
+                        max_new_tokens=300,
+                        temperature=0.3,
+                        do_sample=True,
+                        pad_token_id=tokenizer.eos_token_id
+                    )
+                    print("✅ DEBUG: 멀티모달 모델 호출 성공")
+                except Exception as model_error:
+                    print(f"❌ DEBUG: 멀티모달 모델 호출 실패: {model_error}")
+                    print(f"   오류 타입: {type(model_error).__name__}")
+                    raise model_error
             else:
+                print("📄 DEBUG: 텍스트만 처리 모드")
+                # 텍스트만 생성
+                print("🔄 DEBUG: 모델 호출 (텍스트만)")
+                try:
+                    outputs = model(
+                        input_ids=inputs["input_ids"],
+                        attention_mask=inputs["attention_mask"],
+                        max_new_tokens=300,
+                        temperature=0.3,
+                        do_sample=True,
+                        pad_token_id=tokenizer.eos_token_id
+                    )
+                    print("✅ DEBUG: 텍스트 모델 호출 성공")
+                except Exception as model_error:
+                    print(f"❌ DEBUG: 텍스트 모델 호출 실패: {model_error}")
+                    print(f"   오류 타입: {type(model_error).__name__}")
+                    raise model_error
+        print("🔍 DEBUG: 출력 처리 시작")
+        print(f"   outputs 타입: {type(outputs)}")
+        print(f"   outputs 내용: {outputs}")
         # outputs가 튜플인 경우 첫 번째 요소 사용
         if isinstance(outputs, tuple):
+            print("📦 DEBUG: outputs가 튜플임")
             logits = outputs[0]
+            print(f"   logits shape: {logits.shape}")
         else:
+            print("📦 DEBUG: outputs가 객체임")
+            if hasattr(outputs, 'logits'):
+                logits = outputs.logits
+                print(f"   logits shape: {logits.shape}")
+            else:
+                logits = outputs
+                print(f"   outputs shape: {logits.shape}")
+        print("🎯 DEBUG: 토큰 생성 시작")
         # 가장 높은 확률의 토큰 선택
         next_token = torch.argmax(logits[:, -1, :], dim=-1)
         generated_tokens = [next_token]
+        print(f"   첫 번째 토큰: {next_token.item()}")
         # 추가 토큰 생성
+        print("🔄 DEBUG: 반복 토큰 생성 시작")
+        for i in range(299):  # max_new_tokens - 1
+            if i % 50 == 0:
+                print(f"   진행률: {i}/299")
             inputs["input_ids"] = torch.cat([inputs["input_ids"], next_token.unsqueeze(-1)], dim=-1)
             inputs["attention_mask"] = torch.cat([inputs["attention_mask"], torch.ones_like(next_token.unsqueeze(-1))], dim=-1)
             with torch.no_grad():
+                try:
+                    outputs = model(**inputs)
+                    if isinstance(outputs, tuple):
+                        logits = outputs[0]
+                    else:
+                        logits = outputs.logits if hasattr(outputs, 'logits') else outputs
+                    next_token = torch.argmax(logits[:, -1, :], dim=-1)
+                    generated_tokens.append(next_token)
+                    if next_token.item() == tokenizer.eos_token_id:
+                        print(f"   EOS 토큰 발견: {i}번째")
+                        break
+                except Exception as loop_error:
+                    print(f"❌ DEBUG: 토큰 생성 루프 오류 (i={i}): {loop_error}")
+                    raise loop_error
+        print("🔤 DEBUG: 토큰 디코딩 시작")
         # 생성된 토큰들을 디코딩
         generated_ids = torch.cat(generated_tokens, dim=0)
         response = tokenizer.decode(generated_ids, skip_special_tokens=True)
+        print(f"   원본 응답: {response[:200]}...")
+        if full_prompt in response:
+            response = response.replace(full_prompt, "").strip()
+            print(f"   정리된 응답: {response[:200]}...")
+        print("✅ DEBUG: solve_math_problem 완료")
         return response if response else "죄송합니다. 수학 문제를 풀 수 없습니다."
     except Exception as e:
+        print(f"❌ DEBUG: solve_math_problem 전체 오류: {e}")
+        print(f"   오류 타입: {type(e).__name__}")
+        import traceback
+        traceback.print_exc()
         return f"오류 발생: {str(e)}"
 with gr.Blocks(title="Lily Math RAG System", theme=gr.themes.Soft()) as demo:
                     msg = gr.Textbox(label="메시지를 입력하세요", placeholder="안녕하세요! 수학 문제를 도와주세요.", lines=2)
                     clear = gr.Button("대화 초기화")
                 with gr.Column(scale=1):
+                    gr.Markdown("### 📁 파일 업로드")
+                    file_input = gr.File(label="PDF/이미지 파일 (선택사항)", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
+                    gr.Markdown("PDF나 이미지 파일을 업로드하면 문서를 해석하여 답변합니다.")
+            def respond(message, chat_history, file):
+                bot_message = chat_with_model(message, chat_history, file)
                 chat_history.append({"role": "user", "content": message})
                 chat_history.append({"role": "assistant", "content": bot_message})
                 return "", chat_history
+            msg.submit(respond, [msg, chatbot, file_input], [msg, chatbot])
             clear.click(lambda: None, None, chatbot, queue=False)
         with gr.Tab("🧮 수학 문제 해결"):
                     math_input = gr.Textbox(label="수학 문제", placeholder="예: 2x + 5 = 13", lines=3)
                     solve_btn = gr.Button("문제 풀기", variant="primary")
                 with gr.Column(scale=1):
+                    gr.Markdown("### 📁 파일 업로드")
+                    math_file_input = gr.File(label="수학 문제 파일 (선택사항)", file_types=[".pdf", ".png", ".jpg", ".jpeg"])
+                    gr.Markdown("수학 문제 PDF나 이미지를 업로드하면 더 정확한 답변을 받을 수 있습니다.")
                 with gr.Column(scale=2):
                     math_output = gr.Textbox(label="해답", lines=8, interactive=False)
+            solve_btn.click(solve_math_problem, [math_input, math_file_input], math_output)
         with gr.Tab("⚙️ 설정"):
             gr.Markdown("## 시스템 정보")

requirements.txt CHANGED Viewed

@@ -8,3 +8,4 @@ python-dotenv>=1.0.0
 Pillow>=9.0.0
 torchvision>=0.15.0
 accelerate==1.9.0

 Pillow>=9.0.0
 torchvision>=0.15.0
 accelerate==1.9.0
+PyMuPDF>=1.23.0