Spaces:

gbrabbit
/

lily-math-rag

Sleeping

File size: 8,749 Bytes

e3f9de3
 
a796dd8
 
8f7bcc7
7964ad2
a796dd8
e3f9de3
0553b33
7964ad2
a796dd8
e3f9de3
 
 
 
d3654f8
 
e3f9de3
d3654f8
abb24e2
 
2100340
 
abb24e2
 
 
2100340
7964ad2
a796dd8
abb24e2
4056037
abb24e2
 
 
 
 
 
a796dd8
7964ad2
 
abb24e2
 
 
 
b9ecb65
e3f9de3
7964ad2
 
 
e3f9de3
abb24e2
 
 
 
e3f9de3
 
7964ad2
 
abb24e2
a796dd8
b9ecb65
8f7bcc7
a796dd8
 
e3f9de3
7964ad2
0553b33
7964ad2
b9ecb65
0553b33
 
 
b9ecb65
 
0553b33
7964ad2
e3f9de3
a796dd8
e3f9de3
7964ad2
 
e3f9de3
 
7964ad2
 
 
e3f9de3
 
7964ad2
 
 
b9ecb65
7964ad2
e3f9de3
7964ad2
e3f9de3
7964ad2
 
b9ecb65
e3f9de3
 
 
 
 
 
b9ecb65
a796dd8
7964ad2
e3f9de3
7964ad2
 
 
e3f9de3
a796dd8
e3f9de3
a796dd8
e3f9de3
 
7964ad2
 
 
 
e3f9de3
a796dd8
b9ecb65
e3f9de3
 
7964ad2
e3f9de3
 
7964ad2
0f0528f
e3f9de3
7964ad2
 
e3f9de3
15846c7
7964ad2
15846c7
 
 
 
e3f9de3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b9ecb65
 
abb24e2
 
b9ecb65
a796dd8
e3f9de3
abb24e2
e3f9de3
 
abb24e2
 
959d547

# File: app.py (final revision)

import gradio as gr
import os
import traceback
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoImageProcessor
import torch
import fitz
from PIL import Image
from typing import Optional, List

# --- 1 & 2. Global state, environment setup, and model loading ---
# (Unchanged from the previous revision; the full code follows.)
# Model handles start out empty and MODEL_LOADED starts False.
tokenizer = model = image_processor = None
MODEL_LOADED = False
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

# The run counts as local when a .env file exists in this or the parent
# directory, or when the IS_LOCAL environment variable is exactly 'true'.
IS_LOCAL = (
    any(os.path.exists(env_file) for env_file in ('.env', '../.env'))
    or os.getenv('IS_LOCAL') == 'true'
)

# python-dotenv is optional: a missing package only produces a warning.
try:
    from dotenv import load_dotenv
    if IS_LOCAL:
        load_dotenv()
        print("✅ .env 파일 로드됨")
except ImportError:
    print("⚠️ python-dotenv가 설치되지 않음")

# Read after load_dotenv() so values from a local .env file are visible.
HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_NAME_SERVER = os.getenv("MODEL_NAME", "gbrabbit/lily-math-model")
MODEL_PATH_LOCAL = "../lily_llm_core/models/kanana_1_5_v_3b_instruct"
if IS_LOCAL:
    MODEL_PATH = MODEL_PATH_LOCAL
else:
    MODEL_PATH = MODEL_NAME_SERVER

# Startup banner summarising the resolved environment.
print(
    "============== 시스템 환경 정보 ==============",
    f"🔍 실행 환경: {'로컬' if IS_LOCAL else '서버'}",
    f"🔍 모델 경로: {MODEL_PATH}",
    f"🔍 사용 디바이스: {DEVICE.upper()}",
    "==========================================",
    sep="\n",
)
# Load tokenizer, model and image processor. Any failure is printed with a
# traceback and leaves MODEL_LOADED False instead of aborting the module.
try:
    print("🔧 모델 로딩 시작...")
    from modeling import KananaVForConditionalGeneration

    if not IS_LOCAL:
        # Server: every from_pretrained call authenticates with HF_TOKEN.
        if not HF_TOKEN:
            raise ValueError("서버 환경에서는 Hugging Face 토큰(HF_TOKEN)이 반드시 필요합니다.")
        _hub_kwargs = {"token": HF_TOKEN, "trust_remote_code": True}
        tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, **_hub_kwargs)
        model = KananaVForConditionalGeneration.from_pretrained(
            MODEL_PATH, torch_dtype=torch.float16, device_map="auto", **_hub_kwargs
        )
        image_processor = AutoImageProcessor.from_pretrained(MODEL_PATH, **_hub_kwargs)
        print("✅ 서버 모델 및 이미지 프로세서 로딩 완료!")
    else:
        # Local: load strictly from disk and move the model onto DEVICE.
        if not os.path.exists(MODEL_PATH):
            raise FileNotFoundError(f"로컬 모델 경로를 찾을 수 없습니다: {MODEL_PATH}")
        _disk_kwargs = {"trust_remote_code": True, "local_files_only": True}
        tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, **_disk_kwargs)
        model = KananaVForConditionalGeneration.from_pretrained(
            MODEL_PATH, torch_dtype=torch.bfloat16, **_disk_kwargs
        ).to(DEVICE)
        image_processor = AutoImageProcessor.from_pretrained(MODEL_PATH, **_disk_kwargs)
        print("✅ 로컬 모델 및 이미지 프로세서 로딩 완료!")

    MODEL_LOADED = True
except Exception as load_error:
    print(f"❌ 모델 로딩 실패: {load_error}")
    traceback.print_exc()
    MODEL_LOADED = False

# --- 3. Response generation logic (unchanged from the previous revision) ---
def extract_text_from_pdf(pdf_file_path):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file_path: Path of the PDF file to open.

    Returns:
        The text of all pages joined without a separator. If opening or
        reading fails, the error is printed and an error-message string
        describing the failure is returned instead of raising.
    """
    try:
        # The context manager closes the document even when extracting a
        # page raises, so the file handle is never leaked.
        with fitz.open(pdf_file_path) as doc:
            return "".join(page.get_text() for page in doc)
    except Exception as e:
        print(f"PDF 처리 오류: {e}")
        return f"PDF 파일을 읽는 중 오류가 발생했습니다: {e}"

def generate_response(prompt_template: str, message: str, files: Optional[List] = None):
    """Generate a model reply for a message with optional file attachments.

    Args:
        prompt_template: Template string with a ``{message}`` placeholder.
        message: The user's text.
        files: Optional uploads. Each entry may be a plain path
            (``str`` / ``os.PathLike``) or an object exposing its path as
            ``.name``. ``.pdf`` files contribute their extracted text;
            ``.png`` / ``.jpg`` / ``.jpeg`` files are run through the image
            processor. Any other extension is ignored.

    Returns:
        The decoded output after the last ``<|im_start|>assistant\\n`` marker,
        stripped. If the model is not loaded, or any step raises, a
        human-readable error string is returned instead.
    """
    if not MODEL_LOADED:
        return "❌ 모델이 로드되지 않았습니다."
    try:
        all_pixel_values, all_image_metas, file_texts = [], [], []
        for file in files or []:
            # Accept plain paths as well as upload objects carrying ``.name``.
            # Path-likes are used directly because ``Path.name`` is only the
            # basename, not the full path.
            file_path = file if isinstance(file, (str, os.PathLike)) else file.name
            file_extension = os.path.splitext(file_path)[1].lower()
            if file_extension == '.pdf':
                file_texts.append(extract_text_from_pdf(file_path))
            elif file_extension in ('.png', '.jpg', '.jpeg'):
                # convert() yields an independent image, so the file handle
                # can be released right away.
                with Image.open(file_path) as raw_image:
                    pil_image = raw_image.convert('RGB')
                processed_data = image_processor(pil_image)
                all_pixel_values.append(processed_data["pixel_values"])
                all_image_metas.append(processed_data["image_meta"])

        # One "<image>" placeholder per processed image, then any PDF text.
        image_tokens = "<image>" * len(all_pixel_values)
        pdf_content = "\n\n".join(file_texts)
        full_message = message
        if image_tokens:
            full_message += f"\n{image_tokens}"
        if pdf_content:
            full_message += f"\n\n[첨부된 PDF 내용]:\n{pdf_content}"
        full_prompt = prompt_template.format(message=full_message)

        if all_image_metas:
            # Merge the per-image meta dicts into one dict of lists.
            combined_metas = {key: [meta[key] for meta in all_image_metas] for key in all_image_metas[0]}
            inputs = tokenizer.encode_prompt(prompt=full_prompt, image_meta=combined_metas)
            # Tensors get a leading batch dimension and move to the model device.
            inputs = {k: (v.unsqueeze(0).to(model.device) if torch.is_tensor(v) else v) for k, v in inputs.items()}
        else:
            inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

        generation_args = {
            "max_new_tokens": 32,
            "temperature": 0.8,
            "do_sample": True,
            "pad_token_id": tokenizer.eos_token_id,
            "eos_token_id": tokenizer.eos_token_id,
            "top_p": 0.95,
        }
        with torch.no_grad():
            if all_pixel_values:
                outputs = model.generate(**inputs, pixel_values=all_pixel_values, image_metas=combined_metas, **generation_args)
            else:
                outputs = model.generate(**inputs, **generation_args)

        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # NOTE(review): with skip_special_tokens=True the "<|im_start|>" marker
        # may already be stripped from the decoded text, in which case this
        # split is a no-op and the prompt stays in the reply — confirm
        # against the tokenizer.
        return response.split("<|im_start|>assistant\n")[-1].strip()
    except Exception as e:
        print(f"❌ 응답 생성 중 오류 발생: {e}")
        traceback.print_exc()
        return f"오류가 발생했습니다: {e}"


# --- 4. Gradio UI and launch (final revision) ---
with gr.Blocks(title="Lily LLM System", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧮 Lily LLM System")
    gr.Markdown("이미지, PDF, 텍스트를 이해하고 답변하는 멀티모달 AI 시스템입니다.")

    with gr.Tabs():
        with gr.Tab("💬 채팅"):
            # Single-turn prompt; generate_response fills in {message}.
            chat_prompt = "<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
            chatbot = gr.Chatbot(height=320, label="대화창", elem_id="chatbot", type="messages")

            with gr.Row():
                msg = gr.Textbox(label="메시지 입력", placeholder="메시지를 입력하세요", lines=3, show_label=False, scale=4)
                file_input = gr.File(label="파일 업로드", file_count="multiple", file_types=[".pdf", ".png", ".jpg", ".jpeg"], scale=1)
                send_btn = gr.Button("전송", variant="primary", scale=1)

            def respond(message, chat_history, files):
                """Handle one chat turn.

                Args:
                    message: Text from the message box (may be None).
                    chat_history: Message dicts shown in the chatbot
                        (may be None).
                    files: Uploaded files, or None.

                Returns:
                    ``("", chat_history, None)``: an empty string for the
                    textbox, the history for the chatbot, and None to clear
                    the file input.
                """
                # Normalise missing inputs so the strip/append calls below
                # cannot fail on None.
                message = message or ""
                chat_history = chat_history or []

                # Nothing to send: leave the history as it is.
                if not message.strip() and not files:
                    return "", chat_history, None

                bot_message = generate_response(chat_prompt, message, files)

                chat_history.append({"role": "user", "content": message})
                chat_history.append({"role": "assistant", "content": bot_message})

                # Three return values, one per component in `outputs`.
                return "", chat_history, None

            # The button click owns the public "chat" API endpoint.
            send_btn.click(
                respond,
                inputs=[msg, chatbot, file_input],
                outputs=[msg, chatbot, file_input],
                api_name="chat",  # endpoint name is given without a leading slash
            )
            # Submitting the textbox runs the same handler. It is not given a
            # second API endpoint, because reusing api_name="chat" would
            # duplicate the endpoint name.
            msg.submit(
                respond,
                inputs=[msg, chatbot, file_input],
                outputs=[msg, chatbot, file_input],
                api_name=False,
            )

        with gr.Tab("⚙️ 시스템 정보"):
            gr.Markdown(f"**실행 환경**: `{'로컬' if IS_LOCAL else '서버'}`")
            gr.Markdown(f"**모델 경로**: `{MODEL_PATH}`")
            gr.Markdown(f"**모델 상태**: `{'✅ 로드됨' if MODEL_LOADED else '❌ 로드 실패'}`")

if __name__ == "__main__":
    # Server runs use Gradio's default launch settings; local runs bind to
    # loopback on port 8006 with sharing disabled.
    if not IS_LOCAL:
        print("\n🚀 서버를 시작합니다...")
        demo.launch()
    else:
        print("\n🚀 로컬 서버를 시작합니다. http://127.0.0.1:8006")
        demo.launch(server_name="127.0.0.1", server_port=8006, share=False)