# lily-math-rag / app.py
# gbrabbit — auto commit at 07-2025-08 4:43:48, revision b9ecb65 (7.29 kB)
# (Hugging Face file-viewer chrome — "raw / history / blame" — kept here as a comment header.)
import gradio as gr
import os
import traceback
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import fitz # PyMuPDF
from PIL import Image
import io
# --- 1. Global state & environment configuration ---
# Model handles stay None/False until the loading section below populates them.
tokenizer = None
model = None
MODEL_LOADED = False

# Pull variables from a local .env file when python-dotenv is installed
# (mainly for local development; hosted environments inject env vars directly).
try:
    from dotenv import load_dotenv
    load_dotenv()
    print("โœ… .env ํŒŒ์ผ ๋กœ๋“œ๋จ")
except ImportError:
    print("โš ๏ธ python-dotenv๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์Œ, ์‹œ์Šคํ…œ ํ™˜๊ฒฝ ๋ณ€์ˆ˜ ์‚ฌ์šฉ")

# HF access token (optional) and the model repo id to load.
HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_NAME = os.getenv("MODEL_NAME", "gbrabbit/lily-math-model")

print(f"๐Ÿ” ๋ชจ๋ธ: {MODEL_NAME}")
print(f"๐Ÿ” HF ํ† ํฐ: {'โœ… ์„ค์ •๋จ' if HF_TOKEN else 'โŒ ์„ค์ •๋˜์ง€ ์•Š์Œ'}")
# --- 2. Core logic: load the model and tokenizer ---
# Populates the module-level `tokenizer`, `model` and `MODEL_LOADED` globals.
# Any failure (missing custom module, download/auth error, ...) is caught so
# the UI can report the problem instead of crashing at import time.
try:
    print("๐Ÿ”ง ๋ชจ๋ธ ๋ฐ ํ† ํฌ๋‚˜์ด์ € ๋กœ๋”ฉ ์‹œ์ž‘...")

    # Custom multimodal model class shipped alongside this app.
    # NOTE: imported unconditionally, so a missing `modeling` module fails
    # both branches below — same as the original flow.
    from modeling import KananaVForConditionalGeneration

    if not HF_TOKEN:
        # No token: fall back to a public, text-only model.
        print("โš ๏ธ HF ํ† ํฐ์ด ์—†์–ด ๊ณต๊ฐœ ๋ชจ๋ธ(DialoGPT)๋กœ ๋Œ€์ฒดํ•ฉ๋‹ˆ๋‹ค.")
        MODEL_NAME = "microsoft/DialoGPT-medium"
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_NAME,
            torch_dtype=torch.float16,
            device_map="auto",
        )
    else:
        # Token present: load the gated custom multimodal model.
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_NAME,
            token=HF_TOKEN,
            trust_remote_code=True,
        )
        model = KananaVForConditionalGeneration.from_pretrained(
            MODEL_NAME,
            token=HF_TOKEN,
            torch_dtype=torch.float16,
            trust_remote_code=True,
            device_map="auto",  # automatic GPU placement (required on the server)
        )
        print("โœ… ์ปค์Šคํ…€ ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ!")
    MODEL_LOADED = True
except Exception as e:
    print(f"โŒ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ: {e}")
    traceback.print_exc()
    MODEL_LOADED = False
# --- 3. ํŒŒ์ผ ์ฒ˜๋ฆฌ ์œ ํ‹ธ๋ฆฌํ‹ฐ ---
def extract_text_from_pdf(pdf_file):
    """Extract the plain text of every page of an uploaded PDF.

    Parameters:
        pdf_file: file-like object whose ``read()`` returns the raw PDF
            bytes (e.g. a Gradio upload handle).

    Returns:
        The concatenated text of all pages, or a Korean error-message
        string if the PDF could not be parsed (callers display the return
        value as text either way).
    """
    try:
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
        try:
            return "".join(page.get_text() for page in doc)
        finally:
            # Fix: the original only closed on full success, leaking the
            # document if any page's get_text() raised.
            doc.close()
    except Exception as e:
        print(f"PDF ์ฒ˜๋ฆฌ ์˜ค๋ฅ˜: {e}")
        return f"PDF ํŒŒ์ผ์„ ์ฝ๋Š” ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e}"
def process_uploaded_file(file):
    """Split an uploaded file into a (text content, PIL image) pair.

    PDFs yield extracted text and no image; PNG/JPEG yield a fixed notice
    string plus the RGB image object (fed to the multimodal model instead
    of OCR). ``None`` or unsupported extensions yield text only.
    """
    if file is None:
        return "", None  # nothing attached

    ext = os.path.splitext(file.name)[1].lower()

    if ext == '.pdf':
        # PDFs contribute text only.
        return extract_text_from_pdf(file), None
    if ext in ('.png', '.jpg', '.jpeg'):
        # Hand the image itself to the model rather than running OCR.
        rgb = Image.open(file).convert('RGB')
        return "์—…๋กœ๋“œ๋œ ์ด๋ฏธ์ง€๊ฐ€ ์žˆ์Šต๋‹ˆ๋‹ค.", rgb
    return f"์ง€์›ํ•˜์ง€ ์•Š๋Š” ํŒŒ์ผ ํ˜•์‹: {ext}", None
# --- 4. ํ•ต์‹ฌ ๋กœ์ง: ํ†ตํ•ฉ ์‘๋‹ต ์ƒ์„ฑ ํ•จ์ˆ˜ ---
def generate_response(prompt_template: str, message: str, file=None):
    """Generate a model reply for a text message plus optional attachment.

    Parameters:
        prompt_template: format string containing a ``{message}``
            placeholder (chat-template markup around the user turn).
        message: the user's text input.
        file: optional Gradio upload handle (PDF or image); ``None`` when
            nothing was attached.
            BUGFIX: the original annotated this as bare ``Optional``
            without importing it from ``typing`` — annotations are
            evaluated at ``def`` time, so the module raised NameError on
            import. The broken annotation is dropped; default unchanged.

    Returns:
        The decoded assistant text, or a Korean error-message string.
    """
    if not MODEL_LOADED:
        return "โŒ ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ๊ด€๋ฆฌ์ž์—๊ฒŒ ๋ฌธ์˜ํ•˜์„ธ์š”."
    try:
        # 1. Split the attachment into text and/or an image.
        file_text, pil_image = process_uploaded_file(file)

        # 2. Build the full prompt (attachment text appended to the message).
        full_message = message
        if file_text:
            full_message += f"\n\n[์ฒจ๋ถ€ ํŒŒ์ผ ๋‚ด์šฉ]\n{file_text}"
        full_prompt = prompt_template.format(message=full_message)

        # 3. Tokenize and move tensors onto the model's device.
        inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)

        # 4. Generation parameters (sampling; pad with EOS for open-ended chat).
        generation_args = {
            "max_new_tokens": 512,
            "temperature": 0.7,
            "do_sample": True,
            "pad_token_id": tokenizer.eos_token_id,
        }

        # 5. Attach preprocessed pixels for multimodal generation when present.
        #    (`is not None` instead of truthiness — clearer for image objects.)
        if pil_image is not None:
            print("๐Ÿ–ผ๏ธ ์ด๋ฏธ์ง€ ํฌํ•จ, ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋“œ๋กœ ์ƒ์„ฑ")
            # Preprocessing shape follows the KananaV model's requirements;
            # may need adjustment if the model's processor API changes.
            pixel_values = model.vision_model.image_processor(
                pil_image, return_tensors='pt'
            )['pixel_values']
            generation_args["pixel_values"] = pixel_values.to(
                model.device, dtype=torch.float16
            )
        else:
            print("๐Ÿ“„ ํ…์ŠคํŠธ๋งŒ์œผ๋กœ ์ƒ์„ฑ")

        # 6. One generate() call covers both the text-only and multimodal paths.
        with torch.no_grad():
            outputs = model.generate(**inputs, **generation_args)

        # 7. Decode only the newly generated tokens, stripping the prompt echo.
        input_length = inputs["input_ids"].shape[1]
        response_ids = outputs[0][input_length:]
        return tokenizer.decode(response_ids, skip_special_tokens=True).strip()
    except Exception as e:
        print(f"โŒ ์‘๋‹ต ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
        traceback.print_exc()
        return f"์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {e}"
# --- 5. Gradio UI ๋ฐ ์‹คํ–‰ ---
# --- 5. Gradio UI definition ---
# Declarative layout: a chat tab wired to generate_response(), plus a
# read-only system-info tab.  `demo` is launched by the __main__ guard below.
with gr.Blocks(title="Lily Math RAG System", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# ๐Ÿงฎ Lily Math RAG System")
    gr.Markdown("์ˆ˜ํ•™ ๋ฌธ์ œ ํ•ด๊ฒฐ ๋ฐ ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋Œ€ํ™”๋ฅผ ์œ„ํ•œ AI ์‹œ์Šคํ…œ์ž…๋‹ˆ๋‹ค.")
    with gr.Tabs():
        with gr.Tab("๐Ÿ’ฌ ์ฑ„ํŒ…"):
            # ChatML-style wrapper placed around the user's message.
            chat_prompt = "<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
            chatbot = gr.Chatbot(height=500, label="๋Œ€ํ™”์ฐฝ", type="messages")
            with gr.Row():
                with gr.Column(scale=4):
                    msg = gr.Textbox(label="๋ฉ”์‹œ์ง€", placeholder="์ด๋ฏธ์ง€๋‚˜ PDF๋ฅผ ์ฒจ๋ถ€ํ•˜๊ณ  ์งˆ๋ฌธํ•ด๋ณด์„ธ์š”!", lines=3, show_label=False)
                with gr.Column(scale=1, min_width=150):
                    file_input = gr.File(label="ํŒŒ์ผ ์—…๋กœ๋“œ", file_types=[".pdf", ".png", ".jpg", ".jpeg"])

            def respond(message, chat_history, file):
                # Generate the reply, then append both turns in "messages" format
                # ({"role": ..., "content": ...}) as required by type="messages".
                bot_message = generate_response(chat_prompt, message, file)
                chat_history.append({"role": "user", "content": message})
                chat_history.append({"role": "assistant", "content": bot_message})
                # First output clears the textbox; second updates the chat.
                return "", chat_history

            msg.submit(respond, [msg, chatbot, file_input], [msg, chatbot])
        with gr.Tab("โš™๏ธ ์‹œ์Šคํ…œ ์ •๋ณด"):
            # Status strings reflect module-load state captured at import time.
            gr.Markdown(f"**๋ชจ๋ธ**: `{MODEL_NAME}`")
            gr.Markdown(f"**๋ชจ๋ธ ์ƒํƒœ**: `{'โœ… ๋กœ๋“œ๋จ' if MODEL_LOADED else 'โŒ ๋กœ๋“œ ์‹คํŒจ'}`")
if __name__ == "__main__":
    # share=True also creates a public link reachable from outside.
    demo.launch(share=True)