# ํŒŒ์ผ: app.py (์ตœ์ข… ์ˆ˜์ •๋ณธ)
import gradio as gr
import os
import traceback
from transformers import AutoTokenizer, AutoModelForCausalLM, AutoImageProcessor
import torch
import fitz  # PyMuPDF
from PIL import Image
from typing import Optional, List
# --- 1 & 2. Global variables, environment setup, and model loading ---
tokenizer = None
model = None
image_processor = None
MODEL_LOADED = False
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
IS_LOCAL = os.path.exists('.env') or os.path.exists('../.env') or os.getenv('IS_LOCAL') == 'true'
try:
    from dotenv import load_dotenv
    if IS_LOCAL:
        load_dotenv()
        print("✅ .env file loaded")
except ImportError:
    print("⚠️ python-dotenv is not installed")
HF_TOKEN = os.getenv("HF_TOKEN")
MODEL_NAME_SERVER = os.getenv("MODEL_NAME", "gbrabbit/lily-math-model")
MODEL_PATH_LOCAL = "../lily_llm_core/models/kanana_1_5_v_3b_instruct"
MODEL_PATH = MODEL_PATH_LOCAL if IS_LOCAL else MODEL_NAME_SERVER
print(f"============== ์‹œ์Šคํ…œ ํ™˜๊ฒฝ ์ •๋ณด ==============")
print(f"๐Ÿ” ์‹คํ–‰ ํ™˜๊ฒฝ: {'๋กœ์ปฌ' if IS_LOCAL else '์„œ๋ฒ„'}")
print(f"๐Ÿ” ๋ชจ๋ธ ๊ฒฝ๋กœ: {MODEL_PATH}")
print(f"๐Ÿ” ์‚ฌ์šฉ ๋””๋ฐ”์ด์Šค: {DEVICE.upper()}")
print("==========================================")
try:
    print("🔧 Starting model load...")
    from modeling import KananaVForConditionalGeneration

    if IS_LOCAL:
        if not os.path.exists(MODEL_PATH):
            raise FileNotFoundError(f"Local model path not found: {MODEL_PATH}")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True, local_files_only=True)
        model = KananaVForConditionalGeneration.from_pretrained(
            MODEL_PATH, torch_dtype=torch.bfloat16, trust_remote_code=True, local_files_only=True,
        ).to(DEVICE)
        image_processor = AutoImageProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True, local_files_only=True)
        print("✅ Local model and image processor loaded!")
    else:
        if not HF_TOKEN:
            raise ValueError("A Hugging Face token (HF_TOKEN) is required in the server environment.")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, token=HF_TOKEN, trust_remote_code=True)
        model = KananaVForConditionalGeneration.from_pretrained(
            MODEL_PATH, token=HF_TOKEN, torch_dtype=torch.float16, trust_remote_code=True, device_map="auto"
        )
        image_processor = AutoImageProcessor.from_pretrained(MODEL_PATH, token=HF_TOKEN, trust_remote_code=True)
        print("✅ Server model and image processor loaded!")
    MODEL_LOADED = True
except Exception as e:
    print(f"❌ Model load failed: {e}")
    traceback.print_exc()
    MODEL_LOADED = False
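
# Optional sanity check (a minimal sketch, not part of the original flow):
# report the dtype/device the weights actually ended up with (bfloat16 on the
# local path, float16 sharded via device_map="auto" on the server path).
if MODEL_LOADED:
    print(f"🔍 Loaded dtype: {next(model.parameters()).dtype}, device: {model.device}")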
# --- 3. Response generation logic ---
def extract_text_from_pdf(pdf_file_path):
    """Extract plain text from every page of a PDF using PyMuPDF (fitz)."""
    try:
        doc = fitz.open(pdf_file_path)
        text = "".join(page.get_text() for page in doc)
        doc.close()
        return text
    except Exception as e:
        print(f"PDF processing error: {e}")
        return f"An error occurred while reading the PDF file: {e}"
def generate_response(prompt_template: str, message: str, files: Optional[List] = None):
    if not MODEL_LOADED:
        return "❌ The model is not loaded."
    try:
        all_pixel_values, all_image_metas, file_texts = [], [], []
        if files:
            for file in files:
                file_path = file.name
                file_extension = os.path.splitext(file.name)[1].lower()
                if file_extension == '.pdf':
                    file_texts.append(extract_text_from_pdf(file_path))
                elif file_extension in ['.png', '.jpg', '.jpeg']:
                    pil_image = Image.open(file_path).convert('RGB')
                    processed_data = image_processor(pil_image)
                    all_pixel_values.append(processed_data["pixel_values"])
                    all_image_metas.append(processed_data["image_meta"])
        # One <image> placeholder token per attached image, plus any PDF text.
        image_tokens = "<image>" * len(all_pixel_values)
        pdf_content = "\n\n".join(file_texts)
        full_message = (
            message
            + (f"\n{image_tokens}" if image_tokens else "")
            + (f"\n\n[Attached PDF content]:\n{pdf_content}" if pdf_content else "")
        )
        full_prompt = prompt_template.format(message=full_message)
        if all_image_metas:
            # Merge the per-image metadata dicts into one dict of lists.
            combined_metas = {key: [meta[key] for meta in all_image_metas] for key in all_image_metas[0]}
            inputs = tokenizer.encode_prompt(prompt=full_prompt, image_meta=combined_metas)
            inputs = {k: (v.unsqueeze(0).to(model.device) if torch.is_tensor(v) else v) for k, v in inputs.items()}
        else:
            inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
        generation_args = {
            "max_new_tokens": 32,
            "temperature": 0.8,
            "do_sample": True,
            "pad_token_id": tokenizer.eos_token_id,
            "eos_token_id": tokenizer.eos_token_id,
            "top_p": 0.95,
        }
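        # Sampling setup: nucleus sampling with temperature 0.8 and top_p 0.95.
        # Note: max_new_tokens=32 keeps replies very short; raising it gives
        # longer answers at the cost of latency.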
        with torch.no_grad():
            if all_pixel_values:
                # The custom Kanana wrapper is assumed to accept a list of
                # per-image pixel_values tensors alongside the merged metas.
                outputs = model.generate(**inputs, pixel_values=all_pixel_values, image_metas=combined_metas, **generation_args)
            else:
                outputs = model.generate(**inputs, **generation_args)
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # Keep only the assistant turn from the decoded ChatML-style transcript.
        return response.split("<|im_start|>assistant\n")[-1].strip()
    except Exception as e:
        print(f"❌ Error while generating response: {e}")
        traceback.print_exc()
        return f"An error occurred: {e}"
# --- 4. Gradio UI and launch (final revision) ---
with gr.Blocks(title="Lily LLM System", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🧮 Lily LLM System")
    gr.Markdown("A multimodal AI system that understands and answers over images, PDFs, and text.")
    with gr.Tabs():
        with gr.Tab("💬 Chat"):
            chat_prompt = "<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
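            # {message} is filled in per request; the trailing
            # "<|im_start|>assistant\n" cues the model to answer, and
            # generate_response() splits on the same marker when decoding.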
            chatbot = gr.Chatbot(height=320, label="Conversation", elem_id="chatbot", type="messages")
            with gr.Row():
                msg = gr.Textbox(label="Message", placeholder="Type a message", lines=3, show_label=False, scale=4)
                file_input = gr.File(label="File upload", file_count="multiple", file_types=[".pdf", ".png", ".jpg", ".jpeg"], scale=1)
                send_btn = gr.Button("Send", variant="primary", scale=1)
# โœ… 1. respond ํ•จ์ˆ˜๊ฐ€ 'files'๋ฅผ ์„ธ ๋ฒˆ์งธ ์ธ์ž๋กœ ๋ฐ›๋„๋ก ์ˆ˜์ •
def respond(message, chat_history, files):
if not message.strip() and not files:
return "", chat_history, None # files ์ถœ๋ ฅ๋„ ๋น„์›Œ์คŒ
bot_message = generate_response(chat_prompt, message, files)
chat_history.append({"role": "user", "content": message})
chat_history.append({"role": "assistant", "content": bot_message})
# โœ… 2. ์ถœ๋ ฅ์˜ ๊ฐœ์ˆ˜๋ฅผ inputs์™€ ๋งž์ถ”๊ธฐ ์œ„ํ•ด file_input๋„ ๋ฐ˜ํ™˜๊ฐ’์— ์ถ”๊ฐ€
return "", chat_history, None
# โœ… 3. click๊ณผ submit์˜ inputs ๋ฆฌ์ŠคํŠธ์— 'file_input' ์ถ”๊ฐ€
send_btn.click(
respond,
inputs=[msg, chatbot, file_input],
outputs=[msg, chatbot, file_input], # ์ถœ๋ ฅ์—๋„ file_input ์ถ”๊ฐ€
api_name="chat", # api_name์€ ์Šฌ๋ž˜์‹œ ์—†์ด ์‚ฌ์šฉ
# queue=False
)
msg.submit(
respond,
inputs=[msg, chatbot, file_input],
outputs=[msg, chatbot, file_input], # ์ถœ๋ ฅ์—๋„ file_input ์ถ”๊ฐ€
api_name="chat",
# queue=False
)
with gr.Tab("โš™๏ธ ์‹œ์Šคํ…œ ์ •๋ณด"):
gr.Markdown(f"**์‹คํ–‰ ํ™˜๊ฒฝ**: `{'๋กœ์ปฌ' if IS_LOCAL else '์„œ๋ฒ„'}`")
gr.Markdown(f"**๋ชจ๋ธ ๊ฒฝ๋กœ**: `{MODEL_PATH}`")
gr.Markdown(f"**๋ชจ๋ธ ์ƒํƒœ**: `{'โœ… ๋กœ๋“œ๋จ' if MODEL_LOADED else 'โŒ ๋กœ๋“œ ์‹คํŒจ'}`")
if __name__ == "__main__":
    if IS_LOCAL:
        print("\n🚀 Starting local server at http://127.0.0.1:8006")
        demo.launch(server_name="127.0.0.1", server_port=8006, share=False)
    else:
        print("\n🚀 Starting server...")
        demo.launch()