gbrabbit committed on
Commit e3f9de3 · 1 Parent(s): 4056037

Auto commit at 08-2025-08 3:40:22

Files changed (4):
  1. app.py +60 -112
  2. test_input.py +0 -100
  3. test_text.py +0 -100
  4. test_tokenizer.py +0 -159
app.py CHANGED
@@ -1,21 +1,24 @@
 import gradio as gr
 import os
 import traceback
 from transformers import AutoTokenizer, AutoModelForCausalLM, AutoImageProcessor
 import torch
-import fitz  # PyMuPDF
 from PIL import Image
 from typing import Optional, List

-# --- 1. Global variables and environment setup ---
 tokenizer = None
 model = None
-image_processor = None  # image-processor global variable added
 MODEL_LOADED = False
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-
 IS_LOCAL = os.path.exists('.env') or os.path.exists('../.env') or os.getenv('IS_LOCAL') == 'true'
-
 try:
     from dotenv import load_dotenv
     if IS_LOCAL:
@@ -23,58 +26,43 @@ try:
         print("✅ .env file loaded")
 except ImportError:
     print("⚠️ python-dotenv is not installed")
-
 HF_TOKEN = os.getenv("HF_TOKEN")
 MODEL_NAME_SERVER = os.getenv("MODEL_NAME", "gbrabbit/lily-math-model")
 MODEL_PATH_LOCAL = "../lily_llm_core/models/kanana_1_5_v_3b_instruct"
 MODEL_PATH = MODEL_PATH_LOCAL if IS_LOCAL else MODEL_NAME_SERVER
-
 print(f"============== System environment info ==============")
 print(f"🔍 Runtime environment: {'local' if IS_LOCAL else 'server'}")
 print(f"🔍 Model path: {MODEL_PATH}")
 print(f"🔍 Device: {DEVICE.upper()}")
 print("==========================================")
-
-
-# --- 2. Core logic: loading the model and processor ---
 try:
     print("🔧 Starting model load...")
     from modeling import KananaVForConditionalGeneration
-
     if IS_LOCAL:
         if not os.path.exists(MODEL_PATH):
             raise FileNotFoundError(f"Local model path not found: {MODEL_PATH}")
-
         tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True, local_files_only=True)
         model = KananaVForConditionalGeneration.from_pretrained(
-            MODEL_PATH, torch_dtype=torch.float16, trust_remote_code=True, local_files_only=True,
         ).to(DEVICE)
-        # Load the image processor (local)
         image_processor = AutoImageProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True, local_files_only=True)
         print("✅ Local model and image processor loaded!")
-
-    else:  # server environment
         if not HF_TOKEN:
             raise ValueError("A Hugging Face token (HF_TOKEN) is required in the server environment.")
-
         tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, token=HF_TOKEN, trust_remote_code=True)
         model = KananaVForConditionalGeneration.from_pretrained(
-            MODEL_PATH, token=HF_TOKEN, torch_dtype=torch.float16, trust_remote_code=True,
-        ).to(DEVICE)
-        # Load the image processor (server)
         image_processor = AutoImageProcessor.from_pretrained(MODEL_PATH, token=HF_TOKEN, trust_remote_code=True)
         print("✅ Server model and image processor loaded!")
-
     MODEL_LOADED = True
-
 except Exception as e:
     print(f"❌ Model loading failed: {e}")
     traceback.print_exc()
     MODEL_LOADED = False

-
-# --- 3. File handling and response generation logic ---
-
 def extract_text_from_pdf(pdf_file_path):
     try:
         doc = fitz.open(pdf_file_path)
@@ -86,140 +74,100 @@ def extract_text_from_pdf(pdf_file_path):
         return f"An error occurred while reading the PDF file: {e}"

 def generate_response(prompt_template: str, message: str, files: Optional[List] = None):
-    if not MODEL_LOADED:
-        return "❌ The model is not loaded."
-
     try:
-        all_pixel_values = []
-        all_image_metas = []
-        file_texts = []
-
-        # 1. Process the uploaded files (separate images from PDFs)
         if files:
             for file in files:
-                file_path = file.name
-                file_extension = os.path.splitext(file_path)[1].lower()
-
-                if file_extension == '.pdf':
-                    file_texts.append(extract_text_from_pdf(file_path))
                 elif file_extension in ['.png', '.jpg', '.jpeg']:
                     pil_image = Image.open(file_path).convert('RGB')
                     processed_data = image_processor(pil_image)
-
-                    pixel_values = processed_data["pixel_values"]
-                    image_metas = processed_data["image_meta"]
-
-                    all_pixel_values.append(pixel_values)
-                    all_image_metas.append(image_metas)
-
-        # 2. Build the prompt
         image_tokens = "<image>" * len(all_pixel_values)
         pdf_content = "\n\n".join(file_texts)
         full_message = message + (f"\n{image_tokens}" if image_tokens else "") + (f"\n\n[Attached PDF content]:\n{pdf_content}" if pdf_content else "")
         full_prompt = prompt_template.format(message=full_message)
-
-        # 3. Tokenize and attach `image_metas`
         if all_image_metas:
-            # Merge the metadata of multiple images into one
-            combined_metas = {}
-            for key in all_image_metas[0].keys():
-                combined_metas[key] = [meta[key] for meta in all_image_metas]
-
-            # `encode_prompt` is assumed to be a custom function built into the Kanana model's tokenizer
             inputs = tokenizer.encode_prompt(prompt=full_prompt, image_meta=combined_metas)
-
-            # Add a batch dimension and move to the device only when the value is a tensor.
-            inputs = {
-                k: (v.unsqueeze(0).to(model.device) if torch.is_tensor(v) else v)
-                for k, v in inputs.items()
-            }
         else:
             inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
-
-        # 4. Prepare the generation parameters
         generation_args = {
-            "max_new_tokens": 256, "temperature": 0.7, "do_sample": True,
-            "pad_token_id": tokenizer.eos_token_id, "eos_token_id": tokenizer.eos_token_id
         }
-
-        # 5. Model inference (multimodal / text-only branch)
         with torch.no_grad():
             if all_pixel_values:
-                print(f"🖼️ {len(all_pixel_values)} image(s) attached, generating in multimodal mode")
-                # Pass pixel_values and image_metas directly to `generate`
-                outputs = model.generate(
-                    **inputs,
-                    pixel_values=all_pixel_values,
-                    image_metas=combined_metas,
-                    **generation_args
-                )
             else:
-                print("📄 Generating from text only")
                 outputs = model.generate(**inputs, **generation_args)
-
-        # 6. Decode the result
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Strip the prompt portion from the response
-        assistant_response = response.split("<|im_start|>assistant\n")[-1].strip()
-
-        return assistant_response
-
     except Exception as e:
-        print(f"❌ Error while generating a response: {e}")
-        traceback.print_exc()
-        return f"An error occurred: {e}"

-# --- 4. Gradio UI and launch ---
 with gr.Blocks(title="Lily LLM System", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🧮 Lily LLM System")
     gr.Markdown("A multimodal AI system that understands and answers from images, PDFs, and text.")

-    with gr.Tabs():
     with gr.Tab("💬 Chat"):
         chat_prompt = "<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
-        chatbot = gr.Chatbot(height=500, label="Chat window", elem_id="chatbot", type="messages")
-
-        with gr.Row():
-            file_input = gr.File(
-                label="File upload (multiple selection allowed)",
-                file_count="multiple",  # enable multi-file upload
-                file_types=[".pdf", ".png", ".jpg", ".jpeg"]
-            )
         with gr.Row():
-            msg = gr.Textbox(
-                label="Message input",
-                placeholder="Upload a file and ask a question, or chat with text only.",
-                lines=3,
-                show_label=False,
-                scale=7
-            )
             send_btn = gr.Button("Send", variant="primary", scale=1)

         def respond(message, chat_history, files):
             if not message.strip() and not files:
-                # With no input, do nothing and return the current state as-is
-                return "", chat_history

             bot_message = generate_response(chat_prompt, message, files)

-            # Append to the chat history as dictionaries matching the 'messages' type
             chat_history.append({"role": "user", "content": message})
             chat_history.append({"role": "assistant", "content": bot_message})

-            return "", chat_history
-
-        send_btn.click(respond, inputs=[msg, chatbot, file_input], outputs=[msg, chatbot])
-        msg.submit(respond, inputs=[msg, chatbot, file_input], outputs=[msg, chatbot])

     with gr.Tab("⚙️ System info"):
         gr.Markdown(f"**Runtime environment**: `{'local' if IS_LOCAL else 'server'}`")
         gr.Markdown(f"**Model path**: `{MODEL_PATH}`")
         gr.Markdown(f"**Model status**: `{'✅ loaded' if MODEL_LOADED else '❌ load failed'}`")

-if __name__ == "__main__":
     if IS_LOCAL:
-        print("\n🚀 Starting the local server. http://localhost:8006")
-        demo.launch(server_name="localhost", server_port=8006, share=False)
     else:
         print("\n🚀 Starting the server...")
         demo.launch()
 
+# File: app.py (final revision)
+
 import gradio as gr
 import os
 import traceback
 from transformers import AutoTokenizer, AutoModelForCausalLM, AutoImageProcessor
 import torch
+import fitz
 from PIL import Image
 from typing import Optional, List

+# --- 1 & 2. Global variables, environment setup, model loading (same as the existing code) ---
+# (This part needs no changes and can be left as-is)
+# ... (omitted) ...
+# --- 1 & 2. Global variables, environment setup, model loading (same as the existing code) ---
 tokenizer = None
 model = None
+image_processor = None
 MODEL_LOADED = False
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
 IS_LOCAL = os.path.exists('.env') or os.path.exists('../.env') or os.getenv('IS_LOCAL') == 'true'
 try:
     from dotenv import load_dotenv
     if IS_LOCAL:
         print("✅ .env file loaded")
 except ImportError:
     print("⚠️ python-dotenv is not installed")
 HF_TOKEN = os.getenv("HF_TOKEN")
 MODEL_NAME_SERVER = os.getenv("MODEL_NAME", "gbrabbit/lily-math-model")
 MODEL_PATH_LOCAL = "../lily_llm_core/models/kanana_1_5_v_3b_instruct"
 MODEL_PATH = MODEL_PATH_LOCAL if IS_LOCAL else MODEL_NAME_SERVER
 print(f"============== System environment info ==============")
 print(f"🔍 Runtime environment: {'local' if IS_LOCAL else 'server'}")
 print(f"🔍 Model path: {MODEL_PATH}")
 print(f"🔍 Device: {DEVICE.upper()}")
 print("==========================================")
 try:
     print("🔧 Starting model load...")
     from modeling import KananaVForConditionalGeneration
     if IS_LOCAL:
         if not os.path.exists(MODEL_PATH):
             raise FileNotFoundError(f"Local model path not found: {MODEL_PATH}")
         tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True, local_files_only=True)
         model = KananaVForConditionalGeneration.from_pretrained(
+            MODEL_PATH, torch_dtype=torch.bfloat16, trust_remote_code=True, local_files_only=True,
         ).to(DEVICE)
         image_processor = AutoImageProcessor.from_pretrained(MODEL_PATH, trust_remote_code=True, local_files_only=True)
         print("✅ Local model and image processor loaded!")
+    else:
         if not HF_TOKEN:
             raise ValueError("A Hugging Face token (HF_TOKEN) is required in the server environment.")
         tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, token=HF_TOKEN, trust_remote_code=True)
         model = KananaVForConditionalGeneration.from_pretrained(
+            MODEL_PATH, token=HF_TOKEN, torch_dtype=torch.float16, trust_remote_code=True, device_map="auto"
+        )
         image_processor = AutoImageProcessor.from_pretrained(MODEL_PATH, token=HF_TOKEN, trust_remote_code=True)
         print("✅ Server model and image processor loaded!")
     MODEL_LOADED = True
 except Exception as e:
     print(f"❌ Model loading failed: {e}")
     traceback.print_exc()
     MODEL_LOADED = False

+# --- 3. Response generation logic (same as the existing code) ---
 def extract_text_from_pdf(pdf_file_path):
     try:
         doc = fitz.open(pdf_file_path)
         return f"An error occurred while reading the PDF file: {e}"

 def generate_response(prompt_template: str, message: str, files: Optional[List] = None):
+    if not MODEL_LOADED: return "❌ The model is not loaded."
     try:
+        all_pixel_values, all_image_metas, file_texts = [], [], []
         if files:
             for file in files:
+                file_path, file_extension = file.name, os.path.splitext(file.name)[1].lower()
+                if file_extension == '.pdf': file_texts.append(extract_text_from_pdf(file_path))
                 elif file_extension in ['.png', '.jpg', '.jpeg']:
                     pil_image = Image.open(file_path).convert('RGB')
                     processed_data = image_processor(pil_image)
+                    all_pixel_values.append(processed_data["pixel_values"])
+                    all_image_metas.append(processed_data["image_meta"])
         image_tokens = "<image>" * len(all_pixel_values)
         pdf_content = "\n\n".join(file_texts)
         full_message = message + (f"\n{image_tokens}" if image_tokens else "") + (f"\n\n[Attached PDF content]:\n{pdf_content}" if pdf_content else "")
         full_prompt = prompt_template.format(message=full_message)
         if all_image_metas:
+            combined_metas = {key: [meta[key] for meta in all_image_metas] for key in all_image_metas[0]}
             inputs = tokenizer.encode_prompt(prompt=full_prompt, image_meta=combined_metas)
+            inputs = {k: (v.unsqueeze(0).to(model.device) if torch.is_tensor(v) else v) for k, v in inputs.items()}
         else:
             inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
         generation_args = {
+            "max_new_tokens": 32,
+            "temperature": 0.8,
+            "do_sample": True,
+            "pad_token_id": tokenizer.eos_token_id,
+            "eos_token_id": tokenizer.eos_token_id,
+            "top_p": 0.95,
         }
         with torch.no_grad():
             if all_pixel_values:
+                outputs = model.generate(**inputs, pixel_values=all_pixel_values, image_metas=combined_metas, **generation_args)
             else:
                 outputs = model.generate(**inputs, **generation_args)
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return response.split("<|im_start|>assistant\n")[-1].strip()
     except Exception as e:
+        print(f"❌ Error while generating a response: {e}"); traceback.print_exc(); return f"An error occurred: {e}"

+
+# --- 4. Gradio UI and launch (final fix) ---
 with gr.Blocks(title="Lily LLM System", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🧮 Lily LLM System")
     gr.Markdown("A multimodal AI system that understands and answers from images, PDFs, and text.")

+    with gr.Tabs():
     with gr.Tab("💬 Chat"):
         chat_prompt = "<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
+        chatbot = gr.Chatbot(height=320, label="Chat window", elem_id="chatbot", type="messages")
+
         with gr.Row():
+            msg = gr.Textbox(label="Message input", placeholder="Enter a message", lines=3, show_label=False, scale=4)
+            file_input = gr.File(label="File upload", file_count="multiple", file_types=[".pdf", ".png", ".jpg", ".jpeg"], scale=1)
             send_btn = gr.Button("Send", variant="primary", scale=1)

+        # ✅ 1. respond modified to take 'files' as its third argument
         def respond(message, chat_history, files):
             if not message.strip() and not files:
+                return "", chat_history, None  # clear the files output as well

             bot_message = generate_response(chat_prompt, message, files)

             chat_history.append({"role": "user", "content": message})
             chat_history.append({"role": "assistant", "content": bot_message})

+            # ✅ 2. file_input added to the return values so the output count matches the inputs
+            return "", chat_history, None
+
+        # ✅ 3. 'file_input' added to the inputs lists of click and submit
+        send_btn.click(
+            respond,
+            inputs=[msg, chatbot, file_input],
+            outputs=[msg, chatbot, file_input],  # file_input added to the outputs as well
+            api_name="chat",  # api_name is used without a slash
+            # queue=False
+        )
+        msg.submit(
+            respond,
+            inputs=[msg, chatbot, file_input],
+            outputs=[msg, chatbot, file_input],  # file_input added to the outputs as well
+            api_name="chat",
+            # queue=False
+        )

     with gr.Tab("⚙️ System info"):
         gr.Markdown(f"**Runtime environment**: `{'local' if IS_LOCAL else 'server'}`")
         gr.Markdown(f"**Model path**: `{MODEL_PATH}`")
         gr.Markdown(f"**Model status**: `{'✅ loaded' if MODEL_LOADED else '❌ load failed'}`")

+if __name__ == "__main__":
     if IS_LOCAL:
+        print("\n🚀 Starting the local server. http://127.0.0.1:8006")
+        demo.launch(server_name="127.0.0.1", server_port=8006, share=False)
     else:
         print("\n🚀 Starting the server...")
         demo.launch()
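
Note on the renamed endpoint: because both handlers now register api_name="chat", external callers can address the endpoint by name instead of the positional fn_index the deleted test scripts relied on. A minimal gradio_client sketch, assuming this app.py is running locally on port 8006; the endpoint mirrors respond(message, chat_history, files) and returns three values (cleared textbox, updated history, cleared file input):

from gradio_client import Client

# Address assumed from the launch() call above.
client = Client("http://127.0.0.1:8006/")

# Three inputs in, three outputs back, matching respond's signature.
cleared_msg, history, cleared_files = client.predict(
    "Hello! How is the weather today?",  # message
    [],                                  # empty chat history
    None,                                # no files attached
    api_name="/chat",
)
print(history[-1]["content"])  # the assistant's latest reply
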
test_input.py DELETED
@@ -1,100 +0,0 @@
-import os
-from gradio_client import Client, file
-
-# --- Settings ---
-# Local Gradio server address (shown in the terminal when app.py runs)
-SERVER_URL = "http://localhost:8006/"
-
-def run_chat_test(client):
-    """Tests the general chat tab."""
-    print("\n--- 💬 Starting the general chat test ---")
-
-    test_message = "Hello! How is the weather today?"
-    chat_history = []  # the initial chat history is empty
-
-    print(f"Sending message: '{test_message}'")
-
-    # Call the `respond` function (API endpoint index: 0)
-    # Inputs: (message, chat history, files)
-    # Outputs: (cleared textbox, updated chat history)
-    result = client.predict(
-        test_message,
-        chat_history,
-        None,  # no files
-        fn_index=0
-    )
-
-    # Extract the last response (the bot message) from the updated chat history
-    updated_history = result[1]
-    bot_response = updated_history[-1]['content']
-
-    print("✅ Test passed!")
-    print(f"🤖 Response received: '{bot_response}'")
-
-def run_math_test(client):
-    """Tests the math problem solving tab."""
-    print("\n--- 🧮 Starting the math problem solving test ---")
-
-    test_problem = "If the sum of two consecutive even numbers is 34, what are the two numbers?"
-
-    print(f"Sending problem: '{test_problem}'")
-
-    # Call the math problem solving function (API endpoint index: 1)
-    # Inputs: (math problem, files)
-    # Output: (result text)
-    result = client.predict(
-        test_problem,
-        None,  # no files
-        fn_index=1
-    )
-
-    print("✅ Test passed!")
-    print(f"🤖 Response received (truncated): '{result[:200]}...'")
-
-def run_file_test(client):
-    """Tests the file upload feature."""
-    print("\n--- 📁 Starting the file upload chat test ---")
-
-    # Create a temporary text file for the test
-    temp_file_path = "test_document.txt"
-    with open(temp_file_path, "w", encoding="utf-8") as f:
-        f.write("This file was created for testing.\n")
-        f.write("The key content of the file is 'The capital of South Korea is Seoul'.")
-
-    print(f"File to upload: '{temp_file_path}'")
-    test_message = "What is the key content of the uploaded file?"
-    print(f"Sending message: '{test_message}'")
-
-    # Use `file()` to wrap the file in a form that can be uploaded to the server
-    result = client.predict(
-        test_message,
-        [],  # no chat history
-        file(temp_file_path),
-        fn_index=0
-    )
-
-    # Delete the temporary file
-    os.remove(temp_file_path)
-
-    bot_response = result[1][-1]['content']
-    print("✅ Test passed!")
-    print(f"🤖 Response received: '{bot_response}'")
-
-
-if __name__ == "__main__":
-    print(f"Trying to connect to the Gradio server ({SERVER_URL})...")
-
-    try:
-        # Connect to the server as a client
-        client = Client(SERVER_URL, verbose=False)
-        print("✅ Connected to the server!")
-
-        # Run the tests
-        run_chat_test(client)
-        run_math_test(client)
-        # run_file_test(client)  # uncomment to run the file test when needed
-
-    except Exception as e:
-        print(f"\n❌ Test failed: could not connect to the server, or an error occurred.")
-        print("Make sure 'python app.py' is running in another terminal first.")
-        print(f"Error details: {e}")
test_text.py DELETED
@@ -1,100 +0,0 @@
-import os
-from gradio_client import Client, file
-
-# --- Settings ---
-# Local Gradio server address (shown in the terminal when app.py runs)
-SERVER_URL = "http://localhost:8006/"
-
-def run_chat_test(client):
-    """Tests the general chat tab."""
-    print("\n--- 💬 Starting the general chat test ---")
-
-    test_message = "Hello! How is the weather today?"
-    chat_history = []  # the initial chat history is empty
-
-    print(f"Sending message: '{test_message}'")
-
-    # Call the `respond` function (API endpoint index: 0)
-    # Inputs: (message, chat history, files)
-    # Outputs: (cleared textbox, updated chat history)
-    result = client.predict(
-        test_message,
-        chat_history,
-        None,  # no files
-        fn_index=0
-    )
-
-    # Extract the last response (the bot message) from the updated chat history
-    updated_history = result[1]
-    bot_response = updated_history[-1]['content']
-
-    print("✅ Test passed!")
-    print(f"🤖 Response received: '{bot_response}'")
-
-def run_math_test(client):
-    """Tests the math problem solving tab."""
-    print("\n--- 🧮 Starting the math problem solving test ---")
-
-    test_problem = "If the sum of two consecutive even numbers is 34, what are the two numbers?"
-
-    print(f"Sending problem: '{test_problem}'")
-
-    # Call the math problem solving function (API endpoint index: 1)
-    # Inputs: (math problem, files)
-    # Output: (result text)
-    result = client.predict(
-        test_problem,
-        None,  # no files
-        fn_index=1
-    )
-
-    print("✅ Test passed!")
-    print(f"🤖 Response received (truncated): '{result[:200]}...'")
-
-def run_file_test(client):
-    """Tests the file upload feature."""
-    print("\n--- 📁 Starting the file upload chat test ---")
-
-    # Create a temporary text file for the test
-    temp_file_path = "test_document.txt"
-    with open(temp_file_path, "w", encoding="utf-8") as f:
-        f.write("This file was created for testing.\n")
-        f.write("The key content of the file is 'The capital of South Korea is Seoul'.")
-
-    print(f"File to upload: '{temp_file_path}'")
-    test_message = "What is the key content of the uploaded file?"
-    print(f"Sending message: '{test_message}'")
-
-    # Use `file()` to wrap the file in a form that can be uploaded to the server
-    result = client.predict(
-        test_message,
-        [],  # no chat history
-        file(temp_file_path),
-        fn_index=0
-    )
-
-    # Delete the temporary file
-    os.remove(temp_file_path)
-
-    bot_response = result[1][-1]['content']
-    print("✅ Test passed!")
-    print(f"🤖 Response received: '{bot_response}'")
-
-
-if __name__ == "__main__":
-    print(f"Trying to connect to the Gradio server ({SERVER_URL})...")
-
-    try:
-        # Connect to the server as a client
-        client = Client(SERVER_URL, verbose=False)
-        print("✅ Connected to the server!")
-
-        # Run the tests
-        run_chat_test(client)
-        run_math_test(client)
-        # run_file_test(client)  # uncomment to run the file test when needed
-
-    except Exception as e:
-        print(f"\n❌ Test failed: could not connect to the server, or an error occurred.")
-        print("Make sure 'python app.py' is running in another terminal first.")
-        print(f"Error details: {e}")
test_tokenizer.py DELETED
@@ -1,159 +0,0 @@
-import os
-import traceback
-from typing import Optional
-from transformers import AutoTokenizer
-import torch
-
-# Load environment variables
-try:
-    from dotenv import load_dotenv
-    load_dotenv()
-    print("✅ .env file loaded")
-except ImportError:
-    print("⚠️ python-dotenv is not installed")
-
-HF_TOKEN = os.getenv("HF_TOKEN")
-
-# Detect the environment
-IS_LOCAL = os.path.exists('../.env') or 'LOCAL_TEST' in os.environ
-print(f"🔍 Environment: {'local' if IS_LOCAL else 'server'}")
-
-# Set the model path for the environment
-if IS_LOCAL:
-    # Local model path (uses the hearth_llm_model folder)
-    MODEL_PATH = "../lily_llm_core/models/kanana_1_5_v_3b_instruct"
-    print(f"🔍 Local model path: {MODEL_PATH}")
-    print(f"🔍 Path exists: {os.path.exists(MODEL_PATH)}")
-else:
-    # Use the Hugging Face model on the server
-    MODEL_PATH = os.getenv("MODEL_NAME", "gbrabbit/lily-math-model")
-    print(f"🔍 Server model: {MODEL_PATH}")
-
-print(f"🔍 Token: {'✅ set' if HF_TOKEN else '❌ not set'}")
-
-# Tokenizer test
-print("\n🔧 Starting the tokenizer test...")
-
-try:
-    print("📤 Loading the tokenizer...")
-    print(f"   MODEL_PATH: {MODEL_PATH}")
-    print(f"   IS_LOCAL: {IS_LOCAL}")
-    print(f"   trust_remote_code: True")
-    print(f"   use_fast: False")
-
-    if IS_LOCAL:
-        tokenizer = AutoTokenizer.from_pretrained(
-            MODEL_PATH,
-            trust_remote_code=True,
-        )
-    else:
-        tokenizer = AutoTokenizer.from_pretrained(
-            MODEL_PATH,
-            token=HF_TOKEN,
-            trust_remote_code=True,
-        )
-
-    print(f"✅ Tokenizer loaded")
-    print(f"   type: {type(tokenizer)}")
-    print(f"   value: {tokenizer}")
-    print(f"   hasattr('encode'): {hasattr(tokenizer, 'encode')}")
-    print(f"   hasattr('__call__'): {hasattr(tokenizer, '__call__')}")
-
-    # Tokenizer test
-    test_input = "Hello"
-    print(f"\n🔤 Tokenizer test: '{test_input}'")
-
-    test_tokens = tokenizer(test_input, return_tensors="pt")
-    print(f"   ✅ Tokenizer call succeeded")
-    print(f"   input_ids shape: {test_tokens['input_ids'].shape}")
-    print(f"   attention_mask shape: {test_tokens['attention_mask'].shape}")
-
-    # Decoding test
-    decoded = tokenizer.decode(test_tokens['input_ids'][0], skip_special_tokens=True)
-    print(f"   decoded result: '{decoded}'")
-
-except Exception as e:
-    print(f"❌ Tokenizer test failed: {e}")
-    print(f"   error type: {type(e).__name__}")
-    traceback.print_exc()
-
-# Model test
-print("\n🔧 Starting the model test...")
-
-try:
-    print("📤 Loading the model...")
-    from modeling import KananaVForConditionalGeneration
-
-    if IS_LOCAL:
-        model = KananaVForConditionalGeneration.from_pretrained(
-            MODEL_PATH,
-            torch_dtype=torch.float16,
-            trust_remote_code=True,
-            device_map=None,
-            low_cpu_mem_usage=True
-        )
-    else:
-        model = KananaVForConditionalGeneration.from_pretrained(
-            MODEL_PATH,
-            token=HF_TOKEN,
-            torch_dtype=torch.float16,
-            trust_remote_code=True,
-            device_map=None,
-            low_cpu_mem_usage=True
-        )
-
-    print(f"✅ Model loaded")
-    # print(f"   type: {type(model)}")
-    # print(f"   device: {next(model.parameters()).device}")
-
-    # Model test
-    test_input = "Hello"
-    formatted_prompt = f"<|im_start|>user\n{test_input}<|im_end|>\n<|im_start|>assistant\n"
-    max_length: Optional[int] = None
-
-    inputs = tokenizer(
-        formatted_prompt,
-        return_tensors="pt",
-        padding=True,
-        truncation=True,
-        max_length=512
-    )
-
-    print(f"\n🤖 Model inference test: '{test_input}'")
-
-    # Generation settings for Kanana
-    max_new_tokens = max_length or 100
-
-    with torch.no_grad():
-        outputs = model.generate(
-            input_ids=inputs["input_ids"],
-            attention_mask=inputs["attention_mask"],
-            max_new_tokens=max_new_tokens,
-            repetition_penalty=1.1,
-            no_repeat_ngram_size=2,
-            pad_token_id=tokenizer.eos_token_id,
-            eos_token_id=tokenizer.eos_token_id,
-            use_cache=True
-        )
-
-    print(f"   ✅ Model call succeeded")
-    print(f"   outputs type: {type(outputs)}")
-    print(f"   outputs shape: {outputs.shape}")
-
-    # Decoding test
-    # The output of model.generate() is the full sequence, so decode it directly.
-    # outputs[0] is the first result in the batch.
-    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    # Strip the input prompt from the response (optional)
-    assistant_response = response.split("<|im_start|>assistant\n")[-1]
-
-    print(f"   full generated text: '{response}'")
-    print(f"   assistant response: '{assistant_response.strip()}'")
-
-except Exception as e:
-    print(f"❌ Model test failed: {e}")
-    print(f"   error type: {type(e).__name__}")
-    traceback.print_exc()
-
-print("\n✅ Test complete!")