gbrabbit committed on
Commit
149ff1e
·
1 Parent(s): f143d67

Auto commit at 22-2025-08 16:26:25

Browse files
fix_kanana_target_modules.py ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kanana 모델의 정확한 target modules 패턴 찾기
4
+ """
5
+ import sys
6
+ import os
7
+ from pathlib import Path
8
+
9
+ # 프로젝트 루트 경로 추가
10
+ project_root = Path(__file__).parent
11
+ sys.path.insert(0, str(project_root))
12
+
13
+ def find_exact_target_modules():
14
+ """์ •ํ™•ํ•œ target modules ํŒจํ„ด ์ฐพ๊ธฐ"""
15
+ print("๐Ÿ” Kanana ๋ชจ๋ธ์˜ ์ •ํ™•ํ•œ target modules ํŒจํ„ด ์ฐพ๊ธฐ...")
16
+
17
+ try:
18
+ import torch
19
+ from transformers import AutoModelForVision2Seq
20
+
21
+ model_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
22
+
23
+ print(f"๐Ÿ“ฅ ๋ชจ๋ธ ๋กœ๋”ฉ ์ค‘: {model_path}")
24
+
25
+ # ๋ชจ๋ธ ๋กœ๋“œ
26
+ model = AutoModelForVision2Seq.from_pretrained(
27
+ model_path,
28
+ trust_remote_code=True,
29
+ local_files_only=True,
30
+ torch_dtype=torch.bfloat16
31
+ )
32
+
33
+ print(f"โœ… ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต!")
34
+
35
+ # language_model ๋ถ€๋ถ„์˜ ์ •ํ™•ํ•œ ๋ชจ๋“ˆ ์ด๋ฆ„ ์ฐพ๊ธฐ
36
+ print("\n๐ŸŽฏ Language Model ๋ชจ๋“ˆ ๊ฒ€์ƒ‰:")
37
+
38
+ target_candidates = []
39
+
40
+ for name, module in model.named_modules():
41
+ # language_model ๋ถ€๋ถ„๋งŒ ํ•„ํ„ฐ๋ง
42
+ if name.startswith("language_model.model.layers."):
43
+ if hasattr(module, 'weight') and module.weight is not None:
44
+ module_type = type(module).__name__
45
+
46
+ # LoRA์— ์ ํ•ฉํ•œ ๋ชจ๋“ˆ๋“ค ์ฐพ๊ธฐ
47
+ if any(pattern in name for pattern in ['q_proj', 'k_proj', 'v_proj', 'o_proj']):
48
+ target_candidates.append((name, module_type, "Attention"))
49
+ elif any(pattern in name for pattern in ['gate_proj', 'up_proj', 'down_proj']):
50
+ target_candidates.append((name, module_type, "MLP"))
51
+
52
+ # ๊ฒฐ๊ณผ ์ถœ๋ ฅ
53
+ if target_candidates:
54
+ print(" โœ… ๋ฐœ๊ฒฌ๋œ target modules:")
55
+ for name, module_type, category in target_candidates:
56
+ print(f" - {name} ({module_type}) - {category}")
57
+
58
+ # ์‹ค์ œ ์‚ฌ์šฉํ•  target modules ์ถ”์ถœ
59
+ print("\n๐Ÿ“‹ ์‹ค์ œ ์‚ฌ์šฉํ•  target modules:")
60
+ target_modules = []
61
+ for name, _, _ in target_candidates:
62
+ target_modules.append(name)
63
+ print(f" '{name}',")
64
+
65
+ print(f"\n๐Ÿ”ข ์ด {len(target_modules)}๊ฐœ์˜ target modules ๋ฐœ๊ฒฌ")
66
+
67
+ else:
68
+ print(" โŒ language_model์—์„œ target modules๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Œ")
69
+
70
+ # ๋ชจ๋ธ ํ•ด์ œ
71
+ del model
72
+ import gc
73
+ gc.collect()
74
+
75
+ print("\nโœ… target modules ๊ฒ€์ƒ‰ ์™„๋ฃŒ!")
76
+
77
+ except Exception as e:
78
+ print(f"โŒ target modules ๊ฒ€์ƒ‰ ์‹คํŒจ: {e}")
79
+ import traceback
80
+ traceback.print_exc()
81
+
82
+ if __name__ == "__main__":
83
+ find_exact_target_modules()
inspect_kanana_model.py ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Kanana 모델 구조 확인 스크립트
4
+ """
5
+ import sys
6
+ import os
7
+ from pathlib import Path
8
+
9
+ # 프로젝트 루트 경로 추가
10
+ project_root = Path(__file__).parent
11
+ sys.path.insert(0, str(project_root))
12
+
13
def inspect_kanana_model(model_path="./lily_llm_core/models/kanana_1_5_v_3b_instruct"):
    """Print the module structure of the Kanana model and list LoRA candidates.

    Loads the processor and the Vision2Seq model at ``model_path`` from local
    files only, then prints:

    1. every named module that owns a non-``None`` ``weight``;
    2. every module whose lower-cased name contains a well-known LoRA target
       pattern, tagged with the first matching category (Attention, MLP,
       GPTNeoX, Linear - checked in that order);
    3. the weighted modules among the first 20 entries of ``named_modules()``
       together with their weight shapes.

    Any failure is printed with its traceback instead of being raised.

    Args:
        model_path: Directory of the local checkpoint. The default is relative
            to the current working directory, exactly as before.

    Returns:
        list[tuple[str, str, str]]: ``(qualified name, module class name,
        category)`` for every LoRA candidate; empty when none matched or when
        loading failed.
    """
    import gc
    import traceback

    # Ordered: the first category with a matching pattern wins.
    pattern_categories = (
        (("q_proj", "k_proj", "v_proj", "o_proj"), "Attention"),
        (("gate_proj", "up_proj", "down_proj"), "MLP"),
        (("query_key_value", "dense_h_to_4h", "dense_4h_to_h"), "GPTNeoX"),
        (("fc1", "fc2", "proj"), "Linear"),
    )

    print("🔍 Kanana 모델 구조 확인 시작...")

    target_candidates = []
    model = None
    processor = None
    try:
        import torch
        from transformers import AutoModelForVision2Seq, AutoProcessor

        print(f"📥 모델 로딩 중: {model_path}")

        # The processor is loaded only to verify that it can be loaded.
        # NOTE: trust_remote_code executes Python shipped inside the checkpoint
        # directory; only point this at checkpoints you trust.
        processor = AutoProcessor.from_pretrained(
            model_path,
            trust_remote_code=True,
            local_files_only=True,
        )

        model = AutoModelForVision2Seq.from_pretrained(
            model_path,
            trust_remote_code=True,
            local_files_only=True,
            torch_dtype=torch.bfloat16,
        )

        print("✅ 모델 로드 성공!")
        print(f"🔍 모델 타입: {type(model).__name__}")
        print("🔍 모델 구조:")

        # Walk the module tree once and reuse the result for all three reports.
        named_modules = list(model.named_modules())

        print("\n📋 모든 named_modules:")
        for name, module in named_modules:
            if getattr(module, "weight", None) is not None:
                print(f" - {name}: {type(module).__name__}")

        print("\n🎯 LoRA target modules 후보:")
        for name, module in named_modules:
            lowered = name.lower()
            for patterns, category in pattern_categories:
                if any(pattern in lowered for pattern in patterns):
                    target_candidates.append((name, type(module).__name__, category))
                    break

        if target_candidates:
            print(" ✅ 발견된 target modules:")
            for name, module_type, category in target_candidates:
                print(f" - {name} ({module_type}) - {category}")
        else:
            print(" ❌ 일반적인 패턴을 찾을 수 없음")

        # Only the first 20 entries of the walk are inspected here.
        print("\n🔍 첫 번째 레이어 구조:")
        for name, module in named_modules[:20]:
            if getattr(module, "weight", None) is not None:
                print(f" - {name}: {type(module).__name__} (shape: {module.weight.shape})")

        # Release the references held by the cached walk before cleanup.
        del named_modules

    except Exception as e:
        # Diagnostic script: report the failure, never crash the caller.
        print(f"❌ 모델 구조 확인 실패: {e}")
        traceback.print_exc()
    else:
        print("\n✅ 모델 구조 확인 완료!")
    finally:
        # Drop the model and processor on success *and* on failure.
        del model
        del processor
        gc.collect()

    return target_candidates
93
+
94
+ if __name__ == "__main__":
95
+ inspect_kanana_model()
lily_llm_api/app_v2.py CHANGED
@@ -54,6 +54,11 @@ from lily_llm_core.hybrid_rag_processor import hybrid_rag_processor
54
  # ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ๋ฐ LoRA ๊ด€๋ฆฌ์ž ์ถ”๊ฐ€
55
  from lily_llm_core.context_manager import get_context_manager, context_manager
56
 
 
 
 
 
 
57
  # LoRA ๊ด€๋ฆฌ์ž import (์„ ํƒ์ )
58
  try:
59
  from lily_llm_core.lora_manager import get_lora_manager, lora_manager
@@ -65,6 +70,124 @@ except ImportError as e:
65
  lora_manager = None
66
  get_lora_manager = None
67
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  # ===== lifespan ์ปจํ…์ŠคํŠธ ๋งค๋‹ˆ์ € (์„œ๋ฒ„ ์‹œ์ž‘/์ข…๋ฃŒ ์ด๋ฒคํŠธ) =====
69
  from contextlib import asynccontextmanager
70
 
@@ -81,9 +204,9 @@ async def lifespan(app: FastAPI):
81
  except Exception as e:
82
  logger.error(f"โŒ CPU ์Šค๋ ˆ๋“œ ์„ค์ • ์‹คํŒจ: {e}")
83
 
84
- # ๊ธฐ๋ณธ ๋ชจ๋ธ ์ž๋™ ๋กœ๋“œ (polyglot-ko-1.3b-chat)
85
- selected_model_id = "polyglot-ko-1.3b-chat"
86
- logger.info(f"๐Ÿš€ ์„œ๋ฒ„ ์‹œ์ž‘ ์‹œ ๊ธฐ๋ณธ ๋ชจ๋ธ ์ž๋™ ๋กœ๋“œ: {selected_model_id}")
87
 
88
  try:
89
  await load_model_async(selected_model_id)
@@ -110,47 +233,8 @@ async def lifespan(app: FastAPI):
110
  except Exception as e:
111
  logger.warning(f"โš ๏ธ ๊ณ ๊ธ‰ ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ์„ค์ • ์‹คํŒจ: {e}")
112
 
113
- # LoRA ์ž๋™ ์„ค์ • (๋ชจ๋ธ ๋กœ๋“œ ์™„๋ฃŒ ํ›„)
114
- if LORA_AVAILABLE and lora_manager:
115
- try:
116
- logger.info("๐Ÿ”ง ์„œ๋ฒ„ ์‹œ์ž‘ ํ›„ LoRA ์ž๋™ ์„ค์ • ์‹œ์ž‘...")
117
-
118
- # ๋ชจ๋ธ ๊ฒฝ๋กœ ์„ค์ •
119
- current_model_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
120
- logger.info(f"๐Ÿ” LoRA ๋ชจ๋ธ ๊ฒฝ๋กœ: {current_model_path}")
121
-
122
- # LoRA ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ
123
- logger.info("๐Ÿ”ง LoRA ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ ์‹œ์ž‘...")
124
- success = lora_manager.load_base_model(current_model_path, "causal_lm")
125
- if success:
126
- logger.info("โœ… LoRA ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต")
127
-
128
- # LoRA ์„ค์ • ์ƒ์„ฑ
129
- logger.info("๐Ÿ”ง LoRA ์„ค์ • ์ƒ์„ฑ ์‹œ์ž‘...")
130
- lora_config = lora_manager.create_lora_config(
131
- r=16,
132
- lora_alpha=32,
133
- lora_dropout=0.1,
134
- bias="none",
135
- task_type="CAUSAL_LM",
136
- target_modules=["query_key_value", "mlp.dense_h_to_4h", "mlp.dense_4h_to_h"]
137
- )
138
- logger.info("โœ… LoRA ์„ค์ • ์ƒ์„ฑ ์™„๋ฃŒ")
139
-
140
- # LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ
141
- logger.info("๐Ÿ”ง LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์‹œ์ž‘...")
142
- adapter_success = lora_manager.apply_lora_to_model("auto_adapter")
143
- if adapter_success:
144
- logger.info("โœ… LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์™„๋ฃŒ: auto_adapter")
145
- logger.info("๐ŸŽ‰ ์„œ๋ฒ„ ์‹œ์ž‘ ์‹œ LoRA ์ž๋™ ์„ค์ • ์™„๋ฃŒ!")
146
- else:
147
- logger.error("โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์‹คํŒจ")
148
- else:
149
- logger.error("โŒ LoRA ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ")
150
- except Exception as e:
151
- logger.error(f"โŒ LoRA ์ž๋™ ์„ค์ • ์ค‘ ์˜ค๋ฅ˜: {e}")
152
- else:
153
- logger.warning("โš ๏ธ LoRA๊ฐ€ ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•˜์—ฌ ์ž๋™ ์„ค์ • ๊ฑด๋„ˆ๋œ€")
154
 
155
  except Exception as e:
156
  logger.error(f"โŒ ๋ชจ๋ธ ๋กœ๋“œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค: {e}", exc_info=True)
@@ -325,14 +409,14 @@ def select_model_interactive():
325
  print(f"{i:2d}. {model_info['name']} ({model_info['model_id']})")
326
  while True:
327
  try:
328
- # choice = input(f"\n๐Ÿ“ ์‚ฌ์šฉํ•  ๋ชจ๋ธ ๋ฒˆํ˜ธ๋ฅผ ์„ ํƒํ•˜์„ธ์š” (1-{len(available_models)}): ")
329
- # selected_model = available_models[int(choice) - 1]
330
- selected_model = available_models[1]
331
  print(f"\nโœ… '{selected_model['name']}' ๋ชจ๋ธ์„ ์„ ํƒํ–ˆ์Šต๋‹ˆ๋‹ค.")
332
  return selected_model['model_id']
333
  except (ValueError, IndexError):
334
  print(f"โŒ 1์—์„œ {len(available_models)} ์‚ฌ์ด์˜ ์ˆซ์ž๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”.")
335
- except KeyboardInterrupt: sys.exit("\n\n๐Ÿ‘‹ ํ”„๋กœ๊ทธ๋žจ์„ ์ข…๋ฃŒํ•ฉ๋‹ˆ๋‹ค.")
 
336
 
337
  # @app.on_event("startup") - FastAPI ์ตœ์‹  ๋ฒ„์ „์—์„œ ์ž‘๋™ํ•˜์ง€ ์•Š์Œ
338
  # startup_event ํ•จ์ˆ˜๋Š” lifespan์œผ๋กœ ์ด๋™๋จ
@@ -358,7 +442,7 @@ async def load_model_endpoint(model_id: str):
358
 
359
  def load_model_sync(model_id: str):
360
  """๋ชจ๋ธ ๋ฐ ๊ด€๋ จ ํ”„๋กœ์„ธ์„œ๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ๋กœ๋”ฉ (์ตœ์ข… ์ˆ˜์ •๋ณธ)"""
361
- global model, tokenizer, processor, current_profile
362
 
363
  try:
364
  if model is not None:
@@ -377,6 +461,9 @@ def load_model_sync(model_id: str):
377
  # ์ด์ œ load_model์€ (model, processor)๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
378
  model, processor = current_profile.load_model()
379
 
 
 
 
380
  # processor์—์„œ tokenizer๋ฅผ ๊บผ๋‚ด ์ „์—ญ ๋ณ€์ˆ˜์— ํ• ๋‹นํ•ฉ๋‹ˆ๋‹ค.
381
  if hasattr(processor, 'tokenizer'):
382
  tokenizer = processor.tokenizer
@@ -386,64 +473,8 @@ def load_model_sync(model_id: str):
386
 
387
  logger.info(f"โœ… '{current_profile.display_name}' ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ!")
388
 
389
- # LoRA ๊ธฐ๋ณธ ๋ชจ๋ธ ์ž๋™ ๋กœ๋“œ
390
- try:
391
- if LORA_AVAILABLE and lora_manager:
392
- # ํ˜„์žฌ ๋กœ๋“œ๋œ ๋ชจ๋ธ ๊ฒฝ๋กœ ํ™•์ธ
393
- current_model_path = None
394
- if hasattr(current_profile, 'model_path') and current_profile.model_path:
395
- current_model_path = current_profile.model_path
396
- logger.info(f"๐Ÿ” ๋ชจ๋ธ ๊ฒฝ๋กœ ์ง์ ‘ ์‚ฌ์šฉ: {current_model_path}")
397
- elif hasattr(current_profile, 'model_id') and current_profile.model_id:
398
- # ๋ชจ๋ธ ID๋ฅผ ๊ฒฝ๋กœ๋กœ ๋ณ€ํ™˜
399
- model_id = current_profile.model_id
400
- logger.info(f"๐Ÿ” ๋ชจ๋ธ ID ๊ฐ์ง€: {model_id}")
401
-
402
- if model_id == "polyglot-ko-1.3b-chat":
403
- current_model_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
404
- elif model_id == "kanana-1.5-v-3b-instruct":
405
- current_model_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
406
- elif model_id == "polyglot-ko-5.8b-chat":
407
- current_model_path = "./lily_llm_core/models/polyglot_ko_5_8b_chat"
408
-
409
- logger.info(f"๐Ÿ” ๋ณ€ํ™˜๋œ ๋ชจ๋ธ ๊ฒฝ๋กœ: {current_model_path}")
410
-
411
- if current_model_path:
412
- logger.info(f"๐Ÿ”ง LoRA ๊ธฐ๋ณธ ๋ชจ๋ธ ์ž๋™ ๋กœ๋“œ ์‹œ์ž‘: {current_model_path}")
413
- success = lora_manager.load_base_model(current_model_path, "causal_lm")
414
- if success:
415
- logger.info(f"โœ… LoRA ๊ธฐ๋ณธ ๋ชจ๋ธ ์ž๋™ ๋กœ๋“œ ์„ฑ๊ณต: {current_model_path}")
416
-
417
- # LoRA ์„ค์ • ์ž๋™ ์ƒ์„ฑ
418
- try:
419
- logger.info("๐Ÿ”ง LoRA ์„ค์ • ์ž๋™ ์ƒ์„ฑ ์‹œ์ž‘...")
420
- lora_config = lora_manager.create_lora_config(
421
- r=16,
422
- lora_alpha=32,
423
- lora_dropout=0.1,
424
- bias="none",
425
- task_type="CAUSAL_LM",
426
- target_modules=["query_key_value", "mlp.dense_h_to_4h", "mlp.dense_4h_to_h"]
427
- )
428
- logger.info("โœ… LoRA ์„ค์ • ์ž๋™ ์ƒ์„ฑ ์™„๋ฃŒ")
429
-
430
- # LoRA ์–ด๋Œ‘ํ„ฐ ์ž๋™ ์ ์šฉ
431
- logger.info("๐Ÿ”ง LoRA ์–ด๋Œ‘ํ„ฐ ์ž๋™ ์ ์šฉ ์‹œ์ž‘...")
432
- adapter_success = lora_manager.apply_lora_to_model("auto_adapter")
433
- if adapter_success:
434
- logger.info("โœ… LoRA ์–ด๋Œ‘ํ„ฐ ์ž๋™ ์ ์šฉ ์™„๋ฃŒ: auto_adapter")
435
- else:
436
- logger.error("โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ž๋™ ์ ์šฉ ์‹คํŒจ")
437
- except Exception as e:
438
- logger.error(f"โŒ LoRA ๏ฟฝ๏ฟฝ๏ฟฝ์ •/์–ด๋Œ‘ํ„ฐ ์ž๋™ ์ƒ์„ฑ ์‹คํŒจ: {e}")
439
- else:
440
- logger.error(f"โŒ LoRA ๊ธฐ๋ณธ ๋ชจ๋ธ ์ž๋™ ๋กœ๋“œ ์‹คํŒจ: {current_model_path}")
441
- else:
442
- logger.warning("โš ๏ธ ํ˜„์žฌ ๋ชจ๋ธ์˜ ๊ฒฝ๋กœ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์–ด LoRA ์ž๋™ ๋กœ๋“œ ๊ฑด๋„ˆ๋œ€")
443
- else:
444
- logger.info("โš ๏ธ LoRA๊ฐ€ ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•˜์—ฌ ์ž๋™ ๋กœ๋“œ ๊ฑด๋„ˆ๋œ€")
445
- except Exception as e:
446
- logger.error(f"โŒ LoRA ์ž๋™ ๋กœ๋“œ ์ค‘ ์˜ค๋ฅ˜ ๋ฐœ์ƒ: {e}")
447
 
448
  except Exception as e:
449
  logger.error(f"โŒ load_model_sync ์‹คํŒจ: {e}")
 
54
  # ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ๋ฐ LoRA ๊ด€๋ฆฌ์ž ์ถ”๊ฐ€
55
  from lily_llm_core.context_manager import get_context_manager, context_manager
56
 
57
+ # ์ „์—ญ ๋ณ€์ˆ˜๋“ค
58
+ current_model = None # ๐Ÿ”„ ํ˜„์žฌ ๋กœ๋“œ๋œ ๋ชจ๋ธ ์ธ์Šคํ„ด์Šค
59
+ current_profile = None # ๐Ÿ”„ ํ˜„์žฌ ์„ ํƒ๋œ ๋ชจ๋ธ ํ”„๋กœํ•„
60
+ model_loaded = False # ๐Ÿ”„ ๋ชจ๋ธ ๋กœ๋“œ ์ƒํƒœ
61
+
62
  # LoRA ๊ด€๋ฆฌ์ž import (์„ ํƒ์ )
63
  try:
64
  from lily_llm_core.lora_manager import get_lora_manager, lora_manager
 
70
  lora_manager = None
71
  get_lora_manager = None
72
 
73
+ # ===== ๊ณตํ†ต LoRA ์„ค์ • ํ•จ์ˆ˜ =====
74
+ def setup_lora_for_model(profile, lora_manager):
75
+ """๋ชจ๋ธ ํ”„๋กœํ•„์— ๋”ฐ๋ฅธ LoRA ์„ค์ • (๊ณตํ†ต ํ•จ์ˆ˜)"""
76
+ if not LORA_AVAILABLE or not lora_manager:
77
+ logger.warning("โš ๏ธ LoRA๊ฐ€ ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•˜์—ฌ ์ž๋™ ์„ค์ • ๊ฑด๋„ˆ๋œ€")
78
+ return False
79
+
80
+ try:
81
+ logger.info("๐Ÿ”ง LoRA ์ž๋™ ์„ค์ • ์‹œ์ž‘...")
82
+
83
+ # ๐Ÿ”„ ๋ชจ๋ธ ํ”„๋กœํ•„์—์„œ ๊ฒฝ๋กœ ๋ฐ ํƒ€์ž… ์ •๋ณด ๊ฐ€์ ธ์˜ค๊ธฐ
84
+ current_model_path = None
85
+ model_type = "causal_lm" # ๊ธฐ๋ณธ๊ฐ’
86
+
87
+ # ๐Ÿ”„ ๋ชจ๋ธ ํ”„๋กœํ•„์—์„œ ๊ฒฝ๋กœ ๋ฐ ํƒ€์ž… ์ •๋ณด ๊ฐ€์ ธ์˜ค๊ธฐ
88
+ if hasattr(profile, 'local_path') and profile.local_path:
89
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ: ๋กœ์ปฌ ๊ฒฝ๋กœ ์‚ฌ์šฉ
90
+ current_model_path = profile.local_path
91
+ # ๐Ÿ”„ local_path ์‚ฌ์šฉ ์‹œ์—๋„ model_type ์„ค์ • ํ•„์š”
92
+ if hasattr(profile, 'model_id') and profile.model_id:
93
+ model_id = profile.model_id
94
+ if model_id == "kanana-1.5-v-3b-instruct":
95
+ model_type = "vision2seq" # ๐Ÿ”„ kanana๋Š” vision2seq ํƒ€์ž…
96
+ else:
97
+ model_type = "causal_lm" # ๊ธฐ๋ณธ๊ฐ’
98
+ logger.info(f"๐Ÿ” ๋ชจ๋ธ ํ”„๋กœํ•„์—์„œ ๋กœ์ปฌ ๊ฒฝ๋กœ ์‚ฌ์šฉ: {current_model_path}")
99
+ logger.info(f"๐Ÿ” ๊ฒฐ์ •๋œ ๋ชจ๋ธ ํƒ€์ž…: {model_type}")
100
+ elif hasattr(profile, 'model_id') and profile.model_id:
101
+ # ๋ชจ๋ธ ID๋ฅผ ๊ธฐ๋ฐ˜์œผ๋กœ ๊ฒฝ๋กœ ๊ฒฐ์ •
102
+ model_id = profile.model_id
103
+ logger.info(f"๐Ÿ” ๋ชจ๋ธ ID ๊ธฐ๋ฐ˜ ๊ฒฝ๋กœ ๊ฒฐ์ •: {model_id}")
104
+
105
+ # ๐Ÿ”„ ํ™˜๊ฒฝ์— ๋”ฐ๋ฅธ ๊ฒฝ๋กœ ๊ฒฐ์ •
106
+ if hasattr(profile, 'is_local') and profile.is_local:
107
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ: ๋กœ์ปฌ ๊ฒฝ๋กœ ์‚ฌ์šฉ
108
+ if model_id == "polyglot-ko-1.3b-chat":
109
+ current_model_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
110
+ model_type = "causal_lm"
111
+ elif model_id == "kanana-1.5-v-3b-instruct":
112
+ current_model_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
113
+ model_type = "vision2seq" # ๐Ÿ”„ kanana๋Š” vision2seq ํƒ€์ž…
114
+ elif model_id == "polyglot-ko-5.8b-chat":
115
+ current_model_path = "./lily_llm_core/models/polyglot_ko_5_8b_chat"
116
+ model_type = "causal_lm"
117
+ else:
118
+ # ๋ฐฐํฌ ํ™˜๊ฒฝ: HF ๋ชจ๋ธ๋ช… ์‚ฌ์šฉ (๋กœ์ปฌ ๊ฒฝ๋กœ ์—†์Œ)
119
+ current_model_path = None
120
+ logger.info(f"๐Ÿ” ๋ฐฐํฌ ํ™˜๊ฒฝ: LoRA ์„ค์ • ๊ฑด๋„ˆ๋œ€ (HF ๋ชจ๋ธ)")
121
+ return False
122
+
123
+ logger.info(f"๐Ÿ” ๊ฒฐ์ •๋œ ๋ชจ๋ธ ๊ฒฝ๋กœ: {current_model_path}")
124
+ logger.info(f"๐Ÿ” ๊ฒฐ์ •๋œ ๋ชจ๋ธ ํƒ€์ž…: {model_type}")
125
+
126
+ if not current_model_path:
127
+ logger.warning("โš ๏ธ ํ˜„์žฌ ๋ชจ๋ธ์˜ ๊ฒฝ๋กœ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์–ด LoRA ์ž๋™ ๋กœ๋“œ ๊ฑด๋„ˆ๋œ€")
128
+ return False
129
+
130
+ logger.info(f"๐Ÿ” LoRA ๋ชจ๋ธ ๊ฒฝ๋กœ: {current_model_path}")
131
+ logger.info(f"๐Ÿ” LoRA ๋ชจ๋ธ ํƒ€์ž…: {model_type}")
132
+
133
+ # ๐Ÿ”„ ์ด๋ฏธ ๋กœ๋“œ๋œ ๋ฉ”์ธ ๋ชจ๋ธ์„ LoRA์— ์ง์ ‘ ์ ์šฉ (์ค‘๋ณต ๋กœ๋“œ ๋ฐฉ์ง€)
134
+ logger.info("๐Ÿ”ง ๊ธฐ์กด ๋ฉ”์ธ ๋ชจ๋ธ์— LoRA ์ง์ ‘ ์ ์šฉ ์‹œ์ž‘...")
135
+
136
+ # ๐Ÿ”„ lora_manager์— ๊ธฐ์กด ๋ฉ”์ธ ๋ชจ๋ธ ์„ค์ •
137
+ if hasattr(lora_manager, 'base_model') and lora_manager.base_model is None:
138
+ # ์ „์—ญ ๋ณ€์ˆ˜์—์„œ ๋ฉ”์ธ ๋ชจ๋ธ ๊ฐ€์ ธ์˜ค๊ธฐ
139
+ from lily_llm_api.app_v2 import current_model
140
+ if current_model is not None:
141
+ lora_manager.base_model = current_model
142
+ logger.info("โœ… ๊ธฐ์กด ๋ฉ”์ธ ๋ชจ๋ธ์„ LoRA ๊ด€๋ฆฌ์ž์— ์„ค์ • ์™„๋ฃŒ")
143
+ else:
144
+ logger.warning("โš ๏ธ ๋ฉ”์ธ ๋ชจ๋ธ์„ ์ฐพ์„ ์ˆ˜ ์—†์–ด LoRA ์„ค์ • ๊ฑด๋„ˆ๋œ€")
145
+ return False
146
+
147
+ # LoRA ์„ค์ • ์ƒ์„ฑ
148
+ logger.info("๐Ÿ”ง LoRA ์„ค์ • ์ƒ์„ฑ ์‹œ์ž‘...")
149
+
150
+ # ๐Ÿ”„ ๋ชจ๋ธ๋ณ„ target modules ์„ค์ •
151
+ if model_type == "vision2seq" and "kanana" in profile.model_id:
152
+ # Kanana ๋ชจ๋ธ: Llama ๊ธฐ๋ฐ˜ language model ์‚ฌ์šฉ (์ฒซ ๋ฒˆ์งธ ๋ ˆ์ด์–ด๋งŒ ์‚ฌ์šฉ)
153
+ target_modules = [
154
+ "language_model.model.layers.0.self_attn.q_proj",
155
+ "language_model.model.layers.0.self_attn.k_proj",
156
+ "language_model.model.layers.0.self_attn.v_proj",
157
+ "language_model.model.layers.0.self_attn.o_proj",
158
+ "language_model.model.layers.0.mlp.gate_proj",
159
+ "language_model.model.layers.0.mlp.up_proj",
160
+ "language_model.model.layers.0.mlp.down_proj"
161
+ ]
162
+ else:
163
+ # ๊ธฐ์กด ๋ชจ๋ธ๋“ค: GPTNeoX ๊ธฐ๋ฐ˜
164
+ target_modules = ["query_key_value", "mlp.dense_h_to_4h", "mlp.dense_4h_to_h"]
165
+
166
+ lora_config = lora_manager.create_lora_config(
167
+ r=16,
168
+ lora_alpha=32,
169
+ lora_dropout=0.1,
170
+ bias="none",
171
+ task_type="CAUSAL_LM" if model_type == "causal_lm" else "VISION_2_SEQ",
172
+ target_modules=target_modules
173
+ )
174
+ logger.info("โœ… LoRA ์„ค์ • ์ƒ์„ฑ ์™„๋ฃŒ")
175
+
176
+ # LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ (๊ธฐ์กด ๋ฉ”์ธ ๋ชจ๋ธ์— ์ง์ ‘)
177
+ logger.info("๐Ÿ”ง LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์‹œ์ž‘...")
178
+ adapter_success = lora_manager.apply_lora_to_model("auto_adapter")
179
+ if adapter_success:
180
+ logger.info("โœ… LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์™„๋ฃŒ: auto_adapter")
181
+ logger.info("๐ŸŽ‰ LoRA ์ž๋™ ์„ค์ • ์™„๋ฃŒ!")
182
+ return True
183
+ else:
184
+ logger.error("โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์‹คํŒจ")
185
+ return False
186
+
187
+ except Exception as e:
188
+ logger.error(f"โŒ LoRA ์ž๋™ ์„ค์ • ์ค‘ ์˜ค๋ฅ˜: {e}")
189
+ return False
190
+
191
  # ===== lifespan ์ปจํ…์ŠคํŠธ ๋งค๋‹ˆ์ € (์„œ๋ฒ„ ์‹œ์ž‘/์ข…๋ฃŒ ์ด๋ฒคํŠธ) =====
192
  from contextlib import asynccontextmanager
193
 
 
204
  except Exception as e:
205
  logger.error(f"โŒ CPU ์Šค๋ ˆ๋“œ ์„ค์ • ์‹คํŒจ: {e}")
206
 
207
+ # ๐Ÿ”„ ๋ชจ๋ธ ์„ ํƒ ๋ณต์›: ์‚ฌ์šฉ์ž๊ฐ€ ๋ชจ๋ธ์„ ์„ ํƒํ•  ์ˆ˜ ์žˆ๋„๋ก
208
+ selected_model_id = select_model_interactive()
209
+ logger.info(f"๐Ÿš€ ์„œ๋ฒ„ ์‹œ์ž‘ ์‹œ ์„ ํƒ๋œ ๋ชจ๋ธ: {selected_model_id}")
210
 
211
  try:
212
  await load_model_async(selected_model_id)
 
233
  except Exception as e:
234
  logger.warning(f"โš ๏ธ ๊ณ ๊ธ‰ ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ์„ค์ • ์‹คํŒจ: {e}")
235
 
236
+ # ๐Ÿ”„ LoRA ์ž๋™ ์„ค์ •์€ load_model_async ๋‚ด๋ถ€์—์„œ ์ด๋ฏธ ์ฒ˜๋ฆฌ๋จ
237
+ # setup_lora_for_model(current_profile, lora_manager) # ์ค‘๋ณต ํ˜ธ์ถœ ์ œ๊ฑฐ
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
238
 
239
  except Exception as e:
240
  logger.error(f"โŒ ๋ชจ๋ธ ๋กœ๋“œ์— ์‹คํŒจํ–ˆ์Šต๋‹ˆ๋‹ค: {e}", exc_info=True)
 
409
  print(f"{i:2d}. {model_info['name']} ({model_info['model_id']})")
410
  while True:
411
  try:
412
+ choice = input(f"\n๐Ÿ“ ์‚ฌ์šฉํ•  ๋ชจ๋ธ ๋ฒˆํ˜ธ๋ฅผ ์„ ํƒํ•˜์„ธ์š” (1-{len(available_models)}): ")
413
+ selected_model = available_models[int(choice) - 1]
 
414
  print(f"\nโœ… '{selected_model['name']}' ๋ชจ๋ธ์„ ์„ ํƒํ–ˆ์Šต๋‹ˆ๋‹ค.")
415
  return selected_model['model_id']
416
  except (ValueError, IndexError):
417
  print(f"โŒ 1์—์„œ {len(available_models)} ์‚ฌ์ด์˜ ์ˆซ์ž๋ฅผ ์ž…๋ ฅํ•ด์ฃผ์„ธ์š”.")
418
+ except KeyboardInterrupt:
419
+ sys.exit("\n\n๐Ÿ‘‹ ํ”„๋กœ๊ทธ๋žจ์„ ์ข…๋ฃŒํ•ฉ๋‹ˆ๋‹ค.")
420
 
421
  # @app.on_event("startup") - FastAPI ์ตœ์‹  ๋ฒ„์ „์—์„œ ์ž‘๋™ํ•˜์ง€ ์•Š์Œ
422
  # startup_event ํ•จ์ˆ˜๋Š” lifespan์œผ๋กœ ์ด๋™๋จ
 
442
 
443
  def load_model_sync(model_id: str):
444
  """๋ชจ๋ธ ๋ฐ ๊ด€๋ จ ํ”„๋กœ์„ธ์„œ๋ฅผ ๋™๊ธฐ์ ์œผ๋กœ ๋กœ๋”ฉ (์ตœ์ข… ์ˆ˜์ •๋ณธ)"""
445
+ global model, tokenizer, processor, current_profile, current_model
446
 
447
  try:
448
  if model is not None:
 
461
  # ์ด์ œ load_model์€ (model, processor)๋ฅผ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.
462
  model, processor = current_profile.load_model()
463
 
464
+ # ๐Ÿ”„ ์ „์—ญ ๋ณ€์ˆ˜์— ๋ชจ๋ธ ์„ค์ • (LoRA์—์„œ ์‚ฌ์šฉ)
465
+ current_model = model
466
+
467
  # processor์—์„œ tokenizer๋ฅผ ๊บผ๋‚ด ์ „์—ญ ๋ณ€์ˆ˜์— ํ• ๋‹นํ•ฉ๋‹ˆ๋‹ค.
468
  if hasattr(processor, 'tokenizer'):
469
  tokenizer = processor.tokenizer
 
473
 
474
  logger.info(f"โœ… '{current_profile.display_name}' ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ!")
475
 
476
+ # ๐Ÿ”„ LoRA ๊ธฐ๋ณธ ๋ชจ๋ธ ์ž๋™ ๋กœ๋“œ (๊ณตํ†ต ํ•จ์ˆ˜ ์‚ฌ์šฉ)
477
+ setup_lora_for_model(current_profile, lora_manager)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
478
 
479
  except Exception as e:
480
  logger.error(f"โŒ load_model_sync ์‹คํŒจ: {e}")
lily_llm_api/models/kanana_1_5_v_3b_instruct.py CHANGED
@@ -25,16 +25,21 @@ class Kanana15V3bInstructProfile:
25
  # ํ™˜๊ฒฝ ๊ฐ์ง€
26
  self.is_local = self._detect_local_environment()
27
 
28
- # ๋ชจ๋ธ ๊ฒฝ๋กœ ์„ค์ •
 
 
29
  if self.is_local:
30
- self.model_name = "gbrabbit/lily-math-model" # ๋กœ์ปฌ์—์„œ๋„ HF ๋ชจ๋ธ๋ช… ์‚ฌ์šฉ
31
  self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
32
  self.display_name = "kanana-1.5-v-3b-instruct"
33
  else:
34
- self.model_name = "gbrabbit/lily-math-model" # Hugging Face Hub ๋ชจ๋ธ ๊ฒฝ๋กœ
35
- self.local_path = None # ์„œ๋ฒ„์—์„œ๋Š” ๋กœ์ปฌ ๊ฒฝ๋กœ ์‚ฌ์šฉ ์•ˆํ•จ
36
  self.display_name = "kanana-1.5-v-3b-instruct"
37
 
 
 
 
38
  self.description = "์นด์นด์˜ค ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ (3.6B) - Math RAG ํŠนํ™”"
39
  self.language = "ko"
40
  self.model_size = "3.6B"
@@ -97,15 +102,19 @@ class Kanana15V3bInstructProfile:
97
  logger.error(f"โŒ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ ์‹คํŒจ: {e}")
98
 
99
  def load_model(self) -> Tuple[Any, Any]:
100
- """๋ชจ๋ธ ๋กœ๋“œ (๊ณต์‹์ ์ธ ๋ฐฉ๋ฒ• + ์ ˆ๋Œ€ ๊ฒฝ๋กœ sys.path ์ˆ˜์ • ์ตœ์ข…๋ณธ)"""
101
- logger.info(f"๐Ÿ“ฅ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์ค‘ (๊ณต์‹ ๋ฐฉ๋ฒ•)...")
102
 
103
- # self.local_path๋ฅผ ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋กœ ๋ณ€ํ™˜
104
- absolute_model_path = os.path.abspath(self.local_path)
105
-
106
- use_local = Path(absolute_model_path).exists() and any(Path(absolute_model_path).iterdir())
107
- # model_path ๋ณ€์ˆ˜์— ์ ˆ๋Œ€ ๊ฒฝ๋กœ๋ฅผ ์‚ฌ์šฉ
108
- model_path = absolute_model_path if use_local else self.model_name
 
 
 
 
109
 
110
  try:
111
  from transformers import AutoModelForVision2Seq, AutoProcessor
@@ -119,16 +128,17 @@ class Kanana15V3bInstructProfile:
119
  processor = AutoProcessor.from_pretrained(
120
  model_path,
121
  trust_remote_code=True,
122
- local_files_only=use_local
 
123
  )
124
 
125
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
126
 
127
  # dtype ์„ค์ • ์ตœ์ ํ™” - CPU์—์„œ๋Š” float32 ์‚ฌ์šฉ
128
  if device == 'cuda':
129
- selected_dtype = torch.float16 # GPU์—์„œ๋Š” float16์œผ๋กœ ๋ฉ”๋ชจ๋ฆฌ ์ ˆ์•ฝ
130
  else:
131
- selected_dtype = torch.float32 # CPU์—์„œ๋Š” float32๋กœ ์•ˆ์ •์„ฑ ํ™•๋ณด
132
 
133
  logger.info(f"๐Ÿ”ง ์„ ํƒ๋œ dtype: {selected_dtype} (device: {device})")
134
 
@@ -163,8 +173,16 @@ class Kanana15V3bInstructProfile:
163
  "top_p": 0.95,
164
  "repetition_penalty": 1.1,
165
  "no_repeat_ngram_size": 3,
166
- "pad_token_id": None,
167
- "eos_token_id": None,
 
 
 
 
 
 
 
 
168
  }
169
 
170
  def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
@@ -220,7 +238,9 @@ class Kanana15V3bInstructProfile:
220
  # ์ผ๋ฐ˜์ ์ธ ํ”„๋กฌํ”„ํŠธ ํŒจํ„ด ์ œ๊ฑฐ ์‹œ๋„
221
  patterns_to_remove = [
222
  "<|im_start|>user\n",
 
223
  "<|im_end|>",
 
224
  "<image>",
225
  "user\n",
226
  "assistant\n"
 
25
  # ํ™˜๊ฒฝ ๊ฐ์ง€
26
  self.is_local = self._detect_local_environment()
27
 
28
+ # ๐Ÿ”„ ๋ชจ๋ธ ๊ฒฝ๋กœ ์„ค์ • (๋กœ์ปฌ/๋ฐฐํฌ ํ™˜๊ฒฝ ๋ชจ๋‘ ์ง€์›)
29
+ self.model_name = "kakaocorp/kanana-1.5-v-3b-instruct"
30
+
31
  if self.is_local:
32
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ: ๋กœ์ปฌ ๊ฒฝ๋กœ ์šฐ์„ , ์—†์œผ๋ฉด HF์—์„œ ๋‹ค์šด๋กœ๋“œ
33
  self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
34
  self.display_name = "kanana-1.5-v-3b-instruct"
35
  else:
36
+ # ๋ฐฐํฌ ํ™˜๊ฒฝ: HF ๋ชจ๋ธ๋ช… ์‚ฌ์šฉ, ๋กœ์ปฌ ๊ฒฝ๋กœ๋Š” None
37
+ self.local_path = None
38
  self.display_name = "kanana-1.5-v-3b-instruct"
39
 
40
+ # ๐Ÿ”„ ๋ชจ๋ธ ID ์ถ”๊ฐ€ (LoRA ๋ฐ ๊ธฐํƒ€ ์„ค์ •์—์„œ ์‚ฌ์šฉ)
41
+ self.model_id = "kanana-1.5-v-3b-instruct"
42
+
43
  self.description = "์นด์นด์˜ค ๋ฉ€ํ‹ฐ๋ชจ๋‹ฌ ๋ชจ๋ธ (3.6B) - Math RAG ํŠนํ™”"
44
  self.language = "ko"
45
  self.model_size = "3.6B"
 
102
  logger.error(f"โŒ ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋กœ๋“œ ์‹คํŒจ: {e}")
103
 
104
  def load_model(self) -> Tuple[Any, Any]:
105
+ """๋ชจ๋ธ ๋กœ๋“œ (๋กœ์ปฌ/๋ฐฐํฌ ํ™˜๊ฒฝ ๋ชจ๋‘ ์ง€์›)"""
106
+ logger.info(f"๐Ÿ“ฅ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์ค‘...")
107
 
108
+ # ๐Ÿ”„ ํ™˜๊ฒฝ์— ๋”ฐ๋ฅธ ๋ชจ๋ธ ๊ฒฝ๋กœ ๊ฒฐ์ •
109
+ if self.is_local and self.local_path:
110
+ # ๋กœ์ปฌ ํ™˜๊ฒฝ: ๋กœ์ปฌ ๊ฒฝ๋กœ ํ™•์ธ
111
+ absolute_model_path = os.path.abspath(self.local_path)
112
+ use_local = Path(absolute_model_path).exists() and any(Path(absolute_model_path).iterdir())
113
+ model_path = absolute_model_path if use_local else self.model_name
114
+ else:
115
+ # ๋ฐฐํฌ ํ™˜๊ฒฝ: HF ๋ชจ๋ธ๋ช… ์‚ฌ์šฉ
116
+ use_local = False
117
+ model_path = self.model_name
118
 
119
  try:
120
  from transformers import AutoModelForVision2Seq, AutoProcessor
 
128
  processor = AutoProcessor.from_pretrained(
129
  model_path,
130
  trust_remote_code=True,
131
+ local_files_only=use_local,
132
+ use_fast=True # ๐Ÿ”„ ๋น ๋ฅธ ์ด๋ฏธ์ง€ ํ”„๋กœ์„ธ์„œ ์‚ฌ์šฉ (๊ฒฝ๊ณ  ์ œ๊ฑฐ)
133
  )
134
 
135
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
136
 
137
  # dtype ์„ค์ • ์ตœ์ ํ™” - CPU์—์„œ๋Š” float32 ์‚ฌ์šฉ
138
  if device == 'cuda':
139
+ selected_dtype = torch.bfloat16 # GPU์—์„œ๋Š” float16์œผ๋กœ ๋ฉ”๋ชจ๋ฆฌ ์ ˆ์•ฝ
140
  else:
141
+ selected_dtype = torch.bfloat16 # CPU์—์„œ๋Š” float32๋กœ ์•ˆ์ •์„ฑ ํ™•๋ณด
142
 
143
  logger.info(f"๐Ÿ”ง ์„ ํƒ๋œ dtype: {selected_dtype} (device: {device})")
144
 
 
173
  "top_p": 0.95,
174
  "repetition_penalty": 1.1,
175
  "no_repeat_ngram_size": 3,
176
+ "pad_token_id": 128001,
177
+ "eos_token_id": 128009,
178
+ "bos_token_id": 128000,
179
+ "use_cache": True,
180
+ # "early_stopping": False,
181
+ # "num_beams": 1,
182
+ # "num_return_sequences": 1,
183
+ # "return_full_text": False,
184
+ # "return_dict": False,
185
+ # "return_dict_in_generate": False,
186
  }
187
 
188
  def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
 
238
  # ์ผ๋ฐ˜์ ์ธ ํ”„๋กฌํ”„ํŠธ ํŒจํ„ด ์ œ๊ฑฐ ์‹œ๋„
239
  patterns_to_remove = [
240
  "<|im_start|>user\n",
241
+ "<|im_start|>assistant\n",
242
  "<|im_end|>",
243
+ "<|im_in_end|>",
244
  "<image>",
245
  "user\n",
246
  "assistant\n"
lily_llm_api/models/polyglot_ko_1_3b_chat.py CHANGED
@@ -24,6 +24,10 @@ class PolyglotKo13bChatProfile:
24
  self.model_name = "heegyu/polyglot-ko-1.3b-chat"
25
  self.local_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
26
  self.display_name = "Polyglot-ko-1.3b-chat"
 
 
 
 
27
  self.description = "ํ•œ๊ตญ์–ด ์ฑ„ํŒ… ์ „์šฉ ๊ฒฝ๋Ÿ‰ ๋ชจ๋ธ (1.3B)"
28
  self.language = "ko"
29
  self.model_size = "1.3B"
 
24
  self.model_name = "heegyu/polyglot-ko-1.3b-chat"
25
  self.local_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
26
  self.display_name = "Polyglot-ko-1.3b-chat"
27
+
28
+ # ๐Ÿ”„ ๋ชจ๋ธ ID ์ถ”๊ฐ€ (LoRA ๋ฐ ๊ธฐํƒ€ ์„ค์ •์—์„œ ์‚ฌ์šฉ)
29
+ self.model_id = "polyglot-ko-1.3b-chat"
30
+
31
  self.description = "ํ•œ๊ตญ์–ด ์ฑ„ํŒ… ์ „์šฉ ๊ฒฝ๋Ÿ‰ ๋ชจ๋ธ (1.3B)"
32
  self.language = "ko"
33
  self.model_size = "1.3B"
lily_llm_api/models/polyglot_ko_5_8b_chat.py CHANGED
@@ -21,6 +21,10 @@ class PolyglotKo58bChatProfile:
21
  self.model_name = "heegyu/polyglot-ko-5.8b-chat"
22
  self.local_path = "./lily_llm_core/models/polyglot_ko_5_8b_chat"
23
  self.display_name = "Polyglot-ko-5.8b-chat"
 
 
 
 
24
  self.description = "ํ•œ๊ตญ์–ด ์ฑ„ํŒ… ์ „์šฉ ๊ณ ์„ฑ๋Šฅ ๋ชจ๋ธ (5.8B)"
25
  self.language = "ko"
26
  self.model_size = "5.8B"
@@ -85,7 +89,7 @@ class PolyglotKo58bChatProfile:
85
 
86
  # CPU์—์„œ๋Š” float32๊ฐ€ ๋” ์•ˆ์ •์ , CUDA์—์„œ๋Š” float16 ์‚ฌ์šฉ
87
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
88
- selected_dtype = torch.float16 if device == 'cuda' else torch.bfloat16
89
 
90
  model = AutoModelForCausalLM.from_pretrained(
91
  model_path,
 
21
  self.model_name = "heegyu/polyglot-ko-5.8b-chat"
22
  self.local_path = "./lily_llm_core/models/polyglot_ko_5_8b_chat"
23
  self.display_name = "Polyglot-ko-5.8b-chat"
24
+
25
+ # ๐Ÿ”„ ๋ชจ๋ธ ID ์ถ”๊ฐ€ (LoRA ๋ฐ ๊ธฐํƒ€ ์„ค์ •์—์„œ ์‚ฌ์šฉ)
26
+ self.model_id = "polyglot-ko-5.8b-chat"
27
+
28
  self.description = "ํ•œ๊ตญ์–ด ์ฑ„ํŒ… ์ „์šฉ ๊ณ ์„ฑ๋Šฅ ๋ชจ๋ธ (5.8B)"
29
  self.language = "ko"
30
  self.model_size = "5.8B"
 
89
 
90
  # CPU์—์„œ๋Š” float32๊ฐ€ ๋” ์•ˆ์ •์ , CUDA์—์„œ๋Š” float16 ์‚ฌ์šฉ
91
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
92
+ selected_dtype = torch.bfloat16 if device == 'cuda' else torch.bfloat16
93
 
94
  model = AutoModelForCausalLM.from_pretrained(
95
  model_path,
lily_llm_core/lora_manager.py CHANGED
@@ -156,6 +156,16 @@ class LoRAManager:
156
  torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
157
  device_map="auto" if self.device == "cuda" else None
158
  )
 
 
 
 
 
 
 
 
 
 
159
  else:
160
  raise ValueError(f"์ง€์›ํ•˜์ง€ ์•Š๋Š” ๋ชจ๋ธ ํƒ€์ž…: {model_type}")
161
 
@@ -190,6 +200,9 @@ class LoRAManager:
190
  # ์ง์ ‘ TaskType ์‚ฌ์šฉ (๋ฌธ์ž์—ด ๋ณ€ํ™˜ ์ œ๊ฑฐ)
191
  if task_type == "CAUSAL_LM":
192
  task_type_enum = TaskType.CAUSAL_LM
 
 
 
193
  elif task_type == "SEQ_2_SEQ_LM":
194
  task_type_enum = TaskType.SEQ_2_SEQ_LM
195
  elif task_type == "SEQUENCE_CLASSIFICATION":
 
156
  torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
157
  device_map="auto" if self.device == "cuda" else None
158
  )
159
+ elif model_type == "vision2seq":
160
+ # ๐Ÿ”„ Vision2Seq ๋ชจ๋ธ ์ง€์› ์ถ”๊ฐ€ (kanana ๋“ฑ)
161
+ from transformers import AutoModelForVision2Seq
162
+ self.base_model = AutoModelForVision2Seq.from_pretrained(
163
+ str(model_path),
164
+ trust_remote_code=True,
165
+ local_files_only=True,
166
+ torch_dtype=torch.bfloat16 if self.device == "cuda" else torch.bfloat16,
167
+ device_map="auto" if self.device == "cuda" else None
168
+ )
169
  else:
170
  raise ValueError(f"์ง€์›ํ•˜์ง€ ์•Š๋Š” ๋ชจ๋ธ ํƒ€์ž…: {model_type}")
171
 
 
200
  # ์ง์ ‘ TaskType ์‚ฌ์šฉ (๋ฌธ์ž์—ด ๋ณ€ํ™˜ ์ œ๊ฑฐ)
201
  if task_type == "CAUSAL_LM":
202
  task_type_enum = TaskType.CAUSAL_LM
203
+ elif task_type == "VISION_2_SEQ":
204
+ # ๐Ÿ”„ Vision2Seq ๋ชจ๋ธ ์ง€์› ์ถ”๊ฐ€
205
+ task_type_enum = TaskType.SEQ_2_SEQ_LM # Vision2Seq๋Š” SEQ_2_SEQ_LM๊ณผ ์œ ์‚ฌ
206
  elif task_type == "SEQ_2_SEQ_LM":
207
  task_type_enum = TaskType.SEQ_2_SEQ_LM
208
  elif task_type == "SEQUENCE_CLASSIFICATION":
lily_llm_core/rag_processor.py CHANGED
@@ -245,7 +245,7 @@ class RAGProcessor:
245
  "context": "",
246
  "sources": []
247
  }
248
-
249
  def _generate_text_response(self, query: str, text_docs: List[Document],
250
  llm_model, image_files: List[str]) -> Dict[str, Any]:
251
  """ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ์‘๋‹ต ์ƒ์„ฑ"""
@@ -255,8 +255,8 @@ class RAGProcessor:
255
 
256
  # ํ”„๋กฌํ”„ํŠธ ์ƒ์„ฑ
257
  prompt = f"""
258
- ์งˆ๋ฌธ: {query}
259
-
260
  ์ฐธ๊ณ  ๋ฌธ์„œ:
261
  {text_context}
262
 
@@ -397,7 +397,7 @@ class RAGProcessor:
397
  "document_id": document_id,
398
  "error": str(e)
399
  }
400
-
401
  def get_performance_stats(self) -> Dict[str, Any]:
402
  """์„ฑ๋Šฅ ํ†ต๊ณ„ ๋ฐ˜ํ™˜"""
403
  try:
 
245
  "context": "",
246
  "sources": []
247
  }
248
+
249
  def _generate_text_response(self, query: str, text_docs: List[Document],
250
  llm_model, image_files: List[str]) -> Dict[str, Any]:
251
  """ํ…์ŠคํŠธ ๊ธฐ๋ฐ˜ ์‘๋‹ต ์ƒ์„ฑ"""
 
255
 
256
  # ํ”„๋กฌํ”„ํŠธ ์ƒ์„ฑ
257
  prompt = f"""
258
+ ์งˆ๋ฌธ: {query}
259
+
260
  ์ฐธ๊ณ  ๋ฌธ์„œ:
261
  {text_context}
262
 
 
397
  "document_id": document_id,
398
  "error": str(e)
399
  }
400
+
401
  def get_performance_stats(self) -> Dict[str, Any]:
402
  """์„ฑ๋Šฅ ํ†ต๊ณ„ ๋ฐ˜ํ™˜"""
403
  try:
test_lora_integration.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ LoRA ํ†ตํ•ฉ ๋ฐ ๋ชจ๋ธ ํƒ€์ž… ์ง€์› ํ…Œ์ŠคํŠธ ์Šคํฌ๋ฆฝํŠธ
4
+ """
5
+ import sys
6
+ import os
7
+ from pathlib import Path
8
+
9
+ # ํ”„๋กœ์ ํŠธ ๋ฃจํŠธ ๊ฒฝ๋กœ ์ถ”๊ฐ€
10
+ project_root = Path(__file__).parent
11
+ sys.path.insert(0, str(project_root))
12
+
13
def test_lora_integration():
    """Smoke-test model profile loading and the shared LoRA setup helper.

    For every entry returned by ``list_available_models()`` the profile is
    loaded and its main attributes are printed, then the model path and model
    type that LoRA setup would use are simulated. Afterwards
    ``setup_lora_for_model`` is imported and called for the first available
    model with ``None`` in place of a LoRA manager, and the result is printed.

    Returns:
        None. All results are reported on stdout.

    Raises:
        Nothing. Every failure is caught and printed so the remaining checks
        still run.
    """
    # Local checkpoint directory and loader type for each known model ID.
    known_local_models = {
        "polyglot-ko-1.3b-chat": (
            "./lily_llm_core/models/polyglot_ko_1_3b_chat",
            "causal_lm",
        ),
        "kanana-1.5-v-3b-instruct": (
            "./lily_llm_core/models/kanana_1_5_v_3b_instruct",
            "vision2seq",
        ),
        "polyglot-ko-5.8b-chat": (
            "./lily_llm_core/models/polyglot_ko_5_8b_chat",
            "causal_lm",
        ),
    }

    print("๐Ÿ” LoRA ํ†ตํ•ฉ ํ…Œ์ŠคํŠธ ์‹œ์ž‘...")

    try:
        # Imported lazily so a missing project package is reported by the
        # outer handler instead of aborting the script at import time.
        from lily_llm_api.models import get_model_profile, list_available_models

        available_models = list_available_models()
        print(f"๐Ÿ“‹ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ: {len(available_models)}๊ฐœ")

        for model_info in available_models:
            model_id = model_info['model_id']
            print(f"\n๐Ÿ“ ๋ชจ๋ธ: {model_info['name']} ({model_id})")

            try:
                profile = get_model_profile(model_id)
                print(f" โœ… ํ”„๋กœํ•„ ๋กœ๋“œ ์„ฑ๊ณต")
                print(f" - display_name: {getattr(profile, 'display_name', 'N/A')}")
                print(f" - model_id: {getattr(profile, 'model_id', 'N/A')}")
                print(f" - local_path: {getattr(profile, 'local_path', 'N/A')}")
                print(f" - is_local: {getattr(profile, 'is_local', 'N/A')}")
                print(f" - multimodal: {getattr(profile, 'multimodal', 'N/A')}")

                # LoRA path simulation
                print(f" ๐Ÿ” LoRA ๊ฒฝ๋กœ ์‹œ๋ฎฌ๋ ˆ์ด์…˜:")

                # Reset the per-model state on every iteration. These names used
                # to be bound only inside some branches, so an unrecognised
                # model ID either raised UnboundLocalError (misreported below as
                # a profile load failure) or printed the previous model's values.
                current_model_path = None
                model_type = "causal_lm"  # default loader type

                if hasattr(profile, 'local_path') and profile.local_path:
                    current_model_path = profile.local_path
                    print(f" - ๋กœ์ปฌ ๊ฒฝ๋กœ ์ง์ ‘ ์‚ฌ์šฉ: {current_model_path}")
                elif hasattr(profile, 'model_id') and profile.model_id:
                    model_id = profile.model_id
                    print(f" - ๋ชจ๋ธ ID ๊ธฐ๋ฐ˜: {model_id}")

                    if hasattr(profile, 'is_local') and profile.is_local:
                        # Local environment: map the model ID to its checkpoint
                        # directory; unknown IDs keep the defaults set above.
                        current_model_path, model_type = known_local_models.get(
                            model_id, (current_model_path, model_type)
                        )

                        print(f" - ๋กœ์ปฌ ํ™˜๊ฒฝ ๊ฒฝ๋กœ: {current_model_path}")
                        print(f" - ๋ชจ๋ธ ํƒ€์ž…: {model_type}")
                    else:
                        # Not a local profile: no LoRA path is resolved.
                        print(f" - ๋ฐฐํฌ ํ™˜๊ฒฝ: LoRA ์„ค์ • ๊ฑด๋„ˆ๋œ€")
                        current_model_path = None

            except Exception as e:
                # Best-effort script: report and continue with the next model.
                print(f" โŒ ํ”„๋กœํ•„ ๋กœ๋“œ ์‹คํŒจ: {e}")

        print("\n" + "="*50)
        print("๐ŸŽฏ ๊ณตํ†ต LoRA ์„ค์ • ํ•จ์ˆ˜ ํ…Œ์ŠคํŠธ")
        print("="*50)

        # Exercise the shared LoRA setup helper.
        try:
            from lily_llm_api.app_v2 import setup_lora_for_model
            print("โœ… ๊ณตํ†ต LoRA ์„ค์ • ํ•จ์ˆ˜ import ์„ฑ๊ณต")

            # Use the first available model as the test subject.
            if available_models:
                test_model_id = available_models[0]['model_id']
                test_profile = get_model_profile(test_model_id)
                print(f"๐Ÿ“ ํ…Œ์ŠคํŠธ ๋ชจ๋ธ: {test_profile.display_name}")

                # Call the helper with no LoRA manager (None).
                result = setup_lora_for_model(test_profile, None)
                print(f"๐Ÿ” LoRA ๋งค๋‹ˆ์ € ์—†์Œ ํ…Œ์ŠคํŠธ ๊ฒฐ๊ณผ: {result}")

        except Exception as e:
            print(f"โŒ ๊ณตํ†ต ํ•จ์ˆ˜ ํ…Œ์ŠคํŠธ ์‹คํŒจ: {e}")

    except Exception as e:
        # Top-level boundary of the script: report instead of crashing.
        print(f"โŒ ์ „์ฒด ํ…Œ์ŠคํŠธ ์‹คํŒจ: {e}")
91
+
92
+ if __name__ == "__main__":
93
+ test_lora_integration()
test_model_selection.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ๋ชจ๋ธ ์„ ํƒ ๋ฐ LoRA ๊ฒฝ๋กœ ์„ค์ • ํ…Œ์ŠคํŠธ ์Šคํฌ๋ฆฝํŠธ
4
+ """
5
+ import sys
6
+ import os
7
+ from pathlib import Path
8
+
9
+ # ํ”„๋กœ์ ํŠธ ๋ฃจํŠธ ๊ฒฝ๋กœ ์ถ”๊ฐ€
10
+ project_root = Path(__file__).parent
11
+ sys.path.insert(0, str(project_root))
12
+
13
+ from lily_llm_api.models import get_model_profile, list_available_models
14
+
15
def test_model_profiles():
    """Check that every model profile exposes the attributes LoRA setup needs.

    Prints each available model's profile, reports any of ``model_id``,
    ``local_path`` or ``display_name`` that the profile object lacks, then
    simulates selecting the first model and resolving its LoRA base path.
    Results are printed to stdout; nothing is returned.
    """
    print("๐Ÿ” ๋ชจ๋ธ ํ”„๋กœํ•„ ํ…Œ์ŠคํŠธ ์‹œ์ž‘...")

    available_models = list_available_models()
    print(f"๐Ÿ“‹ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ: {len(available_models)}๊ฐœ")

    shown_fields = ('display_name', 'model_id', 'local_path', 'multimodal')
    required_attrs = ['model_id', 'local_path', 'display_name']

    for entry in available_models:
        model_id = entry['model_id']
        print(f"\n๐Ÿ“ ๋ชจ๋ธ: {entry['name']} ({model_id})")

        try:
            profile = get_model_profile(model_id)
            print(f" โœ… ํ”„๋กœํ•„ ๋กœ๋“œ ์„ฑ๊ณต")
            for field in shown_fields:
                print(f" - {field}: {getattr(profile, field, 'N/A')}")

            # Required attributes: only presence is checked, not the value.
            missing_attrs = [name for name in required_attrs if not hasattr(profile, name)]
            if not missing_attrs:
                print(f" โœ… ๋ชจ๋“  ํ•„์ˆ˜ ์†์„ฑ ์กด์žฌ")
            else:
                print(f" โŒ ๋ˆ„๋ฝ๋œ ์†์„ฑ: {missing_attrs}")

        except Exception as e:
            # Report the failure and move on to the next model.
            print(f" โŒ ํ”„๋กœํ•„ ๋กœ๋“œ ์‹คํŒจ: {e}")

    print("\n" + "="*50)
    print("๐ŸŽฏ ๋ชจ๋ธ ์„ ํƒ ์‹œ๋ฎฌ๋ ˆ์ด์…˜")
    print("="*50)

    # Numbered menu of the available models.
    for position, entry in enumerate(available_models, 1):
        print(f"{position:2d}. {entry['name']} ({entry['model_id']})")

    # Nothing to select when no model is available.
    if not available_models:
        return

    # Simulate picking the first model.
    selected_model = available_models[0]
    selected_model_id = selected_model['model_id']
    print(f"\n๐Ÿ“ ์„ ํƒ๋œ ๋ชจ๋ธ: {selected_model['name']} ({selected_model_id})")

    # Known model IDs and their local checkpoint directories.
    path_by_model_id = {
        "polyglot-ko-1.3b-chat": "./lily_llm_core/models/polyglot_ko_1_3b_chat",
        "kanana-1.5-v-3b-instruct": "./lily_llm_core/models/kanana_1_5_v_3b_instruct",
        "polyglot-ko-5.8b-chat": "./lily_llm_core/models/polyglot_ko_5_8b_chat",
    }

    # LoRA path resolution: the profile's own local_path wins, otherwise the
    # model ID is looked up in the table above.
    profile = get_model_profile(selected_model_id)
    current_model_path = None

    if getattr(profile, 'local_path', None):
        current_model_path = profile.local_path
        print(f"๐Ÿ” ๋ชจ๋ธ ํ”„๋กœํ•„์—์„œ ๊ฒฝ๋กœ ์ง์ ‘ ์‚ฌ์šฉ: {current_model_path}")
    elif getattr(profile, 'model_id', None):
        model_id = profile.model_id
        print(f"๐Ÿ” ๋ชจ๋ธ ID ๊ธฐ๋ฐ˜ ๊ฒฝ๋กœ ๊ฒฐ์ •: {model_id}")
        current_model_path = path_by_model_id.get(model_id)

    print(f"๐Ÿ” ๊ฒฐ์ •๋œ ๋ชจ๋ธ ๊ฒฝ๋กœ: {current_model_path}")

    if not current_model_path:
        print(f"โŒ LoRA ๊ฒฝ๋กœ ๊ฒฐ์ • ์‹คํŒจ")
    else:
        print(f"โœ… LoRA ๊ฒฝ๋กœ ๊ฒฐ์ • ์„ฑ๊ณต: {current_model_path}")
84
+
85
+ if __name__ == "__main__":
86
+ test_model_profiles()
test_model_type_fix.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ ๋ชจ๋ธ ํƒ€์ž… ์„ค์ • ํ…Œ์ŠคํŠธ ์Šคํฌ๋ฆฝํŠธ
4
+ """
5
+ import sys
6
+ import os
7
+ from pathlib import Path
8
+
9
+ # ํ”„๋กœ์ ํŠธ ๋ฃจํŠธ ๊ฒฝ๋กœ ์ถ”๊ฐ€
10
+ project_root = Path(__file__).parent
11
+ sys.path.insert(0, str(project_root))
12
+
13
def test_model_type_detection():
    """Simulate how the base-model path and loader type are chosen per model.

    For each available model the profile is loaded and printed, then the
    path / model-type decision is replayed: a profile with ``local_path``
    uses that path (``vision2seq`` for the kanana model ID, ``causal_lm``
    otherwise); a profile with only ``model_id`` is resolved through a table
    of known local checkpoints when ``is_local`` is set. All output goes to
    stdout; failures are caught and printed, and nothing is returned.
    """
    print("๐Ÿ” ๋ชจ๋ธ ํƒ€์ž… ๊ฐ์ง€ ํ…Œ์ŠคํŠธ ์‹œ์ž‘...")

    kanana_id = "kanana-1.5-v-3b-instruct"
    # model ID -> (local checkpoint directory, loader type)
    local_layout = {
        "polyglot-ko-1.3b-chat": ("./lily_llm_core/models/polyglot_ko_1_3b_chat", "causal_lm"),
        kanana_id: ("./lily_llm_core/models/kanana_1_5_v_3b_instruct", "vision2seq"),
        "polyglot-ko-5.8b-chat": ("./lily_llm_core/models/polyglot_ko_5_8b_chat", "causal_lm"),
    }
    shown_fields = ('display_name', 'model_id', 'local_path', 'is_local')

    try:
        from lily_llm_api.models import get_model_profile, list_available_models

        available_models = list_available_models()
        print(f"๐Ÿ“‹ ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋ชจ๋ธ: {len(available_models)}๊ฐœ")

        for entry in available_models:
            model_id = entry['model_id']
            print(f"\n๐Ÿ“ ๋ชจ๋ธ: {entry['name']} ({model_id})")

            try:
                profile = get_model_profile(model_id)
                print(f" โœ… ํ”„๋กœํ•„ ๋กœ๋“œ ์„ฑ๊ณต")
                for field in shown_fields:
                    print(f" - {field}: {getattr(profile, field, 'N/A')}")

                # Model type detection simulation
                print(f" ๐Ÿ” ๋ชจ๋ธ ํƒ€์ž… ๊ฐ์ง€ ์‹œ๋ฎฌ๋ ˆ์ด์…˜:")

                current_model_path = None
                model_type = "causal_lm"  # default

                local_path = getattr(profile, 'local_path', None)
                profile_id = getattr(profile, 'model_id', None)

                if local_path:
                    # Local path given directly; the type still depends on the ID.
                    current_model_path = local_path
                    if profile_id == kanana_id:
                        model_type = "vision2seq"
                    print(f" - ๋กœ์ปฌ ๊ฒฝ๋กœ ์‚ฌ์šฉ: {current_model_path}")
                    print(f" - ๊ฒฐ์ •๋œ ๋ชจ๋ธ ํƒ€์ž…: {model_type}")

                elif profile_id:
                    # No local path: resolve from the model ID.
                    print(f" - ๋ชจ๋ธ ID ๊ธฐ๋ฐ˜: {profile_id}")

                    if not getattr(profile, 'is_local', False):
                        print(f" - ๋ฐฐํฌ ํ™˜๊ฒฝ: LoRA ์„ค์ • ๊ฑด๋„ˆ๋œ€")
                    else:
                        # Unknown IDs keep the defaults (no path, causal_lm).
                        current_model_path, model_type = local_layout.get(
                            profile_id, (None, "causal_lm")
                        )
                        print(f" - ๋กœ์ปฌ ํ™˜๊ฒฝ ๊ฒฝ๋กœ: {current_model_path}")
                        print(f" - ๋ชจ๋ธ ํƒ€์ž…: {model_type}")

                # Final result
                if not current_model_path:
                    print(f" โŒ ์ตœ์ข… ๊ฒฐ๊ณผ: ๊ฒฝ๋กœ ์—†์Œ")
                else:
                    print(f" โœ… ์ตœ์ข… ๊ฒฐ๊ณผ: ๊ฒฝ๋กœ={current_model_path}, ํƒ€์ž…={model_type}")

            except Exception as e:
                # Report and continue with the next model.
                print(f" โŒ ํ”„๋กœํ•„ ๋กœ๋“œ ์‹คํŒจ: {e}")

    except Exception as e:
        # Top-level boundary of the script: report instead of crashing.
        print(f"โŒ ์ „์ฒด ํ…Œ์ŠคํŠธ ์‹คํŒจ: {e}")
88
+
89
+ if __name__ == "__main__":
90
+ test_model_type_detection()
test_rag_integration.py CHANGED
@@ -267,3 +267,4 @@ if __name__ == "__main__":
267
  print("\nํ…Œ์ŠคํŠธ ์™„๋ฃŒ! ๐ŸŽ‰")
268
 
269
 
 
 
267
  print("\nํ…Œ์ŠคํŠธ ์™„๋ฃŒ! ๐ŸŽ‰")
268
 
269
 
270
+