gbrabbit committed on
Commit
797e09d
·
1 Parent(s): 11ddb38

Auto commit at 22-2025-08 4:00:04

Browse files
README.md CHANGED
@@ -10,6 +10,9 @@ app_file: app.py
10
  pinned: false
11
  ---
12
 
 
 
 
13
  # 250821
14
  - polyglot 5.8b 응답 속도 개선, 모델별 tokenizer config settings json 변수 명시적으로 기입
15
 
 
10
  pinned: false
11
  ---
12
 
13
+ # 250822
14
+ - polyglot 1.3b lora 파라메터 조절, 응답 품질 향상
15
+
16
  # 250821
17
  - polyglot 5.8b 응답 속도 개선, 모델별 tokenizer config settings json 변수 명시적으로 기입
18
 
lily_llm_api/app_v2_250822_0312.py DELETED
The diff for this file is too large to render. See raw diff
 
lily_llm_api/models/polyglot_ko_1_3b_chat_250822_0312.py DELETED
@@ -1,280 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Polyglot-ko-1.3b-chat ๋ชจ๋ธ ํ”„๋กœํ•„
4
- heegyu/polyglot-ko-1.3b-chat ๋ชจ๋ธ์šฉ
5
- """
6
-
7
- from typing import Dict, Any, Tuple
8
- import torch
9
- from transformers import AutoTokenizer, AutoModelForCausalLM
10
- import logging
11
- import os
12
- from pathlib import Path
13
- import re
14
-
15
-
16
- HF_TOKEN = os.getenv("HF_TOKEN")
17
-
18
- logger = logging.getLogger(__name__)
19
-
20
- class PolyglotKo13bChatProfile:
21
- """Polyglot-ko-1.3b-chat ๋ชจ๋ธ ํ”„๋กœํ•„"""
22
-
23
- def __init__(self):
24
- self.model_name = "heegyu/polyglot-ko-1.3b-chat"
25
- self.local_path = "./lily_llm_core/models/polyglot_ko_1_3b_chat"
26
- self.display_name = "Polyglot-ko-1.3b-chat"
27
- self.description = "ํ•œ๊ตญ์–ด ์ฑ„ํŒ… ์ „์šฉ ๊ฒฝ๋Ÿ‰ ๋ชจ๋ธ (1.3B)"
28
- self.language = "ko"
29
- self.model_size = "1.3B"
30
-
31
- def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
32
- """๋ชจ๋ธ ๋กœ๋“œ (ํ† ํฌ๋‚˜์ด์ € ์„ค์ • ์ˆ˜์ •)"""
33
- logger.info(f"๐Ÿ“ฅ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์ค‘...")
34
- try:
35
- use_local = Path(self.local_path).exists() and any(Path(self.local_path).iterdir())
36
- model_path = self.local_path if use_local else self.model_name
37
- logger.info(f"๐Ÿ” ๋ชจ๋ธ ๊ฒฝ๋กœ: {model_path} (local={'yes' if use_local else 'no'})")
38
-
39
- # ๊ฐ•์ œ๋กœ Hugging Face์—์„œ ๋‹ค์šด๋กœ๋“œ (๋กœ์ปฌ ๋ชจ๋ธ ๋ฌธ์ œ ํ•ด๊ฒฐ)
40
- # use_local = False
41
- # model_path = self.model_name
42
- # logger.info(f"๐Ÿ” ๋ชจ๋ธ ๊ฒฝ๋กœ: {model_path} (local=no - ๊ฐ•์ œ HF ๋‹ค์šด๋กœ๋“œ)")
43
-
44
- tokenizer = AutoTokenizer.from_pretrained(
45
- model_path,
46
- token=HF_TOKEN,
47
- use_fast=True,
48
- trust_remote_code=True,
49
- local_files_only=use_local,
50
- )
51
-
52
- # ํ† ํฌ๋‚˜์ด์ € ์„ค์ • ์ˆ˜์ • - EOS ํ† ํฐ ๋ฌธ์ œ ํ•ด๊ฒฐ
53
- if tokenizer.eos_token is None:
54
- logger.warning("โš ๏ธ EOS ํ† ํฐ์ด ์—†์Šต๋‹ˆ๋‹ค. ๋ชจ๋ธ ๊ณต์‹ ๋ฌธ์„œ์— ๋”ฐ๋ผ <|endoftext|> ์„ค์ •")
55
- tokenizer.eos_token = "<|endoftext|>"
56
-
57
- if tokenizer.pad_token is None:
58
- logger.warning("โš ๏ธ PAD ํ† ํฐ์ด ์—†์Šต๋‹ˆ๋‹ค. EOS ํ† ํฐ์œผ๋กœ ์„ค์ •")
59
- tokenizer.pad_token = tokenizer.eos_token
60
-
61
- # ํŠน์ˆ˜ ํ† ํฐ ํ™•์ธ
62
- logger.info(f"๐Ÿ” ํ† ํฌ๋‚˜์ด์ € ์„ค์ •:")
63
- logger.info(f" - EOS ํ† ํฐ: {tokenizer.eos_token} (ID: {tokenizer.eos_token_id})")
64
- logger.info(f" - PAD ํ† ํฐ: {tokenizer.pad_token} (ID: {tokenizer.pad_token_id})")
65
- # logger.info(f" - BOS ํ† ํฐ: {tokenizer.bos_token} (ID: {tokenizer.bos_token_id})")
66
-
67
- # CPU์—์„œ๋Š” float32๊ฐ€ ๋” ์•ˆ์ •์ , CUDA์—์„œ๋Š” float16 ์‚ฌ์šฉ
68
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
69
- selected_dtype = torch.float16 if device == 'cuda' else torch.float32
70
-
71
- model = AutoModelForCausalLM.from_pretrained(
72
- model_path,
73
- token=HF_TOKEN,
74
- trust_remote_code=True,
75
- torch_dtype=selected_dtype,
76
- local_files_only=use_local,
77
- ).to(device)
78
-
79
- logger.info(f"โœ… {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต! (device={device}, dtype={selected_dtype})")
80
- return model, tokenizer
81
- except Exception as e:
82
- logger.error(f"โŒ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์‹คํŒจ: {e}")
83
- raise
84
-
85
- def format_prompt(self, user_input: str, context: str = None) -> str:
86
- """ํ”„๋กฌํ”„ํŠธ ํฌ๋งทํŒ… - ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ๋‹จ์ˆœํ™”"""
87
-
88
- # ๊ธฐ๋ณธ ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ (๋‹จ์ˆœํ™”)
89
- system_prompt = """๋‹น์‹ ์€ ์นœ์ ˆํ•˜๊ณ  ๋„์›€์ด ๋˜๋Š” AI ์ฑ—๋ด‡์ž…๋‹ˆ๋‹ค. ์‚ฌ์šฉ์ž์˜ ์งˆ๋ฌธ์— ์ •ํ™•ํ•˜๊ณ  ์œ ์šฉํ•œ ๋‹ต๋ณ€์„ ์ œ๊ณตํ•˜์„ธ์š”."""
90
-
91
- # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ๋ฅผ ํ•ญ์ƒ ๋จผ์ € ํฌํ•จ
92
- if context:
93
- # ์ปจํ…์ŠคํŠธ๊ฐ€ ์žˆ์„ ๋•Œ
94
- if user_input in context:
95
- # ์ค‘๋ณต ๋ฐฉ์ง€: ์ปจํ…์ŠคํŠธ๋งŒ ์‚ฌ์šฉ
96
- prompt = f"""{system_prompt}
97
-
98
- {context}
99
-
100
- ### ์ฑ—๋ด‡:"""
101
- else:
102
- # ์ƒˆ๋กœ์šด ์‚ฌ์šฉ์ž ์ž…๋ ฅ ์ถ”๊ฐ€
103
- prompt = f"""{system_prompt}
104
-
105
- {context}
106
-
107
- ### ์‚ฌ์šฉ์ž:
108
- {user_input}
109
-
110
- ### ์ฑ—๋ด‡:"""
111
- else:
112
- # ์ปจํ…์ŠคํŠธ๊ฐ€ ์—†์–ด๋„ ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ๋Š” ํฌํ•จ
113
- prompt = f"""{system_prompt}
114
-
115
- ### ์‚ฌ์šฉ์ž:
116
- {user_input}
117
-
118
- ### ์ฑ—๋ด‡:"""
119
- return prompt
120
-
121
- def extract_response(self, full_text: str, formatted_prompt: str = None) -> str:
122
- """์‘๋‹ต ์ถ”์ถœ - ํ’ˆ์งˆ ๊ฒ€์ฆ ๋ฐ ๊ฐœ์„ """
123
- logger.info(f"--- Polyglot ์‘๋‹ต ์ถ”์ถœ ์‹œ์ž‘ ---")
124
- logger.info(f"์ „์ฒด ์ƒ์„ฑ ํ…์ŠคํŠธ (Raw): \n---\n{full_text}\n---")
125
- logger.info(f"์‚ฌ์šฉ๋œ ํ”„๋กฌํ”„ํŠธ: {formatted_prompt}")
126
-
127
- # 1์ˆœ์œ„: "### ์ฑ—๋ด‡:" ํƒœ๊ทธ๋กœ ๏ฟฝ๏ฟฝ๏ฟฝ์ถœ ์‹œ๋„
128
- if "### ์ฑ—๋ด‡:" in full_text:
129
- response = full_text.split("### ์ฑ—๋ด‡:")[-1].strip()
130
- logger.info(f"โœ… ์„ฑ๊ณต: '### ์ฑ—๋ด‡:' ํƒœ๊ทธ๋กœ ์‘๋‹ต ์ถ”์ถœ")
131
- logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
132
-
133
- # ์‘๋‹ต ํ’ˆ์งˆ ๊ฒ€์ฆ
134
- if self._validate_response_quality(response):
135
- return response
136
- else:
137
- logger.warning("โš ๏ธ ์‘๋‹ต ํ’ˆ์งˆ์ด ๋‚ฎ์Šต๋‹ˆ๋‹ค. ํ’ˆ์งˆ ๊ฐœ์„  ์ œ์•ˆ์„ ์ถ”๊ฐ€ํ•ฉ๋‹ˆ๋‹ค.")
138
- return self._improve_response_quality(response)
139
-
140
- # 2์ˆœ์œ„: ํ”„๋กฌํ”„ํŠธ ์ œ๊ฑฐ๋กœ ์ถ”์ถœ ์‹œ๋„
141
- if formatted_prompt and formatted_prompt in full_text:
142
- response = full_text.replace(formatted_prompt, "").strip()
143
- logger.info(f"โœ… ์„ฑ๊ณต: ํ”„๋กฌํ”„ํŠธ ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ถ”์ถœ")
144
- logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
145
-
146
- if self._validate_response_quality(response):
147
- return response
148
- else:
149
- return self._improve_response_quality(response)
150
-
151
- # 3์ˆœ์œ„: <|im_start|>assistant ํƒœ๊ทธ ์ดํ›„ ๋‚ด์šฉ ์ถ”์ถœ
152
- if "<|im_start|>assistant" in full_text:
153
- parts = full_text.split("<|im_start|>assistant")
154
- if len(parts) > 1:
155
- # ๋งˆ์ง€๋ง‰ assistant ํƒœ๊ทธ ์ดํ›„ ๋‚ด์šฉ
156
- last_assistant_part = parts[-1]
157
- # <|im_end|> ํƒœ๊ทธ ์ œ๊ฑฐ
158
- if "<|im_end|>" in last_assistant_part:
159
- response = last_assistant_part.split("<|im_end|>")[0].strip()
160
- else:
161
- response = last_assistant_part.strip()
162
-
163
- logger.info(f"โœ… ์„ฑ๊ณต: '<|im_start|>assistant' ํƒœ๊ทธ๋กœ ์‘๋‹ต ์ถ”์ถœ")
164
- logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
165
-
166
- if self._validate_response_quality(response):
167
- return response
168
- else:
169
- return self._improve_response_quality(response)
170
-
171
- # 4์ˆœ์œ„: ์ผ๋ฐ˜์ ์ธ ํ”„๋กฌํ”„ํŠธ ํŒจํ„ด ์ œ๊ฑฐ ์‹œ๋„
172
- clean_text = full_text.strip()
173
- patterns_to_remove = [
174
- "(์‘๋‹ต์ด ๋„ˆ๋ฌด ์งง์Šต๋‹ˆ๋‹ค. ๋” ์ž์„ธํ•œ ๋‹ต๋ณ€์„ ์›ํ•˜์‹œ๋ฉด ๋‹ค์‹œ ์งˆ๋ฌธํ•ด์ฃผ์„ธ์š”.)",
175
- "### ์‚ฌ์šฉ์ž:",
176
- "### ์ฑ—๋ด‡:",
177
- "์‚ฌ์šฉ์ž:",
178
- "์ฑ—๋ด‡:",
179
- "assistant:",
180
- "user:",
181
- "<|im_start|>user",
182
- "<|im_end|>",
183
- "<|im_start|>assistant",
184
- "<|im_start|>system"
185
- ]
186
-
187
- for pattern in patterns_to_remove:
188
- clean_text = clean_text.replace(pattern, "")
189
-
190
- clean_text = clean_text.strip()
191
-
192
- if clean_text and clean_text != full_text:
193
- logger.info("โœ… ์„ฑ๊ณต: ํŒจํ„ด ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ •๋ฆฌ")
194
- logger.info(f"์ •๋ฆฌ๋œ ์‘๋‹ต: {clean_text}")
195
-
196
- if self._validate_response_quality(clean_text):
197
- return clean_text
198
- else:
199
- return self._improve_response_quality(clean_text)
200
-
201
- # 5์ˆœ์œ„: ์ „์ฒด ํ…์ŠคํŠธ์—์„œ ๋ถˆํ•„์š”ํ•œ ๋ถ€๋ถ„๋งŒ ์ œ๊ฑฐ
202
- final_response = full_text.strip()
203
- logger.warning("โš ๏ธ ๊ฒฝ๊ณ : ํŠน๋ณ„ํ•œ ์‘๋‹ต ์ถ”์ถœ ํŒจํ„ด์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ์ „์ฒด ํ…์ŠคํŠธ๋ฅผ ์ •๋ฆฌํ•˜์—ฌ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.")
204
- logger.info(f"์ตœ์ข… ๋ฐ˜ํ™˜ ํ…์ŠคํŠธ: {final_response}")
205
-
206
- if self._validate_response_quality(final_response):
207
- return final_response
208
- else:
209
- return self._improve_response_quality(final_response)
210
-
211
- def _validate_response_quality(self, response: str) -> bool:
212
- """์‘๋‹ต ํ’ˆ์งˆ ๊ฒ€์ฆ"""
213
- if not response or len(response.strip()) < 5:
214
- return False
215
-
216
- # ์˜์–ด๊ฐ€ ํฌํ•จ๋˜์–ด ์žˆ์œผ๋ฉด ํ’ˆ์งˆ ๋‚ฎ์Œ
217
- # if any(char.isascii() and char.isalpha() for char in response):
218
- # return False
219
-
220
- # ๋ฌธ์žฅ์ด ์ค‘๊ฐ„์— ๋Š์–ด์ง„ ๊ฒฝ์šฐ ํ’ˆ์งˆ ๋‚ฎ์Œ
221
- # if response.endswith(('ํ•˜', '๋Š”', '์„', '๋ฅผ', '์ด', '๊ฐ€', '์˜', '์—', '๋กœ')):
222
- # return False
223
-
224
- # ์ค‘๋ณต๋œ ๋‹จ์–ด๊ฐ€ ๋งŽ์œผ๋ฉด ํ’ˆ์งˆ ๋‚ฎ์Œ
225
- # words = response.split()
226
- # if len(words) > 3 and len(set(words)) / len(words) < 0.7:
227
- # return False
228
-
229
- return True
230
-
231
- def _improve_response_quality(self, response: str) -> str:
232
- """์‘๋‹ต ํ’ˆ์งˆ ๊ฐœ์„ """
233
- # ๊ธฐ๋ณธ ์ •๋ฆฌ
234
- improved = response.strip()
235
-
236
- # ์˜์–ด ์ œ๊ฑฐ
237
-
238
- # improved = re.sub(r'[a-zA-Z]+', '', improved)
239
-
240
- # ์ค‘๋ณต ๊ณต๋ฐฑ ์ œ๊ฑฐ
241
- improved = re.sub(r'\s+', ' ', improved)
242
-
243
- # ๋ฌธ์žฅ์ด ์ค‘๊ฐ„์— ๋Š์–ด์ง„ ๊ฒฝ์šฐ ์ฒ˜๋ฆฌ
244
- # if improved.endswith(('ํ•˜', '๋Š”', '์„', '๋ฅผ', '์ด', '๊ฐ€', '์˜', '์—', '๋กœ')):
245
- # improved += '๋‹ˆ๋‹ค.'
246
-
247
- # ๋„ˆ๋ฌด ์งง์€ ๊ฒฝ์šฐ ๊ธฐ๋ณธ ์‘๋‹ต ์ถ”๊ฐ€
248
- if len(improved) < 5:
249
- improved = f"{improved} (์‘๋‹ต์ด ๋„ˆ๋ฌด ์งง์Šต๋‹ˆ๋‹ค. ๋” ์ž์„ธํ•œ ๋‹ต๋ณ€์„ ์›ํ•˜์‹œ๋ฉด ๋‹ค์‹œ ์งˆ๋ฌธํ•ด์ฃผ์„ธ์š”.)"
250
-
251
- logger.info(f"๐Ÿ”ง ์‘๋‹ต ํ’ˆ์งˆ ๊ฐœ์„  ์™„๋ฃŒ: {improved}")
252
- return improved
253
-
254
- def get_generation_config(self) -> Dict[str, Any]:
255
- """์ƒ์„ฑ ์„ค์ • - ๊ณต์‹ EOS ํ† ํฐ ์‚ฌ์šฉ, ์ƒ์„ฑ ํŒŒ๋ผ๋ฏธํ„ฐ ์ตœ์ ํ™”"""
256
- return {
257
- "max_new_tokens": 128, # 256 โ†’ 128๋กœ ์ค„์ž„ (์ปจํ…์ŠคํŠธ ๊ธธ์ด ๊ณ ๋ ค)
258
- "temperature": 0.7, # 0.9 โ†’ 0.7๋กœ ์กฐ์ • (์•ˆ์ •์„ฑ ํ–ฅ์ƒ)
259
- "do_sample": True, # ์ƒ˜ํ”Œ๋ง ํ™œ์„ฑํ™”
260
- "top_k": 50, # 100 โ†’ 50์œผ๋กœ ์กฐ์ • (ํ’ˆ์งˆ๊ณผ ์•ˆ์ •์„ฑ ๊ท ํ˜•)
261
- "top_p": 0.9, # 0.95 โ†’ 0.9๋กœ ์กฐ์ •
262
- "repetition_penalty": 1.1, # 1.05 โ†’ 1.1๋กœ ์กฐ์ •
263
- "no_repeat_ngram_size": 3, # 2 โ†’ 3์œผ๋กœ ์กฐ์ •
264
- "pad_token_id": 2, # ๊ณต์‹ ์„ค์ • ์‚ฌ์šฉ
265
- "eos_token_id": 2, # ๊ณต์‹ ์„ค์ • ์‚ฌ์šฉ
266
- "use_cache": True, # ์บ์‹œ ํ™œ์„ฑํ™” (์†๋„ ํ–ฅ์ƒ)
267
- "early_stopping": False, # EOS ํ† ํฐ๊นŒ์ง€ ์ƒ์„ฑํ•˜๋„๋ก ์„ค์ •
268
- }
269
-
270
- def get_model_info(self) -> Dict[str, Any]:
271
- """๋ชจ๋ธ ์ •๋ณด"""
272
- return {
273
- "model_name": self.model_name,
274
- "display_name": self.display_name,
275
- "description": self.description,
276
- "language": self.language,
277
- "model_size": self.model_size,
278
- "local_path": self.local_path,
279
- "multimodal": False,
280
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lily_llm_core/context_manager_250822_0312.py DELETED
@@ -1,702 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž (Context Manager)
4
- ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ์™€ ๋‹จ๊ธฐ ๊ธฐ์–ต์„ ๊ด€๋ฆฌํ•˜๋Š” ์‹œ์Šคํ…œ
5
- """
6
-
7
- import logging
8
- import time
9
- from typing import List, Dict, Any, Optional, Tuple
10
- from dataclasses import dataclass
11
- from collections import deque
12
- import json
13
-
14
- logger = logging.getLogger(__name__)
15
-
16
- @dataclass
17
- class ConversationTurn:
18
- """๋Œ€ํ™” ํ„ด์„ ๋‚˜ํƒ€๋‚ด๋Š” ๋ฐ์ดํ„ฐ ํด๋ž˜์Šค"""
19
- role: str # 'user' ๋˜๋Š” 'assistant'
20
- content: str
21
- timestamp: float
22
- message_id: str
23
- metadata: Optional[Dict[str, Any]] = None
24
-
25
- class ContextManager:
26
- """๋Œ€ํ™” ์ปจํ…์ŠคํŠธ๋ฅผ ๊ด€๋ฆฌํ•˜๋Š” ํด๋ž˜์Šค"""
27
-
28
- def __init__(self,
29
- max_tokens: int = 2000, # 4000 โ†’ 2000์œผ๋กœ ์ค„์ž„
30
- max_turns: int = 20, # 20 โ†’ 10์œผ๋กœ ์ค„์ž„
31
- strategy: str = "sliding_window"):
32
- """
33
- Args:
34
- max_tokens: ์ตœ๋Œ€ ํ† ํฐ ์ˆ˜
35
- max_turns: ์ตœ๋Œ€ ๋Œ€ํ™” ํ„ด ์ˆ˜
36
- strategy: ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ ์ „๋žต ('sliding_window', 'priority_keep', 'circular')
37
- """
38
- self.max_tokens = max_tokens
39
- self.max_turns = max_turns
40
- self.strategy = strategy
41
-
42
- # ์„ธ์…˜๋ณ„ ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ (์„ธ์…˜ ID๋กœ ๋ถ„๋ฆฌ)
43
- self.session_conversations: Dict[str, deque] = {}
44
- self.default_session = "default"
45
-
46
- # ๊ธฐ๋ณธ ์„ธ์…˜ ์ดˆ๊ธฐํ™”
47
- self.session_conversations[self.default_session] = deque(maxlen=max_turns * 2)
48
-
49
- # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ
50
- self.system_prompt = ""
51
-
52
- # ์ปจํ…์ŠคํŠธ ํ†ต๊ณ„
53
- self.total_tokens = 0
54
- self.current_context_length = 0
55
-
56
- # ๋ฉ”๋ชจ๋ฆฌ ์ตœ์ ํ™” ์„ค์ •
57
- self.enable_memory_optimization = True
58
- self.compression_threshold = 0.8 # 80% ๋„๋‹ฌ ์‹œ ์••์ถ• ์‹œ์ž‘
59
-
60
- # ๐Ÿ”„ ์ž๋™ ์ •๋ฆฌ ์ฃผ๊ธฐ ์„ค์ •
61
- self.auto_cleanup_enabled = True
62
- self.cleanup_interval_turns = 5 # 8 โ†’ 5ํ„ด๋งˆ๋‹ค ์ •๋ฆฌ
63
- self.cleanup_interval_time = 180 # 5๋ถ„ โ†’ 3๋ถ„๋งˆ๋‹ค ์ •๋ฆฌ
64
- self.cleanup_strategy = "aggressive" # smart โ†’ aggressive๋กœ ๋ณ€๊ฒฝ
65
- self.last_cleanup_time = {} # ์„ธ์…˜๋ณ„ ๋งˆ์ง€๋ง‰ ์ •๋ฆฌ ์‹œ๊ฐ„
66
- self.turn_counters = {} # ์„ธ์…˜๋ณ„ ํ„ด ์นด์šดํ„ฐ
67
-
68
- logger.info(f"๐Ÿ”ง ์ปจํ…์ŠคํŠธ ๊ด€๋ฆฌ์ž ์ดˆ๊ธฐํ™”: max_tokens={max_tokens}, strategy={strategy}, auto_cleanup={self.auto_cleanup_enabled}")
69
-
70
- def set_system_prompt(self, prompt: str):
71
- """์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ •"""
72
- self.system_prompt = prompt
73
- logger.info(f"๐Ÿ“ ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ์„ค์ •: {len(prompt)} ๋ฌธ์ž")
74
-
75
- def set_auto_cleanup_config(self,
76
- enabled: bool = True,
77
- interval_turns: int = 8,
78
- interval_time: int = 300,
79
- strategy: str = "smart"):
80
- """์ž๋™ ์ •๋ฆฌ ์„ค์ • ๊ตฌ์„ฑ"""
81
- self.auto_cleanup_enabled = enabled
82
- self.cleanup_interval_turns = max(1, interval_turns)
83
- self.cleanup_interval_time = max(60, interval_time)
84
- self.cleanup_strategy = strategy
85
-
86
- logger.info(f"๐Ÿ”„ ์ž๋™ ์ •๋ฆฌ ์„ค์ •: enabled={enabled}, turns={interval_turns}, time={interval_time}s, strategy={strategy}")
87
-
88
- def get_auto_cleanup_config(self) -> Dict[str, Any]:
89
- """์ž๋™ ์ •๋ฆฌ ์„ค์ • ๋ฐ˜ํ™˜"""
90
- return {
91
- "enabled": self.auto_cleanup_enabled,
92
- "interval_turns": self.cleanup_interval_turns,
93
- "interval_time": self.cleanup_interval_time,
94
- "strategy": self.cleanup_strategy
95
- }
96
-
97
- def add_user_message(self, content: str, message_id: str = None, metadata: Dict[str, Any] = None) -> str:
98
- """์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€"""
99
- if not message_id:
100
- message_id = f"user_{int(time.time() * 1000)}"
101
-
102
- # ์„ธ์…˜ ID ์ถ”์ถœ (metadata์—์„œ)
103
- session_id = "default"
104
- if metadata and "session_id" in metadata:
105
- session_id = metadata["session_id"]
106
-
107
- # ์„ธ์…˜์ด ์—†์œผ๋ฉด ์ƒ์„ฑ
108
- if session_id not in self.session_conversations:
109
- self.session_conversations[session_id] = deque(maxlen=self.max_turns * 2)
110
-
111
- turn = ConversationTurn(
112
- role="user",
113
- content=content,
114
- timestamp=time.time(),
115
- message_id=message_id,
116
- metadata=metadata or {}
117
- )
118
-
119
- self.session_conversations[session_id].append(turn)
120
- self._update_context_stats(session_id)
121
- self._optimize_context(session_id)
122
-
123
- # ๐Ÿ”„ ์ž๋™ ์ •๋ฆฌ ์ฒดํฌ
124
- self._check_auto_cleanup(session_id)
125
-
126
- logger.info(f"๐Ÿ‘ค ์‚ฌ์šฉ์ž ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€: {len(content)} ๋ฌธ์ž (์„ธ์…˜: {session_id}, ์ด {len(self.session_conversations[session_id])} ํ„ด)")
127
- return message_id
128
-
129
- def add_assistant_message(self, content: str, message_id: str = None, metadata: Dict[str, Any] = None) -> str:
130
- """์–ด์‹œ์Šคํ„ดํŠธ ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€"""
131
- if not message_id:
132
- message_id = f"assistant_{int(time.time() * 1000)}"
133
-
134
- # ์„ธ์…˜ ID ์ถ”์ถœ (metadata์—์„œ)
135
- session_id = "default"
136
- if metadata and "session_id" in metadata:
137
- session_id = metadata["session_id"]
138
-
139
- # ์„ธ์…˜์ด ์—†์œผ๋ฉด ์ƒ์„ฑ
140
- if session_id not in self.session_conversations:
141
- self.session_conversations[session_id] = deque(maxlen=self.max_turns * 2)
142
-
143
- turn = ConversationTurn(
144
- role="assistant",
145
- content=content,
146
- timestamp=time.time(),
147
- message_id=message_id,
148
- metadata=metadata or {}
149
- )
150
-
151
- self.session_conversations[session_id].append(turn)
152
- self._update_context_stats(session_id)
153
- self._optimize_context(session_id)
154
-
155
- # ๐Ÿ”„ ์ž๋™ ์ •๋ฆฌ ์ฒดํฌ
156
- self._check_auto_cleanup(session_id)
157
-
158
- logger.info(f"๐Ÿค– ์–ด์‹œ์Šคํ„ดํŠธ ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€: {len(content)} ๋ฌธ์ž (์„ธ์…˜: {session_id}, ์ด {len(self.session_conversations[session_id])} ํ„ด)")
159
- return message_id
160
-
161
- def get_context(self, include_system: bool = True, max_length: Optional[int] = None, session_id: str = "default") -> str:
162
- """ํ˜„์žฌ ์ปจํ…์ŠคํŠธ๋ฅผ ๋ฌธ์ž์—ด๋กœ ๋ฐ˜ํ™˜ (์„ธ์…˜๋ณ„)"""
163
- context_parts = []
164
-
165
- # ์„ธ์…˜์ด ์—†์œผ๋ฉด ๊ธฐ๋ณธ ์„ธ์…˜ ์‚ฌ์šฉ
166
- if session_id not in self.session_conversations:
167
- session_id = "default"
168
-
169
- conversation_history = self.session_conversations[session_id]
170
-
171
- # ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ ํฌํ•จ
172
- if include_system and self.system_prompt:
173
- context_parts.append(f"<|im_start|>system\n{self.system_prompt}<|im_end|>")
174
-
175
- # ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ ํฌํ•จ
176
- for turn in conversation_history:
177
- if turn.role == "user":
178
- context_parts.append(f"<|im_start|>user\n{turn.content}<|im_end|>")
179
- elif turn.role == "assistant":
180
- context_parts.append(f"<|im_start|>assistant\n{turn.content}<|im_end|>")
181
-
182
- # ์–ด์‹œ์Šคํ„ดํŠธ ์‘๋‹ต ์‹œ์ž‘ ํ† ํฐ ์ถ”๊ฐ€
183
- context_parts.append("<|im_start|>assistant\n")
184
-
185
- context = "\n".join(context_parts)
186
-
187
- # ๊ธธ์ด ์ œํ•œ ์ ์šฉ
188
- if max_length and len(context) > max_length:
189
- context = self._truncate_context(context, max_length)
190
-
191
- return context
192
-
193
- def get_context_for_model(self, model_name: str = "default", session_id: str = "default") -> str:
194
- """๋ชจ๋ธ๋ณ„ ์ตœ์ ํ™”๋œ ์ปจํ…์ŠคํŠธ ๋ฐ˜ํ™˜ (์„ธ์…˜๋ณ„)"""
195
- # ๋ชจ๋ธ๋ณ„ ํŠน๋ณ„ํ•œ ์ฒ˜๋ฆฌ (ํ•„์š”์‹œ ํ™•์žฅ)
196
- if "kanana" in model_name.lower():
197
- return self.get_context(include_system=True, session_id=session_id)
198
- elif "llama" in model_name.lower():
199
- # Llama ํ˜•์‹
200
- return self._format_for_llama(session_id)
201
- elif "polyglot" in model_name.lower():
202
- # Polyglot ํ˜•์‹ - <|im_start|> ํƒœ๊ทธ ์‚ฌ์šฉํ•˜์ง€ ์•Š์Œ
203
- return self._format_for_polyglot(session_id)
204
- else:
205
- return self.get_context(include_system=True, session_id=session_id)
206
-
207
- def _format_for_llama(self, session_id: str = "default") -> str:
208
- """Llama ๋ชจ๋ธ์šฉ ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜ (์„ธ์…˜๋ณ„)"""
209
- context_parts = []
210
-
211
- # ์„ธ์…˜์ด ์—†์œผ๋ฉด ๊ธฐ๋ณธ ์„ธ์…˜ ์‚ฌ์šฉ
212
- if session_id not in self.session_conversations:
213
- session_id = "default"
214
-
215
- conversation_history = self.session_conversations[session_id]
216
-
217
- if self.system_prompt:
218
- context_parts.append(f"[INST] {self.system_prompt} [/INST]")
219
-
220
- for turn in conversation_history:
221
- if turn.role == "user":
222
- context_parts.append(f"[INST] {turn.content} [/INST]")
223
- elif turn.role == "assistant":
224
- context_parts.append(turn.content)
225
-
226
- return "\n".join(context_parts)
227
-
228
- def _format_for_polyglot(self, session_id: str = "default") -> str:
229
- """Polyglot ๋ชจ๋ธ์šฉ ํ˜•์‹์œผ๋กœ ๋ณ€ํ™˜ (์„ธ์…˜๋ณ„) - ๊ณต์‹ ํ˜•์‹ ์‚ฌ์šฉ"""
230
- context_parts = []
231
-
232
- # ์„ธ์…˜์ด ์—†์œผ๋ฉด ๊ธฐ๋ณธ ์„ธ์…˜ ์‚ฌ์šฉ
233
- if session_id not in self.session_conversations:
234
- session_id = "default"
235
-
236
- conversation_history = self.session_conversations[session_id]
237
-
238
- # ๋Œ€ํ™” ํžˆ์Šคํ† ๋ฆฌ๋งŒ ํฌํ•จ (๊ณต์‹ ํ˜•์‹ ์‚ฌ์šฉ)
239
- for turn in conversation_history:
240
- if turn.role == "user":
241
- context_parts.append(f"### ์‚ฌ์šฉ์ž:\n{turn.content}")
242
- elif turn.role == "assistant":
243
- context_parts.append(f"### ์ฑ—๋ด‡:\n{turn.content}")
244
-
245
- if context_parts:
246
- return "\n\n".join(context_parts)
247
- else:
248
- return ""
249
-
250
- def get_recent_context(self, turns: int = 5, session_id: str = "default") -> str:
251
- """์ตœ๊ทผ N๊ฐœ ํ„ด์˜ ์ปจํ…์ŠคํŠธ๋งŒ ๋ฐ˜ํ™˜ (์„ธ์…˜๋ณ„)"""
252
- # ์„ธ์…˜์ด ์—†์œผ๋ฉด ๊ธฐ๋ณธ ์„ธ์…˜ ์‚ฌ์šฉ
253
- if session_id not in self.session_conversations:
254
- session_id = "default"
255
-
256
- conversation_history = self.session_conversations[session_id]
257
- recent_turns = list(conversation_history)[-turns:]
258
- context_parts = []
259
-
260
- for turn in recent_turns:
261
- if turn.role == "user":
262
- context_parts.append(f"<|im_start|>user\n{turn.content}<|im_end|>")
263
- elif turn.role == "assistant":
264
- context_parts.append(f"<|im_start|>assistant\n{turn.content}<|im_end|>")
265
-
266
- context_parts.append("<|im_start|>assistant\n")
267
- return "\n".join(context_parts)
268
-
269
- def get_context_summary(self, session_id: str = "default") -> Dict[str, Any]:
270
- """์ปจํ…์ŠคํŠธ ์š”์•ฝ ์ •๋ณด ๋ฐ˜ํ™˜ (์„ธ์…˜๋ณ„)"""
271
- # ์„ธ์…˜์ด ์—†์œผ๋ฉด ๊ธฐ๋ณธ ์„ธ์…˜ ์‚ฌ์šฉ
272
- if session_id not in self.session_conversations:
273
- session_id = "default"
274
-
275
- conversation_history = self.session_conversations[session_id]
276
-
277
- return {
278
- "session_id": session_id,
279
- "total_turns": len(conversation_history),
280
- "user_messages": len([t for t in conversation_history if t.role == "user"]),
281
- "assistant_messages": len([t for t in conversation_history if t.role == "assistant"]),
282
- "estimated_tokens": self.total_tokens,
283
- "context_length": self.current_context_length,
284
- "memory_usage": len(conversation_history) / self.max_turns,
285
- "oldest_message": conversation_history[0].timestamp if conversation_history else None,
286
- "newest_message": conversation_history[-1].timestamp if conversation_history else None
287
- }
288
-
289
- def clear_context(self, session_id: str = "default"):
290
- """์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™” (์„ธ์…˜๋ณ„)"""
291
- if session_id not in self.session_conversations:
292
- logger.warning(f"โš ๏ธ ์„ธ์…˜ {session_id}๊ฐ€ ์กด์žฌํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.")
293
- return
294
-
295
- self.session_conversations[session_id].clear()
296
- self.total_tokens = 0
297
- self.current_context_length = 0
298
- logger.info(f"๐Ÿ—‘๏ธ ์„ธ์…˜ {session_id} ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ")
299
-
300
- def clear_all_sessions(self):
301
- """๋ชจ๋“  ์„ธ์…˜ ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™”"""
302
- for session_id in list(self.session_conversations.keys()):
303
- self.session_conversations[session_id].clear()
304
- self.total_tokens = 0
305
- self.current_context_length = 0
306
- logger.info("๐Ÿ—‘๏ธ ๋ชจ๋“  ์„ธ์…˜ ์ปจํ…์ŠคํŠธ ์ดˆ๊ธฐํ™” ์™„๋ฃŒ")
307
-
308
- def remove_message(self, message_id: str, session_id: str = "default") -> bool:
309
- """ํŠน์ • ๋ฉ”์‹œ์ง€ ์ œ๊ฑฐ (์„ธ์…˜๋ณ„)"""
310
- if session_id not in self.session_conversations:
311
- return False
312
-
313
- conversation_history = self.session_conversations[session_id]
314
- for i, turn in enumerate(conversation_history):
315
- if turn.message_id == message_id:
316
- removed_turn = conversation_history.pop(i)
317
- self._update_context_stats(session_id)
318
- logger.info(f"๐Ÿ—‘๏ธ ๋ฉ”์‹œ์ง€ ์ œ๊ฑฐ: {message_id} (์„ธ์…˜: {session_id})")
319
- return True
320
- return False
321
-
322
- def edit_message(self, message_id: str, new_content: str, session_id: str = "default") -> bool:
323
- """๋ฉ”์‹œ์ง€ ๋‚ด์šฉ ์ˆ˜์ • (์„ธ์…˜๋ณ„)"""
324
- if session_id not in self.session_conversations:
325
- return False
326
-
327
- conversation_history = self.session_conversations[session_id]
328
- for turn in conversation_history:
329
- if turn.message_id == message_id:
330
- turn.content = new_content
331
- turn.timestamp = time.time()
332
- self._update_context_stats(session_id)
333
- logger.info(f"โœ๏ธ ๋ฉ”์‹œ์ง€ ์ˆ˜์ •: {message_id} (์„ธ์…˜: {session_id})")
334
- return True
335
- return False
336
-
337
- def search_context(self, query: str, max_results: int = 5, session_id: str = "default") -> List[Dict[str, Any]]:
338
- """์ปจํ…์ŠคํŠธ ๋‚ด์—์„œ ๊ฒ€์ƒ‰ (์„ธ์…˜๋ณ„)"""
339
- if session_id not in self.session_conversations:
340
- return []
341
-
342
- conversation_history = self.session_conversations[session_id]
343
- results = []
344
- query_lower = query.lower()
345
-
346
- for turn in conversation_history:
347
- if query_lower in turn.content.lower():
348
- results.append({
349
- "message_id": turn.message_id,
350
- "role": turn.role,
351
- "content": turn.content,
352
- "timestamp": turn.timestamp,
353
- "relevance_score": self._calculate_relevance(query, turn.content)
354
- })
355
-
356
- # ๊ด€๋ จ์„ฑ ์ ์ˆ˜๋กœ ์ •๋ ฌ
357
- results.sort(key=lambda x: x["relevance_score"], reverse=True)
358
- return results[:max_results]
359
-
360
- def _calculate_relevance(self, query: str, content: str) -> float:
361
- """๊ฐ„๋‹จํ•œ ๊ด€๋ จ์„ฑ ์ ์ˆ˜ ๊ณ„์‚ฐ"""
362
- query_words = set(query.lower().split())
363
- content_words = set(content.lower().split())
364
-
365
- if not query_words:
366
- return 0.0
367
-
368
- intersection = query_words.intersection(content_words)
369
- return len(intersection) / len(query_words)
370
-
371
- def _update_context_stats(self, session_id: str = "default"):
372
- """์ปจํ…์ŠคํŠธ ํ†ต๊ณ„ ์—…๋ฐ์ดํŠธ (์„ธ์…˜๋ณ„)"""
373
- if session_id not in self.session_conversations:
374
- return
375
-
376
- self.current_context_length = len(self.get_context(session_id=session_id))
377
- # ๊ฐ„๋‹จํ•œ ํ† ํฐ ์ถ”์ • (์‹ค์ œ ํ† ํฌ๋‚˜์ด์ € ์‚ฌ์šฉ ๊ถŒ์žฅ)
378
- self.total_tokens = self.current_context_length // 4
379
-
380
- def _optimize_context(self, session_id: str = "default"):
381
- """์ปจํ…์ŠคํŠธ ์ตœ์ ํ™” (์„ธ์…˜๋ณ„)"""
382
- if not self.enable_memory_optimization:
383
- return
384
-
385
- if session_id not in self.session_conversations:
386
- return
387
-
388
- conversation_history = self.session_conversations[session_id]
389
-
390
- # ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰์ด ์ž„๊ณ„๊ฐ’์„ ์ดˆ๊ณผํ•˜๋ฉด ์••์ถ• ์‹œ์ž‘
391
- if len(conversation_history) / self.max_turns > self.compression_threshold:
392
- self._compress_context(session_id)
393
-
394
- def _compress_context(self, session_id: str = "default"):
395
- """์ปจํ…์ŠคํŠธ ์••์ถ• (์ค‘์š”ํ•œ ๋ฉ”์‹œ์ง€ ์œ ์ง€, ์„ธ์…˜๋ณ„)"""
396
- if session_id not in self.session_conversations:
397
- return
398
-
399
- conversation_history = self.session_conversations[session_id]
400
-
401
- if len(conversation_history) <= self.max_turns:
402
- return
403
-
404
- logger.info(f"๐Ÿ—œ๏ธ ์„ธ์…˜ {session_id} ์ปจํ…์ŠคํŠธ ์••์ถ• ์‹œ์ž‘: {len(conversation_history)} โ†’ {self.max_turns}")
405
-
406
- # ์ „๋žต์— ๋”ฐ๋ฅธ ์••์ถ•
407
- if self.strategy == "sliding_window":
408
- # ์Šฌ๋ผ์ด๋”ฉ ์œˆ๋„์šฐ: ์ตœ๊ทผ ๋ฉ”์‹œ์ง€ ์šฐ์„ 
409
- while len(conversation_history) > self.max_turns:
410
- conversation_history.popleft()
411
-
412
- elif self.strategy == "priority_keep":
413
- # ์šฐ์„ ์ˆœ์œ„ ๊ธฐ๋ฐ˜: ์‹œ์Šคํ…œ ํ”„๋กฌํ”„ํŠธ์™€ ์ตœ๊ทผ ๋ฉ”์‹œ์ง€ ์šฐ์„ 
414
- # ์ฒซ ๋ฒˆ์งธ์™€ ๋งˆ์ง€๋ง‰ ๋ฉ”์‹œ์ง€๋Š” ์œ ์ง€
415
- if len(conversation_history) > self.max_turns:
416
- # ์ค‘๊ฐ„ ๋ฉ”์‹œ์ง€๋“ค ์ค‘ ์ผ๋ถ€ ์ œ๊ฑฐ
417
- middle_start = self.max_turns // 2
418
- middle_end = len(conversation_history) - self.max_turns // 2
419
-
420
- # ์ค‘๊ฐ„ ๋ถ€๋ถ„์„ ์š”์•ฝ์œผ๋กœ ๋Œ€์ฒด
421
- removed_turns = list(conversation_history)[middle_start:middle_end]
422
- summary_content = f"[์ด์ „ {len(removed_turns)}๊ฐœ ๋ฉ”์‹œ์ง€ ์š”์•ฝ: {len(removed_turns)}๊ฐœ ๋Œ€ํ™” ํ„ด]"
423
-
424
- # ์ค‘๊ฐ„ ๋ถ€๋ถ„ ์ œ๊ฑฐ
425
- for _ in range(middle_end - middle_start):
426
- conversation_history.pop(middle_start)
427
-
428
- # ์š”์•ฝ ๋ฉ”์‹œ์ง€ ์ถ”๊ฐ€
429
- summary_turn = ConversationTurn(
430
- role="system",
431
- content=summary_content,
432
- timestamp=time.time(),
433
- message_id=f"summary_{int(time.time() * 1000)}"
434
- )
435
- conversation_history.insert(middle_start, summary_turn)
436
-
437
- elif self.strategy == "circular":
438
- # ์ˆœํ™˜ ๋ฒ„ํผ: ๊ฐ€์žฅ ์˜ค๋ž˜๋œ ๋ฉ”์‹œ์ง€ ์ œ๊ฑฐ
439
- while len(conversation_history) > self.max_turns:
440
- conversation_history.popleft()
441
-
442
- self._update_context_stats(session_id)
443
- logger.info(f"โœ… ์„ธ์…˜ {session_id} ์ปจํ…์ŠคํŠธ ์••์ถ• ์™„๋ฃŒ: {len(conversation_history)} ํ„ด")
444
-
445
- def _truncate_context(self, context: str, max_length: int) -> str:
446
- """์ปจํ…์ŠคํŠธ ๊ธธ์ด ์ œํ•œ"""
447
- if len(context) <= max_length:
448
- return context
449
-
450
- # ๊ฐ€์žฅ ์ตœ๊ทผ ๋ฉ”์‹œ์ง€๋ถ€ํ„ฐ ์œ ์ง€
451
- truncated_context = context[-max_length:]
452
-
453
- # ๋ฉ”์‹œ์ง€ ๊ฒฝ๊ณ„ ํ™•์ธ
454
- if not truncated_context.startswith("<|im_start|>"):
455
- # ๋ฉ”์‹œ์ง€ ๊ฒฝ๊ณ„๋ฅผ ์ฐพ์•„์„œ ์ž๋ฅด๊ธฐ
456
- start_idx = truncated_context.find("<|im_start|>")
457
- if start_idx != -1:
458
- truncated_context = truncated_context[start_idx:]
459
-
460
- return truncated_context
461
-
462
- def export_context(self, file_path: str = None, session_id: str = "default") -> str:
463
- """์ปจํ…์ŠคํŠธ๋ฅผ ํŒŒ์ผ๋กœ ๋‚ด๋ณด๋‚ด๊ธฐ (์„ธ์…˜๋ณ„)"""
464
- if not file_path:
465
- file_path = f"context_export_{session_id}_{int(time.time())}.json"
466
-
467
- if session_id not in self.session_conversations:
468
- logger.warning(f"โš ๏ธ ์„ธ์…˜ {session_id}๊ฐ€ ์กด์žฌํ•˜์ง€ ์•Š์Šต๋‹ˆ๋‹ค.")
469
- return None
470
-
471
- conversation_history = self.session_conversations[session_id]
472
-
473
- export_data = {
474
- "export_timestamp": time.time(),
475
- "session_id": session_id,
476
- "system_prompt": self.system_prompt,
477
- "conversation_history": [
478
- {
479
- "role": turn.role,
480
- "content": turn.content,
481
- "timestamp": turn.timestamp,
482
- "message_id": turn.message_id,
483
- "metadata": turn.metadata
484
- }
485
- for turn in conversation_history
486
- ],
487
- "context_stats": self.get_context_summary(session_id)
488
- }
489
-
490
- with open(file_path, 'w', encoding='utf-8') as f:
491
- json.dump(export_data, f, ensure_ascii=False, indent=2)
492
-
493
- logger.info(f"๐Ÿ’พ ์„ธ์…˜ {session_id} ์ปจํ…์ŠคํŠธ ๋‚ด๋ณด๋‚ด๊ธฐ ์™„๋ฃŒ: {file_path}")
494
- return file_path
495
-
496
def import_context(self, file_path: str) -> bool:
    """Load a previously exported context from a JSON file.

    The file is read and every stored turn is rebuilt *before* the current
    context is touched, so an unreadable or malformed file leaves the
    existing context intact instead of wiping it half-way through.

    Args:
        file_path: Path of the UTF-8 encoded JSON file to read.

    Returns:
        True when the current context was replaced by the file's content,
        False when reading or parsing failed (the error is logged).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            import_data = json.load(f)

        # Materialize the whole payload first: a missing "role"/"content"/
        # "timestamp"/"message_id" key raises here, while the live context
        # has not been modified yet.
        if "conversation_history" in import_data:
            raw_turns = import_data["conversation_history"]
        else:
            raw_turns = []
        restored_turns = [
            ConversationTurn(
                role=turn_data["role"],
                content=turn_data["content"],
                timestamp=turn_data["timestamp"],
                message_id=turn_data["message_id"],
                metadata=turn_data.get("metadata", {}),
            )
            for turn_data in raw_turns
        ]

        # The payload is valid; now it is safe to drop the existing context.
        self.clear_context()

        # Restore the system prompt only when the export carried one.
        if "system_prompt" in import_data:
            self.system_prompt = import_data["system_prompt"]

        # NOTE(review): turns are restored into self.conversation_history,
        # while the export side reads self.session_conversations[session_id]
        # - confirm this is the intended target.
        for turn in restored_turns:
            self.conversation_history.append(turn)

        self._update_context_stats()
        logger.info(f"๐Ÿ“ฅ ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ ์™„๋ฃŒ: {file_path}")
        return True

    except Exception as e:
        # Deliberate best-effort boundary: callers only get a success flag.
        logger.error(f"โŒ ์ปจํ…์ŠคํŠธ ๊ฐ€์ ธ์˜ค๊ธฐ ์‹คํŒจ: {e}")
        return False
528
-
529
def get_memory_efficiency(self, session_id: str = "default") -> Dict[str, float]:
    """Return memory-efficiency indicators for one session.

    Args:
        session_id: Session whose conversation history is measured.

    Returns:
        An empty dict when the session is unknown. Otherwise a dict with
        "session_id" (echoed back, a string), "context_utilization",
        "token_efficiency", "compression_ratio" and "memory_fragmentation".
        Ratios whose limit (max_turns / max_tokens) is not positive are
        reported as 0 instead of raising ZeroDivisionError.
    """
    if session_id not in self.session_conversations:
        return {}

    conversation_history = self.session_conversations[session_id]
    turn_count = len(conversation_history)

    # Guard the turn limit the same way the token limit is guarded.
    if self.max_turns > 0:
        context_utilization = turn_count / self.max_turns
        compression_ratio = 1.0 - (turn_count / (self.max_turns * 2))
    else:
        context_utilization = 0
        compression_ratio = 0

    return {
        "session_id": session_id,
        "context_utilization": context_utilization,
        "token_efficiency": self.total_tokens / self.max_tokens if self.max_tokens > 0 else 0,
        "compression_ratio": compression_ratio,
        "memory_fragmentation": self._calculate_fragmentation(session_id)
    }
543
-
544
- def _calculate_fragmentation(self, session_id: str = "default") -> float:
545
- """๋ฉ”๋ชจ๋ฆฌ ๋‹จํŽธํ™” ์ •๋„ ๊ณ„์‚ฐ (์„ธ์…˜๋ณ„)"""
546
- if session_id not in self.session_conversations:
547
- return 0.0
548
-
549
- conversation_history = self.session_conversations[session_id]
550
-
551
- if len(conversation_history) <= 1:
552
- return 0.0
553
-
554
- # ์—ฐ์†๋œ ๋ฉ”์‹œ์ง€ ๊ฐ„์˜ ์‹œ๊ฐ„ ๊ฐ„๊ฒฉ์œผ๋กœ ๋‹จํŽธํ™” ๊ณ„์‚ฐ
555
- timestamps = [turn.timestamp for turn in conversation_history]
556
- intervals = [timestamps[i+1] - timestamps[i] for i in range(len(timestamps)-1)]
557
-
558
- if not intervals:
559
- return 0.0
560
-
561
- avg_interval = sum(intervals) / len(intervals)
562
- variance = sum((x - avg_interval) ** 2 for x in intervals) / len(intervals)
563
-
564
- # ์ •๊ทœํ™”๋œ ๋‹จํŽธํ™” ์ ์ˆ˜ (0-1)
565
- return min(1.0, variance / (avg_interval ** 2) if avg_interval > 0 else 0.0)
566
-
567
- def _check_auto_cleanup(self, session_id: str = "default"):
568
- """์ž๋™ ์ •๋ฆฌ ์กฐ๊ฑด ์ฒดํฌ ๋ฐ ์‹คํ–‰"""
569
- if not self.auto_cleanup_enabled:
570
- return
571
-
572
- current_time = time.time()
573
-
574
- # ์„ธ์…˜๋ณ„ ์นด์šดํ„ฐ ์ดˆ๊ธฐํ™”
575
- if session_id not in self.turn_counters:
576
- self.turn_counters[session_id] = 0
577
- if session_id not in self.last_cleanup_time:
578
- self.last_cleanup_time[session_id] = current_time
579
-
580
- # ํ„ด ์นด์šดํ„ฐ ์ฆ๊ฐ€
581
- self.turn_counters[session_id] += 1
582
-
583
- # ์ •๋ฆฌ ์กฐ๊ฑด ์ฒดํฌ
584
- should_cleanup = False
585
- cleanup_reason = ""
586
-
587
- # ํ„ด ๊ธฐ๋ฐ˜ ์ •๋ฆฌ
588
- if self.turn_counters[session_id] >= self.cleanup_interval_turns:
589
- should_cleanup = True
590
- cleanup_reason = f"ํ„ด ๊ธฐ๋ฐ˜ ({self.turn_counters[session_id]} ํ„ด)"
591
-
592
- # ์‹œ๊ฐ„ ๊ธฐ๋ฐ˜ ์ •๋ฆฌ
593
- elif current_time - self.last_cleanup_time[session_id] >= self.cleanup_interval_time:
594
- should_cleanup = True
595
- cleanup_reason = f"์‹œ๊ฐ„ ๊ธฐ๋ฐ˜ ({int(current_time - self.last_cleanup_time[session_id])}์ดˆ)"
596
-
597
- # ์ปจํ…์ŠคํŠธ ๊ธธ์ด ๊ธฐ๋ฐ˜ ์ •๋ฆฌ (๊ฐ•ํ™”)
598
- elif len(self.session_conversations.get(session_id, [])) > self.max_turns:
599
- should_cleanup = True
600
- cleanup_reason = f"๊ธธ์ด ๊ธฐ๋ฐ˜ ({len(self.session_conversations.get(session_id, []))} > {self.max_turns})"
601
-
602
- # ์ž๋™ ์ •๋ฆฌ ์‹คํ–‰
603
- if should_cleanup:
604
- logger.info(f"๐Ÿ”„ ์„ธ์…˜ {session_id} ์ž๋™ ์ •๋ฆฌ ์‹œ์ž‘: {cleanup_reason}")
605
- self._execute_auto_cleanup(session_id)
606
-
607
- # ์นด์šดํ„ฐ ๋ฐ ์‹œ๊ฐ„ ๋ฆฌ์…‹
608
- self.turn_counters[session_id] = 0
609
- self.last_cleanup_time[session_id] = current_time
610
-
611
- def _execute_auto_cleanup(self, session_id: str = "default"):
612
- """์ž๋™ ์ •๋ฆฌ ์‹คํ–‰"""
613
- if session_id not in self.session_conversations:
614
- return
615
-
616
- conversation_history = self.session_conversations[session_id]
617
- original_length = len(conversation_history)
618
-
619
- if original_length <= self.max_turns:
620
- return
621
-
622
- # ์ „๋žต๋ณ„ ์ •๋ฆฌ ์‹คํ–‰
623
- if self.cleanup_strategy == "smart":
624
- self._smart_cleanup(session_id)
625
- elif self.cleanup_strategy == "aggressive":
626
- self._aggressive_cleanup(session_id)
627
- elif self.cleanup_strategy == "conservative":
628
- self._conservative_cleanup(session_id)
629
-
630
- final_length = len(conversation_history)
631
- removed_count = original_length - final_length
632
-
633
- if removed_count > 0:
634
- logger.info(f"โœ… ์„ธ์…˜ {session_id} ์ž๋™ ์ •๋ฆฌ ์™„๋ฃŒ: {original_length} โ†’ {final_length} ํ„ด (์ œ๊ฑฐ: {removed_count})")
635
-
636
def _smart_cleanup(self, session_id: str = "default"):
    """Collapse the middle of an over-long session into one summary turn.

    Keeps the oldest ``min(3, max_turns // 3)`` turns and the newest
    ``min(5, max_turns // 2)`` turns, and replaces everything in between
    with a single "system" turn that states how many turns were dropped.
    The history container is rewritten in place, so existing references to
    it stay valid. Does nothing for unknown sessions or when the history
    already fits in max_turns.
    """
    if session_id not in self.session_conversations:
        return

    conversation_history = self.session_conversations[session_id]

    if len(conversation_history) <= self.max_turns:
        return

    # How many turns to keep at the head and at the tail.
    important_count = min(3, self.max_turns // 3)
    recent_count = min(5, self.max_turns // 2)

    turns = list(conversation_history)
    middle_start = important_count
    middle_end = len(turns) - recent_count

    if middle_end <= middle_start:
        return

    removed_turns = turns[middle_start:middle_end]

    # Placeholder that stands in for the removed turns.
    summary_content = f"[์ด์ „ {len(removed_turns)}๊ฐœ ๋ฉ”์‹œ์ง€ ์š”์•ฝ: {len(removed_turns)}๊ฐœ ๋Œ€ํ™” ํ„ด]"
    now = time.time()
    summary_turn = ConversationTurn(
        role="system",
        content=summary_content,
        timestamp=now,
        message_id=f"summary_{int(now * 1000)}"
    )

    # Sibling cleanups call popleft() on this container, i.e. it is a deque,
    # and deque.pop() accepts no index. Rebuild the content with clear() +
    # extend(), which both list and deque support.
    conversation_history.clear()
    conversation_history.extend(
        turns[:middle_start] + [summary_turn] + turns[middle_end:]
    )
672
-
673
- def _aggressive_cleanup(self, session_id: str = "default"):
674
- """์ ๊ทน์  ์ •๋ฆฌ: ์ตœ๊ทผ ๋ฉ”์‹œ์ง€๋งŒ ์œ ์ง€"""
675
- if session_id not in self.session_conversations:
676
- return
677
-
678
- conversation_history = self.session_conversations[session_id]
679
-
680
- # ์ตœ๊ทผ max_turns ๊ฐœ๋งŒ ์œ ์ง€
681
- while len(conversation_history) > self.max_turns:
682
- conversation_history.popleft()
683
-
684
- def _conservative_cleanup(self, session_id: str = "default"):
685
- """๋ณด์ˆ˜์  ์ •๋ฆฌ: ์ ์ง„์ ์œผ๋กœ ์ •๋ฆฌ"""
686
- if session_id not in self.session_conversations:
687
- return
688
-
689
- conversation_history = self.session_conversations[session_id]
690
-
691
- # 20%์”ฉ ์ ์ง„์ ์œผ๋กœ ์ œ๊ฑฐ
692
- target_length = int(len(conversation_history) * 0.8)
693
- if target_length > self.max_turns:
694
- while len(conversation_history) > target_length:
695
- conversation_history.popleft()
696
-
697
# Module-wide context manager instance, created once at import time.
context_manager = ContextManager()


def get_context_manager() -> ContextManager:
    """Return the module-wide ContextManager instance."""
    return context_manager
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
lily_llm_core/lora_manager_250822_0312.py DELETED
@@ -1,524 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- LoRA/QLoRA ๊ด€๋ฆฌ์ž (LoRA Manager)
4
- LoRA ์–ด๋Œ‘ํ„ฐ๋ฅผ ๋กœ๋“œํ•˜๊ณ  ๊ด€๋ฆฌํ•˜๋Š” ์‹œ์Šคํ…œ
5
- """
6
-
7
- import logging
8
- import os
9
- import json
10
- import torch
11
- from typing import Dict, Any, Optional, List, Union
12
- from pathlib import Path
13
- import warnings
14
- import time
15
-
16
- # logger๋ฅผ ๋จผ์ € ์ •์˜
17
- logger = logging.getLogger(__name__)
18
-
19
- # PEFT ๊ด€๋ จ import (์„ค์น˜๋˜์ง€ ์•Š์€ ๊ฒฝ์šฐ ๊ฒฝ๊ณ )
20
- try:
21
- logger.info("๐Ÿ” PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์‹œ๋„ ์ค‘...")
22
- from peft import (
23
- LoraConfig,
24
- get_peft_model,
25
- PeftModel,
26
- TaskType,
27
- prepare_model_for_kbit_training
28
- )
29
- from peft.utils import get_peft_model_state_dict
30
- PEFT_AVAILABLE = True
31
- logger.info("โœ… PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์„ฑ๊ณต")
32
- except ImportError as e:
33
- PEFT_AVAILABLE = False
34
- logger.error(f"โŒ PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์‹คํŒจ: {e}")
35
- logger.error(f"โŒ Python ๊ฒฝ๋กœ: {os.environ.get('PYTHONPATH', 'Not set')}")
36
- logger.error(f"โŒ ํ˜„์žฌ ์ž‘์—… ๋””๋ ‰ํ† ๋ฆฌ: {os.getcwd()}")
37
- warnings.warn(f"PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. LoRA ๊ธฐ๋Šฅ์„ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค. ์˜ค๋ฅ˜: {e}")
38
-
39
- # Transformers ๊ด€๋ จ import
40
- try:
41
- logger.info("๐Ÿ” Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์‹œ๋„ ์ค‘...")
42
- from transformers import (
43
- AutoModelForCausalLM,
44
- AutoTokenizer,
45
- BitsAndBytesConfig,
46
- TrainingArguments,
47
- Trainer,
48
- DataCollatorForLanguageModeling
49
- )
50
- TRANSFORMERS_AVAILABLE = True
51
- logger.info("โœ… Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์„ฑ๊ณต")
52
- except ImportError as e:
53
- TRANSFORMERS_AVAILABLE = False
54
- logger.error(f"โŒ Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ import ์‹คํŒจ: {e}")
55
- warnings.warn(f"Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์„ค์น˜๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค. ์˜ค๋ฅ˜: {e}")
56
-
57
class LoRAManager:
    """Load, apply, save, merge and switch LoRA/QLoRA adapters on a base model.

    One manager holds one base model, its tokenizer and at most one active
    PEFT-wrapped model (``lora_model``). Most operations return a success
    flag and log the error instead of raising.
    """

    def __init__(self, base_model_path: Optional[str] = None, device: str = "auto"):
        """
        Args:
            base_model_path: Path or hub id of the base model; may also be
                supplied later to load_base_model().
            device: Device to use ('auto', 'cpu', 'cuda', 'mps').

        Raises:
            ImportError: If the PEFT or Transformers import at module level
                failed.
        """
        logger.info(f"๐Ÿ”ง LoRA ๊ด€๋ฆฌ์ž ์ดˆ๊ธฐํ™” ์‹œ์ž‘: PEFT_AVAILABLE={PEFT_AVAILABLE}, TRANSFORMERS_AVAILABLE={TRANSFORMERS_AVAILABLE}")

        if not PEFT_AVAILABLE:
            logger.error("โŒ PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
            logger.error("โŒ pip install peft๋ฅผ ์‹คํ–‰ํ–ˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”.")
            logger.error("โŒ ๊ฐ€์ƒํ™˜๊ฒฝ์ด ํ™œ์„ฑํ™”๋˜์–ด ์žˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”.")
            raise ImportError("PEFT ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. pip install peft๋ฅผ ์‹คํ–‰ํ•˜์„ธ์š”.")

        if not TRANSFORMERS_AVAILABLE:
            logger.error("โŒ Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๋ฅผ ์‚ฌ์šฉํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
            logger.error("โŒ pip install transformers๋ฅผ ์‹คํ–‰ํ–ˆ๋Š”์ง€ ํ™•์ธํ•˜์„ธ์š”.")
            raise ImportError("Transformers ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ํ•„์š”ํ•ฉ๋‹ˆ๋‹ค. pip install transformers๋ฅผ ์‹คํ–‰ํ•˜์„ธ์š”.")

        self.base_model_path = base_model_path
        self.device = self._get_device(device)

        # Model and tokenizer
        self.base_model = None
        self.tokenizer = None
        self.lora_model = None

        # LoRA configuration
        self.lora_config = None
        self.current_adapter_name = None

        # Training arguments resolved by prepare_for_training()
        self.training_args = None

        # Directory where adapters are stored (created relative to the CWD)
        self.adapters_dir = Path("lora_adapters")
        self.adapters_dir.mkdir(exist_ok=True)

        # Adapter name -> path for every adapter loaded so far
        self.loaded_adapters = {}

        logger.info(f"๐Ÿ”ง LoRA ๊ด€๋ฆฌ์ž ์ดˆ๊ธฐํ™”: device={self.device}")

    def _get_device(self, device: str) -> str:
        """Resolve 'auto' to 'cuda', 'mps' or 'cpu'; pass other values through."""
        if device != "auto":
            return device
        if torch.cuda.is_available():
            return "cuda"
        # torch builds without the MPS backend have no torch.backends.mps.
        mps_backend = getattr(torch.backends, "mps", None)
        if mps_backend is not None and mps_backend.is_available():
            return "mps"
        return "cpu"

    @staticmethod
    def _json_default(value: Any) -> Any:
        """Serialize values json cannot encode: sets as sorted lists, the rest as str."""
        if isinstance(value, (set, frozenset)):
            return sorted(value, key=str)
        return str(value)

    def load_base_model(self, model_path: Optional[str] = None, model_type: str = "causal_lm") -> bool:
        """Load the tokenizer and base model.

        Args:
            model_path: Path or hub id; falls back to base_model_path.
            model_type: Only "causal_lm" is supported.

        Returns:
            True on success, False when loading failed (the error is logged).
        """
        try:
            model_path = model_path or self.base_model_path
            if not model_path:
                raise ValueError("๋ชจ๋ธ ๊ฒฝ๋กœ๊ฐ€ ์ง€์ •๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")

            logger.info(f"๐Ÿ“ฅ ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹œ์ž‘: {model_path}")

            # A path that exists on disk is loaded strictly offline.
            is_local = os.path.exists(model_path)

            # NOTE(review): trust_remote_code=True executes code shipped with
            # the model repository; only use with trusted sources.
            self.tokenizer = AutoTokenizer.from_pretrained(
                model_path,
                trust_remote_code=True,
                local_files_only=is_local
            )

            # Fall back to EOS as the padding token when none is defined.
            if self.tokenizer.pad_token is None:
                self.tokenizer.pad_token = self.tokenizer.eos_token

            if model_type == "causal_lm":
                self.base_model = AutoModelForCausalLM.from_pretrained(
                    model_path,
                    trust_remote_code=True,
                    local_files_only=is_local,
                    torch_dtype=torch.float16 if self.device == "cuda" else torch.float32,
                    device_map="auto" if self.device == "cuda" else None
                )
            else:
                raise ValueError(f"์ง€์›ํ•˜์ง€ ์•Š๋Š” ๋ชจ๋ธ ํƒ€์ž…: {model_type}")

            # On cuda, device_map="auto" has already placed the weights.
            if self.device != "cuda":
                self.base_model = self.base_model.to(self.device)

            self.base_model_path = model_path
            logger.info(f"โœ… ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ: {model_path}")
            return True

        except Exception as e:
            logger.error(f"โŒ ๊ธฐ๋ณธ ๋ชจ๋ธ ๋กœ๋”ฉ ์‹คํŒจ: {e}")
            return False

    def create_lora_config(self,
                           r: int = 16,
                           lora_alpha: int = 32,
                           target_modules: Optional[List[str]] = None,
                           lora_dropout: float = 0.1,
                           bias: str = "none",
                           task_type: str = "CAUSAL_LM") -> "LoraConfig":
        """Build a LoraConfig, store it on the manager and return it.

        Args:
            r: LoRA rank.
            lora_alpha: LoRA scaling factor.
            target_modules: Module names to adapt; a default list of
                attention/MLP projection names is used when omitted.
            lora_dropout: Dropout applied to the LoRA layers.
            bias: Bias handling mode passed to LoraConfig.
            task_type: Name of a peft TaskType member; unknown names fall
                back to CAUSAL_LM (a warning is logged).

        Returns:
            The created LoraConfig (also kept in self.lora_config).
        """
        # The return annotation is a string so the class can still be defined
        # when the optional peft import failed.
        if target_modules is None:
            # Default for common model architectures
            target_modules = ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]

        # Map the task-type name to the peft enum
        logger.info(f"๐Ÿ” [DEBUG] ์ž…๋ ฅ๋œ task_type: {task_type}")
        task_type_map = {
            "CAUSAL_LM": TaskType.CAUSAL_LM,
            "SEQ_2_SEQ_LM": TaskType.SEQ_2_SEQ_LM,
            "SEQUENCE_CLASSIFICATION": TaskType.SEQUENCE_CLASSIFICATION,
            "TOKEN_CLASSIFICATION": TaskType.TOKEN_CLASSIFICATION,
            "QUESTION_ANSWERING": TaskType.QUESTION_ANSWERING
        }

        logger.info(f"๐Ÿ” [DEBUG] ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ TaskType: {list(task_type_map.keys())}")
        if task_type not in task_type_map:
            logger.warning(f"Unknown task_type {task_type!r}; falling back to CAUSAL_LM")
        task_type_enum = task_type_map.get(task_type, TaskType.CAUSAL_LM)
        logger.info(f"๐Ÿ” [DEBUG] ์„ ํƒ๋œ TaskType: {task_type_enum}")

        self.lora_config = LoraConfig(
            r=r,
            lora_alpha=lora_alpha,
            target_modules=target_modules,
            lora_dropout=lora_dropout,
            bias=bias,
            task_type=task_type_enum
        )

        logger.info(f"๐Ÿ”ง LoRA ์„ค์ • ์ƒ์„ฑ: r={r}, alpha={lora_alpha}, target_modules={target_modules}")
        return self.lora_config

    def apply_lora_to_model(self, adapter_name: str = "default") -> bool:
        """Wrap the loaded base model with a new adapter built from lora_config.

        Returns:
            True on success, False when the base model or the LoRA config is
            missing or PEFT failed (the error is logged).
        """
        try:
            if self.base_model is None:
                raise ValueError("๊ธฐ๋ณธ ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")

            if self.lora_config is None:
                raise ValueError("LoRA ์„ค์ •์ด ์ƒ์„ฑ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")

            logger.info(f"๐Ÿ”— LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์‹œ์ž‘: {adapter_name}")

            self.lora_model = get_peft_model(self.base_model, self.lora_config)
            self.current_adapter_name = adapter_name

            # A freshly created adapter starts in training mode.
            self.lora_model.train()
            self.lora_model.print_trainable_parameters()

            logger.info(f"โœ… LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์™„๋ฃŒ: {adapter_name}")
            return True

        except Exception as e:
            logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ ์šฉ ์‹คํŒจ: {e}")
            return False

    def load_lora_adapter(self, adapter_path: str, adapter_name: Optional[str] = None) -> bool:
        """Load a saved adapter from disk and make it the active one.

        When no base model is loaded yet, the base model named in the
        adapter's adapter_config.json is loaded first.

        Args:
            adapter_path: Directory that contains the saved adapter.
            adapter_name: Name to register the adapter under; defaults to the
                stem of adapter_path.

        Returns:
            True on success, False otherwise (the error is logged).
        """
        try:
            if not os.path.exists(adapter_path):
                raise FileNotFoundError(f"์–ด๋Œ‘ํ„ฐ ๊ฒฝ๋กœ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {adapter_path}")

            if adapter_name is None:
                adapter_name = Path(adapter_path).stem

            logger.info(f"๐Ÿ“ฅ LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋”ฉ ์‹œ์ž‘: {adapter_path}")

            # Resolve the base model from the adapter's own config if needed.
            if self.base_model is None:
                config_path = os.path.join(adapter_path, "adapter_config.json")
                if os.path.exists(config_path):
                    with open(config_path, 'r', encoding='utf-8') as f:
                        config = json.load(f)
                    base_model_path = config.get("base_model_name_or_path")
                    if base_model_path:
                        self.load_base_model(base_model_path)

            # Fail with a clear message instead of handing None to PEFT.
            if self.base_model is None:
                raise ValueError(
                    f"Base model is not loaded and could not be resolved from {adapter_path}"
                )

            self.lora_model = PeftModel.from_pretrained(
                self.base_model,
                adapter_path,
                torch_dtype=torch.float16 if self.device == "cuda" else torch.float32
            )

            if self.device != "cuda":
                self.lora_model = self.lora_model.to(self.device)

            self.current_adapter_name = adapter_name
            self.loaded_adapters[adapter_name] = adapter_path

            logger.info(f"โœ… LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋”ฉ ์™„๋ฃŒ: {adapter_name}")
            return True

        except Exception as e:
            logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ๋กœ๋”ฉ ์‹คํŒจ: {e}")
            return False

    def save_lora_adapter(self, adapter_name: Optional[str] = None, output_dir: Optional[str] = None) -> bool:
        """Save the active adapter, the tokenizer and an adapter_info.json.

        Args:
            adapter_name: Name to save under; defaults to the current adapter
                name, then to "default".
            output_dir: Target directory; defaults to adapters_dir/<name>.

        Returns:
            True on success, False otherwise (the error is logged).
        """
        try:
            if self.lora_model is None:
                raise ValueError("LoRA ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")

            adapter_name = adapter_name or self.current_adapter_name or "default"
            output_dir = output_dir or str(self.adapters_dir / adapter_name)

            logger.info(f"๐Ÿ’พ LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ ์‹œ์ž‘: {adapter_name} -> {output_dir}")

            self.lora_model.save_pretrained(output_dir)

            if self.tokenizer:
                self.tokenizer.save_pretrained(output_dir)

            adapter_info = {
                "adapter_name": adapter_name,
                "base_model": self.base_model_path,
                "lora_config": self.lora_config.to_dict() if self.lora_config else None,
                # Plain epoch seconds; wrapping the value in a tensor stored a
                # rounded "tensor(...)" repr instead of the timestamp.
                "created_at": str(time.time()),
                "device": self.device
            }

            # The config dict may hold values json cannot encode (e.g. sets);
            # _json_default turns them into lists/strings instead of failing.
            with open(os.path.join(output_dir, "adapter_info.json"), 'w', encoding='utf-8') as f:
                json.dump(adapter_info, f, indent=2, default=self._json_default)

            logger.info(f"โœ… LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ ์™„๋ฃŒ: {output_dir}")
            return True

        except Exception as e:
            logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์ €์žฅ ์‹คํŒจ: {e}")
            return False

    def merge_lora_with_base(self, output_path: Optional[str] = None) -> bool:
        """Merge the active adapter into the base weights and save the result.

        Args:
            output_path: Target directory; defaults to
                "<base_model_path>_merged".

        Returns:
            True on success, False otherwise (the error is logged).
        """
        try:
            if self.lora_model is None:
                raise ValueError("LoRA ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")

            output_path = output_path or f"{self.base_model_path}_merged"

            logger.info(f"๐Ÿ”— LoRA ์–ด๋Œ‘ํ„ฐ ๋ณ‘ํ•ฉ ์‹œ์ž‘: {output_path}")

            merged_model = self.lora_model.merge_and_unload()
            merged_model.save_pretrained(output_path)

            if self.tokenizer:
                self.tokenizer.save_pretrained(output_path)

            logger.info(f"โœ… LoRA ์–ด๋Œ‘ํ„ฐ ๋ณ‘ํ•ฉ ์™„๋ฃŒ: {output_path}")
            return True

        except Exception as e:
            logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ๋ณ‘ํ•ฉ ์‹คํŒจ: {e}")
            return False

    def list_available_adapters(self) -> List[Dict[str, Any]]:
        """Describe every adapter directory found under adapters_dir.

        Returns:
            One dict per sub-directory with "name", "path", "config_exists"
            and "info_exists", extended with the content of adapter_info.json
            when that file can be read. Empty when adapters_dir is missing.
        """
        adapters = []

        # The directory is created in __init__ but may have been removed since.
        if not self.adapters_dir.is_dir():
            return adapters

        for adapter_dir in self.adapters_dir.iterdir():
            if not adapter_dir.is_dir():
                continue

            config_path = adapter_dir / "adapter_config.json"
            info_path = adapter_dir / "adapter_info.json"

            adapter_info = {
                "name": adapter_dir.name,
                "path": str(adapter_dir),
                "config_exists": config_path.exists(),
                "info_exists": info_path.exists()
            }

            # Merge in the saved metadata; an unreadable file is only logged.
            if info_path.exists():
                try:
                    with open(info_path, 'r', encoding='utf-8') as f:
                        info = json.load(f)
                    adapter_info.update(info)
                except Exception as e:
                    logger.warning(f"์–ด๋Œ‘ํ„ฐ ์ •๋ณด ๋กœ๋“œ ์‹คํŒจ: {e}")

            adapters.append(adapter_info)

        return adapters

    def get_adapter_stats(self) -> Dict[str, Any]:
        """Return parameter counts for the active adapter model.

        Returns:
            A dict with the adapter name, trainable/total parameter counts,
            their ratio, the device and the model class name, or a dict with
            a single "error" key when no adapter is active or counting failed.
        """
        if self.lora_model is None:
            return {"error": "LoRA ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค."}

        try:
            trainable_params = 0
            all_param = 0

            for param in self.lora_model.parameters():
                count = param.numel()
                all_param += count
                if param.requires_grad:
                    trainable_params += count

            return {
                "adapter_name": self.current_adapter_name,
                "trainable_params": trainable_params,
                "all_params": all_param,
                "trainable_ratio": trainable_params / all_param if all_param > 0 else 0,
                "device": self.device,
                "model_type": type(self.lora_model).__name__
            }

        except Exception as e:
            logger.error(f"์–ด๋Œ‘ํ„ฐ ํ†ต๊ณ„ ์ˆ˜์ง‘ ์‹คํŒจ: {e}")
            return {"error": str(e)}

    def switch_adapter(self, adapter_name: str) -> bool:
        """Make the named adapter the active one.

        An adapter that is already active is kept as is. Any other adapter is
        (re)loaded from its recorded path, or from adapters_dir/<name> when
        it was never loaded, so that lora_model always matches
        current_adapter_name.

        Returns:
            True on success, False otherwise (the error is logged).
        """
        try:
            if adapter_name in self.loaded_adapters:
                if adapter_name == self.current_adapter_name and self.lora_model is not None:
                    logger.info(f"๐Ÿ”„ ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜: {adapter_name}")
                    return True
                # Only the most recently loaded adapter is held in lora_model,
                # so changing the name alone would leave the old weights active.
                adapter_path = Path(self.loaded_adapters[adapter_name])
            else:
                adapter_path = self.adapters_dir / adapter_name

            if not adapter_path.exists():
                raise FileNotFoundError(f"์–ด๋Œ‘ํ„ฐ๋ฅผ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค: {adapter_name}")

            # NOTE(review): this reloads onto self.base_model exactly like the
            # first load does; confirm PEFT does not stack the new adapter on
            # layers injected by a previous one.
            switched = self.load_lora_adapter(str(adapter_path), adapter_name)
            if switched:
                logger.info(f"๐Ÿ”„ ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜: {adapter_name}")
            return switched

        except Exception as e:
            logger.error(f"โŒ ์–ด๋Œ‘ํ„ฐ ์ „ํ™˜ ์‹คํŒจ: {e}")
            return False

    def unload_adapter(self) -> bool:
        """Drop the active adapter model, its name and the LoRA config.

        Returns:
            True when nothing was loaded or the references were cleared,
            False if clearing failed (the error is logged).
        """
        try:
            if self.lora_model is None:
                return True

            logger.info("๐Ÿ—‘๏ธ LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ ์‹œ์ž‘")

            self.lora_model = None
            self.current_adapter_name = None
            self.lora_config = None

            logger.info("โœ… LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ ์™„๋ฃŒ")
            return True

        except Exception as e:
            logger.error(f"โŒ LoRA ์–ด๋Œ‘ํ„ฐ ์–ธ๋กœ๋“œ ์‹คํŒจ: {e}")
            return False

    def generate_text(self, prompt: str, max_length: int = 100, temperature: float = 0.7) -> str:
        """Sample a completion for prompt from the active adapter model.

        Args:
            prompt: Text to continue.
            max_length: Passed to generate() as max_new_tokens, i.e. it limits
                the number of generated tokens, not the total length.
            temperature: Sampling temperature.

        Returns:
            The generated text with a leading copy of the prompt removed, or
            an error message string when generation failed.
        """
        try:
            if self.lora_model is None:
                raise ValueError("LoRA ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")

            if self.tokenizer is None:
                raise ValueError("ํ† ํฌ๋‚˜์ด์ €๊ฐ€ ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")

            inputs = self.tokenizer(prompt, return_tensors="pt")
            inputs = {k: v.to(self.device) for k, v in inputs.items()}

            # Inference mode: disables dropout in the LoRA layers.
            self.lora_model.eval()

            with torch.no_grad():
                outputs = self.lora_model.generate(
                    **inputs,
                    max_new_tokens=max_length,
                    temperature=temperature,
                    do_sample=True,
                    pad_token_id=self.tokenizer.eos_token_id
                )

            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

            # The decoded sequence starts with the prompt; return only the
            # continuation.
            if response.startswith(prompt):
                response = response[len(prompt):].strip()

            return response

        except Exception as e:
            logger.error(f"โŒ ํ…์ŠคํŠธ ์ƒ์„ฑ ์‹คํŒจ: {e}")
            return f"ํ…์ŠคํŠธ ์ƒ์„ฑ ์ค‘ ์˜ค๋ฅ˜๊ฐ€ ๋ฐœ์ƒํ–ˆ์Šต๋‹ˆ๋‹ค: {str(e)}"

    def prepare_for_training(self, training_args: Optional["TrainingArguments"] = None) -> bool:
        """Put the active adapter model into a trainable state.

        Args:
            training_args: Training arguments to use; a default set is built
                when omitted. The resolved value is kept in
                self.training_args so the caller can pass it to a Trainer.

        Returns:
            True on success, False otherwise (the error is logged).
        """
        # The parameter annotation is a string so the class can still be
        # defined when the optional transformers import failed.
        try:
            if self.lora_model is None:
                raise ValueError("LoRA ๋ชจ๋ธ์ด ๋กœ๋“œ๋˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")

            logger.info("๐Ÿ”ง ํ›ˆ๋ จ์„ ์œ„ํ•œ ๋ชจ๋ธ ์ค€๋น„ ์‹œ์ž‘")

            # Default training arguments
            if training_args is None:
                training_args = TrainingArguments(
                    output_dir="./lora_training_output",
                    num_train_epochs=3,
                    per_device_train_batch_size=4,
                    gradient_accumulation_steps=4,
                    learning_rate=2e-4,
                    warmup_steps=100,
                    logging_steps=10,
                    save_steps=500,
                    eval_steps=500,
                    evaluation_strategy="steps",
                    save_strategy="steps",
                    load_best_model_at_end=True,
                    metric_for_best_model="eval_loss",
                    greater_is_better=False,
                    fp16=torch.cuda.is_available(),
                    dataloader_pin_memory=False,
                )
            self.training_args = training_args

            self.lora_model.train()

            # Gradient checkpointing trades compute for memory.
            self.lora_model.gradient_checkpointing_enable()

            # Make the input embeddings require gradients (this is not
            # gradient clipping).
            self.lora_model.enable_input_require_grads()

            logger.info("โœ… ํ›ˆ๋ จ์„ ์œ„ํ•œ ๋ชจ๋ธ ์ค€๋น„ ์™„๋ฃŒ")
            return True

        except Exception as e:
            logger.error(f"โŒ ํ›ˆ๋ จ ์ค€๋น„ ์‹คํŒจ: {e}")
            return False
509
-
510
# Module-wide LoRA manager instance. Creation is guarded so that importing
# this module never fails: the instance stays None when the optional
# libraries are missing or the constructor raises.
try:
    if not (PEFT_AVAILABLE and TRANSFORMERS_AVAILABLE):
        lora_manager = None
        logger.warning("โš ๏ธ LoRA ๋ผ์ด๋ธŒ๋Ÿฌ๋ฆฌ๊ฐ€ ์‚ฌ์šฉ ๋ถˆ๊ฐ€๋Šฅํ•˜์—ฌ LoRA ๊ด€๋ฆฌ์ž๋ฅผ ์ƒ์„ฑํ•˜์ง€ ์•Š์•˜์Šต๋‹ˆ๋‹ค.")
    else:
        lora_manager = LoRAManager()
        logger.info("โœ… ์ „์—ญ LoRA ๊ด€๋ฆฌ์ž ์ธ์Šคํ„ด์Šค ์ƒ์„ฑ ์™„๋ฃŒ")
except Exception as exc:
    lora_manager = None
    logger.error(f"โŒ LoRA ๊ด€๋ฆฌ์ž ์ธ์Šคํ„ด์Šค ์ƒ์„ฑ ์‹คํŒจ: {exc}")


def get_lora_manager() -> Optional[LoRAManager]:
    """Return the module-wide LoRA manager, or None when it could not be created."""
    return lora_manager