gbrabbit committed on
Commit
656a9b2
·
1 Parent(s): d098bcd

Auto commit at 21-2025-08 0:28:12

Browse files
README.md CHANGED
@@ -10,6 +10,8 @@ app_file: app.py
10
  pinned: false
11
  ---
12
 
 
 
13
 
14
  # 250819
15
  - v1.0.1
 
10
  pinned: false
11
  ---
12
 
13
+ # 250820
14
+ - lily llm kanana 3b 멀티모달 개선, polyglot 1.3b, 5.8b 응답 품질 개선
15
 
16
  # 250819
17
  - v1.0.1
lily_llm_api/app_v2.py CHANGED
@@ -183,6 +183,7 @@ executor = concurrent.futures.ThreadPoolExecutor()
183
 
184
  def configure_cpu_threads():
185
  """CPU ์Šค๋ ˆ๋“œ ํ™˜๊ฒฝ ์ตœ์ ํ™” (vCPU ์ˆ˜์— ๋งž๊ฒŒ ์กฐ์ •)."""
 
186
  try:
187
  # ๊ธฐ๋ณธ๊ฐ’: ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋˜๋Š” ์‹œ์Šคํ…œ CPU ์ˆ˜๋ฅผ ์‚ฌ์šฉํ•˜๋˜ ๊ณผ๋„ํ•œ ์Šค๋ ˆ๋“œ ๋ฐฉ์ง€
188
  env_threads = os.getenv("CPU_THREADS")
@@ -190,7 +191,7 @@ def configure_cpu_threads():
190
  threads = max(1, int(env_threads))
191
  else:
192
  detected = os.cpu_count() or 2
193
- # ์ปจํ…Œ์ด๋„ˆ/์„œ๋ฒ„์˜ vCPU ์ˆ˜๋ฅผ ๊ทธ๋Œ€๋กœ ์‚ฌ์šฉํ•˜๋˜ ์ƒํ•œ 8 ์ ์šฉ
194
  threads = max(1, min(detected, 16))
195
 
196
  # OpenMP/MKL/numexpr
@@ -213,6 +214,7 @@ def configure_cpu_threads():
213
  logger.info(f"๐Ÿงต CPU thread config -> OMP/MKL/numexpr={threads}, torch_threads={threads}")
214
  except Exception as e:
215
  logger.warning(f"โš ๏ธ CPU ์Šค๋ ˆ๋“œ ์„ค์ • ์‹คํŒจ: {e}")
 
216
 
217
  def select_model_interactive():
218
  """์ธํ„ฐ๋ž™ํ‹ฐ๋ธŒ ๋ชจ๋ธ ์„ ํƒ"""
@@ -225,7 +227,7 @@ def select_model_interactive():
225
  try:
226
  # choice = input(f"\n๐Ÿ“ ์‚ฌ์šฉํ•  ๋ชจ๋ธ ๋ฒˆํ˜ธ๋ฅผ ์„ ํƒํ•˜์„ธ์š” (1-{len(available_models)}): ")
227
  # selected_model = available_models[int(choice) - 1]
228
- selected_model = available_models[2]
229
  print(f"\nโœ… '{selected_model['name']}' ๋ชจ๋ธ์„ ์„ ํƒํ–ˆ์Šต๋‹ˆ๋‹ค.")
230
  return selected_model['model_id']
231
  except (ValueError, IndexError):
@@ -237,7 +239,12 @@ async def startup_event():
237
  """[๋ณต์›] ์„œ๋ฒ„ ์‹œ์ž‘ ์‹œ ์ธํ„ฐ๋ž™ํ‹ฐ๋ธŒ ๋ชจ๋ธ ์„ ํƒ ๋ฐ ๋กœ๋“œ"""
238
  global model_loaded
239
  # CPU ์Šค๋ ˆ๋“œ ์ตœ์ ํ™” ์ ์šฉ
240
- configure_cpu_threads()
 
 
 
 
 
241
  selected_model_id = select_model_interactive()
242
  try:
243
  await load_model_async(selected_model_id)
@@ -448,7 +455,7 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
448
  return_tensors="pt",
449
  padding=True,
450
  truncation=True,
451
- max_length=64,
452
  )
453
  if 'token_type_ids' in inputs:
454
  del inputs['token_type_ids']
@@ -533,19 +540,39 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
533
  gen_config['max_time'] = 60.0 # 60์ดˆ ํƒ€์ž„์•„์›ƒ์œผ๋กœ ์กฐ์ •
534
 
535
  # ์ถ”๊ฐ€ ํƒ€์ž„์•„์›ƒ ์„ค์ •
536
- gen_config['max_time'] = 60.0 # ๊ฐ•์ œ 60์ดˆ ํƒ€์ž„์•„์›ƒ
537
- print(f"๐Ÿ” [DEBUG] ๊ฐ•์ œ ํƒ€์ž„์•„์›ƒ ์„ค์ •: 60์ดˆ")
538
 
539
  # ์ถ”๊ฐ€ ์„ฑ๋Šฅ ์ตœ์ ํ™” ์„ค์ •
540
  gen_config['use_cache'] = True # ์บ์‹œ ์‚ฌ์šฉ์œผ๋กœ ์†๋„ ํ–ฅ์ƒ
541
- gen_config['pad_token_id'] = tokenizer.eos_token_id if tokenizer.eos_token_id else None
542
 
543
- # EOS ํ† ํฐ ๊ฐ•์ œ ์„ค์ • - ๋ฌธ์žฅ ๋ ๋ฌธ์ œ ํ•ด๊ฒฐ
544
- if tokenizer.eos_token_id is not None:
545
- gen_config['eos_token_id'] = tokenizer.eos_token_id
546
- print(f"๐Ÿ” [DEBUG] EOS ํ† ํฐ ๊ฐ•์ œ ์„ค์ •: {tokenizer.eos_token} (ID: {tokenizer.eos_token_id})")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
547
  else:
548
- print(f"โš ๏ธ [DEBUG] EOS ํ† ํฐ์ด ์„ค์ •๋˜์ง€ ์•Š์Œ")
 
 
 
 
 
 
549
 
550
  # ์ƒ์„ฑ ์„ค์ • ์ตœ์ข… ํ™•์ธ
551
  print(f"๐Ÿ” [DEBUG] ์ตœ์ข… ์ƒ์„ฑ ์„ค์ •: {gen_config}")
 
183
 
184
  def configure_cpu_threads():
185
  """CPU ์Šค๋ ˆ๋“œ ํ™˜๊ฒฝ ์ตœ์ ํ™” (vCPU ์ˆ˜์— ๋งž๊ฒŒ ์กฐ์ •)."""
186
+ print(f"๐Ÿ” [DEBUG] configure_cpu_threads ์‹œ์ž‘")
187
  try:
188
  # ๊ธฐ๋ณธ๊ฐ’: ํ™˜๊ฒฝ๋ณ€์ˆ˜ ๋˜๋Š” ์‹œ์Šคํ…œ CPU ์ˆ˜๋ฅผ ์‚ฌ์šฉํ•˜๋˜ ๊ณผ๋„ํ•œ ์Šค๋ ˆ๋“œ ๋ฐฉ์ง€
189
  env_threads = os.getenv("CPU_THREADS")
 
191
  threads = max(1, int(env_threads))
192
  else:
193
  detected = os.cpu_count() or 2
194
+ # ์ปจํ…Œ์ด๋„ˆ/์„œ๋ฒ„์˜ vCPU ์ˆ˜๋ฅผ ๊ทธ๋Œ€๋กœ ์‚ฌ์šฉํ•˜๋˜ ์ƒํ•œ 16 ์ ์šฉ
195
  threads = max(1, min(detected, 16))
196
 
197
  # OpenMP/MKL/numexpr
 
214
  logger.info(f"๐Ÿงต CPU thread config -> OMP/MKL/numexpr={threads}, torch_threads={threads}")
215
  except Exception as e:
216
  logger.warning(f"โš ๏ธ CPU ์Šค๋ ˆ๋“œ ์„ค์ • ์‹คํŒจ: {e}")
217
+ print(f"๐Ÿ” [DEBUG] configure_cpu_threads ์ข…๋ฃŒ")
218
 
219
  def select_model_interactive():
220
  """์ธํ„ฐ๋ž™ํ‹ฐ๋ธŒ ๋ชจ๋ธ ์„ ํƒ"""
 
227
  try:
228
  # choice = input(f"\n๐Ÿ“ ์‚ฌ์šฉํ•  ๋ชจ๋ธ ๋ฒˆํ˜ธ๋ฅผ ์„ ํƒํ•˜์„ธ์š” (1-{len(available_models)}): ")
229
  # selected_model = available_models[int(choice) - 1]
230
+ selected_model = available_models[0]
231
  print(f"\nโœ… '{selected_model['name']}' ๋ชจ๋ธ์„ ์„ ํƒํ–ˆ์Šต๋‹ˆ๋‹ค.")
232
  return selected_model['model_id']
233
  except (ValueError, IndexError):
 
239
  """[๋ณต์›] ์„œ๋ฒ„ ์‹œ์ž‘ ์‹œ ์ธํ„ฐ๋ž™ํ‹ฐ๋ธŒ ๋ชจ๋ธ ์„ ํƒ ๋ฐ ๋กœ๋“œ"""
240
  global model_loaded
241
  # CPU ์Šค๋ ˆ๋“œ ์ตœ์ ํ™” ์ ์šฉ
242
+ try:
243
+ configure_cpu_threads()
244
+ except Exception as e:
245
+ logger.error(f"โŒ CPU ์Šค๋ ˆ๋“œ ์„ค์ • ์‹คํŒจ: {e}")
246
+ print(f"๐Ÿ” [DEBUG] CPU ์Šค๋ ˆ๋“œ ์„ค์ • ์‹คํŒจ: {e}")
247
+ print(f"๐Ÿ” [DEBUG] CPU ์Šค๋ ˆ๋“œ ์„ค์ • ์‹คํŒจ: {e}")
248
  selected_model_id = select_model_interactive()
249
  try:
250
  await load_model_async(selected_model_id)
 
455
  return_tensors="pt",
456
  padding=True,
457
  truncation=True,
458
+ max_length=256,
459
  )
460
  if 'token_type_ids' in inputs:
461
  del inputs['token_type_ids']
 
540
  gen_config['max_time'] = 60.0 # 60์ดˆ ํƒ€์ž„์•„์›ƒ์œผ๋กœ ์กฐ์ •
541
 
542
  # ์ถ”๊ฐ€ ํƒ€์ž„์•„์›ƒ ์„ค์ •
543
+ # gen_config['max_time'] = 60.0 # ๊ฐ•์ œ 60์ดˆ ํƒ€์ž„์•„์›ƒ
544
+ print(f"๐Ÿ” [DEBUG] ๊ฐ•์ œ ํƒ€์ž„์•„์›ƒ ์„ค์ •: {gen_config['max_time']}์ดˆ")
545
 
546
  # ์ถ”๊ฐ€ ์„ฑ๋Šฅ ์ตœ์ ํ™” ์„ค์ •
547
  gen_config['use_cache'] = True # ์บ์‹œ ์‚ฌ์šฉ์œผ๋กœ ์†๋„ ํ–ฅ์ƒ
 
548
 
549
+ # PAD ํ† ํฐ ์„ค์ • - ๋ชจ๋ธ ํ”„๋กœํ•„ ์„ค์ • ์šฐ์„ 
550
+ if 'pad_token_id' not in gen_config:
551
+ # ํ”„๋กœํ•„์— ์„ค์ •์ด ์—†์„ ๋•Œ๋งŒ ๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ
552
+ if tokenizer.pad_token_id is not None:
553
+ gen_config['pad_token_id'] = tokenizer.pad_token_id
554
+ print(f"๐Ÿ” [DEBUG] PAD ํ† ํฐ ์„ค์ •: ํ† ํฌ๋‚˜์ด์ € ๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ (ID: {tokenizer.pad_token_id})")
555
+ else:
556
+ gen_config['pad_token_id'] = None
557
+ print(f"๐Ÿ” [DEBUG] PAD ํ† ํฐ ์„ค์ •: None (ํ† ํฌ๋‚˜์ด์ €์— PAD ํ† ํฐ ์—†์Œ)")
558
+
559
+ # EOS ํ† ํฐ ์„ค์ • - ๋ชจ๋ธ ํ”„๋กœํ•„ ์„ค์ • ์šฐ์„ 
560
+ if 'eos_token_id' not in gen_config:
561
+ # ํ”„๋กœํ•„์— ์„ค์ •์ด ์—†์„ ๋•Œ๋งŒ ๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ
562
+ if tokenizer.eos_token_id is not None:
563
+ gen_config['eos_token_id'] = tokenizer.eos_token_id
564
+ print(f"๐Ÿ” [DEBUG] EOS ํ† ํฐ ์„ค์ •: ํ† ํฌ๋‚˜์ด์ € ๏ฟฝ๏ฟฝ๋ณธ๊ฐ’ ์‚ฌ์šฉ (ID: {tokenizer.eos_token_id})")
565
+ else:
566
+ gen_config['eos_token_id'] = None
567
+ print(f"๐Ÿ” [DEBUG] EOS ํ† ํฐ ์„ค์ •: None (ํ† ํฌ๋‚˜์ด์ €์— EOS ํ† ํฐ ์—†์Œ)")
568
  else:
569
+ # ํ”„๋กœํ•„์— ์ด๋ฏธ ์„ค์ •๋œ ๊ฒฝ์šฐ - ๊ทธ๋Œ€๋กœ ์‚ฌ์šฉ
570
+ print(f"๐Ÿ” [DEBUG] EOS ํ† ํฐ ์„ค์ •: ํ”„๋กœํ•„ ์„ค์ • ์‚ฌ์šฉ (ID: {gen_config['eos_token_id']})")
571
+ # ํ”„๋กœํ•„์—์„œ ์„ค์ •ํ•œ EOS ํ† ํฐ ID๊ฐ€ ์œ ํšจํ•œ์ง€ ํ™•์ธ
572
+ if gen_config['eos_token_id'] is not None:
573
+ print(f"๐Ÿ” [DEBUG] EOS ํ† ํฐ ID {gen_config['eos_token_id']} ์‚ฌ์šฉํ•˜์—ฌ ์ƒ์„ฑ ์ค‘๋‹จ์  ์„ค์ •")
574
+ else:
575
+ print(f"๐Ÿ” [DEBUG] EOS ํ† ํฐ ์—†์Œ - max_new_tokens๊นŒ์ง€ ์ƒ์„ฑ")
576
 
577
  # ์ƒ์„ฑ ์„ค์ • ์ตœ์ข… ํ™•์ธ
578
  print(f"๐Ÿ” [DEBUG] ์ตœ์ข… ์ƒ์„ฑ ์„ค์ •: {gen_config}")
lily_llm_api/models/polyglot_ko_1_3b_chat.py CHANGED
@@ -10,6 +10,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
10
  import logging
11
  import os
12
  from pathlib import Path
 
13
 
14
  logger = logging.getLogger(__name__)
15
 
@@ -77,11 +78,11 @@ class PolyglotKo13bChatProfile:
77
  # Hugging Face ๋ชจ๋ธ ํŽ˜์ด์ง€์˜ ๊ณต์‹ ํ”„๋กฌํ”„ํŠธ ํ˜•์‹ ์‚ฌ์šฉ
78
  prompt = f"""๋‹น์‹ ์€ AI ์ฑ—๋ด‡์ž…๋‹ˆ๋‹ค. ์‚ฌ์šฉ์ž์—๊ฒŒ ๋„์›€์ด ๋˜๊ณ  ์œ ์ตํ•œ ๋‚ด์šฉ์„ ์ œ๊ณตํ•ด์•ผํ•ฉ๋‹ˆ๋‹ค. ๋‹ต๋ณ€์€ ๊ธธ๊ณ  ์ž์„ธํ•˜๋ฉฐ ์นœ์ ˆํ•œ ์„ค๋ช…์„ ๋ง๋ถ™์—ฌ์„œ ์ž‘์„ฑํ•˜์„ธ์š”.
79
 
80
- ### ์‚ฌ์šฉ์ž:
81
- {user_input}
82
 
83
- ### ์ฑ—๋ด‡:
84
- """
85
  return prompt
86
 
87
  def extract_response(self, full_text: str, formatted_prompt: str = None) -> str:
@@ -161,17 +162,17 @@ class PolyglotKo13bChatProfile:
161
  return False
162
 
163
  # ์˜์–ด๊ฐ€ ํฌํ•จ๋˜์–ด ์žˆ์œผ๋ฉด ํ’ˆ์งˆ ๋‚ฎ์Œ
164
- if any(char.isascii() and char.isalpha() for char in response):
165
- return False
166
 
167
  # ๋ฌธ์žฅ์ด ์ค‘๊ฐ„์— ๋Š์–ด์ง„ ๊ฒฝ์šฐ ํ’ˆ์งˆ ๋‚ฎ์Œ
168
  if response.endswith(('ํ•˜', '๋Š”', '์„', '๋ฅผ', '์ด', '๊ฐ€', '์˜', '์—', '๋กœ')):
169
  return False
170
 
171
  # ์ค‘๋ณต๋œ ๋‹จ์–ด๊ฐ€ ๋งŽ์œผ๋ฉด ํ’ˆ์งˆ ๋‚ฎ์Œ
172
- words = response.split()
173
- if len(words) > 3 and len(set(words)) / len(words) < 0.7:
174
- return False
175
 
176
  return True
177
 
@@ -181,8 +182,8 @@ class PolyglotKo13bChatProfile:
181
  improved = response.strip()
182
 
183
  # ์˜์–ด ์ œ๊ฑฐ
184
- import re
185
- improved = re.sub(r'[a-zA-Z]+', '', improved)
186
 
187
  # ์ค‘๋ณต ๊ณต๋ฐฑ ์ œ๊ฑฐ
188
  improved = re.sub(r'\s+', ' ', improved)
@@ -192,7 +193,7 @@ class PolyglotKo13bChatProfile:
192
  improved += '๋‹ˆ๋‹ค.'
193
 
194
  # ๋„ˆ๋ฌด ์งง์€ ๊ฒฝ์šฐ ๊ธฐ๋ณธ ์‘๋‹ต ์ถ”๊ฐ€
195
- if len(improved) < 10:
196
  improved = f"{improved} (์‘๋‹ต์ด ๋„ˆ๋ฌด ์งง์Šต๋‹ˆ๋‹ค. ๋” ์ž์„ธํ•œ ๋‹ต๋ณ€์„ ์›ํ•˜์‹œ๋ฉด ๋‹ค์‹œ ์งˆ๋ฌธํ•ด์ฃผ์„ธ์š”.)"
197
 
198
  logger.info(f"๐Ÿ”ง ์‘๋‹ต ํ’ˆ์งˆ ๊ฐœ์„  ์™„๋ฃŒ: {improved}")
@@ -209,7 +210,7 @@ class PolyglotKo13bChatProfile:
209
  "repetition_penalty": 1.2, # ๋ฐ˜๋ณต ๋ฐฉ์ง€
210
  "no_repeat_ngram_size": 4, # ๋ฐ˜๋ณต ๋ฐฉ์ง€
211
  "pad_token_id": None, # ๋ชจ๋ธ ๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ
212
- "eos_token_id": None, # None์œผ๋กœ ์„ค์ •ํ•˜์—ฌ ๋ชจ๋ธ์ด <|endoftext|> ์ž๋™ ๊ฐ์ง€
213
  "use_cache": True, # ์บ์‹œ ์‚ฌ์šฉ์œผ๋กœ ์†๋„ ํ–ฅ์ƒ
214
  "max_time": 60.0, # 60์ดˆ ํƒ€์ž„์•„์›ƒ
215
  "early_stopping": False, # False๋กœ ์„ค์ •ํ•˜์—ฌ <|endoftext|>๊นŒ์ง€ ์ƒ์„ฑ
 
10
  import logging
11
  import os
12
  from pathlib import Path
13
+ import re
14
 
15
  logger = logging.getLogger(__name__)
16
 
 
78
  # Hugging Face ๋ชจ๋ธ ํŽ˜์ด์ง€์˜ ๊ณต์‹ ํ”„๋กฌํ”„ํŠธ ํ˜•์‹ ์‚ฌ์šฉ
79
  prompt = f"""๋‹น์‹ ์€ AI ์ฑ—๋ด‡์ž…๋‹ˆ๋‹ค. ์‚ฌ์šฉ์ž์—๊ฒŒ ๋„์›€์ด ๋˜๊ณ  ์œ ์ตํ•œ ๋‚ด์šฉ์„ ์ œ๊ณตํ•ด์•ผํ•ฉ๋‹ˆ๋‹ค. ๋‹ต๋ณ€์€ ๊ธธ๊ณ  ์ž์„ธํ•˜๋ฉฐ ์นœ์ ˆํ•œ ์„ค๋ช…์„ ๋ง๋ถ™์—ฌ์„œ ์ž‘์„ฑํ•˜์„ธ์š”.
80
 
81
+ ### ์‚ฌ์šฉ์ž:
82
+ {user_input}
83
 
84
+ ### ์ฑ—๋ด‡:
85
+ """
86
  return prompt
87
 
88
  def extract_response(self, full_text: str, formatted_prompt: str = None) -> str:
 
162
  return False
163
 
164
  # ์˜์–ด๊ฐ€ ํฌํ•จ๋˜์–ด ์žˆ์œผ๋ฉด ํ’ˆ์งˆ ๋‚ฎ์Œ
165
+ # if any(char.isascii() and char.isalpha() for char in response):
166
+ # return False
167
 
168
  # ๋ฌธ์žฅ์ด ์ค‘๊ฐ„์— ๋Š์–ด์ง„ ๊ฒฝ์šฐ ํ’ˆ์งˆ ๋‚ฎ์Œ
169
  if response.endswith(('ํ•˜', '๋Š”', '์„', '๋ฅผ', '์ด', '๊ฐ€', '์˜', '์—', '๋กœ')):
170
  return False
171
 
172
  # ์ค‘๋ณต๋œ ๋‹จ์–ด๊ฐ€ ๋งŽ์œผ๋ฉด ํ’ˆ์งˆ ๋‚ฎ์Œ
173
+ # words = response.split()
174
+ # if len(words) > 3 and len(set(words)) / len(words) < 0.7:
175
+ # return False
176
 
177
  return True
178
 
 
182
  improved = response.strip()
183
 
184
  # ์˜์–ด ์ œ๊ฑฐ
185
+
186
+ # improved = re.sub(r'[a-zA-Z]+', '', improved)
187
 
188
  # ์ค‘๋ณต ๊ณต๋ฐฑ ์ œ๊ฑฐ
189
  improved = re.sub(r'\s+', ' ', improved)
 
193
  improved += '๋‹ˆ๋‹ค.'
194
 
195
  # ๋„ˆ๋ฌด ์งง์€ ๊ฒฝ์šฐ ๊ธฐ๋ณธ ์‘๋‹ต ์ถ”๊ฐ€
196
+ if len(improved) < 5:
197
  improved = f"{improved} (์‘๋‹ต์ด ๋„ˆ๋ฌด ์งง์Šต๋‹ˆ๋‹ค. ๋” ์ž์„ธํ•œ ๋‹ต๋ณ€์„ ์›ํ•˜์‹œ๋ฉด ๋‹ค์‹œ ์งˆ๋ฌธํ•ด์ฃผ์„ธ์š”.)"
198
 
199
  logger.info(f"๐Ÿ”ง ์‘๋‹ต ํ’ˆ์งˆ ๊ฐœ์„  ์™„๋ฃŒ: {improved}")
 
210
  "repetition_penalty": 1.2, # ๋ฐ˜๋ณต ๋ฐฉ์ง€
211
  "no_repeat_ngram_size": 4, # ๋ฐ˜๋ณต ๋ฐฉ์ง€
212
  "pad_token_id": None, # ๋ชจ๋ธ ๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ
213
+ "eos_token_id": 2, # <|endoftext|> ํ† ํฐ ID ๋ช…์‹œ์  ์„ค์ •
214
  "use_cache": True, # ์บ์‹œ ์‚ฌ์šฉ์œผ๋กœ ์†๋„ ํ–ฅ์ƒ
215
  "max_time": 60.0, # 60์ดˆ ํƒ€์ž„์•„์›ƒ
216
  "early_stopping": False, # False๋กœ ์„ค์ •ํ•˜์—ฌ <|endoftext|>๊นŒ์ง€ ์ƒ์„ฑ
lily_llm_api/models/polyglot_ko_5_8b_chat.py CHANGED
@@ -10,6 +10,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
10
  import logging
11
  import os
12
  from pathlib import Path
 
13
 
14
  logger = logging.getLogger(__name__)
15
 
@@ -23,7 +24,7 @@ class PolyglotKo58bChatProfile:
23
  self.description = "ํ•œ๊ตญ์–ด ์ฑ„ํŒ… ์ „์šฉ ๊ณ ์„ฑ๋Šฅ ๋ชจ๋ธ (5.8B)"
24
  self.language = "ko"
25
  self.model_size = "5.8B"
26
-
27
  def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
28
  """๋ชจ๋ธ ๋กœ๋“œ (ํ† ํฌ๋‚˜์ด์ € ์„ค์ • ์ˆ˜์ •)"""
29
  logger.info(f"๐Ÿ“ฅ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์ค‘...")
@@ -45,6 +46,19 @@ class PolyglotKo58bChatProfile:
45
  logger.warning("โš ๏ธ EOS ํ† ํฐ์ด ์—†์Šต๋‹ˆ๋‹ค. ๋ชจ๋ธ ๊ณต์‹ ๋ฌธ์„œ์— ๋”ฐ๋ผ <|endoftext|> ์„ค์ •")
46
  tokenizer.eos_token = "<|endoftext|>"
47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48
  if tokenizer.pad_token is None:
49
  logger.warning("โš ๏ธ PAD ํ† ํฐ์ด ์—†์Šต๋‹ˆ๋‹ค. EOS ํ† ํฐ์œผ๋กœ ์„ค์ •")
50
  tokenizer.pad_token = tokenizer.eos_token
@@ -64,6 +78,7 @@ class PolyglotKo58bChatProfile:
64
  trust_remote_code=True,
65
  torch_dtype=selected_dtype,
66
  local_files_only=use_local,
 
67
  ).to(device)
68
 
69
  logger.info(f"โœ… {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต! (device={device}, dtype={selected_dtype})")
@@ -73,144 +88,107 @@ class PolyglotKo58bChatProfile:
73
  raise
74
 
75
  def format_prompt(self, user_input: str) -> str:
76
- """ํ”„๋กฌํ”„ํŠธ ํฌ๋งทํŒ… - ๊ณต์‹ ๋ฌธ์„œ์™€ ์ผ์น˜"""
77
- # Hugging Face ๋ชจ๋ธ ํŽ˜์ด์ง€์˜ ๊ณต์‹ ํ”„๋กฌํ”„ํŠธ ํ˜•์‹ ์‚ฌ์šฉ
78
- prompt = f"""๋‹น์‹ ์€ AI ์ฑ—๋ด‡์ž…๋‹ˆ๋‹ค. ์‚ฌ์šฉ์ž์—๊ฒŒ ๋„์›€์ด ๋˜๊ณ  ์œ ์ตํ•œ ๋‚ด์šฉ์„ ์ œ๊ณตํ•ด์•ผํ•ฉ๋‹ˆ๋‹ค. ๋‹ต๋ณ€์€ ๊ธธ๊ณ  ์ž์„ธํ•˜๋ฉฐ ์นœ์ ˆํ•œ ์„ค๋ช…์„ ๋ง๋ถ™์—ฌ์„œ ์ž‘์„ฑํ•˜์„ธ์š”.
79
-
80
- ### ์‚ฌ์šฉ์ž:
81
- {user_input}
82
 
83
- ### ์ฑ—๋ด‡:
84
- """
85
  return prompt
86
 
87
  def extract_response(self, full_text: str, formatted_prompt: str = None) -> str:
88
  """์‘๋‹ต ์ถ”์ถœ - ํ’ˆ์งˆ ๊ฒ€์ฆ ๋ฐ ๊ฐœ์„ """
89
- logger.info(f"--- Polyglot 5.8B ์‘๋‹ต ์ถ”์ถœ ์‹œ์ž‘ ---")
90
  logger.info(f"์ „์ฒด ์ƒ์„ฑ ํ…์ŠคํŠธ (Raw): \n---\n{full_text}\n---")
91
  logger.info(f"์‚ฌ์šฉ๋œ ํ”„๋กฌํ”„ํŠธ: {formatted_prompt}")
92
 
93
- # 1์ˆœ์œ„: "### ์ฑ—๋ด‡:" ํƒœ๊ทธ๋กœ ์ถ”์ถœ ์‹œ๋„
94
- if "### ์ฑ—๋ด‡:" in full_text:
95
- response = full_text.split("### ์ฑ—๋ด‡:")[-1].strip()
96
- logger.info(f"โœ… ์„ฑ๊ณต: '### ์ฑ—๋ด‡:' ํƒœ๊ทธ๋กœ ์‘๋‹ต ์ถ”์ถœ")
97
- logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
98
-
99
- # ์‘๋‹ต ํ’ˆ์งˆ ๊ฒ€์ฆ
100
- if self._validate_response_quality(response):
101
- return response
 
102
  else:
103
- logger.warning("โš ๏ธ ์‘๋‹ต ํ’ˆ์งˆ์ด ๋‚ฎ์Šต๋‹ˆ๋‹ค. ํ’ˆ์งˆ ๊ฐœ์„  ์ œ์•ˆ์„ ์ถ”๊ฐ€ํ•ฉ๋‹ˆ๋‹ค.")
104
- return self._improve_response_quality(response)
105
-
106
- # 2์ˆœ์œ„: ํ”„๋กฌํ”„ํŠธ ์ œ๊ฑฐ๋กœ ์ถ”์ถœ ์‹œ๋„
107
- if formatted_prompt and formatted_prompt in full_text:
108
- response = full_text.replace(formatted_prompt, "").strip()
109
- logger.info(f"โœ… ์„ฑ๊ณต: ํ”„๋กฌํ”„ํŠธ ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ถ”์ถœ")
 
110
  logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
111
 
112
- if self._validate_response_quality(response):
113
- return response
114
- else:
115
- return self._improve_response_quality(response)
116
-
117
- # 3์ˆœ์œ„: ์ผ๋ฐ˜์ ์ธ ํ”„๋กฌํ”„ํŠธ ํŒจํ„ด ์ œ๊ฑฐ ์‹œ๋„
118
- clean_text = full_text.strip()
119
- patterns_to_remove = [
120
- "๋‹น์‹ ์€ AI ์ฑ—๋ด‡์ž…๋‹ˆ๋‹ค. ์‚ฌ์šฉ์ž์—๊ฒŒ ๋„์›€์ด ๋˜๊ณ  ์œ ์ตํ•œ ๋‚ด์šฉ์„ ์ œ๊ณตํ•ด์•ผํ•ฉ๋‹ˆ๋‹ค. ๋‹ต๋ณ€์€ ๊ธธ๊ณ  ์ž์„ธํ•˜๋ฉฐ ์นœ์ ˆํ•œ ์„ค๋ช…์„ ๋ง๋ถ™์—ฌ์„œ ์ž‘์„ฑํ•˜์„ธ์š”.",
121
- "### ์‚ฌ์šฉ์ž:",
122
- "### ์ฑ—๋ด‡:",
123
- "์‚ฌ์šฉ์ž:",
124
- "์ฑ—๋ด‡:",
125
- "assistant:",
126
- "user:"
127
- ]
128
-
129
- for pattern in patterns_to_remove:
130
- clean_text = clean_text.replace(pattern, "")
131
-
132
- clean_text = clean_text.strip()
133
-
134
- if clean_text and clean_text != full_text:
135
- logger.info("โœ… ์„ฑ๊ณต: ํŒจํ„ด ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ •๋ฆฌ")
136
- logger.info(f"์ •๋ฆฌ๋œ ์‘๋‹ต: {clean_text}")
137
 
138
- if self._validate_response_quality(clean_text):
139
- return clean_text
140
- else:
141
- return self._improve_response_quality(clean_text)
142
-
143
- # 4์ˆœ์œ„: ์ „์ฒด ํ…์ŠคํŠธ์—์„œ ๋ถˆํ•„์š”ํ•œ ๋ถ€๋ถ„๋งŒ ์ œ๊ฑฐ
144
- final_response = full_text.strip()
145
- logger.warning("โš ๏ธ ๊ฒฝ๊ณ : ํŠน๋ณ„ํ•œ ์‘๋‹ต ์ถ”์ถœ ํŒจํ„ด์„ ์ฐพ์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค. ์ „์ฒด ํ…์ŠคํŠธ๋ฅผ ์ •๋ฆฌํ•˜์—ฌ ๋ฐ˜ํ™˜ํ•ฉ๋‹ˆ๋‹ค.")
146
- logger.info(f"์ตœ์ข… ๋ฐ˜ํ™˜ ํ…์ŠคํŠธ: {final_response}")
147
-
148
- if self._validate_response_quality(final_response):
149
- return final_response
150
- else:
151
- return self._improve_response_quality(final_response)
152
 
153
  def _validate_response_quality(self, response: str) -> bool:
154
- """์‘๋‹ต ํ’ˆ์งˆ ๊ฒ€์ฆ"""
155
- if not response or len(response.strip()) < 5:
156
  return False
157
 
158
- # ์˜์–ด๊ฐ€ ํฌํ•จ๋˜์–ด ์žˆ์œผ๋ฉด ํ’ˆ์งˆ ๋‚ฎ์Œ
159
- if any(char.isascii() and char.isalpha() for char in response):
160
- return False
161
 
162
- # ๋ฌธ์žฅ์ด ์ค‘๊ฐ„์— ๋Š์–ด์ง„ ๊ฒฝ์šฐ ํ’ˆ์งˆ ๋‚ฎ์Œ
163
- if response.endswith(('ํ•˜', '๋Š”', '์„', '๋ฅผ', '์ด', '๊ฐ€', '์˜', '์—', '๋กœ')):
164
- return False
165
 
166
- # ์ค‘๋ณต๋œ ๋‹จ์–ด๊ฐ€ ๋งŽ์œผ๋ฉด ํ’ˆ์งˆ ๋‚ฎ์Œ
167
- words = response.split()
168
- if len(words) > 3 and len(set(words)) / len(words) < 0.7:
169
- return False
170
 
171
  return True
172
 
173
  def _improve_response_quality(self, response: str) -> str:
174
- """์‘๋‹ต ํ’ˆ์งˆ ๊ฐœ์„ """
175
- # ๊ธฐ๋ณธ ์ •๋ฆฌ
176
  improved = response.strip()
177
 
178
- # ์˜์–ด ์ œ๊ฑฐ
179
- import re
180
- improved = re.sub(r'[a-zA-Z]+', '', improved)
181
-
182
- # ์ค‘๋ณต ๊ณต๋ฐฑ ์ œ๊ฑฐ
183
- improved = re.sub(r'\s+', ' ', improved)
184
-
185
- # ๋ฌธ์žฅ์ด ์ค‘๊ฐ„์— ๋Š์–ด์ง„ ๊ฒฝ์šฐ ์ฒ˜๋ฆฌ
186
- if improved.endswith(('ํ•˜', '๋Š”', '์„', '๋ฅผ', '์ด', '๊ฐ€', '์˜', '์—', '๋กœ')):
187
- improved += '๋‹ˆ๋‹ค.'
188
-
189
- # ๋„ˆ๋ฌด ์งง์€ ๊ฒฝ์šฐ ๊ธฐ๋ณธ ์‘๋‹ต ์ถ”๊ฐ€
190
- if len(improved) < 10:
191
- improved = f"{improved} (์‘๋‹ต์ด ๋„ˆ๋ฌด ์งง์Šต๋‹ˆ๋‹ค. ๋” ์ž์„ธํ•œ ๋‹ต๋ณ€์„ ์›ํ•˜์‹œ๋ฉด ๋‹ค์‹œ ์งˆ๋ฌธํ•ด์ฃผ์„ธ์š”.)"
192
 
193
  logger.info(f"๐Ÿ”ง ์‘๋‹ต ํ’ˆ์งˆ ๊ฐœ์„  ์™„๋ฃŒ: {improved}")
194
  return improved
195
 
196
  def get_generation_config(self) -> Dict[str, Any]:
197
- """์ƒ์„ฑ ์„ค์ • - ๊ณต์‹ EOS ํ† ํฐ ์‚ฌ์šฉ"""
198
  return {
199
- "max_new_tokens": 128, # 5.8B ๋ชจ๋ธ์€ ๋” ๊ธด ์‘๋‹ต ์ƒ์„ฑ ๊ฐ€๋Šฅ
200
- "temperature": 0.3, # ์ผ๊ด€์„ฑ ํ–ฅ์ƒ
201
  "do_sample": True, # ์ƒ˜ํ”Œ๋ง ํ™œ์„ฑํ™”
202
- "top_k": 20, # ํ’ˆ์งˆ ํ–ฅ์ƒ
203
- "top_p": 0.8, # ์ผ๊ด€์„ฑ ํ–ฅ์ƒ
204
- "repetition_penalty": 1.2, # ๋ฐ˜๋ณต ๋ฐฉ์ง€
205
- "no_repeat_ngram_size": 4, # ๋ฐ˜๋ณต ๋ฐฉ์ง€
206
  "pad_token_id": None, # ๋ชจ๋ธ ๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ
207
- "eos_token_id": None, # None์œผ๋กœ ์„ค์ •ํ•˜์—ฌ ๋ชจ๋ธ์ด <|endoftext|> ์ž๋™ ๊ฐ์ง€
208
  "use_cache": True, # ์บ์‹œ ์‚ฌ์šฉ์œผ๋กœ ์†๋„ ํ–ฅ์ƒ
209
- "max_time": 240.0, # 5.8B ๋ชจ๋ธ์€ ๋” ๊ธด ์‹œ๊ฐ„ ํ•„์š” (120์ดˆ)
210
- "early_stopping": False, # False๋กœ ์„ค์ •ํ•˜์—ฌ <|endoftext|>๊นŒ์ง€ ์ƒ์„ฑ
211
  "stopping_criteria": None, # ๊ธฐ๋ณธ ์ •์ง€ ๊ธฐ์ค€ ์‚ฌ์šฉ
 
 
212
  }
213
-
214
  def get_model_info(self) -> Dict[str, Any]:
215
  """๋ชจ๋ธ ์ •๋ณด"""
216
  return {
@@ -221,4 +199,4 @@ class PolyglotKo58bChatProfile:
221
  "model_size": self.model_size,
222
  "local_path": self.local_path,
223
  "multimodal": False,
224
- }
 
10
  import logging
11
  import os
12
  from pathlib import Path
13
+ import re
14
 
15
  logger = logging.getLogger(__name__)
16
 
 
24
  self.description = "ํ•œ๊ตญ์–ด ์ฑ„ํŒ… ์ „์šฉ ๊ณ ์„ฑ๋Šฅ ๋ชจ๋ธ (5.8B)"
25
  self.language = "ko"
26
  self.model_size = "5.8B"
27
+
28
  def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
29
  """๋ชจ๋ธ ๋กœ๋“œ (ํ† ํฌ๋‚˜์ด์ € ์„ค์ • ์ˆ˜์ •)"""
30
  logger.info(f"๐Ÿ“ฅ {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์ค‘...")
 
46
  logger.warning("โš ๏ธ EOS ํ† ํฐ์ด ์—†์Šต๋‹ˆ๋‹ค. ๋ชจ๋ธ ๊ณต์‹ ๋ฌธ์„œ์— ๋”ฐ๋ผ <|endoftext|> ์„ค์ •")
47
  tokenizer.eos_token = "<|endoftext|>"
48
 
49
+ # 5.8B ๋ชจ๋ธ ํŠน๋ณ„ ์„ค์ •
50
+ if hasattr(tokenizer, 'add_eos_token'):
51
+ tokenizer.add_eos_token = False # ์ž๋™ EOS ํ† ํฐ ์ถ”๊ฐ€ ๋น„ํ™œ์„ฑํ™”
52
+ logger.info("โœ… ์ž๋™ EOS ํ† ํฐ ์ถ”๊ฐ€ ๋น„ํ™œ์„ฑํ™”๋จ")
53
+
54
+ # EOS ํ† ํฐ ์ฒ˜๋ฆฌ ๊ฐœ์„ 
55
+ if tokenizer.eos_token == "<|endoftext|>":
56
+ logger.info("โœ… <|endoftext|> EOS ํ† ํฐ ํ™•์ธ๋จ")
57
+ # EOS ํ† ํฐ์„ ํŠน๋ณ„ํ•˜๊ฒŒ ์ฒ˜๋ฆฌํ•˜์ง€ ์•Š๋„๋ก ์„ค์ •
58
+ if hasattr(tokenizer, 'eos_token_id'):
59
+ logger.info(f"โœ… EOS ํ† ํฐ ID: {tokenizer.eos_token_id}")
60
+
61
+ # PAD ํ† ํฐ ์„ค์ •
62
  if tokenizer.pad_token is None:
63
  logger.warning("โš ๏ธ PAD ํ† ํฐ์ด ์—†์Šต๋‹ˆ๋‹ค. EOS ํ† ํฐ์œผ๋กœ ์„ค์ •")
64
  tokenizer.pad_token = tokenizer.eos_token
 
78
  trust_remote_code=True,
79
  torch_dtype=selected_dtype,
80
  local_files_only=use_local,
81
+ low_cpu_mem_usage=True, # CPU ๋ฉ”๋ชจ๋ฆฌ ์‚ฌ์šฉ๋Ÿ‰ ์ตœ์ ํ™”
82
  ).to(device)
83
 
84
  logger.info(f"โœ… {self.display_name} ๋ชจ๋ธ ๋กœ๋“œ ์„ฑ๊ณต! (device={device}, dtype={selected_dtype})")
 
88
  raise
89
 
90
  def format_prompt(self, user_input: str) -> str:
91
+ """ํ”„๋กฌํ”„ํŠธ ํฌ๋งทํŒ… - 5.8B ๋ชจ๋ธ ์ตœ์ ํ™”"""
92
+ # 5.8B ๋ชจ๋ธ์— ๋” ์ ํ•ฉํ•œ ํ”„๋กฌํ”„ํŠธ ํ˜•์‹
93
+ prompt = f"""๋‹น์‹ ์€ ์นœ๊ทผํ•˜๊ณ  ๋„์›€์ด ๋˜๋Š” AI ์ฑ—๋ด‡์ž…๋‹ˆ๋‹ค. ์‚ฌ์šฉ์ž์˜ ์งˆ๋ฌธ์— ๋Œ€ํ•ด ์ •ํ™•ํ•˜๊ณ  ์ƒ์„ธํ•˜๋ฉฐ ์นœ์ ˆํ•˜๊ฒŒ ๋‹ต๋ณ€ํ•ด์ฃผ์„ธ์š”.
 
 
 
94
 
95
+ ์‚ฌ์šฉ์ž: {user_input}
96
+ AI: """
97
  return prompt
98
 
99
  def extract_response(self, full_text: str, formatted_prompt: str = None) -> str:
100
  """์‘๋‹ต ์ถ”์ถœ - ํ’ˆ์งˆ ๊ฒ€์ฆ ๋ฐ ๊ฐœ์„ """
101
+ logger.info(f"--- Polyglot ์‘๋‹ต ์ถ”์ถœ ์‹œ์ž‘ ---")
102
  logger.info(f"์ „์ฒด ์ƒ์„ฑ ํ…์ŠคํŠธ (Raw): \n---\n{full_text}\n---")
103
  logger.info(f"์‚ฌ์šฉ๋œ ํ”„๋กฌํ”„ํŠธ: {formatted_prompt}")
104
 
105
+ try:
106
+ # ์ƒˆ๋กœ์šด ํ”„๋กฌํ”„ํŠธ ํ˜•์‹์— ๋งž๋Š” ์‘๋‹ต ์ถ”์ถœ
107
+ if "AI: " in full_text:
108
+ # "AI: " ์ดํ›„์˜ ํ…์ŠคํŠธ๋ฅผ ์‘๋‹ต์œผ๋กœ ์ถ”์ถœ
109
+ response = full_text.split("AI: ")[-1].strip()
110
+ logger.info(f"โœ… ์„ฑ๊ณต: 'AI:' ํƒœ๊ทธ๋กœ ์‘๋‹ต ์ถ”์ถœ")
111
+ elif "### ์ฑ—๋ด‡:" in full_text:
112
+ # ๊ธฐ์กด ํ˜•์‹๋„ ์ง€์›
113
+ response = full_text.split("### ์ฑ—๋ด‡:")[-1].strip()
114
+ logger.info(f"โœ… ์„ฑ๊ณต: '### ์ฑ—๋ด‡:' ํƒœ๊ทธ๋กœ ์‘๋‹ต ์ถ”์ถœ")
115
  else:
116
+ # ํ”„๋กฌํ”„ํŠธ ์ œ๊ฑฐ ๋ฐฉ์‹
117
+ clean_text = full_text.strip()
118
+ if formatted_prompt:
119
+ response = clean_text.replace(formatted_prompt, "").strip()
120
+ else:
121
+ response = clean_text
122
+ logger.info(f"โœ… ์„ฑ๊ณต: ํ”„๋กฌํ”„ํŠธ ์ œ๊ฑฐ๋กœ ์‘๋‹ต ์ถ”์ถœ")
123
+
124
  logger.info(f"์ถ”์ถœ๋œ ์‘๋‹ต: {response}")
125
 
126
+ # ์‘๋‹ต ํ’ˆ์งˆ ๊ฒ€์ฆ ๋ฐ ๊ฐœ์„ 
127
+ if not self._validate_response_quality(response):
128
+ logger.warning(f"โš ๏ธ ์‘๋‹ต ํ’ˆ์งˆ์ด ๋‚ฎ์Šต๋‹ˆ๋‹ค. ํ’ˆ์งˆ ๊ฐœ์„  ์ œ์•ˆ์„ ์ถ”๊ฐ€ํ•ฉ๋‹ˆ๋‹ค.")
129
+ response = self._improve_response_quality(response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
+ return response
132
+
133
+ except Exception as e:
134
+ logger.error(f"โŒ ์‘๋‹ต ์ถ”์ถœ ์ค‘ ์˜ค๋ฅ˜: {e}")
135
+ # ํด๋ฐฑ: ์ „์ฒด ํ…์ŠคํŠธ์—์„œ ํ”„๋กฌํ”„ํŠธ ๋ถ€๋ถ„๋งŒ ์ œ๊ฑฐ
136
+ if formatted_prompt:
137
+ return full_text.replace(formatted_prompt, "").strip()
138
+ return full_text.strip()
 
 
 
 
 
 
139
 
140
  def _validate_response_quality(self, response: str) -> bool:
141
+ """์‘๋‹ต ํ’ˆ์งˆ ๊ฒ€์ฆ - 5.8B ๋ชจ๋ธ ์ตœ์ ํ™”"""
142
+ if not response or len(response.strip()) < 3: # 5โ†’3์œผ๋กœ ์™„ํ™”
143
  return False
144
 
145
+ # ์˜์–ด๊ฐ€ ํฌํ•จ๋˜์–ด ์žˆ์–ด๋„ ํ—ˆ์šฉ (5.8B ๋ชจ๋ธ์€ ์˜์–ด๋„ ์ž˜ํ•จ)
146
+ # if any(char.isascii() and char.isalpha() for char in response):
147
+ # return False
148
 
149
+ # ๋ฌธ์žฅ์ด ์ค‘๊ฐ„์— ๋Š์–ด์ง„ ๊ฒฝ์šฐ๋„ ํ—ˆ์šฉ (5.8B ๋ชจ๋ธ์€ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ์™„์„ฑ)
150
+ # if response.endswith(('ํ•˜', '๋Š”', '์„', '๋ฅผ', '์ด', '๊ฐ€', '์˜', '์—', '๋กœ')):
151
+ # return False
152
 
153
+ # ์ค‘๋ณต๋œ ๋‹จ์–ด๊ฐ€ ๋งŽ์•„๋„ ํ—ˆ์šฉ (5.8B ๋ชจ๋ธ์€ ๋ฌธ๋งฅ์ƒ ๋ฐ˜๋ณต์ด ์ž์—ฐ์Šค๋Ÿฌ์šธ ์ˆ˜ ์žˆ์Œ)
154
+ # words = response.split()
155
+ # if len(words) > 3 and len(set(words)) / len(words) < 0.7:
156
+ # return False
157
 
158
  return True
159
 
160
  def _improve_response_quality(self, response: str) -> str:
161
+ """์‘๋‹ต ํ’ˆ์งˆ ๊ฐœ์„  - 5.8B ๋ชจ๋ธ ์ตœ์ ํ™”"""
162
+ # ๊ธฐ๋ณธ ์ •๋ฆฌ๋งŒ ์ˆ˜ํ–‰
163
  improved = response.strip()
164
 
165
+ # ๋„ˆ๋ฌด ์งง์€ ๊ฒฝ์šฐ์—๋งŒ ๊ธฐ๋ณธ ์‘๋‹ต ์ถ”๊ฐ€
166
+ if len(improved) < 5:
167
+ improved = f"{improved} (๋” ์ž์„ธํ•œ ๋‹ต๋ณ€์„ ์›ํ•˜์‹œ๋ฉด ๋‹ค์‹œ ์งˆ๋ฌธํ•ด์ฃผ์„ธ์š”.)"
 
 
 
 
 
 
 
 
 
 
 
168
 
169
  logger.info(f"๐Ÿ”ง ์‘๋‹ต ํ’ˆ์งˆ ๊ฐœ์„  ์™„๋ฃŒ: {improved}")
170
  return improved
171
 
172
  def get_generation_config(self) -> Dict[str, Any]:
173
+ """์ƒ์„ฑ ์„ค์ • - 5.8B ๋ชจ๋ธ ์ตœ์ ํ™”"""
174
  return {
175
+ "max_new_tokens": 512, # 5.8B ๋ชจ๋ธ์€ ๋” ๊ธด ์‘๋‹ต ์ƒ์„ฑ ๊ฐ€๋Šฅ
176
+ "temperature": 0.8, # ์ฐฝ์˜์„ฑ ํ–ฅ์ƒ
177
  "do_sample": True, # ์ƒ˜ํ”Œ๋ง ํ™œ์„ฑํ™”
178
+ "top_k": 40, # ๋” ๋‹ค์–‘ํ•œ ์„ ํƒ
179
+ "top_p": 0.95, # ๋” ์ž์—ฐ์Šค๋Ÿฌ์šด ์‘๋‹ต
180
+ "repetition_penalty": 1.05, # ๋ฐ˜๋ณต ๋ฐฉ์ง€
181
+ "no_repeat_ngram_size": 2, # ๋ฐ˜๋ณต ๋ฐฉ์ง€
182
  "pad_token_id": None, # ๋ชจ๋ธ ๊ธฐ๋ณธ๊ฐ’ ์‚ฌ์šฉ
183
+ "eos_token_id": 2, # <|endoftext|> ํ† ํฐ ID ๋ช…์‹œ์  ์„ค์ •
184
  "use_cache": True, # ์บ์‹œ ์‚ฌ์šฉ์œผ๋กœ ์†๋„ ํ–ฅ์ƒ
185
+ "max_time": 280.0, # 5.8B ๋ชจ๋ธ์€ ๋” ๊ธด ์‹œ๊ฐ„ ํ•„์š”
186
+ "early_stopping": False, # False๋กœ ์„ค์ •ํ•˜์—ฌ ์ž์—ฐ์Šค๋Ÿฝ๊ฒŒ ์™„์„ฑ
187
  "stopping_criteria": None, # ๊ธฐ๋ณธ ์ •์ง€ ๊ธฐ์ค€ ์‚ฌ์šฉ
188
+ "min_length": 50, # ์ตœ์†Œ ๊ธธ์ด ๋ณด์žฅ (20โ†’50)
189
+ "num_beams": 1, # ๋‹จ์ผ ๋น”์œผ๋กœ ๋น ๋ฅธ ์ƒ์„ฑ
190
  }
191
+
192
  def get_model_info(self) -> Dict[str, Any]:
193
  """๋ชจ๋ธ ์ •๋ณด"""
194
  return {
 
199
  "model_size": self.model_size,
200
  "local_path": self.local_path,
201
  "multimodal": False,
202
+ }