gbrabbit committed on
Commit d098bcd · 1 Parent(s): b08dfac

Auto commit at 20-2025-08 20:10:37

lily_llm_api/app_v2.py CHANGED
@@ -191,7 +191,7 @@ def configure_cpu_threads():
     else:
         detected = os.cpu_count() or 2
         # Use the container/server vCPU count as-is, but apply an upper bound
-        threads = max(1, min(detected, 8))
+        threads = max(1, min(detected, 16))
 
     # OpenMP/MKL/numexpr
     os.environ["OMP_NUM_THREADS"] = str(threads)
@@ -225,7 +225,7 @@ def select_model_interactive():
     try:
         # choice = input(f"\n📝 Select the model number to use (1-{len(available_models)}): ")
         # selected_model = available_models[int(choice) - 1]
-        selected_model = available_models[0]
+        selected_model = available_models[2]
         print(f"\n✅ Selected the '{selected_model['name']}' model.")
         return selected_model['model_id']
     except (ValueError, IndexError):
@@ -540,6 +540,16 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
     gen_config['use_cache'] = True  # use the cache to speed up generation
     gen_config['pad_token_id'] = tokenizer.eos_token_id if tokenizer.eos_token_id else None
 
+    # Force the EOS token - fixes responses that never terminate the sentence
+    if tokenizer.eos_token_id is not None:
+        gen_config['eos_token_id'] = tokenizer.eos_token_id
+        print(f"🔍 [DEBUG] EOS token forced: {tokenizer.eos_token} (ID: {tokenizer.eos_token_id})")
+    else:
+        print(f"⚠️ [DEBUG] No EOS token is set")
+
+    # Final check of the generation config
+    print(f"🔍 [DEBUG] Final generation config: {gen_config}")
+
     print(f"🔍 [DEBUG] Starting model generation - text only")
     print(f"🔍 [DEBUG] Final input tensor device: {input_ids.device}")
     print(f"🔍 [DEBUG] Final attention_mask device: {attention_mask.device}")
 
lily_llm_api/models/polyglot_ko_1_3b_chat.py CHANGED
@@ -25,7 +25,7 @@ class PolyglotKo13bChatProfile:
         self.model_size = "1.3B"
 
     def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
-        """Load the model (local first, else the Hub)"""
+        """Load the model (tokenizer settings fixed)"""
         logger.info(f"📥 Loading the {self.display_name} model...")
         try:
             use_local = Path(self.local_path).exists() and any(Path(self.local_path).iterdir())
@@ -39,8 +39,21 @@ class PolyglotKo13bChatProfile:
                 trust_remote_code=True,
                 local_files_only=use_local,
             )
+
+            # Fix the tokenizer settings - resolves the EOS token problem
+            if tokenizer.eos_token is None:
+                logger.warning("⚠️ No EOS token. Setting <|endoftext|> per the model's official docs")
+                tokenizer.eos_token = "<|endoftext|>"
+
             if tokenizer.pad_token is None:
+                logger.warning("⚠️ No PAD token. Falling back to the EOS token")
                 tokenizer.pad_token = tokenizer.eos_token
+
+            # Inspect the special tokens
+            logger.info(f"🔍 Tokenizer settings:")
+            logger.info(f"  - EOS token: {tokenizer.eos_token} (ID: {tokenizer.eos_token_id})")
+            logger.info(f"  - PAD token: {tokenizer.pad_token} (ID: {tokenizer.pad_token_id})")
+            logger.info(f"  - BOS token: {tokenizer.bos_token} (ID: {tokenizer.bos_token_id})")
 
             # float32 is more stable on CPU; float16 on CUDA
             device = 'cuda' if torch.cuda.is_available() else 'cpu'
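The fallback chain above (EOS → `<|endoftext|>`, PAD → EOS) can be checked in isolation; GPT-NeoX-style tokenizers normally ship `<|endoftext|>` already, so the first branch is a guard rather than the common path. A sketch, assuming the base polyglot-ko tokenizer:

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("EleutherAI/polyglot-ko-1.3b")  # illustrative checkpoint
if tok.eos_token is None:
    tok.eos_token = "<|endoftext|>"  # assumption: this maps onto the existing vocab entry when present
if tok.pad_token is None:
    tok.pad_token = tok.eos_token    # padding reuses EOS, as in the diff
print(tok.eos_token, tok.eos_token_id, tok.pad_token, tok.pad_token_id)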
@@ -60,9 +73,9 @@ class PolyglotKo13bChatProfile:
             raise
 
     def format_prompt(self, user_input: str) -> str:
-        """Prompt formatting - chat style (optimized)"""
-        # a shorter prompt to cut the token count
-        prompt = f"""AI 챗봇입니다. 도움이 되고 유익한 내용을 제공하세요.
+        """Prompt formatting - matches the official docs"""
+        # use the official prompt format from the Hugging Face model page
+        prompt = f"""당신은 AI 챗봇입니다. 사용자에게 도움이 되고 유익한 내용을 제공해야합니다. 답변은 길고 자세하며 친절한 설명을 덧붙여서 작성하세요.
 
 ### 사용자:
 {user_input}
@@ -72,7 +85,7 @@ class PolyglotKo13bChatProfile:
         return prompt
 
     def extract_response(self, full_text: str, formatted_prompt: str = None) -> str:
-        """Response extraction - improved with more robust logic"""
+        """Response extraction - with quality validation and repair"""
         logger.info(f"--- Starting Polyglot response extraction ---")
         logger.info(f"Full generated text (Raw): \n---\n{full_text}\n---")
         logger.info(f"Prompt used: {formatted_prompt}")
@@ -82,21 +95,34 @@ class PolyglotKo13bChatProfile:
             response = full_text.split("### 챗봇:")[-1].strip()
             logger.info(f"✅ Success: extracted the response via the '### 챗봇:' tag")
             logger.info(f"Extracted response: {response}")
-            if response:  # return only when non-empty
+
+            # validate the response quality
+            if self._validate_response_quality(response):
                 return response
+            else:
+                logger.warning("⚠️ Low response quality. Applying the quality repair step.")
+                return self._improve_response_quality(response)
 
         # 2nd priority: try extracting by removing the prompt
         if formatted_prompt and formatted_prompt in full_text:
             response = full_text.replace(formatted_prompt, "").strip()
             logger.info(f"✅ Success: extracted the response by removing the prompt")
             logger.info(f"Extracted response: {response}")
-            if response:  # return only when non-empty
+
+            if self._validate_response_quality(response):
                 return response
+            else:
+                return self._improve_response_quality(response)
 
         # 3rd priority: try removing common prompt patterns
         clean_text = full_text.strip()
         patterns_to_remove = [
-            "당신은 AI 챗봇입니다. 사용자에게 도움이 되고 유익한 내용을 제공해야합니다. 답변은 길고 자세하며 친절한 설명을 덧붙여서 작성하세요.",
+            "당신은 한국어 AI 챗봇입니다. 다음 규칙을 엄격히 따라주세요:",
+            "1. 반드시 한국어로만 응답하세요",
+            "2. 자연스럽고 일관성 있는 대화를 유지하세요",
+            "3. 사용자의 질문에 정확하고 도움이 되는 답변을 제공하세요",
+            "4. 문장이 중간에 끊기지 않도록 완성된 답변을 작성하세요",
+            "5. 영어나 다른 언어를 사용하지 마세요",
             "### 사용자:",
             "### 챗봇:",
             "사용자:",
@@ -113,28 +139,81 @@ class PolyglotKo13bChatProfile:
         if clean_text and clean_text != full_text:
             logger.info("✅ Success: cleaned the response via pattern removal")
             logger.info(f"Cleaned response: {clean_text}")
-            return clean_text
+
+            if self._validate_response_quality(clean_text):
+                return clean_text
+            else:
+                return self._improve_response_quality(clean_text)
 
         # 4th priority: strip only the unnecessary parts from the full text
         final_response = full_text.strip()
         logger.warning("⚠️ Warning: no specific extraction pattern was found. Returning the cleaned full text.")
         logger.info(f"Final returned text: {final_response}")
-        return final_response
+
+        if self._validate_response_quality(final_response):
+            return final_response
+        else:
+            return self._improve_response_quality(final_response)
+
+    def _validate_response_quality(self, response: str) -> bool:
+        """Validate response quality"""
+        if not response or len(response.strip()) < 5:
+            return False
+
+        # English characters indicate low quality
+        if any(char.isascii() and char.isalpha() for char in response):
+            return False
+
+        # a sentence cut off mid-way indicates low quality
+        if response.endswith(('하', '는', '을', '를', '이', '가', '의', '에', '로')):
+            return False
+
+        # heavy word repetition indicates low quality
+        words = response.split()
+        if len(words) > 3 and len(set(words)) / len(words) < 0.7:
+            return False
+
+        return True
+
+    def _improve_response_quality(self, response: str) -> str:
+        """Repair response quality"""
+        # basic cleanup
+        improved = response.strip()
+
+        # strip English characters
+        import re
+        improved = re.sub(r'[a-zA-Z]+', '', improved)
+
+        # collapse repeated whitespace
+        improved = re.sub(r'\s+', ' ', improved)
+
+        # patch sentences cut off mid-way
+        if improved.endswith(('하', '는', '을', '를', '이', '가', '의', '에', '로')):
+            improved += '니다.'
+
+        # append a default notice when the response is too short
+        if len(improved) < 10:
+            improved = f"{improved} (응답이 너무 짧습니다. 더 자세한 답변을 원하시면 다시 질문해주세요.)"
+
+        logger.info(f"🔧 Response quality repair complete: {improved}")
+        return improved
 
     def get_generation_config(self) -> Dict[str, Any]:
-        """Generation config - optimized for speed"""
+        """Generation config - use the official EOS token"""
         return {
-            "max_new_tokens": 64,  # reduced from 128 to 64 for speed
-            "temperature": 0.7,  # keep moderate creativity
-            "do_sample": True,  # enable sampling
-            "top_k": 40,  # reduced from 50 to 40 for speed
-            "top_p": 0.9,  # keep nucleus sampling
-            "repetition_penalty": 1.1,  # prevent repetition
-            "no_repeat_ngram_size": 3,  # prevent n-gram repetition
-            "pad_token_id": None,  # use the model default
-            "eos_token_id": None,  # use the model default
-            "use_cache": True,  # use the cache for speed
-            "max_time": 60.0,  # 60-second timeout
+            "max_new_tokens": 128,  # raised from 64 to 128 to allow complete answers
+            "temperature": 0.3,  # improves consistency
+            "do_sample": True,  # enable sampling
+            "top_k": 20,  # improves quality
+            "top_p": 0.8,  # improves consistency
+            "repetition_penalty": 1.2,  # prevent repetition
+            "no_repeat_ngram_size": 4,  # prevent n-gram repetition
+            "pad_token_id": None,  # use the model default
+            "eos_token_id": None,  # None so the model auto-detects <|endoftext|>
+            "use_cache": True,  # use the cache for speed
+            "max_time": 60.0,  # 60-second timeout
+            "early_stopping": False,  # False so generation runs until <|endoftext|>
+            "stopping_criteria": None,  # use the default stopping criteria
         }
 
     def get_model_info(self) -> Dict[str, Any]:
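The new quality gate is purely heuristic, so it is worth seeing which strings pass. A standalone copy of the checks, exercised on hypothetical outputs; note that the Latin-letter test also rejects legitimate code-switching such as "AI" or "GPU":

def validate_response_quality(response: str) -> bool:
    # same heuristics as _validate_response_quality above
    if not response or len(response.strip()) < 5:
        return False
    if any(ch.isascii() and ch.isalpha() for ch in response):
        return False  # contains Latin letters
    if response.endswith(('하', '는', '을', '를', '이', '가', '의', '에', '로')):
        return False  # ends on a dangling particle, likely truncated
    words = response.split()
    if len(words) > 3 and len(set(words)) / len(words) < 0.7:
        return False  # heavy word repetition
    return True

print(validate_response_quality("서울은 대한민국의 수도입니다."))  # True
print(validate_response_quality("Seoul is the capital."))           # False (Latin letters)
print(validate_response_quality("서울은 대한민국의"))                # False (truncated on '의')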
 
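Putting the profile together, the calling convention implied by generate_sync in app_v2.py looks roughly like this (the wiring is a sketch, not code from the commit):

profile = PolyglotKo13bChatProfile()
model, tokenizer = profile.load_model()

prompt = profile.format_prompt("한국의 수도는 어디인가요?")
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

gen_config = profile.get_generation_config()
output_ids = model.generate(**inputs, **gen_config)  # None-valued keys fall back to model defaults

full_text = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print(profile.extract_response(full_text, prompt))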
lily_llm_api/models/polyglot_ko_5_8b_chat.py CHANGED
@@ -1,31 +1,33 @@
 #!/usr/bin/env python3
 """
-heegyu/polyglot-ko-5.8b-chat model profile
+Polyglot-ko-5.8b-chat model profile
+For the heegyu/polyglot-ko-5.8b-chat model
 """
 
 from typing import Dict, Any, Tuple
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import logging
+import os
+from pathlib import Path
 
 logger = logging.getLogger(__name__)
 
 class PolyglotKo58bChatProfile:
-    """heegyu/polyglot-ko-5.8b-chat model profile"""
+    """Polyglot-ko-5.8b-chat model profile"""
 
     def __init__(self):
         self.model_name = "heegyu/polyglot-ko-5.8b-chat"
         self.local_path = "./lily_llm_core/models/polyglot_ko_5_8b_chat"
-        self.display_name = "heegyu/polyglot-ko-5.8b-chat"
-        self.description = "A model trained from EleutherAI/polyglot-ko-5.8b on several Korean instruction datasets"
+        self.display_name = "Polyglot-ko-5.8b-chat"
+        self.description = "High-performance Korean chat model (5.8B)"
         self.language = "ko"
         self.model_size = "5.8B"
 
     def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
-        """Load the model (local first, else the Hub)"""
+        """Load the model (tokenizer settings fixed)"""
         logger.info(f"📥 Loading the {self.display_name} model...")
         try:
-            from pathlib import Path
             use_local = Path(self.local_path).exists() and any(Path(self.local_path).iterdir())
             model_path = self.local_path if use_local else self.model_name
@@ -37,11 +39,25 @@ class PolyglotKo58bChatProfile:
                 trust_remote_code=True,
                 local_files_only=use_local,
             )
+
+            # Fix the tokenizer settings - resolves the EOS token problem
+            if tokenizer.eos_token is None:
+                logger.warning("⚠️ No EOS token. Setting <|endoftext|> per the model's official docs")
+                tokenizer.eos_token = "<|endoftext|>"
+
             if tokenizer.pad_token is None:
+                logger.warning("⚠️ No PAD token. Falling back to the EOS token")
                 tokenizer.pad_token = tokenizer.eos_token
+
+            # Inspect the special tokens
+            logger.info(f"🔍 Tokenizer settings:")
+            logger.info(f"  - EOS token: {tokenizer.eos_token} (ID: {tokenizer.eos_token_id})")
+            logger.info(f"  - PAD token: {tokenizer.pad_token} (ID: {tokenizer.pad_token_id})")
+            logger.info(f"  - BOS token: {tokenizer.bos_token} (ID: {tokenizer.bos_token_id})")
 
+            # float32 is more stable on CPU; float16 on CUDA
             device = 'cuda' if torch.cuda.is_available() else 'cpu'
-            selected_dtype = torch.float16 if device == 'cuda' else torch.float16
+            selected_dtype = torch.float16 if device == 'cuda' else torch.float32
 
             model = AutoModelForCausalLM.from_pretrained(
                 model_path,
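The one-character dtype fix in the hunk above matters: the old line selected float16 on both branches, and float16 inference on CPU is slow and numerically fragile in PyTorch since many kernels lack stable half-precision implementations. The intended selection, as a sketch:

import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# float16 halves memory on GPU; float32 avoids missing/unstable half-precision CPU kernels
selected_dtype = torch.float16 if device == 'cuda' else torch.float32
print(device, selected_dtype)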
@@ -57,33 +73,142 @@ class PolyglotKo58bChatProfile:
             raise
 
     def format_prompt(self, user_input: str) -> str:
-        """Prompt formatting"""
-        return f"### 질문: {user_input}\n\n### 답변:"
+        """Prompt formatting - matches the official docs"""
+        # use the official prompt format from the Hugging Face model page
+        prompt = f"""당신은 AI 챗봇입니다. 사용자에게 도움이 되고 유익한 내용을 제공해야합니다. 답변은 길고 자세하며 친절한 설명을 덧붙여서 작성하세요.
+
+### 사용자:
+{user_input}
+
+### 챗봇:
+"""
+        return prompt
 
-    def extract_response(self, full_text: str, formatted_prompt: str) -> str:
-        """Response extraction"""
-        if "### 답변:" in full_text:
-            response = full_text.split("### 답변:")[-1].strip()
-        else:
-            if formatted_prompt in full_text:
-                response = full_text.replace(formatted_prompt, "").strip()
-            else:
-                response = full_text.strip()
-
-        return response
+    def extract_response(self, full_text: str, formatted_prompt: str = None) -> str:
+        """Response extraction - with quality validation and repair"""
+        logger.info(f"--- Starting Polyglot 5.8B response extraction ---")
+        logger.info(f"Full generated text (Raw): \n---\n{full_text}\n---")
+        logger.info(f"Prompt used: {formatted_prompt}")
+
+        # 1st priority: try extracting via the "### 챗봇:" tag
+        if "### 챗봇:" in full_text:
+            response = full_text.split("### 챗봇:")[-1].strip()
+            logger.info(f"✅ Success: extracted the response via the '### 챗봇:' tag")
+            logger.info(f"Extracted response: {response}")
+
+            # validate the response quality
+            if self._validate_response_quality(response):
+                return response
+            else:
+                logger.warning("⚠️ Low response quality. Applying the quality repair step.")
+                return self._improve_response_quality(response)
+
+        # 2nd priority: try extracting by removing the prompt
+        if formatted_prompt and formatted_prompt in full_text:
+            response = full_text.replace(formatted_prompt, "").strip()
+            logger.info(f"✅ Success: extracted the response by removing the prompt")
+            logger.info(f"Extracted response: {response}")
+
+            if self._validate_response_quality(response):
+                return response
+            else:
+                return self._improve_response_quality(response)
+
+        # 3rd priority: try removing common prompt patterns
+        clean_text = full_text.strip()
+        patterns_to_remove = [
+            "당신은 AI 챗봇입니다. 사용자에게 도움이 되고 유익한 내용을 제공해야합니다. 답변은 길고 자세하며 친절한 설명을 덧붙여서 작성하세요.",
+            "### 사용자:",
+            "### 챗봇:",
+            "사용자:",
+            "챗봇:",
+            "assistant:",
+            "user:"
+        ]
+
+        for pattern in patterns_to_remove:
+            clean_text = clean_text.replace(pattern, "")
+
+        clean_text = clean_text.strip()
+
+        if clean_text and clean_text != full_text:
+            logger.info("✅ Success: cleaned the response via pattern removal")
+            logger.info(f"Cleaned response: {clean_text}")
+
+            if self._validate_response_quality(clean_text):
+                return clean_text
+            else:
+                return self._improve_response_quality(clean_text)
+
+        # 4th priority: strip only the unnecessary parts from the full text
+        final_response = full_text.strip()
+        logger.warning("⚠️ Warning: no specific extraction pattern was found. Returning the cleaned full text.")
+        logger.info(f"Final returned text: {final_response}")
+
+        if self._validate_response_quality(final_response):
+            return final_response
+        else:
+            return self._improve_response_quality(final_response)
+
+    def _validate_response_quality(self, response: str) -> bool:
+        """Validate response quality"""
+        if not response or len(response.strip()) < 5:
+            return False
+
+        # English characters indicate low quality
+        if any(char.isascii() and char.isalpha() for char in response):
+            return False
+
+        # a sentence cut off mid-way indicates low quality
+        if response.endswith(('하', '는', '을', '를', '이', '가', '의', '에', '로')):
+            return False
+
+        # heavy word repetition indicates low quality
+        words = response.split()
+        if len(words) > 3 and len(set(words)) / len(words) < 0.7:
+            return False
+
+        return True
+
+    def _improve_response_quality(self, response: str) -> str:
+        """Repair response quality"""
+        # basic cleanup
+        improved = response.strip()
+
+        # strip English characters
+        import re
+        improved = re.sub(r'[a-zA-Z]+', '', improved)
+
+        # collapse repeated whitespace
+        improved = re.sub(r'\s+', ' ', improved)
+
+        # patch sentences cut off mid-way
+        if improved.endswith(('하', '는', '을', '를', '이', '가', '의', '에', '로')):
+            improved += '니다.'
+
+        # append a default notice when the response is too short
+        if len(improved) < 10:
+            improved = f"{improved} (응답이 너무 짧습니다. 더 자세한 답변을 원하시면 다시 질문해주세요.)"
+
+        logger.info(f"🔧 Response quality repair complete: {improved}")
+        return improved
 
     def get_generation_config(self) -> Dict[str, Any]:
-        """Generation config"""
+        """Generation config - use the official EOS token"""
         return {
-            "max_new_tokens": 128,
-            "temperature": 0.7,
-            "do_sample": True,
-            "top_k": 50,
-            "top_p": 0.9,
-            "repetition_penalty": 1.1,
-            "no_repeat_ngram_size": 3,
-            "pad_token_id": None,
-            "eos_token_id": None,
+            "max_new_tokens": 128,  # the 5.8B model can generate longer answers
+            "temperature": 0.3,  # improves consistency
+            "do_sample": True,  # enable sampling
+            "top_k": 20,  # improves quality
+            "top_p": 0.8,  # improves consistency
+            "repetition_penalty": 1.2,  # prevent repetition
+            "no_repeat_ngram_size": 4,  # prevent n-gram repetition
+            "pad_token_id": None,  # use the model default
+            "eos_token_id": None,  # None so the model auto-detects <|endoftext|>
+            "use_cache": True,  # use the cache for speed
+            "max_time": 240.0,  # the 5.8B model needs a longer budget (240-second timeout)
+            "early_stopping": False,  # False so generation runs until <|endoftext|>
+            "stopping_criteria": None,  # use the default stopping criteria
         }
 
     def get_model_info(self) -> Dict[str, Any]:
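One note on the 5.8B config: the comment now matches the value (a 240-second budget), and `max_time` is the backstop if `<|endoftext|>` never arrives, since transformers converts it into a `MaxTimeCriteria`. The equivalent explicit form, as a sketch:

from transformers import MaxTimeCriteria, StoppingCriteriaList

# passing max_time=240.0 to generate() is equivalent to supplying this explicitly
stopping = StoppingCriteriaList([MaxTimeCriteria(max_time=240.0)])
# out = model.generate(**inputs, stopping_criteria=stopping, max_new_tokens=128)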
 