Spaces:
Sleeping
Sleeping
Auto commit at 21-2025-08 0:28:12
Browse files- README.md +2 -0
- lily_llm_api/app_v2.py +39 -12
- lily_llm_api/models/polyglot_ko_1_3b_chat.py +14 -13
- lily_llm_api/models/polyglot_ko_5_8b_chat.py +83 -105
README.md
CHANGED
|
@@ -10,6 +10,8 @@ app_file: app.py
|
|
| 10 |
pinned: false
|
| 11 |
---
|
| 12 |
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# 250819
|
| 15 |
- v1.0.1
|
|
|
|
| 10 |
pinned: false
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# 250820
|
| 14 |
+
- lily llm kanana 3b 멀티모달 개선, polyglot 1.3b, 5.8b 응답 품질 개선
|
| 15 |
|
| 16 |
# 250819
|
| 17 |
- v1.0.1
|
lily_llm_api/app_v2.py
CHANGED
|
@@ -183,6 +183,7 @@ executor = concurrent.futures.ThreadPoolExecutor()
|
|
| 183 |
|
| 184 |
def configure_cpu_threads():
|
| 185 |
"""CPU 스레드 환경 최적화 (vCPU 수에 맞게 조정)."""
|
|
|
|
| 186 |
try:
|
| 187 |
# 기본값: 환경변수 또는 시스템 CPU 수를 사용하되 과도한 스레드 방지
|
| 188 |
env_threads = os.getenv("CPU_THREADS")
|
|
@@ -190,7 +191,7 @@ def configure_cpu_threads():
|
|
| 190 |
threads = max(1, int(env_threads))
|
| 191 |
else:
|
| 192 |
detected = os.cpu_count() or 2
|
| 193 |
-
# ์ปจํ
์ด๋/์๋ฒ์ vCPU ์๋ฅผ ๊ทธ๋๋ก ์ฌ์ฉํ๋ ์ํ
|
| 194 |
threads = max(1, min(detected, 16))
|
| 195 |
|
| 196 |
# OpenMP/MKL/numexpr
|
|
@@ -213,6 +214,7 @@ def configure_cpu_threads():
|
|
| 213 |
logger.info(f"๐งต CPU thread config -> OMP/MKL/numexpr={threads}, torch_threads={threads}")
|
| 214 |
except Exception as e:
|
| 215 |
logger.warning(f"⚠️ CPU 스레드 설정 실패: {e}")
|
|
|
|
| 216 |
|
| 217 |
def select_model_interactive():
|
| 218 |
"""인터랙티브 모델 선택"""
|
|
@@ -225,7 +227,7 @@ def select_model_interactive():
|
|
| 225 |
try:
|
| 226 |
# choice = input(f"\n๐ ์ฌ์ฉํ ๋ชจ๋ธ ๋ฒํธ๋ฅผ ์ ํํ์ธ์ (1-{len(available_models)}): ")
|
| 227 |
# selected_model = available_models[int(choice) - 1]
|
| 228 |
-
selected_model = available_models[
|
| 229 |
print(f"\nโ
'{selected_model['name']}' ๋ชจ๋ธ์ ์ ํํ์ต๋๋ค.")
|
| 230 |
return selected_model['model_id']
|
| 231 |
except (ValueError, IndexError):
|
|
@@ -237,7 +239,12 @@ async def startup_event():
|
|
| 237 |
"""[복원] 서버 시작 시 인터랙티브 모델 선택 및 로드"""
|
| 238 |
global model_loaded
|
| 239 |
# CPU ์ค๋ ๋ ์ต์ ํ ์ ์ฉ
|
| 240 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
selected_model_id = select_model_interactive()
|
| 242 |
try:
|
| 243 |
await load_model_async(selected_model_id)
|
|
@@ -448,7 +455,7 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
|
|
| 448 |
return_tensors="pt",
|
| 449 |
padding=True,
|
| 450 |
truncation=True,
|
| 451 |
-
max_length=
|
| 452 |
)
|
| 453 |
if 'token_type_ids' in inputs:
|
| 454 |
del inputs['token_type_ids']
|
|
@@ -533,19 +540,39 @@ def generate_sync(prompt: str, image_data_list: Optional[List[bytes]], max_lengt
|
|
| 533 |
gen_config['max_time'] = 60.0 # 60์ด ํ์์์์ผ๋ก ์กฐ์
|
| 534 |
|
| 535 |
# ์ถ๊ฐ ํ์์์ ์ค์
|
| 536 |
-
gen_config['max_time'] = 60.0 # ๊ฐ์ 60์ด ํ์์์
|
| 537 |
-
print(f"๐ [DEBUG] ๊ฐ์ ํ์์์ ์ค์ :
|
| 538 |
|
| 539 |
# ์ถ๊ฐ ์ฑ๋ฅ ์ต์ ํ ์ค์
|
| 540 |
gen_config['use_cache'] = True # ์บ์ ์ฌ์ฉ์ผ๋ก ์๋ ํฅ์
|
| 541 |
-
gen_config['pad_token_id'] = tokenizer.eos_token_id if tokenizer.eos_token_id else None
|
| 542 |
|
| 543 |
-
#
|
| 544 |
-
if
|
| 545 |
-
|
| 546 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 547 |
else:
|
| 548 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 549 |
|
| 550 |
# ์์ฑ ์ค์ ์ต์ข
ํ์ธ
|
| 551 |
print(f"๐ [DEBUG] ์ต์ข
์์ฑ ์ค์ : {gen_config}")
|
|
|
|
| 183 |
|
| 184 |
def configure_cpu_threads():
|
| 185 |
"""CPU 스레드 환경 최적화 (vCPU 수에 맞게 조정)."""
|
| 186 |
+
print(f"๐ [DEBUG] configure_cpu_threads ์์")
|
| 187 |
try:
|
| 188 |
# 기본값: 환경변수 또는 시스템 CPU 수를 사용하되 과도한 스레드 방지
|
| 189 |
env_threads = os.getenv("CPU_THREADS")
|
|
|
|
| 191 |
threads = max(1, int(env_threads))
|
| 192 |
else:
|
| 193 |
detected = os.cpu_count() or 2
|
| 194 |
+
# ์ปจํ
์ด๋/์๋ฒ์ vCPU ์๋ฅผ ๊ทธ๋๋ก ์ฌ์ฉํ๋ ์ํ 16 ์ ์ฉ
|
| 195 |
threads = max(1, min(detected, 16))
|
| 196 |
|
| 197 |
# OpenMP/MKL/numexpr
|
|
|
|
| 214 |
logger.info(f"๐งต CPU thread config -> OMP/MKL/numexpr={threads}, torch_threads={threads}")
|
| 215 |
except Exception as e:
|
| 216 |
logger.warning(f"⚠️ CPU 스레드 설정 실패: {e}")
|
| 217 |
+
print(f"๐ [DEBUG] configure_cpu_threads ์ข
๋ฃ")
|
| 218 |
|
| 219 |
def select_model_interactive():
|
| 220 |
"""인터랙티브 모델 선택"""
|
|
|
|
| 227 |
try:
|
| 228 |
# choice = input(f"\n๐ ์ฌ์ฉํ ๋ชจ๋ธ ๋ฒํธ๋ฅผ ์ ํํ์ธ์ (1-{len(available_models)}): ")
|
| 229 |
# selected_model = available_models[int(choice) - 1]
|
| 230 |
+
selected_model = available_models[0]
|
| 231 |
print(f"\nโ
'{selected_model['name']}' ๋ชจ๋ธ์ ์ ํํ์ต๋๋ค.")
|
| 232 |
return selected_model['model_id']
|
| 233 |
except (ValueError, IndexError):
|
|
|
|
| 239 |
"""[복원] 서버 시작 시 인터랙티브 모델 선택 및 로드"""
|
| 240 |
global model_loaded
|
| 241 |
# CPU ์ค๋ ๋ ์ต์ ํ ์ ์ฉ
|
| 242 |
+
try:
|
| 243 |
+
configure_cpu_threads()
|
| 244 |
+
except Exception as e:
|
| 245 |
+
logger.error(f"โ CPU ์ค๋ ๋ ์ค์ ์คํจ: {e}")
|
| 246 |
+
print(f"๐ [DEBUG] CPU ์ค๋ ๋ ์ค์ ์คํจ: {e}")
|
| 247 |
+
print(f"๐ [DEBUG] CPU ์ค๋ ๋ ์ค์ ์คํจ: {e}")
|
| 248 |
selected_model_id = select_model_interactive()
|
| 249 |
try:
|
| 250 |
await load_model_async(selected_model_id)
|
|
|
|
| 455 |
return_tensors="pt",
|
| 456 |
padding=True,
|
| 457 |
truncation=True,
|
| 458 |
+
max_length=256,
|
| 459 |
)
|
| 460 |
if 'token_type_ids' in inputs:
|
| 461 |
del inputs['token_type_ids']
|
|
|
|
| 540 |
gen_config['max_time'] = 60.0 # 60์ด ํ์์์์ผ๋ก ์กฐ์
|
| 541 |
|
| 542 |
# ์ถ๊ฐ ํ์์์ ์ค์
|
| 543 |
+
# gen_config['max_time'] = 60.0 # ๊ฐ์ 60์ด ํ์์์
|
| 544 |
+
print(f"๐ [DEBUG] ๊ฐ์ ํ์์์ ์ค์ : {gen_config['max_time']}์ด")
|
| 545 |
|
| 546 |
# ์ถ๊ฐ ์ฑ๋ฅ ์ต์ ํ ์ค์
|
| 547 |
gen_config['use_cache'] = True # ์บ์ ์ฌ์ฉ์ผ๋ก ์๋ ํฅ์
|
|
|
|
| 548 |
|
| 549 |
+
# PAD ํ ํฐ ์ค์ - ๋ชจ๋ธ ํ๋กํ ์ค์ ์ฐ์
|
| 550 |
+
if 'pad_token_id' not in gen_config:
|
| 551 |
+
# ํ๋กํ์ ์ค์ ์ด ์์ ๋๋ง ๊ธฐ๋ณธ๊ฐ ์ฌ์ฉ
|
| 552 |
+
if tokenizer.pad_token_id is not None:
|
| 553 |
+
gen_config['pad_token_id'] = tokenizer.pad_token_id
|
| 554 |
+
print(f"๐ [DEBUG] PAD ํ ํฐ ์ค์ : ํ ํฌ๋์ด์ ๊ธฐ๋ณธ๊ฐ ์ฌ์ฉ (ID: {tokenizer.pad_token_id})")
|
| 555 |
+
else:
|
| 556 |
+
gen_config['pad_token_id'] = None
|
| 557 |
+
print(f"๐ [DEBUG] PAD ํ ํฐ ์ค์ : None (ํ ํฌ๋์ด์ ์ PAD ํ ํฐ ์์)")
|
| 558 |
+
|
| 559 |
+
# EOS ํ ํฐ ์ค์ - ๋ชจ๋ธ ํ๋กํ ์ค์ ์ฐ์
|
| 560 |
+
if 'eos_token_id' not in gen_config:
|
| 561 |
+
# ํ๋กํ์ ์ค์ ์ด ์์ ๋๋ง ๊ธฐ๋ณธ๊ฐ ์ฌ์ฉ
|
| 562 |
+
if tokenizer.eos_token_id is not None:
|
| 563 |
+
gen_config['eos_token_id'] = tokenizer.eos_token_id
|
| 564 |
+
print(f"๐ [DEBUG] EOS ํ ํฐ ์ค์ : ํ ํฌ๋์ด์ ๏ฟฝ๏ฟฝ๋ณธ๊ฐ ์ฌ์ฉ (ID: {tokenizer.eos_token_id})")
|
| 565 |
+
else:
|
| 566 |
+
gen_config['eos_token_id'] = None
|
| 567 |
+
print(f"๐ [DEBUG] EOS ํ ํฐ ์ค์ : None (ํ ํฌ๋์ด์ ์ EOS ํ ํฐ ์์)")
|
| 568 |
else:
|
| 569 |
+
# ํ๋กํ์ ์ด๋ฏธ ์ค์ ๋ ๊ฒฝ์ฐ - ๊ทธ๋๋ก ์ฌ์ฉ
|
| 570 |
+
print(f"๐ [DEBUG] EOS ํ ํฐ ์ค์ : ํ๋กํ ์ค์ ์ฌ์ฉ (ID: {gen_config['eos_token_id']})")
|
| 571 |
+
# ํ๋กํ์์ ์ค์ ํ EOS ํ ํฐ ID๊ฐ ์ ํจํ์ง ํ์ธ
|
| 572 |
+
if gen_config['eos_token_id'] is not None:
|
| 573 |
+
print(f"๐ [DEBUG] EOS ํ ํฐ ID {gen_config['eos_token_id']} ์ฌ์ฉํ์ฌ ์์ฑ ์ค๋จ์ ์ค์ ")
|
| 574 |
+
else:
|
| 575 |
+
print(f"๐ [DEBUG] EOS ํ ํฐ ์์ - max_new_tokens๊น์ง ์์ฑ")
|
| 576 |
|
| 577 |
# ์์ฑ ์ค์ ์ต์ข
ํ์ธ
|
| 578 |
print(f"๐ [DEBUG] ์ต์ข
์์ฑ ์ค์ : {gen_config}")
|
lily_llm_api/models/polyglot_ko_1_3b_chat.py
CHANGED
|
@@ -10,6 +10,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
| 10 |
import logging
|
| 11 |
import os
|
| 12 |
from pathlib import Path
|
|
|
|
| 13 |
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
|
@@ -77,11 +78,11 @@ class PolyglotKo13bChatProfile:
|
|
| 77 |
# Hugging Face ๋ชจ๋ธ ํ์ด์ง์ ๊ณต์ ํ๋กฌํํธ ํ์ ์ฌ์ฉ
|
| 78 |
prompt = f"""๋น์ ์ AI ์ฑ๋ด์
๋๋ค. ์ฌ์ฉ์์๊ฒ ๋์์ด ๋๊ณ ์ ์ตํ ๋ด์ฉ์ ์ ๊ณตํด์ผํฉ๋๋ค. ๋ต๋ณ์ ๊ธธ๊ณ ์์ธํ๋ฉฐ ์น์ ํ ์ค๋ช
์ ๋ง๋ถ์ฌ์ ์์ฑํ์ธ์.
|
| 79 |
|
| 80 |
-
### ์ฌ์ฉ์:
|
| 81 |
-
{user_input}
|
| 82 |
|
| 83 |
-
### ์ฑ๋ด:
|
| 84 |
-
"""
|
| 85 |
return prompt
|
| 86 |
|
| 87 |
def extract_response(self, full_text: str, formatted_prompt: str = None) -> str:
|
|
@@ -161,17 +162,17 @@ class PolyglotKo13bChatProfile:
|
|
| 161 |
return False
|
| 162 |
|
| 163 |
# ์์ด๊ฐ ํฌํจ๋์ด ์์ผ๋ฉด ํ์ง ๋ฎ์
|
| 164 |
-
if any(char.isascii() and char.isalpha() for char in response):
|
| 165 |
-
|
| 166 |
|
| 167 |
# ๋ฌธ์ฅ์ด ์ค๊ฐ์ ๋์ด์ง ๊ฒฝ์ฐ ํ์ง ๋ฎ์
|
| 168 |
if response.endswith(('ํ', '๋', '์', '๋ฅผ', '์ด', '๊ฐ', '์', '์', '๋ก')):
|
| 169 |
return False
|
| 170 |
|
| 171 |
# ์ค๋ณต๋ ๋จ์ด๊ฐ ๋ง์ผ๋ฉด ํ์ง ๋ฎ์
|
| 172 |
-
words = response.split()
|
| 173 |
-
if len(words) > 3 and len(set(words)) / len(words) < 0.7:
|
| 174 |
-
|
| 175 |
|
| 176 |
return True
|
| 177 |
|
|
@@ -181,8 +182,8 @@ class PolyglotKo13bChatProfile:
|
|
| 181 |
improved = response.strip()
|
| 182 |
|
| 183 |
# ์์ด ์ ๊ฑฐ
|
| 184 |
-
|
| 185 |
-
improved = re.sub(r'[a-zA-Z]+', '', improved)
|
| 186 |
|
| 187 |
# ์ค๋ณต ๊ณต๋ฐฑ ์ ๊ฑฐ
|
| 188 |
improved = re.sub(r'\s+', ' ', improved)
|
|
@@ -192,7 +193,7 @@ class PolyglotKo13bChatProfile:
|
|
| 192 |
improved += '๋๋ค.'
|
| 193 |
|
| 194 |
# ๋๋ฌด ์งง์ ๊ฒฝ์ฐ ๊ธฐ๋ณธ ์๋ต ์ถ๊ฐ
|
| 195 |
-
if len(improved) <
|
| 196 |
improved = f"{improved} (์๋ต์ด ๋๋ฌด ์งง์ต๋๋ค. ๋ ์์ธํ ๋ต๋ณ์ ์ํ์๋ฉด ๋ค์ ์ง๋ฌธํด์ฃผ์ธ์.)"
|
| 197 |
|
| 198 |
logger.info(f"๐ง ์๋ต ํ์ง ๊ฐ์ ์๋ฃ: {improved}")
|
|
@@ -209,7 +210,7 @@ class PolyglotKo13bChatProfile:
|
|
| 209 |
"repetition_penalty": 1.2, # ๋ฐ๋ณต ๋ฐฉ์ง
|
| 210 |
"no_repeat_ngram_size": 4, # ๋ฐ๋ณต ๋ฐฉ์ง
|
| 211 |
"pad_token_id": None, # ๋ชจ๋ธ ๊ธฐ๋ณธ๊ฐ ์ฌ์ฉ
|
| 212 |
-
"eos_token_id":
|
| 213 |
"use_cache": True, # ์บ์ ์ฌ์ฉ์ผ๋ก ์๋ ํฅ์
|
| 214 |
"max_time": 60.0, # 60์ด ํ์์์
|
| 215 |
"early_stopping": False, # False๋ก ์ค์ ํ์ฌ <|endoftext|>๊น์ง ์์ฑ
|
|
|
|
| 10 |
import logging
|
| 11 |
import os
|
| 12 |
from pathlib import Path
|
| 13 |
+
import re
|
| 14 |
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
|
|
|
|
| 78 |
# Hugging Face ๋ชจ๋ธ ํ์ด์ง์ ๊ณต์ ํ๋กฌํํธ ํ์ ์ฌ์ฉ
|
| 79 |
prompt = f"""๋น์ ์ AI ์ฑ๋ด์
๋๋ค. ์ฌ์ฉ์์๊ฒ ๋์์ด ๋๊ณ ์ ์ตํ ๋ด์ฉ์ ์ ๊ณตํด์ผํฉ๋๋ค. ๋ต๋ณ์ ๊ธธ๊ณ ์์ธํ๋ฉฐ ์น์ ํ ์ค๋ช
์ ๋ง๋ถ์ฌ์ ์์ฑํ์ธ์.
|
| 80 |
|
| 81 |
+
### ์ฌ์ฉ์:
|
| 82 |
+
{user_input}
|
| 83 |
|
| 84 |
+
### ์ฑ๋ด:
|
| 85 |
+
"""
|
| 86 |
return prompt
|
| 87 |
|
| 88 |
def extract_response(self, full_text: str, formatted_prompt: str = None) -> str:
|
|
|
|
| 162 |
return False
|
| 163 |
|
| 164 |
# ์์ด๊ฐ ํฌํจ๋์ด ์์ผ๋ฉด ํ์ง ๋ฎ์
|
| 165 |
+
# if any(char.isascii() and char.isalpha() for char in response):
|
| 166 |
+
# return False
|
| 167 |
|
| 168 |
# ๋ฌธ์ฅ์ด ์ค๊ฐ์ ๋์ด์ง ๊ฒฝ์ฐ ํ์ง ๋ฎ์
|
| 169 |
if response.endswith(('ํ', '๋', '์', '๋ฅผ', '์ด', '๊ฐ', '์', '์', '๋ก')):
|
| 170 |
return False
|
| 171 |
|
| 172 |
# ์ค๋ณต๋ ๋จ์ด๊ฐ ๋ง์ผ๋ฉด ํ์ง ๋ฎ์
|
| 173 |
+
# words = response.split()
|
| 174 |
+
# if len(words) > 3 and len(set(words)) / len(words) < 0.7:
|
| 175 |
+
# return False
|
| 176 |
|
| 177 |
return True
|
| 178 |
|
|
|
|
| 182 |
improved = response.strip()
|
| 183 |
|
| 184 |
# ์์ด ์ ๊ฑฐ
|
| 185 |
+
|
| 186 |
+
# improved = re.sub(r'[a-zA-Z]+', '', improved)
|
| 187 |
|
| 188 |
# ์ค๋ณต ๊ณต๋ฐฑ ์ ๊ฑฐ
|
| 189 |
improved = re.sub(r'\s+', ' ', improved)
|
|
|
|
| 193 |
improved += '๋๋ค.'
|
| 194 |
|
| 195 |
# ๋๋ฌด ์งง์ ๊ฒฝ์ฐ ๊ธฐ๋ณธ ์๋ต ์ถ๊ฐ
|
| 196 |
+
if len(improved) < 5:
|
| 197 |
improved = f"{improved} (์๋ต์ด ๋๋ฌด ์งง์ต๋๋ค. ๋ ์์ธํ ๋ต๋ณ์ ์ํ์๋ฉด ๋ค์ ์ง๋ฌธํด์ฃผ์ธ์.)"
|
| 198 |
|
| 199 |
logger.info(f"๐ง ์๋ต ํ์ง ๊ฐ์ ์๋ฃ: {improved}")
|
|
|
|
| 210 |
"repetition_penalty": 1.2, # ๋ฐ๋ณต ๋ฐฉ์ง
|
| 211 |
"no_repeat_ngram_size": 4, # ๋ฐ๋ณต ๋ฐฉ์ง
|
| 212 |
"pad_token_id": None, # ๋ชจ๋ธ ๊ธฐ๋ณธ๊ฐ ์ฌ์ฉ
|
| 213 |
+
"eos_token_id": 2, # <|endoftext|> ํ ํฐ ID ๋ช
์์ ์ค์
|
| 214 |
"use_cache": True, # ์บ์ ์ฌ์ฉ์ผ๋ก ์๋ ํฅ์
|
| 215 |
"max_time": 60.0, # 60์ด ํ์์์
|
| 216 |
"early_stopping": False, # False๋ก ์ค์ ํ์ฌ <|endoftext|>๊น์ง ์์ฑ
|
lily_llm_api/models/polyglot_ko_5_8b_chat.py
CHANGED
|
@@ -10,6 +10,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
|
|
| 10 |
import logging
|
| 11 |
import os
|
| 12 |
from pathlib import Path
|
|
|
|
| 13 |
|
| 14 |
logger = logging.getLogger(__name__)
|
| 15 |
|
|
@@ -23,7 +24,7 @@ class PolyglotKo58bChatProfile:
|
|
| 23 |
self.description = "ํ๊ตญ์ด ์ฑํ
์ ์ฉ ๊ณ ์ฑ๋ฅ ๋ชจ๋ธ (5.8B)"
|
| 24 |
self.language = "ko"
|
| 25 |
self.model_size = "5.8B"
|
| 26 |
-
|
| 27 |
def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
|
| 28 |
"""๋ชจ๋ธ ๋ก๋ (ํ ํฌ๋์ด์ ์ค์ ์์ )"""
|
| 29 |
logger.info(f"๐ฅ {self.display_name} ๋ชจ๋ธ ๋ก๋ ์ค...")
|
|
@@ -45,6 +46,19 @@ class PolyglotKo58bChatProfile:
|
|
| 45 |
logger.warning("โ ๏ธ EOS ํ ํฐ์ด ์์ต๋๋ค. ๋ชจ๋ธ ๊ณต์ ๋ฌธ์์ ๋ฐ๋ผ <|endoftext|> ์ค์ ")
|
| 46 |
tokenizer.eos_token = "<|endoftext|>"
|
| 47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
if tokenizer.pad_token is None:
|
| 49 |
logger.warning("โ ๏ธ PAD ํ ํฐ์ด ์์ต๋๋ค. EOS ํ ํฐ์ผ๋ก ์ค์ ")
|
| 50 |
tokenizer.pad_token = tokenizer.eos_token
|
|
@@ -64,6 +78,7 @@ class PolyglotKo58bChatProfile:
|
|
| 64 |
trust_remote_code=True,
|
| 65 |
torch_dtype=selected_dtype,
|
| 66 |
local_files_only=use_local,
|
|
|
|
| 67 |
).to(device)
|
| 68 |
|
| 69 |
logger.info(f"โ
{self.display_name} ๋ชจ๋ธ ๋ก๋ ์ฑ๊ณต! (device={device}, dtype={selected_dtype})")
|
|
@@ -73,144 +88,107 @@ class PolyglotKo58bChatProfile:
|
|
| 73 |
raise
|
| 74 |
|
| 75 |
def format_prompt(self, user_input: str) -> str:
|
| 76 |
-
"""ํ๋กฌํํธ ํฌ๋งทํ
-
|
| 77 |
-
#
|
| 78 |
-
prompt = f"""๋น์ ์
|
| 79 |
-
|
| 80 |
-
### ์ฌ์ฉ์:
|
| 81 |
-
{user_input}
|
| 82 |
|
| 83 |
-
|
| 84 |
-
"""
|
| 85 |
return prompt
|
| 86 |
|
| 87 |
def extract_response(self, full_text: str, formatted_prompt: str = None) -> str:
|
| 88 |
"""์๋ต ์ถ์ถ - ํ์ง ๊ฒ์ฆ ๋ฐ ๊ฐ์ """
|
| 89 |
-
logger.info(f"--- Polyglot
|
| 90 |
logger.info(f"์ ์ฒด ์์ฑ ํ
์คํธ (Raw): \n---\n{full_text}\n---")
|
| 91 |
logger.info(f"์ฌ์ฉ๋ ํ๋กฌํํธ: {formatted_prompt}")
|
| 92 |
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
|
|
|
| 102 |
else:
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
|
|
|
| 110 |
logger.info(f"์ถ์ถ๋ ์๋ต: {response}")
|
| 111 |
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
# 3์์: ์ผ๋ฐ์ ์ธ ํ๋กฌํํธ ํจํด ์ ๊ฑฐ ์๋
|
| 118 |
-
clean_text = full_text.strip()
|
| 119 |
-
patterns_to_remove = [
|
| 120 |
-
"๋น์ ์ AI ์ฑ๋ด์
๋๋ค. ์ฌ์ฉ์์๊ฒ ๋์์ด ๋๊ณ ์ ์ตํ ๋ด์ฉ์ ์ ๊ณตํด์ผํฉ๋๋ค. ๋ต๋ณ์ ๊ธธ๊ณ ์์ธํ๋ฉฐ ์น์ ํ ์ค๋ช
์ ๋ง๋ถ์ฌ์ ์์ฑํ์ธ์.",
|
| 121 |
-
"### ์ฌ์ฉ์:",
|
| 122 |
-
"### ์ฑ๋ด:",
|
| 123 |
-
"์ฌ์ฉ์:",
|
| 124 |
-
"์ฑ๋ด:",
|
| 125 |
-
"assistant:",
|
| 126 |
-
"user:"
|
| 127 |
-
]
|
| 128 |
-
|
| 129 |
-
for pattern in patterns_to_remove:
|
| 130 |
-
clean_text = clean_text.replace(pattern, "")
|
| 131 |
-
|
| 132 |
-
clean_text = clean_text.strip()
|
| 133 |
-
|
| 134 |
-
if clean_text and clean_text != full_text:
|
| 135 |
-
logger.info("โ
์ฑ๊ณต: ํจํด ์ ๊ฑฐ๋ก ์๋ต ์ ๋ฆฌ")
|
| 136 |
-
logger.info(f"์ ๋ฆฌ๋ ์๋ต: {clean_text}")
|
| 137 |
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
logger.info(f"์ต์ข
๋ฐํ ํ
์คํธ: {final_response}")
|
| 147 |
-
|
| 148 |
-
if self._validate_response_quality(final_response):
|
| 149 |
-
return final_response
|
| 150 |
-
else:
|
| 151 |
-
return self._improve_response_quality(final_response)
|
| 152 |
|
| 153 |
def _validate_response_quality(self, response: str) -> bool:
|
| 154 |
-
"""์๋ต ํ์ง ๊ฒ์ฆ"""
|
| 155 |
-
if not response or len(response.strip()) < 5
|
| 156 |
return False
|
| 157 |
|
| 158 |
-
# ์์ด๊ฐ ํฌํจ๋์ด
|
| 159 |
-
if any(char.isascii() and char.isalpha() for char in response):
|
| 160 |
-
|
| 161 |
|
| 162 |
-
# ๋ฌธ์ฅ์ด ์ค๊ฐ์ ๋์ด์ง
|
| 163 |
-
if response.endswith(('ํ', '๋', '์', '๋ฅผ', '์ด', '๊ฐ', '์', '์', '๋ก')):
|
| 164 |
-
|
| 165 |
|
| 166 |
-
# ์ค๋ณต๋ ๋จ์ด๊ฐ
|
| 167 |
-
words = response.split()
|
| 168 |
-
if len(words) > 3 and len(set(words)) / len(words) < 0.7:
|
| 169 |
-
|
| 170 |
|
| 171 |
return True
|
| 172 |
|
| 173 |
def _improve_response_quality(self, response: str) -> str:
|
| 174 |
-
"""์๋ต ํ์ง ๊ฐ์ """
|
| 175 |
-
# ๊ธฐ๋ณธ
|
| 176 |
improved = response.strip()
|
| 177 |
|
| 178 |
-
#
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
# ์ค๋ณต ๊ณต๋ฐฑ ์ ๊ฑฐ
|
| 183 |
-
improved = re.sub(r'\s+', ' ', improved)
|
| 184 |
-
|
| 185 |
-
# ๋ฌธ์ฅ์ด ์ค๊ฐ์ ๋์ด์ง ๊ฒฝ์ฐ ์ฒ๋ฆฌ
|
| 186 |
-
if improved.endswith(('ํ', '๋', '์', '๋ฅผ', '์ด', '๊ฐ', '์', '์', '๋ก')):
|
| 187 |
-
improved += '๋๋ค.'
|
| 188 |
-
|
| 189 |
-
# ๋๋ฌด ์งง์ ๊ฒฝ์ฐ ๊ธฐ๋ณธ ์๋ต ์ถ๊ฐ
|
| 190 |
-
if len(improved) < 10:
|
| 191 |
-
improved = f"{improved} (์๋ต์ด ๋๋ฌด ์งง์ต๋๋ค. ๋ ์์ธํ ๋ต๋ณ์ ์ํ์๋ฉด ๋ค์ ์ง๋ฌธํด์ฃผ์ธ์.)"
|
| 192 |
|
| 193 |
logger.info(f"๐ง ์๋ต ํ์ง ๊ฐ์ ์๋ฃ: {improved}")
|
| 194 |
return improved
|
| 195 |
|
| 196 |
def get_generation_config(self) -> Dict[str, Any]:
|
| 197 |
-
"""์์ฑ ์ค์ -
|
| 198 |
return {
|
| 199 |
-
"max_new_tokens":
|
| 200 |
-
"temperature": 0.
|
| 201 |
"do_sample": True, # ์ํ๋ง ํ์ฑํ
|
| 202 |
-
"top_k":
|
| 203 |
-
"top_p": 0.
|
| 204 |
-
"repetition_penalty": 1.
|
| 205 |
-
"no_repeat_ngram_size":
|
| 206 |
"pad_token_id": None, # ๋ชจ๋ธ ๊ธฐ๋ณธ๊ฐ ์ฌ์ฉ
|
| 207 |
-
"eos_token_id":
|
| 208 |
"use_cache": True, # ์บ์ ์ฌ์ฉ์ผ๋ก ์๋ ํฅ์
|
| 209 |
-
"max_time":
|
| 210 |
-
"early_stopping": False, # False๋ก ์ค์ ํ์ฌ
|
| 211 |
"stopping_criteria": None, # ๊ธฐ๋ณธ ์ ์ง ๊ธฐ์ค ์ฌ์ฉ
|
|
|
|
|
|
|
| 212 |
}
|
| 213 |
-
|
| 214 |
def get_model_info(self) -> Dict[str, Any]:
|
| 215 |
"""๋ชจ๋ธ ์ ๋ณด"""
|
| 216 |
return {
|
|
@@ -221,4 +199,4 @@ class PolyglotKo58bChatProfile:
|
|
| 221 |
"model_size": self.model_size,
|
| 222 |
"local_path": self.local_path,
|
| 223 |
"multimodal": False,
|
| 224 |
-
}
|
|
|
|
| 10 |
import logging
|
| 11 |
import os
|
| 12 |
from pathlib import Path
|
| 13 |
+
import re
|
| 14 |
|
| 15 |
logger = logging.getLogger(__name__)
|
| 16 |
|
|
|
|
| 24 |
self.description = "ํ๊ตญ์ด ์ฑํ
์ ์ฉ ๊ณ ์ฑ๋ฅ ๋ชจ๋ธ (5.8B)"
|
| 25 |
self.language = "ko"
|
| 26 |
self.model_size = "5.8B"
|
| 27 |
+
|
| 28 |
def load_model(self) -> Tuple[AutoModelForCausalLM, AutoTokenizer]:
|
| 29 |
"""모델 로드 (토크나이저 설정 수정)"""
|
| 30 |
logger.info(f"๐ฅ {self.display_name} ๋ชจ๋ธ ๋ก๋ ์ค...")
|
|
|
|
| 46 |
logger.warning("โ ๏ธ EOS ํ ํฐ์ด ์์ต๋๋ค. ๋ชจ๋ธ ๊ณต์ ๋ฌธ์์ ๋ฐ๋ผ <|endoftext|> ์ค์ ")
|
| 47 |
tokenizer.eos_token = "<|endoftext|>"
|
| 48 |
|
| 49 |
+
# 5.8B ๋ชจ๋ธ ํน๋ณ ์ค์
|
| 50 |
+
if hasattr(tokenizer, 'add_eos_token'):
|
| 51 |
+
tokenizer.add_eos_token = False # ์๋ EOS ํ ํฐ ์ถ๊ฐ ๋นํ์ฑํ
|
| 52 |
+
logger.info("โ
์๋ EOS ํ ํฐ ์ถ๊ฐ ๋นํ์ฑํ๋จ")
|
| 53 |
+
|
| 54 |
+
# EOS ํ ํฐ ์ฒ๋ฆฌ ๊ฐ์
|
| 55 |
+
if tokenizer.eos_token == "<|endoftext|>":
|
| 56 |
+
logger.info("โ
<|endoftext|> EOS ํ ํฐ ํ์ธ๋จ")
|
| 57 |
+
# EOS ํ ํฐ์ ํน๋ณํ๊ฒ ์ฒ๋ฆฌํ์ง ์๋๋ก ์ค์
|
| 58 |
+
if hasattr(tokenizer, 'eos_token_id'):
|
| 59 |
+
logger.info(f"โ
EOS ํ ํฐ ID: {tokenizer.eos_token_id}")
|
| 60 |
+
|
| 61 |
+
# PAD ํ ํฐ ์ค์
|
| 62 |
if tokenizer.pad_token is None:
|
| 63 |
logger.warning("โ ๏ธ PAD ํ ํฐ์ด ์์ต๋๋ค. EOS ํ ํฐ์ผ๋ก ์ค์ ")
|
| 64 |
tokenizer.pad_token = tokenizer.eos_token
|
|
|
|
| 78 |
trust_remote_code=True,
|
| 79 |
torch_dtype=selected_dtype,
|
| 80 |
local_files_only=use_local,
|
| 81 |
+
low_cpu_mem_usage=True, # CPU ๋ฉ๋ชจ๋ฆฌ ์ฌ์ฉ๋ ์ต์ ํ
|
| 82 |
).to(device)
|
| 83 |
|
| 84 |
logger.info(f"โ
{self.display_name} ๋ชจ๋ธ ๋ก๋ ์ฑ๊ณต! (device={device}, dtype={selected_dtype})")
|
|
|
|
| 88 |
raise
|
| 89 |
|
| 90 |
def format_prompt(self, user_input: str) -> str:
|
| 91 |
+
"""프롬프트 포맷팅 - 5.8B 모델 최적화"""
|
| 92 |
+
# 5.8B ๋ชจ๋ธ์ ๋ ์ ํฉํ ํ๋กฌํํธ ํ์
|
| 93 |
+
prompt = f"""๋น์ ์ ์น๊ทผํ๊ณ ๋์์ด ๋๋ AI ์ฑ๋ด์
๋๋ค. ์ฌ์ฉ์์ ์ง๋ฌธ์ ๋ํด ์ ํํ๊ณ ์์ธํ๋ฉฐ ์น์ ํ๊ฒ ๋ต๋ณํด์ฃผ์ธ์.
|
|
|
|
|
|
|
|
|
|
| 94 |
|
| 95 |
+
์ฌ์ฉ์: {user_input}
|
| 96 |
+
AI: """
|
| 97 |
return prompt
|
| 98 |
|
| 99 |
def extract_response(self, full_text: str, formatted_prompt: str = None) -> str:
|
| 100 |
"""응답 추출 - 품질 검증 및 개선"""
|
| 101 |
+
logger.info(f"--- Polyglot ์๋ต ์ถ์ถ ์์ ---")
|
| 102 |
logger.info(f"์ ์ฒด ์์ฑ ํ
์คํธ (Raw): \n---\n{full_text}\n---")
|
| 103 |
logger.info(f"์ฌ์ฉ๋ ํ๋กฌํํธ: {formatted_prompt}")
|
| 104 |
|
| 105 |
+
try:
|
| 106 |
+
# ์๋ก์ด ํ๋กฌํํธ ํ์์ ๋ง๋ ์๋ต ์ถ์ถ
|
| 107 |
+
if "AI: " in full_text:
|
| 108 |
+
# "AI: " ์ดํ์ ํ
์คํธ๋ฅผ ์๋ต์ผ๋ก ์ถ์ถ
|
| 109 |
+
response = full_text.split("AI: ")[-1].strip()
|
| 110 |
+
logger.info(f"โ
์ฑ๊ณต: 'AI:' ํ๊ทธ๋ก ์๋ต ์ถ์ถ")
|
| 111 |
+
elif "### ์ฑ๋ด:" in full_text:
|
| 112 |
+
# ๊ธฐ์กด ํ์๋ ์ง์
|
| 113 |
+
response = full_text.split("### ์ฑ๋ด:")[-1].strip()
|
| 114 |
+
logger.info(f"โ
์ฑ๊ณต: '### ์ฑ๋ด:' ํ๊ทธ๋ก ์๋ต ์ถ์ถ")
|
| 115 |
else:
|
| 116 |
+
# ํ๋กฌํํธ ์ ๊ฑฐ ๋ฐฉ์
|
| 117 |
+
clean_text = full_text.strip()
|
| 118 |
+
if formatted_prompt:
|
| 119 |
+
response = clean_text.replace(formatted_prompt, "").strip()
|
| 120 |
+
else:
|
| 121 |
+
response = clean_text
|
| 122 |
+
logger.info(f"โ
์ฑ๊ณต: ํ๋กฌํํธ ์ ๊ฑฐ๋ก ์๋ต ์ถ์ถ")
|
| 123 |
+
|
| 124 |
logger.info(f"์ถ์ถ๋ ์๋ต: {response}")
|
| 125 |
|
| 126 |
+
# ์๋ต ํ์ง ๊ฒ์ฆ ๋ฐ ๊ฐ์
|
| 127 |
+
if not self._validate_response_quality(response):
|
| 128 |
+
logger.warning(f"โ ๏ธ ์๋ต ํ์ง์ด ๋ฎ์ต๋๋ค. ํ์ง ๊ฐ์ ์ ์์ ์ถ๊ฐํฉ๋๋ค.")
|
| 129 |
+
response = self._improve_response_quality(response)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 130 |
|
| 131 |
+
return response
|
| 132 |
+
|
| 133 |
+
except Exception as e:
|
| 134 |
+
logger.error(f"โ ์๋ต ์ถ์ถ ์ค ์ค๋ฅ: {e}")
|
| 135 |
+
# ํด๋ฐฑ: ์ ์ฒด ํ
์คํธ์์ ํ๋กฌํํธ ๋ถ๋ถ๋ง ์ ๊ฑฐ
|
| 136 |
+
if formatted_prompt:
|
| 137 |
+
return full_text.replace(formatted_prompt, "").strip()
|
| 138 |
+
return full_text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
def _validate_response_quality(self, response: str) -> bool:
|
| 141 |
+
"""응답 품질 검증 - 5.8B 모델 최적화"""
|
| 142 |
+
if not response or len(response.strip()) < 3: # 5โ3์ผ๋ก ์ํ
|
| 143 |
return False
|
| 144 |
|
| 145 |
+
# ์์ด๊ฐ ํฌํจ๋์ด ์์ด๋ ํ์ฉ (5.8B ๋ชจ๋ธ์ ์์ด๋ ์ํจ)
|
| 146 |
+
# if any(char.isascii() and char.isalpha() for char in response):
|
| 147 |
+
# return False
|
| 148 |
|
| 149 |
+
# ๋ฌธ์ฅ์ด ์ค๊ฐ์ ๋์ด์ง ๊ฒฝ์ฐ๋ ํ์ฉ (5.8B ๋ชจ๋ธ์ ์์ฐ์ค๋ฝ๊ฒ ์์ฑ)
|
| 150 |
+
# if response.endswith(('ํ', '๋', '์', '๋ฅผ', '์ด', '๊ฐ', '์', '์', '๋ก')):
|
| 151 |
+
# return False
|
| 152 |
|
| 153 |
+
# ์ค๋ณต๋ ๋จ์ด๊ฐ ๋ง์๋ ํ์ฉ (5.8B ๋ชจ๋ธ์ ๋ฌธ๋งฅ์ ๋ฐ๋ณต์ด ์์ฐ์ค๋ฌ์ธ ์ ์์)
|
| 154 |
+
# words = response.split()
|
| 155 |
+
# if len(words) > 3 and len(set(words)) / len(words) < 0.7:
|
| 156 |
+
# return False
|
| 157 |
|
| 158 |
return True
|
| 159 |
|
| 160 |
def _improve_response_quality(self, response: str) -> str:
|
| 161 |
+
"""응답 품질 개선 - 5.8B 모델 최적화"""
|
| 162 |
+
# ๊ธฐ๋ณธ ์ ๋ฆฌ๋ง ์ํ
|
| 163 |
improved = response.strip()
|
| 164 |
|
| 165 |
+
# ๋๋ฌด ์งง์ ๊ฒฝ์ฐ์๋ง ๊ธฐ๋ณธ ์๋ต ์ถ๊ฐ
|
| 166 |
+
if len(improved) < 5:
|
| 167 |
+
improved = f"{improved} (๋ ์์ธํ ๋ต๋ณ์ ์ํ์๋ฉด ๋ค์ ์ง๋ฌธํด์ฃผ์ธ์.)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 168 |
|
| 169 |
logger.info(f"๐ง ์๋ต ํ์ง ๊ฐ์ ์๋ฃ: {improved}")
|
| 170 |
return improved
|
| 171 |
|
| 172 |
def get_generation_config(self) -> Dict[str, Any]:
|
| 173 |
+
"""생성 설정 - 5.8B 모델 최적화"""
|
| 174 |
return {
|
| 175 |
+
"max_new_tokens": 512, # 5.8B ๋ชจ๋ธ์ ๋ ๊ธด ์๋ต ์์ฑ ๊ฐ๋ฅ
|
| 176 |
+
"temperature": 0.8, # ์ฐฝ์์ฑ ํฅ์
|
| 177 |
"do_sample": True, # ์ํ๋ง ํ์ฑํ
|
| 178 |
+
"top_k": 40, # ๋ ๋ค์ํ ์ ํ
|
| 179 |
+
"top_p": 0.95, # ๋ ์์ฐ์ค๋ฌ์ด ์๋ต
|
| 180 |
+
"repetition_penalty": 1.05, # ๋ฐ๋ณต ๋ฐฉ์ง
|
| 181 |
+
"no_repeat_ngram_size": 2, # ๋ฐ๋ณต ๋ฐฉ์ง
|
| 182 |
"pad_token_id": None, # ๋ชจ๋ธ ๊ธฐ๋ณธ๊ฐ ์ฌ์ฉ
|
| 183 |
+
"eos_token_id": 2, # <|endoftext|> ํ ํฐ ID ๋ช
์์ ์ค์
|
| 184 |
"use_cache": True, # ์บ์ ์ฌ์ฉ์ผ๋ก ์๋ ํฅ์
|
| 185 |
+
"max_time": 280.0, # 5.8B ๋ชจ๋ธ์ ๋ ๊ธด ์๊ฐ ํ์
|
| 186 |
+
"early_stopping": False, # False๋ก ์ค์ ํ์ฌ ์์ฐ์ค๋ฝ๊ฒ ์์ฑ
|
| 187 |
"stopping_criteria": None, # ๊ธฐ๋ณธ ์ ์ง ๊ธฐ์ค ์ฌ์ฉ
|
| 188 |
+
"min_length": 50, # ์ต์ ๊ธธ์ด ๋ณด์ฅ (20โ50)
|
| 189 |
+
"num_beams": 1, # ๋จ์ผ ๋น์ผ๋ก ๋น ๋ฅธ ์์ฑ
|
| 190 |
}
|
| 191 |
+
|
| 192 |
def get_model_info(self) -> Dict[str, Any]:
|
| 193 |
"""모델 정보"""
|
| 194 |
return {
|
|
|
|
| 199 |
"model_size": self.model_size,
|
| 200 |
"local_path": self.local_path,
|
| 201 |
"multimodal": False,
|
| 202 |
+
}
|