Auto commit at 2025-08-09 7:58:00
lily_llm_api/app_v2.py
CHANGED
@@ -65,7 +65,6 @@ app.add_middleware(
         "http://127.0.0.1:8001",
         "http://localhost:3000",
         "http://127.0.0.1:3000",
-        "https://hearthchat-production.up.railway.app",
         "*"  # allow all origins during development
     ],
     allow_credentials=True,
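Side note on this hunk: the CORS spec forbids `Access-Control-Allow-Origin: *` on credentialed requests, so keeping "*" alongside allow_credentials=True will not cover cookie-authenticated browser calls. A minimal sketch of the explicit-origin alternative (the origin list is illustrative, not taken from the repo):

from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware

app = FastAPI()

# Hypothetical explicit allow-list: with allow_credentials=True, browsers
# require a concrete origin echoed back rather than the "*" wildcard.
app.add_middleware(
    CORSMiddleware,
    allow_origins=[
        "http://localhost:3000",
        "http://127.0.0.1:3000",
    ],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)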
@@ -486,7 +485,7 @@ async def generate_multimodal(
             return_tensors="pt",
             padding=True,
             truncation=True,
-            max_length=
+            max_length=100,
         )

         if 'token_type_ids' in inputs:

@@ -558,10 +557,10 @@ async def generate_multimodal(
                 top_k=40,
                 top_p=top_p,
                 repetition_penalty=1.1,
-
+                no_repeat_ngram_size=2,
                 pad_token_id=tokenizer.eos_token_id,
                 eos_token_id=tokenizer.eos_token_id,
-
+                use_cache=True
             )
             logger.info("✅ Actual multimodal generation succeeded!")

@@ -579,9 +578,9 @@ async def generate_multimodal(
             enhanced_inputs = tokenizer(
                 enhanced_formatted_prompt,
                 return_tensors="pt",
-
-
-                max_length=
+                padding=True,
+                truncation=True,
+                max_length=256
             )

             if 'token_type_ids' in enhanced_inputs:

@@ -598,10 +597,10 @@ async def generate_multimodal(
                 top_k=40,
                 top_p=top_p,
                 repetition_penalty=1.1,
-
+                no_repeat_ngram_size=2,
                 pad_token_id=tokenizer.eos_token_id,
                 eos_token_id=tokenizer.eos_token_id,
-
+                use_cache=True
             )
         else:
             # text-only generation

@@ -614,11 +613,11 @@ async def generate_multimodal(
                 temperature=temperature,
                 top_k=40,
                 top_p=top_p,
-
-
+                repetition_penalty=1.1,
+                no_repeat_ngram_size=2,
                 pad_token_id=tokenizer.eos_token_id,
                 eos_token_id=tokenizer.eos_token_id,
-
+                use_cache=True
             )

             # response extraction
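The thread running through these hunks: prompts are now hard-truncated (max_length=100 on the first multimodal pass, 256 on the enhanced retry), and every generate call gains no_repeat_ngram_size=2 plus use_cache=True. A minimal sketch of the same tokenize-then-generate pattern with stock transformers APIs (the checkpoint and prompt are illustrative stand-ins):

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "gpt2"  # illustrative stand-in for the actual checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token  # GPT-style tokenizers ship without one

inputs = tokenizer(
    "Describe the attached image.",  # illustrative prompt
    return_tensors="pt",
    padding=True,
    truncation=True,
    max_length=256,  # hard cap on prompt tokens, as in the diff
)
inputs.pop("token_type_ids", None)  # some models reject this key; the diff guards for it

with torch.no_grad():
    output_ids = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
        top_k=40,
        top_p=0.9,
        repetition_penalty=1.1,
        no_repeat_ngram_size=2,  # forbid any repeated 2-gram in the output
        pad_token_id=tokenizer.eos_token_id,
        eos_token_id=tokenizer.eos_token_id,
        use_cache=True,  # reuse the attention KV cache across decoding steps
    )
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))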
lily_llm_api/models/kanana_1_5_v_3b_instruct.py
CHANGED
@@ -16,8 +16,6 @@ HF_TOKEN = os.getenv("HF_TOKEN")
 logger = logging.getLogger(__name__)
 DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

-max_new_tokens = 64
-
 class Kanana15V3bInstructProfile:
     """Kanana-1.5-v-3b-instruct model profile"""

@@ -29,11 +27,11 @@ class Kanana15V3bInstructProfile:
         if self.is_local:
             self.model_name = "gbrabbit/lily-math-model"  # use the HF model name locally as well
             self.local_path = "./lily_llm_core/models/kanana_1_5_v_3b_instruct"
-            self.display_name = "
+            self.display_name = "kanana-1.5-v-3b-instruct"
         else:
             self.model_name = "gbrabbit/lily-math-model"  # Hugging Face Hub model path
             self.local_path = None  # do not use a local path on the server
-            self.display_name = "
+            self.display_name = "kanana-1.5-v-3b-instruct"

         self.description = "Kakao multimodal model (3.6B) - specialized for Math RAG"
         self.language = "ko"

@@ -183,7 +181,7 @@ class Kanana15V3bInstructProfile:
             token=HF_TOKEN,
             torch_dtype=torch.float16,
             trust_remote_code=True,
-
+            cache_dir="/app/cache/transformers",
             # device_map="auto",
             # low_cpu_mem_usage=True,
         ).to(DEVICE)

@@ -199,7 +197,7 @@ class Kanana15V3bInstructProfile:

     def get_generation_config(self) -> Dict[str, Any]:
         # Model parameter tuning; max_new_tokens caps the generated text length (raised for image descriptions)
-        return {"max_new_tokens":
+        return {"max_new_tokens": 256, "temperature": 0.7, "do_sample": True, "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}

     def extract_response(self, full_text: str, formatted_prompt: str = None, **kwargs) -> str:
         """
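For context, the dict returned by get_generation_config is keyed to match transformers generate() kwargs, so a caller can splat it directly into the call. A self-contained sketch of that pattern (gpt2 is an illustrative stand-in; the real profile loads gbrabbit/lily-math-model):

from transformers import AutoModelForCausalLM, AutoTokenizer

# Stand-in for profile.get_generation_config(); same dict as in the diff.
gen_config = {"max_new_tokens": 256, "temperature": 0.7, "do_sample": True,
              "top_k": 40, "top_p": 0.9, "repetition_penalty": 1.1}

model_name = "gpt2"  # illustrative stand-in for the profile's checkpoint
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

inputs = tokenizer("2 + 2 = ?", return_tensors="pt")
output_ids = model.generate(**inputs, **gen_config,  # splat the profile's decoding defaults
                            pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))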