Spaces:
Sleeping
Sleeping
| import os | |
| import traceback | |
| from typing import Optional | |
| from transformers import AutoTokenizer | |
| import torch | |
# Environment setup: optionally load a .env file, read the Hugging Face token,
# decide whether this is a local or a server run, and pick the model location.
try:
    from dotenv import load_dotenv
    load_dotenv()
    print("โ .env ํ์ผ ๋ก๋๋จ")
except ImportError:
    # python-dotenv is optional; without it only the process environment is used.
    print("โ ๏ธ python-dotenv๊ฐ ์ค์น๋์ง ์์")

HF_TOKEN = os.environ.get("HF_TOKEN")

# Environment detection: a LOCAL_TEST variable or a ../.env file (resolved
# against the current working directory) marks a local run.
IS_LOCAL = 'LOCAL_TEST' in os.environ or os.path.exists('../.env')
environment_label = '๋ก์ปฌ' if IS_LOCAL else '์๋ฒ'
print(f"๐ ํ๊ฒฝ: {environment_label}")

# Model location depends on the environment.
if not IS_LOCAL:
    # Server run: use a Hugging Face Hub model id, overridable via MODEL_NAME.
    MODEL_PATH = os.environ.get("MODEL_NAME", "gbrabbit/lily-math-model")
    print(f"๐ ์๋ฒ ๋ชจ๋ธ: {MODEL_PATH}")
    # NOTE(review): indentation is not recoverable from this copy of the file;
    # the token status line is assumed to belong to the server branch - confirm.
    # Only whether the token is set is reported, never its value.
    token_status = 'โ ์ค์ ๋จ' if HF_TOKEN else 'โ ์ค์ ๋์ง ์์'
    print(f"๐ ํ ํฐ: {token_status}")
else:
    # Local run: use the model directory stored next to the project.
    MODEL_PATH = "../lily_llm_core/models/kanana-1.5-v-3b-instruct"
    print(f"๐ ๋ก์ปฌ ๋ชจ๋ธ ๊ฒฝ๋ก: {MODEL_PATH}")
    model_dir_found = os.path.exists(MODEL_PATH)
    print(f"๐ ๊ฒฝ๋ก ์กด์ฌ: {model_dir_found}")
# Tokenizer test: load the tokenizer for MODEL_PATH, then encode and decode a
# short sample to confirm it is usable. Failures are reported with a traceback
# instead of aborting, so the rest of the script still runs.
print("\n๐ง ํ ํฌ๋์ด์ ํ ์คํธ ์์...")

try:
    print("๐ค ํ ํฌ๋์ด์ ๋ก๋ฉ ์ค...")

    # Single source of truth for the loader options: the values logged below
    # are exactly the ones passed to from_pretrained. (The log used to claim
    # "use_fast: False" although that option was never passed.)
    tokenizer_kwargs = {"trust_remote_code": True}

    print(f" MODEL_PATH: {MODEL_PATH}")
    print(f" IS_LOCAL: {IS_LOCAL}")
    for option_name, option_value in tokenizer_kwargs.items():
        print(f" {option_name}: {option_value}")

    # The Hub token is only supplied for server runs; it is added after the
    # logging loop so it can never end up in the output.
    if not IS_LOCAL:
        tokenizer_kwargs["token"] = HF_TOKEN

    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, **tokenizer_kwargs)

    print("โ ํ ํฌ๋์ด์ ๋ก๋ฉ ์๋ฃ")
    print(f" ํ์ : {type(tokenizer)}")
    print(f" ๊ฐ: {tokenizer}")
    print(f" hasattr('encode'): {hasattr(tokenizer, 'encode')}")
    print(f" hasattr('__call__'): {hasattr(tokenizer, '__call__')}")

    # Encode a short sample and report the tensor shapes.
    test_input = "์๋ ํ์ธ์"
    print(f"\n๐ค ํ ํฌ๋์ด์ ํ ์คํธ: '{test_input}'")
    test_tokens = tokenizer(test_input, return_tensors="pt")
    print(" โ ํ ํฌ๋์ด์ ํธ์ถ ์ฑ๊ณต")
    print(f" input_ids shape: {test_tokens['input_ids'].shape}")
    print(f" attention_mask shape: {test_tokens['attention_mask'].shape}")

    # Decode the ids back to text (first and only row of the batch).
    decoded = tokenizer.decode(test_tokens['input_ids'][0], skip_special_tokens=True)
    print(f" ๋์ฝ๋ฉ ๊ฒฐ๊ณผ: '{decoded}'")

except Exception as e:
    # Top-level diagnostic boundary: report the error and keep going.
    print(f"โ ํ ํฌ๋์ด์ ํ ์คํธ ์คํจ: {e}")
    print(f" ์ค๋ฅ ํ์ : {type(e).__name__}")
    traceback.print_exc()
# Model test: load KananaVForConditionalGeneration from MODEL_PATH, run one
# short generation on a chat-formatted prompt and print the decoded result.
# Failures are reported with a traceback instead of aborting the script.
print("\n๐ง ๋ชจ๋ธ ํ ์คํธ ์์...")

try:
    print("๐ค ๋ชจ๋ธ ๋ก๋ฉ ์ค...")
    from modeling import KananaVForConditionalGeneration

    # Both environments load with the same options; only server runs add the
    # Hub token.
    model_kwargs = {
        "torch_dtype": torch.float16,
        "trust_remote_code": True,
        "device_map": None,
        "low_cpu_mem_usage": True,
    }
    if not IS_LOCAL:
        model_kwargs["token"] = HF_TOKEN

    model = KananaVForConditionalGeneration.from_pretrained(MODEL_PATH, **model_kwargs)

    print("โ ๋ชจ๋ธ ๋ก๋ฉ ์๋ฃ")
    # print(f" ํ์ : {type(model)}")
    # print(f" ๋๋ฐ์ด์ค: {next(model.parameters()).device}")

    # Inference needs the tokenizer created by the tokenizer test. Checking
    # here (after the load, so model loading is still exercised) gives a clear
    # message instead of a bare NameError when that earlier step failed.
    if globals().get("tokenizer") is None:
        raise RuntimeError(
            "tokenizer is not available; the tokenizer test must succeed "
            "before model inference can run"
        )

    # Build a single chat-formatted prompt.
    test_input = "์๋ ํ์ธ์"
    assistant_marker = "<|im_start|>assistant\n"
    formatted_prompt = f"<|im_start|>user\n{test_input}<|im_end|>\n{assistant_marker}"

    # A single prompt needs no padding; asking for it makes tokenizers without
    # a pad token raise, so it is not requested here.
    inputs = tokenizer(
        formatted_prompt,
        return_tensors="pt",
        truncation=True,
        max_length=512,
    )

    print(f"\n๐ค ๋ชจ๋ธ ์ถ๋ก ํ ์คํธ: '{test_input}'")

    # Generation settings for Kanana.
    max_new_tokens = 100

    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
            max_new_tokens=max_new_tokens,
            repetition_penalty=1.1,
            no_repeat_ngram_size=2,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            use_cache=True,
        )

    print(" โ ๋ชจ๋ธ ํธ์ถ ์ฑ๊ณต")
    print(f" outputs ํ์ : {type(outputs)}")
    print(f" outputs shape: {outputs.shape}")

    # Decode the first (and only) sequence of the batch.
    generated_ids = outputs[0]
    response = tokenizer.decode(generated_ids, skip_special_tokens=True)

    # Separate the completion from the prompt by token prefix rather than by
    # text: a text marker disappears from the decoded string whenever the
    # tokenizer treats it as a special token.
    # NOTE(review): whether this model's generate() echoes the prompt ids is
    # not visible here, so both cases are handled - confirm against modeling.py.
    prompt_ids = inputs["input_ids"][0]
    prompt_length = prompt_ids.shape[0]
    echoes_prompt = (
        generated_ids.shape[0] >= prompt_length
        and torch.equal(generated_ids[:prompt_length], prompt_ids)
    )
    completion_ids = generated_ids[prompt_length:] if echoes_prompt else generated_ids
    completion_text = tokenizer.decode(completion_ids, skip_special_tokens=True)

    # Fallback kept from the original: if the marker text survived decoding,
    # keep only what follows its last occurrence.
    assistant_response = completion_text.split(assistant_marker)[-1]

    print(f" ์์ฑ๋ ์ ์ฒด ํ ์คํธ: '{response}'")
    print(f" ์ด์์คํดํธ ์๋ต: '{assistant_response.strip()}'")

except Exception as e:
    # Top-level diagnostic boundary: report the error and keep going.
    print(f"โ ๋ชจ๋ธ ํ ์คํธ ์คํจ: {e}")
    print(f" ์ค๋ฅ ํ์ : {type(e).__name__}")
    traceback.print_exc()

print("\nโ ํ ์คํธ ์๋ฃ!")