Spaces:

gbrabbit
/

lily-math-rag

Sleeping

App Files Files Community

lily-math-rag / test_tokenizer.py

gbrabbit

Auto commit at 07-2025-08 4:43:48

b9ecb65 5 months ago

raw

history blame

5.3 kB

	import os
	import traceback
	from typing import Optional
	from transformers import AutoTokenizer
	import torch

	# 환경 변수 로드
	try:
	from dotenv import load_dotenv
	load_dotenv()
	print("✅ .env 파일 로드됨")
	except ImportError:
	print("⚠️ python-dotenv가 설치되지 않음")

	HF_TOKEN = os.getenv("HF_TOKEN")

	# 환경 감지
	IS_LOCAL = os.path.exists('../.env') or 'LOCAL_TEST' in os.environ
	print(f"🔍 환경: {'로컬' if IS_LOCAL else '서버'}")

	# 환경에 따른 모델 경로 설정
	if IS_LOCAL:
	# 로컬 모델 경로 (hearth_llm_model 폴더 사용)
	MODEL_PATH = "../lily_llm_core/models/kanana-1.5-v-3b-instruct"
	print(f"🔍 로컬 모델 경로: {MODEL_PATH}")
	print(f"🔍 경로 존재: {os.path.exists(MODEL_PATH)}")
	else:
	# 서버에서는 Hugging Face 모델 사용
	MODEL_PATH = os.getenv("MODEL_NAME", "gbrabbit/lily-math-model")
	print(f"🔍 서버 모델: {MODEL_PATH}")

	print(f"🔍 토큰: {'✅ 설정됨' if HF_TOKEN else '❌ 설정되지 않음'}")

	# 토크나이저 테스트
	print("\n🔧 토크나이저 테스트 시작...")

	try:
	print("📤 토크나이저 로딩 중...")
	print(f" MODEL_PATH: {MODEL_PATH}")
	print(f" IS_LOCAL: {IS_LOCAL}")
	print(f" trust_remote_code: True")
	print(f" use_fast: False")

	if IS_LOCAL:
	tokenizer = AutoTokenizer.from_pretrained(
	MODEL_PATH,
	trust_remote_code=True,
	)
	else:
	tokenizer = AutoTokenizer.from_pretrained(
	MODEL_PATH,
	token=HF_TOKEN,
	trust_remote_code=True,
	)

	print(f"✅ 토크나이저 로딩 완료")
	print(f" 타입: {type(tokenizer)}")
	print(f" 값: {tokenizer}")
	print(f" hasattr('encode'): {hasattr(tokenizer, 'encode')}")
	print(f" hasattr('__call__'): {hasattr(tokenizer, '__call__')}")

	# 토크나이저 테스트
	test_input = "안녕하세요"
	print(f"\n🔤 토크나이저 테스트: '{test_input}'")

	test_tokens = tokenizer(test_input, return_tensors="pt")
	print(f" ✅ 토크나이저 호출 성공")
	print(f" input_ids shape: {test_tokens['input_ids'].shape}")
	print(f" attention_mask shape: {test_tokens['attention_mask'].shape}")

	# 디코딩 테스트
	decoded = tokenizer.decode(test_tokens['input_ids'][0], skip_special_tokens=True)
	print(f" 디코딩 결과: '{decoded}'")

	except Exception as e:
	print(f"❌ 토크나이저 테스트 실패: {e}")
	print(f" 오류 타입: {type(e).__name__}")
	traceback.print_exc()

	# 모델 테스트
	print("\n🔧 모델 테스트 시작...")

	try:
	print("📤 모델 로딩 중...")
	from modeling import KananaVForConditionalGeneration

	if IS_LOCAL:
	model = KananaVForConditionalGeneration.from_pretrained(
	MODEL_PATH,
	torch_dtype=torch.float16,
	trust_remote_code=True,
	device_map=None,
	low_cpu_mem_usage=True
	)
	else:
	model = KananaVForConditionalGeneration.from_pretrained(
	MODEL_PATH,
	token=HF_TOKEN,
	torch_dtype=torch.float16,
	trust_remote_code=True,
	device_map=None,
	low_cpu_mem_usage=True
	)

	print(f"✅ 모델 로딩 완료")
	# print(f" 타입: {type(model)}")
	# print(f" 디바이스: {next(model.parameters()).device}")

	# 모델 테스트
	test_input = "안녕하세요"
	formatted_prompt = f"<\|im_start\|>user\n{test_input}<\|im_end\|>\n<\|im_start\|>assistant\n"
	max_length: Optional[int] = None

	inputs = tokenizer(
	formatted_prompt,
	return_tensors="pt",
	padding=True,
	truncation=True,
	max_length=512
	)

	print(f"\n🤖 모델 추론 테스트: '{test_input}'")

	# Kanana용 생성 설정
	max_new_tokens = max_length or 100

	with torch.no_grad():
	outputs = model.generate(
	input_ids=inputs["input_ids"],
	attention_mask=inputs["attention_mask"],
	max_new_tokens=max_new_tokens,
	repetition_penalty=1.1,
	no_repeat_ngram_size=2,
	pad_token_id=tokenizer.eos_token_id,
	eos_token_id=tokenizer.eos_token_id,
	use_cache=True
	)

	print(f" ✅ 모델 호출 성공")
	print(f" outputs 타입: {type(outputs)}")
	print(f" outputs shape: {outputs.shape}")

	# 디코딩 테스트
	# model.generate()의 출력은 전체 시퀀스이므로 바로 디코딩합니다.
	# outputs[0]은 배치 중 첫 번째 결과를 의미합니다.
	response = tokenizer.decode(outputs[0], skip_special_tokens=True)

	# 입력 프롬프트를 응답에서 제거 (선택사항)
	assistant_response = response.split("<\|im_start\|>assistant\n")[-1]

	print(f" 생성된 전체 텍스트: '{response}'")
	print(f" 어시스턴트 응답: '{assistant_response.strip()}'")

	except Exception as e:
	print(f"❌ 모델 테스트 실패: {e}")
	print(f" 오류 타입: {type(e).__name__}")
	traceback.print_exc()

	print("\n✅ 테스트 완료!")