# SillokBert-Scratch project, stage 4: final model evaluation.
# -----------------------------------------------------------------
# Computes the Perplexity (PPL) of the final model pre-trained in
# stage 3 on the held-out test corpus.
# -----------------------------------------------------------------
import os
import math
from itertools import chain
from pathlib import Path

from datasets import Dataset, load_dataset
from transformers import (
    BertForMaskedLM,
    DataCollatorForLanguageModeling,
    PreTrainedTokenizerFast,
    Trainer,
    TrainingArguments,
    set_seed,
)


def evaluate_sillok_bert():
    """Evaluate the pre-trained SillokBert-Scratch model.

    Loads the final checkpoint, chunks the test corpus into fixed-size
    blocks, and reports masked-language-model eval loss and
    Perplexity (PPL = exp(eval_loss)).
    """
    # --- Path configuration ---
    project_dir = Path("/home/work/baro/sillok/sillok_scratch_20250626")
    final_model_path = project_dir / "sillokbert_scratch_pretraining_output" / "final_model"
    test_dataset_file = "/home/work/baro/sillok25060103/preprocessed_corpus/test.txt"
    eval_output_dir = project_dir / "evaluation_output"

    print("--- 4. SillokBert-Scratch Model Evaluation ---")

    # Fix the RNG so the random token masking below is reproducible.
    set_seed(42)

    # --- Load model and tokenizer ---
    model = BertForMaskedLM.from_pretrained(final_model_path)
    tokenizer = PreTrainedTokenizerFast.from_pretrained(final_model_path)

    # --- Dataset preparation ---
    test_dataset = load_dataset('text', data_files={'test': test_dataset_file})
    block_size = 512

    def tokenize_function(examples):
        # Special tokens are deliberately omitted: all lines are
        # concatenated and re-chunked into fixed-size blocks below.
        return tokenizer(examples['text'], add_special_tokens=False,
                         return_special_tokens_mask=False)

    tokenized_datasets = test_dataset.map(
        tokenize_function, batched=True, num_proc=4, remove_columns=['text'])

    # Concatenate every tokenized line, then split into contiguous
    # block_size-token chunks, dropping the trailing remainder.
    all_input_ids = list(chain(*tokenized_datasets['test']['input_ids']))
    total_length = (len(all_input_ids) // block_size) * block_size
    grouped_input_ids = [all_input_ids[i: i + block_size]
                         for i in range(0, total_length, block_size)]
    eval_dataset = Dataset.from_dict({'input_ids': grouped_input_ids})
    print(f"총 {len(eval_dataset)}개의 평가 샘플이 생성되었습니다.")

    # --- Evaluation ---
    # BUGFIX: the original passed mlm=False, which labels every *visible*
    # token. For a masked-LM such as BERT that lets the model trivially
    # copy its unmasked input, producing a near-zero loss and a
    # meaningless PPL. MLM perplexity must be measured on randomly
    # masked tokens: mlm=True with the standard 15% masking rate.
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer, mlm=True, mlm_probability=0.15)
    training_args = TrainingArguments(
        output_dir=eval_output_dir, per_device_eval_batch_size=8, fp16=True)
    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        eval_dataset=eval_dataset,
    )
    metrics = trainer.evaluate()
    eval_loss = metrics["eval_loss"]
    # PPL = exp(mean cross-entropy over the masked positions).
    perplexity = math.exp(eval_loss)

    # --- Final results ---
    print("\n--- 최종 평가 결과 ---")
    print(f" - 최종 Eval Loss: {eval_loss:.4f}")
    print(f" - 최종 Perplexity(PPL): {perplexity:.4f}")
    print("-" * 30)


if __name__ == "__main__":
    evaluate_sillok_bert()