---
# AI Text Detection Configuration
# NOTE(review): this file had been mangled into a markdown table (`| … | |` on
# every line, nesting indentation stripped); restored to valid YAML below.

# Project settings
project_name: "Token_Length_Prediction"
task_type: "multi_classification"

# Data settings
data:
  dataset: "Korea-MES/open_question_type"
  text_columns: ['question']
  # target_column: ["r_scores"]
  # target_column: "mlt"
  target_column: "resolve_type"
  # target_column: ['normalized_informativeness_score', 'token_count']
  max_length: 512
  # validation_split: 100  # previously: 500 samples per MLT label held out for validation
  validation_split: 0.1  # fraction of data held out for validation (was an absolute count before)
  random_state: 42

# Model settings
model:
  model_names:  # candidate models, in priority order
    - "answerdotai/ModernBERT-large"
  # type: "grice"
  type: "classification"
  # type: "regression"
  # NOTE(review): the old comment claimed "binary classification (BCEWithLogitsLoss)",
  # which contradicts num_labels: 10 and task_type "multi_classification" — confirm
  # the loss actually used by the training code.
  num_labels: 10
  dropout_rate: 0.1
  # NOTE(review): ModernBERT-large is documented with hidden_size 1024, not 768 —
  # confirm whether the consuming code reads this value or the checkpoint's own config.
  hidden_size: 768

# Training settings
training:
  num_epochs: 5
  batch_size: 64
  learning_rate: 5.0e-5
  weight_decay: 0.01
  warmup_steps: 15
  max_grad_norm: 1.0

# Evaluation settings
evaluation:
  strategy: "epoch"  # "epoch" or "steps"
  metric: "f1"  # e.g. "accuracy", "f1", "auc"
  # metric: "eval_mse"

# Hardware settings
hardware:
  device: "cuda:0"
  dataloader_num_workers: 4
  fp16: true

# Output settings
output:
  model_save_dir: "./models"
  logs_dir: "./logs"
  results_dir: "./results"
  submission_file: "submission.csv"