Upload scaled.log with huggingface_hub

b5c1772 verified about 1 month ago

52.4 kB

	[TRAIN] Qwen2.5-0.5B-Instruct / tweet_hate
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.273, 'grad_norm': 8.716249465942383, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.5637, 'grad_norm': 1.8863756656646729, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.4755, 'grad_norm': 0.7169957756996155, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 11.744, 'train_samples_per_second': 68.12, 'train_steps_per_second': 8.515, 'train_loss': 2.03670832157135, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / tweet_hate
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.8498, 'grad_norm': 4.245172023773193, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.5592, 'grad_norm': 1.005914568901062, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.478, 'grad_norm': 0.9584357142448425, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 15.5586, 'train_samples_per_second': 51.418, 'train_steps_per_second': 6.427, 'train_loss': 2.0415080833435058, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / tweet_irony
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.3359, 'grad_norm': 8.924084663391113, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.2723, 'grad_norm': 1.7949001789093018, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.1661, 'grad_norm': 1.079243779182434, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 10.2996, 'train_samples_per_second': 77.673, 'train_steps_per_second': 9.709, 'train_loss': 1.7398324203491211, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / tweet_irony
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.8723, 'grad_norm': 4.587801456451416, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.3318, 'grad_norm': 1.1860072612762451, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.243, 'grad_norm': 0.9751102924346924, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 13.8132, 'train_samples_per_second': 57.916, 'train_steps_per_second': 7.239, 'train_loss': 1.8127707099914552, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / tweet_offensive
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 3.7137, 'grad_norm': 8.627317428588867, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.357, 'grad_norm': 1.4692013263702393, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.3179, 'grad_norm': 0.8801213502883911, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 11.9215, 'train_samples_per_second': 67.105, 'train_steps_per_second': 8.388, 'train_loss': 1.8510255122184753, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / tweet_offensive
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.4106, 'grad_norm': 4.313003063201904, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.4162, 'grad_norm': 0.9873558878898621, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.3411, 'grad_norm': 0.9314311742782593, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 16.6574, 'train_samples_per_second': 48.027, 'train_steps_per_second': 6.003, 'train_loss': 1.8985853576660157, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / tweet_sentiment
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.4184, 'grad_norm': 8.496637344360352, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.3666, 'grad_norm': 1.1931053400039673, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.4464, 'grad_norm': 0.9015750885009766, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 10.5431, 'train_samples_per_second': 75.879, 'train_steps_per_second': 9.485, 'train_loss': 1.9270088148117066, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / tweet_sentiment
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 5.0948, 'grad_norm': 4.41939115524292, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.4384, 'grad_norm': 1.0546472072601318, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.4671, 'grad_norm': 0.994468629360199, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 14.0076, 'train_samples_per_second': 57.112, 'train_steps_per_second': 7.139, 'train_loss': 1.9793257808685303, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / tweet_stance_abortion
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.5594, 'grad_norm': 9.245041847229004, 'learning_rate': 5e-05, 'epoch': 0.013513513513513514}
	{'loss': 2.6271, 'grad_norm': 2.3615341186523438, 'learning_rate': 5.261313375270014e-05, 'epoch': 0.6756756756756757}
	{'train_runtime': 7.8235, 'train_samples_per_second': 75.031, 'train_steps_per_second': 9.459, 'train_loss': 2.2987184266786316, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / tweet_stance_abortion
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.9786, 'grad_norm': 4.36261510848999, 'learning_rate': 5e-05, 'epoch': 0.013513513513513514}
	{'loss': 2.5435, 'grad_norm': 1.5087007284164429, 'learning_rate': 5.261313375270014e-05, 'epoch': 0.6756756756756757}
	{'train_runtime': 10.3009, 'train_samples_per_second': 56.985, 'train_steps_per_second': 7.184, 'train_loss': 2.2187244118870915, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / tweet_stance_atheism
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.5602, 'grad_norm': 9.173990249633789, 'learning_rate': 6.666666666666667e-05, 'epoch': 0.017241379310344827}
	{'loss': 2.5946, 'grad_norm': 2.4752259254455566, 'learning_rate': 1.026015713086418e-05, 'epoch': 0.8620689655172413}
	{'train_runtime': 6.147, 'train_samples_per_second': 74.996, 'train_steps_per_second': 9.435, 'train_loss': 2.4956352875150483, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / tweet_stance_atheism
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.9584, 'grad_norm': 4.38031530380249, 'learning_rate': 6.666666666666667e-05, 'epoch': 0.017241379310344827}
	{'loss': 2.5073, 'grad_norm': 1.5854002237319946, 'learning_rate': 1.026015713086418e-05, 'epoch': 0.8620689655172413}
	{'train_runtime': 8.1095, 'train_samples_per_second': 56.847, 'train_steps_per_second': 7.152, 'train_loss': 2.4100519213183174, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / tweet_stance_climate
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.6209, 'grad_norm': 9.482098579406738, 'learning_rate': 6.666666666666667e-05, 'epoch': 0.022222222222222223}
	{'train_runtime': 4.7195, 'train_samples_per_second': 75.22, 'train_steps_per_second': 9.535, 'train_loss': 3.009282864464654, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / tweet_stance_climate
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 5.1472, 'grad_norm': 4.565532684326172, 'learning_rate': 6.666666666666667e-05, 'epoch': 0.022222222222222223}
	{'train_runtime': 6.1663, 'train_samples_per_second': 57.571, 'train_steps_per_second': 7.298, 'train_loss': 2.9270129309760198, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / tweet_stance_feminist
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.957, 'grad_norm': 8.836260795593262, 'learning_rate': 5e-05, 'epoch': 0.013333333333333334}
	{'loss': 2.6696, 'grad_norm': 2.114773750305176, 'learning_rate': 5.5193321601242156e-05, 'epoch': 0.6666666666666666}
	{'train_runtime': 7.8237, 'train_samples_per_second': 76.307, 'train_steps_per_second': 9.586, 'train_loss': 2.3274858729044596, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / tweet_stance_feminist
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 5.2688, 'grad_norm': 4.236178398132324, 'learning_rate': 5e-05, 'epoch': 0.013333333333333334}
	{'loss': 2.5591, 'grad_norm': 1.579331398010254, 'learning_rate': 5.5193321601242156e-05, 'epoch': 0.6666666666666666}
	{'train_runtime': 10.4153, 'train_samples_per_second': 57.32, 'train_steps_per_second': 7.201, 'train_loss': 2.2121534220377606, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / tweet_stance_hillary
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.7115, 'grad_norm': 9.204568862915039, 'learning_rate': 5e-05, 'epoch': 0.01282051282051282}
	{'loss': 2.6804, 'grad_norm': 1.9949347972869873, 'learning_rate': 6.271435222196916e-05, 'epoch': 0.6410256410256411}
	{'train_runtime': 8.053, 'train_samples_per_second': 76.99, 'train_steps_per_second': 9.686, 'train_loss': 2.271758103981996, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / tweet_stance_hillary
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 5.16, 'grad_norm': 4.385024547576904, 'learning_rate': 5e-05, 'epoch': 0.01282051282051282}
	{'loss': 2.5969, 'grad_norm': 1.2874785661697388, 'learning_rate': 6.271435222196916e-05, 'epoch': 0.6410256410256411}
	{'train_runtime': 10.8018, 'train_samples_per_second': 57.398, 'train_steps_per_second': 7.221, 'train_loss': 2.1920253802568483, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / sst2
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.4603, 'grad_norm': 11.488479614257812, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 1.9411, 'grad_norm': 1.4684205055236816, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 0.7408, 'grad_norm': 1.0814604759216309, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 9.957, 'train_samples_per_second': 80.346, 'train_steps_per_second': 10.043, 'train_loss': 1.3661358976364135, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / sst2
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 5.242, 'grad_norm': 5.621689796447754, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.1123, 'grad_norm': 0.7866875529289246, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 0.9025, 'grad_norm': 0.8968847990036011, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 12.1944, 'train_samples_per_second': 65.604, 'train_steps_per_second': 8.201, 'train_loss': 1.5386913442611694, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / sst5
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 3.8562, 'grad_norm': 7.73358154296875, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.058, 'grad_norm': 1.4864909648895264, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 0.9958, 'grad_norm': 0.9336028695106506, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 10.8019, 'train_samples_per_second': 74.061, 'train_steps_per_second': 9.258, 'train_loss': 1.5448474097251892, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / sst5
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.533, 'grad_norm': 4.299591541290283, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.1324, 'grad_norm': 0.8781896829605103, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.0859, 'grad_norm': 1.1681996583938599, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 14.764, 'train_samples_per_second': 54.186, 'train_steps_per_second': 6.773, 'train_loss': 1.6331668090820313, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / ag_news
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 3.999, 'grad_norm': 6.8160881996154785, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.2971, 'grad_norm': 1.6471203565597534, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.4402, 'grad_norm': 0.988985002040863, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 13.3199, 'train_samples_per_second': 60.06, 'train_steps_per_second': 7.508, 'train_loss': 1.885696656703949, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / ag_news
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.5293, 'grad_norm': 3.4725465774536133, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.4202, 'grad_norm': 1.1556094884872437, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.4996, 'grad_norm': 1.1493806838989258, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 18.9593, 'train_samples_per_second': 42.196, 'train_steps_per_second': 5.274, 'train_loss': 1.9809542989730835, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / subj
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.3629, 'grad_norm': 9.562212944030762, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.4015, 'grad_norm': 1.7062252759933472, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.3222, 'grad_norm': 1.091054081916809, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 10.6852, 'train_samples_per_second': 74.87, 'train_steps_per_second': 9.359, 'train_loss': 1.881471905708313, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / subj
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.9309, 'grad_norm': 4.718301773071289, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.4671, 'grad_norm': 1.1577858924865723, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.3757, 'grad_norm': 1.229870080947876, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 14.6974, 'train_samples_per_second': 54.431, 'train_steps_per_second': 6.804, 'train_loss': 1.9460000419616699, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / CR
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.4224, 'grad_norm': 10.564358711242676, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.1553, 'grad_norm': 1.3408657312393188, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.0323, 'grad_norm': 1.1570934057235718, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 10.3565, 'train_samples_per_second': 77.246, 'train_steps_per_second': 9.656, 'train_loss': 1.6164953231811523, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / CR
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 5.0732, 'grad_norm': 5.139683246612549, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.2761, 'grad_norm': 1.5539753437042236, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.1228, 'grad_norm': 1.1279797554016113, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 13.4995, 'train_samples_per_second': 59.262, 'train_steps_per_second': 7.408, 'train_loss': 1.7274125909805298, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / amazon_cf
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.1665, 'grad_norm': 8.745065689086914, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.1257, 'grad_norm': 1.0186175107955933, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.0027, 'grad_norm': 0.8737753033638, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 11.101, 'train_samples_per_second': 72.065, 'train_steps_per_second': 9.008, 'train_loss': 1.5846089839935302, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / amazon_cf
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.7403, 'grad_norm': 4.422999858856201, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.1691, 'grad_norm': 0.8509683012962341, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.0911, 'grad_norm': 0.936338484287262, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 15.4283, 'train_samples_per_second': 51.853, 'train_steps_per_second': 6.482, 'train_loss': 1.6557646369934083, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / enron_spam
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 3.9295, 'grad_norm': 4.4511213302612305, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 3.1547, 'grad_norm': 1.320719599723816, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 2.6283, 'grad_norm': 1.0881730318069458, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 16.8065, 'train_samples_per_second': 47.601, 'train_steps_per_second': 5.95, 'train_loss': 2.8992503595352175, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / enron_spam
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.4619, 'grad_norm': 2.4918572902679443, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 3.2087, 'grad_norm': 1.1898902654647827, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 2.6327, 'grad_norm': 1.2654956579208374, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 24.3251, 'train_samples_per_second': 32.888, 'train_steps_per_second': 4.111, 'train_loss': 2.933229660987854, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / hate_speech_off
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.4717, 'grad_norm': 8.441450119018555, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.4507, 'grad_norm': 1.243470311164856, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.3932, 'grad_norm': 0.9001330137252808, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 11.7314, 'train_samples_per_second': 68.193, 'train_steps_per_second': 8.524, 'train_loss': 1.94217613697052, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / hate_speech_off
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.9367, 'grad_norm': 4.192142963409424, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.447, 'grad_norm': 1.0086071491241455, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.4385, 'grad_norm': 0.9927129149436951, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 15.0486, 'train_samples_per_second': 53.161, 'train_steps_per_second': 6.645, 'train_loss': 1.9676944971084596, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / insincere
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 3.9336, 'grad_norm': 10.130498886108398, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 1.8495, 'grad_norm': 1.058966875076294, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 0.7305, 'grad_norm': 0.6476753354072571, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 10.9655, 'train_samples_per_second': 72.956, 'train_steps_per_second': 9.12, 'train_loss': 1.3108138823509217, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / insincere
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.5841, 'grad_norm': 5.072680950164795, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 1.9582, 'grad_norm': 0.7228817343711853, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 0.8677, 'grad_norm': 0.7066493630409241, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 13.413, 'train_samples_per_second': 59.643, 'train_steps_per_second': 7.455, 'train_loss': 1.439186840057373, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / amazon_pol
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 3.8792, 'grad_norm': 8.585878372192383, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.2072, 'grad_norm': 1.1846158504486084, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.3157, 'grad_norm': 0.9254633188247681, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 14.0895, 'train_samples_per_second': 56.78, 'train_steps_per_second': 7.097, 'train_loss': 1.7781847405433655, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / amazon_pol
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.3915, 'grad_norm': 4.405399322509766, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.2139, 'grad_norm': 1.0393775701522827, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.3557, 'grad_norm': 0.8698548078536987, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 20.3719, 'train_samples_per_second': 39.27, 'train_steps_per_second': 4.909, 'train_loss': 1.8066069555282593, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / toxic_conv
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.1391, 'grad_norm': 6.106127738952637, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.6737, 'grad_norm': 1.0389565229415894, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.8824, 'grad_norm': 0.8112128973007202, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 16.2928, 'train_samples_per_second': 49.101, 'train_steps_per_second': 6.138, 'train_loss': 2.2927292728424074, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / toxic_conv
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.454, 'grad_norm': 3.073042392730713, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.6476, 'grad_norm': 0.8167802691459656, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.8868, 'grad_norm': 0.8130165338516235, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 23.3638, 'train_samples_per_second': 34.241, 'train_steps_per_second': 4.28, 'train_loss': 2.2852662086486815, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / ade
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.2611, 'grad_norm': 10.079445838928223, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.0914, 'grad_norm': 1.5072174072265625, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 0.9567, 'grad_norm': 1.2372623682022095, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 11.1349, 'train_samples_per_second': 71.846, 'train_steps_per_second': 8.981, 'train_loss': 1.545719051361084, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / ade
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.7785, 'grad_norm': 4.768298625946045, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.1438, 'grad_norm': 0.979763388633728, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.0262, 'grad_norm': 0.910645604133606, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 15.0294, 'train_samples_per_second': 53.229, 'train_steps_per_second': 6.654, 'train_loss': 1.611364221572876, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / 20news
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 2.5873, 'grad_norm': 4.530882835388184, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 1.8753, 'grad_norm': 0.7737817764282227, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.3877, 'grad_norm': 0.5685547590255737, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 17.0298, 'train_samples_per_second': 46.976, 'train_steps_per_second': 5.872, 'train_loss': 1.63863196849823, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / 20news
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 3.1497, 'grad_norm': 2.4429728984832764, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.0154, 'grad_norm': 0.767099142074585, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.5132, 'grad_norm': 0.6722097396850586, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 24.4478, 'train_samples_per_second': 32.723, 'train_steps_per_second': 4.09, 'train_loss': 1.775665946006775, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / imdb
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 3.8529, 'grad_norm': 4.027501583099365, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.9237, 'grad_norm': 0.8566207885742188, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 2.4696, 'grad_norm': 0.7376132607460022, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 17.0399, 'train_samples_per_second': 46.949, 'train_steps_per_second': 5.869, 'train_loss': 2.705967416763306, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / imdb
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.2714, 'grad_norm': 2.319688081741333, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.9326, 'grad_norm': 0.8715723156929016, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 2.3857, 'grad_norm': 0.7408086657524109, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 24.5068, 'train_samples_per_second': 32.644, 'train_steps_per_second': 4.081, 'train_loss': 2.6725329828262328, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / rotten
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.433, 'grad_norm': 9.2361478805542, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.2875, 'grad_norm': 1.727787733078003, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.2166, 'grad_norm': 0.9453311562538147, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 10.3905, 'train_samples_per_second': 76.993, 'train_steps_per_second': 9.624, 'train_loss': 1.7735167932510376, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / rotten
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 5.0912, 'grad_norm': 4.759712219238281, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.4038, 'grad_norm': 1.4830732345581055, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.2859, 'grad_norm': 1.2477036714553833, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 13.9995, 'train_samples_per_second': 57.145, 'train_steps_per_second': 7.143, 'train_loss': 1.871756911277771, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / dbpedia
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 3.1421, 'grad_norm': 4.560918807983398, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 1.8285, 'grad_norm': 1.0229161977767944, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.1279, 'grad_norm': 0.6451042294502258, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 16.9529, 'train_samples_per_second': 47.19, 'train_steps_per_second': 5.899, 'train_loss': 1.4913653326034546, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / dbpedia
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 3.7919, 'grad_norm': 2.565979242324829, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 1.9812, 'grad_norm': 0.85664963722229, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.0895, 'grad_norm': 0.6693203449249268, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 24.4461, 'train_samples_per_second': 32.725, 'train_steps_per_second': 4.091, 'train_loss': 1.5534515047073365, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / emotion
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 3.6796, 'grad_norm': 8.679558753967285, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 1.8993, 'grad_norm': 1.4098697900772095, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 0.8971, 'grad_norm': 0.7674059271812439, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 11.4415, 'train_samples_per_second': 69.921, 'train_steps_per_second': 8.74, 'train_loss': 1.415963158607483, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / emotion
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.2258, 'grad_norm': 4.511800765991211, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 1.9514, 'grad_norm': 0.8560807108879089, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 0.9838, 'grad_norm': 0.7192825078964233, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 15.4377, 'train_samples_per_second': 51.821, 'train_steps_per_second': 6.478, 'train_loss': 1.4903493642807006, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / tweet_emotion
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.1976, 'grad_norm': 8.541781425476074, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.2619, 'grad_norm': 1.8263031244277954, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.2087, 'grad_norm': 0.9338579177856445, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 10.9559, 'train_samples_per_second': 73.02, 'train_steps_per_second': 9.128, 'train_loss': 1.7546869659423827, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / tweet_emotion
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.7483, 'grad_norm': 4.2091827392578125, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.2912, 'grad_norm': 0.9139009118080139, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 1.2588, 'grad_norm': 0.988981306552887, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 14.6391, 'train_samples_per_second': 54.648, 'train_steps_per_second': 6.831, 'train_loss': 1.7996071434020997, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / bbc_news
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 3.6284, 'grad_norm': 3.8643064498901367, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.8199, 'grad_norm': 1.313910961151123, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 2.2287, 'grad_norm': 1.0434508323669434, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 17.1383, 'train_samples_per_second': 46.679, 'train_steps_per_second': 5.835, 'train_loss': 2.5323920035362244, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / bbc_news
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 3.9575, 'grad_norm': 2.299182891845703, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.7064, 'grad_norm': 1.0852779150009155, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 2.0325, 'grad_norm': 1.072020411491394, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 24.5314, 'train_samples_per_second': 32.611, 'train_steps_per_second': 4.076, 'train_loss': 2.3819601130485535, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / ethos_binary
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.2348, 'grad_norm': 9.303101539611816, 'learning_rate': 5e-05, 'epoch': 0.013333333333333334}
	{'loss': 2.2696, 'grad_norm': 1.7380839586257935, 'learning_rate': 5.5193321601242156e-05, 'epoch': 0.6666666666666666}
	{'train_runtime': 9.1467, 'train_samples_per_second': 65.379, 'train_steps_per_second': 8.2, 'train_loss': 1.97029296875, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / ethos_binary
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.7957, 'grad_norm': 4.550801753997803, 'learning_rate': 5e-05, 'epoch': 0.013333333333333334}
	{'loss': 2.3366, 'grad_norm': 1.4175268411636353, 'learning_rate': 5.5193321601242156e-05, 'epoch': 0.6666666666666666}
	{'train_runtime': 12.6638, 'train_samples_per_second': 47.221, 'train_steps_per_second': 5.922, 'train_loss': 2.03967955271403, 'epoch': 1.0}
	[TRAIN] Qwen2.5-0.5B-Instruct / trec
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	{'loss': 4.4726, 'grad_norm': 10.429224967956543, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.0335, 'grad_norm': 2.4829320907592773, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 0.5129, 'grad_norm': 0.8242684006690979, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 10.2663, 'train_samples_per_second': 77.925, 'train_steps_per_second': 9.741, 'train_loss': 1.2975853967666626, 'epoch': 1.0}
	[TRAIN] Llama-3.2-1B-Instruct / trec
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	WARNING:accelerate.utils.other:Detected kernel version 4.4.0, which is below the recommended minimum of 5.5.0; this can cause the process to hang. It is recommended to upgrade the kernel to the minimum version or higher.
	/usr/local/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:590: UserWarning: `do_sample` is set to `False`. However, `temperature` is set to `0.6` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `temperature`.
	warnings.warn(
	/usr/local/lib/python3.12/site-packages/transformers/generation/configuration_utils.py:595: UserWarning: `do_sample` is set to `False`. However, `top_p` is set to `0.9` -- this flag is only used in sample-based generation modes. You should set `do_sample=True` or unset `top_p`.
	warnings.warn(
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	A decoder-only architecture is being used, but right-padding was detected! For correct generation results, please set `padding_side='left'` when initializing the tokenizer.
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	Repo card metadata block was not found. Setting CardData to empty.
	WARNING:huggingface_hub.repocard:Repo card metadata block was not found. Setting CardData to empty.
	{'loss': 5.1213, 'grad_norm': 4.670011520385742, 'learning_rate': 4e-05, 'epoch': 0.01}
	{'loss': 2.1178, 'grad_norm': 1.0159612894058228, 'learning_rate': 0.00010825793454723325, 'epoch': 0.5}
	{'loss': 0.6244, 'grad_norm': 0.8839908838272095, 'learning_rate': 0.0, 'epoch': 1.0}
	{'train_runtime': 13.4709, 'train_samples_per_second': 59.387, 'train_steps_per_second': 7.423, 'train_loss': 1.4011702632904053, 'epoch': 1.0}

	Loaded 25 anchor adapter pairs.

	=== Held-out task: emotion ===
	Building mappings...
	[MLP] training on 1600 (block,task) pairs of dim 8
	ep 0 mse=1.88498
	ep 50 mse=1.16137
	ep 100 mse=0.90441
	ep 150 mse=0.87067
	ep 200 mse=0.85433
	ep 250 mse=0.84133
	ep 300 mse=0.82832
	ep 350 mse=0.81399
	ep 399 mse=0.79813
	cosines vs oracle: {'mean': 0.9839, 'global_ridge': 0.9845, 'pertensor_ridge': 0.9835, 'pertensor_pca': 0.984, 'pertensor_mlp': 0.9836}
	base_Y = 0.3367
	mean = 0.3500
	global_ridge = 0.4133
	pertensor_ridge = 0.4267
	pertensor_pca = 0.3900
	pertensor_mlp = 0.3567
	oracle_Y = 0.5467

	=== Held-out task: tweet_emotion ===
	Building mappings...
	[MLP] training on 1600 (block,task) pairs of dim 8
	ep 0 mse=1.87491
	ep 50 mse=1.13215
	ep 100 mse=0.90969
	ep 150 mse=0.87585
	ep 200 mse=0.85822
	ep 250 mse=0.84492
	ep 300 mse=0.83208
	ep 350 mse=0.81809
	ep 399 mse=0.80257
	cosines vs oracle: {'mean': 0.9865, 'global_ridge': 0.9873, 'pertensor_ridge': 0.9861, 'pertensor_pca': 0.9866, 'pertensor_mlp': 0.9864}
	base_Y = 0.4667
	mean = 0.2700
	global_ridge = 0.2633
	pertensor_ridge = 0.2700
	pertensor_pca = 0.2833
	pertensor_mlp = 0.2733
	oracle_Y = 0.7267

	=== Held-out task: bbc_news ===
	Building mappings...
	[MLP] training on 1600 (block,task) pairs of dim 8
	ep 0 mse=1.85919
	ep 50 mse=1.10253
	ep 100 mse=0.90320
	ep 150 mse=0.87330
	ep 200 mse=0.85684
	ep 250 mse=0.84406
	ep 300 mse=0.83143
	ep 350 mse=0.81793
	ep 399 mse=0.80343
	cosines vs oracle: {'mean': 0.9766, 'global_ridge': 0.9806, 'pertensor_ridge': 0.9791, 'pertensor_pca': 0.9791, 'pertensor_mlp': 0.9763}
	base_Y = 0.0633
	mean = 0.0100
	global_ridge = 0.0067
	pertensor_ridge = 0.0067
	pertensor_pca = 0.0033
	pertensor_mlp = 0.0100
	oracle_Y = 0.1033

	=== Held-out task: ethos_binary ===
	Building mappings...
	[MLP] training on 1600 (block,task) pairs of dim 8
	ep 0 mse=1.86809
	ep 50 mse=1.12861
	ep 100 mse=0.90710
	ep 150 mse=0.87480
	ep 200 mse=0.85854
	ep 250 mse=0.84590
	ep 300 mse=0.83347
	ep 350 mse=0.81984
	ep 399 mse=0.80456
	cosines vs oracle: {'mean': 0.9907, 'global_ridge': 0.9912, 'pertensor_ridge': 0.9902, 'pertensor_pca': 0.9907, 'pertensor_mlp': 0.9906}
	base_Y = 0.5033
	mean = 0.6933
	global_ridge = 0.7367
	pertensor_ridge = 0.6867
	pertensor_pca = 0.7167
	pertensor_mlp = 0.7600
	oracle_Y = 0.7033

	=== Held-out task: trec ===
	Building mappings...
	[MLP] training on 1600 (block,task) pairs of dim 8
	ep 0 mse=1.89924
	ep 50 mse=1.16138
	ep 100 mse=0.90132
	ep 150 mse=0.87022
	ep 200 mse=0.85339
	ep 250 mse=0.83966
	ep 300 mse=0.82572
	ep 350 mse=0.81026
	ep 399 mse=0.79290
	cosines vs oracle: {'mean': 0.9728, 'global_ridge': 0.9737, 'pertensor_ridge': 0.9714, 'pertensor_pca': 0.9727, 'pertensor_mlp': 0.9722}
	base_Y = 0.1933
	mean = 0.2000
	global_ridge = 0.2167
	pertensor_ridge = 0.2100
	pertensor_pca = 0.2133
	pertensor_mlp = 0.1967
	oracle_Y = 0.4533

	=== AVG ACROSS HELD-OUT TASKS ===
	base_Y 0.3127
	mean 0.3047
	global_ridge 0.3273
	pertensor_ridge 0.3200
	pertensor_pca 0.3213
	pertensor_mlp 0.3193
	oracle_Y 0.5067