| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.6253776435045317, | |
| "eval_steps": 2000, | |
| "global_step": 42000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.04315925766076824, | |
| "grad_norm": 30.676715850830078, | |
| "learning_rate": 2.1579628830384117e-06, | |
| "loss": 0.358, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.08631851532153648, | |
| "grad_norm": 19.524194717407227, | |
| "learning_rate": 4.3159257660768235e-06, | |
| "loss": 0.1048, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1294777729823047, | |
| "grad_norm": 0.00297492160461843, | |
| "learning_rate": 6.473888649115235e-06, | |
| "loss": 0.0827, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.17263703064307295, | |
| "grad_norm": 8.900677680969238, | |
| "learning_rate": 8.631851532153647e-06, | |
| "loss": 0.067, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.17263703064307295, | |
| "eval_cosine_accuracy@1": 0.6191955808734679, | |
| "eval_cosine_accuracy@10": 0.9514931814258588, | |
| "eval_cosine_accuracy@3": 0.8606939409632315, | |
| "eval_cosine_accuracy@5": 0.909891248058001, | |
| "eval_cosine_map@100": 0.748128574680106, | |
| "eval_cosine_mrr@10": 0.7459635876906734, | |
| "eval_cosine_ndcg@10": 0.7968614059582585, | |
| "eval_cosine_precision@1": 0.6191955808734679, | |
| "eval_cosine_precision@10": 0.09514931814258587, | |
| "eval_cosine_precision@3": 0.28689798032107716, | |
| "eval_cosine_precision@5": 0.18197824961160017, | |
| "eval_cosine_recall@1": 0.6191955808734679, | |
| "eval_cosine_recall@10": 0.9514931814258588, | |
| "eval_cosine_recall@3": 0.8606939409632315, | |
| "eval_cosine_recall@5": 0.909891248058001, | |
| "eval_runtime": 468.0233, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.21579628830384118, | |
| "grad_norm": 0.12022869288921356, | |
| "learning_rate": 1.0789814415192059e-05, | |
| "loss": 0.0491, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.2589555459646094, | |
| "grad_norm": 0.07568053156137466, | |
| "learning_rate": 1.294777729823047e-05, | |
| "loss": 0.0831, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.3021148036253776, | |
| "grad_norm": 0.0246192067861557, | |
| "learning_rate": 1.5105740181268884e-05, | |
| "loss": 0.062, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.3452740612861459, | |
| "grad_norm": 0.009853623807430267, | |
| "learning_rate": 1.7263703064307294e-05, | |
| "loss": 0.0657, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.3452740612861459, | |
| "eval_cosine_accuracy@1": 0.6362851717590196, | |
| "eval_cosine_accuracy@10": 0.9523562920766442, | |
| "eval_cosine_accuracy@3": 0.8606939409632315, | |
| "eval_cosine_accuracy@5": 0.9110996029691006, | |
| "eval_cosine_map@100": 0.7589134849598074, | |
| "eval_cosine_mrr@10": 0.756632799848751, | |
| "eval_cosine_ndcg@10": 0.8050365772218437, | |
| "eval_cosine_precision@1": 0.6362851717590196, | |
| "eval_cosine_precision@10": 0.09523562920766442, | |
| "eval_cosine_precision@3": 0.28689798032107716, | |
| "eval_cosine_precision@5": 0.1822199205938201, | |
| "eval_cosine_recall@1": 0.6362851717590196, | |
| "eval_cosine_recall@10": 0.9523562920766442, | |
| "eval_cosine_recall@3": 0.8606939409632315, | |
| "eval_cosine_recall@5": 0.9110996029691006, | |
| "eval_runtime": 467.8258, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.38843331894691413, | |
| "grad_norm": 0.017763391137123108, | |
| "learning_rate": 1.9421665947345706e-05, | |
| "loss": 0.0522, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.43159257660768235, | |
| "grad_norm": 21.21623420715332, | |
| "learning_rate": 1.982446885041485e-05, | |
| "loss": 0.049, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.4747518342684506, | |
| "grad_norm": 0.13613158464431763, | |
| "learning_rate": 1.958467219797612e-05, | |
| "loss": 0.0426, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.5179110919292188, | |
| "grad_norm": 0.1645500212907791, | |
| "learning_rate": 1.9344875545537384e-05, | |
| "loss": 0.0708, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.5179110919292188, | |
| "eval_cosine_accuracy@1": 0.6526842741239427, | |
| "eval_cosine_accuracy@10": 0.9642672190574831, | |
| "eval_cosine_accuracy@3": 0.8865872604867944, | |
| "eval_cosine_accuracy@5": 0.9287070602451234, | |
| "eval_cosine_map@100": 0.7759321604397249, | |
| "eval_cosine_mrr@10": 0.7742270364616298, | |
| "eval_cosine_ndcg@10": 0.8214808830487713, | |
| "eval_cosine_precision@1": 0.6526842741239427, | |
| "eval_cosine_precision@10": 0.0964267219057483, | |
| "eval_cosine_precision@3": 0.2955290868289315, | |
| "eval_cosine_precision@5": 0.1857414120490247, | |
| "eval_cosine_recall@1": 0.6526842741239427, | |
| "eval_cosine_recall@10": 0.9642672190574831, | |
| "eval_cosine_recall@3": 0.8865872604867944, | |
| "eval_cosine_recall@5": 0.9287070602451234, | |
| "eval_runtime": 467.7458, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.561070349589987, | |
| "grad_norm": 0.3336288332939148, | |
| "learning_rate": 1.9105078893098655e-05, | |
| "loss": 0.0236, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.6042296072507553, | |
| "grad_norm": 0.011359921656548977, | |
| "learning_rate": 1.886528224065992e-05, | |
| "loss": 0.024, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.6473888649115235, | |
| "grad_norm": 0.0021573721896857023, | |
| "learning_rate": 1.8625485588221192e-05, | |
| "loss": 0.0256, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.6905481225722918, | |
| "grad_norm": 0.024769997224211693, | |
| "learning_rate": 1.8385688935782457e-05, | |
| "loss": 0.041, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.6905481225722918, | |
| "eval_cosine_accuracy@1": 0.6390471258415329, | |
| "eval_cosine_accuracy@10": 0.9573623338511997, | |
| "eval_cosine_accuracy@3": 0.8693250474710857, | |
| "eval_cosine_accuracy@5": 0.9195580873467979, | |
| "eval_cosine_map@100": 0.7640704294756044, | |
| "eval_cosine_mrr@10": 0.762041421091137, | |
| "eval_cosine_ndcg@10": 0.8104943817099518, | |
| "eval_cosine_precision@1": 0.6390471258415329, | |
| "eval_cosine_precision@10": 0.09573623338511995, | |
| "eval_cosine_precision@3": 0.2897750158236953, | |
| "eval_cosine_precision@5": 0.18391161746935958, | |
| "eval_cosine_recall@1": 0.6390471258415329, | |
| "eval_cosine_recall@10": 0.9573623338511997, | |
| "eval_cosine_recall@3": 0.8693250474710857, | |
| "eval_cosine_recall@5": 0.9195580873467979, | |
| "eval_runtime": 467.5761, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.73370738023306, | |
| "grad_norm": 0.001473304582759738, | |
| "learning_rate": 1.8145892283343725e-05, | |
| "loss": 0.0285, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.7768666378938283, | |
| "grad_norm": 0.002119662007316947, | |
| "learning_rate": 1.7906095630904994e-05, | |
| "loss": 0.0249, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.8200258955545965, | |
| "grad_norm": 0.035019177943468094, | |
| "learning_rate": 1.7666298978466262e-05, | |
| "loss": 0.0368, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.8631851532153647, | |
| "grad_norm": 0.2664908468723297, | |
| "learning_rate": 1.742650232602753e-05, | |
| "loss": 0.0588, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.8631851532153647, | |
| "eval_cosine_accuracy@1": 0.6407733471431037, | |
| "eval_cosine_accuracy@10": 0.9589159330226135, | |
| "eval_cosine_accuracy@3": 0.8734679785948558, | |
| "eval_cosine_accuracy@5": 0.9204211979975833, | |
| "eval_cosine_map@100": 0.7652575174635105, | |
| "eval_cosine_mrr@10": 0.7632412818974197, | |
| "eval_cosine_ndcg@10": 0.811775458664963, | |
| "eval_cosine_precision@1": 0.6407733471431037, | |
| "eval_cosine_precision@10": 0.09589159330226135, | |
| "eval_cosine_precision@3": 0.2911559928649519, | |
| "eval_cosine_precision@5": 0.18408423959951664, | |
| "eval_cosine_recall@1": 0.6407733471431037, | |
| "eval_cosine_recall@10": 0.9589159330226135, | |
| "eval_cosine_recall@3": 0.8734679785948558, | |
| "eval_cosine_recall@5": 0.9204211979975833, | |
| "eval_runtime": 467.8166, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.9063444108761329, | |
| "grad_norm": 0.032082412391901016, | |
| "learning_rate": 1.71867056735888e-05, | |
| "loss": 0.0386, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.9495036685369012, | |
| "grad_norm": 8.98410415649414, | |
| "learning_rate": 1.6946909021150067e-05, | |
| "loss": 0.0456, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.9926629261976694, | |
| "grad_norm": 0.002887778216972947, | |
| "learning_rate": 1.6707112368711332e-05, | |
| "loss": 0.0399, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.0358221838584376, | |
| "grad_norm": 0.039170317351818085, | |
| "learning_rate": 1.6467315716272604e-05, | |
| "loss": 0.0424, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.0358221838584376, | |
| "eval_cosine_accuracy@1": 0.6606248921111687, | |
| "eval_cosine_accuracy@10": 0.9654755739685827, | |
| "eval_cosine_accuracy@3": 0.8808907301916106, | |
| "eval_cosine_accuracy@5": 0.9300880372863801, | |
| "eval_cosine_map@100": 0.7789505370634054, | |
| "eval_cosine_mrr@10": 0.7772537463112309, | |
| "eval_cosine_ndcg@10": 0.8239196088222247, | |
| "eval_cosine_precision@1": 0.6606248921111687, | |
| "eval_cosine_precision@10": 0.09654755739685827, | |
| "eval_cosine_precision@3": 0.2936302433972035, | |
| "eval_cosine_precision@5": 0.186017607457276, | |
| "eval_cosine_recall@1": 0.6606248921111687, | |
| "eval_cosine_recall@10": 0.9654755739685827, | |
| "eval_cosine_recall@3": 0.8808907301916106, | |
| "eval_cosine_recall@5": 0.9300880372863801, | |
| "eval_runtime": 467.7683, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.0789814415192058, | |
| "grad_norm": 0.07316175103187561, | |
| "learning_rate": 1.622751906383387e-05, | |
| "loss": 0.0107, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.122140699179974, | |
| "grad_norm": 0.03618592023849487, | |
| "learning_rate": 1.598772241139514e-05, | |
| "loss": 0.0279, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.1652999568407423, | |
| "grad_norm": 0.023356635123491287, | |
| "learning_rate": 1.5747925758956405e-05, | |
| "loss": 0.0236, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.2084592145015105, | |
| "grad_norm": 0.002293772529810667, | |
| "learning_rate": 1.5508129106517674e-05, | |
| "loss": 0.024, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.2084592145015105, | |
| "eval_cosine_accuracy@1": 0.6506128085620576, | |
| "eval_cosine_accuracy@10": 0.9640945969273261, | |
| "eval_cosine_accuracy@3": 0.8803728638011393, | |
| "eval_cosine_accuracy@5": 0.9266355946832384, | |
| "eval_cosine_map@100": 0.7732572758885798, | |
| "eval_cosine_mrr@10": 0.7715017303313533, | |
| "eval_cosine_ndcg@10": 0.8192838549207232, | |
| "eval_cosine_precision@1": 0.6506128085620576, | |
| "eval_cosine_precision@10": 0.09640945969273261, | |
| "eval_cosine_precision@3": 0.29345762126704644, | |
| "eval_cosine_precision@5": 0.18532711893664763, | |
| "eval_cosine_recall@1": 0.6506128085620576, | |
| "eval_cosine_recall@10": 0.9640945969273261, | |
| "eval_cosine_recall@3": 0.8803728638011393, | |
| "eval_cosine_recall@5": 0.9266355946832384, | |
| "eval_runtime": 467.8783, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.2516184721622787, | |
| "grad_norm": 0.007560160476714373, | |
| "learning_rate": 1.5268332454078942e-05, | |
| "loss": 0.0143, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.2947777298230472, | |
| "grad_norm": 0.004202102776616812, | |
| "learning_rate": 1.5028535801640209e-05, | |
| "loss": 0.0118, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.3379369874838152, | |
| "grad_norm": 0.00022126469411887228, | |
| "learning_rate": 1.4788739149201479e-05, | |
| "loss": 0.0078, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.3810962451445836, | |
| "grad_norm": 0.011956814676523209, | |
| "learning_rate": 1.4548942496762745e-05, | |
| "loss": 0.023, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.3810962451445836, | |
| "eval_cosine_accuracy@1": 0.6533747626445711, | |
| "eval_cosine_accuracy@10": 0.9642672190574831, | |
| "eval_cosine_accuracy@3": 0.8826169514931814, | |
| "eval_cosine_accuracy@5": 0.9302606594165372, | |
| "eval_cosine_map@100": 0.7763076224553367, | |
| "eval_cosine_mrr@10": 0.7745393318153555, | |
| "eval_cosine_ndcg@10": 0.8216976031852626, | |
| "eval_cosine_precision@1": 0.6533747626445711, | |
| "eval_cosine_precision@10": 0.0964267219057483, | |
| "eval_cosine_precision@3": 0.2942056504977271, | |
| "eval_cosine_precision@5": 0.18605213188330738, | |
| "eval_cosine_recall@1": 0.6533747626445711, | |
| "eval_cosine_recall@10": 0.9642672190574831, | |
| "eval_cosine_recall@3": 0.8826169514931814, | |
| "eval_cosine_recall@5": 0.9302606594165372, | |
| "eval_runtime": 467.7532, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.4242555028053516, | |
| "grad_norm": 0.008947977796196938, | |
| "learning_rate": 1.4309145844324015e-05, | |
| "loss": 0.0239, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.46741476046612, | |
| "grad_norm": 0.20168237388134003, | |
| "learning_rate": 1.4069349191885282e-05, | |
| "loss": 0.0335, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.510574018126888, | |
| "grad_norm": 0.003233299357816577, | |
| "learning_rate": 1.3829552539446552e-05, | |
| "loss": 0.0119, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.5537332757876565, | |
| "grad_norm": 0.013063711114227772, | |
| "learning_rate": 1.3589755887007819e-05, | |
| "loss": 0.0411, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.5537332757876565, | |
| "eval_cosine_accuracy@1": 0.6644225789746245, | |
| "eval_cosine_accuracy@10": 0.9680649059209391, | |
| "eval_cosine_accuracy@3": 0.8898670809597791, | |
| "eval_cosine_accuracy@5": 0.9335404798895218, | |
| "eval_cosine_map@100": 0.7848911785594413, | |
| "eval_cosine_mrr@10": 0.7833323743214994, | |
| "eval_cosine_ndcg@10": 0.8292454833247894, | |
| "eval_cosine_precision@1": 0.6644225789746245, | |
| "eval_cosine_precision@10": 0.09680649059209388, | |
| "eval_cosine_precision@3": 0.2966223603199264, | |
| "eval_cosine_precision@5": 0.18670809597790436, | |
| "eval_cosine_recall@1": 0.6644225789746245, | |
| "eval_cosine_recall@10": 0.9680649059209391, | |
| "eval_cosine_recall@3": 0.8898670809597791, | |
| "eval_cosine_recall@5": 0.9335404798895218, | |
| "eval_runtime": 467.9161, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.5968925334484245, | |
| "grad_norm": 3.0231621265411377, | |
| "learning_rate": 1.3349959234569087e-05, | |
| "loss": 0.0168, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.640051791109193, | |
| "grad_norm": 0.08278048038482666, | |
| "learning_rate": 1.3110162582130355e-05, | |
| "loss": 0.0059, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.6832110487699612, | |
| "grad_norm": 0.10015950351953506, | |
| "learning_rate": 1.2870365929691622e-05, | |
| "loss": 0.0234, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.7263703064307294, | |
| "grad_norm": 2.1657984256744385, | |
| "learning_rate": 1.263056927725289e-05, | |
| "loss": 0.0184, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.7263703064307294, | |
| "eval_cosine_accuracy@1": 0.6768513723459347, | |
| "eval_cosine_accuracy@10": 0.969963749352667, | |
| "eval_cosine_accuracy@3": 0.897807698947005, | |
| "eval_cosine_accuracy@5": 0.9369929224926635, | |
| "eval_cosine_map@100": 0.7938770196077543, | |
| "eval_cosine_mrr@10": 0.7923516066188262, | |
| "eval_cosine_ndcg@10": 0.8365875778541227, | |
| "eval_cosine_precision@1": 0.6768513723459347, | |
| "eval_cosine_precision@10": 0.09699637493526668, | |
| "eval_cosine_precision@3": 0.29926923298233504, | |
| "eval_cosine_precision@5": 0.1873985844985327, | |
| "eval_cosine_recall@1": 0.6768513723459347, | |
| "eval_cosine_recall@10": 0.969963749352667, | |
| "eval_cosine_recall@3": 0.897807698947005, | |
| "eval_cosine_recall@5": 0.9369929224926635, | |
| "eval_runtime": 467.8044, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.7695295640914976, | |
| "grad_norm": 1.5666255950927734, | |
| "learning_rate": 1.2390772624814159e-05, | |
| "loss": 0.0128, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.8126888217522659, | |
| "grad_norm": 0.00032274972181767225, | |
| "learning_rate": 1.2150975972375427e-05, | |
| "loss": 0.0166, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.855848079413034, | |
| "grad_norm": 0.051935628056526184, | |
| "learning_rate": 1.1911179319936694e-05, | |
| "loss": 0.0181, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.8990073370738023, | |
| "grad_norm": 0.02546406351029873, | |
| "learning_rate": 1.1671382667497964e-05, | |
| "loss": 0.0148, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.8990073370738023, | |
| "eval_cosine_accuracy@1": 0.6744346625237355, | |
| "eval_cosine_accuracy@10": 0.9697911272225099, | |
| "eval_cosine_accuracy@3": 0.8971172104263767, | |
| "eval_cosine_accuracy@5": 0.9388917659243915, | |
| "eval_cosine_map@100": 0.792274316391964, | |
| "eval_cosine_mrr@10": 0.7907476593261165, | |
| "eval_cosine_ndcg@10": 0.8353359235071491, | |
| "eval_cosine_precision@1": 0.6744346625237355, | |
| "eval_cosine_precision@10": 0.09697911272225099, | |
| "eval_cosine_precision@3": 0.2990390701421256, | |
| "eval_cosine_precision@5": 0.1877783531848783, | |
| "eval_cosine_recall@1": 0.6744346625237355, | |
| "eval_cosine_recall@10": 0.9697911272225099, | |
| "eval_cosine_recall@3": 0.8971172104263767, | |
| "eval_cosine_recall@5": 0.9388917659243915, | |
| "eval_runtime": 467.8952, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.9421665947345705, | |
| "grad_norm": 0.009108115918934345, | |
| "learning_rate": 1.143158601505923e-05, | |
| "loss": 0.0225, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.9853258523953388, | |
| "grad_norm": 0.06883949786424637, | |
| "learning_rate": 1.1191789362620497e-05, | |
| "loss": 0.0158, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.028485110056107, | |
| "grad_norm": 0.00019052527204621583, | |
| "learning_rate": 1.0951992710181767e-05, | |
| "loss": 0.0123, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.071644367716875, | |
| "grad_norm": 0.005655207671225071, | |
| "learning_rate": 1.0712196057743034e-05, | |
| "loss": 0.0173, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.071644367716875, | |
| "eval_cosine_accuracy@1": 0.6718453305713793, | |
| "eval_cosine_accuracy@10": 0.9685827723114103, | |
| "eval_cosine_accuracy@3": 0.8934921456930779, | |
| "eval_cosine_accuracy@5": 0.9383738995339203, | |
| "eval_cosine_map@100": 0.7895192117982024, | |
| "eval_cosine_mrr@10": 0.7879250134946668, | |
| "eval_cosine_ndcg@10": 0.832874525127316, | |
| "eval_cosine_precision@1": 0.6718453305713793, | |
| "eval_cosine_precision@10": 0.09685827723114103, | |
| "eval_cosine_precision@3": 0.297830715231026, | |
| "eval_cosine_precision@5": 0.18767477990678402, | |
| "eval_cosine_recall@1": 0.6718453305713793, | |
| "eval_cosine_recall@10": 0.9685827723114103, | |
| "eval_cosine_recall@3": 0.8934921456930779, | |
| "eval_cosine_recall@5": 0.9383738995339203, | |
| "eval_runtime": 468.4558, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.1148036253776437, | |
| "grad_norm": 0.1119648739695549, | |
| "learning_rate": 1.0472399405304304e-05, | |
| "loss": 0.0167, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.1579628830384117, | |
| "grad_norm": 0.03796195238828659, | |
| "learning_rate": 1.023260275286557e-05, | |
| "loss": 0.0125, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.20112214069918, | |
| "grad_norm": 0.012651159428060055, | |
| "learning_rate": 9.992806100426838e-06, | |
| "loss": 0.013, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.244281398359948, | |
| "grad_norm": 0.0021349990274757147, | |
| "learning_rate": 9.753009447988107e-06, | |
| "loss": 0.0079, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.244281398359948, | |
| "eval_cosine_accuracy@1": 0.669255998619023, | |
| "eval_cosine_accuracy@10": 0.9709994821336095, | |
| "eval_cosine_accuracy@3": 0.8950457448644916, | |
| "eval_cosine_accuracy@5": 0.9390643880545486, | |
| "eval_cosine_map@100": 0.7897457483356454, | |
| "eval_cosine_mrr@10": 0.7882845059308039, | |
| "eval_cosine_ndcg@10": 0.8337888145070348, | |
| "eval_cosine_precision@1": 0.669255998619023, | |
| "eval_cosine_precision@10": 0.09709994821336093, | |
| "eval_cosine_precision@3": 0.29834858162149724, | |
| "eval_cosine_precision@5": 0.18781287761090973, | |
| "eval_cosine_recall@1": 0.669255998619023, | |
| "eval_cosine_recall@10": 0.9709994821336095, | |
| "eval_cosine_recall@3": 0.8950457448644916, | |
| "eval_cosine_recall@5": 0.9390643880545486, | |
| "eval_runtime": 467.762, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.2874406560207166, | |
| "grad_norm": 0.4521012306213379, | |
| "learning_rate": 9.513212795549375e-06, | |
| "loss": 0.007, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.3305999136814846, | |
| "grad_norm": 0.0015283157117664814, | |
| "learning_rate": 9.273416143110643e-06, | |
| "loss": 0.0171, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.373759171342253, | |
| "grad_norm": 0.0033215314615517855, | |
| "learning_rate": 9.033619490671912e-06, | |
| "loss": 0.0058, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.416918429003021, | |
| "grad_norm": 4.302379131317139, | |
| "learning_rate": 8.793822838233178e-06, | |
| "loss": 0.0048, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.416918429003021, | |
| "eval_cosine_accuracy@1": 0.6825479026411186, | |
| "eval_cosine_accuracy@10": 0.9718625927843949, | |
| "eval_cosine_accuracy@3": 0.8993612981184188, | |
| "eval_cosine_accuracy@5": 0.9390643880545486, | |
| "eval_cosine_map@100": 0.7983751737002095, | |
| "eval_cosine_mrr@10": 0.7969948679166703, | |
| "eval_cosine_ndcg@10": 0.8405363983140419, | |
| "eval_cosine_precision@1": 0.6825479026411186, | |
| "eval_cosine_precision@10": 0.09718625927843948, | |
| "eval_cosine_precision@3": 0.2997870993728063, | |
| "eval_cosine_precision@5": 0.18781287761090973, | |
| "eval_cosine_recall@1": 0.6825479026411186, | |
| "eval_cosine_recall@10": 0.9718625927843949, | |
| "eval_cosine_recall@3": 0.8993612981184188, | |
| "eval_cosine_recall@5": 0.9390643880545486, | |
| "eval_runtime": 467.6926, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.4600776866637895, | |
| "grad_norm": 0.001049822778441012, | |
| "learning_rate": 8.554026185794447e-06, | |
| "loss": 0.005, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 2.5032369443245575, | |
| "grad_norm": 0.0011170560028403997, | |
| "learning_rate": 8.314229533355715e-06, | |
| "loss": 0.0141, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.546396201985326, | |
| "grad_norm": 0.0026090971659868956, | |
| "learning_rate": 8.074432880916982e-06, | |
| "loss": 0.0132, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 2.5895554596460943, | |
| "grad_norm": 7.936817564768717e-05, | |
| "learning_rate": 7.83463622847825e-06, | |
| "loss": 0.006, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.5895554596460943, | |
| "eval_cosine_accuracy@1": 0.6911790091489729, | |
| "eval_cosine_accuracy@10": 0.9735888140859659, | |
| "eval_cosine_accuracy@3": 0.9092007595373727, | |
| "eval_cosine_accuracy@5": 0.9442430519592612, | |
| "eval_cosine_map@100": 0.8050289389600185, | |
| "eval_cosine_mrr@10": 0.8036913735515502, | |
| "eval_cosine_ndcg@10": 0.8461133955612519, | |
| "eval_cosine_precision@1": 0.6911790091489729, | |
| "eval_cosine_precision@10": 0.09735888140859657, | |
| "eval_cosine_precision@3": 0.3030669198457909, | |
| "eval_cosine_precision@5": 0.18884861039185225, | |
| "eval_cosine_recall@1": 0.6911790091489729, | |
| "eval_cosine_recall@10": 0.9735888140859659, | |
| "eval_cosine_recall@3": 0.9092007595373727, | |
| "eval_cosine_recall@5": 0.9442430519592612, | |
| "eval_runtime": 467.8028, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.6327147173068624, | |
| "grad_norm": 0.014025676064193249, | |
| "learning_rate": 7.5948395760395184e-06, | |
| "loss": 0.0095, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 2.6758739749676304, | |
| "grad_norm": 0.0240753386169672, | |
| "learning_rate": 7.355042923600787e-06, | |
| "loss": 0.0061, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.719033232628399, | |
| "grad_norm": 0.051389552652835846, | |
| "learning_rate": 7.115246271162055e-06, | |
| "loss": 0.0107, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 2.7621924902891672, | |
| "grad_norm": 0.0053047193214297295, | |
| "learning_rate": 6.875449618723323e-06, | |
| "loss": 0.0157, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.7621924902891672, | |
| "eval_cosine_accuracy@1": 0.689452787847402, | |
| "eval_cosine_accuracy@10": 0.9723804591748663, | |
| "eval_cosine_accuracy@3": 0.9074745382358018, | |
| "eval_cosine_accuracy@5": 0.9442430519592612, | |
| "eval_cosine_map@100": 0.8041420474637542, | |
| "eval_cosine_mrr@10": 0.8027525694667068, | |
| "eval_cosine_ndcg@10": 0.8451171490975874, | |
| "eval_cosine_precision@1": 0.689452787847402, | |
| "eval_cosine_precision@10": 0.09723804591748661, | |
| "eval_cosine_precision@3": 0.3024915127452673, | |
| "eval_cosine_precision@5": 0.18884861039185225, | |
| "eval_cosine_recall@1": 0.689452787847402, | |
| "eval_cosine_recall@10": 0.9723804591748663, | |
| "eval_cosine_recall@3": 0.9074745382358018, | |
| "eval_cosine_recall@5": 0.9442430519592612, | |
| "eval_runtime": 467.7248, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.8053517479499352, | |
| "grad_norm": 0.005983938928693533, | |
| "learning_rate": 6.635652966284592e-06, | |
| "loss": 0.005, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 2.8485110056107033, | |
| "grad_norm": 0.006458807270973921, | |
| "learning_rate": 6.395856313845859e-06, | |
| "loss": 0.0087, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.8916702632714717, | |
| "grad_norm": 0.00440911203622818, | |
| "learning_rate": 6.1560596614071276e-06, | |
| "loss": 0.0064, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 2.93482952093224, | |
| "grad_norm": 0.0034452094696462154, | |
| "learning_rate": 5.916263008968395e-06, | |
| "loss": 0.005, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.93482952093224, | |
| "eval_cosine_accuracy@1": 0.6884170550664596, | |
| "eval_cosine_accuracy@10": 0.9725530813050233, | |
| "eval_cosine_accuracy@3": 0.9083376488865873, | |
| "eval_cosine_accuracy@5": 0.9463145175211463, | |
| "eval_cosine_map@100": 0.8037708008346327, | |
| "eval_cosine_mrr@10": 0.8023887614773162, | |
| "eval_cosine_ndcg@10": 0.8449160090668899, | |
| "eval_cosine_precision@1": 0.6884170550664596, | |
| "eval_cosine_precision@10": 0.0972553081305023, | |
| "eval_cosine_precision@3": 0.30277921629552906, | |
| "eval_cosine_precision@5": 0.18926290350422922, | |
| "eval_cosine_recall@1": 0.6884170550664596, | |
| "eval_cosine_recall@10": 0.9725530813050233, | |
| "eval_cosine_recall@3": 0.9083376488865873, | |
| "eval_cosine_recall@5": 0.9463145175211463, | |
| "eval_runtime": 467.6593, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.977988778593008, | |
| "grad_norm": 1.5224103927612305, | |
| "learning_rate": 5.6764663565296625e-06, | |
| "loss": 0.0115, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 3.0211480362537766, | |
| "grad_norm": 0.007577585522085428, | |
| "learning_rate": 5.436669704090931e-06, | |
| "loss": 0.0079, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 3.0643072939145446, | |
| "grad_norm": 0.01359875500202179, | |
| "learning_rate": 5.196873051652199e-06, | |
| "loss": 0.0045, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 3.107466551575313, | |
| "grad_norm": 0.005014342721551657, | |
| "learning_rate": 4.9570763992134675e-06, | |
| "loss": 0.0029, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 3.107466551575313, | |
| "eval_cosine_accuracy@1": 0.6875539444156741, | |
| "eval_cosine_accuracy@10": 0.972035214914552, | |
| "eval_cosine_accuracy@3": 0.9067840497151735, | |
| "eval_cosine_accuracy@5": 0.9442430519592612, | |
| "eval_cosine_map@100": 0.8031759037555115, | |
| "eval_cosine_mrr@10": 0.8017571836836468, | |
| "eval_cosine_ndcg@10": 0.8443043752760462, | |
| "eval_cosine_precision@1": 0.6875539444156741, | |
| "eval_cosine_precision@10": 0.09720352149145518, | |
| "eval_cosine_precision@3": 0.3022613499050578, | |
| "eval_cosine_precision@5": 0.18884861039185225, | |
| "eval_cosine_recall@1": 0.6875539444156741, | |
| "eval_cosine_recall@10": 0.972035214914552, | |
| "eval_cosine_recall@3": 0.9067840497151735, | |
| "eval_cosine_recall@5": 0.9442430519592612, | |
| "eval_runtime": 467.7266, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 3.150625809236081, | |
| "grad_norm": 0.015572451055049896, | |
| "learning_rate": 4.717279746774736e-06, | |
| "loss": 0.0161, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 3.1937850668968495, | |
| "grad_norm": 0.004311546217650175, | |
| "learning_rate": 4.477483094336003e-06, | |
| "loss": 0.0144, | |
| "step": 37000 | |
| }, | |
| { | |
| "epoch": 3.2369443245576175, | |
| "grad_norm": 0.0009289888548664749, | |
| "learning_rate": 4.237686441897272e-06, | |
| "loss": 0.0076, | |
| "step": 37500 | |
| }, | |
| { | |
| "epoch": 3.280103582218386, | |
| "grad_norm": 0.0010557913919910789, | |
| "learning_rate": 3.997889789458539e-06, | |
| "loss": 0.0157, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 3.280103582218386, | |
| "eval_cosine_accuracy@1": 0.6977386500949422, | |
| "eval_cosine_accuracy@10": 0.9747971689970655, | |
| "eval_cosine_accuracy@3": 0.909891248058001, | |
| "eval_cosine_accuracy@5": 0.9470050060417745, | |
| "eval_cosine_map@100": 0.809749193191093, | |
| "eval_cosine_mrr@10": 0.8084805416498834, | |
| "eval_cosine_ndcg@10": 0.8499994995327701, | |
| "eval_cosine_precision@1": 0.6977386500949422, | |
| "eval_cosine_precision@10": 0.09747971689970651, | |
| "eval_cosine_precision@3": 0.30329708268600036, | |
| "eval_cosine_precision@5": 0.18940100120835487, | |
| "eval_cosine_recall@1": 0.6977386500949422, | |
| "eval_cosine_recall@10": 0.9747971689970655, | |
| "eval_cosine_recall@3": 0.909891248058001, | |
| "eval_cosine_recall@5": 0.9470050060417745, | |
| "eval_runtime": 467.9009, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 38000 | |
| }, | |
| { | |
| "epoch": 3.323262839879154, | |
| "grad_norm": 0.002490697894245386, | |
| "learning_rate": 3.7580931370198075e-06, | |
| "loss": 0.0039, | |
| "step": 38500 | |
| }, | |
| { | |
| "epoch": 3.3664220975399224, | |
| "grad_norm": 0.0011037011863663793, | |
| "learning_rate": 3.518296484581076e-06, | |
| "loss": 0.0045, | |
| "step": 39000 | |
| }, | |
| { | |
| "epoch": 3.4095813552006904, | |
| "grad_norm": 0.008491401560604572, | |
| "learning_rate": 3.2784998321423433e-06, | |
| "loss": 0.0033, | |
| "step": 39500 | |
| }, | |
| { | |
| "epoch": 3.452740612861459, | |
| "grad_norm": 0.0002366910339333117, | |
| "learning_rate": 3.0387031797036116e-06, | |
| "loss": 0.0064, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 3.452740612861459, | |
| "eval_cosine_accuracy@1": 0.6832383911617469, | |
| "eval_cosine_accuracy@10": 0.97393405834628, | |
| "eval_cosine_accuracy@3": 0.9062661833247022, | |
| "eval_cosine_accuracy@5": 0.9464871396513033, | |
| "eval_cosine_map@100": 0.8011659555812971, | |
| "eval_cosine_mrr@10": 0.7998895081365299, | |
| "eval_cosine_ndcg@10": 0.8433601615941685, | |
| "eval_cosine_precision@1": 0.6832383911617469, | |
| "eval_cosine_precision@10": 0.097393405834628, | |
| "eval_cosine_precision@3": 0.30208872777490076, | |
| "eval_cosine_precision@5": 0.18929742793026064, | |
| "eval_cosine_recall@1": 0.6832383911617469, | |
| "eval_cosine_recall@10": 0.97393405834628, | |
| "eval_cosine_recall@3": 0.9062661833247022, | |
| "eval_cosine_recall@5": 0.9464871396513033, | |
| "eval_runtime": 467.6658, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 40000 | |
| }, | |
| { | |
| "epoch": 3.495899870522227, | |
| "grad_norm": 0.0015487176133319736, | |
| "learning_rate": 2.7989065272648796e-06, | |
| "loss": 0.0054, | |
| "step": 40500 | |
| }, | |
| { | |
| "epoch": 3.5390591281829953, | |
| "grad_norm": 1.1207655668258667, | |
| "learning_rate": 2.559109874826148e-06, | |
| "loss": 0.0061, | |
| "step": 41000 | |
| }, | |
| { | |
| "epoch": 3.5822183858437633, | |
| "grad_norm": 0.0002378961944486946, | |
| "learning_rate": 2.319313222387416e-06, | |
| "loss": 0.0051, | |
| "step": 41500 | |
| }, | |
| { | |
| "epoch": 3.6253776435045317, | |
| "grad_norm": 0.0002853251644410193, | |
| "learning_rate": 2.0795165699486837e-06, | |
| "loss": 0.0019, | |
| "step": 42000 | |
| }, | |
| { | |
| "epoch": 3.6253776435045317, | |
| "eval_cosine_accuracy@1": 0.6910063870188158, | |
| "eval_cosine_accuracy@10": 0.9742793026065941, | |
| "eval_cosine_accuracy@3": 0.9109269808389435, | |
| "eval_cosine_accuracy@5": 0.9461418953909891, | |
| "eval_cosine_map@100": 0.8061197699360279, | |
| "eval_cosine_mrr@10": 0.804833419644399, | |
| "eval_cosine_ndcg@10": 0.8471731447814336, | |
| "eval_cosine_precision@1": 0.6910063870188158, | |
| "eval_cosine_precision@10": 0.09742793026065939, | |
| "eval_cosine_precision@3": 0.30364232694631454, | |
| "eval_cosine_precision@5": 0.18922837907819778, | |
| "eval_cosine_recall@1": 0.6910063870188158, | |
| "eval_cosine_recall@10": 0.9742793026065941, | |
| "eval_cosine_recall@3": 0.9109269808389435, | |
| "eval_cosine_recall@5": 0.9461418953909891, | |
| "eval_runtime": 467.6854, | |
| "eval_samples_per_second": 0.0, | |
| "eval_steps_per_second": 0.0, | |
| "step": 42000 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 46340, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 2000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |