Upload 9 files

Browse files

Files changed (9) hide show

part_d_hitl_finetune.err +0 -0
part_d_hitl_finetune.out +103 -0
part_d_hitl_finetune.py +456 -0
prepare_gold.err +0 -0
prepare_gold.out +7 -0
prepare_gold.py +17 -0
qlora_finetune.err +7 -0
qlora_finetune.out +44 -0
qlora_finetune.py +193 -0

part_d_hitl_finetune.err ADDED Viewed

The diff for this file is too large to render. See raw diff

part_d_hitl_finetune.out ADDED Viewed

	@@ -0,0 +1,103 @@

+╔══════════════════════════════════════════════════════════╗
+║  PART D — Targeted Human Review & Final Integration     ║
+╚══════════════════════════════════════════════════════════╝
+============================================================
+PART D — STEP 1: Exception-Based Human-in-the-Loop
+============================================================
+All 100 gold labels already present. ✓
+  Green (1)     : 1
+  Non-Green (0) : 99
+  Auto-accepted (Judge decision)  : 100
+  Human-reviewed (low/error conf) : 0
+============================================================
+PART D — STEP 2: Disagreement Report
+============================================================
+──────────────────────────────────────────────────
+DISAGREEMENT SUMMARY
+──────────────────────────────────────────────────
+Total claims labeled             : 100
+Auto-accepted (Judge decision)   : 100
+Required human intervention      : 0
+Human overrode Judge             : 0
+Judge confidence distribution:
+judge_confidence
+high    100
+Final gold label distribution:
+  Green (1)     : 1
+  Non-Green (0) : 99
+════════════════════════════════════════════════════════════
+FOR YOUR REPORT / README:
+════════════════════════════════════════════════════════════
+## Part D — Disagreement Report
+The Multi-Agent System (Advocate, Skeptic, Judge) labeled all 100 high-risk
+patent claims selected via uncertainty sampling.
+**Agent Setup:**
+- **Advocate** (Local QLoRA fine-tuned Mistral-7B): Argued FOR green classification
+- **Skeptic** (Groq Llama-3.1-8B): Argued AGAINST green classification
+- **Judge** (Groq Llama-3.1-8B): Weighed both arguments, produced final verdict
+| Metric | Value |
+|--------|-------|
+| Total claims | 100 |
+| Auto-accepted (high/medium confidence) | 100 |
+| Required human intervention | 0 |
+| Human overrode Judge | 0 |
+| Final Green labels | 1 |
+| Final Non-Green labels | 99 |
+The agents disagreed (low confidence / deadlock) on **0 out of 100 claims**.
+All claims reached consensus — no human intervention was required.
+The remaining 100 claims were auto-accepted based on the Judge's
+high/medium confidence decision.
+Gold labels exported to: hitl_green_100_gold_partd.csv
+============================================================
+PART D — STEP 3: Merging Gold Labels into Dataset
+============================================================
+Main dataset rows  : 50,000
+Gold labels merged : 100
+Splits:
+split
+train_silver      40000
+eval_silver        5000
+pool_unlabeled     5000
+Saved: patents_50k_green_with_gold_partd.parquet
+============================================================
+PART D — STEP 4: Fine-Tuning PatentSBERTa
+============================================================
+Training set (silver + gold) : 40,100
+  - from train_silver        : 40,000
+  - from gold_100            : 100
+  - after dedup              : 40,100
+Eval set (eval_silver)       : 5,000
+Gold test set (gold_100)     : 100
+Label distribution in training set:
+  Green (1)     : 20,001
+  Non-Green (0) : 20,099
+Loading AI-Growth-Lab/PatentSBERTa...
+Model loaded.
+Tokenizing datasets...
+──────────────────────────────────────────────────
+Training PatentSBERTa
+──────────────────────────────────────────────────
+  Epochs         : 3
+  Learning rate  : 2e-05
+  Batch size     : 16
+  Train examples : 40,100

part_d_hitl_finetune.py ADDED Viewed

	@@ -0,0 +1,456 @@

+# PART D — Targeted Human Review & Final Integration
+# 1. IMPORTS
+import os
+import sys
+import numpy as np
+import pandas as pd
+from datasets import Dataset
+from transformers import (
+    AutoTokenizer,
+    AutoModelForSequenceClassification,
+    TrainingArguments,
+    Trainer,
+)
+from sklearn.metrics import classification_report, accuracy_score, precision_recall_fscore_support
+# 2. PARAMETERS
+# Checkpoint name passed to both the tokenizer and the sequence-classification model loader.
+MODEL_NAME        = "AI-Growth-Lab/PatentSBERTa"
+# Tokenizer max_length; texts are truncated and padded to this length.
+MAX_SEQ_LEN       = 256
+# The next six values are forwarded unchanged to TrainingArguments in finetune_patentsberta().
+NUM_EPOCHS        = 3
+LEARNING_RATE     = 2e-5
+# Used for both the per-device train and the per-device eval batch size.
+BATCH_SIZE        = 16
+WEIGHT_DECAY      = 0.01
+LOGGING_STEPS     = 50
+RANDOM_SEED       = 42
+# Multi-agent labeling output; read and rewritten in place by run_exception_hitl(), read by generate_report().
+MAS_CSV           = "mas_labeled_100.csv"
+# Gold-label export written by generate_report() and read by merge_gold_labels().
+HITL_GOLD_CSV     = "hitl_green_100_gold_partd.csv"
+# Input dataset for merge_gold_labels().
+PARQUET_FILE      = "patents_50k_green.parquet"
+# Output of merge_gold_labels() and input of finetune_patentsberta().
+PARQUET_GOLD_FILE = "patents_50k_green_with_gold_partd.parquet"
+# Directory where the fine-tuned model and tokenizer are saved.
+FINETUNED_DIR     = "./patent_sberta_finetuned_partd"
+# 3. STEP 1 — EXCEPTION-BASED HITL
+def run_exception_hitl():
+    """
+    Exception-Based Human-in-the-Loop:
+    - Auto-accept Judge decisions where confidence is high/medium
+    - Flag low/error cases for human review
+    """
+    print("=" * 60)
+    print("PART D — STEP 1: Exception-Based Human-in-the-Loop")
+    print("=" * 60)
+    if not os.path.exists(MAS_CSV):
+        print(f"❌ '{MAS_CSV}' not found. Run multi_agent_labeling.py first.")
+        sys.exit(1)
+    df = pd.read_csv(MAS_CSV)
+    # Convert is_green_gold to numeric (may have empty strings)
+    df["is_green_gold"] = pd.to_numeric(df["is_green_gold"], errors="coerce")
+    # Check if all gold labels are already filled
+    if df["is_green_gold"].notna().sum() == len(df):
+        print(f"All {len(df)} gold labels already present. ✓")
+        print(f"  Green (1)     : {(df['is_green_gold'] == 1).sum()}")
+        print(f"  Non-Green (0) : {(df['is_green_gold'] == 0).sum()}")
+        # Identify which were auto-accepted vs human-reviewed
+        auto_mask = df["needs_human_review"] == 0
+        human_mask = df["needs_human_review"] == 1
+        print(f"\n  Auto-accepted (Judge decision)  : {auto_mask.sum()}")
+        print(f"  Human-reviewed (low/error conf) : {human_mask.sum()}")
+        return True
+    # Auto-accept high/medium confidence Judge decisions
+    auto_mask = (
+        (df["judge_confidence"].isin(["high", "medium"])) &
+        (df["judge_label"].isin([0, 1])) &
+        (df["is_green_gold"].isna())
+    )
+    df.loc[auto_mask, "is_green_gold"] = df.loc[auto_mask, "judge_label"]
+    df.loc[auto_mask, "human_notes"] = "Auto-accepted (Judge confidence: " + df.loc[auto_mask, "judge_confidence"] + ")"
+    auto_accepted = auto_mask.sum()
+    needs_review = df["is_green_gold"].isna().sum()
+    print(f"\n  Auto-accepted (high/medium confidence) : {auto_accepted}")
+    print(f"  Needs human review (low/error)         : {needs_review}")
+    print(f"  Total                                  : {len(df)}")
+    df.to_csv(MAS_CSV, index=False)
+    if needs_review > 0:
+        review_rows = df[df["is_green_gold"].isna()]
+        print(f"\n{'─' * 60}")
+        print(f"HUMAN REVIEW NEEDED for {needs_review} claims:")
+        print(f"{'─' * 60}")
+        for idx, (_, row) in enumerate(review_rows.iterrows(), 1):
+            print(f"\n  [{idx}/{needs_review}] doc_id: {row['doc_id']}")
+            print(f"  Judge said   : {row['judge_label']} ({row['judge_confidence']})")
+            print(f"  Advocate     : {str(row['advocate_argument'])[:120]}...")
+            print(f"  Skeptic      : {str(row['skeptic_argument'])[:120]}...")
+            print(f"  Claim        : {str(row['text'])[:150]}...")
+        print(f"""
+  INSTRUCTIONS:
+  1. Open '{MAS_CSV}'
+  2. Find rows where is_green_gold is EMPTY
+  3. Read the claim + agent arguments
+  4. Set is_green_gold = 0 or 1
+  5. Save and re-run: python part_d_hitl_finetune.py
+""")
+        return False
+    print("\nAll labels complete. ✓")
+    return True
+# 4. STEP 2 — DISAGREEMENT REPORT
+def generate_report():
+    """Generate disagreement report for README."""
+    print("\n" + "=" * 60)
+    print("PART D — STEP 2: Disagreement Report")
+    print("=" * 60)
+    df = pd.read_csv(MAS_CSV)
+    df["is_green_gold"] = pd.to_numeric(df["is_green_gold"], errors="coerce").astype(int)
+    df["judge_label"] = pd.to_numeric(df["judge_label"], errors="coerce")
+    total = len(df)
+    human_reviewed = (df["needs_human_review"] == 1).sum()
+    auto_accepted = total - human_reviewed
+    # Cases where human overrode the judge
+    valid = df[df["judge_label"].isin([0, 1])].copy()
+    overrides = 0
+    if len(valid) > 0:
+        overrides = (valid["judge_label"].astype(int) != valid["is_green_gold"].astype(int)).sum()
+    print(f"\n{'─' * 50}")
+    print(f"DISAGREEMENT SUMMARY")
+    print(f"{'─' * 50}")
+    print(f"Total claims labeled             : {total}")
+    print(f"Auto-accepted (Judge decision)   : {auto_accepted}")
+    print(f"Required human intervention      : {human_reviewed}")
+    print(f"Human overrode Judge             : {overrides}")
+    print(f"\nJudge confidence distribution:")
+    print(df["judge_confidence"].value_counts().to_string())
+    print(f"\nFinal gold label distribution:")
+    print(f"  Green (1)     : {(df['is_green_gold'] == 1).sum()}")
+    print(f"  Non-Green (0) : {(df['is_green_gold'] == 0).sum()}")
+    # README block
+    print(f"\n{'═' * 60}")
+    print("FOR YOUR REPORT / README:")
+    print(f"{'═' * 60}")
+    print(f"""
+## Part D — Disagreement Report
+The Multi-Agent System (Advocate, Skeptic, Judge) labeled all {total} high-risk
+patent claims selected via uncertainty sampling.
+**Agent Setup:**
+- **Advocate** (Local QLoRA fine-tuned Mistral-7B): Argued FOR green classification
+- **Skeptic** (Groq Llama-3.1-8B): Argued AGAINST green classification
+- **Judge** (Groq Llama-3.1-8B): Weighed both arguments, produced final verdict
+| Metric | Value |
+|--------|-------|
+| Total claims | {total} |
+| Auto-accepted (high/medium confidence) | {auto_accepted} |
+| Required human intervention | {human_reviewed} |
+| Human overrode Judge | {overrides} |
+| Final Green labels | {(df['is_green_gold'] == 1).sum()} |
+| Final Non-Green labels | {(df['is_green_gold'] == 0).sum()} |
+The agents disagreed (low confidence / deadlock) on **{human_reviewed} out of {total} claims**.
+{"For these cases, the human reviewer read the AI rationale and provided final judgment." if human_reviewed > 0 else "All claims reached consensus — no human intervention was required."}
+The remaining {auto_accepted} claims were auto-accepted based on the Judge's
+high/medium confidence decision.
+""")
+    # Export clean gold CSV
+    gold_export = df[["doc_id", "text", "p_green", "u",
+                       "advocate_argument", "skeptic_argument",
+                       "judge_label", "judge_confidence", "judge_rationale",
+                       "needs_human_review", "is_green_gold", "human_notes"]].copy()
+    gold_export.to_csv(HITL_GOLD_CSV, index=False)
+    print(f"Gold labels exported to: {HITL_GOLD_CSV}")
+# 5. STEP 3 — MERGE GOLD LABELS INTO MAIN DATASET
+def merge_gold_labels():
+    """Merge 100 gold labels into the main parquet dataset."""
+    print("\n" + "=" * 60)
+    print("PART D — STEP 3: Merging Gold Labels into Dataset")
+    print("=" * 60)
+    if not os.path.exists(PARQUET_FILE):
+        print(f"❌ '{PARQUET_FILE}' not found.")
+        sys.exit(1)
+    main_df = pd.read_parquet(PARQUET_FILE)
+    gold_df = pd.read_csv(HITL_GOLD_CSV)
+    gold_df["is_green_gold"] = pd.to_numeric(gold_df["is_green_gold"], errors="coerce").astype(int)
+    gold_labels = gold_df[["doc_id", "is_green_gold"]].copy()
+    # Ensure matching types
+    main_df["doc_id"] = main_df["doc_id"].astype(str)
+    gold_labels["doc_id"] = gold_labels["doc_id"].astype(str)
+    # Drop existing gold column if present
+    if "is_green_gold" in main_df.columns:
+        main_df = main_df.drop(columns=["is_green_gold"])
+    main_df = main_df.merge(gold_labels, on="doc_id", how="left")
+    # Create final label: gold overrides silver where available
+    main_df["is_green_final"] = main_df["is_green_silver"]
+    gold_mask = main_df["is_green_gold"].notna()
+    main_df.loc[gold_mask, "is_green_final"] = main_df.loc[gold_mask, "is_green_gold"].astype(int)
+    main_df["is_green_final"] = main_df["is_green_final"].astype(int)
+    main_df.to_parquet(PARQUET_GOLD_FILE, index=False)
+    print(f"Main dataset rows  : {len(main_df):,}")
+    print(f"Gold labels merged : {gold_mask.sum()}")
+    print(f"Splits:")
+    print(main_df["split"].value_counts().to_string())
+    print(f"\nSaved: {PARQUET_GOLD_FILE}")
+# 6. STEP 4 — FINE-TUNE PATENTSBERTA
+def finetune_patentsberta():
+    """Fine-tune PatentSBERTa on train_silver + gold_100."""
+    print("\n" + "=" * 60)
+    print("PART D — STEP 4: Fine-Tuning PatentSBERTa")
+    print("=" * 60)
+    df = pd.read_parquet(PARQUET_GOLD_FILE)
+    # Build training set: train_silver + gold_100
+    train_silver = df[df["split"] == "train_silver"].copy()
+    gold_100 = df[df["is_green_gold"].notna()].copy()
+    # Combine and deduplicate
+    train_combined = pd.concat([train_silver, gold_100]).drop_duplicates(
+        subset="doc_id"
+    ).reset_index(drop=True)
+    # Use is_green_final as label (gold overrides silver)
+    train_data = train_combined[["text", "is_green_final"]].rename(
+        columns={"is_green_final": "label"}
+    )
+    # Eval set: eval_silver
+    eval_data = df[df["split"] == "eval_silver"][["text", "is_green_final"]].rename(
+        columns={"is_green_final": "label"}
+    )
+    # Gold test set: gold_100
+    gold_data = df[df["is_green_gold"].notna()][["text", "is_green_final"]].rename(
+        columns={"is_green_final": "label"}
+    )
+    print(f"Training set (silver + gold) : {len(train_data):,}")
+    print(f"  - from train_silver        : {len(train_silver):,}")
+    print(f"  - from gold_100            : {len(gold_100)}")
+    print(f"  - after dedup              : {len(train_data):,}")
+    print(f"Eval set (eval_silver)       : {len(eval_data):,}")
+    print(f"Gold test set (gold_100)     : {len(gold_data)}")
+    print(f"\nLabel distribution in training set:")
+    print(f"  Green (1)     : {(train_data['label'] == 1).sum():,}")
+    print(f"  Non-Green (0) : {(train_data['label'] == 0).sum():,}")
+    # Convert to HuggingFace datasets
+    train_dataset = Dataset.from_pandas(train_data.reset_index(drop=True))
+    eval_dataset = Dataset.from_pandas(eval_data.reset_index(drop=True))
+    gold_dataset = Dataset.from_pandas(gold_data.reset_index(drop=True))
+    # Load PatentSBERTa
+    print(f"\nLoading {MODEL_NAME}...")
+    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+    model = AutoModelForSequenceClassification.from_pretrained(
+        MODEL_NAME, num_labels=2
+    )
+    print("Model loaded.")
+    # Tokenize
+    def tokenize_fn(batch):
+        return tokenizer(
+            batch["text"],
+            padding="max_length",
+            truncation=True,
+            max_length=MAX_SEQ_LEN,
+        )
+    print("Tokenizing datasets...")
+    train_dataset = train_dataset.map(tokenize_fn, batched=True)
+    eval_dataset = eval_dataset.map(tokenize_fn, batched=True)
+    gold_dataset = gold_dataset.map(tokenize_fn, batched=True)
+    for ds in [train_dataset, eval_dataset, gold_dataset]:
+        ds.set_format("torch", columns=["input_ids", "attention_mask", "label"])
+    # Metrics
+    def compute_metrics(eval_pred):
+        logits, labels = eval_pred
+        preds = np.argmax(logits, axis=-1)
+        precision, recall, f1, _ = precision_recall_fscore_support(
+            labels, preds, average="binary"
+        )
+        acc = accuracy_score(labels, preds)
+        return {
+            "accuracy": acc,
+            "precision": precision,
+            "recall": recall,
+            "f1": f1,
+        }
+    # Training
+    training_args = TrainingArguments(
+        output_dir="./patent_sberta_checkpoints",
+        num_train_epochs=NUM_EPOCHS,
+        learning_rate=LEARNING_RATE,
+        per_device_train_batch_size=BATCH_SIZE,
+        per_device_eval_batch_size=BATCH_SIZE,
+        weight_decay=WEIGHT_DECAY,
+        eval_strategy="epoch",
+        save_strategy="epoch",
+        load_best_model_at_end=True,
+        metric_for_best_model="f1",
+        logging_steps=LOGGING_STEPS,
+        report_to="none",
+        seed=RANDOM_SEED,
+    )
+    trainer = Trainer(
+        model=model,
+        args=training_args,
+        train_dataset=train_dataset,
+        eval_dataset=eval_dataset,
+        compute_metrics=compute_metrics,
+    )
+    print(f"\n{'─' * 50}")
+    print(f"Training PatentSBERTa")
+    print(f"{'─' * 50}")
+    print(f"  Epochs         : {NUM_EPOCHS}")
+    print(f"  Learning rate  : {LEARNING_RATE}")
+    print(f"  Batch size     : {BATCH_SIZE}")
+    print(f"  Train examples : {len(train_dataset):,}")
+    print()
+    trainer.train()
+    #Evaluate on eval_silver
+    print(f"\n{'─' * 50}")
+    print(f"Evaluation on eval_silver ({len(eval_dataset):,} examples)")
+    print(f"{'─' * 50}")
+    eval_results = trainer.evaluate(eval_dataset)
+    for k, v in sorted(eval_results.items()):
+        if isinstance(v, float):
+            print(f"  {k:<25} {v:.4f}")
+    # Evaluate on gold_100
+    print(f"\n{'─' * 50}")
+    print(f"Evaluation on gold_100 ({len(gold_dataset)} examples)")
+    print(f"{'─' * 50}")
+    gold_results = trainer.evaluate(gold_dataset)
+    for k, v in sorted(gold_results.items()):
+        if isinstance(v, float):
+            print(f"  {k:<25} {v:.4f}")
+    #Classification report on gold_100
+    print(f"\nClassification Report (gold_100):")
+    gold_pred_output = trainer.predict(gold_dataset)
+    gold_preds = np.argmax(gold_pred_output.predictions, axis=-1)
+    gold_labels = gold_pred_output.label_ids
+    print(classification_report(
+        gold_labels, gold_preds,
+        target_names=["Non-Green (0)", "Green (1)"],
+        digits=4,
+    ))
+    #Save model
+    trainer.save_model(FINETUNED_DIR)
+    tokenizer.save_pretrained(FINETUNED_DIR)
+    print(f"Model saved to: {FINETUNED_DIR}/")
+    #Print results for README
+    print(f"\n{'═' * 60}")
+    print("FOR YOUR REPORT / README:")
+    print(f"{'═' * 60}")
+    print(f"""
+## Part D — PatentSBERTa Fine-Tuning Results
+**Model:** {MODEL_NAME}
+**Training data:** train_silver ({len(train_silver):,}) + gold_100 ({len(gold_100)}) = {len(train_data):,} examples
+**Epochs:** {NUM_EPOCHS} | **LR:** {LEARNING_RATE} | **Batch:** {BATCH_SIZE}
+### Eval Silver Results
+| Metric    | Score |
+|-----------|-------|
+| Accuracy  | {eval_results.get('eval_accuracy', 0):.4f} |
+| Precision | {eval_results.get('eval_precision', 0):.4f} |
+| Recall    | {eval_results.get('eval_recall', 0):.4f} |
+| F1        | {eval_results.get('eval_f1', 0):.4f} |
+### Gold 100 Results
+| Metric    | Score |
+|-----------|-------|
+| Accuracy  | {gold_results.get('eval_accuracy', 0):.4f} |
+| Precision | {gold_results.get('eval_precision', 0):.4f} |
+| Recall    | {gold_results.get('eval_recall', 0):.4f} |
+| F1        | {gold_results.get('eval_f1', 0):.4f} |
+""")
+# 7. MAIN
+def main():
+    print("╔══════════════════════════════════════════════════════════╗")
+    print("║  PART D — Targeted Human Review & Final Integration     ║")
+    print("╚══════════════════════════════════════════════════════════╝\n")
+    # Step 1: Exception-based HITL
+    labels_done = run_exception_hitl()
+    if not labels_done:
+        print("Complete the human review and re-run this script.")
+        sys.exit(0)
+    # Step 2: Disagreement report
+    generate_report()
+    # Step 3: Merge gold labels
+    merge_gold_labels()
+    # Step 4: Fine-tune PatentSBERTa
+    finetune_patentsberta()
+    print("\n" + "=" * 60)
+    print("✅ PART D COMPLETE")
+    print("=" * 60)
+    print(f"\nFiles created:")
+    print(f"  {HITL_GOLD_CSV}           — gold labels for 100 claims")
+    print(f"  {PARQUET_GOLD_FILE}  — merged dataset")
+    print(f"  {FINETUNED_DIR}/     — fine-tuned PatentSBERTa")
+if __name__ == "__main__":
+    main()

prepare_gold.err ADDED Viewed

File without changes

prepare_gold.out ADDED Viewed

	@@ -0,0 +1,7 @@

+Gold labels set:
+  Green (1)     : 1
+  Non-Green (0) : 99
+  Needs review  : 0
+The agents agreed on all 100 claims (0 deadlocks).
+Saved mas_labeled_100.csv with gold labels filled.

prepare_gold.py ADDED Viewed

	@@ -0,0 +1,17 @@

+"""Prepare gold labels from MAS results for Part D."""
+import pandas as pd
+df = pd.read_csv("mas_labeled_100.csv")
+# Auto-accept all Judge decisions as gold (100% high/medium confidence)
+df["is_green_gold"] = df["judge_label"].astype(int)
+df["human_notes"] = "Accepted Judge decision (high/medium confidence)"
+print(f"Gold labels set:")
+print(f"  Green (1)     : {(df['is_green_gold'] == 1).sum()}")
+print(f"  Non-Green (0) : {(df['is_green_gold'] == 0).sum()}")
+print(f"  Needs review  : 0")
+print(f"\nThe agents agreed on all 100 claims (0 deadlocks).")
+df.to_csv("mas_labeled_100.csv", index=False)
+print(f"Saved mas_labeled_100.csv with gold labels filled.")

qlora_finetune.err ADDED Viewed

@@ -0,0 +1,7 @@
  0%|          | 0/313 [00:00<?, ?it/s]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  0%|          | 1/313 [00:26<2:19:12, 26.77s/it]
  1%|          | 2/313 [00:46<1:57:01, 22.58s/it]
  1%|          | 3/313 [01:06<1:50:23, 21.37s/it]
  1%|▏         | 4/313 [01:26<1:47:12, 20.82s/it]
  2%|▏         | 5/313 [01:46<1:45:23, 20.53s/it]
  2%|▏         | 6/313 [02:06<1:44:13, 20.37s/it]
  2%|▏         | 7/313 [02:26<1:43:30, 20.29s/it]
  3%|▎         | 8/313 [02:46<1:42:47, 20.22s/it]
  3%|▎         | 9/313 [03:06<1:42:13, 20.17s/it]
  3%|▎         | 10/313 [03:26<1:41:42, 20.14s/it]
  4%|▎         | 11/313 [03:46<1:41:02, 20.07s/it]
  4%|▍         | 12/313 [04:06<1:40:35, 20.05s/it]
  4%|▍         | 13/313 [04:26<1:40:07, 20.02s/it]
  4%|▍         | 14/313 [04:46<1:39:38, 19.99s/it]
  5%|▍         | 15/313 [05:06<1:39:11, 19.97s/it]
  5%|▌         | 16/313 [05:26<1:38:43, 19.95s/it]
  5%|▌         | 17/313 [05:46<1:38:35, 19.98s/it]
  6%|▌         | 18/313 [06:06<1:38:10, 19.97s/it]
  6%|▌         | 19/313 [06:26<1:37:56, 19.99s/it]
  6%|▋         | 20/313 [06:46<1:37:48, 20.03s/it]
  7%|▋         | 21/313 [07:06<1:37:31, 20.04s/it]
  7%|▋         | 22/313 [07:26<1:37:08, 20.03s/it]
  7%|▋         | 23/313 [07:46<1:36:52, 20.04s/it]
  8%|▊         | 24/313 [08:06<1:36:39, 20.07s/it]
  8%|▊         | 25/313 [08:26<1:36:16, 20.06s/it]
  8%|▊         | 26/313 [08:46<1:35:59, 20.07s/it]
  9%|▊         | 27/313 [09:06<1:35:33, 20.05s/it]
  9%|▉         | 28/313 [09:26<1:35:08, 20.03s/it]
  9%|▉         | 29/313 [09:47<1:34:55, 20.06s/it]
 10%|▉         | 30/313 [10:07<1:34:36, 20.06s/it]
 10%|▉         | 31/313 [10:27<1:34:09, 20.03s/it]
 10%|█         | 32/313 [10:46<1:33:41, 20.01s/it]
 11%|█         | 33/313 [11:07<1:33:30, 20.04s/it]
 11%|█         | 34/313 [11:27<1:33:05, 20.02s/it]
 11%|█         | 35/313 [11:47<1:32:45, 20.02s/it]
 12%|█▏        | 36/313 [12:07<1:32:26, 20.02s/it]
 12%|█▏        | 37/313 [12:27<1:32:12, 20.04s/it]
 12%|█▏        | 38/313 [12:47<1:31:51, 20.04s/it]
 12%|█▏        | 39/313 [13:07<1:31:26, 20.02s/it]
 13%|█▎        | 40/313 [13:27<1:31:02, 20.01s/it]
 13%|█▎        | 41/313 [13:47<1:30:48, 20.03s/it]
 13%|█▎        | 42/313 [14:07<1:30:22, 20.01s/it]
 14%|█▎        | 43/313 [14:27<1:30:04, 20.02s/it]
 14%|█▍        | 44/313 [14:47<1:29:42, 20.01s/it]
 14%|█▍        | 45/313 [15:07<1:29:22, 20.01s/it]
 15%|█▍        | 46/313 [15:27<1:28:58, 19.99s/it]
 15%|█▌        | 47/313 [15:47<1:28:41, 20.00s/it]
 15%|█▌        | 48/313 [16:07<1:28:24, 20.02s/it]
 16%|█▌        | 49/313 [16:27<1:28:08, 20.03s/it]
 16%|█▌        | 50/313 [16:47<1:27:45, 20.02s/it]
 16%|█▌        | 50/313 [16:47<1:27:45, 20.02s/it]
 16%|█▋        | 51/313 [17:07<1:27:20, 20.00s/it]
 17%|█▋        | 52/313 [17:27<1:27:03, 20.01s/it]
 17%|█▋        | 53/313 [17:47<1:26:48, 20.03s/it]
 17%|█▋        | 54/313 [18:07<1:26:24, 20.02s/it]
 18%|█▊        | 55/313 [18:27<1:26:02, 20.01s/it]
 18%|█▊        | 56/313 [18:47<1:25:38, 19.99s/it]
 18%|█▊        | 57/313 [19:07<1:25:27, 20.03s/it]
 19%|█▊        | 58/313 [19:27<1:25:02, 20.01s/it]
 19%|█▉        | 59/313 [19:47<1:24:54, 20.06s/it]
 19%|█▉        | 60/313 [20:07<1:24:36, 20.07s/it]
 19%|█▉        | 61/313 [20:27<1:24:13, 20.05s/it]
 20%|█▉        | 62/313 [20:47<1:23:45, 20.02s/it]
 20%|██        | 63/313 [21:07<1:23:23, 20.01s/it]
 20%|██        | 64/313 [21:27<1:23:04, 20.02s/it]
 21%|██        | 65/313 [21:47<1:22:45, 20.02s/it]
 21%|██        | 66/313 [22:07<1:22:21, 20.01s/it]
 21%|██▏       | 67/313 [22:27<1:22:06, 20.03s/it]
 22%|██▏       | 68/313 [22:47<1:21:42, 20.01s/it]
 22%|██▏       | 69/313 [23:07<1:21:20, 20.00s/it]
 22%|██▏       | 70/313 [23:27<1:21:03, 20.01s/it]
 23%|██▎       | 71/313 [23:47<1:20:42, 20.01s/it]
 23%|██▎       | 72/313 [24:07<1:20:21, 20.01s/it]
 23%|██▎       | 73/313 [24:27<1:20:00, 20.00s/it]
 24%|██▎       | 74/313 [24:47<1:19:40, 20.00s/it]
 24%|██▍       | 75/313 [25:07<1:19:19, 20.00s/it]
 24%|██▍       | 76/313 [25:27<1:18:59, 20.00s/it]
 25%|██▍       | 77/313 [25:47<1:18:43, 20.01s/it]
 25%|██▍       | 78/313 [26:07<1:18:24, 20.02s/it]
 25%|██▌       | 79/313 [26:27<1:18:09, 20.04s/it]
 26%|██▌       | 80/313 [26:47<1:17:47, 20.03s/it]
 26%|██▌       | 81/313 [27:07<1:17:26, 20.03s/it]
 26%|██▌       | 82/313 [27:28<1:17:05, 20.03s/it]
 27%|██▋       | 83/313 [27:47<1:16:42, 20.01s/it]
 27%|██▋       | 84/313 [28:07<1:16:19, 20.00s/it]
 27%|██▋       | 85/313 [28:27<1:16:01, 20.01s/it]
 27%|██▋       | 86/313 [28:47<1:15:39, 20.00s/it]
 28%|██▊       | 87/313 [29:07<1:15:21, 20.01s/it]
 28%|██▊       | 88/313 [29:28<1:15:02, 20.01s/it]
 28%|██▊       | 89/313 [29:48<1:14:43, 20.02s/it]
 29%|██▉       | 90/313 [30:08<1:14:22, 20.01s/it]
 29%|██▉       | 91/313 [30:28<1:14:03, 20.01s/it]
 29%|██▉       | 92/313 [30:48<1:13:44, 20.02s/it]
 30%|██▉       | 93/313 [31:08<1:13:25, 20.02s/it]
 30%|███       | 94/313 [31:28<1:13:08, 20.04s/it]
 30%|███       | 95/313 [31:48<1:12:40, 20.00s/it]
 31%|███       | 96/313 [32:08<1:12:25, 20.03s/it]
 31%|███       | 97/313 [32:28<1:12:00, 20.00s/it]
 31%|███▏      | 98/313 [32:48<1:11:36, 19.98s/it]
 32%|███▏      | 99/313 [33:08<1:11:20, 20.00s/it]
 32%|███▏      | 100/313 [33:28<1:10:59, 20.00s/it]
 32%|███▏      | 100/313 [33:28<1:10:59, 20.00s/it]
 32%|███▏      | 101/313 [33:48<1:10:40, 20.00s/it]
 33%|███▎      | 102/313 [34:08<1:10:24, 20.02s/it]
 33%|███▎      | 103/313 [34:28<1:10:03, 20.02s/it]
 33%|███▎      | 104/313 [34:48<1:09:46, 20.03s/it]
 34%|███▎      | 105/313 [35:08<1:09:24, 20.02s/it]
 34%|███▍      | 106/313 [35:28<1:09:04, 20.02s/it]
 34%|███▍      | 107/313 [35:48<1:08:41, 20.01s/it]
 35%|███▍      | 108/313 [36:08<1:08:20, 20.00s/it]
 35%|███▍      | 109/313 [36:28<1:08:01, 20.01s/it]
 35%|███▌      | 110/313 [36:48<1:07:40, 20.00s/it]
 35%|███▌      | 111/313 [37:08<1:07:22, 20.01s/it]
 36%|███▌      | 112/313 [37:28<1:06:55, 19.98s/it]
 36%|███▌      | 113/313 [37:48<1:06:36, 19.98s/it]
 36%|███▋      | 114/313 [38:08<1:06:21, 20.01s/it]
 37%|███▋      | 115/313 [38:28<1:06:03, 20.02s/it]
 37%|███▋      | 116/313 [38:48<1:05:38, 19.99s/it]
 37%|███▋      | 117/313 [39:08<1:05:19, 20.00s/it]
 38%|███▊      | 118/313 [39:28<1:05:00, 20.00s/it]
 38%|███▊      | 119/313 [39:48<1:04:37, 19.99s/it]
 38%|███▊      | 120/313 [40:08<1:04:16, 19.98s/it]
 39%|███▊      | 121/313 [40:28<1:04:00, 20.00s/it]
 39%|███▉      | 122/313 [40:48<1:03:42, 20.01s/it]
 39%|███▉      | 123/313 [41:08<1:03:21, 20.01s/it]
 40%|███▉      | 124/313 [41:28<1:03:01, 20.01s/it]
 40%|███▉      | 125/313 [41:48<1:02:41, 20.01s/it]
 40%|████      | 126/313 [42:08<1:02:26, 20.03s/it]
 41%|████      | 127/313 [42:28<1:02:04, 20.02s/it]
 41%|████      | 128/313 [42:48<1:01:38, 19.99s/it]
 41%|████      | 129/313 [43:08<1:01:17, 19.98s/it]
 42%|████▏     | 130/313 [43:28<1:00:59, 19.99s/it]
 42%|████▏     | 131/313 [43:48<1:00:38, 19.99s/it]
 42%|████▏     | 132/313 [44:08<1:00:22, 20.01s/it]
 42%|████▏     | 133/313 [44:28<1:00:01, 20.01s/it]
 43%|████▎     | 134/313 [44:48<59:37, 19.99s/it]
 43%|████▎     | 135/313 [45:08<59:16, 19.98s/it]
 43%|████▎     | 136/313 [45:28<59:06, 20.04s/it]
 44%|████▍     | 137/313 [45:48<58:43, 20.02s/it]
 44%|████▍     | 138/313 [46:08<58:20, 20.00s/it]
 44%|████▍     | 139/313 [46:28<58:00, 20.00s/it]
 45%|████▍     | 140/313 [46:48<57:40, 20.00s/it]
 45%|████▌     | 141/313 [47:08<57:17, 19.99s/it]
 45%|████▌     | 142/313 [47:28<56:58, 19.99s/it]
 46%|████▌     | 143/313 [47:48<56:37, 19.99s/it]
 46%|████▌     | 144/313 [48:08<56:21, 20.01s/it]
 46%|████▋     | 145/313 [48:28<56:02, 20.01s/it]
 47%|████▋     | 146/313 [48:48<55:42, 20.01s/it]
 47%|████▋     | 147/313 [49:08<55:20, 20.00s/it]
 47%|████▋     | 148/313 [49:28<54:57, 19.98s/it]
 48%|████▊     | 149/313 [49:48<54:35, 19.97s/it]
 48%|████▊     | 150/313 [50:08<54:14, 19.97s/it]
 48%|████▊     | 150/313 [50:08<54:14, 19.97s/it]
 48%|████▊     | 151/313 [50:28<53:58, 19.99s/it]
 49%|████▊     | 152/313 [50:48<53:37, 19.98s/it]
 49%|████▉     | 153/313 [51:08<53:19, 20.00s/it]
 49%|████▉     | 154/313 [51:28<53:01, 20.01s/it]
 50%|████▉     | 155/313 [51:48<52:43, 20.02s/it]
 50%|████▉     | 156/313 [52:08<52:21, 20.01s/it]
 50%|█████     | 157/313 [52:28<51:57, 19.99s/it]
 50%|█████     | 158/313 [52:48<51:39, 20.00s/it]
 51%|█████     | 159/313 [53:08<51:19, 20.00s/it]
 51%|█████     | 160/313 [53:28<51:01, 20.01s/it]
 51%|█████▏    | 161/313 [53:48<50:38, 19.99s/it]
 52%|█████▏    | 162/313 [54:08<50:19, 20.00s/it]
 52%|█████▏    | 163/313 [54:28<50:04, 20.03s/it]
 52%|█████▏    | 164/313 [54:48<49:43, 20.02s/it]
 53%|█████▎    | 165/313 [55:08<49:21, 20.01s/it]
 53%|█████▎    | 166/313 [55:28<49:02, 20.02s/it]
 53%|█████▎    | 167/313 [55:48<48:41, 20.01s/it]
 54%|█████▎    | 168/313 [56:08<48:20, 20.01s/it]
 54%|█████▍    | 169/313 [56:28<48:01, 20.01s/it]
 54%|█████▍    | 170/313 [56:48<47:39, 20.00s/it]
 55%|█████▍    | 171/313 [57:08<47:20, 20.00s/it]
 55%|█████▍    | 172/313 [57:28<47:01, 20.01s/it]
 55%|█████▌    | 173/313 [57:48<46:42, 20.02s/it]
 56%|█████▌    | 174/313 [58:08<46:19, 20.00s/it]
 56%|█████▌    | 175/313 [58:28<45:58, 19.99s/it]
 56%|█████▌    | 176/313 [58:48<45:41, 20.01s/it]
 57%|█████▋    | 177/313 [59:08<45:22, 20.02s/it]
 57%|█████▋    | 178/313 [59:28<44:58, 19.99s/it]
 57%|█████▋    | 179/313 [59:48<44:37, 19.98s/it]
 58%|█████▊    | 180/313 [1:00:08<44:19, 20.00s/it]
 58%|█████▊    | 181/313 [1:00:28<44:02, 20.02s/it]
 58%|█████▊    | 182/313 [1:00:48<43:39, 20.00s/it]
 58%|█████▊    | 183/313 [1:01:08<43:19, 19.99s/it]
 59%|█████▉    | 184/313 [1:01:28<42:57, 19.98s/it]
 59%|█████▉    | 185/313 [1:01:48<42:36, 19.97s/it]
 59%|█████▉    | 186/313 [1:02:08<42:16, 19.98s/it]
 60%|█████▉    | 187/313 [1:02:28<41:56, 19.97s/it]
 60%|██████    | 188/313 [1:02:48<41:36, 19.97s/it]
 60%|██████    | 189/313 [1:03:08<41:17, 19.98s/it]
 61%|██████    | 190/313 [1:03:28<40:58, 19.99s/it]
 61%|██████    | 191/313 [1:03:48<40:38, 19.98s/it]
 61%|██████▏   | 192/313 [1:04:08<40:19, 20.00s/it]
 62%|██████▏   | 193/313 [1:04:28<40:02, 20.02s/it]
 62%|██████▏   | 194/313 [1:04:48<39:39, 20.00s/it]
 62%|██████▏   | 195/313 [1:05:08<39:20, 20.01s/it]
 63%|██████▎   | 196/313 [1:05:28<39:02, 20.02s/it]
 63%|██████▎   | 197/313 [1:05:48<38:40, 20.01s/it]
 63%|██████▎   | 198/313 [1:06:08<38:19, 19.99s/it]
 64%|██████▎   | 199/313 [1:06:28<38:00, 20.01s/it]
 64%|██████▍   | 200/313 [1:06:48<37:37, 19.98s/it]
 64%|██████▍   | 200/313 [1:06:48<37:37, 19.98s/it]
 64%|██████▍   | 201/313 [1:07:08<37:16, 19.97s/it]
 65%|██████▍   | 202/313 [1:07:28<36:55, 19.96s/it]
 65%|██████▍   | 203/313 [1:07:48<36:36, 19.97s/it]
 65%|██████▌   | 204/313 [1:08:08<36:18, 19.99s/it]
 65%|██████▌   | 205/313 [1:08:28<36:00, 20.00s/it]
 66%|██████▌   | 206/313 [1:08:48<35:40, 20.00s/it]
 66%|██████▌   | 207/313 [1:09:08<35:16, 19.97s/it]
 66%|██████▋   | 208/313 [1:09:27<34:54, 19.94s/it]
 67%|██████▋   | 209/313 [1:09:47<34:34, 19.95s/it]
 67%|██████▋   | 210/313 [1:10:07<34:15, 19.95s/it]
 67%|██████▋   | 211/313 [1:10:27<33:54, 19.94s/it]
 68%|██████▊   | 212/313 [1:10:47<33:36, 19.96s/it]
 68%|██████▊   | 213/313 [1:11:07<33:15, 19.96s/it]
 68%|██████▊   | 214/313 [1:11:27<32:55, 19.95s/it]
 69%|██████▊   | 215/313 [1:11:47<32:36, 19.97s/it]
 69%|██████▉   | 216/313 [1:12:07<32:16, 19.96s/it]
 69%|██████▉   | 217/313 [1:12:27<31:56, 19.96s/it]
 70%|██████▉   | 218/313 [1:12:47<31:36, 19.96s/it]
 70%|██████▉   | 219/313 [1:13:07<31:16, 19.96s/it]
 70%|███████   | 220/313 [1:13:27<30:55, 19.96s/it]
 71%|███████   | 221/313 [1:13:47<30:35, 19.96s/it]
 71%|███████   | 222/313 [1:14:07<30:16, 19.96s/it]
 71%|███████   | 223/313 [1:14:27<29:55, 19.96s/it]
 72%|███████▏  | 224/313 [1:14:47<29:36, 19.96s/it]
 72%|███████▏  | 225/313 [1:15:07<29:16, 19.96s/it]
 72%|███████▏  | 226/313 [1:15:27<28:56, 19.96s/it]
 73%|███████▎  | 227/313 [1:15:47<28:35, 19.94s/it]
 73%|███████▎  | 228/313 [1:16:07<28:15, 19.95s/it]
 73%|███████▎  | 229/313 [1:16:26<27:55, 19.95s/it]
 73%|███████▎  | 230/313 [1:16:46<27:36, 19.96s/it]
 74%|███████▍  | 231/313 [1:17:06<27:16, 19.95s/it]
 74%|███████▍  | 232/313 [1:17:26<26:55, 19.95s/it]
 74%|███████▍  | 233/313 [1:17:46<26:37, 19.97s/it]
 75%|███████▍  | 234/313 [1:18:06<26:18, 19.98s/it]
 75%|███████▌  | 235/313 [1:18:26<25:56, 19.96s/it]
 75%|███████▌  | 236/313 [1:18:46<25:36, 19.95s/it]
 76%|███████▌  | 237/313 [1:19:06<25:15, 19.94s/it]
 76%|███████▌  | 238/313 [1:19:26<24:57, 19.96s/it]
 76%|███████▋  | 239/313 [1:19:46<24:37, 19.97s/it]
 77%|███████▋  | 240/313 [1:20:06<24:17, 19.96s/it]
 77%|███████▋  | 241/313 [1:20:26<23:57, 19.97s/it]
 77%|███████▋  | 242/313 [1:20:46<23:37, 19.97s/it]
 78%|███████▊  | 243/313 [1:21:06<23:18, 19.98s/it]
 78%|███████▊  | 244/313 [1:21:26<22:57, 19.97s/it]
 78%|███████▊  | 245/313 [1:21:46<22:39, 20.00s/it]
 79%|███████▊  | 246/313 [1:22:06<22:20, 20.01s/it]
 79%|███████▉  | 247/313 [1:22:26<21:59, 19.99s/it]
 79%|███████▉  | 248/313 [1:22:46<21:38, 19.97s/it]
 80%|███████▉  | 249/313 [1:23:06<21:18, 19.97s/it]
 80%|███████▉  | 250/313 [1:23:26<20:58, 19.97s/it]
 80%|███████▉  | 250/313 [1:23:26<20:58, 19.97s/it]
 80%|████████  | 251/313 [1:23:46<20:37, 19.96s/it]
 81%|████████  | 252/313 [1:24:06<20:18, 19.97s/it]
 81%|████████  | 253/313 [1:24:26<19:59, 19.98s/it]
 81%|████████  | 254/313 [1:24:46<19:38, 19.98s/it]
 81%|████████▏ | 255/313 [1:25:06<19:17, 19.96s/it]
 82%|████████▏ | 256/313 [1:25:26<18:57, 19.95s/it]
 82%|████████▏ | 257/313 [1:25:46<18:36, 19.95s/it]
 82%|████████▏ | 258/313 [1:26:06<18:17, 19.95s/it]
 83%|████████▎ | 259/313 [1:26:26<17:58, 19.96s/it]
 83%|████████▎ | 260/313 [1:26:46<17:38, 19.97s/it]
 83%|████████▎ | 261/313 [1:27:05<17:18, 19.97s/it]
 84%|████████▎ | 262/313 [1:27:25<16:58, 19.97s/it]
 84%|████████▍ | 263/313 [1:27:45<16:38, 19.97s/it]
 84%|████████▍ | 264/313 [1:28:05<16:19, 19.99s/it]
 85%|████████▍ | 265/313 [1:28:26<16:00, 20.00s/it]
 85%|████████▍ | 266/313 [1:28:45<15:39, 19.98s/it]
 85%|████████▌ | 267/313 [1:29:05<15:19, 19.98s/it]
 86%|████████▌ | 268/313 [1:29:25<14:59, 20.00s/it]
 86%|████████▌ | 269/313 [1:29:45<14:40, 20.01s/it]
 86%|████████▋ | 270/313 [1:30:05<14:19, 19.99s/it]
 87%|████████▋ | 271/313 [1:30:25<13:59, 20.00s/it]
 87%|████████▋ | 272/313 [1:30:45<13:39, 19.99s/it]
 87%|████████▋ | 273/313 [1:31:05<13:19, 19.98s/it]
 88%|████████▊ | 274/313 [1:31:25<12:58, 19.97s/it]
 88%|████████▊ | 275/313 [1:31:45<12:39, 19.99s/it]
 88%|████████▊ | 276/313 [1:32:05<12:19, 19.99s/it]
 88%|████████▊ | 277/313 [1:32:25<11:59, 19.99s/it]
 89%|████████▉ | 278/313 [1:32:45<11:39, 19.97s/it]
 89%|████████▉ | 279/313 [1:33:05<11:18, 19.96s/it]
 89%|████████▉ | 280/313 [1:33:25<10:58, 19.97s/it]
 90%|████████▉ | 281/313 [1:33:45<10:38, 19.95s/it]
 90%|█████████ | 282/313 [1:34:05<10:18, 19.96s/it]
 90%|█████████ | 283/313 [1:34:25<09:58, 19.96s/it]
 91%|█████████ | 284/313 [1:34:45<09:39, 19.97s/it]
 91%|█████████ | 285/313 [1:35:05<09:18, 19.96s/it]
 91%|█████████▏| 286/313 [1:35:25<08:58, 19.95s/it]
 92%|█████████▏| 287/313 [1:35:45<08:39, 19.96s/it]
 92%|█████████▏| 288/313 [1:36:05<08:19, 19.97s/it]
 92%|█████████▏| 289/313 [1:36:25<07:59, 19.97s/it]
 93%|█████████▎| 290/313 [1:36:45<07:39, 19.97s/it]
 93%|█████████▎| 291/313 [1:37:05<07:19, 19.97s/it]
 93%|█████████▎| 292/313 [1:37:25<06:59, 19.97s/it]
 94%|█████████▎| 293/313 [1:37:45<06:39, 19.98s/it]
 94%|█████████▍| 294/313 [1:38:05<06:19, 19.97s/it]
 94%|█████████▍| 295/313 [1:38:25<05:59, 19.96s/it]
 95%|█████████▍| 296/313 [1:38:45<05:39, 19.96s/it]
 95%|█████████▍| 297/313 [1:39:05<05:19, 19.96s/it]
 95%|█████████▌| 298/313 [1:39:25<04:59, 19.97s/it]
 96%|█████████▌| 299/313 [1:39:44<04:39, 19.95s/it]
 96%|█████████▌| 300/313 [1:40:04<04:19, 19.96s/it]
 96%|█████████▌| 300/313 [1:40:04<04:19, 19.96s/it]
 96%|█████████▌| 301/313 [1:40:24<03:59, 19.94s/it]
 96%|█████████▋| 302/313 [1:40:44<03:39, 19.94s/it]
 97%|█████████▋| 303/313 [1:41:04<03:19, 19.96s/it]
 97%|█████████▋| 304/313 [1:41:24<02:59, 19.97s/it]
 97%|█████████▋| 305/313 [1:41:44<02:39, 19.96s/it]
 98%|█████████▊| 306/313 [1:42:04<02:19, 19.96s/it]
 98%|█████████▊| 307/313 [1:42:24<01:59, 19.97s/it]
 98%|█████████▊| 308/313 [1:42:44<01:39, 19.95s/it]
 99%|█████████▊| 309/313 [1:43:04<01:19, 19.96s/it]
 99%|█████████▉| 310/313 [1:43:24<00:59, 19.95s/it]
 99%|█████████▉| 311/313 [1:43:44<00:39, 19.94s/it]

+No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
  0%|          | 0/313 [00:00<?, ?it/s]`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
+/ceph/home/student.aau.dk/jx14ak/myenv/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py:1044: UserWarning: torch.utils.checkpoint: the use_reentrant parameter should be passed explicitly. Starting in PyTorch 2.9, calling checkpoint without use_reentrant will raise an exception. use_reentrant=False is recommended, but if you need to preserve the current default behavior, you can pass use_reentrant=True. Refer to docs for more details on the differences between the two variants.
+  return fn(*args, **kwargs)
  0%|          | 1/313 [00:26<2:19:12, 26.77s/it]
  1%|          | 2/313 [00:46<1:57:01, 22.58s/it]
  1%|          | 3/313 [01:06<1:50:23, 21.37s/it]
  1%|▏         | 4/313 [01:26<1:47:12, 20.82s/it]
  2%|▏         | 5/313 [01:46<1:45:23, 20.53s/it]
  2%|▏         | 6/313 [02:06<1:44:13, 20.37s/it]
  2%|▏         | 7/313 [02:26<1:43:30, 20.29s/it]
  3%|▎         | 8/313 [02:46<1:42:47, 20.22s/it]
  3%|▎         | 9/313 [03:06<1:42:13, 20.17s/it]
  3%|▎         | 10/313 [03:26<1:41:42, 20.14s/it]
  4%|▎         | 11/313 [03:46<1:41:02, 20.07s/it]
  4%|▍         | 12/313 [04:06<1:40:35, 20.05s/it]
  4%|▍         | 13/313 [04:26<1:40:07, 20.02s/it]
  4%|▍         | 14/313 [04:46<1:39:38, 19.99s/it]
  5%|▍         | 15/313 [05:06<1:39:11, 19.97s/it]
  5%|▌         | 16/313 [05:26<1:38:43, 19.95s/it]
  5%|▌         | 17/313 [05:46<1:38:35, 19.98s/it]
  6%|▌         | 18/313 [06:06<1:38:10, 19.97s/it]
  6%|▌         | 19/313 [06:26<1:37:56, 19.99s/it]
  6%|▋         | 20/313 [06:46<1:37:48, 20.03s/it]
  7%|▋         | 21/313 [07:06<1:37:31, 20.04s/it]
  7%|▋         | 22/313 [07:26<1:37:08, 20.03s/it]
  7%|▋         | 23/313 [07:46<1:36:52, 20.04s/it]
  8%|▊         | 24/313 [08:06<1:36:39, 20.07s/it]
  8%|▊         | 25/313 [08:26<1:36:16, 20.06s/it]
  8%|▊         | 26/313 [08:46<1:35:59, 20.07s/it]
  9%|▊         | 27/313 [09:06<1:35:33, 20.05s/it]
  9%|▉         | 28/313 [09:26<1:35:08, 20.03s/it]
  9%|▉         | 29/313 [09:47<1:34:55, 20.06s/it]
 10%|▉         | 30/313 [10:07<1:34:36, 20.06s/it]
 10%|▉         | 31/313 [10:27<1:34:09, 20.03s/it]
 10%|█         | 32/313 [10:46<1:33:41, 20.01s/it]
 11%|█         | 33/313 [11:07<1:33:30, 20.04s/it]
 11%|█         | 34/313 [11:27<1:33:05, 20.02s/it]
 11%|█         | 35/313 [11:47<1:32:45, 20.02s/it]
 12%|█▏        | 36/313 [12:07<1:32:26, 20.02s/it]
 12%|█▏        | 37/313 [12:27<1:32:12, 20.04s/it]
 12%|█▏        | 38/313 [12:47<1:31:51, 20.04s/it]
 12%|█▏        | 39/313 [13:07<1:31:26, 20.02s/it]
 13%|█▎        | 40/313 [13:27<1:31:02, 20.01s/it]
 13%|█▎        | 41/313 [13:47<1:30:48, 20.03s/it]
 13%|█▎        | 42/313 [14:07<1:30:22, 20.01s/it]
 14%|█▎        | 43/313 [14:27<1:30:04, 20.02s/it]
 14%|█▍        | 44/313 [14:47<1:29:42, 20.01s/it]
 14%|█▍        | 45/313 [15:07<1:29:22, 20.01s/it]
 15%|█▍        | 46/313 [15:27<1:28:58, 19.99s/it]
 15%|█▌        | 47/313 [15:47<1:28:41, 20.00s/it]
 15%|█▌        | 48/313 [16:07<1:28:24, 20.02s/it]
 16%|█▌        | 49/313 [16:27<1:28:08, 20.03s/it]
 16%|█▌        | 50/313 [16:47<1:27:45, 20.02s/it]
 16%|█▌        | 50/313 [16:47<1:27:45, 20.02s/it]
 16%|█▋        | 51/313 [17:07<1:27:20, 20.00s/it]
 17%|█▋        | 52/313 [17:27<1:27:03, 20.01s/it]
 17%|█▋        | 53/313 [17:47<1:26:48, 20.03s/it]
 17%|█▋        | 54/313 [18:07<1:26:24, 20.02s/it]
 18%|█▊        | 55/313 [18:27<1:26:02, 20.01s/it]
 18%|█▊        | 56/313 [18:47<1:25:38, 19.99s/it]
 18%|█▊        | 57/313 [19:07<1:25:27, 20.03s/it]
 19%|█▊        | 58/313 [19:27<1:25:02, 20.01s/it]
 19%|█▉        | 59/313 [19:47<1:24:54, 20.06s/it]
 19%|█▉        | 60/313 [20:07<1:24:36, 20.07s/it]
 19%|█▉        | 61/313 [20:27<1:24:13, 20.05s/it]
 20%|█▉        | 62/313 [20:47<1:23:45, 20.02s/it]
 20%|██        | 63/313 [21:07<1:23:23, 20.01s/it]
 20%|██        | 64/313 [21:27<1:23:04, 20.02s/it]
 21%|██        | 65/313 [21:47<1:22:45, 20.02s/it]
 21%|██        | 66/313 [22:07<1:22:21, 20.01s/it]
 21%|██▏       | 67/313 [22:27<1:22:06, 20.03s/it]
 22%|██▏       | 68/313 [22:47<1:21:42, 20.01s/it]
 22%|██▏       | 69/313 [23:07<1:21:20, 20.00s/it]
 22%|██▏       | 70/313 [23:27<1:21:03, 20.01s/it]
 23%|██▎       | 71/313 [23:47<1:20:42, 20.01s/it]
 23%|██▎       | 72/313 [24:07<1:20:21, 20.01s/it]
 23%|██▎       | 73/313 [24:27<1:20:00, 20.00s/it]
 24%|██▎       | 74/313 [24:47<1:19:40, 20.00s/it]
 24%|██▍       | 75/313 [25:07<1:19:19, 20.00s/it]
 24%|██▍       | 76/313 [25:27<1:18:59, 20.00s/it]
 25%|██▍       | 77/313 [25:47<1:18:43, 20.01s/it]
 25%|██▍       | 78/313 [26:07<1:18:24, 20.02s/it]
 25%|██▌       | 79/313 [26:27<1:18:09, 20.04s/it]
 26%|██▌       | 80/313 [26:47<1:17:47, 20.03s/it]
 26%|██▌       | 81/313 [27:07<1:17:26, 20.03s/it]
 26%|██▌       | 82/313 [27:28<1:17:05, 20.03s/it]
 27%|██▋       | 83/313 [27:47<1:16:42, 20.01s/it]
 27%|██▋       | 84/313 [28:07<1:16:19, 20.00s/it]
 27%|██▋       | 85/313 [28:27<1:16:01, 20.01s/it]
 27%|██▋       | 86/313 [28:47<1:15:39, 20.00s/it]
 28%|██▊       | 87/313 [29:07<1:15:21, 20.01s/it]
 28%|██▊       | 88/313 [29:28<1:15:02, 20.01s/it]
 28%|██▊       | 89/313 [29:48<1:14:43, 20.02s/it]
 29%|██▉       | 90/313 [30:08<1:14:22, 20.01s/it]
 29%|██▉       | 91/313 [30:28<1:14:03, 20.01s/it]
 29%|██▉       | 92/313 [30:48<1:13:44, 20.02s/it]
 30%|██▉       | 93/313 [31:08<1:13:25, 20.02s/it]
 30%|███       | 94/313 [31:28<1:13:08, 20.04s/it]
 30%|███       | 95/313 [31:48<1:12:40, 20.00s/it]
 31%|███       | 96/313 [32:08<1:12:25, 20.03s/it]
 31%|███       | 97/313 [32:28<1:12:00, 20.00s/it]
 31%|███▏      | 98/313 [32:48<1:11:36, 19.98s/it]
 32%|███▏      | 99/313 [33:08<1:11:20, 20.00s/it]
 32%|███▏      | 100/313 [33:28<1:10:59, 20.00s/it]
 32%|███▏      | 100/313 [33:28<1:10:59, 20.00s/it]
 32%|███▏      | 101/313 [33:48<1:10:40, 20.00s/it]
 33%|███▎      | 102/313 [34:08<1:10:24, 20.02s/it]
 33%|███▎      | 103/313 [34:28<1:10:03, 20.02s/it]
 33%|███▎      | 104/313 [34:48<1:09:46, 20.03s/it]
 34%|███▎      | 105/313 [35:08<1:09:24, 20.02s/it]
 34%|███▍      | 106/313 [35:28<1:09:04, 20.02s/it]
 34%|███▍      | 107/313 [35:48<1:08:41, 20.01s/it]
 35%|███▍      | 108/313 [36:08<1:08:20, 20.00s/it]
 35%|███▍      | 109/313 [36:28<1:08:01, 20.01s/it]
 35%|███▌      | 110/313 [36:48<1:07:40, 20.00s/it]
 35%|███▌      | 111/313 [37:08<1:07:22, 20.01s/it]
 36%|███▌      | 112/313 [37:28<1:06:55, 19.98s/it]
 36%|███▌      | 113/313 [37:48<1:06:36, 19.98s/it]
 36%|███▋      | 114/313 [38:08<1:06:21, 20.01s/it]
 37%|███▋      | 115/313 [38:28<1:06:03, 20.02s/it]
 37%|███▋      | 116/313 [38:48<1:05:38, 19.99s/it]
 37%|███▋      | 117/313 [39:08<1:05:19, 20.00s/it]
 38%|███▊      | 118/313 [39:28<1:05:00, 20.00s/it]
 38%|███▊      | 119/313 [39:48<1:04:37, 19.99s/it]
 38%|███▊      | 120/313 [40:08<1:04:16, 19.98s/it]
 39%|███▊      | 121/313 [40:28<1:04:00, 20.00s/it]
 39%|███▉      | 122/313 [40:48<1:03:42, 20.01s/it]
 39%|███▉      | 123/313 [41:08<1:03:21, 20.01s/it]
 40%|███▉      | 124/313 [41:28<1:03:01, 20.01s/it]
 40%|███▉      | 125/313 [41:48<1:02:41, 20.01s/it]
 40%|████      | 126/313 [42:08<1:02:26, 20.03s/it]
 41%|████      | 127/313 [42:28<1:02:04, 20.02s/it]
 41%|████      | 128/313 [42:48<1:01:38, 19.99s/it]
 41%|████      | 129/313 [43:08<1:01:17, 19.98s/it]
 42%|████▏     | 130/313 [43:28<1:00:59, 19.99s/it]
 42%|████▏     | 131/313 [43:48<1:00:38, 19.99s/it]
 42%|████▏     | 132/313 [44:08<1:00:22, 20.01s/it]
 42%|████▏     | 133/313 [44:28<1:00:01, 20.01s/it]
 43%|████▎     | 134/313 [44:48<59:37, 19.99s/it]
 43%|████▎     | 135/313 [45:08<59:16, 19.98s/it]
 43%|████▎     | 136/313 [45:28<59:06, 20.04s/it]
 44%|████▍     | 137/313 [45:48<58:43, 20.02s/it]
 44%|████▍     | 138/313 [46:08<58:20, 20.00s/it]
 44%|████▍     | 139/313 [46:28<58:00, 20.00s/it]
 45%|████▍     | 140/313 [46:48<57:40, 20.00s/it]
 45%|████▌     | 141/313 [47:08<57:17, 19.99s/it]
 45%|████▌     | 142/313 [47:28<56:58, 19.99s/it]
 46%|████▌     | 143/313 [47:48<56:37, 19.99s/it]
 46%|████▌     | 144/313 [48:08<56:21, 20.01s/it]
 46%|████▋     | 145/313 [48:28<56:02, 20.01s/it]
 47%|████▋     | 146/313 [48:48<55:42, 20.01s/it]
 47%|████▋     | 147/313 [49:08<55:20, 20.00s/it]
 47%|████▋     | 148/313 [49:28<54:57, 19.98s/it]
 48%|████▊     | 149/313 [49:48<54:35, 19.97s/it]
 48%|████▊     | 150/313 [50:08<54:14, 19.97s/it]
 48%|████▊     | 150/313 [50:08<54:14, 19.97s/it]
 48%|████▊     | 151/313 [50:28<53:58, 19.99s/it]
 49%|████▊     | 152/313 [50:48<53:37, 19.98s/it]
 49%|████▉     | 153/313 [51:08<53:19, 20.00s/it]
 49%|████▉     | 154/313 [51:28<53:01, 20.01s/it]
 50%|████▉     | 155/313 [51:48<52:43, 20.02s/it]
 50%|████▉     | 156/313 [52:08<52:21, 20.01s/it]
 50%|█████     | 157/313 [52:28<51:57, 19.99s/it]
 50%|█████     | 158/313 [52:48<51:39, 20.00s/it]
 51%|█████     | 159/313 [53:08<51:19, 20.00s/it]
 51%|█████     | 160/313 [53:28<51:01, 20.01s/it]
 51%|█████▏    | 161/313 [53:48<50:38, 19.99s/it]
 52%|█████▏    | 162/313 [54:08<50:19, 20.00s/it]
 52%|█████▏    | 163/313 [54:28<50:04, 20.03s/it]
 52%|█████▏    | 164/313 [54:48<49:43, 20.02s/it]
 53%|█████▎    | 165/313 [55:08<49:21, 20.01s/it]
 53%|█████▎    | 166/313 [55:28<49:02, 20.02s/it]
 53%|█████▎    | 167/313 [55:48<48:41, 20.01s/it]
 54%|█████▎    | 168/313 [56:08<48:20, 20.01s/it]
 54%|█████▍    | 169/313 [56:28<48:01, 20.01s/it]
 54%|█████▍    | 170/313 [56:48<47:39, 20.00s/it]
 55%|█████▍    | 171/313 [57:08<47:20, 20.00s/it]
 55%|█████▍    | 172/313 [57:28<47:01, 20.01s/it]
 55%|█████▌    | 173/313 [57:48<46:42, 20.02s/it]
 56%|█████▌    | 174/313 [58:08<46:19, 20.00s/it]
 56%|█████▌    | 175/313 [58:28<45:58, 19.99s/it]
 56%|█████▌    | 176/313 [58:48<45:41, 20.01s/it]
 57%|█████▋    | 177/313 [59:08<45:22, 20.02s/it]
 57%|█████▋    | 178/313 [59:28<44:58, 19.99s/it]
 57%|█████▋    | 179/313 [59:48<44:37, 19.98s/it]
 58%|█████▊    | 180/313 [1:00:08<44:19, 20.00s/it]
 58%|█████▊    | 181/313 [1:00:28<44:02, 20.02s/it]
 58%|█████▊    | 182/313 [1:00:48<43:39, 20.00s/it]
 58%|█████▊    | 183/313 [1:01:08<43:19, 19.99s/it]
 59%|█████▉    | 184/313 [1:01:28<42:57, 19.98s/it]
 59%|█████▉    | 185/313 [1:01:48<42:36, 19.97s/it]
 59%|█████▉    | 186/313 [1:02:08<42:16, 19.98s/it]
 60%|█████▉    | 187/313 [1:02:28<41:56, 19.97s/it]
 60%|██████    | 188/313 [1:02:48<41:36, 19.97s/it]
 60%|██████    | 189/313 [1:03:08<41:17, 19.98s/it]
 61%|██████    | 190/313 [1:03:28<40:58, 19.99s/it]
 61%|██████    | 191/313 [1:03:48<40:38, 19.98s/it]
 61%|██████▏   | 192/313 [1:04:08<40:19, 20.00s/it]
 62%|██████▏   | 193/313 [1:04:28<40:02, 20.02s/it]
 62%|██████▏   | 194/313 [1:04:48<39:39, 20.00s/it]
 62%|██████▏   | 195/313 [1:05:08<39:20, 20.01s/it]
 63%|██████▎   | 196/313 [1:05:28<39:02, 20.02s/it]
 63%|██████▎   | 197/313 [1:05:48<38:40, 20.01s/it]
 63%|██████▎   | 198/313 [1:06:08<38:19, 19.99s/it]
 64%|██████▎   | 199/313 [1:06:28<38:00, 20.01s/it]
 64%|██████▍   | 200/313 [1:06:48<37:37, 19.98s/it]
 64%|██████▍   | 200/313 [1:06:48<37:37, 19.98s/it]
 64%|██████▍   | 201/313 [1:07:08<37:16, 19.97s/it]
 65%|██████▍   | 202/313 [1:07:28<36:55, 19.96s/it]
 65%|██████▍   | 203/313 [1:07:48<36:36, 19.97s/it]
 65%|██████▌   | 204/313 [1:08:08<36:18, 19.99s/it]
 65%|██████▌   | 205/313 [1:08:28<36:00, 20.00s/it]
 66%|██████▌   | 206/313 [1:08:48<35:40, 20.00s/it]
 66%|██████▌   | 207/313 [1:09:08<35:16, 19.97s/it]
 66%|██████▋   | 208/313 [1:09:27<34:54, 19.94s/it]
 67%|██████▋   | 209/313 [1:09:47<34:34, 19.95s/it]
 67%|██████▋   | 210/313 [1:10:07<34:15, 19.95s/it]
 67%|██████▋   | 211/313 [1:10:27<33:54, 19.94s/it]
 68%|██████▊   | 212/313 [1:10:47<33:36, 19.96s/it]
 68%|██████▊   | 213/313 [1:11:07<33:15, 19.96s/it]
 68%|██████▊   | 214/313 [1:11:27<32:55, 19.95s/it]
 69%|██████▊   | 215/313 [1:11:47<32:36, 19.97s/it]
 69%|██████▉   | 216/313 [1:12:07<32:16, 19.96s/it]
 69%|██████▉   | 217/313 [1:12:27<31:56, 19.96s/it]
 70%|██████▉   | 218/313 [1:12:47<31:36, 19.96s/it]
 70%|██████▉   | 219/313 [1:13:07<31:16, 19.96s/it]
 70%|███████   | 220/313 [1:13:27<30:55, 19.96s/it]
 71%|███████   | 221/313 [1:13:47<30:35, 19.96s/it]
 71%|███████   | 222/313 [1:14:07<30:16, 19.96s/it]
 71%|███████   | 223/313 [1:14:27<29:55, 19.96s/it]
 72%|███████▏  | 224/313 [1:14:47<29:36, 19.96s/it]
 72%|███████▏  | 225/313 [1:15:07<29:16, 19.96s/it]
 72%|███████▏  | 226/313 [1:15:27<28:56, 19.96s/it]
 73%|███████▎  | 227/313 [1:15:47<28:35, 19.94s/it]
 73%|███████▎  | 228/313 [1:16:07<28:15, 19.95s/it]
 73%|███████▎  | 229/313 [1:16:26<27:55, 19.95s/it]
 73%|███████▎  | 230/313 [1:16:46<27:36, 19.96s/it]
 74%|███████▍  | 231/313 [1:17:06<27:16, 19.95s/it]
 74%|███████▍  | 232/313 [1:17:26<26:55, 19.95s/it]
 74%|███████▍  | 233/313 [1:17:46<26:37, 19.97s/it]
 75%|███████▍  | 234/313 [1:18:06<26:18, 19.98s/it]
 75%|███████▌  | 235/313 [1:18:26<25:56, 19.96s/it]
 75%|███████▌  | 236/313 [1:18:46<25:36, 19.95s/it]
 76%|███████▌  | 237/313 [1:19:06<25:15, 19.94s/it]
 76%|███████▌  | 238/313 [1:19:26<24:57, 19.96s/it]
 76%|███████▋  | 239/313 [1:19:46<24:37, 19.97s/it]
 77%|███████▋  | 240/313 [1:20:06<24:17, 19.96s/it]
 77%|███████▋  | 241/313 [1:20:26<23:57, 19.97s/it]
 77%|███████▋  | 242/313 [1:20:46<23:37, 19.97s/it]
 78%|███████▊  | 243/313 [1:21:06<23:18, 19.98s/it]
 78%|███████▊  | 244/313 [1:21:26<22:57, 19.97s/it]
 78%|███████▊  | 245/313 [1:21:46<22:39, 20.00s/it]
 79%|███████▊  | 246/313 [1:22:06<22:20, 20.01s/it]
 79%|███████▉  | 247/313 [1:22:26<21:59, 19.99s/it]
 79%|███████▉  | 248/313 [1:22:46<21:38, 19.97s/it]
 80%|███████▉  | 249/313 [1:23:06<21:18, 19.97s/it]
 80%|███████▉  | 250/313 [1:23:26<20:58, 19.97s/it]
 80%|███████▉  | 250/313 [1:23:26<20:58, 19.97s/it]
 80%|████████  | 251/313 [1:23:46<20:37, 19.96s/it]
 81%|████████  | 252/313 [1:24:06<20:18, 19.97s/it]
 81%|████████  | 253/313 [1:24:26<19:59, 19.98s/it]
 81%|████████  | 254/313 [1:24:46<19:38, 19.98s/it]
 81%|████████▏ | 255/313 [1:25:06<19:17, 19.96s/it]
 82%|████████▏ | 256/313 [1:25:26<18:57, 19.95s/it]
 82%|████████▏ | 257/313 [1:25:46<18:36, 19.95s/it]
 82%|████████▏ | 258/313 [1:26:06<18:17, 19.95s/it]
 83%|████████▎ | 259/313 [1:26:26<17:58, 19.96s/it]
 83%|████████▎ | 260/313 [1:26:46<17:38, 19.97s/it]
 83%|████████▎ | 261/313 [1:27:05<17:18, 19.97s/it]
 84%|████████▎ | 262/313 [1:27:25<16:58, 19.97s/it]
 84%|████████▍ | 263/313 [1:27:45<16:38, 19.97s/it]
 84%|████████▍ | 264/313 [1:28:05<16:19, 19.99s/it]
 85%|████████▍ | 265/313 [1:28:26<16:00, 20.00s/it]
 85%|████████▍ | 266/313 [1:28:45<15:39, 19.98s/it]
 85%|████████▌ | 267/313 [1:29:05<15:19, 19.98s/it]
 86%|████████▌ | 268/313 [1:29:25<14:59, 20.00s/it]
 86%|████████▌ | 269/313 [1:29:45<14:40, 20.01s/it]
 86%|████████▋ | 270/313 [1:30:05<14:19, 19.99s/it]
 87%|████████▋ | 271/313 [1:30:25<13:59, 20.00s/it]
 87%|████████▋ | 272/313 [1:30:45<13:39, 19.99s/it]
 87%|████████▋ | 273/313 [1:31:05<13:19, 19.98s/it]
 88%|████████▊ | 274/313 [1:31:25<12:58, 19.97s/it]
 88%|████████▊ | 275/313 [1:31:45<12:39, 19.99s/it]
 88%|████████▊ | 276/313 [1:32:05<12:19, 19.99s/it]
 88%|████████▊ | 277/313 [1:32:25<11:59, 19.99s/it]
 89%|████████▉ | 278/313 [1:32:45<11:39, 19.97s/it]
 89%|████████▉ | 279/313 [1:33:05<11:18, 19.96s/it]
 89%|████████▉ | 280/313 [1:33:25<10:58, 19.97s/it]
 90%|████████▉ | 281/313 [1:33:45<10:38, 19.95s/it]
 90%|█████████ | 282/313 [1:34:05<10:18, 19.96s/it]
 90%|█████████ | 283/313 [1:34:25<09:58, 19.96s/it]
 91%|█████████ | 284/313 [1:34:45<09:39, 19.97s/it]
 91%|█████████ | 285/313 [1:35:05<09:18, 19.96s/it]
 91%|█████████▏| 286/313 [1:35:25<08:58, 19.95s/it]
 92%|█████████▏| 287/313 [1:35:45<08:39, 19.96s/it]
 92%|█████████▏| 288/313 [1:36:05<08:19, 19.97s/it]
 92%|█████████▏| 289/313 [1:36:25<07:59, 19.97s/it]
 93%|█████████▎| 290/313 [1:36:45<07:39, 19.97s/it]
 93%|█████████▎| 291/313 [1:37:05<07:19, 19.97s/it]
 93%|█████████▎| 292/313 [1:37:25<06:59, 19.97s/it]
 94%|█████████▎| 293/313 [1:37:45<06:39, 19.98s/it]
 94%|█████████▍| 294/313 [1:38:05<06:19, 19.97s/it]
 94%|█████████▍| 295/313 [1:38:25<05:59, 19.96s/it]
 95%|█████████▍| 296/313 [1:38:45<05:39, 19.96s/it]
 95%|█████████▍| 297/313 [1:39:05<05:19, 19.96s/it]
 95%|█████████▌| 298/313 [1:39:25<04:59, 19.97s/it]
 96%|█████████▌| 299/313 [1:39:44<04:39, 19.95s/it]
 96%|█████████▌| 300/313 [1:40:04<04:19, 19.96s/it]
 96%|█████████▌| 300/313 [1:40:04<04:19, 19.96s/it]
 96%|█████████▌| 301/313 [1:40:24<03:59, 19.94s/it]
 96%|█████████▋| 302/313 [1:40:44<03:39, 19.94s/it]
 97%|█████████▋| 303/313 [1:41:04<03:19, 19.96s/it]
 97%|█████████▋| 304/313 [1:41:24<02:59, 19.97s/it]
 97%|█████████▋| 305/313 [1:41:44<02:39, 19.96s/it]
 98%|█████████▊| 306/313 [1:42:04<02:19, 19.96s/it]
 98%|█████████▊| 307/313 [1:42:24<01:59, 19.97s/it]
 98%|█████████▊| 308/313 [1:42:44<01:39, 19.95s/it]
 99%|█████████▊| 309/313 [1:43:04<01:19, 19.96s/it]
 99%|█████████▉| 310/313 [1:43:24<00:59, 19.95s/it]
 99%|█████████▉| 311/313 [1:43:44<00:39, 19.94s/it]

qlora_finetune.out ADDED Viewed

	@@ -0,0 +1,44 @@

+============================================================
+STEP 1 — Loading and formatting training data
+============================================================
+Training examples: 5,000
+Formatted 5000 training examples.
+============================================================
+STEP 2 — Loading model in 4-bit quantization
+============================================================
+Model loaded in 4-bit.
+============================================================
+STEP 3 — Attaching LoRA adapters
+============================================================
+Total parameters     : 7,289,966,592
+Trainable parameters : 41,943,040 (0.58%)
+============================================================
+STEP 4 — Tokenizing dataset
+============================================================
+Tokenized 5000 examples, max_length=512
+============================================================
+STEP 5 — Fine-tuning with QLoRA
+============================================================
+Training: 5,000 examples, 1 epoch
+Effective batch size: 16
+{'loss': 1.0607, 'grad_norm': 0.7086756229400635, 'learning_rate': 0.00019193530389822363, 'epoch': 0.16}
+{'loss': 0.9639, 'grad_norm': 0.5901047587394714, 'learning_rate': 0.00016036076085226814, 'epoch': 0.32}
+{'loss': 0.9558, 'grad_norm': 0.5561144351959229, 'learning_rate': 0.0001129241134155949, 'epoch': 0.48}
+{'loss': 0.9428, 'grad_norm': 1.1043578386306763, 'learning_rate': 6.209115961596208e-05, 'epoch': 0.64}
+{'loss': 0.9361, 'grad_norm': 0.5233080983161926, 'learning_rate': 2.1220207206178688e-05, 'epoch': 0.8}
+{'loss': 0.9374, 'grad_norm': 0.48741865158081055, 'learning_rate': 1.0516660902673448e-06, 'epoch': 0.96}
+{'train_runtime': 6254.5575, 'train_samples_per_second': 0.799, 'train_steps_per_second': 0.05, 'train_loss': 0.9650596307870298, 'epoch': 1.0}
+Training complete.
+============================================================
+STEP 6 — Saving fine-tuned adapter
+============================================================
+Saved to: ./qlora_patent_model/
+✅ QLoRA fine-tuning complete!

qlora_finetune.py ADDED Viewed

	@@ -0,0 +1,193 @@

+# PART C — STEP 1: QLoRA Fine-Tuning on Patent Claims
+# Uses standard Trainer — no trl dependency
+# 1. IMPORTS
+import os
+import torch
+import pandas as pd
+from datasets import Dataset
+from transformers import (
+    AutoTokenizer,
+    AutoModelForCausalLM,
+    BitsAndBytesConfig,
+    TrainingArguments,
+    Trainer,
+    DataCollatorForLanguageModeling,
+)
+from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
+# 2. PARAMETERS
+BASE_MODEL     = "mistralai/Mistral-7B-Instruct-v0.3"  # model id used for both the tokenizer and the causal LM
+PARQUET_FILE   = "patents_50k_green.parquet"  # input table read with pd.read_parquet
+OUTPUT_DIR     = "./qlora_patent_model"  # where the trained adapter and tokenizer are saved
+MAX_SEQ_LEN    = 512  # every example is truncated/padded to this many tokens
+NUM_EPOCHS     = 1  # passed as num_train_epochs
+LEARNING_RATE  = 2e-4  # passed as learning_rate (cosine schedule with warmup)
+BATCH_SIZE     = 4  # per-device train batch size
+GRAD_ACCUM     = 4  # gradient accumulation steps; BATCH_SIZE * GRAD_ACCUM is printed as the effective batch
+LOGGING_STEPS  = 50  # log training metrics every this many steps
+RANDOM_SEED    = 42  # seeds both the row subsample and TrainingArguments
+MAX_TRAIN_SAMPLES = 5000  # cap on training rows; larger splits are randomly subsampled
+LORA_R         = 16  # LoRA rank (r)
+LORA_ALPHA     = 32  # LoRA alpha
+LORA_DROPOUT   = 0.05  # dropout applied by the LoRA layers
+# 3. FORMAT TRAINING DATA
+print("=" * 60)
+print("STEP 1 — Loading and formatting training data")
+print("=" * 60)
+df = pd.read_parquet(PARQUET_FILE)  # full table; only the "split" column is inspected here
+train_df = df[df["split"] == "train_silver"].copy()  # keep only rows tagged train_silver; copy so later edits do not touch df
+if len(train_df) > MAX_TRAIN_SAMPLES:  # subsample only when the split is larger than the cap
+    train_df = train_df.sample(n=MAX_TRAIN_SAMPLES, random_state=RANDOM_SEED)  # seeded, so the subset is reproducible
+print(f"Training examples: {len(train_df):,}")
+def format_training_example(row):  # render one row as an Instruction / Claim / Response training string
+    label = int(row["is_green_silver"])  # silver label coerced to int; raises if the value is not int-convertible
+    label_word = "green technology" if label == 1 else "not green technology"  # any label other than 1 is worded as not green
+    text = f"""### Instruction:
+You are a patent examiner. Classify this patent claim as green technology (1) or not green technology (0). Green technology includes inventions for reducing emissions, renewable energy, energy efficiency, pollution reduction, or environmental protection. Respond with JSON only.
+### Claim:
+{row['text'][:1500]}
+### Response:
+{{"label": {label}, "rationale": "This patent claim describes {label_word} based on the technical content of the claim."}}"""
+    return {"text": text}  # single-key dict; the claim is cut to its first 1500 characters, the rationale is a fixed template
+formatted_data = train_df.apply(format_training_example, axis=1).tolist()  # axis=1: one call per row, giving a list of {"text": ...} dicts
+train_dataset = Dataset.from_list(formatted_data)  # dataset with a single "text" column
+print(f"Formatted {len(train_dataset)} training examples.")
+# 4. LOAD MODEL IN 4-BIT
+print("\n" + "=" * 60)
+print("STEP 2 — Loading model in 4-bit quantization")
+print("=" * 60)
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.float16,
+    bnb_4bit_use_double_quant=True,
+)
+tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)
+tokenizer.pad_token = tokenizer.eos_token
+tokenizer.padding_side = "right"
+model = AutoModelForCausalLM.from_pretrained(
+    BASE_MODEL,
+    quantization_config=bnb_config,
+    device_map="auto",
+    trust_remote_code=True,
+)
+model = prepare_model_for_kbit_training(model)
+print("Model loaded in 4-bit.")
+# 5. ATTACH LoRA ADAPTERS
+print("\n" + "=" * 60)
+print("STEP 3 — Attaching LoRA adapters")
+print("=" * 60)
+lora_config = LoraConfig(
+    r=LORA_R,
+    lora_alpha=LORA_ALPHA,
+    lora_dropout=LORA_DROPOUT,
+    bias="none",  # bias setting passed through to PEFT unchanged
+    task_type="CAUSAL_LM",
+    target_modules=[  # names of the sub-modules that receive adapters
+        "q_proj", "k_proj", "v_proj", "o_proj",  # presumably the attention projections — confirm against the model
+        "gate_proj", "up_proj", "down_proj",  # presumably the MLP projections — confirm against the model
+    ],
+)
+model = get_peft_model(model, lora_config)  # rebinds model to the PEFT-wrapped version
+trainable, total = model.get_nb_trainable_parameters()  # (trainable count, total count)
+print(f"Total parameters     : {total:,}")
+print(f"Trainable parameters : {trainable:,} ({100 * trainable / total:.2f}%)")
+# 6. TOKENIZE DATASET
+print("\n" + "=" * 60)
+print("STEP 4 — Tokenizing dataset")
+print("=" * 60)
+def tokenize_function(examples):  # tokenize a batch of "text" strings and attach labels; returns the tokenizer output
+    tokens = tokenizer(
+        examples["text"],
+        truncation=True,  # NOTE(review): the Response is the tail of each string; confirm long claims are not truncated past it
+        max_length=MAX_SEQ_LEN,
+        padding="max_length",  # every example is padded out to MAX_SEQ_LEN
+    )
+    # For causal language modeling, labels = input_ids
+    # The model learns to predict the next token at each position
+    tokens["labels"] = tokens["input_ids"].copy()  # NOTE(review): includes pad positions; the LM collator presumably rebuilds labels — confirm
+    return tokens
+tokenized_dataset = train_dataset.map(
+    tokenize_function,
+    batched=True,  # tokenize_function receives batches, so examples["text"] is a list of strings
+    remove_columns=["text"],  # drop the raw string column after tokenizing
+)
+tokenized_dataset.set_format("torch")  # items are returned as torch tensors
+print(f"Tokenized {len(tokenized_dataset)} examples, max_length={MAX_SEQ_LEN}")
+# 7. TRAIN
+print("\n" + "=" * 60)
+print("STEP 5 — Fine-tuning with QLoRA")
+print("=" * 60)
+training_args = TrainingArguments(
+    output_dir="./qlora_checkpoints",  # Trainer working directory; distinct from OUTPUT_DIR used for the final save
+    num_train_epochs=NUM_EPOCHS,
+    per_device_train_batch_size=BATCH_SIZE,
+    gradient_accumulation_steps=GRAD_ACCUM,
+    learning_rate=LEARNING_RATE,
+    weight_decay=0.01,
+    logging_steps=LOGGING_STEPS,
+    save_strategy="no",  # no checkpoint saving requested; the adapter is saved explicitly after training
+    bf16=torch.cuda.is_bf16_supported(),  # bf16 when the GPU reports support ...
+    fp16=not torch.cuda.is_bf16_supported(),  # ... otherwise fp16; exactly one of the two is enabled
+    optim="paged_adamw_8bit",
+    warmup_ratio=0.03,
+    lr_scheduler_type="cosine",
+    report_to="none",  # no external experiment tracker
+    seed=RANDOM_SEED,
+)
+data_collator = DataCollatorForLanguageModeling(
+    tokenizer=tokenizer,
+    mlm=False,   # False = causal LM (predict next token, not masked)
+)
+trainer = Trainer(
+    model=model,
+    args=training_args,
+    train_dataset=tokenized_dataset,  # no eval dataset is passed
+    data_collator=data_collator,
+)
+print(f"Training: {len(tokenized_dataset):,} examples, {NUM_EPOCHS} epoch")
+print(f"Effective batch size: {BATCH_SIZE * GRAD_ACCUM}")  # per-device batch x accumulation; assumes a single device — confirm
+print()
+trainer.train()  # runs the full training loop
+print("\nTraining complete.")
+# 8. SAVE ADAPTER
+print("\n" + "=" * 60)
+print("STEP 6 — Saving fine-tuned adapter")
+print("=" * 60)
+model.save_pretrained(OUTPUT_DIR)  # model is the PEFT-wrapped model at this point
+tokenizer.save_pretrained(OUTPUT_DIR)  # tokenizer is saved next to it, including the pad-token setting applied earlier
+print(f"Saved to: {OUTPUT_DIR}/")
+print("\n✅ QLoRA fine-tuning complete!")