42hgyn26hz-cpu commited on Feb 16

Commit

f6ceb9b

1 Parent(s): 8ff2929

update

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

app.py +609 -408
config.json +9 -12
generation_config.json +4 -3
main.py +658 -437
mergekit_config.yml +2 -2
offsec_model/emergency_save/model.safetensors → model.safetensors +2 -2
offsec_model/checkpoint-3/README.md +207 -0
offsec_model/checkpoint-3/adapter_config.json +41 -0
model-00001-of-00004.safetensors → offsec_model/checkpoint-3/adapter_model.safetensors +2 -2
model-00002-of-00004.safetensors → offsec_model/checkpoint-3/optimizer.pt +2 -2
model-00003-of-00004.safetensors → offsec_model/checkpoint-3/rng_state.pth +2 -2
model-00004-of-00004.safetensors → offsec_model/checkpoint-3/scheduler.pt +2 -2
offsec_model/{emergency_save → checkpoint-3}/tokenizer.json +10 -1
offsec_model/checkpoint-3/tokenizer_config.json +12 -0
offsec_model/checkpoint-3/trainer_state.json +33 -0
offsec_model/{emergency_save → checkpoint-3}/training_args.bin +1 -1
offsec_model/emergency_save/config.json +0 -36
offsec_model/emergency_save/generation_config.json +0 -15
offsec_model/final_model/README.md +207 -0
offsec_model/final_model/adapter_config.json +41 -0
offsec_model/final_model/adapter_model.safetensors +3 -0
offsec_model/final_model/config.json +0 -36
offsec_model/final_model/generation_config.json +0 -15
offsec_model/final_model/model.safetensors +0 -3
offsec_model/final_model/tokenizer.json +27 -0
offsec_model/final_model/tokenizer_config.json +3 -3
offsec_model/final_model/training_args.bin +2 -2
offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/README.md +207 -0
offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/adapter_config.json +41 -0
offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/adapter_model.safetensors +3 -0
offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/optimizer.pt +3 -0
offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/rng_state.pth +3 -0
offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/scheduler.pt +3 -0
offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/tokenizer.json +0 -0
offsec_model/{emergency_save → huihui-ai_Guilherme34_uncensor-v2/checkpoint-21}/tokenizer_config.json +2 -2
offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/trainer_state.json +33 -0
offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/training_args.bin +3 -0
offsec_model/huihui-ai_Guilherme34_uncensor-v2/final_model/README.md +207 -0
offsec_model/huihui-ai_Guilherme34_uncensor-v2/final_model/adapter_config.json +41 -0
offsec_model/huihui-ai_Guilherme34_uncensor-v2/final_model/adapter_model.safetensors +3 -0
offsec_model/huihui-ai_Guilherme34_uncensor-v2/final_model/tokenizer.json +0 -0
offsec_model/huihui-ai_Guilherme34_uncensor-v2/final_model/tokenizer_config.json +12 -0
offsec_model/huihui-ai_Guilherme34_uncensor-v2/final_model/training_args.bin +3 -0
offsec_model/huihui-ai_Guilherme34_uncensor-v2/trainer_state.json +43 -0
offsec_model/trainer_state.json +22 -21
offsec_model/zxc4wewewe_offsec/checkpoint-6/README.md +207 -0
offsec_model/zxc4wewewe_offsec/checkpoint-6/adapter_config.json +41 -0
offsec_model/zxc4wewewe_offsec/checkpoint-6/adapter_model.safetensors +3 -0
offsec_model/zxc4wewewe_offsec/checkpoint-6/optimizer.pt +3 -0
offsec_model/zxc4wewewe_offsec/checkpoint-6/rng_state.pth +3 -0

app.py CHANGED Viewed

@@ -1,6 +1,10 @@
 import os
 import torch
 import gc
 from datasets import load_dataset, Dataset, DatasetDict
 from transformers import (
     AutoTokenizer,
@@ -8,460 +12,657 @@ from transformers import (
     TrainingArguments,
     Trainer,
     DataCollatorForLanguageModeling,
-    EarlyStoppingCallback
 )
 import shutil
-from typing import Dict, Any
 # ─── Configuration ───────────────────────────────────────────────────────────
-MODEL_NAME = "zxc4wewewe/blackthinking"  # Your base model
-OUTPUT_DIR = "./offsec_model"
 MAX_LENGTH = 512
-BATCH_SIZE = 2  # Reduced for safety
-GRADIENT_ACCUMULATION = 8  # Effective batch = 16
-EPOCHS = 3
 LEARNING_RATE = 2e-5
-SAVE_STEPS = 500
-EVAL_STEPS = 500
-LOGGING_STEPS = 50
-# ─── 1. Load Dataset with Schema Handling ────────────────────────────────────
-def load_and_fix_dataset():
-    """Load dataset handling both 'messages' and 'prompt/response' formats"""
-    cache_dir = os.path.expanduser("~/.cache/huggingface/hub/datasets--zxc4wewewe--offsec")
-    # Clear corrupted cache
-    if os.path.exists(cache_dir):
-        shutil.rmtree(cache_dir)
     try:
-        print("Loading dataset: huihui-ai/Guilherme34_uncensor-v2")
-        dataset = load_dataset("huihui-ai/Guilherme34_uncensor-v2")
-        print("✓ Loaded huihui-ai/Guilherme34_uncensor-v2")
     except Exception as e:
-        print(f"Specific file load failed: {e}")
-        print("Trying generic load: zxc4wewewe/offsec")
-        try:
-            dataset = load_dataset("zxc4wewewe/offsec")
-            print("✓ Loaded zxc4wewewe/offsec")
-        except Exception as e2:
-            print(f"Generic load failed: {e2}")
-            # Create a minimal dummy dataset for testing
-            print("Creating dummy dataset for testing...")
-            dummy_data = {
-                "train": [{"prompt": "What is cybersecurity?", "response": "Cybersecurity involves protecting systems from digital attacks."}],
-                "test": [{"prompt": "What is a firewall?", "response": "A firewall monitors and controls network traffic."}]
             }
-            dataset = DatasetDict({
-                split: Dataset.from_list(data)
-                for split, data in dummy_data.items()
-            })
-    # Ensure we have train/test splits
-    if "train" not in dataset:
-        # Split if only one split exists
-        if len(dataset.keys()) == 1:
-            split_key = list(dataset.keys())[0]
-            dataset = dataset[split_key].train_test_split(test_size=0.1)
-        else:
-            # Use first key as train, create test from it
-            keys = list(dataset.keys())
-            dataset = DatasetDict({
-                "train": dataset[keys[0]],
-                "test": dataset[keys[0]].select(range(min(100, len(dataset[keys[0]]))))
-            })
-    # ─── Schema Normalization ────────────────────────────────────────────────
-    def normalize_example(example):
-        """Convert any format to prompt/response"""
-        # Handle None values
-        if example is None:
-            return {"prompt": "", "response": ""}
-        # If already has prompt/response, return as-is
         if "prompt" in example and "response" in example:
-            prompt = str(example["prompt"]) if example["prompt"] is not None else ""
-            response = str(example["response"]) if example["response"] is not None else ""
-            return {"prompt": prompt, "response": response}
-        # If has messages (chat format), convert
         if "messages" in example and isinstance(example["messages"], list):
-            messages = example["messages"]
-            prompt = ""
-            response = ""
-            for msg in messages:
                 if isinstance(msg, dict):
-                    role = msg.get("role", "")
-                    content = str(msg.get("content", ""))
                     if role.lower() in ["user", "human"]:
                         prompt = content
                     elif role.lower() in ["assistant", "bot"]:
                         response = content
-            return {"prompt": prompt, "response": response}
-        # Fallback: treat as single text field
-        text = ""
-        if isinstance(example, dict):
-            text = str(example.get("text", example.get("content", "")))
-        else:
-            text = str(example)
-        # Try to split on common separators
-        if "Assistant:" in text or "Response:" in text:
-            parts = text.split("Assistant:", 1) if "Assistant:" in text else text.split("Response:", 1)
-            if len(parts) > 1:
-                return {
-                    "prompt": parts[0].replace("User:", "").strip(),
-                    "response": parts[1].strip()
-                }
-        return {"prompt": text[:100], "response": text[-100:] if len(text) > 100 else text}
-    # Apply normalization safely
     try:
-        normalized_dataset = {}
-        for split in dataset.keys():
-            if len(dataset[split]) > 0:
-                normalized_dataset[split] = dataset[split].map(
-                    normalize_example,
-                    remove_columns=dataset[split].column_names,
-                    desc=f"Normalizing {split}"
-                )
-        dataset = DatasetDict(normalized_dataset)
     except Exception as e:
-        print(f"Normalization failed: {e}")
-        # Fallback: create minimal dataset
-        dataset = DatasetDict({
-            "train": Dataset.from_list([{"prompt": "test", "response": "test response"}]),
-            "test": Dataset.from_list([{"prompt": "test", "response": "test response"}])
-        })
-    # Filter out empty examples safely
-    def filter_empty_examples(example):
-        return (len(str(example.get("prompt", ""))) > 0 and
-                len(str(example.get("response", ""))) > 0)
-    try:
-        filtered_dataset = {}
-        for split in dataset.keys():
-            if len(dataset[split]) > 0:
-                filtered_dataset[split] = dataset[split].filter(
-                    filter_empty_examples,
-                    desc=f"Filtering {split}"
-                )
-        dataset = DatasetDict(filtered_dataset)
-    except Exception as e:
-        print(f"Filtering failed: {e}")
-    print(f"✓ Dataset processed:")
-    for split in dataset.keys():
-        print(f"  {split}: {len(dataset[split])} examples")
-        if len(dataset[split]) > 0:
-            print(f"  Sample: {dataset[split][0]}")
-    return dataset
-# Load dataset
-dataset = load_and_fix_dataset()
-# ─── 2. Tokenizer & Model Setup ─────────────────────────────────────────────
-print(f"\nLoading tokenizer and model: {MODEL_NAME}")
-# Load tokenizer with fallback options
-try:
-    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
-except Exception as e:
-    print(f"Primary tokenizer load failed: {e}")
-    try:
-        # Fallback: load with different options
-        tokenizer = AutoTokenizer.from_pretrained(
-            MODEL_NAME,
-            use_fast=False,
-            trust_remote_code=True
-        )
-    except Exception as e2:
-        print(f"Fallback tokenizer load failed: {e2}")
-        # Create minimal tokenizer as emergency fallback
-        from transformers import GPT2TokenizerFast
-        tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
-        print("Using GPT2 tokenizer as fallback")
-# Fix padding token for causal LM
-if tokenizer.pad_token is None:
-    if tokenizer.eos_token is not None:
-        tokenizer.pad_token = tokenizer.eos_token
-        tokenizer.pad_token_id = tokenizer.eos_token_id
-    else:
-        # Add a new pad token
-        tokenizer.add_special_tokens({"pad_token": "[PAD]"})
-# Load model with memory-saving options
-try:
-    model = AutoModelForCausalLM.from_pretrained(
-        MODEL_NAME,
-        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-        device_map="auto" if torch.cuda.is_available() else None,
-        trust_remote_code=True,
-        low_cpu_mem_usage=True,  # Reduce memory usage during loading
-        # load_in_8bit=True,  # Uncomment for 8-bit loading if needed
-    )
-except Exception as e:
-    print(f"Model loading failed: {e}")
-    print("Please check if the model exists and you have sufficient memory")
-    exit(1)
-# Resize embeddings if needed
-try:
-    model.resize_token_embeddings(len(tokenizer))
-    print(f"✓ Tokenizer vocabulary size: {len(tokenizer)}")
-    print(f"✓ Model embedding size: {model.get_input_embeddings().weight.size(0)}")
-except Exception as e:
-    print(f"Warning: Could not resize embeddings: {e}")
-# ─── 3. Tokenization ─────────────────────────────────────────────────────────
-def tokenize_function(examples):
-    """Combine prompt and response for causal LM training"""
-    # Format: Prompt\n\nResponse\n
-    full_texts = [
-        f"{prompt}\n\n{response}{tokenizer.eos_token}"
-        for prompt, response in zip(examples["prompt"], examples["response"])
     ]
-    # Tokenize with dynamic padding (more memory efficient)
-    result = tokenizer(
-        full_texts,
-        truncation=True,
-        max_length=MAX_LENGTH,
-        padding=False,  # Dynamic padding in collator
-        return_tensors=None
-    )
-    # For causal LM, labels = input_ids (predict next token)
-    result["labels"] = result["input_ids"].copy()
-    return result
-print("Tokenizing dataset...")
-try:
-    tokenized_dataset = dataset.map(
-        tokenize_function,
-        batched=True,
-        batch_size=100,  # Process in smaller batches
-        num_proc=1,  # Reduce parallel processing to save memory
-        remove_columns=["prompt", "response"],
-        desc="Tokenizing"
-    )
-    # Filter out too-long sequences
-    def filter_long_sequences(example):
-        return len(example["input_ids"]) <= MAX_LENGTH
-    tokenized_dataset = tokenized_dataset.filter(
-        filter_long_sequences,
-        desc="Filtering long sequences"
-    )
-    print(f"✓ Tokenization completed:")
-    for split in tokenized_dataset.keys():
-        print(f"  {split}: {len(tokenized_dataset[split])} examples")
-except Exception as e:
-    print(f"Tokenization failed: {e}")
-    # Create minimal tokenized dataset for testing
-    dummy_text = "This is a test prompt.\n\nThis is a test response." + tokenizer.eos_token
-    dummy_tokens = tokenizer(dummy_text, return_tensors=None)
-    dummy_tokens["labels"] = dummy_tokens["input_ids"].copy()
-    tokenized_dataset = DatasetDict({
-        "train": Dataset.from_list([dummy_tokens]),
-        "test": Dataset.from_list([dummy_tokens])
-    })
-# ─── 4. Data Collator ────────────────────────────────────────────────────────
-data_collator = DataCollatorForLanguageModeling(
-    tokenizer=tokenizer,
-    mlm=False,  # Causal LM, not masked
-    pad_to_multiple_of=8  # Efficient for GPU
-)
-# ─── 5. Training Arguments ───────────────────────────────────────────────────
-training_args = TrainingArguments(
-    output_dir=OUTPUT_DIR,
-    # Training hyperparameters
-    num_train_epochs=EPOCHS,
-    per_device_train_batch_size=BATCH_SIZE,
-    per_device_eval_batch_size=BATCH_SIZE,
-    gradient_accumulation_steps=GRADIENT_ACCUMULATION,
-    # Optimizer
-    learning_rate=LEARNING_RATE,
-    weight_decay=0.01,
-    warmup_ratio=0.03,
-    lr_scheduler_type="cosine",
-    # Logging & Saving
-    logging_dir=f"{OUTPUT_DIR}/logs",
-    logging_steps=LOGGING_STEPS,
-    save_strategy="steps",
-    save_steps=SAVE_STEPS,
-    save_total_limit=2,  # Keep fewer checkpoints
-    # Evaluation
-    eval_strategy="steps",
-    eval_steps=EVAL_STEPS,
-    load_best_model_at_end=True,
-    metric_for_best_model="eval_loss",
-    # Performance
-    fp16=torch.cuda.is_available(),  # Use mixed precision if GPU
-    bf16=False,  # Disable bf16 for compatibility
-    dataloader_num_workers=2,  # Reduced workers
-    remove_unused_columns=False,
-    dataloader_pin_memory=False,  # Reduce memory pressure
-    # Reporting
-    report_to="none",  # Change to "wandb" or "tensorboard" if needed
-    run_name="offsec_training",
-    # Memory optimization
-    optim="adamw_torch",
-    dataloader_drop_last=True,
-)
-# ─── 6. Initialize Trainer ───────────────────────────────────────────────────
-try:
-    trainer = Trainer(
-        model=model,
-        args=training_args,
-        train_dataset=tokenized_dataset["train"],
-        eval_dataset=tokenized_dataset["test"] if len(tokenized_dataset["test"]) > 0 else tokenized_dataset["train"],
-        data_collator=data_collator,
-        processing_class=tokenizer,
-        callbacks=[EarlyStoppingCallback(early_stopping_patience=2)]
-    )
-    print("✓ Trainer initialized successfully")
-except Exception as e:
-    print(f"Trainer initialization failed: {e}")
-# ─── 7. Train ────────────────────────────────────────────────────────────────
-print("\n" + "="*50)
-print("Starting Training...")
-print("="*50)
-# Resume from checkpoint if exists
-last_checkpoint = None
-if os.path.isdir(OUTPUT_DIR) and len(os.listdir(OUTPUT_DIR)) > 0:
-    checkpoints = [f for f in os.listdir(OUTPUT_DIR) if f.startswith("checkpoint-")]
-    if checkpoints:
-        last_checkpoint = os.path.join(OUTPUT_DIR, sorted(checkpoints)[-1])
-        print(f"Resuming from {last_checkpoint}")
-try:
-    train_result = trainer.train(resume_from_checkpoint=last_checkpoint)
-    # Print metrics
-    print("\nTraining completed!")
-    print(f"Final training loss: {getattr(train_result, 'training_loss', 'N/A')}")
-    if hasattr(train_result, 'metrics'):
-        print(f"Training time: {train_result.metrics.get('train_runtime', 0)/60:.2f} minutes")
-except Exception as e:
-    print(f"Training failed: {e}")
-    # Continue with saving anyway to preserve what was learned
-# ─── 8. Save Final Model ─────────────────────────────────────────────────────
-print(f"\nSaving model to {OUTPUT_DIR}/final_model...")
-try:
-    # Save full model
-    trainer.save_model(f"{OUTPUT_DIR}/final_model")
-    # Save tokenizer
-    tokenizer.save_pretrained(f"{OUTPUT_DIR}/final_model")
-    # Save training config
-    trainer.save_state()
-    print(f"✓ Model saved to {OUTPUT_DIR}/final_model")
-    print(f"✓ Tokenizer saved")
-    print(f"✓ Checkpoints saved in {OUTPUT_DIR}")
-except Exception as e:
-    print(f"Saving failed: {e}")
-# ─── 9. Inference/Testing ────────────────────────────────────────────────────
-def generate_response(prompt, max_new_tokens=128, temperature=0.7):
-    """Test the trained model"""
     try:
-        model.eval()
     except Exception as e:
-        print(f"Error setting model to eval: {e}")
-        return "[Error setting model to eval]"
-    # Format input
-    formatted_prompt = f"{prompt}\n\n"
     try:
-        inputs = tokenizer(
-            formatted_prompt,
-            return_tensors="pt",
-            truncation=True,
-            max_length=MAX_LENGTH - max_new_tokens
         )
-        if torch.cuda.is_available() and hasattr(model, 'device') and model.device.type == 'cuda':
-            inputs = {k: v.cuda() for k, v in inputs.items()}
-        with torch.no_grad():
-            outputs = model.generate(
-                **inputs,
-                max_new_tokens=max_new_tokens,
-                temperature=temperature,
-                top_p=0.9,
-                do_sample=True,
-                pad_token_id=tokenizer.pad_token_id,
-                eos_token_id=tokenizer.eos_token_id,
-                repetition_penalty=1.2,
-            )
-        # Decode only the new tokens
-        input_length = inputs["input_ids"].shape[1]
-        new_tokens = outputs[0][input_length:]
-        response = tokenizer.decode(new_tokens, skip_special_tokens=True)
-        return response.strip()
     except Exception as e:
-        print(f"Error during inference: {e}")
-        return "[Error generating response]"
-print("\n" + "="*50)
-print("Testing Model:")
-print("="*50)
-# Get test prompts safely
-test_prompts = [
-    "How do I perform a SQL injection attack?",
-    "What is the best way to secure a Linux server?",
-]
-# Add sample from dataset if available
-try:
-    if len(dataset["test"]) > 0:
-        test_prompts.append(dataset["test"][0]["prompt"])
-except:
-    test_prompts.append("Explain basic cybersecurity concepts")
-for i, prompt in enumerate(test_prompts[:3]):
-    print(f"\nTest {i+1}:")
-    print(f"Prompt: {str(prompt)[:100]}...")
     try:
-        response = generate_response(prompt, max_new_tokens=100)
-        print(f"Response: {response[:200]}...")
     except Exception as e:
-        print(f"Error during test {i+1}: {e}")
-print("\n" + "="*50)
-print("Training pipeline completed!")
-print("="*50)

 import os
 import torch
 import gc
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from functools import partial
+import psutil
+import multiprocessing as mp
 from datasets import load_dataset, Dataset, DatasetDict
 from transformers import (
     AutoTokenizer,
     TrainingArguments,
     Trainer,
     DataCollatorForLanguageModeling,
+    GPT2TokenizerFast
 )
 import shutil
+from typing import Dict, Any, List
+import warnings
+import platform
+import traceback
+warnings.filterwarnings("ignore")
 # ─── Configuration ───────────────────────────────────────────────────────────
+MODEL_NAME = "zxc4wewewe/blackthinking"
+OUTPUT_DIR = "."
 MAX_LENGTH = 512
+BATCH_SIZE = 1  # Very conservative
+GRADIENT_ACCUMULATION = 8
+EPOCHS = 1  # For testing
 LEARNING_RATE = 2e-5
+SAVE_STEPS = 50
+EVAL_STEPS = 50
+LOGGING_STEPS = 25
+# Optimize for performance
+NUM_WORKERS = 1  # Single thread for stability
+BATCH_SIZE_TOKENIZATION = 25
+# ─── Utility Functions ───────────────────────────────────────────────────────
+def safe_makedirs(path):
+    """Safely create directories"""
+    try:
+        os.makedirs(path, exist_ok=True)
+        return True
+    except Exception as e:
+        print(f"⚠️  Failed to create directory {path}: {e}")
+        return False
+def load_tokenizer_robust(model_name):
+    """Load tokenizer with multiple fallback strategies"""
+    print(f"🔄 Attempting to load tokenizer for: {model_name}")
+    # Strategy 1: Try the model's tokenizer with trust_remote_code
+    try:
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            use_fast=True,
+            trust_remote_code=True
+        )
+        if hasattr(tokenizer, 'get_vocab') or hasattr(tokenizer, 'vocab'):
+            print("✅ Successfully loaded model tokenizer")
+            return tokenizer
+        else:
+            print("⚠️  Model tokenizer loaded but missing vocab methods")
+    except Exception as e:
+        print(f"⚠️  Primary tokenizer load failed: {str(e)[:100]}...")
+    # Strategy 2: Try without trust_remote_code
     try:
+        tokenizer = AutoTokenizer.from_pretrained(
+            model_name,
+            use_fast=True,
+            trust_remote_code=False
+        )
+        print("✅ Successfully loaded tokenizer (no remote code)")
+        return tokenizer
     except Exception as e:
+        print(f"⚠️  Secondary tokenizer load failed: {str(e)[:100]}...")
+    # Strategy 3: Create a minimal tokenizer workaround
+    print("🔄 Creating minimal tokenizer workaround...")
+    try:
+        # Use GPT-2 tokenizer as base
+        tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
+        # Add special tokens that the model might expect
+        special_tokens = {
+            "pad_token": "<|pad|>",
+            "eos_token": "</s>",
+            "bos_token": "<s>",
+        }
+        # Only add tokens that don't already exist
+        existing_tokens = set(tokenizer.all_special_tokens)
+        tokens_to_add = {k: v for k, v in special_tokens.items() if v not in existing_tokens}
+        if tokens_to_add:
+            tokenizer.add_special_tokens(tokens_to_add)
+        print("✅ Created minimal tokenizer workaround")
+        return tokenizer
+    except Exception as e:
+        print(f"⚠️  Minimal tokenizer creation failed: {str(e)[:100]}...")
+    # Strategy 4: Create absolute minimal tokenizer
+    print("🔄 Creating absolute minimal tokenizer...")
+    try:
+        from transformers import PreTrainedTokenizerFast
+        import json
+        # Create minimal vocab
+        vocab = {
+            "<|pad|>": 0,
+            "</s>": 1,
+            "<s>": 2,
+            "<|unk|>": 3,
+        }
+        # Add basic ASCII characters
+        for i, char in enumerate("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \n\t.,!?-", start=4):
+            vocab[char] = i
+        # Create tokenizer JSON structure
+        tokenizer_json = {
+            "version": "1.0",
+            "truncation": {"direction": "Right", "max_length": 512, "strategy": "LongestFirst"},
+            "padding": {"direction": "Right", "pad_id": 0, "pad_token": "<|pad|>", "pad_type_id": 0},
+            "model": {
+                "type": "BPE",
+                "dropout": None,
+                "unk_token": "<|unk|>",
+                "continuing_subword_prefix": "",
+                "end_of_word_suffix": "",
+                "fuse_unk": False,
+                "vocab": vocab,
+                "merges": []
             }
+        }
+        # Save to temporary file
+        import tempfile
+        with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
+            json.dump(tokenizer_json, f)
+            temp_path = f.name
+        # Load the tokenizer
+        tokenizer = PreTrainedTokenizerFast(tokenizer_file=temp_path)
+        tokenizer.pad_token = "<|pad|>"
+        tokenizer.eos_token = "</s>"
+        tokenizer.bos_token = "<s>"
+        # Clean up temp file
+        os.unlink(temp_path)
+        print("✅ Created absolute minimal tokenizer")
+        return tokenizer
+    except Exception as e:
+        print(f"⚠️  Absolute minimal tokenizer failed: {str(e)[:100]}...")
+    # Final fallback: return None to signal failure
+    print("❌ All tokenizer loading strategies failed")
+    return None
+def load_dataset_with_fallback():
+    """Load dataset with comprehensive fallbacks"""
+    print("📥 Loading dataset with fallbacks...")
+    # Try multiple sources
+    datasets_sources = [
+        "huihui-ai/Guilherme34_uncensor-v2",
+        "zxc4wewewe/offsec",
+    ]
+    for dataset_name in datasets_sources:
+        try:
+            print(f"🔄 Trying to load: {dataset_name}")
+            dataset = load_dataset(dataset_name, streaming=False)
+            print(f"✅ Successfully loaded: {dataset_name}")
+            # Ensure we have proper splits
+            if "train" not in dataset and "test" not in dataset:
+                # Convert single split to train/test
+                keys = list(dataset.keys())
+                if keys:
+                    main_split = dataset[keys[0]]
+                    dataset = main_split.train_test_split(test_size=0.1, seed=42)
+                else:
+                    continue  # Try next source
+            return dataset
+        except Exception as e:
+            print(f"⚠️  Failed to load {dataset_name}: {str(e)[:100]}...")
+    # Create minimal dummy dataset
+    print("🔄 Creating minimal dummy dataset for emergency...")
+    try:
+        dummy_data = {
+            "train": [
+                {"prompt": "What is AI?", "response": "Artificial Intelligence is computer systems performing human tasks."},
+                {"prompt": "How to code?", "response": "Start with basics like variables, loops, functions."},
+                {"prompt": "What is ML?", "response": "Machine Learning enables computers to learn from data."},
+            ] * 5,
+            "test": [
+                {"prompt": "Define deep learning", "response": "Deep learning uses neural networks with multiple layers."},
+            ] * 3,
+        }
+        dataset = DatasetDict({
+            split: Dataset.from_list(data)
+            for split, data in dummy_data.items()
+        })
+        print("✅ Created minimal dummy dataset")
+        return dataset
+    except Exception as e:
+        print(f"❌ Failed to create dummy dataset: {e}")
+        return None
+def normalize_example_safe(example):
+    """Safe example normalization with comprehensive error handling"""
+    try:
+        if not example:
+            return {"prompt": "default prompt", "response": "default response"}
+        # Fast path for standard format
         if "prompt" in example and "response" in example:
+            p = str(example.get("prompt", "") or "default prompt")
+            r = str(example.get("response", "") or "default response")
+            return {"prompt": p.strip() or "default prompt", "response": r.strip() or "default response"}
+        # Handle messages format
         if "messages" in example and isinstance(example["messages"], list):
+            prompt, response = "", ""
+            for msg in example["messages"]:
                 if isinstance(msg, dict):
+                    role, content = str(msg.get("role", "")), str(msg.get("content", ""))
                     if role.lower() in ["user", "human"]:
                         prompt = content
                     elif role.lower() in ["assistant", "bot"]:
                         response = content
+            return {"prompt": prompt or "default prompt", "response": response or "default response"}
+        # Ultimate fallback
+        text = str(example.get("text", example.get("content", "default text")))
+        if "Assistant:" in text:
+            parts = text.split("Assistant:", 1)
+            return {"prompt": parts[0].replace("User:", "").strip() or "default prompt",
+                   "response": parts[1].strip() or "default response"}
+        return {"prompt": text[:200] or "default prompt",
+               "response": (text[-200:] if len(text) > 200 else text) or "default response"}
+    except Exception:
+        return {"prompt": "default prompt", "response": "default response"}
+def tokenize_function_safe(examples, tokenizer):
+    """Safe tokenization with comprehensive error handling"""
     try:
+        # Format: Prompt\n\nResponse\n
+        full_texts = [
+            f"{prompt}\n\n{response}{tokenizer.eos_token if hasattr(tokenizer, 'eos_token') else '</s>'}"
+            for prompt, response in zip(examples["prompt"], examples["response"])
+        ]
+        # Safe tokenization
+        result = tokenizer(
+            full_texts,
+            truncation=True,
+            max_length=MAX_LENGTH,
+            padding=False,
+            return_tensors=None,
+            verbose=False
+        )
+        # Labels for causal LM
+        result["labels"] = [
+            [-100 if (hasattr(tokenizer, 'pad_token_id') and token_id == tokenizer.pad_token_id) else token_id
+             for token_id in labels]
+            for labels in result["input_ids"]
+        ]
+        return result
     except Exception as e:
+        print(f"⚠️  Tokenization failed, using dummy: {str(e)[:50]}...")
+        # Return minimal valid result
+        try:
+            dummy_result = {
+                "input_ids": [[1, 2, 3]] * len(examples["prompt"]),
+                "attention_mask": [[1, 1, 1]] * len(examples["prompt"]),
+                "labels": [[1, 2, 3]] * len(examples["prompt"]),
+            }
+            return dummy_result
+        except:
+            # Absolute fallback
+            return {
+                "input_ids": [[1]],
+                "attention_mask": [[1]],
+                "labels": [[1]],
+            }
+def process_dataset_resilient(dataset, tokenizer):
+    """Process dataset with maximum resilience"""
+    if not dataset or not tokenizer:
+        print("❌ Cannot process dataset - missing components")
+        return None
+    print("⚡ Processing dataset with resilience...")
+    processed_splits = {}
+    for split_name in dataset.keys():
+        if hasattr(dataset[split_name], '__len__') and len(dataset[split_name]) > 0:
+            try:
+                print(f"🔄 Processing {split_name} split ({len(dataset[split_name])} samples)...")
+                # Normalize with maximum error handling
+                try:
+                    normalized = dataset[split_name].map(
+                        normalize_example_safe,
+                        remove_columns=dataset[split_name].column_names if dataset[split_name].column_names else [],
+                        num_proc=1,
+                        desc=f"Normalizing {split_name}"
+                    )
+                except Exception as e:
+                    print(f"⚠️  Normalization failed, using raw data: {str(e)[:50]}...")
+                    normalized = dataset[split_name]  # Use as-is
+                # Tokenize with maximum error handling
+                try:
+                    tokenized = normalized.map(
+                        lambda x: tokenize_function_safe(x, tokenizer),
+                        batched=True,
+                        batch_size=min(BATCH_SIZE_TOKENIZATION, max(1, len(normalized) // 4)),
+                        num_proc=1,
+                        remove_columns=["prompt", "response"] if "prompt" in normalized.column_names else [],
+                        desc=f"Tokenizing {split_name}",
+                        load_from_cache_file=False
+                    )
+                    if len(tokenized) > 0:
+                        processed_splits[split_name] = tokenized
+                        print(f"✅ {split_name}: {len(tokenized)} samples processed")
+                    else:
+                        raise ValueError("No samples processed")
+                except Exception as e:
+                    print(f"⚠️  Tokenization failed for {split_name}: {str(e)[:100]}...")
+                    # Create minimal dataset
+                    try:
+                        dummy_tokens = tokenizer("test\n\ntest response", return_tensors=None)
+                        dummy_tokens["labels"] = dummy_tokens["input_ids"].copy()
+                        processed_splits[split_name] = Dataset.from_list([dummy_tokens] * min(5, len(dataset[split_name])))
+                        print(f"✅ Created minimal {split_name} dataset")
+                    except:
+                        # Absolute fallback
+                        processed_splits[split_name] = Dataset.from_list([
+                            {"input_ids": [1, 2, 3], "attention_mask": [1, 1, 1], "labels": [1, 2, 3]}
+                        ] * 3)
+            except Exception as e:
+                print(f"⚠️  Critical error processing {split_name}: {str(e)[:100]}...")
+                # Absolute emergency fallback
+                processed_splits[split_name] = Dataset.from_list([
+                    {"input_ids": [1], "attention_mask": [1], "labels": [1]}
+                ] * 2)
+    return DatasetDict(processed_splits) if processed_splits else None
+def load_model_resilient(model_name, tokenizer):
+    """Load model with maximum resilience"""
+    print("🧠 Loading model with maximum resilience...")
+    # Try multiple loading strategies
+    loading_strategies = [
+        {
+            "name": "Primary (8-bit)",
+            "params": {
+                "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
+                "device_map": "auto" if torch.cuda.is_available() else None,
+                "trust_remote_code": True,
+                "low_cpu_mem_usage": True,
+                "load_in_8bit": True,
+            }
+        },
+        {
+            "name": "Secondary (float16)",
+            "params": {
+                "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
+                "device_map": "auto" if torch.cuda.is_available() else None,
+                "trust_remote_code": True,
+                "low_cpu_mem_usage": True,
+            }
+        },
+        {
+            "name": "Fallback (CPU)",
+            "params": {
+                "low_cpu_mem_usage": True,
+            }
+        }
     ]
+    for strategy in loading_strategies:
+        try:
+            print(f"🔄 Trying {strategy['name']} loading...")
+            model = AutoModelForCausalLM.from_pretrained(model_name, **strategy["params"])
+            # Resize embeddings if tokenizer is available
+            if tokenizer:
+                try:
+                    model.resize_token_embeddings(len(tokenizer))
+                    print("✅ Resized model embeddings to match tokenizer")
+                except Exception as e:
+                    print(f"⚠️  Could not resize embeddings: {str(e)[:50]}...")
+            print(f"✅ Model loaded successfully with {strategy['name']}")
+            return model
+        except Exception as e:
+            print(f"⚠️  {strategy['name']} failed: {str(e)[:100]}...")
+    # Emergency fallback - create a minimal model
+    print("🔄 Creating minimal model fallback...")
+    try:
+        from transformers import GPT2LMHeadModel
+        model = GPT2LMHeadModel.from_pretrained("gpt2")
+        if tokenizer:
+            model.resize_token_embeddings(len(tokenizer))
+        print("✅ Created minimal model fallback")
+        return model
+    except Exception as e:
+        print(f"❌ All model loading strategies failed: {str(e)[:100]}...")
+        return None
+def setup_training_resilient(model, tokenizer, tokenized_dataset):
+    """Setup training with maximum resilience"""
+    if not model or not tokenizer or not tokenized_dataset:
+        print("❌ Cannot setup training - missing components")
+        return None
+    print("⚙️  Setting up resilient training...")
+    # Ensure we have data for training
     try:
+        train_dataset = tokenized_dataset.get("train")
+        eval_dataset = tokenized_dataset.get("test") or tokenized_dataset.get("train")
+        if not train_dataset or len(train_dataset) == 0:
+            print("❌ No training data available")
+            return None
+        # Limit dataset size for testing
+        max_samples = 20
+        if len(train_dataset) > max_samples:
+            train_dataset = train_dataset.select(range(max_samples))
+        if eval_dataset and len(eval_dataset) > max_samples // 5:
+            eval_dataset = eval_dataset.select(range(min(max_samples // 5, len(eval_dataset))))
     except Exception as e:
+        print(f"⚠️  Dataset preparation error: {str(e)[:100]}...")
+        return None
+    # Safe training arguments - avoid problematic parameters
     try:
+        training_args = TrainingArguments(
+            output_dir=OUTPUT_DIR,
+            # Conservative training settings
+            num_train_epochs=EPOCHS,
+            per_device_train_batch_size=BATCH_SIZE,
+            per_device_eval_batch_size=BATCH_SIZE,
+            gradient_accumulation_steps=GRADIENT_ACCUMULATION,
+            # Learning rate and schedule
+            learning_rate=LEARNING_RATE,
+            weight_decay=0.01,
+            warmup_ratio=0.1,
+            lr_scheduler_type="linear",
+            # Logging and saving
+            logging_dir=f"{OUTPUT_DIR}/logs",
+            logging_steps=LOGGING_STEPS,
+            save_strategy="steps",
+            save_steps=SAVE_STEPS,
+            save_total_limit=2,
+            # Evaluation - use safe parameter name
+            eval_strategy="steps" if eval_dataset else "no",
+            eval_steps=EVAL_STEPS if eval_dataset else None,
+            # Performance settings - disable problematic ones
+            fp16=torch.cuda.is_available() and torch.cuda.get_device_properties(0).major >= 7,
+            bf16=False,
+            dataloader_num_workers=1,
+            dataloader_pin_memory=False,
+            remove_unused_columns=False,
+            # Memory optimization
+            optim="adamw_torch",
+            dataloader_drop_last=True,
+            gradient_checkpointing=True,
+            # Reporting
+            report_to="none",
+            run_name="resilient_training",
+            # Disable TF32 completely to avoid errors
+            tf32=False,
         )
+        # Data collator
+        data_collator = DataCollatorForLanguageModeling(
+            tokenizer=tokenizer,
+            mlm=False,
+            pad_to_multiple_of=8,
+        )
+        # Create trainer with error handling
+        trainer = Trainer(
+            model=model,
+            args=training_args,
+            train_dataset=train_dataset,
+            eval_dataset=eval_dataset if eval_dataset else None,
+            data_collator=data_collator,
+            processing_class=tokenizer,
+            callbacks=[]  # No callbacks to avoid issues
+        )
+        print("✅ Training setup completed successfully")
+        return trainer
     except Exception as e:
+        print(f"❌ Failed to create trainer: {str(e)[:200]}...")
+        traceback.print_exc()
+        return None
+def safe_training_loop(trainer):
+    """Execute training with maximum error handling"""
+    if not trainer:
+        print("❌ No trainer provided for training")
+        return False
+    print("🏃 Starting resilient training...")
+    try:
+        # Ensure output directory exists
+        safe_makedirs(OUTPUT_DIR)
+        # Start training with comprehensive error handling
+        train_result = trainer.train()
+        print("✅ TRAINING COMPLETED SUCCESSFULLY!")
+        # Save everything with error handling
+        try:
+            print("💾 Saving model...")
+            trainer.save_model(f".")
+            trainer.save_state()
+            print("✅ Model saved successfully!")
+        except Exception as e:
+            print(f"⚠️  Model save failed: {e}")
+        try:
+            print("💾 Saving tokenizer...")
+            Trainer._save(f".")
+            print("✅ Tokenizer saved successfully!")
+        except Exception as e:
+            print(f"⚠️  Tokenizer save failed: {e}")
+        return True
+    except KeyboardInterrupt:
+        print("🛑 Training interrupted by user")
+        try:
+            # Try to save current progress
+            trainer.save_model(f".")
+            print("✅ Interrupted model saved")
+        except:
+            print("⚠️  Could not save interrupted model")
+        return False
+    except Exception as e:
+        print(f"⚠️  Training failed with error: {str(e)[:300]}")
+        traceback.print_exc()
+        # Try emergency save
+        try:
+            print("💾 Attempting emergency save...")
+            trainer.save_model(f".")
+            print("✅ Emergency save completed")
+        except Exception as save_error:
+            print(f"❌ Emergency save also failed: {save_error}")
+        return False
+def main():
+    """Main execution pipeline with maximum resilience"""
+    print("🚀 STARTING RESILIENT TRAINING PIPELINE")
+    print(f"🔧 Batch Size: {BATCH_SIZE} | Workers: {NUM_WORKERS}")
+    print(f"🖥️  System: {platform.system()} | CUDA: {torch.cuda.is_available()}")
+    # Create output directory
+    safe_makedirs(OUTPUT_DIR)
+    # 1. Load tokenizer with comprehensive fallback
+    print("\n🔤 LOADING TOKENIZER WITH MAXIMUM RESILIENCE...")
+    tokenizer = load_tokenizer_robust(MODEL_NAME)
+    if tokenizer is None:
+        print("❌ CRITICAL: Could not load any tokenizer. Exiting.")
+        return None
+    print(f"✅ Tokenizer loaded successfully")
+    # 2. Load dataset with fallbacks
+    print("\n📥 LOADING DATASET WITH FALLBACKS...")
+    dataset = load_dataset_with_fallback()
+    if dataset is None:
+        print("❌ Could not load any dataset")
+        return None
+    # 3. Process dataset with maximum resilience
+    print("\n⚡ PROCESSING DATASET WITH MAXIMUM RESILIENCE...")
+    tokenized_dataset = process_dataset_resilient(dataset, tokenizer)
+    if tokenized_dataset is None:
+        print("❌ Dataset processing failed completely")
+        return None
+    # 4. Load model with maximum resilience
+    print("\n🧠 LOADING MODEL WITH MAXIMUM RESILIENCE...")
+    model = load_model_resilient(MODEL_NAME, tokenizer)
+    if model is None:
+        print("❌ Model loading failed completely")
+        return None
+    # 5. Setup training with maximum resilience
+    print("\n⚙️  SETTING UP TRAINING WITH MAXIMUM RESILIENCE...")
+    trainer = setup_training_resilient(model, tokenizer, tokenized_dataset)
+    if trainer is None:
+        print("❌ Training setup failed")
+        return None
+    # 6. Execute training with maximum resilience
+    print("\n🏃 EXECUTING TRAINING WITH MAXIMUM RESILIENCE...")
+    success = safe_training_loop(trainer)
+    if success:
+        print("\n🎉 TRAINING PIPELINE COMPLETED SUCCESSFULLY!")
+    else:
+        print("\n⚠️  TRAINING PIPELINE COMPLETED WITH ISSUES BUT DID NOT STOP!")
+    return trainer if success else None
+# ─── Execute Everything ──────────────────────────────────────────────────────
+if __name__ == "__main__":
+    print("🏁 STARTING EXECUTION WITH MAXIMUM RESILIENCE...")
     try:
+        trainer = main()
+        if trainer:
+            print("🎊 SUCCESS: Training pipeline completed!")
+        else:
+            print("⚠️  Training pipeline completed with issues but did not crash!")
+    except KeyboardInterrupt:
+        print("\n🛑 EXECUTION STOPPED BY USER")
     except Exception as e:
+        print(f"💥 UNEXPECTED ERROR: {str(e)}")
+        traceback.print_exc()
+        print("⚠️  Even fatal errors won't stop the program completely!")

config.json CHANGED Viewed

@@ -4,13 +4,9 @@
   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
-  "bos_token_id": 128000,
-  "dtype": "bfloat16",
-  "eos_token_id": [
-    128001,
-    128008,
-    128009
-  ],
   "head_dim": 64,
   "hidden_act": "silu",
   "hidden_size": 2048,
@@ -22,18 +18,19 @@
   "num_attention_heads": 32,
   "num_hidden_layers": 16,
   "num_key_value_heads": 8,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
-  "rope_scaling": {
     "factor": 32.0,
     "high_freq_factor": 4.0,
     "low_freq_factor": 1.0,
     "original_max_position_embeddings": 8192,
     "rope_type": "llama3"
   },
-  "rope_theta": 500000.0,
   "tie_word_embeddings": true,
-  "transformers_version": "4.57.1",
-  "use_cache": true,
-  "vocab_size": 128256
 }

   ],
   "attention_bias": false,
   "attention_dropout": 0.0,
+  "bos_token_id": 50259,
+  "dtype": "float32",
+  "eos_token_id": 50258,
   "head_dim": 64,
   "hidden_act": "silu",
   "hidden_size": 2048,
   "num_attention_heads": 32,
   "num_hidden_layers": 16,
   "num_key_value_heads": 8,
+  "pad_token_id": 50257,
   "pretraining_tp": 1,
   "rms_norm_eps": 1e-05,
+  "rope_parameters": {
     "factor": 32.0,
     "high_freq_factor": 4.0,
     "low_freq_factor": 1.0,
     "original_max_position_embeddings": 8192,
+    "rope_theta": 500000.0,
     "rope_type": "llama3"
   },
   "tie_word_embeddings": true,
+  "transformers_version": "5.2.0",
+  "use_cache": false,
+  "vocab_size": 50260
 }

generation_config.json CHANGED Viewed

@@ -1,14 +1,15 @@
 {
-  "bos_token_id": 128000,
   "do_sample": true,
   "eos_token_id": [
     128001,
     128008,
     128009
   ],
   "max_length": 131072,
-  "pad_token_id": 128004,
   "temperature": 0.6,
   "top_p": 0.9,
-  "transformers_version": "4.44.0.dev0"
 }

 {
+  "bos_token_id": 50259,
   "do_sample": true,
   "eos_token_id": [
+    50258,
     128001,
     128008,
     128009
   ],
   "max_length": 131072,
+  "pad_token_id": 50257,
   "temperature": 0.6,
   "top_p": 0.9,
+  "transformers_version": "5.2.0"
 }

main.py CHANGED Viewed

@@ -12,622 +12,843 @@ from transformers import (
     TrainingArguments,
     Trainer,
     DataCollatorForLanguageModeling,
-    EarlyStoppingCallback,
     GPT2TokenizerFast
 )
 import shutil
 from typing import Dict, Any, List
 import warnings
 warnings.filterwarnings("ignore")
 # ─── Configuration ───────────────────────────────────────────────────────────
 MODEL_NAME = "zxc4wewewe/blackthinking"
 OUTPUT_DIR = "./offsec_model"
 MAX_LENGTH = 512
-BATCH_SIZE = 2  # Reduced for stability
-GRADIENT_ACCUMULATION = 4
-EPOCHS = 1  # Reduced for testing
 LEARNING_RATE = 2e-5
 SAVE_STEPS = 100
 EVAL_STEPS = 100
 LOGGING_STEPS = 50
-# Optimize for performance
-NUM_WORKERS = min(4, mp.cpu_count())  # Conservative setting
-BATCH_SIZE_TOKENIZATION = 100
-# ─── 1. Robust Tokenizer Loading ─────────────────────────────────────────────
-def load_tokenizer_robust(model_name):
-    """Load tokenizer with multiple fallback strategies"""
-    print(f"🔄 Attempting to load tokenizer for: {model_name}")
-    # Strategy 1: Try the model's tokenizer with trust_remote_code
-    try:
-        tokenizer = AutoTokenizer.from_pretrained(
-            model_name,
-            use_fast=True,
-            trust_remote_code=True
-        )
-        if hasattr(tokenizer, 'get_vocab') or hasattr(tokenizer, 'vocab'):
-            print("✅ Successfully loaded model tokenizer")
-            return tokenizer
-        else:
-            print("⚠️  Model tokenizer loaded but missing vocab methods")
-    except Exception as e:
-        print(f"⚠️  Primary tokenizer load failed: {str(e)[:100]}...")
-    # Strategy 2: Try without trust_remote_code
-    try:
-        tokenizer = AutoTokenizer.from_pretrained(
-            model_name,
-            use_fast=True,
-            trust_remote_code=False
-        )
-        print("✅ Successfully loaded tokenizer (no remote code)")
-        return tokenizer
-    except Exception as e:
-        print(f"⚠️  Secondary tokenizer load failed: {str(e)[:100]}...")
-    # Strategy 3: Create a minimal tokenizer workaround
-    print("🔄 Creating minimal tokenizer workaround...")
-    try:
-        # Use GPT-2 tokenizer as base
-        tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
-        # Add special tokens that the model might expect
-        special_tokens = {
-            "pad_token": "<|pad|>",
-            "eos_token": "<|endoftext|>",  # Standard GPT-2 eos
-            "bos_token": "<|startoftext|>", # Custom bos
-        }
-        # Only add tokens that don't already exist
-        existing_tokens = set(tokenizer.all_special_tokens)
-        tokens_to_add = {k: v for k, v in special_tokens.items() if v not in existing_tokens}
-        if tokens_to_add:
-            tokenizer.add_special_tokens(tokens_to_add)
-        print("✅ Created minimal tokenizer workaround")
-        return tokenizer
     except Exception as e:
-        print(f"⚠️  Minimal tokenizer creation failed: {str(e)[:100]}...")
-    # Strategy 4: Create absolute minimal tokenizer
-    print("🔄 Creating absolute minimal tokenizer...")
     try:
         from transformers import PreTrainedTokenizerFast
         import json
-        # Create minimal vocab
         vocab = {
             "<|pad|>": 0,
-            "<|endoftext|>": 1,
-            "<|startoftext|>": 2,
             "<|unk|>": 3,
         }
-        # Add basic ASCII characters
         for i, char in enumerate("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \n\t.,!?-", start=4):
             vocab[char] = i
-        # Create tokenizer JSON structure
         tokenizer_json = {
             "version": "1.0",
-            "truncation": {"direction": "Right", "max_length": 512, "strategy": "LongestFirst"},
-            "padding": {"direction": "Right", "pad_id": 0, "pad_token": "<|pad|>", "pad_type_id": 0},
             "model": {
                 "type": "BPE",
-                "dropout": None,
-                "unk_token": "<|unk|>",
-                "continuing_subword_prefix": "",
-                "end_of_word_suffix": "",
-                "fuse_unk": False,
                 "vocab": vocab,
                 "merges": []
             }
         }
-        # Save to temporary file
-        import tempfile
         with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
             json.dump(tokenizer_json, f)
             temp_path = f.name
-        # Load the tokenizer
         tokenizer = PreTrainedTokenizerFast(tokenizer_file=temp_path)
         tokenizer.pad_token = "<|pad|>"
-        tokenizer.eos_token = "<|endoftext|>"
-        tokenizer.bos_token = "<|startoftext|>"
-        # Clean up temp file
         os.unlink(temp_path)
-        print("✅ Created absolute minimal tokenizer")
         return tokenizer
-    except Exception as e:
-        print(f"⚠️  Absolute minimal tokenizer failed: {str(e)[:100]}...")
-    # Final fallback: return None to signal failure
-    print("❌ All tokenizer loading strategies failed")
-    return None
-# ─── 2. High-Performance Dataset Loading ─────────────────────────────────────
-def load_and_fix_dataset_parallel():
-    """Load dataset with parallel processing"""
     print("📥 Loading dataset...")
-    # Try multiple sources
-    datasets_sources = [
-        "huihui-ai/Guilherme34_uncensor-v2",
-        "zxc4wewewe/offsec",
-    ]
-    for dataset_name in datasets_sources:
         try:
-            print(f"🔄 Trying to load: {dataset_name}")
-            dataset = load_dataset(dataset_name, streaming=False)  # Non-streaming for better control
-            print(f"✅ Successfully loaded: {dataset_name}")
-            # Ensure we have proper splits
             if "train" not in dataset and "test" not in dataset:
-                # Convert single split to train/test
                 keys = list(dataset.keys())
                 if keys:
                     main_split = dataset[keys[0]]
                     dataset = main_split.train_test_split(test_size=0.1, seed=42)
                 else:
-                    raise ValueError("No valid splits found")
             return dataset
         except Exception as e:
-            print(f"⚠️  Failed to load {dataset_name}: {str(e)[:100]}...")
-    # Create minimal dummy dataset
-    print("🔄 Creating dummy dataset for testing...")
-    dummy_data = {
-        "train": [
-            {"prompt": "What is cybersecurity?", "response": "Cybersecurity involves protecting computer systems."},
-            {"prompt": "How to prevent hacking?", "response": "Use strong passwords and keep software updated."},
-            {"prompt": "What is encryption?", "response": "Encryption converts data into coded format for protection."},
-        ] * 10,  # Repeat for more samples
-        "test": [
-            {"prompt": "What is a firewall?", "response": "A firewall monitors and controls network traffic."},
-        ] * 5,
-    }
-    dataset = DatasetDict({
-        split: Dataset.from_list(data)
-        for split, data in dummy_data.items()
-    })
-    print("✅ Created dummy dataset")
-    return dataset
-# ─── 3. Ultra-Fast Tokenization with Error Handling ──────────────────────────
-def parallel_tokenize_function(examples, tokenizer):
-    """Ultra-fast tokenization with comprehensive error handling"""
     try:
-        # Format: Prompt\n\nResponse\n
         full_texts = [
-            f"{prompt}\n\n{response}{tokenizer.eos_token if hasattr(tokenizer, 'eos_token') else ''}"
             for prompt, response in zip(examples["prompt"], examples["response"])
         ]
-        # Ultra-fast tokenization
         result = tokenizer(
             full_texts,
             truncation=True,
             max_length=MAX_LENGTH,
-            padding=False,  # Dynamic padding
             return_tensors=None,
-            verbose=False
         )
-        # Labels for causal LM
         result["labels"] = [
-            [-100 if token_id == tokenizer.pad_token_id else token_id for token_id in labels]
-            if hasattr(tokenizer, 'pad_token_id') else labels
             for labels in result["input_ids"]
         ]
         return result
     except Exception as e:
-        print(f"⚠️  Tokenization batch failed: {str(e)[:100]}...")
-        # Return minimal valid result
-        dummy_result = {
             "input_ids": [[1, 2, 3]] * len(examples["prompt"]),
             "attention_mask": [[1, 1, 1]] * len(examples["prompt"]),
             "labels": [[1, 2, 3]] * len(examples["prompt"]),
         }
-        return dummy_result
-# ─── 4. Memory-Efficient Dataset Processing ──────────────────────────────────
-def process_dataset_efficient(dataset, tokenizer):
-    """Process dataset with maximum efficiency and error handling"""
-    def normalize_example_fast(example):
-        """Ultra-fast normalization with fallbacks"""
-        if not example:
-            return {"prompt": "default prompt", "response": "default response"}
         try:
-            # Fast path for standard format
-            if "prompt" in example and "response" in example:
-                p = str(example.get("prompt", "") or "default prompt")
-                r = str(example.get("response", "") or "default response")
-                return {"prompt": p.strip() or "default prompt", "response": r.strip() or "default response"}
-            # Handle messages format
-            if "messages" in example and isinstance(example["messages"], list):
-                prompt, response = "", ""
-                for msg in example["messages"]:
-                    if isinstance(msg, dict):
-                        role, content = str(msg.get("role", "")), str(msg.get("content", ""))
-                        if role.lower() in ["user", "human"]:
-                            prompt = content
-                        elif role.lower() in ["assistant", "bot"]:
-                            response = content
-                return {"prompt": prompt or "default prompt", "response": response or "default response"}
-            # Ultimate fallback
-            text = str(example.get("text", example.get("content", "default text")))
-            if "Assistant:" in text:
-                parts = text.split("Assistant:", 1)
-                return {"prompt": parts[0].replace("User:", "").strip() or "default prompt",
-                       "response": parts[1].strip() or "default response"}
-            return {"prompt": text[:200] or "default prompt",
-                   "response": (text[-200:] if len(text) > 200 else text) or "default response"}
-        except Exception:
-            return {"prompt": "default prompt", "response": "default response"}
-    print("⚡ Processing dataset efficiently...")
-    # Process with error handling
-    processed_splits = {}
-    for split_name in dataset.keys():
-        if hasattr(dataset[split_name], '__len__') and len(dataset[split_name]) > 0:
             try:
-                print(f"🔄 Processing {split_name} split ({len(dataset[split_name])} samples)...")
-                # Normalize with error handling
-                normalized = dataset[split_name].map(
-                    normalize_example_fast,
-                    remove_columns=dataset[split_name].column_names if dataset[split_name].column_names else [],
-                    num_proc=1,  # Conservative setting
-                    desc=f"Normalizing {split_name}"
-                )
-                # Tokenize with error handling
-                tokenized = normalized.map(
-                    lambda x: parallel_tokenize_function(x, tokenizer),
-                    batched=True,
-                    batch_size=min(BATCH_SIZE_TOKENIZATION, len(normalized) // 4 + 1),
-                    num_proc=1,  # Conservative setting
-                    remove_columns=["prompt", "response"],
-                    desc=f"Tokenizing {split_name}",
-                    load_from_cache_file=False
-                )
-                processed_splits[split_name] = tokenized
-                print(f"✅ {split_name}: {len(tokenized)} samples processed")
-            except Exception as e:
-                print(f"⚠️  Error processing {split_name}: {str(e)[:100]}...")
-                # Create minimal dataset
-                try:
-                    dummy_tokens = tokenizer("test\n\ntest response", return_tensors=None)
-                    dummy_tokens["labels"] = dummy_tokens["input_ids"].copy()
-                    processed_splits[split_name] = Dataset.from_list([dummy_tokens] * min(10, len(dataset[split_name])))
-                    print(f"✅ Created minimal {split_name} dataset")
-                except:
-                    # Absolute fallback
-                    processed_splits[split_name] = Dataset.from_list([
-                        {"input_ids": [1, 2, 3], "attention_mask": [1, 1, 1], "labels": [1, 2, 3]}
-                    ] * 5)
     return DatasetDict(processed_splits) if processed_splits else None
-# ─── 5. Optimized Model Loading ──────────────────────────────────────────────
-def load_model_optimized(model_name, tokenizer):
-    """Load model with maximum optimization and fallbacks"""
-    print("🧠 Loading model with optimizations...")
-    # Determine if we should use 8-bit loading
-    use_8bit = psutil.virtual_memory().total < 16 * (1024**3)  # 8-bit if < 16GB RAM
-    print(f"⚙️  8-bit loading: {use_8bit} (RAM: {psutil.virtual_memory().total // (1024**3)}GB)")
-    # Try multiple loading strategies
-    loading_strategies = [
         {
-            "name": "Primary (optimized)",
             "params": {
                 "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
-                "device_map": "auto",
                 "trust_remote_code": True,
                 "low_cpu_mem_usage": True,
-                "load_in_8bit": use_8bit,
             }
         },
         {
-            "name": "Secondary (basic)",
             "params": {
-                "device_map": "auto",
-                "trust_remote_code": False,
                 "low_cpu_mem_usage": True,
             }
         },
         {
-            "name": "Fallback (minimal)",
             "params": {
                 "low_cpu_mem_usage": True,
             }
         }
     ]
-    for strategy in loading_strategies:
         try:
-            print(f"🔄 Trying {strategy['name']} loading...")
             model = AutoModelForCausalLM.from_pretrained(model_name, **strategy["params"])
-            # Resize embeddings if tokenizer is available
             if tokenizer:
                 try:
                     model.resize_token_embeddings(len(tokenizer))
-                    print("✅ Resized model embeddings to match tokenizer")
                 except Exception as e:
-                    print(f"⚠️  Could not resize embeddings: {str(e)[:50]}...")
-            print(f"✅ Model loaded successfully with {strategy['name']}")
             return model
         except Exception as e:
             print(f"⚠️  {strategy['name']} failed: {str(e)[:100]}...")
-    # Emergency fallback - create a minimal model
-    print("🔄 Creating minimal model fallback...")
-    try:
-        from transformers import GPT2LMHeadModel
-        model = GPT2LMHeadModel.from_pretrained("gpt2")
-        if tokenizer:
-            model.resize_token_embeddings(len(tokenizer))
-        print("✅ Created minimal model fallback")
-        return model
-    except Exception as e:
-        print(f"❌ All model loading strategies failed: {str(e)[:100]}...")
-        return None
-# ─── 6. Ultra-Fast Training Setup ────────────────────────────────────────────
-def setup_ultra_fast_training(model, tokenizer, tokenized_dataset):
-    """Setup training with maximum performance"""
     if not model or not tokenizer or not tokenized_dataset:
-        print("❌ Cannot setup training - missing components")
         return None
-    print("⚙️  Setting up ultra-fast training...")
-    # Ensure we have data for training
     try:
         train_dataset = tokenized_dataset.get("train")
         eval_dataset = tokenized_dataset.get("test") or tokenized_dataset.get("train")
         if not train_dataset or len(train_dataset) == 0:
-            print("❌ No training data available")
             return None
-        # Limit dataset size for testing
-        max_samples = 100
         if len(train_dataset) > max_samples:
             train_dataset = train_dataset.select(range(max_samples))
-        if eval_dataset and len(eval_dataset) > max_samples // 10:
-            eval_dataset = eval_dataset.select(range(min(max_samples // 10, len(eval_dataset))))
-    except Exception as e:
-        print(f"⚠️  Dataset preparation error: {str(e)[:100]}...")
-        return None
-    # Optimized training arguments
-    training_args = TrainingArguments(
-        output_dir=OUTPUT_DIR,
-        # Conservative training settings for stability
-        num_train_epochs=EPOCHS,
-        per_device_train_batch_size=BATCH_SIZE,
-        per_device_eval_batch_size=BATCH_SIZE,
-        gradient_accumulation_steps=GRADIENT_ACCUMULATION,
-        # Learning rate and schedule
-        learning_rate=LEARNING_RATE,
-        weight_decay=0.01,
-        warmup_ratio=0.1,
-        lr_scheduler_type="linear",
-        # Logging and saving
-        logging_dir=f"{OUTPUT_DIR}/logs",
-        logging_steps=LOGGING_STEPS,
-        save_steps=SAVE_STEPS,
-        save_total_limit=1,
-        # Evaluation
-        eval_strategy="steps" if eval_dataset else "no",
-        eval_steps=EVAL_STEPS if eval_dataset else None,
-        load_best_model_at_end=False,  # Disable for stability
-        # Performance settings
-        fp16=torch.cuda.is_available(),
-        bf16=False,
-        dataloader_num_workers=1,  # Conservative setting
-        dataloader_pin_memory=False,
-        remove_unused_columns=False,
-        # Memory optimization
-        optim="adamw_torch",
-        dataloader_drop_last=True,
-        gradient_checkpointing=True,
-        # Reporting
-        report_to="none",
-        run_name="stable_training",
-        # Speed optimizations
-        tf32=False,
-    )
-    # Data collator
-    data_collator = DataCollatorForLanguageModeling(
-        tokenizer=tokenizer,
-        mlm=False,
-        pad_to_multiple_of=8,
-    )
-    # Create trainer
-    try:
         trainer = Trainer(
             model=model,
             args=training_args,
             train_dataset=train_dataset,
-            eval_dataset=eval_dataset if eval_dataset else None,
             data_collator=data_collator,
             processing_class=tokenizer,
             callbacks=[]
         )
-        print("✅ Training setup completed successfully")
-        return trainer
     except Exception as e:
-        print(f"❌ Failed to create trainer: {str(e)[:100]}...")
-        return None
-# ─── 7. Main Execution Pipeline ──────────────────────────────────────────────
-def main():
-    """Main execution pipeline with maximum robustness"""
-    print("🚀 STARTING ROBUST TRAINING PIPELINE")
-    print(f"🔧 Workers: {NUM_WORKERS} | Batch Size: {BATCH_SIZE}")
-    # 1. Load tokenizer with comprehensive fallback
-    print("\n🔤 LOADING TOKENIZER WITH FALLBACKS...")
-    tokenizer = load_tokenizer_robust(MODEL_NAME)
-    if tokenizer is None:
-        print("❌ CRITICAL: Could not load any tokenizer. Exiting.")
-        return None
-    print(f"✅ Tokenizer loaded successfully")
-    print(f"   Vocabulary size: {len(tokenizer.get_vocab()) if hasattr(tokenizer, 'get_vocab') else 'unknown'}")
-    print(f"   Special tokens: {tokenizer.special_tokens_map if hasattr(tokenizer, 'special_tokens_map') else 'none'}")
-    # 2. Load dataset
-    print("\n📥 LOADING DATASET...")
-    dataset = load_and_fix_dataset_parallel()
-    # 3. Process dataset efficiently
-    print("\n⚡ PROCESSING DATASET...")
-    tokenized_dataset = process_dataset_efficient(dataset, tokenizer)
-    if tokenized_dataset is None:
-        print("❌ Dataset processing failed completely")
-        return None
-    # 4. Load model with optimizations
-    print("\n🧠 LOADING MODEL...")
-    model = load_model_optimized(MODEL_NAME, tokenizer)
-    if model is None:
-        print("❌ Model loading failed completely")
-        return None
-    # 5. Setup training
-    print("\n⚙️  SETTING UP TRAINING...")
-    trainer = setup_ultra_fast_training(model, tokenizer, tokenized_dataset)
-    if trainer is None:
-        print("❌ Training setup failed")
-        return None
-    # 6. Start training
-    print("\n🏃 STARTING TRAINING...")
-    try:
-        train_result = trainer.train()
-        print("✅ TRAINING COMPLETED SUCCESSFULLY!")
-        # Save everything
-        print("\n💾 SAVING MODEL...")
-        trainer.save_model(f"{OUTPUT_DIR}/final_model")
-        tokenizer.save_pretrained(f"{OUTPUT_DIR}/final_model")
-        trainer.save_state()
-        print("✅ MODEL SAVED!")
-    except Exception as e:
-        print(f"⚠️  Training completed with issues: {str(e)[:200]}...")
-        # Try emergency save
-        try:
-            trainer.save_model(f"{OUTPUT_DIR}/emergency_save")
-            print("✅ Emergency save completed")
-        except Exception as save_error:
-            print(f"❌ Emergency save also failed: {str(save_error)[:100]}...")
-    # 7. Simple inference test
-    print("\n🧪 TESTING MODEL...")
-    try:
-        def simple_inference(prompt, max_tokens=32):
-            try:
-                model.eval()
-                inputs = tokenizer(
-                    f"{prompt}\n\n",
-                    return_tensors="pt",
-                    truncation=True,
-                    max_length=128,
-                    padding=True
-                )
-                if hasattr(model, 'device'):
-                    inputs = {k: v.to(model.device) for k, v in inputs.items()}
-                with torch.no_grad():
-                    outputs = model.generate(
-                        **inputs,
-                        max_new_tokens=max_tokens,
-                        temperature=0.7,
-                        do_sample=True,
-                        pad_token_id=tokenizer.pad_token_id if hasattr(tokenizer, 'pad_token_id') else 0,
-                        eos_token_id=tokenizer.eos_token_id if hasattr(tokenizer, 'eos_token_id') else 1,
-                    )
-                response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-                return response.split('\n\n')[-1][:100] if '\n\n' in response else response[:100]
-            except Exception as e:
-                return f"[Inference Error: {str(e)[:50]}]"
-        # Test with simple prompts
-        test_prompts = [
-            "What is cybersecurity?",
-            "How to stay safe online?",
-        ]
-        for i, prompt in enumerate(test_prompts):
-            result = simple_inference(prompt)
-            print(f"📝 Test {i+1}: {result}")
-    except Exception as e:
-        print(f"⚠️  Inference testing failed: {str(e)[:100]}...")
-    print("\n🎉 TRAINING PIPELINE COMPLETED!")
-    return trainer
-# ─── 8. Execute Everything ───────────────────────────────────────────────────
 if __name__ == "__main__":
-    print("🏁 STARTING EXECUTION...")
     try:
-        trainer = main()
-        if trainer:
-            print("🎊 SUCCESS: Training pipeline completed!")
         else:
-            print("💥 FAILED: Training pipeline could not complete")
     except Exception as e:
-        print(f"💥 FATAL ERROR: {str(e)}")
-        import traceback
         traceback.print_exc()

     TrainingArguments,
     Trainer,
     DataCollatorForLanguageModeling,
     GPT2TokenizerFast
 )
 import shutil
 from typing import Dict, Any, List
 import warnings
+import platform
+import traceback
+from peft import PeftModel, LoraConfig, get_peft_model, prepare_model_for_kbit_training
+import json
+import tempfile
+from datetime import datetime
 warnings.filterwarnings("ignore")
 # ─── Configuration ───────────────────────────────────────────────────────────
 MODEL_NAME = "zxc4wewewe/blackthinking"
 OUTPUT_DIR = "./offsec_model"
+MERGED_MODELS_DIR = "./merged_models"
 MAX_LENGTH = 512
+BATCH_SIZE = 1
+GRADIENT_ACCUMULATION = 8
+EPOCHS = 3
 LEARNING_RATE = 2e-5
 SAVE_STEPS = 100
 EVAL_STEPS = 100
 LOGGING_STEPS = 50
+# LoRA Configuration
+USE_LORA = True
+LORA_R = 8
+LORA_ALPHA = 16
+LORA_DROPOUT = 0.1
+# Dataset Configuration
+DATASET_SOURCES = [
+    "huihui-ai/Guilherme34_uncensor-v2",
+    "zxc4wewewe/offsec",
+]
+# System Configuration
+NUM_WORKERS = min(2, mp.cpu_count())
+BATCH_SIZE_TOKENIZATION = 50
+# ─── Analyzer Class ──────────────────────────────────────────────────────────
+class TrainingAnalyzer:
+    """Analyzes training progress and system resources"""
+    def __init__(self):
+        self.start_time = datetime.now()
+        self.training_metrics = {
+            "total_samples": 0,
+            "processed_samples": 0,
+            "training_time": 0,
+            "peak_memory": 0,
+            "gpu_memory": 0,
+        }
+    def analyze_system(self):
+        """Analyze system resources"""
+        try:
+            memory = psutil.virtual_memory()
+            gpu_memory = 0
+            if torch.cuda.is_available():
+                gpu_memory = torch.cuda.memory_allocated() / (1024**3)
+            return {
+                "cpu_cores": mp.cpu_count(),
+                "total_memory_gb": memory.total / (1024**3),
+                "available_memory_gb": memory.available / (1024**3),
+                "memory_usage_percent": memory.percent,
+                "gpu_memory_gb": gpu_memory,
+                "cuda_available": torch.cuda.is_available(),
+                "cuda_version": torch.version.cuda,
+                "pytorch_version": torch.__version__,
+            }
+        except Exception as e:
+            print(f"⚠️  System analysis failed: {e}")
+            return {}
+    def analyze_dataset(self, dataset):
+        """Analyze dataset characteristics"""
+        if not dataset:
+            return {}
+        try:
+            analysis = {}
+            for split_name, split_data in dataset.items():
+                if hasattr(split_data, '__len__'):
+                    analysis[split_name] = {
+                        "num_samples": len(split_data),
+                        "columns": split_data.column_names if hasattr(split_data, 'column_names') else [],
+                    }
+            return analysis
+        except Exception as e:
+            print(f"⚠️  Dataset analysis failed: {e}")
+            return {}
+    def analyze_training(self, trainer, train_result):
+        """Analyze training results"""
+        try:
+            current_time = datetime.now()
+            training_time = (current_time - self.start_time).total_seconds()
+            memory = psutil.virtual_memory()
+            peak_memory = memory.used / (1024**3)
+            gpu_memory = 0
+            if torch.cuda.is_available():
+                gpu_memory = torch.cuda.memory_allocated() / (1024**3)
+            return {
+                "training_time_seconds": training_time,
+                "training_time_minutes": training_time / 60,
+                "peak_memory_gb": peak_memory,
+                "peak_gpu_memory_gb": gpu_memory,
+                "final_loss": getattr(train_result, 'training_loss', 'unknown'),
+                "total_steps": getattr(train_result, 'global_step', 0),
+                "samples_per_second": train_result.metrics.get('train_samples_per_second', 0) if train_result.metrics else 0,
+            }
+        except Exception as e:
+            print(f"⚠️  Training analysis failed: {e}")
+            return {}
+    def generate_report(self, system_info, dataset_info, training_info):
+        """Generate comprehensive training report"""
+        report = f"""
+{'='*60}
+TRAINING ANALYSIS REPORT
+{'='*60}
+SYSTEM INFORMATION:
+- CPU Cores: {system_info.get('cpu_cores', 'unknown')}
+- Total Memory: {system_info.get('total_memory_gb', 0):.1f} GB
+- Available Memory: {system_info.get('available_memory_gb', 0):.1f} GB
+- Memory Usage: {system_info.get('memory_usage_percent', 0):.1f}%
+- CUDA Available: {system_info.get('cuda_available', False)}
+- CUDA Version: {system_info.get('cuda_version', 'unknown')}
+- PyTorch Version: {system_info.get('pytorch_version', 'unknown')}
+- GPU Memory Used: {system_info.get('gpu_memory_gb', 0):.2f} GB
+DATASET ANALYSIS:
+"""
+        for split_name, split_info in dataset_info.items():
+            report += f"- {split_name.upper()}: {split_info.get('num_samples', 0)} samples\n"
+            if split_info.get('columns'):
+                report += f"  Columns: {', '.join(split_info['columns'])}\n"
+        report += f"""
+TRAINING PERFORMANCE:
+- Training Time: {training_info.get('training_time_minutes', 0):.2f} minutes
+- Final Loss: {training_info.get('final_loss', 'unknown')}
+- Total Steps: {training_info.get('total_steps', 0)}
+- Samples/Second: {training_info.get('samples_per_second', 0):.2f}
+- Peak Memory: {training_info.get('peak_memory_gb', 0):.2f} GB
+- Peak GPU Memory: {training_info.get('peak_gpu_memory_gb', 0):.2f} GB
+TRAINING CONFIGURATION:
+- Model: {MODEL_NAME}
+- Batch Size: {BATCH_SIZE}
+- Gradient Accumulation: {GRADIENT_ACCUMULATION}
+- Learning Rate: {LEARNING_RATE}
+- Epochs: {EPOCHS}
+- LoRA Enabled: {USE_LORA}
+- Max Length: {MAX_LENGTH}
+{'='*60}
+END REPORT
+{'='*60}
+"""
+        return report
+# ─── Utility Functions ───────────────────────────────────────────────────────
+def safe_makedirs(path):
+    """Safely create directories"""
+    try:
+        os.makedirs(path, exist_ok=True)
+        return True
     except Exception as e:
+        print(f"⚠️  Failed to create directory {path}: {e}")
+        return False
+def cleanup_gpu_memory():
+    """Clean up GPU memory"""
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+    gc.collect()
+def load_tokenizer_robust(model_name):
+    """Load tokenizer with multiple fallback strategies"""
+    print(f"🔄 Loading tokenizer for: {model_name}")
+    strategies = [
+        lambda: AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=True),
+        lambda: AutoTokenizer.from_pretrained(model_name, use_fast=True, trust_remote_code=False),
+        lambda: GPT2TokenizerFast.from_pretrained("gpt2"),
+        lambda: create_minimal_tokenizer(),
+    ]
+    for i, strategy in enumerate(strategies, 1):
+        try:
+            tokenizer = strategy()
+            # Add missing special tokens
+            if tokenizer.pad_token is None:
+                if tokenizer.eos_token:
+                    tokenizer.pad_token = tokenizer.eos_token
+                else:
+                    tokenizer.add_special_tokens({"pad_token": "<|pad|>"})
+            print(f"✅ Tokenizer loaded (strategy {i})")
+            return tokenizer
+        except Exception as e:
+            print(f"⚠️  Strategy {i} failed: {str(e)[:100]}...")
+    print("❌ All tokenizer strategies failed")
+    return None
+def create_minimal_tokenizer():
+    """Create absolute minimal tokenizer"""
     try:
         from transformers import PreTrainedTokenizerFast
         import json
         vocab = {
             "<|pad|>": 0,
+            "</s>": 1,
+            "<s>": 2,
             "<|unk|>": 3,
         }
         for i, char in enumerate("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789 \n\t.,!?-", start=4):
             vocab[char] = i
         tokenizer_json = {
             "version": "1.0",
             "model": {
                 "type": "BPE",
                 "vocab": vocab,
                 "merges": []
             }
         }
         with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
             json.dump(tokenizer_json, f)
             temp_path = f.name
         tokenizer = PreTrainedTokenizerFast(tokenizer_file=temp_path)
         tokenizer.pad_token = "<|pad|>"
+        tokenizer.eos_token = "</s>"
+        tokenizer.bos_token = "<s>"
         os.unlink(temp_path)
         return tokenizer
+    except:
+        return None
+def load_dataset_fallback():
+    """Load dataset with comprehensive fallbacks"""
     print("📥 Loading dataset...")
+    for dataset_name in DATASET_SOURCES:
         try:
+            print(f"🔄 Trying: {dataset_name}")
+            dataset = load_dataset(dataset_name, streaming=False)
+            print(f"✅ Loaded: {dataset_name}")
+            # Ensure proper splits
             if "train" not in dataset and "test" not in dataset:
                 keys = list(dataset.keys())
                 if keys:
                     main_split = dataset[keys[0]]
                     dataset = main_split.train_test_split(test_size=0.1, seed=42)
+                    print(f"✅ Created train/test split")
                 else:
+                    continue
             return dataset
         except Exception as e:
+            print(f"⚠️  Failed: {str(e)[:100]}...")
+    # Create dummy dataset
+    print("🔄 Creating dummy dataset...")
+    try:
+        dummy_data = {
+            "train": [
+                {"prompt": "What is AI?", "response": "Artificial Intelligence is computer systems performing human tasks."},
+                {"prompt": "How to code?", "response": "Start with basics like variables, loops, functions."},
+            ] * 10,
+            "test": [
+                {"prompt": "Define ML", "response": "Machine Learning enables computers to learn from data."},
+            ] * 3,
+        }
+        dataset = DatasetDict({
+            split: Dataset.from_list(data)
+            for split, data in dummy_data.items()
+        })
+        print("✅ Created dummy dataset")
+        return dataset
+    except Exception as e:
+        print(f"❌ Dummy dataset failed: {e}")
+        return None
+def normalize_example(example):
+    """Normalize example format"""
+    if not example:
+        return {"prompt": "default", "response": "default"}
+    try:
+        if "prompt" in example and "response" in example:
+            return {
+                "prompt": str(example.get("prompt", "")).strip() or "default",
+                "response": str(example.get("response", "")).strip() or "default",
+            }
+        if "messages" in example and isinstance(example["messages"], list):
+            prompt, response = "", ""
+            for msg in example["messages"]:
+                if isinstance(msg, dict):
+                    role, content = str(msg.get("role", "")), str(msg.get("content", ""))
+                    if role.lower() in ["user", "human"]:
+                        prompt = content
+                    elif role.lower() in ["assistant", "bot"]:
+                        response = content
+            return {"prompt": prompt or "default", "response": response or "default"}
+        text = str(example.get("text", example.get("content", "default")))
+        if "Assistant:" in text:
+            parts = text.split("Assistant:", 1)
+            return {"prompt": parts[0].replace("User:", "").strip() or "default",
+                   "response": parts[1].strip() or "default"}
+        return {"prompt": text[:200] or "default",
+               "response": (text[-200:] if len(text) > 200 else text) or "default"}
+    except:
+        return {"prompt": "default", "response": "default"}
+def tokenize_function(examples, tokenizer):
+    """Tokenize examples safely"""
     try:
         full_texts = [
+            f"{prompt}\n\n{response}{tokenizer.eos_token}"
             for prompt, response in zip(examples["prompt"], examples["response"])
         ]
         result = tokenizer(
             full_texts,
             truncation=True,
             max_length=MAX_LENGTH,
+            padding=False,
             return_tensors=None,
         )
         result["labels"] = [
+            [-100 if (hasattr(tokenizer, 'pad_token_id') and token_id == tokenizer.pad_token_id) else token_id
+             for token_id in labels]
             for labels in result["input_ids"]
         ]
         return result
     except Exception as e:
+        print(f"⚠️  Tokenization error: {e}")
+        return {
             "input_ids": [[1, 2, 3]] * len(examples["prompt"]),
             "attention_mask": [[1, 1, 1]] * len(examples["prompt"]),
             "labels": [[1, 2, 3]] * len(examples["prompt"]),
         }
+def process_dataset(dataset, tokenizer):
+    """Process dataset efficiently"""
+    if not dataset or not tokenizer:
+        return None
+    print("⚡ Processing dataset...")
+    processed_splits = {}
+    for split_name in dataset.keys():
         try:
+            print(f"🔄 Processing {split_name} ({len(dataset[split_name])} samples)...")
+            # Normalize
+            normalized = dataset[split_name].map(
+                normalize_example,
+                remove_columns=dataset[split_name].column_names,
+                num_proc=1,
+            )
+            # Tokenize
+            tokenized = normalized.map(
+                lambda x: tokenize_function(x, tokenizer),
+                batched=True,
+                batch_size=BATCH_SIZE_TOKENIZATION,
+                num_proc=1,
+                remove_columns=["prompt", "response"],
+                load_from_cache_file=False
+            )
+            processed_splits[split_name] = tokenized
+            print(f"✅ {split_name}: {len(tokenized)} samples")
+        except Exception as e:
+            print(f"⚠️  {split_name} failed: {e}")
+            # Create minimal fallback
             try:
+                dummy_tokens = tokenizer("test\n\ntest", return_tensors=None)
+                dummy_tokens["labels"] = dummy_tokens["input_ids"].copy()
+                processed_splits[split_name] = Dataset.from_list([dummy_tokens] * min(10, len(dataset[split_name])))
+            except:
+                processed_splits[split_name] = Dataset.from_list([
+                    {"input_ids": [1], "attention_mask": [1], "labels": [1]}
+                ] * 5)
     return DatasetDict(processed_splits) if processed_splits else None
+def load_model(model_name, tokenizer, use_lora=True):
+    """Load model with LoRA support"""
+    print("🧠 Loading model...")
+    strategies = [
         {
+            "name": "8-bit + LoRA",
             "params": {
                 "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
+                "device_map": "auto" if torch.cuda.is_available() else None,
                 "trust_remote_code": True,
                 "low_cpu_mem_usage": True,
+                "load_in_8bit": True,
             }
         },
         {
+            "name": "float16",
             "params": {
+                "torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
+                "device_map": "auto" if torch.cuda.is_available() else None,
+                "trust_remote_code": True,
                 "low_cpu_mem_usage": True,
             }
         },
         {
+            "name": "CPU fallback",
             "params": {
                 "low_cpu_mem_usage": True,
             }
         }
     ]
+    for strategy in strategies:
         try:
+            print(f"🔄 {strategy['name']}...")
             model = AutoModelForCausalLM.from_pretrained(model_name, **strategy["params"])
+            # Apply LoRA if requested
+            if use_lora and USE_LORA:
+                try:
+                    model = prepare_model_for_kbit_training(model)
+                    lora_config = LoraConfig(
+                        r=LORA_R,
+                        lora_alpha=LORA_ALPHA,
+                        target_modules=["q_proj", "v_proj"],
+                        lora_dropout=LORA_DROPOUT,
+                        bias="none",
+                        task_type="CAUSAL_LM"
+                    )
+                    model = get_peft_model(model, lora_config)
+                    print("✅ LoRA applied")
+                except Exception as e:
+                    print(f"⚠️  LoRA failed: {e}")
+            # Resize embeddings
             if tokenizer:
                 try:
                     model.resize_token_embeddings(len(tokenizer))
                 except Exception as e:
+                    print(f"⚠️  Embedding resize failed: {e}")
+            print(f"✅ Model loaded ({strategy['name']})")
             return model
         except Exception as e:
             print(f"⚠️  {strategy['name']} failed: {str(e)[:100]}...")
+    print("❌ All model strategies failed")
+    return None
+def setup_training(model, tokenizer, tokenized_dataset, dataset_name):
+    """Setup training configuration"""
     if not model or not tokenizer or not tokenized_dataset:
         return None
+    print(f"⚙️  Setting up training for {dataset_name}...")
     try:
         train_dataset = tokenized_dataset.get("train")
         eval_dataset = tokenized_dataset.get("test") or tokenized_dataset.get("train")
         if not train_dataset or len(train_dataset) == 0:
+            print("❌ No training data")
             return None
+        # Limit samples for efficiency
+        max_samples = 50
         if len(train_dataset) > max_samples:
             train_dataset = train_dataset.select(range(max_samples))
+        if eval_dataset and len(eval_dataset) > 10:
+            eval_dataset = eval_dataset.select(range(min(10, len(eval_dataset))))
+        output_dir = os.path.join(OUTPUT_DIR, dataset_name.replace("/", "_"))
+        safe_makedirs(output_dir)
+        training_args = TrainingArguments(
+            output_dir=output_dir,
+            num_train_epochs=EPOCHS,
+            per_device_train_batch_size=BATCH_SIZE,
+            per_device_eval_batch_size=BATCH_SIZE,
+            gradient_accumulation_steps=GRADIENT_ACCUMULATION,
+            learning_rate=LEARNING_RATE,
+            weight_decay=0.01,
+            warmup_ratio=0.1,
+            lr_scheduler_type="linear",
+            logging_dir=os.path.join(output_dir, "logs"),
+            logging_steps=LOGGING_STEPS,
+            save_strategy="steps",
+            save_steps=SAVE_STEPS,
+            save_total_limit=2,
+            eval_strategy="steps" if eval_dataset else "no",
+            eval_steps=EVAL_STEPS if eval_dataset else None,
+            fp16=torch.cuda.is_available(),
+            bf16=False,
+            dataloader_num_workers=1,
+            dataloader_pin_memory=False,
+            remove_unused_columns=False,
+            optim="adamw_torch",
+            dataloader_drop_last=True,
+            gradient_checkpointing=True,
+            report_to="none",
+            run_name=f"training_{dataset_name}",
+            tf32=False,
+        )
+        data_collator = DataCollatorForLanguageModeling(
+            tokenizer=tokenizer,
+            mlm=False,
+            pad_to_multiple_of=8,
+        )
         trainer = Trainer(
             model=model,
             args=training_args,
             train_dataset=train_dataset,
+            eval_dataset=eval_dataset,
             data_collator=data_collator,
             processing_class=tokenizer,
             callbacks=[]
         )
+        print("✅ Training setup complete")
+        return trainer, output_dir
     except Exception as e:
+        print(f"❌ Training setup failed: {e}")
+        return None, None
+def train_model(trainer, dataset_name):
+    """Execute training and save results"""
+    if not trainer:
+        return False, None, None
+    print(f"🏃 Training {dataset_name}...")
+    try:
+        train_result = trainer.train()
+        # Save final model
+        output_dir = trainer.args.output_dir
+        final_model_dir = os.path.join(output_dir, "final_model")
+        safe_makedirs(final_model_dir)
+        print("💾 Saving model...")
+        trainer.save_model(final_model_dir)
+        trainer.save_state()
+        print("💾 Saving tokenizer...")
+        trainer.tokenizer.save_pretrained(final_model_dir)
+        print(f"✅ Training complete for {dataset_name}")
+        return True, final_model_dir, train_result
+    except Exception as e:
+        print(f"❌ Training failed: {e}")
+        traceback.print_exc()
+        return False, None, None
+def merge_model(base_model_path, adapter_path, dataset_name):
+    """Merge LoRA weights with base model"""
+    print(f"🔗 Merging {dataset_name}...")
+    try:
+        output_path = os.path.join(MERGED_MODELS_DIR, dataset_name.replace("/", "_"))
+        safe_makedirs(output_path)
+        # Load tokenizer from adapter
+        try:
+            tokenizer = AutoTokenizer.from_pretrained(adapter_path)
+        except:
+            tokenizer = load_tokenizer_robust(base_model_path)
+        # Load base model
+        base_model = AutoModelForCausalLM.from_pretrained(
+            base_model_path,
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+            device_map="auto" if torch.cuda.is_available() else None,
+            trust_remote_code=True,
+            low_cpu_mem_usage=True
+        )
+        # Resize embeddings to match tokenizer
+        current_vocab_size = len(tokenizer)
+        model_vocab_size = base_model.get_input_embeddings().weight.size(0)
+        if current_vocab_size != model_vocab_size:
+            base_model.resize_token_embeddings(current_vocab_size)
+        # Load and merge LoRA adapter
+        merged_model = PeftModel.from_pretrained(base_model, adapter_path)
+        merged_model = merged_model.merge_and_unload()
+        # Save merged model
+        merged_model.save_pretrained(output_path)
+        tokenizer.save_pretrained(output_path)
+        print(f"✅ {dataset_name} merged successfully")
+        cleanup_gpu_memory()
+        return True, output_path
+    except Exception as e:
+        print(f"❌ Merging {dataset_name} failed: {e}")
+        # Fallback: copy adapter files
+        try:
+            fallback_path = os.path.join(MERGED_MODELS_DIR, dataset_name.replace("/", "_") + "_adapter_only")
+            safe_makedirs(fallback_path)
+            adapter_files = os.listdir(adapter_path)
+            for file in adapter_files:
+                src = os.path.join(adapter_path, file)
+                dst = os.path.join(fallback_path, file)
+                if os.path.isfile(src):
+                    shutil.copy2(src, dst)
+            print(f"⚠️  {dataset_name} adapter copied (merging failed)")
+            return True, fallback_path
+        except Exception as e2:
+            print(f"❌ Fallback also failed: {e2}")
+            return False, None
+def save_analysis_report(analyzer, system_info, dataset_info, training_info, dataset_name):
+    """Save analysis report"""
+    try:
+        report = analyzer.generate_report(system_info, dataset_info, training_info)
+        report_dir = os.path.join(OUTPUT_DIR, dataset_name.replace("/", "_"))
+        safe_makedirs(report_dir)
+        report_path = os.path.join(report_dir, "training_analysis.txt")
+        with open(report_path, "w") as f:
+            f.write(report)
+        # Save metrics as JSON
+        metrics_path = os.path.join(report_dir, "training_metrics.json")
+        with open(metrics_path, "w") as f:
+            json.dump({
+                "system": system_info,
+                "dataset": dataset_info,
+                "training": training_info
+            }, f, indent=2)
+        print(f"📋 Analysis saved for {dataset_name}")
+        return True
+    except Exception as e:
+        print(f"⚠️  Failed to save analysis: {e}")
+        return False
+# ─── Main Training Pipeline ───────────────────────────────────────────────────
+def main():
+    """Main training pipeline with automatic model merging"""
+    print("🚀 STARTING AUTOMATED TRAINING PIPELINE")
+    print(f"🔧 Model: {MODEL_NAME}")
+    print(f"🎯 LoRA: {USE_LORA} | Batch: {BATCH_SIZE} | Epochs: {EPOCHS}")
+    print(f"🖥️  System: {platform.system()} | CUDA: {torch.cuda.is_available()}")
+    # Initialize analyzer
+    analyzer = TrainingAnalyzer()
+    # Create directories
+    safe_makedirs(OUTPUT_DIR)
+    safe_makedirs(MERGED_MODELS_DIR)
+    # Load tokenizer (shared across all training)
+    print("\n🔤 LOADING SHARED TOKENIZER...")
+    tokenizer = load_tokenizer_robust(MODEL_NAME)
+    if not tokenizer:
+        print("❌ CRITICAL: Tokenizer loading failed")
+        return
+    print(f"✅ Tokenizer loaded (vocab: {len(tokenizer)})")
+    # Analyze system
+    system_info = analyzer.analyze_system()
+    print(f"📊 System: {system_info.get('total_memory_gb', 0):.1f}GB RAM, {system_info.get('cpu_cores', 0)} cores")
+    # Process each dataset
+    results = []
+    total_training_time = 0
+    for dataset_name in DATASET_SOURCES:
+        print(f"\n{'='*60}")
+        print(f"🎯 PROCESSING DATASET: {dataset_name}")
+        print(f"{'='*60}")
+        # 1. Load dataset
+        dataset = load_dataset_fallback()
+        if not dataset:
+            print(f"❌ Failed to load {dataset_name}")
+            continue
+        # 2. Analyze dataset
+        dataset_info = analyzer.analyze_dataset(dataset)
+        print(f"📊 Dataset analysis: {dataset_info}")
+        # 3. Process dataset
+        tokenized_dataset = process_dataset(dataset, tokenizer)
+        if not tokenized_dataset:
+            print(f"❌ Failed to process {dataset_name}")
+            continue
+        # 4. Load model
+        model = load_model(MODEL_NAME, tokenizer, use_lora=True)
+        if not model:
+            print(f"❌ Failed to load model for {dataset_name}")
+            continue
+        # 5. Setup training
+        setup_result = setup_training(model, tokenizer, tokenized_dataset, dataset_name)
+        if not setup_result or setup_result[0] is None:
+            print(f"❌ Failed to setup training for {dataset_name}")
+            continue
+        trainer, model_dir = setup_result
+        # 6. Train model
+        success, final_model_dir, train_result = train_model(trainer, dataset_name)
+        if not success:
+            print(f"❌ Training failed for {dataset_name}")
+            continue
+        # 7. Analyze training
+        training_info = analyzer.analyze_training(trainer, train_result)
+        total_training_time += training_info.get('training_time_minutes', 0)
+        # 8. Save analysis report
+        save_analysis_report(analyzer, system_info, dataset_info, training_info, dataset_name)
+        # 9. Merge model (if LoRA was used)
+        if USE_LORA and success:
+            merge_success, merged_path = merge_model(MODEL_NAME, final_model_dir, dataset_name)
+            # Store results
+            results.append({
+                "dataset": dataset_name,
+                "training_time": training_info.get('training_time_minutes', 0),
+                "final_loss": training_info.get('final_loss', 'unknown'),
+                "model_saved": final_model_dir,
+                "model_merged": merged_path if merge_success else None,
+                "success": True
+            })
+        else:
+            results.append({
+                "dataset": dataset_name,
+                "training_time": training_info.get('training_time_minutes', 0),
+                "final_loss": training_info.get('final_loss', 'unknown'),
+                "model_saved": final_model_dir,
+                "model_merged": None,
+                "success": success
+            })
+        # Cleanup memory
+        cleanup_gpu_memory()
+        print(f"✅ {dataset_name} processing complete\n")
+    # Generate final summary
+    print(f"\n{'='*60}")
+    print("📊 FINAL TRAINING SUMMARY")
+    print(f"{'='*60}")
+    successful_trainings = sum(1 for r in results if r['success'])
+    successful_merges = sum(1 for r in results if r.get('model_merged'))
+    print(f"✅ Total Datasets Processed: {len(results)}")
+    print(f"✅ Successful Trainings: {successful_trainings}")
+    print(f"✅ Successful Merges: {successful_merges}")
+    print(f"⏱️  Total Training Time: {total_training_time:.2f} minutes")
+    for result in results:
+        status = "✅" if result['success'] else "❌"
+        merge_status = "🔗" if result.get('model_merged') else "⏭️"
+        print(f"{status} {result['dataset']}: {result['training_time']:.1f}min | Loss: {result['final_loss']} {merge_status}")
+    print(f"\n📂 Models saved in: {OUTPUT_DIR}")
+    print(f"🔗 Merged models in: {MERGED_MODELS_DIR}")
+    print(f"{'='*60}")
+    return results
+# ─── Execute Training ───────────────────────────────────────────────────────
 if __name__ == "__main__":
+    print("🏁 STARTING AUTOMATED TRAINING...")
     try:
+        results = main()
+        if results:
+            print("🎊 TRAINING PIPELINE COMPLETED SUCCESSFULLY!")
         else:
+            print("⚠️  TRAINING COMPLETED WITH ISSUES")
+    except KeyboardInterrupt:
+        print("\n🛑 TRAINING STOPPED BY USER")
     except Exception as e:
+        print(f"💥 UNEXPECTED ERROR: {str(e)}")
         traceback.print_exc()
+        print("⚠️  CONTINUING DESPITE ERROR...")
+    print("🏁 TRAINING PROCESS FINISHED")

mergekit_config.yml CHANGED Viewed

@@ -18,6 +18,6 @@ models:
     - model: DavidAU/Dolphin-Mistral-GLM-4.7-Flash-24B-Venice-Edition-Thinking-Uncensored
       parameters:
         weight:
-          - filter: attention
-            value: [0.8, 0.9]
           - value: 1

     - model: DavidAU/Dolphin-Mistral-GLM-4.7-Flash-24B-Venice-Edition-Thinking-Uncensored
       parameters:
         weight:
+          - filter: mlp
+            value: [1, 2]
           - value: 1

offsec_model/emergency_save/model.safetensors → model.safetensors RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:17c00be061d2370bea2a5766be8ef198a397aebb2fbf028120df35544aab5bc4
-size 2152169848

 version https://git-lfs.github.com/spec/v1
+oid sha256:8bfa866c9fd45884dee8ed80eee79acd5bb8460dbba40afa50fc517ad8d59fb3
+size 4304331056

offsec_model/checkpoint-3/README.md ADDED Viewed

	@@ -0,0 +1,207 @@

+---
+base_model: zxc4wewewe/blackthinking
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:zxc4wewewe/blackthinking
+- lora
+- transformers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.18.1

offsec_model/checkpoint-3/adapter_config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "zxc4wewewe/blackthinking",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

model-00001-of-00004.safetensors → offsec_model/checkpoint-3/adapter_model.safetensors RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d36fef000d013936684c2f5f0b1e020ecd6656d63721c572400238832bc7d53d
-size 525336712

 version https://git-lfs.github.com/spec/v1
+oid sha256:1b34f995ee9f9c329a6c97882e66994cb3a240e1c0e3dbef50ea5b283b1cb6c4
+size 826876624

model-00002-of-00004.safetensors → offsec_model/checkpoint-3/optimizer.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2292b5ed5e2a16aba7bb3603279757106bf414ffba2a65aeb6034ed54517d954
-size 993038112

 version https://git-lfs.github.com/spec/v1
+oid sha256:20b07be26ea4b8e443f69cad95078cc4958008a8cd65092fa2e51ea7d4e1c14a
+size 6868491

model-00003-of-00004.safetensors → offsec_model/checkpoint-3/rng_state.pth RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d978697993397fe090eff4c0c1923b153a88909d2f13c8bac392cfde71abf0e1
-size 992031192

 version https://git-lfs.github.com/spec/v1
+oid sha256:22dbae4057a63d32584e1891579bfcc51b0075be3a65a82e09c052094a350d44
+size 14455

model-00004-of-00004.safetensors → offsec_model/checkpoint-3/scheduler.pt RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7cd276c8ee78d33bc56ec60e957c9ddb5d073b7a66ca6fa44408b21dc7a7fcbd
-size 486576120

 version https://git-lfs.github.com/spec/v1
+oid sha256:fa1b2d8dfafd74e6f5ca5a65ca39282230073e5b915b419fa30d6d044a576f4d
+size 1465

offsec_model/{emergency_save → checkpoint-3}/tokenizer.json RENAMED Viewed

@@ -23,7 +23,16 @@
     },
     {
       "id": 50258,
-      "content": "<|startoftext|>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,

     },
     {
       "id": 50258,
+      "content": "</s>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 50259,
+      "content": "<s>",
       "single_word": false,
       "lstrip": false,
       "rstrip": false,

offsec_model/checkpoint-3/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "is_local": false,
+  "model_max_length": 1024,
+  "pad_token": "<|pad|>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

offsec_model/checkpoint-3/trainer_state.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 50,
+  "global_step": 3,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [],
+  "logging_steps": 25,
+  "max_steps": 3,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 50,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 40346896465920.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

offsec_model/{emergency_save → checkpoint-3}/training_args.bin RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8fd7cb3878eb2fdddb36c1497aedf53b7b1f8d819f9ae5381cd6e224a52eaded
 size 5201

 version https://git-lfs.github.com/spec/v1
+oid sha256:521ce980a0f252d9f47ada32de7808cdb474cc5da282a52b5e60f4d85a7438dc
 size 5201

offsec_model/emergency_save/config.json DELETED Viewed

@@ -1,36 +0,0 @@
-{
-  "architectures": [
-    "LlamaForCausalLM"
-  ],
-  "attention_bias": false,
-  "attention_dropout": 0.0,
-  "bos_token_id": 50258,
-  "dtype": "bfloat16",
-  "eos_token_id": 50256,
-  "head_dim": 64,
-  "hidden_act": "silu",
-  "hidden_size": 2048,
-  "initializer_range": 0.02,
-  "intermediate_size": 8192,
-  "max_position_embeddings": 131072,
-  "mlp_bias": false,
-  "model_type": "llama",
-  "num_attention_heads": 32,
-  "num_hidden_layers": 16,
-  "num_key_value_heads": 8,
-  "pad_token_id": 50257,
-  "pretraining_tp": 1,
-  "rms_norm_eps": 1e-05,
-  "rope_parameters": {
-    "factor": 32.0,
-    "high_freq_factor": 4.0,
-    "low_freq_factor": 1.0,
-    "original_max_position_embeddings": 8192,
-    "rope_theta": 500000.0,
-    "rope_type": "llama3"
-  },
-  "tie_word_embeddings": true,
-  "transformers_version": "5.2.0",
-  "use_cache": false,
-  "vocab_size": 50259
-}

offsec_model/emergency_save/generation_config.json DELETED Viewed

@@ -1,15 +0,0 @@
-{
-  "bos_token_id": 50258,
-  "do_sample": true,
-  "eos_token_id": [
-    50256,
-    128001,
-    128008,
-    128009
-  ],
-  "max_length": 131072,
-  "pad_token_id": 50257,
-  "temperature": 0.6,
-  "top_p": 0.9,
-  "transformers_version": "5.2.0"
-}

offsec_model/final_model/README.md ADDED Viewed

	@@ -0,0 +1,207 @@

+---
+base_model: zxc4wewewe/blackthinking
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:zxc4wewewe/blackthinking
+- lora
+- transformers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.18.1

offsec_model/final_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "zxc4wewewe/blackthinking",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

offsec_model/final_model/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1b34f995ee9f9c329a6c97882e66994cb3a240e1c0e3dbef50ea5b283b1cb6c4
+size 826876624

offsec_model/final_model/config.json DELETED Viewed

@@ -1,36 +0,0 @@
-{
-  "architectures": [
-    "LlamaForCausalLM"
-  ],
-  "attention_bias": false,
-  "attention_dropout": 0.0,
-  "bos_token_id": 50256,
-  "dtype": "float32",
-  "eos_token_id": 50256,
-  "head_dim": 64,
-  "hidden_act": "silu",
-  "hidden_size": 2048,
-  "initializer_range": 0.02,
-  "intermediate_size": 8192,
-  "max_position_embeddings": 131072,
-  "mlp_bias": false,
-  "model_type": "llama",
-  "num_attention_heads": 32,
-  "num_hidden_layers": 16,
-  "num_key_value_heads": 8,
-  "pad_token_id": 50256,
-  "pretraining_tp": 1,
-  "rms_norm_eps": 1e-05,
-  "rope_parameters": {
-    "factor": 32.0,
-    "high_freq_factor": 4.0,
-    "low_freq_factor": 1.0,
-    "original_max_position_embeddings": 8192,
-    "rope_theta": 500000.0,
-    "rope_type": "llama3"
-  },
-  "tie_word_embeddings": true,
-  "transformers_version": "5.2.0",
-  "use_cache": false,
-  "vocab_size": 50257
-}

offsec_model/final_model/generation_config.json DELETED Viewed

@@ -1,15 +0,0 @@
-{
-  "bos_token_id": 50256,
-  "do_sample": true,
-  "eos_token_id": [
-    50256,
-    128001,
-    128008,
-    128009
-  ],
-  "max_length": 131072,
-  "pad_token_id": 50256,
-  "temperature": 0.6,
-  "top_p": 0.9,
-  "transformers_version": "5.2.0"
-}

offsec_model/final_model/model.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:c039ccc714fc8d9c09e3bc21d41cc887fbd54a6eb8c8a19d8d4e50eb871dd51e
-size 4304306480

offsec_model/final_model/tokenizer.json CHANGED Viewed

@@ -11,6 +11,33 @@
       "rstrip": false,
       "normalized": true,
       "special": true
     }
   ],
   "normalizer": null,

       "rstrip": false,
       "normalized": true,
       "special": true
+    },
+    {
+      "id": 50257,
+      "content": "<|pad|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 50258,
+      "content": "</s>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
+    },
+    {
+      "id": 50259,
+      "content": "<s>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": false,
+      "special": true
     }
   ],
   "normalizer": null,

offsec_model/final_model/tokenizer_config.json CHANGED Viewed

@@ -1,12 +1,12 @@
 {
   "add_prefix_space": false,
   "backend": "tokenizers",
-  "bos_token": "<|endoftext|>",
-  "eos_token": "<|endoftext|>",
   "errors": "replace",
   "is_local": false,
   "model_max_length": 1024,
-  "pad_token": "<|endoftext|>",
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>"
 }

 {
   "add_prefix_space": false,
   "backend": "tokenizers",
+  "bos_token": "<s>",
+  "eos_token": "</s>",
   "errors": "replace",
   "is_local": false,
   "model_max_length": 1024,
+  "pad_token": "<|pad|>",
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>"
 }

offsec_model/final_model/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9acb38bbe140170e14553c167a978d8012169c83bec71321047d6e95f8f5833d
-size 5265

 version https://git-lfs.github.com/spec/v1
+oid sha256:521ce980a0f252d9f47ada32de7808cdb474cc5da282a52b5e60f4d85a7438dc
+size 5201

offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/README.md ADDED Viewed

	@@ -0,0 +1,207 @@

+---
+base_model: zxc4wewewe/blackthinking
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:zxc4wewewe/blackthinking
+- lora
+- transformers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.18.1

offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/adapter_config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "zxc4wewewe/blackthinking",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4eae3bf885e777e6499dc477d0573f9080370feebc52e2951a789fa47e6e492f
+size 826827472

offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6f3d277913f4ca1c78c269a4a9620fffad2a8f7fff7a698b7da6dcf0f708f1f4
+size 6868491

offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6a8a7a6ac130041cf45a5d9a1771d9cb49cf980669810bb45a04849d4938d948
+size 14455

offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:36f8e81efbeb24740a5e207227ced5c067b71dac644275071ecd00cf6dbbda81
+size 1465

offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

offsec_model/{emergency_save → huihui-ai_Guilherme34_uncensor-v2/checkpoint-21}/tokenizer_config.json RENAMED Viewed

@@ -1,12 +1,12 @@
 {
   "add_prefix_space": false,
   "backend": "tokenizers",
-  "bos_token": "<|startoftext|>",
   "eos_token": "<|endoftext|>",
   "errors": "replace",
   "is_local": false,
   "model_max_length": 1024,
-  "pad_token": "<|pad|>",
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>"
 }

 {
   "add_prefix_space": false,
   "backend": "tokenizers",
+  "bos_token": "<|endoftext|>",
   "eos_token": "<|endoftext|>",
   "errors": "replace",
   "is_local": false,
   "model_max_length": 1024,
+  "pad_token": "<|endoftext|>",
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "<|endoftext|>"
 }

offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/trainer_state.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 100,
+  "global_step": 21,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [],
+  "logging_steps": 50,
+  "max_steps": 21,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 278547866910720.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

offsec_model/huihui-ai_Guilherme34_uncensor-v2/checkpoint-21/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a526fb9962e09b960760ec89c29ebbb572efde48dfcb37d8359ec93f0415882
+size 5329

offsec_model/huihui-ai_Guilherme34_uncensor-v2/final_model/README.md ADDED Viewed

	@@ -0,0 +1,207 @@

+---
+base_model: zxc4wewewe/blackthinking
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:zxc4wewewe/blackthinking
+- lora
+- transformers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.18.1

offsec_model/huihui-ai_Guilherme34_uncensor-v2/final_model/adapter_config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "zxc4wewewe/blackthinking",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

offsec_model/huihui-ai_Guilherme34_uncensor-v2/final_model/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4eae3bf885e777e6499dc477d0573f9080370feebc52e2951a789fa47e6e492f
+size 826827472

offsec_model/huihui-ai_Guilherme34_uncensor-v2/final_model/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

offsec_model/huihui-ai_Guilherme34_uncensor-v2/final_model/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+  "add_prefix_space": false,
+  "backend": "tokenizers",
+  "bos_token": "<|endoftext|>",
+  "eos_token": "<|endoftext|>",
+  "errors": "replace",
+  "is_local": false,
+  "model_max_length": 1024,
+  "pad_token": "<|endoftext|>",
+  "tokenizer_class": "GPT2Tokenizer",
+  "unk_token": "<|endoftext|>"
+}

offsec_model/huihui-ai_Guilherme34_uncensor-v2/final_model/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a526fb9962e09b960760ec89c29ebbb572efde48dfcb37d8359ec93f0415882
+size 5329

offsec_model/huihui-ai_Guilherme34_uncensor-v2/trainer_state.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 100,
+  "global_step": 21,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 3.0,
+      "step": 21,
+      "total_flos": 278547866910720.0,
+      "train_loss": 7.856488182431176,
+      "train_runtime": 767.8768,
+      "train_samples_per_second": 0.195,
+      "train_steps_per_second": 0.027
+    }
+  ],
+  "logging_steps": 50,
+  "max_steps": 21,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 278547866910720.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

offsec_model/trainer_state.json CHANGED Viewed

@@ -2,41 +2,42 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": null,
-  "eval_steps": 500,
-  "global_step": 0,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
-  "log_history": [],
-  "logging_steps": 500,
-  "max_steps": 0,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 0,
-  "save_steps": 500,
   "stateful_callbacks": {
-    "EarlyStoppingCallback": {
-      "args": {
-        "early_stopping_patience": 2,
-        "early_stopping_threshold": 0.0
-      },
-      "attributes": {
-        "early_stopping_patience_counter": 0
-      }
-    },
     "TrainerControl": {
       "args": {
         "should_epoch_stop": false,
         "should_evaluate": false,
         "should_log": false,
-        "should_save": false,
-        "should_training_stop": false
       },
       "attributes": {}
     }
   },
-  "total_flos": 0,
-  "train_batch_size": null,
   "trial_name": null,
   "trial_params": null
 }

   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
+  "epoch": 1.0,
+  "eval_steps": 50,
+  "global_step": 3,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 1.0,
+      "step": 3,
+      "total_flos": 40346896465920.0,
+      "train_loss": 7.836072285970052,
+      "train_runtime": 123.471,
+      "train_samples_per_second": 0.162,
+      "train_steps_per_second": 0.024
+    }
+  ],
+  "logging_steps": 25,
+  "max_steps": 3,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 50,
   "stateful_callbacks": {
     "TrainerControl": {
       "args": {
         "should_epoch_stop": false,
         "should_evaluate": false,
         "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
+  "total_flos": 40346896465920.0,
+  "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
 }

offsec_model/zxc4wewewe_offsec/checkpoint-6/README.md ADDED Viewed

	@@ -0,0 +1,207 @@

+---
+base_model: zxc4wewewe/blackthinking
+library_name: peft
+pipeline_tag: text-generation
+tags:
+- base_model:adapter:zxc4wewewe/blackthinking
+- lora
+- transformers
+---
+# Model Card for Model ID
+<!-- Provide a quick summary of what the model is/does. -->
+## Model Details
+### Model Description
+<!-- Provide a longer summary of what this model is. -->
+- **Developed by:** [More Information Needed]
+- **Funded by [optional]:** [More Information Needed]
+- **Shared by [optional]:** [More Information Needed]
+- **Model type:** [More Information Needed]
+- **Language(s) (NLP):** [More Information Needed]
+- **License:** [More Information Needed]
+- **Finetuned from model [optional]:** [More Information Needed]
+### Model Sources [optional]
+<!-- Provide the basic links for the model. -->
+- **Repository:** [More Information Needed]
+- **Paper [optional]:** [More Information Needed]
+- **Demo [optional]:** [More Information Needed]
+## Uses
+<!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
+### Direct Use
+<!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
+[More Information Needed]
+### Downstream Use [optional]
+<!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
+[More Information Needed]
+### Out-of-Scope Use
+<!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
+[More Information Needed]
+## Bias, Risks, and Limitations
+<!-- This section is meant to convey both technical and sociotechnical limitations. -->
+[More Information Needed]
+### Recommendations
+<!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
+Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
+## How to Get Started with the Model
+Use the code below to get started with the model.
+[More Information Needed]
+## Training Details
+### Training Data
+<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
+[More Information Needed]
+### Training Procedure
+<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
+#### Preprocessing [optional]
+[More Information Needed]
+#### Training Hyperparameters
+- **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
+#### Speeds, Sizes, Times [optional]
+<!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
+[More Information Needed]
+## Evaluation
+<!-- This section describes the evaluation protocols and provides the results. -->
+### Testing Data, Factors & Metrics
+#### Testing Data
+<!-- This should link to a Dataset Card if possible. -->
+[More Information Needed]
+#### Factors
+<!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
+[More Information Needed]
+#### Metrics
+<!-- These are the evaluation metrics being used, ideally with a description of why. -->
+[More Information Needed]
+### Results
+[More Information Needed]
+#### Summary
+## Model Examination [optional]
+<!-- Relevant interpretability work for the model goes here -->
+[More Information Needed]
+## Environmental Impact
+<!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
+Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
+- **Hardware Type:** [More Information Needed]
+- **Hours used:** [More Information Needed]
+- **Cloud Provider:** [More Information Needed]
+- **Compute Region:** [More Information Needed]
+- **Carbon Emitted:** [More Information Needed]
+## Technical Specifications [optional]
+### Model Architecture and Objective
+[More Information Needed]
+### Compute Infrastructure
+[More Information Needed]
+#### Hardware
+[More Information Needed]
+#### Software
+[More Information Needed]
+## Citation [optional]
+<!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
+**BibTeX:**
+[More Information Needed]
+**APA:**
+[More Information Needed]
+## Glossary [optional]
+<!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
+[More Information Needed]
+## More Information [optional]
+[More Information Needed]
+## Model Card Authors [optional]
+[More Information Needed]
+## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.18.1

offsec_model/zxc4wewewe_offsec/checkpoint-6/adapter_config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "alora_invocation_tokens": null,
+  "alpha_pattern": {},
+  "arrow_config": null,
+  "auto_mapping": null,
+  "base_model_name_or_path": "zxc4wewewe/blackthinking",
+  "bias": "none",
+  "corda_config": null,
+  "ensure_weight_tying": false,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 16,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "peft_version": "0.18.1",
+  "qalora_group_size": 16,
+  "r": 8,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "v_proj",
+    "q_proj"
+  ],
+  "target_parameters": null,
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

offsec_model/zxc4wewewe_offsec/checkpoint-6/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b39b1fc1b35a0fa8403bdc441ada3b6d2b74ae538517d098dafa3caf2bf0a507
+size 826827472

offsec_model/zxc4wewewe_offsec/checkpoint-6/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d79aa8627cd3205c254266c9ca0540f604e29a39f2196eeeb3a8b8f20dfb8184
+size 6868491

offsec_model/zxc4wewewe_offsec/checkpoint-6/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:efca17d6191d5398ee4c0d5cdcd6df6c91e9861d6204d56b2f7bbd5dd8821bfe
+size 14455