#!/bin/bash
#
# DeepGuard CLIP auto-training pipeline.
# Downloads the CIFAKE dataset (with fallbacks), fine-tunes CLIP ViT-B/32
# as a real-vs-AI-generated image classifier, and uploads the resulting
# model to the Hugging Face Hub.
#
# Usage:
#   bash auto_train.sh --hf-token TOKEN --hf-repo user/repo \
#       [--epochs N] [--batch-size N] [--kaggle-user U] [--kaggle-key K]

# Abort on any command failure. pipefail is needed in addition to -e:
# several steps pipe `pip install ... | tail`, and without pipefail a pip
# failure would be masked by tail's exit status.
set -e
set -o pipefail

# ── Defaults (overridable via CLI flags) ──
HF_TOKEN=""        # Hugging Face write token (required)
HF_REPO=""         # target model repo, e.g. user/deepguard-detector (required)
EPOCHS=10          # training epochs
BATCH_SIZE=32      # per-step batch size
KAGGLE_USER=""     # optional Kaggle credentials for the CIFAKE download
KAGGLE_KEY_ARG=""
|
|
# ── CLI argument parsing ──
# Every flag takes a value. "${2?...}" aborts with a message when the value
# is missing, instead of `shift 2` silently walking past the end of $@.
while [[ $# -gt 0 ]]; do
    case $1 in
        --hf-token)    HF_TOKEN="${2?missing value for --hf-token}"; shift 2 ;;
        --hf-repo)     HF_REPO="${2?missing value for --hf-repo}"; shift 2 ;;
        --epochs)      EPOCHS="${2?missing value for --epochs}"; shift 2 ;;
        --batch-size)  BATCH_SIZE="${2?missing value for --batch-size}"; shift 2 ;;
        --kaggle-user) KAGGLE_USER="${2?missing value for --kaggle-user}"; shift 2 ;;
        --kaggle-key)  KAGGLE_KEY_ARG="${2?missing value for --kaggle-key}"; shift 2 ;;
        *) echo "Unknown argument: $1"; exit 1 ;;
    esac
done
|
|
# Both credentials are mandatory; print usage and bail otherwise.
if [[ -z "$HF_TOKEN" || -z "$HF_REPO" ]]; then
    echo "Usage: bash auto_train.sh --hf-token YOUR_HF_TOKEN --hf-repo kevinwufei/deepguard-detector"
    exit 1
fi

# Startup banner showing the effective configuration.
printf '%s\n' \
    "" \
    "ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ" \
    "β DeepGuard CLIP Auto-Training Pipeline v2 β" \
    "ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ" \
    " HF Repo: $HF_REPO" \
    " Epochs: $EPOCHS" \
    " Batch size: $BATCH_SIZE" \
    ""
|
|
| |
echo "βΆ [1/5] Installing dependencies..."

# Probe for an existing torch install; "none" is the sentinel when absent.
PYTORCH_VERSION=$(python3 -c "import torch; print(torch.__version__)" 2>/dev/null || echo "none")
echo " Detected PyTorch: $PYTORCH_VERSION"

# Install torch only if it is not already present (reinstalls are slow).
if [ "$PYTORCH_VERSION" = "none" ]; then
    pip install -q torch torchvision 2>&1 | tail -2
else
    echo " Using existing PyTorch $PYTORCH_VERSION (skipping reinstall)"
fi

pip install -q "transformers==4.40.2" pillow pandas requests tqdm scikit-learn huggingface_hub datasets 2>&1 | tail -3
echo " β Dependencies installed"
|
|
| |
echo ""
echo "βΆ [2/5] Downloading CIFAKE dataset..."
echo " (60,000 real images + 60,000 AI-generated images = 120,000 total)"

mkdir -p ./data/cifake

# Export credentials for the Python downloader below: the download heredoc
# is quoted ('PYEOF'), so the child process reads these through os.environ
# rather than shell expansion.
if [ -n "$KAGGLE_USER" ] && [ -n "$KAGGLE_KEY_ARG" ]; then
    export KAGGLE_USERNAME="$KAGGLE_USER"
    export KAGGLE_KEY="$KAGGLE_KEY_ARG"
fi

# Fix: HF_TOKEN was never exported, so the downloader's HuggingFace fallback
# could not authenticate; make it visible to child processes.
export HF_TOKEN
|
|
| python3 - <<'PYEOF' |
| import os |
| import sys |
|
|
def try_kaggle():
    """Download CIFAKE via the Kaggle CLI.

    Requires KAGGLE_USERNAME / KAGGLE_KEY in the environment; writes
    ~/.kaggle/kaggle.json (mode 0600) and unzips into ./data/cifake.
    Returns True on success, False otherwise (never raises).
    """
    import json
    import subprocess

    kaggle_user = os.environ.get('KAGGLE_USERNAME', '')
    kaggle_key = os.environ.get('KAGGLE_KEY', '')
    if not kaggle_user or not kaggle_key:
        return False
    try:
        subprocess.run(['pip', 'install', '-q', 'kaggle'], check=True, capture_output=True)
        # The Kaggle CLI reads credentials from ~/.kaggle/kaggle.json and
        # insists the file is private (0600).
        kaggle_dir = os.path.expanduser('~/.kaggle')
        os.makedirs(kaggle_dir, exist_ok=True)
        cred_path = os.path.join(kaggle_dir, 'kaggle.json')
        with open(cred_path, 'w') as f:
            json.dump({'username': kaggle_user, 'key': kaggle_key}, f)
        os.chmod(cred_path, 0o600)
        result = subprocess.run([
            'kaggle', 'datasets', 'download',
            '-d', 'birdy654/cifake-real-and-ai-generated-synthetic-images',
            '-p', './data/cifake', '--unzip'
        ], capture_output=True, text=True)
        if result.returncode == 0:
            print(" β CIFAKE downloaded via Kaggle API")
            return True
        print(f" Kaggle error: {result.stderr[:200]}")
        return False
    except Exception as e:
        print(f" Kaggle failed: {e}")
        return False
|
|
def try_hf_datasets():
    """Download CIFAKE via the HuggingFace `datasets` library.

    Uses HF_TOKEN from the environment when present. Images are
    materialized on disk under ./data/cifake/{train,test}/{REAL,FAKE}.
    Returns True on success, False on any failure (never raises).
    """
    try:
        from datasets import load_dataset

        # `os` is already imported at module level; the previous inner
        # `import os` was redundant.
        token = os.environ.get('HF_TOKEN', '')
        print(" Trying HuggingFace datasets library...")

        ds = load_dataset(
            "birdy654/cifake-real-and-ai-generated-synthetic-images",
            token=token if token else None,
            trust_remote_code=True
        )
        print(" Saving images to disk...")
        for split in ('train', 'test'):
            for label_dir in ('REAL', 'FAKE'):
                os.makedirs(f'./data/cifake/{split}/{label_dir}', exist_ok=True)

        for split_name, split_data in ds.items():
            total = len(split_data)  # hoisted out of the per-image loop
            for i, item in enumerate(split_data):
                # This code maps dataset label 0 -> REAL dir, else FAKE dir.
                label = 'REAL' if item['label'] == 0 else 'FAKE'
                item['image'].save(f'./data/cifake/{split_name}/{label}/{i:06d}.jpg')
                if i % 5000 == 0:
                    print(f" Saved {i}/{total} {split_name}/{label}...")
        print(" β CIFAKE downloaded via HuggingFace datasets")
        return True
    except Exception as e:
        print(f" HF datasets failed: {e}")
        return False
|
|
def use_cifar10_synthetic():
    """Fallback: build a CIFAKE-like dataset from CIFAR-10.

    REAL class: raw CIFAR-10 images. FAKE class: the same images pushed
    through heavy augmentation (color jitter, blur, forced flip) plus
    Gaussian noise. Writes 30k/5k train/test images per class and
    returns True.
    """
    import torchvision
    import torchvision.transforms as transforms
    from PIL import Image
    import numpy as np

    print(" Using CIFAR-10 + synthetic AI image dataset as fallback...")
    for split in ('train', 'test'):
        for label_dir in ('REAL', 'FAKE'):
            os.makedirs(f'./data/cifake/{split}/{label_dir}', exist_ok=True)

    print(" Downloading CIFAR-10 real images...")
    trainset = torchvision.datasets.CIFAR10(root='./data/cifar10', train=True, download=True)
    testset = torchvision.datasets.CIFAR10(root='./data/cifar10', train=False, download=True)

    # REAL class: save raw images (30k train / 5k test).
    for i, (pil_img, _) in enumerate(trainset):
        if i >= 30000: break
        pil_img.save(f'./data/cifake/train/REAL/{i:06d}.jpg')
        if i % 5000 == 0: print(f" Real train: {i}/30000")

    for i, (pil_img, _) in enumerate(testset):
        if i >= 5000: break
        pil_img.save(f'./data/cifake/test/REAL/{i:06d}.jpg')

    print(" Generating synthetic AI-like images from CIFAR-10...")
    transform_ai = transforms.Compose([
        transforms.ToTensor(),
        transforms.ColorJitter(brightness=0.5, contrast=0.5, saturation=0.5, hue=0.3),
        transforms.GaussianBlur(kernel_size=3, sigma=(0.5, 2.0)),
        transforms.RandomHorizontalFlip(p=1.0),
        transforms.ToPILImage(),
    ])

    def _fake_from(pil_img):
        # Shared "AI-ification" used for both splits (previously duplicated,
        # including a numpy re-import inside the hot loop): augment, then
        # add Gaussian noise.
        arr = np.array(transform_ai(pil_img)).astype(np.float32)
        arr = np.clip(arr + np.random.normal(0, 15, arr.shape), 0, 255).astype(np.uint8)
        return Image.fromarray(arr)

    # FAKE class: degraded copies of the same images.
    for i, (pil_img, _) in enumerate(trainset):
        if i >= 30000: break
        _fake_from(pil_img).save(f'./data/cifake/train/FAKE/{i:06d}.jpg')
        if i % 5000 == 0: print(f" AI train: {i}/30000")

    for i, (pil_img, _) in enumerate(testset):
        if i >= 5000: break
        _fake_from(pil_img).save(f'./data/cifake/test/FAKE/{i:06d}.jpg')

    print(" β CIFAR-10 synthetic dataset ready (60k images)")
    return True
|
|
| |
# ── Download strategy: Kaggle API -> HF datasets -> CIFAR-10 fallback ──
success = False

# 1) Kaggle (needs KAGGLE_USERNAME/KAGGLE_KEY exported by the wrapper).
if not success:
    success = try_kaggle()

# 2) HuggingFace `datasets`. FIX: a 'YOUR_HF_TOKEN_HERE' placeholder was
#    previously hard-coded into os.environ['HF_TOKEN'] here, clobbering any
#    real token exported by the caller. Rely on the inherited environment
#    instead (an absent token just means an unauthenticated download).
if not success:
    success = try_hf_datasets()

# 3) Last resort: synthesize a CIFAKE-like dataset from CIFAR-10.
if not success:
    print(" Falling back to CIFAR-10 synthetic dataset (no auth required)...")
    success = use_cifar10_synthetic()

if not success:
    print("ERROR: Could not download any dataset!")
    sys.exit(1)

print(" Dataset ready!")
| PYEOF |
|
|
| |
# Step 3 banner (blank line + heading, same output as before).
printf '\n%s\n' "βΆ [3/5] Converting dataset to training format..."
|
|
| python3 - <<'PYEOF' |
import os, glob, csv, random, sys

# Flatten the on-disk CIFAKE directory tree into the CSV format the
# training step consumes (one row per image, platform feedback vocabulary).
data_dir = "./data/cifake"
output_csv = "./data/training_data.csv"
rows = []

for split in ["train", "test"]:
    for label_dir, label in [("REAL", "real"), ("FAKE", "ai_generated")]:
        for ext in ["*.jpg", "*.jpeg", "*.png", "*.JPEG"]:
            pattern = os.path.join(data_dir, split, label_dir, ext)
            for f in glob.glob(pattern):
                rows.append({
                    "id": f"cifake_{len(rows)}",
                    "type": "image",
                    "fileUrl": f"file://{os.path.abspath(f)}",
                    "localPath": os.path.abspath(f),
                    "feedbackLabel": label,
                    "userFeedback": "correct",
                    "riskScore": 90 if label == "ai_generated" else 5,
                    "verdict": "deepfake" if label == "ai_generated" else "safe",
                    "source": "cifake_dataset",
                })

# Fix: an empty dataset previously crashed on rows[0] with an opaque
# IndexError; fail loudly with a diagnostic instead.
if not rows:
    print(f"ERROR: no images found under {data_dir} - dataset download failed?")
    sys.exit(1)

# Shuffle once here so the training step's positional 90/10 split is random.
random.shuffle(rows)
os.makedirs("./data", exist_ok=True)
with open(output_csv, "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=rows[0].keys())
    writer.writeheader()
    writer.writerows(rows)

print(f" β {len(rows)} images converted to CSV")
ai_count = sum(1 for r in rows if r['feedbackLabel'] == 'ai_generated')
real_count = sum(1 for r in rows if r['feedbackLabel'] == 'real')
print(f" AI-generated: {ai_count} | Real: {real_count}")
| PYEOF |
|
|
| |
# Step 4 banner (blank line + heading, same output as before).
printf '\n%s\n' "βΆ [4/5] Training CLIP model (this takes ~30-60 minutes on RTX 4090)..."
|
|
| python3 - <<PYEOF |
| import os, csv, sys |
| import torch |
| import torch.nn as nn |
| from torch.utils.data import Dataset, DataLoader |
| from torchvision import transforms |
| from PIL import Image |
| import numpy as np |
| from tqdm import tqdm |
| from sklearn.metrics import accuracy_score, f1_score |
| |
# ββ Config ββββββββββββββββββββββββββββββββββββββββββββββββββββ
# NOTE: '$EPOCHS' / '$BATCH_SIZE' below are substituted by the enclosing
# bash heredoc before Python runs (the heredoc delimiter is unquoted), so
# the shell flag values act as the int() fallback defaults; an exported
# env var still wins.
EPOCHS = int(os.environ.get('EPOCHS', '$EPOCHS'))
BATCH_SIZE = int(os.environ.get('BATCH_SIZE', '$BATCH_SIZE'))
LR = 1e-5  # small LR: fine-tuning a pretrained backbone
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DATA_CSV = "./data/training_data.csv"  # produced by step [3/5]
MODEL_DIR = "./deepguard_model"        # all artifacts land here

print(f" Device: {DEVICE}")
if DEVICE == "cuda":
    print(f" GPU: {torch.cuda.get_device_name(0)}")
    print(f" VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")

# ββ Load CLIP βββββββββββββββββββββββββββββββββββββββββββββββββ
from transformers import CLIPProcessor, CLIPModel

print(" Loading CLIP ViT-B/32...")
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Freeze most layers, only fine-tune last 4 transformer blocks + classifier
for name, param in model.named_parameters():
    param.requires_grad = False

# Unfreeze vision encoder layers 8-11 (the last four); everything else in
# the CLIP backbone stays frozen.
for i in range(8, 12):
    for param in model.vision_model.encoder.layers[i].parameters():
        param.requires_grad = True
# Classification head: binary real (0) vs AI-generated (1) decision on top
# of CLIP's image embedding.
class DeepGuardClassifier(nn.Module):
    """CLIP vision tower + projection, followed by a small MLP classifier."""

    def __init__(self, clip_model):
        super().__init__()
        self.clip = clip_model
        # 512-d CLIP image embedding -> 2 logits.
        self.classifier = nn.Sequential(
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 2),
        )

    def forward(self, pixel_values):
        """Return unnormalized class logits of shape (batch, 2)."""
        vision_out = self.clip.vision_model(pixel_values=pixel_values)
        embedding = self.clip.visual_projection(vision_out.pooler_output)
        return self.classifier(embedding)
| |
| classifier = DeepGuardClassifier(model).to(DEVICE) |
| |
# ββ Dataset βββββββββββββββββββββββββββββββββββββββββββββββββββ
class ImageDataset(Dataset):
    """CSV-row-backed dataset yielding (pixel_values, label) pairs.

    label is 1 for 'ai_generated' rows, 0 otherwise. Unreadable images
    fall back to an all-zero (3, 224, 224) tensor so one corrupt file
    cannot kill a training run.
    """

    def __init__(self, rows, processor):
        self.rows = rows
        self.processor = processor
        # FIX: the previous version also built a torchvision augmentation
        # pipeline here that __getitem__ never applied; removed as dead
        # code — preprocessing is entirely the CLIP processor's job.

    def __len__(self):
        return len(self.rows)

    def __getitem__(self, idx):
        row = self.rows[idx]
        label = 1 if row['feedbackLabel'] == 'ai_generated' else 0
        try:
            img = Image.open(row['localPath']).convert('RGB')
            inputs = self.processor(images=img, return_tensors="pt")
            pixel_values = inputs['pixel_values'].squeeze(0)
        except Exception:
            # Deliberate best-effort: corrupt/missing file -> zero image.
            pixel_values = torch.zeros(3, 224, 224)
        return pixel_values, label
| |
# Load data
with open(DATA_CSV) as f:
    all_rows = list(csv.DictReader(f))

# Split 90/10 train/val. Rows were already shuffled when the CSV was
# written (step [3/5]), so this positional split is effectively random.
split_idx = int(len(all_rows) * 0.9)
train_rows = all_rows[:split_idx]
val_rows = all_rows[split_idx:]

print(f" Train: {len(train_rows)} | Val: {len(val_rows)}")

train_ds = ImageDataset(train_rows, processor)
val_ds = ImageDataset(val_rows, processor)
# Workers parallelize image decode; pin_memory helps host->GPU transfers.
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=4, pin_memory=True)
| |
# ββ Training ββββββββββββββββββββββββββββββββββββββββββββββββββ
# Optimize only the unfrozen parameters (last vision blocks + MLP head).
optimizer = torch.optim.AdamW(
    filter(lambda p: p.requires_grad, classifier.parameters()),
    lr=LR, weight_decay=0.01
)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=EPOCHS)
criterion = nn.CrossEntropyLoss()

best_val_acc = 0.0
best_model_path = os.path.join(MODEL_DIR, "best_model.pt")
os.makedirs(MODEL_DIR, exist_ok=True)

print(f"\n Starting training for {EPOCHS} epochs...")
for epoch in range(EPOCHS):
    # Train
    classifier.train()
    train_loss = 0.0
    train_preds, train_labels = [], []

    for pixel_values, labels in tqdm(train_loader, desc=f" Epoch {epoch+1}/{EPOCHS} [Train]", leave=False):
        pixel_values = pixel_values.to(DEVICE)
        labels = labels.to(DEVICE)

        optimizer.zero_grad()
        logits = classifier(pixel_values)
        loss = criterion(logits, labels)
        loss.backward()
        # Gradient clipping stabilizes fine-tuning of the backbone.
        torch.nn.utils.clip_grad_norm_(classifier.parameters(), 1.0)
        optimizer.step()

        train_loss += loss.item()
        preds = logits.argmax(dim=1).cpu().numpy()
        train_preds.extend(preds)
        train_labels.extend(labels.cpu().numpy())

    # Validate (no grads, eval mode disables dropout)
    classifier.eval()
    val_preds, val_labels_list = [], []
    with torch.no_grad():
        for pixel_values, labels in val_loader:
            pixel_values = pixel_values.to(DEVICE)
            logits = classifier(pixel_values)
            preds = logits.argmax(dim=1).cpu().numpy()
            val_preds.extend(preds)
            val_labels_list.extend(labels.numpy())

    train_acc = accuracy_score(train_labels, train_preds)
    val_acc = accuracy_score(val_labels_list, val_preds)
    val_f1 = f1_score(val_labels_list, val_preds, average='weighted')
    scheduler.step()

    status = ""
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        # Checkpoint only on validation improvement.
        torch.save(classifier.state_dict(), best_model_path)
        # FIX: this literal was previously split across two physical lines,
        # an unterminated string (Python syntax error).
        status = " β Best saved"

    print(f" Epoch {epoch+1:2d}/{EPOCHS} | Train Acc: {train_acc:.3f} | Val Acc: {val_acc:.3f} | F1: {val_f1:.3f}{status}")

print(f"\n β Training complete! Best Val Accuracy: {best_val_acc:.3f}")
| |
# ββ Save model ββββββββββββββββββββββββββββββββββββββββββββββββ
print(" Saving model artifacts...")
# Restore the best validation checkpoint before exporting final weights.
classifier.load_state_dict(torch.load(best_model_path, map_location=DEVICE))
torch.save(classifier.state_dict(), os.path.join(MODEL_DIR, "deepguard_detector.pt"))

# Save processor config
processor.save_pretrained(MODEL_DIR)

# Save model metadata (consumed by the upload step to build the README)
import json
metadata = {
    "model_name": "DeepGuard CLIP Detector v1",
    "base_model": "openai/clip-vit-base-patch32",
    "architecture": "CLIPVisionModel + 2-layer MLP classifier",
    "num_classes": 2,
    "classes": ["real", "ai_generated"],
    "best_val_accuracy": round(best_val_acc, 4),
    "training_samples": len(train_rows),
    "val_samples": len(val_rows),
    "epochs": EPOCHS,
    "batch_size": BATCH_SIZE,
    "dataset": "CIFAKE (60k real + 60k AI-generated)",
    # "$HF_REPO" is substituted by the enclosing (unquoted) bash heredoc.
    "hf_repo": "$HF_REPO",
}
with open(os.path.join(MODEL_DIR, "model_metadata.json"), "w") as f:
    json.dump(metadata, f, indent=2)

print(f" β Model saved to {MODEL_DIR}/")
| PYEOF |
|
|
| |
# Step 5 banner (blank line + heading, same output as before).
printf '\n%s\n' "βΆ [5/5] Uploading model to Hugging Face Hub..."
|
|
| python3 - <<PYEOF |
from huggingface_hub import HfApi
import os, json

# "$HF_TOKEN" / "$HF_REPO" are substituted by the enclosing bash heredoc
# before Python runs (the heredoc delimiter is unquoted).
api = HfApi(token="$HF_TOKEN")
model_dir = "./deepguard_model"

# Upload all model files (flat directory; subdirectories are skipped)
for fname in os.listdir(model_dir):
    fpath = os.path.join(model_dir, fname)
    if os.path.isfile(fpath):
        api.upload_file(
            path_or_fileobj=fpath,
            path_in_repo=fname,
            repo_id="$HF_REPO",
            repo_type="model",
        )
        print(f" Uploaded: {fname}")
| |
# Update README with final accuracy.
# FIX: this previously opened "$model_dir/model_metadata.json" — $model_dir
# is not a shell variable, so the unquoted heredoc expanded it to "" and the
# script tried to open the non-existent "/model_metadata.json". Use the
# Python-level model_dir instead.
with open(os.path.join(model_dir, "model_metadata.json")) as f:
    meta = json.load(f)

# FIX: the backticks below are escaped with backslashes because this heredoc
# is unquoted — bare backticks would trigger bash command substitution and
# mangle the README before Python ever saw it. The usage snippet now also
# references meta['hf_repo'] instead of a hard-coded repo id.
readme = f"""---
language: en
tags:
- image-classification
- deepfake-detection
- clip
- ai-generated-content
license: mit
---

# DeepGuard CLIP Detector v1

**Fine-tuned CLIP ViT-B/32 for detecting AI-generated images and deepfakes.**

Built by [DeepGuard](https://deepguard.org) β AI Anti-Fraud Detection Platform.

## Performance

| Metric | Score |
|--------|-------|
| Val Accuracy | {meta['best_val_accuracy']:.1%} |
| Training Samples | {meta['training_samples']:,} |
| Base Model | openai/clip-vit-base-patch32 |

## Usage

\`\`\`python
import torch
from transformers import CLIPProcessor, CLIPModel
from PIL import Image

# Load model
processor = CLIPProcessor.from_pretrained("{meta['hf_repo']}")
# See inference.py for full usage
\`\`\`
"""

import tempfile
with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as f:
    f.write(readme)
    tmp = f.name

api.upload_file(path_or_fileobj=tmp, path_in_repo="README.md", repo_id="$HF_REPO", repo_type="model")
# FIX: the temp README was previously leaked (delete=False, never removed).
os.unlink(tmp)
print(f"\n β Model uploaded to: https://huggingface.co/{meta['hf_repo']}")
print(f" Best Val Accuracy: {meta['best_val_accuracy']:.1%}")
| PYEOF |
|
|
# Closing banner: summarize where the published model lives.
printf '%s\n' \
    "" \
    "ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ" \
    "β Training Complete! β" \
    "ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ" \
    "" \
    " Model: https://huggingface.co/$HF_REPO" \
    "" \
    " Next step: Tell DeepGuard to integrate this model!" \
    " The model is now live at: https://huggingface.co/$HF_REPO" \
    ""
|
|